Initial draft

This commit is contained in:
Arpad Müller
2025-05-21 03:02:39 +02:00
parent 5df8284961
commit ab751abdbd
5 changed files with 190 additions and 10 deletions

View File

@@ -408,7 +408,7 @@ pub enum TimelineCreateRequestMode {
import_pgdata: TimelineCreateRequestModeImportPgdata,
},
Template {
template_tenant_id: TenantId,
template_tenant_id: TenantShardId,
template_timeline_id: TimelineId,
},
// NB: Bootstrap is all-optional, and thus the serde(untagged) will cause serde to stop at Bootstrap.

View File

@@ -49,6 +49,7 @@ use remote_timeline_client::{
};
use secondary::heatmap::{HeatMapTenant, HeatMapTimeline};
use storage_broker::BrokerClientChannel;
use storage_layer::Layer;
use timeline::compaction::{CompactionOutcome, GcCompactionQueue};
use timeline::import_pgdata::ImportingTimeline;
use timeline::offload::{OffloadError, offload_timeline};
@@ -107,7 +108,7 @@ use crate::{InitializationOrder, TEMP_FILE_SUFFIX, import_datadir, span, task_mg
static INIT_DB_SEMAPHORE: Lazy<Semaphore> = Lazy::new(|| Semaphore::new(8));
use utils::crashsafe;
use utils::generation::Generation;
use utils::id::{TenantId, TimelineId};
use utils::id::TimelineId;
use utils::lsn::{Lsn, RecordLsn};
pub mod blob_io;
@@ -879,7 +880,7 @@ pub(crate) struct CreateTimelineParamsBootstrap {
#[derive(Debug)]
pub(crate) struct CreateTimelineParamsTemplate {
pub(crate) new_timeline_id: TimelineId,
pub(crate) template_tenant_id: TenantId,
pub(crate) template_tenant_id: TenantShardId,
pub(crate) template_timeline_id: TimelineId,
}
@@ -929,6 +930,7 @@ pub(crate) enum CreateTimelineIdempotency {
ancestor_start_lsn: Lsn,
},
ImportPgdata(CreatingTimelineIdempotencyImportPgdata),
Template(TenantShardId, TimelineId),
}
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -2750,7 +2752,107 @@ impl TenantShard {
template_tenant_id,
template_timeline_id,
} = params;
todo!()
// 0. create a guard to prevent parallel creation attempts.
let timeline_create_guard = match self
.start_creating_timeline(
new_timeline_id,
CreateTimelineIdempotency::Template(template_tenant_id, template_timeline_id),
)
.await?
{
StartCreatingTimelineResult::CreateGuard(guard) => guard,
StartCreatingTimelineResult::Idempotent(timeline) => {
return Ok(CreateTimelineResult::Idempotent(timeline));
}
};
// 1. download the index part of the template timeline
// TODO can we hardcode the generation here or should we pass it as parameter?
let template_generation = Generation::new(1);
let (template_index_part, template_generation, _) =
remote_timeline_client::download_template_index_part(
&self.remote_storage,
&template_tenant_id,
&template_timeline_id,
template_generation,
&self.cancel,
)
.await
.unwrap();
tracing::info!(
"downloaded template index_part.json with generation {}",
template_generation.get_suffix()
);
// 2. create the timeline, initializing the index part of the new timeline with the layers from the template
let template_metadata = &template_index_part.metadata;
let new_metadata = TimelineMetadata::new(
template_metadata.disk_consistent_lsn(),
None,
None,
Lsn(0),
template_metadata.latest_gc_cutoff_lsn(),
template_metadata.initdb_lsn(),
template_metadata.pg_version(),
);
let (mut raw_timeline, _timeline_ctx) = self
.prepare_new_timeline(
new_timeline_id,
&new_metadata,
timeline_create_guard,
template_metadata.initdb_lsn(),
None,
None,
ctx,
)
.await?;
let _tenant_shard_id = raw_timeline.owning_tenant.tenant_shard_id;
raw_timeline
.write(|unfinished_timeline| async move {
// TODO make this more sophisticated: maybe a variant of load_layer_map?
let layers = template_index_part
.layer_metadata
.iter()
.map(|(layer_name, metadata)| {
// TODO support local sharing of layers
let mut metadata = metadata.clone();
metadata.template_ttid = Some((template_tenant_id, template_timeline_id));
Layer::for_evicted(
self.conf,
&unfinished_timeline,
layer_name.clone(),
metadata,
)
})
.collect();
unfinished_timeline
.layers
.blocking_write()
.open_mut()
.map_err(|_| CreateTimelineError::ShuttingDown)?
.initialize_local_layers(layers, template_metadata.disk_consistent_lsn());
fail::fail_point!("before-checkpoint-new-timeline", |_| {
Err(CreateTimelineError::Other(anyhow::anyhow!(
"failpoint before-checkpoint-new-timeline"
)))
});
Ok(())
})
.await?;
// All done!
let timeline = raw_timeline.finish_creation().await?;
// Callers are responsible to wait for uploads to complete and for activating the timeline.
// 4. finish
Ok(CreateTimelineResult::Created(timeline))
}
/// The returned [`Arc<Timeline>`] is NOT in the [`TenantShard::timelines`] map until the import

View File

@@ -189,8 +189,9 @@ use anyhow::Context;
use camino::Utf8Path;
use chrono::{NaiveDateTime, Utc};
pub(crate) use download::{
download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file,
list_remote_tenant_shards, list_remote_timelines,
download_index_part, download_initdb_tar_zst, download_template_index_part,
download_tenant_manifest, is_temp_download_file, list_remote_tenant_shards,
list_remote_timelines,
};
use index::GcCompactionState;
pub(crate) use index::LayerFileMetadata;
@@ -1768,7 +1769,7 @@ impl RemoteTimelineClient {
adopted_as: &Layer,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let source_remote_path = remote_layer_path(
let source_remote_path = remote_layer_path_maybe_template(
&self.tenant_shard_id.tenant_id,
&adopted
.get_timeline_id()
@@ -1776,6 +1777,7 @@ impl RemoteTimelineClient {
adopted.metadata().shard,
&adopted.layer_desc().layer_name(),
adopted.metadata().generation,
adopted.metadata().template_ttid,
);
let target_remote_path = remote_layer_path(
@@ -2684,6 +2686,31 @@ pub fn remote_layer_path(
RemotePath::from_string(&path).expect("Failed to construct path")
}
pub fn remote_layer_path_maybe_template(
tenant_id: &TenantId,
timeline_id: &TimelineId,
shard: ShardIndex,
layer_file_name: &LayerName,
generation: Generation,
template_ttid: Option<(TenantShardId, TimelineId)>,
) -> RemotePath {
if let Some((template_tenant_shard_id, template_timeline_id)) = template_ttid {
let template_tenant_id = template_tenant_shard_id.tenant_id;
let template_shard_id = template_tenant_shard_id.to_index();
// Generation-aware key format
let path = format!(
"templates/{template_tenant_id}{0}/{TIMELINES_SEGMENT_NAME}/{template_timeline_id}/{1}{2}",
template_shard_id.get_suffix(),
layer_file_name,
generation.get_suffix()
);
RemotePath::from_string(&path).expect("Failed to construct path")
} else {
remote_layer_path(tenant_id, timeline_id, shard, layer_file_name, generation)
}
}
/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially
/// remote_layer_path(a) == remote_layer_path(b) without the string allocations.
///
@@ -2729,6 +2756,19 @@ pub fn remote_index_path(
.expect("Failed to construct path")
}
pub fn remote_template_index_path(
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
generation: Generation,
) -> RemotePath {
RemotePath::from_string(&format!(
"templates/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{0}{1}",
IndexPart::FILE_NAME,
generation.get_suffix()
))
.expect("Failed to construct path")
}
pub(crate) fn remote_heatmap_path(tenant_shard_id: &TenantShardId) -> RemotePath {
RemotePath::from_string(&format!(
"tenants/{tenant_shard_id}/{TENANT_HEATMAP_BASENAME}"

View File

@@ -29,7 +29,7 @@ use super::manifest::TenantManifest;
use super::{
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path,
parse_remote_tenant_manifest_path, remote_index_path, remote_initdb_archive_path,
remote_initdb_preserved_archive_path, remote_tenant_manifest_path,
remote_initdb_preserved_archive_path, remote_template_index_path, remote_tenant_manifest_path,
remote_tenant_manifest_prefix, remote_tenant_path,
};
use crate::TEMP_FILE_SUFFIX;
@@ -39,7 +39,9 @@ use crate::span::{
debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id,
};
use crate::tenant::Generation;
use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
use crate::tenant::remote_timeline_client::{
remote_layer_path_maybe_template, remote_timelines_path,
};
use crate::tenant::storage_layer::LayerName;
use crate::virtual_file;
use crate::virtual_file::owned_buffers_io::write::FlushTaskError;
@@ -68,12 +70,13 @@ pub async fn download_layer_file<'a>(
let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id);
let remote_path = remote_layer_path(
let remote_path = remote_layer_path_maybe_template(
&tenant_shard_id.tenant_id,
&timeline_id,
layer_metadata.shard,
layer_file_name,
layer_metadata.generation,
layer_metadata.template_ttid,
);
let (bytes_amount, temp_file) = download_retry(
@@ -559,6 +562,35 @@ pub(crate) async fn download_index_part(
.await
}
/// index_part.json objects are suffixed with a generation number, so we cannot
/// directly GET the latest index part without doing some probing.
///
/// In this function we probe for the most recent index in a generation <= our current generation.
/// See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md
pub(crate) async fn download_template_index_part(
storage: &GenericRemoteStorage,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
my_generation: Generation,
cancel: &CancellationToken,
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
debug_assert_current_span_has_tenant_and_timeline_id();
let index_prefix = remote_template_index_path(tenant_shard_id, timeline_id, Generation::none());
download_generation_object(
storage,
tenant_shard_id,
Some(timeline_id),
my_generation,
"index_part",
index_prefix,
do_download_index_part,
parse_remote_index_path,
cancel,
)
.await
}
pub(crate) async fn download_tenant_manifest(
storage: &GenericRemoteStorage,
tenant_shard_id: &TenantShardId,

View File

@@ -12,6 +12,7 @@ use pageserver_api::shard::ShardIndex;
use serde::{Deserialize, Serialize};
use utils::id::TimelineId;
use utils::lsn::Lsn;
use utils::shard::TenantShardId;
use super::is_same_remote_layer_path;
use crate::tenant::Generation;
@@ -233,6 +234,10 @@ pub struct LayerFileMetadata {
#[serde(default = "ShardIndex::unsharded")]
#[serde(skip_serializing_if = "ShardIndex::is_unsharded")]
pub shard: ShardIndex,
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub template_ttid: Option<(TenantShardId, TimelineId)>,
}
impl LayerFileMetadata {
@@ -241,6 +246,7 @@ impl LayerFileMetadata {
file_size,
generation,
shard,
template_ttid: None,
}
}
/// Helper to get both generation and file size in a tuple