Hardlink resident layers during detach ancestor (#10729)

After a detach ancestor operation, we don't want to on-demand download
layers that are already resident. This has been shown to impede performance,
sometimes quite a lot (50 seconds:
https://github.com/neondatabase/neon/issues/8828#issuecomment-2643735644)

Fixes #8828.
This commit is contained in:
Arpad Müller
2025-02-11 17:58:34 +01:00
committed by GitHub
parent be447ba4f8
commit f7b2293317
2 changed files with 59 additions and 33 deletions

View File

@@ -353,7 +353,6 @@ impl Layer {
/// while the guard exists.
///
/// Returns None if the layer is currently evicted or becoming evicted.
#[cfg(test)]
pub(crate) async fn keep_resident(&self) -> Option<ResidentLayer> {
let downloaded = self.0.inner.get().and_then(|rowe| rowe.get())?;
@@ -530,7 +529,6 @@ impl ResidentOrWantedEvicted {
/// This is not used on the read path (anything that calls
/// [`LayerInner::get_or_maybe_download`]) because it was decided that reads always win
/// evictions, and part of that winning is using [`ResidentOrWantedEvicted::get_and_upgrade`].
#[cfg(test)]
fn get(&self) -> Option<Arc<DownloadedLayer>> {
match self {
ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()),

View File

@@ -6,7 +6,9 @@ use crate::{
task_mgr::TaskKind,
tenant::{
remote_timeline_client::index::GcBlockingReason::DetachAncestor,
storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer},
storage_layer::{
layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer,
},
Tenant,
},
virtual_file::{MaybeFatalIo, VirtualFile},
@@ -351,18 +353,7 @@ pub(super) async fn prepare(
// FIXME: the fsync should be mandatory, after both rewrites and copies
if wrote_any {
let timeline_dir = VirtualFile::open(
&detached
.conf
.timeline_path(&detached.tenant_shard_id, &detached.timeline_id),
ctx,
)
.await
.fatal_err("VirtualFile::open for timeline dir fsync");
timeline_dir
.sync_all()
.await
.fatal_err("VirtualFile::sync_all timeline dir");
fsync_timeline_dir(detached, ctx).await;
}
}
@@ -376,7 +367,7 @@ pub(super) async fn prepare(
tasks.spawn(
async move {
let _permit = limiter.acquire().await;
let owned = remote_copy(
let (owned, did_hardlink) = remote_copy(
&adopted,
&timeline,
timeline.generation,
@@ -384,16 +375,20 @@ pub(super) async fn prepare(
&timeline.cancel,
)
.await?;
tracing::info!(layer=%owned, "remote copied");
Ok(owned)
tracing::info!(layer=%owned, did_hard_link=%did_hardlink, "remote copied");
Ok((owned, did_hardlink))
}
.in_current_span(),
);
}
let mut should_fsync = false;
while let Some(res) = tasks.join_next().await {
match res {
Ok(Ok(owned)) => {
Ok(Ok((owned, did_hardlink))) => {
if did_hardlink {
should_fsync = true;
}
new_layers.push(owned);
}
Ok(Err(failed)) => {
@@ -403,7 +398,10 @@ pub(super) async fn prepare(
}
}
// TODO: fsync directory again if we hardlinked something
// fsync directory again if we hardlinked something
if should_fsync {
fsync_timeline_dir(detached, ctx).await;
}
let prepared = PreparedTimelineDetach { layers: new_layers };
@@ -629,35 +627,52 @@ async fn copy_lsn_prefix(
}
}
/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote
/// storage on successful return without the adopted layer being added to `index_part.json`.
/// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote
/// storage on successful return, without the adopted layer being added to `index_part.json`.
/// Returns (Layer, did hardlink)
async fn remote_copy(
adopted: &Layer,
adoptee: &Arc<Timeline>,
generation: Generation,
shard_identity: ShardIdentity,
cancel: &CancellationToken,
) -> Result<Layer, Error> {
// depending if Layer::keep_resident we could hardlink
) -> Result<(Layer, bool), Error> {
let mut metadata = adopted.metadata();
debug_assert!(metadata.generation <= generation);
metadata.generation = generation;
metadata.shard = shard_identity.shard_index();
let owned = crate::tenant::storage_layer::Layer::for_evicted(
adoptee.conf,
adoptee,
adopted.layer_desc().layer_name(),
metadata,
);
let conf = adoptee.conf;
let file_name = adopted.layer_desc().layer_name();
adoptee
// depending if Layer::keep_resident, do a hardlink
let did_hardlink;
let owned = if let Some(adopted_resident) = adopted.keep_resident().await {
let adopted_path = adopted_resident.local_path();
let adoptee_path = local_layer_path(
conf,
&adoptee.tenant_shard_id,
&adoptee.timeline_id,
&file_name,
&metadata.generation,
);
std::fs::hard_link(adopted_path, &adoptee_path)
.map_err(|e| Error::launder(e.into(), Error::Prepare))?;
did_hardlink = true;
Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard()
} else {
did_hardlink = false;
Layer::for_evicted(conf, adoptee, file_name, metadata)
};
let layer = adoptee
.remote_client
.copy_timeline_layer(adopted, &owned, cancel)
.await
.map(move |()| owned)
.map_err(|e| Error::launder(e, Error::Prepare))
.map_err(|e| Error::launder(e, Error::Prepare))?;
Ok((layer, did_hardlink))
}
pub(crate) enum DetachingAndReparenting {
@@ -1001,3 +1016,16 @@ fn check_no_archived_children_of_ancestor(
}
Ok(())
}
/// Opens the on-disk directory of `timeline` and fsyncs it.
///
/// The directory path is derived from the timeline's `conf` together with its
/// `tenant_shard_id` and `timeline_id`. This is called after layer files have
/// been written or hardlinked into the timeline directory.
///
/// Nothing is returned: a failure to open or to sync the directory is handed to
/// `fatal_err` with a message naming the failing step (presumably treated as
/// unrecoverable — see `MaybeFatalIo`).
async fn fsync_timeline_dir(timeline: &Timeline, ctx: &RequestContext) {
    let dir_path = timeline
        .conf
        .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id);

    // Open the directory itself (not a file inside it) so that it can be synced.
    let dir = VirtualFile::open(&dir_path, ctx)
        .await
        .fatal_err("VirtualFile::open for timeline dir fsync");

    dir.sync_all()
        .await
        .fatal_err("VirtualFile::sync_all timeline dir");
}