Mirror of https://github.com/neondatabase/neon.git, synced 2026-01-16 01:42:55 +00:00
Hardlink resident layers during detach ancestor (#10729)
After a detach ancestor operation, we don't want to on-demand download layers that are already resident. This has been shown to impede performance, sometimes quite a lot (50 seconds: https://github.com/neondatabase/neon/issues/8828#issuecomment-2643735644). Fixes #8828.
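The core idea, sketched as a small standalone program (hypothetical helper and paths, not the pageserver code itself): when the ancestor's layer file is still present on local disk, hard-link it into the detached timeline's directory so the new timeline starts with the layer resident; when it is not, fall back to treating the layer as remote-only.

```rust
use std::fs;
use std::io;
use std::path::{Path, PathBuf};

/// Hypothetical helper: if the source layer file is still resident on local
/// disk, hard-link it into the destination timeline directory so it never has
/// to be re-downloaded; otherwise report that it must stay remote-only.
fn adopt_layer_file(src: &Path, dst_dir: &Path) -> io::Result<(PathBuf, bool)> {
    let file_name = src.file_name().expect("layer path has a file name");
    let dst = dst_dir.join(file_name);
    if src.exists() {
        // A hard link points both paths at the same inode: no bytes are
        // copied and no later on-demand download is needed.
        fs::hard_link(src, &dst)?;
        Ok((dst, true))
    } else {
        // Not resident locally; the caller treats the layer as evicted.
        Ok((dst, false))
    }
}

fn main() -> io::Result<()> {
    // Throwaway demo directories.
    let tmp = std::env::temp_dir().join("adopt_layer_demo");
    let _ = fs::remove_dir_all(&tmp);
    let (src_dir, dst_dir) = (tmp.join("ancestor"), tmp.join("detached"));
    fs::create_dir_all(&src_dir)?;
    fs::create_dir_all(&dst_dir)?;
    let src = src_dir.join("layer-000");
    fs::write(&src, b"layer bytes")?;
    let (dst, hardlinked) = adopt_layer_file(&src, &dst_dir)?;
    println!("adopted {:?}, hardlinked: {hardlinked}", dst);
    Ok(())
}
```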
@@ -353,7 +353,6 @@ impl Layer {
     /// while the guard exists.
     ///
     /// Returns None if the layer is currently evicted or becoming evicted.
-    #[cfg(test)]
     pub(crate) async fn keep_resident(&self) -> Option<ResidentLayer> {
         let downloaded = self.0.inner.get().and_then(|rowe| rowe.get())?;
 
@@ -530,7 +529,6 @@ impl ResidentOrWantedEvicted {
     /// This is not used on the read path (anything that calls
     /// [`LayerInner::get_or_maybe_download`]) because it was decided that reads always win
     /// evictions, and part of that winning is using [`ResidentOrWantedEvicted::get_and_upgrade`].
-    #[cfg(test)]
     fn get(&self) -> Option<Arc<DownloadedLayer>> {
         match self {
             ResidentOrWantedEvicted::Resident(strong) => Some(strong.clone()),
@@ -6,7 +6,9 @@ use crate::{
     task_mgr::TaskKind,
     tenant::{
         remote_timeline_client::index::GcBlockingReason::DetachAncestor,
-        storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer},
+        storage_layer::{
+            layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer,
+        },
         Tenant,
     },
     virtual_file::{MaybeFatalIo, VirtualFile},
@@ -351,18 +353,7 @@ pub(super) async fn prepare(
 
         // FIXME: the fsync should be mandatory, after both rewrites and copies
         if wrote_any {
-            let timeline_dir = VirtualFile::open(
-                &detached
-                    .conf
-                    .timeline_path(&detached.tenant_shard_id, &detached.timeline_id),
-                ctx,
-            )
-            .await
-            .fatal_err("VirtualFile::open for timeline dir fsync");
-            timeline_dir
-                .sync_all()
-                .await
-                .fatal_err("VirtualFile::sync_all timeline dir");
+            fsync_timeline_dir(detached, ctx).await;
         }
     }
 
@@ -376,7 +367,7 @@ pub(super) async fn prepare(
         tasks.spawn(
             async move {
                 let _permit = limiter.acquire().await;
-                let owned = remote_copy(
+                let (owned, did_hardlink) = remote_copy(
                     &adopted,
                     &timeline,
                     timeline.generation,
@@ -384,16 +375,20 @@ pub(super) async fn prepare(
                     &timeline.cancel,
                 )
                 .await?;
-                tracing::info!(layer=%owned, "remote copied");
-                Ok(owned)
+                tracing::info!(layer=%owned, did_hard_link=%did_hardlink, "remote copied");
+                Ok((owned, did_hardlink))
             }
             .in_current_span(),
         );
     }
 
+    let mut should_fsync = false;
     while let Some(res) = tasks.join_next().await {
         match res {
-            Ok(Ok(owned)) => {
+            Ok(Ok((owned, did_hardlink))) => {
+                if did_hardlink {
+                    should_fsync = true;
+                }
                 new_layers.push(owned);
             }
             Ok(Err(failed)) => {
@@ -403,7 +398,10 @@ pub(super) async fn prepare(
         }
     }
 
-    // TODO: fsync directory again if we hardlinked something
+    // fsync directory again if we hardlinked something
+    if should_fsync {
+        fsync_timeline_dir(detached, ctx).await;
+    }
 
     let prepared = PreparedTimelineDetach { layers: new_layers };
 
@@ -629,35 +627,52 @@ async fn copy_lsn_prefix(
     }
 }
 
-/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote
-/// storage on successful return without the adopted layer being added to `index_part.json`.
+/// Creates a new Layer instance for the adopted layer, and ensures it is found in the remote
+/// storage on successful return. without the adopted layer being added to `index_part.json`.
+/// Returns (Layer, did hardlink)
 async fn remote_copy(
     adopted: &Layer,
     adoptee: &Arc<Timeline>,
     generation: Generation,
     shard_identity: ShardIdentity,
     cancel: &CancellationToken,
-) -> Result<Layer, Error> {
-    // depending if Layer::keep_resident we could hardlink
-
+) -> Result<(Layer, bool), Error> {
     let mut metadata = adopted.metadata();
     debug_assert!(metadata.generation <= generation);
     metadata.generation = generation;
     metadata.shard = shard_identity.shard_index();
 
-    let owned = crate::tenant::storage_layer::Layer::for_evicted(
-        adoptee.conf,
-        adoptee,
-        adopted.layer_desc().layer_name(),
-        metadata,
-    );
+    let conf = adoptee.conf;
+    let file_name = adopted.layer_desc().layer_name();
 
-    adoptee
+    // depending if Layer::keep_resident, do a hardlink
+    let did_hardlink;
+    let owned = if let Some(adopted_resident) = adopted.keep_resident().await {
+        let adopted_path = adopted_resident.local_path();
+        let adoptee_path = local_layer_path(
+            conf,
+            &adoptee.tenant_shard_id,
+            &adoptee.timeline_id,
+            &file_name,
+            &metadata.generation,
+        );
+        std::fs::hard_link(adopted_path, &adoptee_path)
+            .map_err(|e| Error::launder(e.into(), Error::Prepare))?;
+        did_hardlink = true;
+        Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard()
+    } else {
+        did_hardlink = false;
+        Layer::for_evicted(conf, adoptee, file_name, metadata)
+    };
+
+    let layer = adoptee
         .remote_client
         .copy_timeline_layer(adopted, &owned, cancel)
         .await
         .map(move |()| owned)
-        .map_err(|e| Error::launder(e, Error::Prepare))
+        .map_err(|e| Error::launder(e, Error::Prepare))?;
+
+    Ok((layer, did_hardlink))
 }
 
 pub(crate) enum DetachingAndReparenting {
@@ -1001,3 +1016,16 @@ fn check_no_archived_children_of_ancestor(
     }
     Ok(())
 }
+
+async fn fsync_timeline_dir(timeline: &Timeline, ctx: &RequestContext) {
+    let path = &timeline
+        .conf
+        .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id);
+    let timeline_dir = VirtualFile::open(&path, ctx)
+        .await
+        .fatal_err("VirtualFile::open for timeline dir fsync");
+    timeline_dir
+        .sync_all()
+        .await
+        .fatal_err("VirtualFile::sync_all timeline dir");
+}
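Why the diff threads a did_hardlink flag up to a single directory fsync: a hard link only adds a directory entry, and that entry is durable only once the containing directory itself has been flushed. A rough std-only illustration of that idea, assuming a Unix filesystem (the real code goes through VirtualFile and batches one fsync per timeline directory):

```rust
use std::fs::{self, File};
use std::io;
use std::path::Path;

/// Hard-link `src` into place as `dst`, then fsync the parent directory so the
/// new directory entry survives a crash. Illustrative sketch for Unix, where a
/// directory can be opened read-only and synced like a file.
fn hard_link_durably(src: &Path, dst: &Path) -> io::Result<()> {
    fs::hard_link(src, dst)?;
    let parent = dst
        .parent()
        .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidInput, "dst has no parent"))?;
    File::open(parent)?.sync_all()?; // persist the new directory entry
    Ok(())
}
```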