pageserver: fix deletion/creation race

Closes: https://github.com/neondatabase/neon/issues/5878
John Spray
2023-11-17 23:23:16 +00:00
parent 5d13a2e426
commit 1c7df523e2
2 changed files with 37 additions and 8 deletions


@@ -647,20 +647,52 @@ impl RemoteTimelineClient {
     /// deletion won't actually be performed, until all previously scheduled
     /// upload operations, and the index file upload, have completed
     /// successfully.
+    ///
+    /// No work is done if the layers are not present in the remote index. Returns
+    /// false if no work was done.
     pub fn schedule_layer_file_deletion(
         self: &Arc<Self>,
         names: &[LayerFileName],
-    ) -> anyhow::Result<()> {
+    ) -> anyhow::Result<bool> {
         let mut guard = self.upload_queue.lock().unwrap();
         let upload_queue = guard.initialized_mut()?;
         let with_generations =
             self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned());
-        self.schedule_deletion_of_unlinked0(upload_queue, with_generations);
+        if with_generations.is_empty() {
+            // No-op.
+            Ok(false)
+        } else {
+            self.schedule_deletion_of_unlinked0(upload_queue, with_generations);
-        // Launch the tasks immediately, if possible
-        self.launch_queued_tasks(upload_queue);
+            // Launch the tasks immediately, if possible
+            self.launch_queued_tasks(upload_queue);
+            Ok(true)
+        }
     }
+
+    /// Schedule layer deletions and wait for them to fully execute.
+    ///
+    /// This is not the normal way to delete layers: usually deletion is scheduled and
+    /// left to run in the background. However, during startup in [`crate::tenant::Timeline::load_layer_map`]
+    /// we may find that there are some layers in the future wrt disk_consistent_lsn,
+    /// and drop them. This is different to a normal deletion, because we are deleting layers that
+    /// we may soon re-upload with the same name: it's important that the deletions do not race with
+    /// those later uploads. So this function includes a full flush of the deletion queue.
+    ///
+    /// TODO: remove, as we will no longer need this function when we are always running pageservers with
+    /// generations enabled, because layer keys after a restart will always differ from layers before
+    /// the restart by their generation suffix.
+    pub async fn flushing_delete_layers(
+        self: &Arc<Self>,
+        names: &[LayerFileName],
+    ) -> anyhow::Result<()> {
+        if self.schedule_layer_file_deletion(names)? {
+            self.wait_completion().await?;
+            self.deletion_queue_client.flush_execute().await?;
+        }
+        Ok(())
+    }


@@ -1709,10 +1709,7 @@ impl Timeline {
         guard.initialize_local_layers(loaded_layers, disk_consistent_lsn + 1);
 
         if let Some(rtc) = self.remote_client.as_ref() {
-            rtc.schedule_layer_file_deletion(&needs_cleanup)?;
-            rtc.schedule_index_upload_for_file_changes()?;
-            // Tenant::create_timeline will wait for these uploads to happen before returning, or
-            // on retry.
+            rtc.flushing_delete_layers(&needs_cleanup).await?;
         }
 
         info!(
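As the TODO in the first hunk notes, this flush becomes unnecessary once generations are always enabled: a layer re-uploaded after a restart carries a new generation suffix, so it never shares a remote key with the pre-restart object being deleted. A tiny sketch of that reasoning, using a made-up key format (the real scheme lives in the pageserver's generation-aware remote paths and is not reproduced here):

// Hypothetical key format for illustration only; the real generation
// suffix scheme is defined by the pageserver.
fn remote_layer_key(layer_name: &str, generation: u32) -> String {
    format!("{layer_name}-{generation:08x}")
}

fn main() {
    let name = "layer_0000000001000000";
    let before_restart = remote_layer_key(name, 1);
    let after_restart = remote_layer_key(name, 2);

    // Different generations yield different remote keys, so a stale DELETE
    // of the old key cannot clobber the freshly uploaded layer.
    assert_ne!(before_restart, after_restart);
    println!("{before_restart} vs {after_restart}");
}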