From 6eb01fb5969dcab0c358df3bcb21af0aaaca3ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Sat, 13 Jan 2024 09:50:26 +0100 Subject: [PATCH] retrying version of remove_dir_all --- libs/utils/src/fs_ext.rs | 33 +++++++++++++++++++++++++++++++++ pageserver/src/tenant.rs | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/libs/utils/src/fs_ext.rs b/libs/utils/src/fs_ext.rs index 90ba348a02..4bd08674f1 100644 --- a/libs/utils/src/fs_ext.rs +++ b/libs/utils/src/fs_ext.rs @@ -2,6 +2,7 @@ use std::{fs, io, path::Path}; use anyhow::Context; +use camino::Utf8Path; pub trait PathExt { /// Returns an error if `self` is not a directory. @@ -38,6 +39,38 @@ pub async fn list_dir(path: impl AsRef) -> anyhow::Result> { Ok(content) } +/// Version of [`std::fs::remove_dir_all`] that is idempotent and tolerates parallel removals of the same path or sub-paths +/// +/// The idempotency implies that we return `Ok(())`` even if the file is already gone or has never existed, +/// unlike `remove_dir_all` from std/tokio. +pub async fn remove_dir_all io::Error>( + path: impl AsRef, + cancel: crate::backoff::Cancel, +) -> io::Result<()> { + crate::backoff::retry( + || async { + match tokio::fs::remove_dir_all(path.as_ref()).await { + // If the directory is gone, we are done. + Err(e) if e.kind() == io::ErrorKind::NotFound && !path.as_ref().exists() => Ok(()), + other => other, + } + }, + |err| { + if err.kind() == io::ErrorKind::NotFound { + // We got a not found error and the directory still exists. + // This was likely due to a removal we are racing with, so retry. + return false; + } + true + }, + 3, + u32::MAX, + "directory removal", + cancel, + ) + .await +} + pub fn ignore_not_found(e: io::Error) -> io::Result<()> { if e.kind() == io::ErrorKind::NotFound { Ok(()) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 0e09f2cbf1..4da42e509d 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3198,7 +3198,7 @@ impl Tenant { )); scopeguard::defer! { - if let Err(e) = fs::remove_file(&temp_path) { + if let Err(e) = utils::fs_ext::remove_dir_all(&temp_path, backoff::Cancel::new(CancellationToken::new(), || panic!())).await { error!("Failed to remove temporary initdb archive '{temp_path}': {e}"); } }