diff --git a/control_plane/attachment_service/src/persistence.rs b/control_plane/attachment_service/src/persistence.rs index db4f04d001..b9c79ff916 100644 --- a/control_plane/attachment_service/src/persistence.rs +++ b/control_plane/attachment_service/src/persistence.rs @@ -1,5 +1,6 @@ use std::{collections::HashMap, str::FromStr}; +use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use control_plane::{ attachment_service::{NodeAvailability, NodeSchedulingPolicy}, @@ -41,10 +42,14 @@ struct PendingWrite { } impl PendingWrite { - async fn commit(&self) -> anyhow::Result<()> { - tokio::fs::write(&self.path, &self.bytes).await?; - - Ok(()) + async fn commit(self) -> anyhow::Result<()> { + tokio::task::spawn_blocking(move || { + let tmp_path = utils::crashsafe::path_with_suffix_extension(&self.path, "___new"); + utils::crashsafe::overwrite(&self.path, &tmp_path, &self.bytes) + }) + .await + .context("spawn_blocking")? + .context("write file") } } diff --git a/libs/utils/src/crashsafe.rs b/libs/utils/src/crashsafe.rs index b089af4a02..0c6855d17b 100644 --- a/libs/utils/src/crashsafe.rs +++ b/libs/utils/src/crashsafe.rs @@ -1,7 +1,7 @@ use std::{ borrow::Cow, fs::{self, File}, - io, + io::{self, Write}, }; use camino::{Utf8Path, Utf8PathBuf}; @@ -112,6 +112,48 @@ pub async fn fsync_async(path: impl AsRef) -> Result<(), std::io::Erro tokio::fs::File::open(path.as_ref()).await?.sync_all().await } +/// Writes a file to the specified `final_path` in a crash safe fasion +/// +/// The file is first written to the specified tmp_path, and in a second +/// step, the tmp path is renamed to the final path. As renames are +/// atomic, a crash during the write operation will never leave behind a +/// partially written file. +/// +/// NB: an async variant of this code exists in Pageserver's VirtualFile. +pub fn overwrite( + final_path: &Utf8Path, + tmp_path: &Utf8Path, + content: &[u8], +) -> std::io::Result<()> { + let Some(final_path_parent) = final_path.parent() else { + return Err(std::io::Error::from_raw_os_error( + nix::errno::Errno::EINVAL as i32, + )); + }; + std::fs::remove_file(tmp_path).or_else(crate::fs_ext::ignore_not_found)?; + let mut file = std::fs::OpenOptions::new() + .write(true) + // Use `create_new` so that, if we race with ourselves or something else, + // we bail out instead of causing damage. + .create_new(true) + .open(tmp_path)?; + file.write_all(content)?; + file.sync_all()?; + drop(file); // before the rename, that's important! + // renames are atomic + std::fs::rename(tmp_path, final_path)?; + // Only open final path parent dirfd now, so that this operation only + // ever holds one VirtualFile fd at a time. That's important because + // the current `find_victim_slot` impl might pick the same slot for both + // VirtualFile., and it eventually does a blocking write lock instead of + // try_lock. + let final_parent_dirfd = std::fs::OpenOptions::new() + .read(true) + .open(final_path_parent)?; + final_parent_dirfd.sync_all()?; + Ok(()) +} + #[cfg(test)] mod tests { diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 9feefd8a32..06f58b5c52 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -356,12 +356,7 @@ impl VirtualFile { Ok(vfile) } - /// Writes a file to the specified `final_path` in a crash safe fasion - /// - /// The file is first written to the specified tmp_path, and in a second - /// step, the tmp path is renamed to the final path. As renames are - /// atomic, a crash during the write operation will never leave behind a - /// partially written file. + /// Async & [`VirtualFile`]-enabled version of [`::utils::crashsafe::overwrite`]. pub async fn crashsafe_overwrite( final_path: &Utf8Path, tmp_path: &Utf8Path,