Files
neon/libs/utils/src/crashsafe.rs
Christian Schwarz ec3efc56a8 Revert "Revert "refactor(VirtualFile::crashsafe_overwrite): avoid Handle::block_on in callers"" (#6775)
Reverts neondatabase/neon#6765 , bringing back #6731

We concluded that #6731 never was the root cause for the instability in
staging.
More details:
https://neondb.slack.com/archives/C033RQ5SPDH/p1708011674755319

However, the massive amount of concurrent `spawn_blocking` calls from
the `save_metadata` calls during startups might cause a performance
regression.
So, we'll merge this PR here after we've stopped writing the metadata
#6769).
2024-02-23 17:16:43 +01:00

283 lines
9.1 KiB
Rust

use std::{
borrow::Cow,
fs::{self, File},
io::{self, Write},
};
use camino::{Utf8Path, Utf8PathBuf};
/// Similar to [`std::fs::create_dir`], except we fsync the
/// created directory and its parent.
pub fn create_dir(path: impl AsRef<Utf8Path>) -> io::Result<()> {
let path = path.as_ref();
fs::create_dir(path)?;
fsync_file_and_parent(path)?;
Ok(())
}
/// Similar to [`std::fs::create_dir_all`], except we fsync all
/// newly created directories and the pre-existing parent.
pub fn create_dir_all(path: impl AsRef<Utf8Path>) -> io::Result<()> {
let mut path = path.as_ref();
let mut dirs_to_create = Vec::new();
// Figure out which directories we need to create.
loop {
match path.metadata() {
Ok(metadata) if metadata.is_dir() => break,
Ok(_) => {
return Err(io::Error::new(
io::ErrorKind::AlreadyExists,
format!("non-directory found in path: {path}"),
));
}
Err(ref e) if e.kind() == io::ErrorKind::NotFound => {}
Err(e) => return Err(e),
}
dirs_to_create.push(path);
match path.parent() {
Some(parent) => path = parent,
None => {
return Err(io::Error::new(
io::ErrorKind::InvalidInput,
format!("can't find parent of path '{path}'"),
));
}
}
}
// Create directories from parent to child.
for &path in dirs_to_create.iter().rev() {
fs::create_dir(path)?;
}
// Fsync the created directories from child to parent.
for &path in dirs_to_create.iter() {
fsync(path)?;
}
// If we created any new directories, fsync the parent.
if !dirs_to_create.is_empty() {
fsync(path)?;
}
Ok(())
}
/// Adds a suffix to the file(directory) name, either appending the suffix to the end of its extension,
/// or if there's no extension, creates one and puts a suffix there.
pub fn path_with_suffix_extension(
original_path: impl AsRef<Utf8Path>,
suffix: &str,
) -> Utf8PathBuf {
let new_extension = match original_path.as_ref().extension() {
Some(extension) => Cow::Owned(format!("{extension}.{suffix}")),
None => Cow::Borrowed(suffix),
};
original_path.as_ref().with_extension(new_extension)
}
pub fn fsync_file_and_parent(file_path: &Utf8Path) -> io::Result<()> {
let parent = file_path.parent().ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
format!("File {file_path:?} has no parent"),
)
})?;
fsync(file_path)?;
fsync(parent)?;
Ok(())
}
pub fn fsync(path: &Utf8Path) -> io::Result<()> {
File::open(path)
.map_err(|e| io::Error::new(e.kind(), format!("Failed to open the file {path:?}: {e}")))
.and_then(|file| {
file.sync_all().map_err(|e| {
io::Error::new(
e.kind(),
format!("Failed to sync file {path:?} data and metadata: {e}"),
)
})
})
.map_err(|e| io::Error::new(e.kind(), format!("Failed to fsync file {path:?}: {e}")))
}
pub async fn fsync_async(path: impl AsRef<Utf8Path>) -> Result<(), std::io::Error> {
tokio::fs::File::open(path.as_ref()).await?.sync_all().await
}
pub async fn fsync_async_opt(
path: impl AsRef<Utf8Path>,
do_fsync: bool,
) -> Result<(), std::io::Error> {
if do_fsync {
fsync_async(path.as_ref()).await?;
}
Ok(())
}
/// Like postgres' durable_rename, renames file issuing fsyncs do make it
/// durable. After return, file and rename are guaranteed to be persisted.
///
/// Unlike postgres, it only does fsyncs to 1) file to be renamed to make
/// contents durable; 2) its directory entry to make rename durable 3) again to
/// already renamed file, which is not required by standards but postgres does
/// it, let's stick to that. Postgres additionally fsyncs newpath *before*
/// rename if it exists to ensure that at least one of the files survives, but
/// current callers don't need that.
///
/// virtual_file.rs has similar code, but it doesn't use vfs.
///
/// Useful links: <https://lwn.net/Articles/457667/>
/// <https://www.postgresql.org/message-id/flat/56583BDD.9060302%402ndquadrant.com>
/// <https://thunk.org/tytso/blog/2009/03/15/dont-fear-the-fsync/>
pub async fn durable_rename(
old_path: impl AsRef<Utf8Path>,
new_path: impl AsRef<Utf8Path>,
do_fsync: bool,
) -> io::Result<()> {
// first fsync the file
fsync_async_opt(old_path.as_ref(), do_fsync).await?;
// Time to do the real deal.
tokio::fs::rename(old_path.as_ref(), new_path.as_ref()).await?;
// Postgres'ish fsync of renamed file.
fsync_async_opt(new_path.as_ref(), do_fsync).await?;
// Now fsync the parent
let parent = match new_path.as_ref().parent() {
Some(p) => p,
None => Utf8Path::new("./"), // assume current dir if there is no parent
};
fsync_async_opt(parent, do_fsync).await?;
Ok(())
}
/// Writes a file to the specified `final_path` in a crash safe fasion, using [`std::fs`].
///
/// The file is first written to the specified `tmp_path`, and in a second
/// step, the `tmp_path` is renamed to the `final_path`. Intermediary fsync
/// and atomic rename guarantee that, if we crash at any point, there will never
/// be a partially written file at `final_path` (but maybe at `tmp_path`).
///
/// Callers are responsible for serializing calls of this function for a given `final_path`.
/// If they don't, there may be an error due to conflicting `tmp_path`, or there will
/// be no error and the content of `final_path` will be the "winner" caller's `content`.
/// I.e., the atomticity guarantees still hold.
pub fn overwrite(
final_path: &Utf8Path,
tmp_path: &Utf8Path,
content: &[u8],
) -> std::io::Result<()> {
let Some(final_path_parent) = final_path.parent() else {
return Err(std::io::Error::from_raw_os_error(
nix::errno::Errno::EINVAL as i32,
));
};
std::fs::remove_file(tmp_path).or_else(crate::fs_ext::ignore_not_found)?;
let mut file = std::fs::OpenOptions::new()
.write(true)
// Use `create_new` so that, if we race with ourselves or something else,
// we bail out instead of causing damage.
.create_new(true)
.open(tmp_path)?;
file.write_all(content)?;
file.sync_all()?;
drop(file); // don't keep the fd open for longer than we have to
std::fs::rename(tmp_path, final_path)?;
let final_parent_dirfd = std::fs::OpenOptions::new()
.read(true)
.open(final_path_parent)?;
final_parent_dirfd.sync_all()?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_create_dir_fsyncd() {
let dir = camino_tempfile::tempdir().unwrap();
let existing_dir_path = dir.path();
let err = create_dir(existing_dir_path).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::AlreadyExists);
let child_dir = existing_dir_path.join("child");
create_dir(child_dir).unwrap();
let nested_child_dir = existing_dir_path.join("child1").join("child2");
let err = create_dir(nested_child_dir).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::NotFound);
}
#[test]
fn test_create_dir_all_fsyncd() {
let dir = camino_tempfile::tempdir().unwrap();
let existing_dir_path = dir.path();
create_dir_all(existing_dir_path).unwrap();
let child_dir = existing_dir_path.join("child");
assert!(!child_dir.exists());
create_dir_all(&child_dir).unwrap();
assert!(child_dir.exists());
let nested_child_dir = existing_dir_path.join("child1").join("child2");
assert!(!nested_child_dir.exists());
create_dir_all(&nested_child_dir).unwrap();
assert!(nested_child_dir.exists());
let file_path = existing_dir_path.join("file");
std::fs::write(&file_path, b"").unwrap();
let err = create_dir_all(&file_path).unwrap_err();
assert_eq!(err.kind(), io::ErrorKind::AlreadyExists);
let invalid_dir_path = file_path.join("folder");
create_dir_all(invalid_dir_path).unwrap_err();
}
#[test]
fn test_path_with_suffix_extension() {
let p = Utf8PathBuf::from("/foo/bar");
assert_eq!(
&path_with_suffix_extension(p, "temp").to_string(),
"/foo/bar.temp"
);
let p = Utf8PathBuf::from("/foo/bar");
assert_eq!(
&path_with_suffix_extension(p, "temp.temp").to_string(),
"/foo/bar.temp.temp"
);
let p = Utf8PathBuf::from("/foo/bar.baz");
assert_eq!(
&path_with_suffix_extension(p, "temp.temp").to_string(),
"/foo/bar.baz.temp.temp"
);
let p = Utf8PathBuf::from("/foo/bar.baz");
assert_eq!(
&path_with_suffix_extension(p, ".temp").to_string(),
"/foo/bar.baz..temp"
);
let p = Utf8PathBuf::from("/foo/bar/dir/");
assert_eq!(
&path_with_suffix_extension(p, ".temp").to_string(),
"/foo/bar/dir..temp"
);
}
}