safekeeper: fsync filesystem on start.

We can't really rely on files contents after boot without fsync'ing
them.
This commit is contained in:
Arseny Sher
2024-09-06 17:40:21 +03:00
committed by Arseny Sher
parent 8eab7009c1
commit c1a51416db
4 changed files with 35 additions and 20 deletions

View File

@@ -1,9 +1,11 @@
use std::os::fd::AsRawFd;
use std::{
borrow::Cow,
fs::{self, File},
io::{self, Write},
};
use anyhow::Context;
use camino::{Utf8Path, Utf8PathBuf};
/// Similar to [`std::fs::create_dir`], except we fsync the
@@ -203,6 +205,25 @@ pub fn overwrite(
Ok(())
}
/// Syncs the filesystem for the given file descriptor.
pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> {
// Linux guarantees durability for syncfs.
// POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
#[cfg(target_os = "linux")]
{
nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?;
}
#[cfg(target_os = "macos")]
{
// macOS is not a production platform for Neon, don't even bother.
}
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
{
compile_error!("Unsupported OS");
}
Ok(())
}
#[cfg(test)]
mod tests {

View File

@@ -37,6 +37,7 @@ use pageserver::{
virtual_file,
};
use postgres_backend::AuthType;
use utils::crashsafe::syncfs;
use utils::failpoint_support;
use utils::logging::TracingErrorLayerEnablement;
use utils::{
@@ -155,23 +156,7 @@ fn main() -> anyhow::Result<()> {
};
let started = Instant::now();
// Linux guarantees durability for syncfs.
// POSIX doesn't have syncfs, and further does not actually guarantee durability of sync().
#[cfg(target_os = "linux")]
{
use std::os::fd::AsRawFd;
nix::unistd::syncfs(dirfd.as_raw_fd()).context("syncfs")?;
}
#[cfg(target_os = "macos")]
{
// macOS is not a production platform for Neon, don't even bother.
drop(dirfd);
}
#[cfg(not(any(target_os = "linux", target_os = "macos")))]
{
compile_error!("Unsupported OS");
}
syncfs(dirfd)?;
let elapsed = started.elapsed();
info!(
elapsed_ms = elapsed.as_millis(),

View File

@@ -19,7 +19,7 @@ use std::fs::{self, File};
use std::io::{ErrorKind, Write};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::{Duration, Instant};
use storage_broker::Uri;
use tracing::*;
@@ -373,6 +373,16 @@ async fn main() -> anyhow::Result<()> {
type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
// fsync the datadir to make sure we have a consistent state on disk.
let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
let started = Instant::now();
utils::crashsafe::syncfs(dfd)?;
let elapsed = started.elapsed();
info!(
elapsed_ms = elapsed.as_millis(),
"syncfs data directory done"
);
info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
error!("failed to bind to address {}: {}", conf.listen_pg_addr, e);

View File

@@ -179,8 +179,7 @@ impl PhysicalStorage {
)
};
// TODO: do we really know that write_lsn is fully flushed to disk?
// If not, maybe it's better to call fsync() here to be sure?
// note: this assumes we fsync'ed whole datadir on start.
let flush_lsn = write_lsn;
debug!(