diff --git a/libs/utils/src/crashsafe.rs b/libs/utils/src/crashsafe.rs index 756b19138c..946fedf6e5 100644 --- a/libs/utils/src/crashsafe.rs +++ b/libs/utils/src/crashsafe.rs @@ -1,9 +1,11 @@ +use std::os::fd::AsRawFd; use std::{ borrow::Cow, fs::{self, File}, io::{self, Write}, }; +use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; /// Similar to [`std::fs::create_dir`], except we fsync the @@ -203,6 +205,25 @@ pub fn overwrite( Ok(()) } +/// Syncs the filesystem for the given file descriptor. +pub fn syncfs(fd: impl AsRawFd) -> anyhow::Result<()> { + // Linux guarantees durability for syncfs. + // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync(). + #[cfg(target_os = "linux")] + { + nix::unistd::syncfs(fd.as_raw_fd()).context("syncfs")?; + } + #[cfg(target_os = "macos")] + { + // macOS is not a production platform for Neon, don't even bother. + } + #[cfg(not(any(target_os = "linux", target_os = "macos")))] + { + compile_error!("Unsupported OS"); + } + Ok(()) +} + #[cfg(test)] mod tests { diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 59194ab4bd..d15a0e47a4 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -37,6 +37,7 @@ use pageserver::{ virtual_file, }; use postgres_backend::AuthType; +use utils::crashsafe::syncfs; use utils::failpoint_support; use utils::logging::TracingErrorLayerEnablement; use utils::{ @@ -155,23 +156,7 @@ fn main() -> anyhow::Result<()> { }; let started = Instant::now(); - // Linux guarantees durability for syncfs. - // POSIX doesn't have syncfs, and further does not actually guarantee durability of sync(). - #[cfg(target_os = "linux")] - { - use std::os::fd::AsRawFd; - nix::unistd::syncfs(dirfd.as_raw_fd()).context("syncfs")?; - } - #[cfg(target_os = "macos")] - { - // macOS is not a production platform for Neon, don't even bother. - drop(dirfd); - } - #[cfg(not(any(target_os = "linux", target_os = "macos")))] - { - compile_error!("Unsupported OS"); - } - + syncfs(dirfd)?; let elapsed = started.elapsed(); info!( elapsed_ms = elapsed.as_millis(), diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 644d5e6eaf..5270934f5e 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -19,7 +19,7 @@ use std::fs::{self, File}; use std::io::{ErrorKind, Write}; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; +use std::time::{Duration, Instant}; use storage_broker::Uri; use tracing::*; @@ -373,6 +373,16 @@ async fn main() -> anyhow::Result<()> { type JoinTaskRes = Result, JoinError>; async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> { + // fsync the datadir to make sure we have a consistent state on disk. + let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?; + let started = Instant::now(); + utils::crashsafe::syncfs(dfd)?; + let elapsed = started.elapsed(); + info!( + elapsed_ms = elapsed.as_millis(), + "syncfs data directory done" + ); + info!("starting safekeeper WAL service on {}", conf.listen_pg_addr); let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| { error!("failed to bind to address {}: {}", conf.listen_pg_addr, e); diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index c477fe5c7b..46c260901d 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -179,8 +179,7 @@ impl PhysicalStorage { ) }; - // TODO: do we really know that write_lsn is fully flushed to disk? - // If not, maybe it's better to call fsync() here to be sure? + // note: this assumes we fsync'ed whole datadir on start. let flush_lsn = write_lsn; debug!(