From cde1654d7ba38881b2e34c2b4c2e15392726a190 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Fri, 27 Sep 2024 14:58:50 -0400 Subject: [PATCH] fix(pageserver): abort process if fsync fails (#9108) close https://github.com/neondatabase/neon/issues/8140 The original issue is rather vague on what we should do. After discussion w/ @problame we decided to narrow down the problems we want to solve in that issue. * read path -- do not panic for now. * write path -- panic only on write errors (i.e., device error, fsync error), but not on no-space for now. The guideline is that if the pageserver behavior could lead to violation of persistent constraints (i.e., return an operation as successful but not actually persisting things), we should panic. Fsync is the place where both of us agree that we should panic, because if fsync fails, the kernel will mark dirty pages as clean, and the next fsync will not necessarily return false. This would make the storage client assume the operation is successful. ## Summary of changes Make fsync panic on fatal errors. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/mgr.rs | 6 +++++- pageserver/src/tenant/remote_timeline_client/download.rs | 3 +++ pageserver/src/tenant/storage_layer/delta_layer.rs | 6 ++++-- pageserver/src/tenant/storage_layer/image_layer.rs | 6 ++++-- pageserver/src/virtual_file.rs | 5 +++-- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index f6249056d8..c7212e89ba 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -219,7 +219,11 @@ async fn safe_rename_tenant_dir(path: impl AsRef) -> std::io::Result( destination_file .flush() .await + .maybe_fatal_err("download_object sync_all") .with_context(|| format!("flush source file at {dst_path}")) .map_err(DownloadError::Other)?; @@ -185,6 +186,7 @@ async fn download_object<'a>( destination_file .sync_all() .await + .maybe_fatal_err("download_object sync_all") .with_context(|| format!("failed to fsync source file at {dst_path}")) .map_err(DownloadError::Other)?; @@ -232,6 +234,7 @@ async fn download_object<'a>( destination_file .sync_all() .await + .maybe_fatal_err("download_object sync_all") .with_context(|| format!("failed to fsync source file at {dst_path}")) .map_err(DownloadError::Other)?; diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 9241ff569c..6f9eda85f5 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -44,7 +44,7 @@ use crate::tenant::vectored_blob_io::{ }; use crate::tenant::PageReconstructError; use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; -use crate::virtual_file::{self, VirtualFile}; +use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; use crate::{walrecord, TEMP_FILE_SUFFIX}; use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION}; use anyhow::{anyhow, bail, ensure, Context, Result}; @@ -589,7 +589,9 @@ impl DeltaLayerWriterInner { ); // fsync the file - file.sync_all().await?; + file.sync_all() + .await + .maybe_fatal_err("delta_layer sync_all")?; trace!("created delta layer {}", self.path); diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 940d169db0..3dcd7bc962 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -41,7 +41,7 @@ use crate::tenant::vectored_blob_io::{ }; use crate::tenant::PageReconstructError; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; -use crate::virtual_file::{self, VirtualFile}; +use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; use anyhow::{anyhow, bail, ensure, Context, Result}; use bytes::{Bytes, BytesMut}; @@ -889,7 +889,9 @@ impl ImageLayerWriterInner { // set inner.file here. The first read will have to re-open it. // fsync the file - file.sync_all().await?; + file.sync_all() + .await + .maybe_fatal_err("image_layer sync_all")?; trace!("created image layer {}", self.path); diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 17d9f94c95..5b7b279888 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -466,6 +466,7 @@ impl VirtualFile { &[] }; utils::crashsafe::overwrite(&final_path, &tmp_path, content) + .maybe_fatal_err("crashsafe_overwrite") }) .await .expect("blocking task is never aborted") @@ -475,7 +476,7 @@ impl VirtualFile { pub async fn sync_all(&self) -> Result<(), Error> { with_file!(self, StorageIoOperation::Fsync, |file_guard| { let (_file_guard, res) = io_engine::get().sync_all(file_guard).await; - res + res.maybe_fatal_err("sync_all") }) } @@ -483,7 +484,7 @@ impl VirtualFile { pub async fn sync_data(&self) -> Result<(), Error> { with_file!(self, StorageIoOperation::Fsync, |file_guard| { let (_file_guard, res) = io_engine::get().sync_data(file_guard).await; - res + res.maybe_fatal_err("sync_data") }) }