From 487f3202feb740fe71d8e4bf539befa676e5372e Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 13 Feb 2025 21:53:39 +0100 Subject: [PATCH] pageserver read path: abort on fatal IO errors from disk / filesystem (#10786) Before this PR, an IO error returned from the kernel, e.g., due to a bad disk, would get bubbled up, all the way to a user-visible query failing. This is against the IO error handling policy that we have established and is hence rectified in this PR. [[(internal Policy document link)]](https://github.com/neondatabase/docs/blob/bef44149f746d6705c709b6d9c5e342c0ecac49c/src/storage/handling_io_and_logical_errors.md#L33-L35) The practice on the write path seems to be that we call `maybe_fatal_err()` or `fatal_err()` fairly high up the stack. That is, regardless of whether std::fs, tokio::fs, or VirtualFile is used to perform the IO. For the read path, I choose a centralized approach in this PR by checking for errors as close to the kernel interface as possible. I believe this is better for long-term consistency. To mitigate the problem of missing context if we abort so far down in the stack, the `on_fatal_io_error` now captures and logs a backtrace. I grepped the pageserver code base for `fs::read` to convince myself that all non-VirtualFile reads already handle IO errors according to policy. Refs - fixes https://github.com/neondatabase/neon/issues/10454 --- pageserver/src/virtual_file.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 9d539198c7..c966ad813f 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -496,7 +496,8 @@ pub(crate) fn is_fatal_io_error(e: &std::io::Error) -> bool { /// bad storage or bad configuration, and we can't fix that from inside /// a running process. pub(crate) fn on_fatal_io_error(e: &std::io::Error, context: &str) -> ! 
{ - tracing::error!("Fatal I/O error: {e}: {context})"); + let backtrace = std::backtrace::Backtrace::force_capture(); + tracing::error!("Fatal I/O error: {e}: {context})\n{backtrace}"); std::process::abort(); } @@ -947,13 +948,18 @@ impl VirtualFileInner { where Buf: tokio_epoll_uring::IoBufMut + Send, { - let file_guard = match self.lock_file().await { + let file_guard = match self + .lock_file() + .await + .maybe_fatal_err("lock_file inside VirtualFileInner::read_at") + { Ok(file_guard) => file_guard, Err(e) => return (buf, Err(e)), }; observe_duration!(StorageIoOperation::Read, { let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await; + let res = res.maybe_fatal_err("io_engine read_at inside VirtualFileInner::read_at"); if let Ok(size) = res { STORAGE_IO_SIZE .with_label_values(&[