From 85f0867db85783145c45ef79865329578b04be8a Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 24 Nov 2023 09:04:06 +0000 Subject: [PATCH] failure during timeline creation: always clean up timeline dir Context: https://app.incident.io/neondb/incidents/103 Before this change, there's the following race condition if the tenant gets deleted during timeline creation: TBD, copy from https://neondb.slack.com/archives/C066ZFAJU85/p1700751858049319 The root of the problem is that we bail out of timeline creation without cleaning up the uninit marker or timeline directory. This PR changes the code to do that, and with our new policy for infallible local IO, it also takes the opportunity to clean stuff up around `TimelineUninitMark` & `UninitializedTimeline`. I also added a missing fsync of the common parent directory between timelines_dir removal and uninit marker removal. We could probably get rid of the entire uninit mark idea, as we no longer treat local FS state as the source of truth, and we only upload to remote storage after successful creation (right?). --- pageserver/src/tenant.rs | 19 +-- pageserver/src/tenant/timeline/uninit.rs | 183 +++++++++++++++-------- 2 files changed, 123 insertions(+), 79 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index ef14f75a47..8f98afdc8f 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -86,7 +86,6 @@ use crate::tenant::storage_layer::ImageLayer; use crate::InitializationOrder; use crate::tenant::timeline::delete::DeleteTimelineFlow; -use crate::tenant::timeline::uninit::cleanup_timeline_directory; use crate::virtual_file::VirtualFile; use crate::walredo::PostgresRedoManager; use crate::TEMP_FILE_SUFFIX; @@ -3091,7 +3090,7 @@ impl Tenant { .await { error!("Failed to create initial files for timeline {tenant_id}/{new_timeline_id}, cleaning up: {e:?}"); - cleanup_timeline_directory(uninit_mark); + drop(uninit_mark); // does the cleanup return Err(e); } @@ -3143,20 +3142,8 @@ impl Tenant { "Timeline {timeline_path} already exists, cannot create its uninit mark file", ); - let uninit_mark_path = self - .conf - .timeline_uninit_mark_file_path(tenant_id, timeline_id); - fs::File::create(&uninit_mark_path) - .context("Failed to create uninit mark file") - .and_then(|_| { - crashsafe::fsync_file_and_parent(&uninit_mark_path) - .context("Failed to fsync uninit mark file") - }) - .with_context(|| { - format!("Failed to crate uninit mark for timeline {tenant_id}/{timeline_id}") - })?; - - let uninit_mark = TimelineUninitMark::new(uninit_mark_path, timeline_path); + let uninit_mark = TimelineUninitMark::new(self.conf, tenant_id, timeline_id) + .context("create uninit mark")?; Ok(uninit_mark) } diff --git a/pageserver/src/tenant/timeline/uninit.rs b/pageserver/src/tenant/timeline/uninit.rs index 6b68fdeb84..524d65f83d 100644 --- a/pageserver/src/tenant/timeline/uninit.rs +++ b/pageserver/src/tenant/timeline/uninit.rs @@ -2,10 +2,20 @@ use std::{collections::hash_map::Entry, fs, sync::Arc}; use anyhow::Context; use camino::Utf8PathBuf; -use tracing::{error, info, info_span, warn}; -use utils::{crashsafe, fs_ext, id::TimelineId, lsn::Lsn}; +use tracing::{info, info_span, warn}; +use utils::{ + crashsafe, + id::{TenantId, TimelineId}, + lsn::Lsn, +}; -use crate::{context::RequestContext, import_datadir, tenant::Tenant}; +use crate::{ + config::PageServerConf, + context::RequestContext, + import_datadir, + tenant::Tenant, + virtual_file::{on_fatal_io_error, MaybeFatalIo}, +}; use super::Timeline; @@ -62,13 +72,8 @@ impl<'t> UninitializedTimeline<'t> { "Found freshly initialized timeline {tenant_id}/{timeline_id} in the tenant map" ), Entry::Vacant(v) => { - uninit_mark.remove_uninit_mark().with_context(|| { - format!( - "Failed to remove uninit mark file for timeline {tenant_id}/{timeline_id}" - ) - })?; + uninit_mark.remove_uninit_mark(); v.insert(Arc::clone(&new_timeline)); - new_timeline.maybe_spawn_flush_loop(); } } @@ -126,29 +131,6 @@ impl<'t> UninitializedTimeline<'t> { } } -impl Drop for UninitializedTimeline<'_> { - fn drop(&mut self) { - if let Some((_, uninit_mark)) = self.raw_timeline.take() { - let _entered = info_span!("drop_uninitialized_timeline", tenant_id = %self.owning_tenant.tenant_id, timeline_id = %self.timeline_id).entered(); - error!("Timeline got dropped without initializing, cleaning its files"); - cleanup_timeline_directory(uninit_mark); - } - } -} - -pub(crate) fn cleanup_timeline_directory(uninit_mark: TimelineUninitMark) { - let timeline_path = &uninit_mark.timeline_path; - match fs_ext::ignore_absent_files(|| fs::remove_dir_all(timeline_path)) { - Ok(()) => { - info!("Timeline dir {timeline_path:?} removed successfully, removing the uninit mark") - } - Err(e) => { - error!("Failed to clean up uninitialized timeline directory {timeline_path:?}: {e:?}") - } - } - drop(uninit_mark); // mark handles its deletion on drop, gets retained if timeline dir exists -} - /// An uninit mark file, created along the timeline dir to ensure the timeline either gets fully initialized and loaded into pageserver's memory, /// or gets removed eventually. /// @@ -158,58 +140,133 @@ pub(crate) struct TimelineUninitMark { uninit_mark_deleted: bool, uninit_mark_path: Utf8PathBuf, pub(crate) timeline_path: Utf8PathBuf, + common_parent: Utf8PathBuf, } impl TimelineUninitMark { - pub(crate) fn new(uninit_mark_path: Utf8PathBuf, timeline_path: Utf8PathBuf) -> Self { - Self { + pub(crate) fn new( + conf: &'static PageServerConf, + tenant_id: TenantId, + timeline_id: TimelineId, + ) -> anyhow::Result { + let timeline_path = conf.timeline_path(&tenant_id, &timeline_id); + let uninit_mark_path = conf.timeline_uninit_mark_file_path(tenant_id, timeline_id); + + // assert they share the same parent + let timeline_parent_path = timeline_path + .parent() + .expect("timeline_path must have a parent"); + let uninit_mark_parent_path = uninit_mark_path + .parent() + .expect("uninit mark path must have a parent"); + assert_eq!(timeline_parent_path, uninit_mark_parent_path); + let common_parent = uninit_mark_parent_path; + + // crate the uninit file + let _ = fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&uninit_mark_path) + .context("create uninit mark file")?; + crashsafe::fsync_file_and_parent(&common_parent).context("fsync uninit mark file")?; + + Ok(Self { uninit_mark_deleted: false, + common_parent: common_parent.to_owned(), uninit_mark_path, timeline_path, - } + }) } - fn remove_uninit_mark(mut self) -> anyhow::Result<()> { - if !self.uninit_mark_deleted { - self.delete_mark_file_if_present()?; - } + fn remove_uninit_mark(mut self) { + // remove the uninit mark + fs::remove_file(&self.uninit_mark_path).fatal_err(&format!( + "TimelineUninitMark::drop: remove_file uninit mark: {}", + self.uninit_mark_path + )); - Ok(()) - } + // fsync to persist the removal + crashsafe::fsync(&self.common_parent).fatal_err(&format!( + "TimelineUninitMark::drop: fsync common parent dir: {}", + self.common_parent + )); - fn delete_mark_file_if_present(&mut self) -> anyhow::Result<()> { - let uninit_mark_file = &self.uninit_mark_path; - let uninit_mark_parent = uninit_mark_file - .parent() - .with_context(|| format!("Uninit mark file {uninit_mark_file:?} has no parent"))?; - fs_ext::ignore_absent_files(|| fs::remove_file(uninit_mark_file)).with_context(|| { - format!("Failed to remove uninit mark file at path {uninit_mark_file:?}") - })?; - crashsafe::fsync(uninit_mark_parent).context("Failed to fsync uninit mark parent")?; self.uninit_mark_deleted = true; - - Ok(()) } } impl Drop for TimelineUninitMark { fn drop(&mut self) { if !self.uninit_mark_deleted { - if self.timeline_path.exists() { - error!( - "Uninit mark {} is not removed, timeline {} stays uninitialized", - self.uninit_mark_path, self.timeline_path - ) - } else { - // unblock later timeline creation attempts - warn!( - "Removing intermediate uninit mark file {}", + // unblock later timeline creation attempts + let _entered = + info_span!("TimelineUninitMark_drop", timeline_path=%self.timeline_path).entered(); + warn!("removing timeline dir and uninit mark file"); + + // sanity-check: ensure the uninit mark file still exists on disk + let uninit_mark_file_exists = self.uninit_mark_path.try_exists().fatal_err(&format!( + "TimelineUninitMark::drop: stat() uninit mark file: {}", + self.uninit_mark_path + )); + if !uninit_mark_file_exists { + panic!( + "uninit mark file assumed to exists but doesn't: {}", self.uninit_mark_path ); - if let Err(e) = self.delete_mark_file_if_present() { - error!("Failed to remove the uninit mark file: {e}") + } + + // recursively delete `timeline_path`, ignoring NotFound errors and aborting the process on all others. + match fs::remove_dir_all(&self.timeline_path) { + Ok(()) => { + info!("timeline dir removed successfully"); + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // this can happen both if the timeline_path does not exist + // and if the timeline_path exists and there's another thread + // still operating on that directory and our remove_dir_all call + // effectively got hit by time-of-check vs time-of-use. + // Disambiguate by calling remove_dir against the timeline_path + match std::fs::remove_dir(&self.timeline_path) { + Ok(()) => { + warn!("retrying timeline dir removal succeeded after NotFound, this is indicative of a race condition"); + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + // this is the good case: the first NotFound was because the dir didn't exist + info!("timeline dir does not exist"); + } + Err(e) => { + on_fatal_io_error(&e, &format!("TimelineUninitMark::drop: remove_dir_all failed with NotFound, then remove_dir failed: {}", self.timeline_path)); + } + } + } + Err(e) => { + on_fatal_io_error( + &e, + &format!( + "TimelineUninitMark::drop: delete timeline directory: {:?}", + self.timeline_path + ), + ); } } + + // fsync to order timelines_dir removal before unint mark removal + crashsafe::fsync(&self.common_parent).fatal_err(&format!( + "TimelineUninitMark::drop: fsync after timeline dir removal: {}", + self.common_parent, + )); + + // remove the uninit mark + fs::remove_file(&self.common_parent).fatal_err(&format!( + "TimelineUninitMark::drop: remove_file uninit mark: {}", + self.common_parent, + )); + + // fsync to persist the removal + crashsafe::fsync(&self.common_parent).fatal_err(&format!( + "TimelineUninitMark::drop: fsync common parent dir: {}", + self.common_parent, + )); } } }