mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-04 12:02:55 +00:00
pageserver: improve debuggability of timeline creation failures during chaos testing (#11300)
## Problem

We're seeing timeline creation failures that look suspiciously like a race with the cleanup-deletion of initdb temporary directories. I couldn't spot the bug, but we can make it a bit easier to debug.

Related: https://github.com/neondatabase/neon/issues/11296

## Summary of changes

- Avoid surfacing a distracting ENOENT failure to delete as a log error -- this is fine, and can happen if the timeline is cancelled while doing initdb, or if initdb itself has an error where it doesn't write the dir (this error is surfaced separately).
- Log after purging initdb temp directories.
This commit is contained in:
@@ -5095,14 +5095,17 @@ impl Tenant {
|
||||
fs::remove_dir_all(&pgdata_path).with_context(|| {
|
||||
format!("Failed to remove already existing initdb directory: {pgdata_path}")
|
||||
})?;
|
||||
tracing::info!("removed previous attempt's temporary initdb directory '{pgdata_path}'");
|
||||
}
|
||||
|
||||
// this new directory is very temporary, set to remove it immediately after bootstrap, we don't need it
|
||||
let pgdata_path_deferred = pgdata_path.clone();
|
||||
scopeguard::defer! {
|
||||
if let Err(e) = fs::remove_dir_all(&pgdata_path_deferred) {
|
||||
if let Err(e) = fs::remove_dir_all(&pgdata_path_deferred).or_else(fs_ext::ignore_not_found) {
|
||||
// this is unlikely, but we will remove the directory on pageserver restart or another bootstrap call
|
||||
error!("Failed to remove temporary initdb directory '{pgdata_path_deferred}': {e}");
|
||||
} else {
|
||||
tracing::info!("removed temporary initdb directory '{pgdata_path_deferred}'");
|
||||
}
|
||||
}
|
||||
if let Some(existing_initdb_timeline_id) = load_existing_initdb {
|
||||
|
||||
Reference in New Issue
Block a user