From 1684bf36c8f8875940b5259433cffb999b595750 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 7 Jun 2023 19:28:08 +0200 Subject: [PATCH] tenant load: make tempfile/tempdir cleanup logic bail out if it fails Before this patch, when loading a tenant, we'd iterate the timeline directory to figure out which timelines the tenant has, and also remove temporary entries in the timelines directory that are created during timeline creation. When we would fail to clean up temporary entries, we'd log an error but continue. This error-ignoring behavior forces all code that runs later, e.g., timeline creation code, to idempotently re-try those cleanups. It's much cleaner to remove such temp entries once during startup, then expect them to be absent later. Why is that? Because 1. it de-clutters the not-startup code 2. we can use O_EXCL when we create the uninit marker, giving us extra robustness in depth that we're not creating the same timeline concurrently 3. same exclusive-op argument for when we create the initdb dir This patch refactors the timeline loading code to a fix-point iteration loop that does the cleanups. If a cleanup fails, we bail out. Once we've reached the fixpoint (no temp stuff left), we assume each remaining entry is a timeline dir, parse it into a TimelineId, and continue the loading as usual. --- pageserver/src/tenant.rs | 160 ++++++++++---------- test_runner/regress/test_broken_timeline.py | 111 ++++++++++++++ 2 files changed, 190 insertions(+), 81 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 95393c380a..5c31e6f0b0 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -29,6 +29,7 @@ use std::collections::BTreeSet; use std::collections::HashMap; use std::ffi::OsStr; use std::fs; +use std::fs::DirEntry; use std::fs::File; use std::fs::OpenOptions; use std::io; @@ -960,96 +961,93 @@ impl Tenant { let tenant_id = self.tenant_id; let conf = self.conf; let span = info_span!("blocking"); - + let myself = Arc::clone(self); let sorted_timelines: Vec<(_, _)> = tokio::task::spawn_blocking(move || { let _g = span.entered(); - let mut timelines_to_load: HashMap = HashMap::new(); let timelines_dir = conf.timelines_path(&tenant_id); - for entry in - std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")? - { - let entry = entry.context("read timeline dir entry")?; - let timeline_dir = entry.path(); + // Prune out all the temporary directories, if any, until only (fully initialized) timeline dirs remain. + // This is essentially a fix-point operation. + // + // If we fail to do cleanup, we bail out. It's better to bail out here than + // to require all later code (timeline create) to deal with leftover temp entries. + // The blast radius is constrained to this tenant, so, it's not too bad. + let entries: Vec = loop { + let mut entries = Vec::new(); + for entry in std::fs::read_dir(&timelines_dir).with_context(|| { + format!( + "Failed to list timelines directory for tenant {}", + myself.tenant_id + ) + })? { + let entry = entry.with_context(|| { + format!("cannot read timeline dir entry for {}", myself.tenant_id) + })?; + entries.push(entry); + } - if crate::is_temporary(&timeline_dir) { - info!( - "Found temporary timeline directory, removing: {}", - timeline_dir.display() - ); - if let Err(e) = std::fs::remove_dir_all(&timeline_dir) { - error!( - "Failed to remove temporary directory '{}': {:?}", - timeline_dir.display(), - e - ); - } - } else if is_uninit_mark(&timeline_dir) { - let timeline_uninit_mark_file = &timeline_dir; - info!( - "Found an uninit mark file {}, removing the timeline and its uninit mark", - timeline_uninit_mark_file.display() - ); - let timeline_id = timeline_uninit_mark_file - .file_stem() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline uninit mark name {}", - timeline_uninit_mark_file.display() - ) - })?; - let timeline_dir = conf.timeline_path(&timeline_id, &tenant_id); - if let Err(e) = - remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) - { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - } else { - let timeline_id = timeline_dir - .file_name() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline dir name {}", - timeline_dir.display() - ) - })?; - let timeline_uninit_mark_file = - conf.timeline_uninit_mark_file_path(tenant_id, timeline_id); - if timeline_uninit_mark_file.exists() { + let mut removed_something = false; + for entry in &entries { + let timeline_dir = entry.path(); + if crate::is_temporary(&timeline_dir) { + // This branch is for the temporary dir created by the bootstrap_timeline procedure. + // It should always start with basebackup-.*. info!( - %timeline_id, - "Found an uninit mark file, removing the timeline and its uninit mark", + "Found temporary directory of timeline_bootstrap, removing: {}", + timeline_dir.display() ); - if let Err(e) = remove_timeline_and_uninit_mark( - &timeline_dir, - &timeline_uninit_mark_file, - ) { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - continue; - } - - let file_name = entry.file_name(); - if let Ok(timeline_id) = - file_name.to_str().unwrap_or_default().parse::() - { - let metadata = load_metadata(conf, timeline_id, tenant_id) - .context("failed to load metadata")?; - timelines_to_load.insert(timeline_id, metadata); - } else { - // A file or directory that doesn't look like a timeline ID - warn!( - "unexpected file or directory in timelines directory: {}", - file_name.to_string_lossy() + std::fs::remove_dir_all(&timeline_dir).with_context(|| format!("remove temporary timeline_bootstrap dir {timeline_dir:?}"))?; + removed_something = true; + } else if is_uninit_mark(&timeline_dir) { + // This branch is to remove timelines that didn't finish creating. + let timeline_uninit_mark_file = &timeline_dir; + info!( + "Found an uninit mark file {}, removing the timeline and its uninit mark", + timeline_uninit_mark_file.display() ); + let timeline_id = timeline_uninit_mark_file + .file_stem() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline uninit mark name {}", + timeline_uninit_mark_file.display() + ) + })?; + let timeline_dir = myself.conf.timeline_path(&timeline_id, &myself.tenant_id); + remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)?; + removed_something = true; } } + + if removed_something { + continue; + } + + break entries; + }; + + let mut timelines_to_load: HashMap = HashMap::new(); + for entry in entries { + let timeline_dir = entry.path(); + assert!(!crate::is_temporary(&timeline_dir), "removed above"); + assert!(!is_uninit_mark(&timeline_dir), "removed above"); + let timeline_id = timeline_dir + .file_name() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline dir name {}", + timeline_dir.display() + ) + })?; + let metadata = load_metadata(myself.conf, timeline_id, myself.tenant_id) + .context("failed to load metadata")?; + timelines_to_load.insert(timeline_id, metadata); } // Sort the array of timeline IDs into tree-order, so that parent comes before @@ -2978,7 +2976,7 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a }) .with_context(|| { format!( - "Failed to remove unit marked timeline directory {}", + "Failed to remove uninit marked timeline directory {}", timeline_dir.display() ) })?; diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index 0fb3b4f262..901ee060cc 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -1,10 +1,12 @@ import concurrent.futures import os +import re from typing import List, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder +from fixtures.pageserver.utils import wait_until_tenant_state from fixtures.types import TenantId, TimelineId @@ -160,6 +162,115 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv): ), "pageserver should clean its temp timeline files on timeline creation failure" +def test_timeline_init_crash_restart_cleans_up_intermediate_files(neon_simple_env: NeonEnv): + env = neon_simple_env + pageserver_http = env.pageserver.http_client() + + tenant_id, _ = env.neon_cli.create_tenant() + + timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" + old_tenant_timelines = env.neon_cli.list_timelines(tenant_id) + initial_timeline_dirs = [d for d in timelines_dir.iterdir()] + + # simulate crash during timeline init (some intermediate files are on disk), before it's checkpointed. + pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "exit")) + with pytest.raises(Exception): + _ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id) + # it should already be dead, any way to check that? + env.neon_cli.pageserver_stop(immediate=True) + + # self-test: ensure that we crashed the pageserver whith intermediate files present + crashed_timeline_dirs = [d for d in timelines_dir.iterdir()] + assert set(crashed_timeline_dirs).issuperset(set(initial_timeline_dirs)) + new_dirs = set(crashed_timeline_dirs) - set(initial_timeline_dirs) + expected = { + "initdb dir": r"^basebackup-.*\.___temp$", + "timeline dir": r"^[0-9a-f]+$", + "timeline uninit marker file": r"^[0-9a-f]+\.___uninit$", + } + assert len(new_dirs) == len(expected.keys()), f"unexpected new directories: {new_dirs}" + for new_dir in new_dirs: + for direntry_type, pattern in expected.items(): + if re.match(pattern, new_dir.name): + expected.pop(direntry_type) + break + assert len(expected.keys()) == 0, f"unexpected new directories: {expected}" + + # start the pageserver and assert that it cleans up + env.neon_cli.pageserver_start() + + # Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally. + new_tenant_timelines = env.neon_cli.list_timelines(tenant_id) + assert ( + new_tenant_timelines == old_tenant_timelines + ), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}" + + timeline_dirs = [d for d in timelines_dir.iterdir()] + assert ( + timeline_dirs == initial_timeline_dirs + ), "pageserver should clean its temp timeline files on timeline creation failure" + + +def test_timeline_init_crash_removal_failure_breaks_tenant(neon_simple_env: NeonEnv): + env = neon_simple_env + pageserver_http = env.pageserver.http_client() + + tenant_id, _ = env.neon_cli.create_tenant() + + timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" + initial_timeline_dirs = [d for d in timelines_dir.iterdir()] + + # simulate crash during timeline init (some intermediate files are on disk), before it's checkpointed. + pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "exit")) + with pytest.raises(Exception): + _ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id) + # it should already be dead, any way to check that? + env.neon_cli.pageserver_stop(immediate=True) + + # self-test: ensure that we crashed the pageserver whith intermediate files present + crashed_timeline_dirs = [d for d in timelines_dir.iterdir()] + assert set(crashed_timeline_dirs).issuperset(set(initial_timeline_dirs)) + new_dirs = set(crashed_timeline_dirs) - set(initial_timeline_dirs) + expected = { + "initdb dir": r"^basebackup-.*\.___temp$", + "timeline dir": r"^[0-9a-f]+$", + "timeline uninit marker file": r"^[0-9a-f]+\.___uninit$", + } + assert len(new_dirs) == len(expected.keys()), f"unexpected new directories: {new_dirs}" + for new_dir in new_dirs: + for direntry_type, pattern in expected.items(): + if re.match(pattern, new_dir.name): + expected.pop(direntry_type) + break + assert len(expected.keys()) == 0, f"unexpected new directories: {expected}" + + try: + # make the directory unremovable + for new_dir in new_dirs: + new_dir.chmod(0o444) + + # start the pageserver, it should attempt the removal + env.neon_cli.pageserver_start() + # and the removal should fail, causing the tenant to be Broken + wait_until_tenant_state(pageserver_http, tenant_id, "Broken", 5) + + # inspect the reason + status = pageserver_http.tenant_status(tenant_id) + assert "remove" in status["state"]["data"]["reason"] + log_line = r".*load.*Failed to remove uninit marked timeline directory" + assert env.pageserver.log_contains(log_line) + env.pageserver.allowed_errors.append(log_line) + + env.neon_cli.pageserver_stop(immediate=True) + finally: + # make the directory removable again + for new_dir in new_dirs: + new_dir.chmod(0o755) + + env.neon_cli.pageserver_start() + wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5) + + def test_timeline_create_break_after_uninit_mark(neon_simple_env: NeonEnv): env = neon_simple_env pageserver_http = env.pageserver.http_client()