diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 95393c380a..5c31e6f0b0 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -29,6 +29,7 @@ use std::collections::BTreeSet; use std::collections::HashMap; use std::ffi::OsStr; use std::fs; +use std::fs::DirEntry; use std::fs::File; use std::fs::OpenOptions; use std::io; @@ -960,96 +961,93 @@ impl Tenant { let tenant_id = self.tenant_id; let conf = self.conf; let span = info_span!("blocking"); - + let myself = Arc::clone(self); let sorted_timelines: Vec<(_, _)> = tokio::task::spawn_blocking(move || { let _g = span.entered(); - let mut timelines_to_load: HashMap = HashMap::new(); let timelines_dir = conf.timelines_path(&tenant_id); - for entry in - std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")? - { - let entry = entry.context("read timeline dir entry")?; - let timeline_dir = entry.path(); + // Prune out all the temporary directories, if any, until only (fully initialized) timeline dirs remain. + // This is essentially a fix-point operation. + // + // If we fail to do cleanup, we bail out. It's better to bail out here than + // to require all later code (timeline create) to deal with leftover temp entries. + // The blast radius is constrained to this tenant, so, it's not too bad. + let entries: Vec = loop { + let mut entries = Vec::new(); + for entry in std::fs::read_dir(&timelines_dir).with_context(|| { + format!( + "Failed to list timelines directory for tenant {}", + myself.tenant_id + ) + })? { + let entry = entry.with_context(|| { + format!("cannot read timeline dir entry for {}", myself.tenant_id) + })?; + entries.push(entry); + } - if crate::is_temporary(&timeline_dir) { - info!( - "Found temporary timeline directory, removing: {}", - timeline_dir.display() - ); - if let Err(e) = std::fs::remove_dir_all(&timeline_dir) { - error!( - "Failed to remove temporary directory '{}': {:?}", - timeline_dir.display(), - e - ); - } - } else if is_uninit_mark(&timeline_dir) { - let timeline_uninit_mark_file = &timeline_dir; - info!( - "Found an uninit mark file {}, removing the timeline and its uninit mark", - timeline_uninit_mark_file.display() - ); - let timeline_id = timeline_uninit_mark_file - .file_stem() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline uninit mark name {}", - timeline_uninit_mark_file.display() - ) - })?; - let timeline_dir = conf.timeline_path(&timeline_id, &tenant_id); - if let Err(e) = - remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file) - { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - } else { - let timeline_id = timeline_dir - .file_name() - .and_then(OsStr::to_str) - .unwrap_or_default() - .parse::() - .with_context(|| { - format!( - "Could not parse timeline id out of the timeline dir name {}", - timeline_dir.display() - ) - })?; - let timeline_uninit_mark_file = - conf.timeline_uninit_mark_file_path(tenant_id, timeline_id); - if timeline_uninit_mark_file.exists() { + let mut removed_something = false; + for entry in &entries { + let timeline_dir = entry.path(); + if crate::is_temporary(&timeline_dir) { + // This branch is for the temporary dir created by the bootstrap_timeline procedure. + // It should always start with basebackup-.*. info!( - %timeline_id, - "Found an uninit mark file, removing the timeline and its uninit mark", + "Found temporary directory of timeline_bootstrap, removing: {}", + timeline_dir.display() ); - if let Err(e) = remove_timeline_and_uninit_mark( - &timeline_dir, - &timeline_uninit_mark_file, - ) { - error!("Failed to clean up uninit marked timeline: {e:?}"); - } - continue; - } - - let file_name = entry.file_name(); - if let Ok(timeline_id) = - file_name.to_str().unwrap_or_default().parse::() - { - let metadata = load_metadata(conf, timeline_id, tenant_id) - .context("failed to load metadata")?; - timelines_to_load.insert(timeline_id, metadata); - } else { - // A file or directory that doesn't look like a timeline ID - warn!( - "unexpected file or directory in timelines directory: {}", - file_name.to_string_lossy() + std::fs::remove_dir_all(&timeline_dir).with_context(|| format!("remove temporary timeline_bootstrap dir {timeline_dir:?}"))?; + removed_something = true; + } else if is_uninit_mark(&timeline_dir) { + // This branch is to remove timelines that didn't finish creating. + let timeline_uninit_mark_file = &timeline_dir; + info!( + "Found an uninit mark file {}, removing the timeline and its uninit mark", + timeline_uninit_mark_file.display() ); + let timeline_id = timeline_uninit_mark_file + .file_stem() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline uninit mark name {}", + timeline_uninit_mark_file.display() + ) + })?; + let timeline_dir = myself.conf.timeline_path(&timeline_id, &myself.tenant_id); + remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)?; + removed_something = true; } } + + if removed_something { + continue; + } + + break entries; + }; + + let mut timelines_to_load: HashMap = HashMap::new(); + for entry in entries { + let timeline_dir = entry.path(); + assert!(!crate::is_temporary(&timeline_dir), "removed above"); + assert!(!is_uninit_mark(&timeline_dir), "removed above"); + let timeline_id = timeline_dir + .file_name() + .and_then(OsStr::to_str) + .unwrap_or_default() + .parse::() + .with_context(|| { + format!( + "Could not parse timeline id out of the timeline dir name {}", + timeline_dir.display() + ) + })?; + let metadata = load_metadata(myself.conf, timeline_id, myself.tenant_id) + .context("failed to load metadata")?; + timelines_to_load.insert(timeline_id, metadata); } // Sort the array of timeline IDs into tree-order, so that parent comes before @@ -2978,7 +2976,7 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a }) .with_context(|| { format!( - "Failed to remove unit marked timeline directory {}", + "Failed to remove uninit marked timeline directory {}", timeline_dir.display() ) })?; diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py index 0fb3b4f262..901ee060cc 100644 --- a/test_runner/regress/test_broken_timeline.py +++ b/test_runner/regress/test_broken_timeline.py @@ -1,10 +1,12 @@ import concurrent.futures import os +import re from typing import List, Tuple import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder +from fixtures.pageserver.utils import wait_until_tenant_state from fixtures.types import TenantId, TimelineId @@ -160,6 +162,115 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv): ), "pageserver should clean its temp timeline files on timeline creation failure" +def test_timeline_init_crash_restart_cleans_up_intermediate_files(neon_simple_env: NeonEnv): + env = neon_simple_env + pageserver_http = env.pageserver.http_client() + + tenant_id, _ = env.neon_cli.create_tenant() + + timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" + old_tenant_timelines = env.neon_cli.list_timelines(tenant_id) + initial_timeline_dirs = [d for d in timelines_dir.iterdir()] + + # simulate crash during timeline init (some intermediate files are on disk), before it's checkpointed. + pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "exit")) + with pytest.raises(Exception): + _ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id) + # it should already be dead, any way to check that? + env.neon_cli.pageserver_stop(immediate=True) + + # self-test: ensure that we crashed the pageserver whith intermediate files present + crashed_timeline_dirs = [d for d in timelines_dir.iterdir()] + assert set(crashed_timeline_dirs).issuperset(set(initial_timeline_dirs)) + new_dirs = set(crashed_timeline_dirs) - set(initial_timeline_dirs) + expected = { + "initdb dir": r"^basebackup-.*\.___temp$", + "timeline dir": r"^[0-9a-f]+$", + "timeline uninit marker file": r"^[0-9a-f]+\.___uninit$", + } + assert len(new_dirs) == len(expected.keys()), f"unexpected new directories: {new_dirs}" + for new_dir in new_dirs: + for direntry_type, pattern in expected.items(): + if re.match(pattern, new_dir.name): + expected.pop(direntry_type) + break + assert len(expected.keys()) == 0, f"unexpected new directories: {expected}" + + # start the pageserver and assert that it cleans up + env.neon_cli.pageserver_start() + + # Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally. + new_tenant_timelines = env.neon_cli.list_timelines(tenant_id) + assert ( + new_tenant_timelines == old_tenant_timelines + ), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}" + + timeline_dirs = [d for d in timelines_dir.iterdir()] + assert ( + timeline_dirs == initial_timeline_dirs + ), "pageserver should clean its temp timeline files on timeline creation failure" + + +def test_timeline_init_crash_removal_failure_breaks_tenant(neon_simple_env: NeonEnv): + env = neon_simple_env + pageserver_http = env.pageserver.http_client() + + tenant_id, _ = env.neon_cli.create_tenant() + + timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines" + initial_timeline_dirs = [d for d in timelines_dir.iterdir()] + + # simulate crash during timeline init (some intermediate files are on disk), before it's checkpointed. + pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "exit")) + with pytest.raises(Exception): + _ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id) + # it should already be dead, any way to check that? + env.neon_cli.pageserver_stop(immediate=True) + + # self-test: ensure that we crashed the pageserver whith intermediate files present + crashed_timeline_dirs = [d for d in timelines_dir.iterdir()] + assert set(crashed_timeline_dirs).issuperset(set(initial_timeline_dirs)) + new_dirs = set(crashed_timeline_dirs) - set(initial_timeline_dirs) + expected = { + "initdb dir": r"^basebackup-.*\.___temp$", + "timeline dir": r"^[0-9a-f]+$", + "timeline uninit marker file": r"^[0-9a-f]+\.___uninit$", + } + assert len(new_dirs) == len(expected.keys()), f"unexpected new directories: {new_dirs}" + for new_dir in new_dirs: + for direntry_type, pattern in expected.items(): + if re.match(pattern, new_dir.name): + expected.pop(direntry_type) + break + assert len(expected.keys()) == 0, f"unexpected new directories: {expected}" + + try: + # make the directory unremovable + for new_dir in new_dirs: + new_dir.chmod(0o444) + + # start the pageserver, it should attempt the removal + env.neon_cli.pageserver_start() + # and the removal should fail, causing the tenant to be Broken + wait_until_tenant_state(pageserver_http, tenant_id, "Broken", 5) + + # inspect the reason + status = pageserver_http.tenant_status(tenant_id) + assert "remove" in status["state"]["data"]["reason"] + log_line = r".*load.*Failed to remove uninit marked timeline directory" + assert env.pageserver.log_contains(log_line) + env.pageserver.allowed_errors.append(log_line) + + env.neon_cli.pageserver_stop(immediate=True) + finally: + # make the directory removable again + for new_dir in new_dirs: + new_dir.chmod(0o755) + + env.neon_cli.pageserver_start() + wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5) + + def test_timeline_create_break_after_uninit_mark(neon_simple_env: NeonEnv): env = neon_simple_env pageserver_http = env.pageserver.http_client()