tenant load: make tempfile/tempdir cleanup logic bail out if it fails

Before this patch, when loading a tenant, we'd iterate the timeline directory
to figure out which timelines the tenant has, and also remove temporary
entries in the timelines directory that are created during timeline
creation.

When we would fail to clean up temporary entries, we'd log an error but
continue.
This error-ignoring behavior forces all code that runs later, e.g.,
timeline creation code, to idempotently re-try those cleanups.

It's much cleaner to remove such temp entries once during startup,
then expect them to be absent later.
Why is that? Because
1. it de-clutters the not-startup code
2. we can use O_EXCL when we create the uninit marker, giving
   us extra robustness in depth that we're not creating the same
   timeline concurrently
3. same exclusive-op argument for when we create the initdb dir

This patch refactors the timeline loading code to a fix-point
iteration loop that does the cleanups.

If a cleanup fails, we bail out.

Once we've reached the fixpoint (no temp stuff left), we assume
each remaining entry is a timeline dir, parse it into a TimelineId,
and continue the loading as usual.
This commit is contained in:
Christian Schwarz
2023-06-07 19:28:08 +02:00
parent bc5ade701b
commit 1684bf36c8
2 changed files with 190 additions and 81 deletions

View File

@@ -29,6 +29,7 @@ use std::collections::BTreeSet;
use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
use std::fs::DirEntry;
use std::fs::File;
use std::fs::OpenOptions;
use std::io;
@@ -960,96 +961,93 @@ impl Tenant {
let tenant_id = self.tenant_id;
let conf = self.conf;
let span = info_span!("blocking");
let myself = Arc::clone(self);
let sorted_timelines: Vec<(_, _)> = tokio::task::spawn_blocking(move || {
let _g = span.entered();
let mut timelines_to_load: HashMap<TimelineId, TimelineMetadata> = HashMap::new();
let timelines_dir = conf.timelines_path(&tenant_id);
for entry in
std::fs::read_dir(&timelines_dir).context("list timelines directory for tenant")?
{
let entry = entry.context("read timeline dir entry")?;
let timeline_dir = entry.path();
// Prune out all the temporary directories, if any, until only (fully initialized) timeline dirs remain.
// This is essentially a fix-point operation.
//
// If we fail to do cleanup, we bail out. It's better to bail out here than
// to require all later code (timeline create) to deal with leftover temp entries.
// The blast radius is constrained to this tenant, so, it's not too bad.
let entries: Vec<DirEntry> = loop {
let mut entries = Vec::new();
for entry in std::fs::read_dir(&timelines_dir).with_context(|| {
format!(
"Failed to list timelines directory for tenant {}",
myself.tenant_id
)
})? {
let entry = entry.with_context(|| {
format!("cannot read timeline dir entry for {}", myself.tenant_id)
})?;
entries.push(entry);
}
if crate::is_temporary(&timeline_dir) {
info!(
"Found temporary timeline directory, removing: {}",
timeline_dir.display()
);
if let Err(e) = std::fs::remove_dir_all(&timeline_dir) {
error!(
"Failed to remove temporary directory '{}': {:?}",
timeline_dir.display(),
e
);
}
} else if is_uninit_mark(&timeline_dir) {
let timeline_uninit_mark_file = &timeline_dir;
info!(
"Found an uninit mark file {}, removing the timeline and its uninit mark",
timeline_uninit_mark_file.display()
);
let timeline_id = timeline_uninit_mark_file
.file_stem()
.and_then(OsStr::to_str)
.unwrap_or_default()
.parse::<TimelineId>()
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline uninit mark name {}",
timeline_uninit_mark_file.display()
)
})?;
let timeline_dir = conf.timeline_path(&timeline_id, &tenant_id);
if let Err(e) =
remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)
{
error!("Failed to clean up uninit marked timeline: {e:?}");
}
} else {
let timeline_id = timeline_dir
.file_name()
.and_then(OsStr::to_str)
.unwrap_or_default()
.parse::<TimelineId>()
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline dir name {}",
timeline_dir.display()
)
})?;
let timeline_uninit_mark_file =
conf.timeline_uninit_mark_file_path(tenant_id, timeline_id);
if timeline_uninit_mark_file.exists() {
let mut removed_something = false;
for entry in &entries {
let timeline_dir = entry.path();
if crate::is_temporary(&timeline_dir) {
// This branch is for the temporary dir created by the bootstrap_timeline procedure.
// It should always start with basebackup-.*.
info!(
%timeline_id,
"Found an uninit mark file, removing the timeline and its uninit mark",
"Found temporary directory of timeline_bootstrap, removing: {}",
timeline_dir.display()
);
if let Err(e) = remove_timeline_and_uninit_mark(
&timeline_dir,
&timeline_uninit_mark_file,
) {
error!("Failed to clean up uninit marked timeline: {e:?}");
}
continue;
}
let file_name = entry.file_name();
if let Ok(timeline_id) =
file_name.to_str().unwrap_or_default().parse::<TimelineId>()
{
let metadata = load_metadata(conf, timeline_id, tenant_id)
.context("failed to load metadata")?;
timelines_to_load.insert(timeline_id, metadata);
} else {
// A file or directory that doesn't look like a timeline ID
warn!(
"unexpected file or directory in timelines directory: {}",
file_name.to_string_lossy()
std::fs::remove_dir_all(&timeline_dir).with_context(|| format!("remove temporary timeline_bootstrap dir {timeline_dir:?}"))?;
removed_something = true;
} else if is_uninit_mark(&timeline_dir) {
// This branch is to remove timelines that didn't finish creating.
let timeline_uninit_mark_file = &timeline_dir;
info!(
"Found an uninit mark file {}, removing the timeline and its uninit mark",
timeline_uninit_mark_file.display()
);
let timeline_id = timeline_uninit_mark_file
.file_stem()
.and_then(OsStr::to_str)
.unwrap_or_default()
.parse::<TimelineId>()
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline uninit mark name {}",
timeline_uninit_mark_file.display()
)
})?;
let timeline_dir = myself.conf.timeline_path(&timeline_id, &myself.tenant_id);
remove_timeline_and_uninit_mark(&timeline_dir, timeline_uninit_mark_file)?;
removed_something = true;
}
}
if removed_something {
continue;
}
break entries;
};
let mut timelines_to_load: HashMap<TimelineId, TimelineMetadata> = HashMap::new();
for entry in entries {
let timeline_dir = entry.path();
assert!(!crate::is_temporary(&timeline_dir), "removed above");
assert!(!is_uninit_mark(&timeline_dir), "removed above");
let timeline_id = timeline_dir
.file_name()
.and_then(OsStr::to_str)
.unwrap_or_default()
.parse::<TimelineId>()
.with_context(|| {
format!(
"Could not parse timeline id out of the timeline dir name {}",
timeline_dir.display()
)
})?;
let metadata = load_metadata(myself.conf, timeline_id, myself.tenant_id)
.context("failed to load metadata")?;
timelines_to_load.insert(timeline_id, metadata);
}
// Sort the array of timeline IDs into tree-order, so that parent comes before
@@ -2978,7 +2976,7 @@ fn remove_timeline_and_uninit_mark(timeline_dir: &Path, uninit_mark: &Path) -> a
})
.with_context(|| {
format!(
"Failed to remove unit marked timeline directory {}",
"Failed to remove uninit marked timeline directory {}",
timeline_dir.display()
)
})?;

View File

@@ -1,10 +1,12 @@
import concurrent.futures
import os
import re
from typing import List, Tuple
import pytest
from fixtures.log_helper import log
from fixtures.neon_fixtures import Endpoint, NeonEnv, NeonEnvBuilder
from fixtures.pageserver.utils import wait_until_tenant_state
from fixtures.types import TenantId, TimelineId
@@ -160,6 +162,115 @@ def test_timeline_init_break_before_checkpoint(neon_simple_env: NeonEnv):
), "pageserver should clean its temp timeline files on timeline creation failure"
def test_timeline_init_crash_restart_cleans_up_intermediate_files(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, _ = env.neon_cli.create_tenant()
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
old_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# simulate crash during timeline init (some intermediate files are on disk), before it's checkpointed.
pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "exit"))
with pytest.raises(Exception):
_ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id)
# it should already be dead, any way to check that?
env.neon_cli.pageserver_stop(immediate=True)
# self-test: ensure that we crashed the pageserver whith intermediate files present
crashed_timeline_dirs = [d for d in timelines_dir.iterdir()]
assert set(crashed_timeline_dirs).issuperset(set(initial_timeline_dirs))
new_dirs = set(crashed_timeline_dirs) - set(initial_timeline_dirs)
expected = {
"initdb dir": r"^basebackup-.*\.___temp$",
"timeline dir": r"^[0-9a-f]+$",
"timeline uninit marker file": r"^[0-9a-f]+\.___uninit$",
}
assert len(new_dirs) == len(expected.keys()), f"unexpected new directories: {new_dirs}"
for new_dir in new_dirs:
for direntry_type, pattern in expected.items():
if re.match(pattern, new_dir.name):
expected.pop(direntry_type)
break
assert len(expected.keys()) == 0, f"unexpected new directories: {expected}"
# start the pageserver and assert that it cleans up
env.neon_cli.pageserver_start()
# Creating the timeline didn't finish. The other timelines on tenant should still be present and work normally.
new_tenant_timelines = env.neon_cli.list_timelines(tenant_id)
assert (
new_tenant_timelines == old_tenant_timelines
), f"Pageserver after restart should ignore non-initialized timelines for tenant {tenant_id}"
timeline_dirs = [d for d in timelines_dir.iterdir()]
assert (
timeline_dirs == initial_timeline_dirs
), "pageserver should clean its temp timeline files on timeline creation failure"
def test_timeline_init_crash_removal_failure_breaks_tenant(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()
tenant_id, _ = env.neon_cli.create_tenant()
timelines_dir = env.repo_dir / "tenants" / str(tenant_id) / "timelines"
initial_timeline_dirs = [d for d in timelines_dir.iterdir()]
# simulate crash during timeline init (some intermediate files are on disk), before it's checkpointed.
pageserver_http.configure_failpoints(("before-checkpoint-new-timeline", "exit"))
with pytest.raises(Exception):
_ = env.neon_cli.create_timeline("test_timeline_init_break_before_checkpoint", tenant_id)
# it should already be dead, any way to check that?
env.neon_cli.pageserver_stop(immediate=True)
# self-test: ensure that we crashed the pageserver whith intermediate files present
crashed_timeline_dirs = [d for d in timelines_dir.iterdir()]
assert set(crashed_timeline_dirs).issuperset(set(initial_timeline_dirs))
new_dirs = set(crashed_timeline_dirs) - set(initial_timeline_dirs)
expected = {
"initdb dir": r"^basebackup-.*\.___temp$",
"timeline dir": r"^[0-9a-f]+$",
"timeline uninit marker file": r"^[0-9a-f]+\.___uninit$",
}
assert len(new_dirs) == len(expected.keys()), f"unexpected new directories: {new_dirs}"
for new_dir in new_dirs:
for direntry_type, pattern in expected.items():
if re.match(pattern, new_dir.name):
expected.pop(direntry_type)
break
assert len(expected.keys()) == 0, f"unexpected new directories: {expected}"
try:
# make the directory unremovable
for new_dir in new_dirs:
new_dir.chmod(0o444)
# start the pageserver, it should attempt the removal
env.neon_cli.pageserver_start()
# and the removal should fail, causing the tenant to be Broken
wait_until_tenant_state(pageserver_http, tenant_id, "Broken", 5)
# inspect the reason
status = pageserver_http.tenant_status(tenant_id)
assert "remove" in status["state"]["data"]["reason"]
log_line = r".*load.*Failed to remove uninit marked timeline directory"
assert env.pageserver.log_contains(log_line)
env.pageserver.allowed_errors.append(log_line)
env.neon_cli.pageserver_stop(immediate=True)
finally:
# make the directory removable again
for new_dir in new_dirs:
new_dir.chmod(0o755)
env.neon_cli.pageserver_start()
wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5)
def test_timeline_create_break_after_uninit_mark(neon_simple_env: NeonEnv):
env = neon_simple_env
pageserver_http = env.pageserver.http_client()