mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-27 10:00:38 +00:00
improve overlayfs cleanup code
This commit is contained in:
@@ -70,6 +70,7 @@ from fixtures.utils import (
|
||||
subprocess_capture,
|
||||
wait_until,
|
||||
)
|
||||
from fixtures import overlayfs
|
||||
|
||||
"""
|
||||
This file contains pytest fixtures. A fixture is a test resource that can be
|
||||
@@ -425,6 +426,7 @@ class NeonEnvBuilder:
|
||||
pg_version: PgVersion,
|
||||
test_name: str,
|
||||
test_output_dir: Path,
|
||||
test_overlay_dir: Optional[Path] = None,
|
||||
pageserver_remote_storage: Optional[RemoteStorage] = None,
|
||||
pageserver_config_override: Optional[str] = None,
|
||||
num_safekeepers: int = 1,
|
||||
@@ -469,7 +471,8 @@ class NeonEnvBuilder:
|
||||
self.initial_timeline = initial_timeline or TimelineId.generate()
|
||||
self.scrub_on_exit = False
|
||||
self.test_output_dir = test_output_dir
|
||||
self.overlay_mounts: List[Tuple[str, Path]] = []
|
||||
self.test_overlay_dir = test_overlay_dir
|
||||
self.overlay_mounts_created_by_us: List[Tuple[str, Path]] = []
|
||||
|
||||
assert test_name.startswith(
|
||||
"test_"
|
||||
@@ -522,7 +525,6 @@ class NeonEnvBuilder:
|
||||
repo_dir: Path,
|
||||
neon_binpath: Optional[Path] = None,
|
||||
pg_distrib_dir: Optional[Path] = None,
|
||||
use_overlay: bool = False,
|
||||
) -> NeonEnv:
|
||||
"""
|
||||
A simple method to import data into the current NeonEnvBuilder from a snapshot of a repo dir.
|
||||
@@ -550,10 +552,10 @@ class NeonEnvBuilder:
|
||||
tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
|
||||
|
||||
log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
|
||||
if not use_overlay:
|
||||
if not self.test_overlay_dir:
|
||||
shutil.copytree(tenants_from_dir, tenants_to_dir)
|
||||
else:
|
||||
self.mount_overlay(f"{ps_dir.name}:tenants", tenants_from_dir, tenants_to_dir)
|
||||
self.overlay_mount(f"{ps_dir.name}:tenants", tenants_from_dir, tenants_to_dir)
|
||||
|
||||
for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
|
||||
sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
|
||||
@@ -562,12 +564,12 @@ class NeonEnvBuilder:
|
||||
shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
|
||||
|
||||
shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
|
||||
if not use_overlay:
|
||||
if not self.test_overlay_dir:
|
||||
shutil.copytree(
|
||||
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
|
||||
)
|
||||
else:
|
||||
self.mount_overlay("local_fs_remote_storage",
|
||||
self.overlay_mount("local_fs_remote_storage",
|
||||
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage")
|
||||
|
||||
if (attachments_json := Path(repo_dir / "attachments.json")).exists():
|
||||
@@ -585,16 +587,16 @@ class NeonEnvBuilder:
|
||||
|
||||
return self.env
|
||||
|
||||
@property
|
||||
def overlay_state_dir(self):
|
||||
return self.test_output_dir / "overlay-state"
|
||||
|
||||
def mount_overlay(self, ident: str, srcdir: Path, dstdir: Path):
|
||||
assert self.overlay_state_dir.parent in dstdir.parents # so the post-cleanup assertion in self.cleanup_overlay is simpler
|
||||
self.overlay_state_dir.mkdir(exist_ok=True)
|
||||
def overlay_mount(self, ident: str, srcdir: Path, dstdir: Path):
|
||||
"""
|
||||
Mount `srcdir` as an overlayfs mount at `dstdir`.
|
||||
The overlayfs `upperdir` and `workdir` will be placed in test_overlay_dir.
|
||||
"""
|
||||
assert self.test_overlay_dir
|
||||
assert self.test_output_dir in dstdir.parents # so that teardown & test_overlay_dir fixture work
|
||||
assert srcdir.is_dir()
|
||||
dstdir.mkdir(exist_ok=False, parents=False)
|
||||
ident_state_dir = self.overlay_state_dir / ident
|
||||
ident_state_dir = self.test_overlay_dir / ident
|
||||
upper = ident_state_dir / "upper"
|
||||
work = ident_state_dir / "work"
|
||||
ident_state_dir.mkdir(exist_ok=False, parents=False) # exists_ok=False also checks uniqueness in self.overlay_mounts
|
||||
@@ -603,27 +605,28 @@ class NeonEnvBuilder:
|
||||
cmd = [ "sudo", "mount", "-t", "overlay", "overlay",
|
||||
"-o", f"lowerdir={srcdir},upperdir={upper},workdir={work}", str(dstdir) ]
|
||||
log.info(f"Mounting overlayfs srcdir={srcdir} dstdir={dstdir}: {cmd}")
|
||||
subprocess_capture(self.overlay_state_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
|
||||
self.overlay_mounts.append((ident, dstdir))
|
||||
subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
|
||||
self.overlay_mounts_created_by_us.append((ident, dstdir))
|
||||
|
||||
def cleanup_overlay(self):
|
||||
while len(self.overlay_mounts) > 0:
|
||||
(ident, mountpoint) = self.overlay_mounts.pop()
|
||||
ident_state_dir = self.overlay_state_dir / ident
|
||||
def overlay_cleanup_teardown(self):
|
||||
"""
|
||||
Unmount the overlayfs mounts created by `self.overlay_mount()`.
|
||||
Supposed to be called during env teardown.
|
||||
"""
|
||||
if not self.test_overlay_dir:
|
||||
return
|
||||
while len(self.overlay_mounts_created_by_us) > 0:
|
||||
(ident, mountpoint) = self.overlay_mounts_created_by_us.pop()
|
||||
ident_state_dir = self.test_overlay_dir / ident
|
||||
cmd = [ "sudo", "umount", str(mountpoint) ]
|
||||
log.info(f"Unmounting overlayfs mount created during setup for ident {ident} at {mountpoint}: {cmd}")
|
||||
subprocess_capture(self.overlay_state_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
|
||||
subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
|
||||
log.info(f"Cleaning up overlayfs state dir (owned by root user) for ident {ident} at {ident_state_dir}")
|
||||
cmd = [ "sudo", "rm", "-rf", str(ident_state_dir)]
|
||||
subprocess_capture(self.overlay_state_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
|
||||
|
||||
self.overlay_state_dir.rmdir() # fails if empty, which acts as an assertion that above cleanup is complete
|
||||
subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
|
||||
|
||||
# assert all overlayfs mounts in our test directory are gone
|
||||
for part in psutil.disk_partitions():
|
||||
if part.fstype == "overlay":
|
||||
mountpoint = Path(part.mountpoint)
|
||||
assert not self.test_output_dir in mountpoint.parents
|
||||
assert [] == list(overlayfs.iter_mounts_beneath(self.test_overlay_dir))
|
||||
|
||||
def enable_scrub_on_exit(self):
|
||||
"""
|
||||
@@ -731,7 +734,10 @@ class NeonEnvBuilder:
|
||||
sk.stop(immediate=True)
|
||||
|
||||
for pageserver in self.env.pageservers:
|
||||
pageserver.assert_no_metric_errors()
|
||||
# if the test threw an exception, don't check for errors
|
||||
# as a failing assertion would cause the cleanup below to fail
|
||||
if exc_type is not None:
|
||||
pageserver.assert_no_metric_errors()
|
||||
|
||||
pageserver.stop(immediate=True)
|
||||
|
||||
@@ -747,7 +753,7 @@ class NeonEnvBuilder:
|
||||
cleanup_error = e
|
||||
|
||||
try:
|
||||
self.cleanup_overlay()
|
||||
self.overlay_cleanup_teardown()
|
||||
except Exception as e:
|
||||
log.error(f"Error cleaning up overlay state: {e}")
|
||||
if cleanup_error is not None:
|
||||
@@ -1074,6 +1080,7 @@ def neon_env_builder(
|
||||
default_broker: NeonBroker,
|
||||
run_id: uuid.UUID,
|
||||
request: FixtureRequest,
|
||||
test_overlay_dir: Path,
|
||||
) -> Iterator[NeonEnvBuilder]:
|
||||
"""
|
||||
Fixture to create a Neon environment for test.
|
||||
@@ -1104,6 +1111,7 @@ def neon_env_builder(
|
||||
preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
|
||||
test_name=request.node.name,
|
||||
test_output_dir=test_output_dir,
|
||||
test_overlay_dir=test_overlay_dir,
|
||||
) as builder:
|
||||
yield builder
|
||||
|
||||
@@ -3240,10 +3248,10 @@ class S3Scrubber:
|
||||
raise
|
||||
|
||||
|
||||
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
def _get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str) -> Path:
|
||||
"""Compute the working directory for an individual test."""
|
||||
test_name = request.node.name
|
||||
test_dir = top_output_dir / test_name.replace("/", "-")
|
||||
test_dir = top_output_dir / (prefix+test_name.replace("/", "-"))
|
||||
|
||||
# We rerun flaky tests multiple times, use a separate directory for each run.
|
||||
if (suffix := getattr(request.node, "execution_count", None)) is not None:
|
||||
@@ -3254,6 +3262,14 @@ def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
assert isinstance(test_dir, Path)
|
||||
return test_dir
|
||||
|
||||
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
return _get_test_dir(request, top_output_dir, "")
|
||||
|
||||
def get_test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
return _get_test_dir(request, top_output_dir, "overlay-")
|
||||
|
||||
def get_test_snapshot_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
return _get_test_dir(request, top_output_dir, "snapshot")
|
||||
|
||||
def get_test_repo_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
|
||||
return get_test_output_dir(request, top_output_dir) / "repo"
|
||||
@@ -3283,9 +3299,11 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
|
||||
# this fixture ensures that the directory exists. That works because
|
||||
# 'autouse' fixtures are run before other fixtures.
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[Path]:
|
||||
def test_output_dir(request: FixtureRequest, top_output_dir: Path, test_overlay_dir: Path) -> Iterator[Path]:
|
||||
"""Create the working directory for an individual test."""
|
||||
|
||||
_ = test_overlay_dir # request it so it can do cleanups
|
||||
|
||||
# one directory per test
|
||||
test_dir = get_test_output_dir(request, top_output_dir)
|
||||
log.info(f"test_output_dir is {test_dir}")
|
||||
@@ -3296,6 +3314,36 @@ def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[P
|
||||
|
||||
allure_attach_from_dir(test_dir)
|
||||
|
||||
@pytest.fixture(scope="function", autouse=True)
|
||||
def test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Optional[Path]:
|
||||
"""Create the overlay state directory for an individual test."""
|
||||
|
||||
if not os.getenv("NEON_ENV_FROM_REPO_DIR_USE_OVERLAYFS"):
|
||||
return None
|
||||
|
||||
# one directory per test
|
||||
overlay_dir = get_test_overlay_dir(request, top_output_dir)
|
||||
log.info(f"test_overlay_dir is {overlay_dir}")
|
||||
|
||||
overlay_dir.mkdir(exist_ok=True)
|
||||
# unmount stale overlayfs mounts which subdirectories of `overlay_dir/*` as the overlayfs `upperdir` and `workdir`
|
||||
for mountpoint in overlayfs.iter_mounts_beneath(get_test_output_dir(request, top_output_dir)):
|
||||
cmd = [ "sudo", "umount", str(mountpoint) ]
|
||||
log.info(f"Unmounting stale overlayfs mount probably created during earlier test run: {cmd}")
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
# the overlayfs `workdir`` is owned by `root`
|
||||
cmd = [ "sudo", "rm", "-rf", str(overlay_dir)]
|
||||
subprocess.run(cmd, capture_output=True, check=True)
|
||||
|
||||
overlay_dir.mkdir()
|
||||
|
||||
return overlay_dir
|
||||
|
||||
# no need to clean up anything: on clean shutdown,
|
||||
# NeonEnvBuilder.overlay_cleanup_teardown takes care of cleanup
|
||||
# and on unclean shutdown, this function will take care of it
|
||||
# on the next test run
|
||||
|
||||
|
||||
SKIP_DIRS = frozenset(
|
||||
(
|
||||
|
||||
11
test_runner/fixtures/overlayfs.py
Normal file
11
test_runner/fixtures/overlayfs.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from typing import Iterator
|
||||
import psutil
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def iter_mounts_beneath(topdir: Path) -> Iterator[Path]:
|
||||
for part in psutil.disk_partitions(all=True):
|
||||
if part.fstype == "overlay":
|
||||
mountpoint = Path(part.mountpoint)
|
||||
if topdir in mountpoint.parents:
|
||||
yield mountpoint
|
||||
@@ -15,8 +15,9 @@ from fixtures.types import TenantId, TimelineId
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
@pytest.mark.timeout(1000)
|
||||
def snapshotting_env(
|
||||
neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path
|
||||
neon_env_builder: NeonEnvBuilder, pg_bin: PgBin, test_output_dir: Path,
|
||||
) -> Tuple[NeonEnv, TimelineId, List[TenantId]]:
|
||||
"""
|
||||
The fixture prepares environment or restores it from a snapshot.
|
||||
@@ -44,7 +45,7 @@ def snapshotting_env(
|
||||
}
|
||||
|
||||
if snapshot_dir.exists():
|
||||
env = neon_env_builder.from_repo_dir(snapshot_dir, use_overlay=True)
|
||||
env = neon_env_builder.from_repo_dir(snapshot_dir)
|
||||
ps_http = env.pageserver.http_client()
|
||||
tenants = list({TenantId(t.name) for t in (snapshot_dir.glob("pageserver_*/tenants/*"))})
|
||||
template_timeline = env.initial_timeline
|
||||
@@ -78,7 +79,7 @@ def snapshotting_env(
|
||||
src_timelines_dir: Path = remote_storage.tenant_path(template_tenant) / "timelines"
|
||||
assert src_timelines_dir.is_dir(), f"{src_timelines_dir} is not a directory"
|
||||
tenants = [template_tenant]
|
||||
for i in range(0, 20):
|
||||
for i in range(0, 100):
|
||||
new_tenant = TenantId.generate()
|
||||
tenants.append(new_tenant)
|
||||
log.info("Duplicating tenant #%s: %s", i, new_tenant)
|
||||
@@ -152,6 +153,7 @@ def test_getpage_throughput(
|
||||
env.pageserver.connstr(password=None),
|
||||
"--runtime",
|
||||
duration,
|
||||
# "--per-target-rate-limit", "50",
|
||||
*[f"{tenant}/{template_timeline}" for tenant in tenants],
|
||||
]
|
||||
log.info(f"command: {' '.join(cmd)}")
|
||||
|
||||
Reference in New Issue
Block a user