python tests: support overlayfs for NeonEnvBuilder.from_repo_dir (#6295)

Part of #5771
Extracted from https://github.com/neondatabase/neon/pull/6214

This PR makes the test suite sensitive to the new env var
`NEON_ENV_BUILDER_FROM_REPO_DIR_USE_OVERLAYFS`.
If it is set, `NeonEnvBuilder.from_repo_dir` uses overlayfs
to duplicate the snapshot repo dir contents.

Since mounting requires root privileges, we use sudo to perform
the mounts. That, and macOS support, is also why copytree remains
the default.

If we ever run on a filesystem with copy reflink support, we should
consider that as an alternative.

This PR can be tried on a Linux machine on the
`test_backward_compatibility` test, which uses `from_repo_dir`.
This commit is contained in:
Christian Schwarz
2024-01-09 11:15:46 +01:00
committed by GitHub
parent 4b6004e8c9
commit 90e0219b29
2 changed files with 170 additions and 9 deletions

View File

@@ -40,6 +40,7 @@ from psycopg2.extensions import make_dsn, parse_dsn
from typing_extensions import Literal
from urllib3.util.retry import Retry
from fixtures import overlayfs
from fixtures.broker import NeonBroker
from fixtures.log_helper import log
from fixtures.pageserver.allowed_errors import (
@@ -424,6 +425,7 @@ class NeonEnvBuilder:
pg_version: PgVersion,
test_name: str,
test_output_dir: Path,
test_overlay_dir: Optional[Path] = None,
pageserver_remote_storage: Optional[RemoteStorage] = None,
pageserver_config_override: Optional[str] = None,
num_safekeepers: int = 1,
@@ -468,6 +470,8 @@ class NeonEnvBuilder:
self.initial_timeline = initial_timeline or TimelineId.generate()
self.scrub_on_exit = False
self.test_output_dir = test_output_dir
self.test_overlay_dir = test_overlay_dir
self.overlay_mounts_created_by_us: List[Tuple[str, Path]] = []
assert test_name.startswith(
"test_"
@@ -547,7 +551,10 @@ class NeonEnvBuilder:
tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
shutil.copytree(tenants_from_dir, tenants_to_dir)
if self.test_overlay_dir is None:
shutil.copytree(tenants_from_dir, tenants_to_dir)
else:
self.overlay_mount(f"{ps_dir.name}:tenants", tenants_from_dir, tenants_to_dir)
for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
@@ -556,9 +563,16 @@ class NeonEnvBuilder:
shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
shutil.copytree(
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
)
if self.test_overlay_dir is None:
shutil.copytree(
repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
)
else:
self.overlay_mount(
"local_fs_remote_storage",
repo_dir / "local_fs_remote_storage",
self.repo_dir / "local_fs_remote_storage",
)
if (attachments_json := Path(repo_dir / "attachments.json")).exists():
shutil.copyfile(attachments_json, self.repo_dir / attachments_json.name)
@@ -575,6 +589,69 @@ class NeonEnvBuilder:
return self.env
def overlay_mount(self, ident: str, srcdir: Path, dstdir: Path):
    """
    Create an overlayfs mount at `dstdir` that uses `srcdir` as its `lowerdir`.

    The `upperdir` and `workdir` of the mount are created as
    `<test_overlay_dir>/<ident>/upper` and `<test_overlay_dir>/<ident>/work`.
    The mount is performed through `sudo mount` and is recorded in
    `self.overlay_mounts_created_by_us` once it has succeeded.

    `ident` names the per-mount state directory; `dstdir` must not exist yet
    and must live beneath `self.test_output_dir`.
    """
    overlay_root = self.test_overlay_dir
    assert overlay_root
    # the mountpoint has to be inside the test output dir,
    # so that teardown & the test_overlay_dir fixture work
    assert self.test_output_dir in dstdir.parents
    assert srcdir.is_dir()

    # the mountpoint itself must be freshly created by us
    dstdir.mkdir(exist_ok=False, parents=False)

    state_dir = overlay_root / ident
    # exist_ok=False doubles as a uniqueness check for `ident`
    state_dir.mkdir(exist_ok=False, parents=False)
    upperdir = state_dir / "upper"
    workdir = state_dir / "work"
    upperdir.mkdir()
    workdir.mkdir()

    mount_options = f"lowerdir={srcdir},upperdir={upperdir},workdir={workdir}"
    cmd = ["sudo", "mount", "-t", "overlay", "overlay", "-o", mount_options, str(dstdir)]
    log.info(f"Mounting overlayfs srcdir={srcdir} dstdir={dstdir}: {cmd}")
    subprocess_capture(
        self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True
    )

    # only remember the mount after the mount command succeeded
    self.overlay_mounts_created_by_us.append((ident, dstdir))
def overlay_cleanup_teardown(self):
    """
    Unmount the overlayfs mounts created by `self.overlay_mount()` and
    remove their state directories.

    Supposed to be called during env teardown. Does nothing if
    `self.test_overlay_dir` is None. Mounts are processed in reverse
    creation order; for each one the mountpoint is unmounted and its state
    directory under `test_overlay_dir` is removed, both via `sudo`.

    Raises whatever `subprocess_capture(..., check=True)` raises if a command
    fails, and AssertionError if overlayfs mounts remain beneath
    `self.test_output_dir` afterwards.
    """
    if self.test_overlay_dir is None:
        return

    while self.overlay_mounts_created_by_us:
        ident, mountpoint = self.overlay_mounts_created_by_us.pop()
        ident_state_dir = self.test_overlay_dir / ident

        cmd = ["sudo", "umount", str(mountpoint)]
        log.info(
            f"Unmounting overlayfs mount created during setup for ident {ident} at {mountpoint}: {cmd}"
        )
        subprocess_capture(
            self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True
        )

        log.info(
            f"Cleaning up overlayfs state dir (owned by root user) for ident {ident} at {ident_state_dir}"
        )
        # the state dir is owned by root, hence `sudo rm` instead of shutil.rmtree
        cmd = ["sudo", "rm", "-rf", str(ident_state_dir)]
        subprocess_capture(
            self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True
        )

    # Assert that all overlayfs mounts in our test directory are gone.
    # The mountpoints live beneath test_output_dir (overlay_mount asserts
    # that), not beneath test_overlay_dir, which only holds upper/work dirs;
    # scanning test_overlay_dir here could never find a leftover mount.
    leftover_mounts = list(overlayfs.iter_mounts_beneath(self.test_output_dir))
    assert [] == leftover_mounts, f"overlayfs mounts left behind: {leftover_mounts}"
def enable_scrub_on_exit(self):
"""
Call this if you would like the fixture to automatically run
@@ -681,7 +758,10 @@ class NeonEnvBuilder:
sk.stop(immediate=True)
for pageserver in self.env.pageservers:
pageserver.assert_no_metric_errors()
# if the test threw an exception, don't check for errors
# as a failing assertion would cause the cleanup below to fail
if exc_type is not None:
pageserver.assert_no_metric_errors()
pageserver.stop(immediate=True)
@@ -696,6 +776,13 @@ class NeonEnvBuilder:
log.error(f"Error during remote storage scrub: {e}")
cleanup_error = e
try:
self.overlay_cleanup_teardown()
except Exception as e:
log.error(f"Error cleaning up overlay state: {e}")
if cleanup_error is not None:
cleanup_error = e
try:
self.cleanup_remote_storage()
except Exception as e:
@@ -1017,6 +1104,7 @@ def neon_env_builder(
default_broker: NeonBroker,
run_id: uuid.UUID,
request: FixtureRequest,
test_overlay_dir: Path,
) -> Iterator[NeonEnvBuilder]:
"""
Fixture to create a Neon environment for test.
@@ -1047,6 +1135,7 @@ def neon_env_builder(
preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
test_name=request.node.name,
test_output_dir=test_output_dir,
test_overlay_dir=test_overlay_dir,
) as builder:
yield builder
@@ -3194,10 +3283,10 @@ class S3Scrubber:
raise
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
"""Compute the working directory for an individual test."""
def _get_test_dir(request: FixtureRequest, top_output_dir: Path, prefix: str) -> Path:
"""Compute the path to a working directory for an individual test."""
test_name = request.node.name
test_dir = top_output_dir / test_name.replace("/", "-")
test_dir = top_output_dir / f"{prefix}{test_name.replace('/', '-')}"
# We rerun flaky tests multiple times, use a separate directory for each run.
if (suffix := getattr(request.node, "execution_count", None)) is not None:
@@ -3209,6 +3298,21 @@ def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
return test_dir
def get_test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
    """
    Compute the path of the working directory for an individual test.

    Delegates to `_get_test_dir` with an empty directory-name prefix.
    """
    return _get_test_dir(request, top_output_dir, prefix="")
def get_test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
    """
    Compute the path of the directory holding the overlayfs `upperdir` and
    `workdir` of the mounts that a test creates.
    See `NeonEnvBuilder.overlay_mount`.

    Delegates to `_get_test_dir` with the directory-name prefix "overlay-".
    """
    return _get_test_dir(request, top_output_dir, prefix="overlay-")
def get_test_repo_dir(request: FixtureRequest, top_output_dir: Path) -> Path:
    """Return the `repo` subdirectory of the test's output directory."""
    output_dir = get_test_output_dir(request, top_output_dir)
    return output_dir / "repo"
@@ -3236,8 +3340,12 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile( # type: ignore[type-arg]
# scope. So it uses the get_test_output_dir() function to get the path, and
# this fixture ensures that the directory exists. That works because
# 'autouse' fixtures are run before other fixtures.
#
# NB: we request the overlay dir fixture so the fixture does its cleanups
@pytest.fixture(scope="function", autouse=True)
def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[Path]:
def test_output_dir(
request: FixtureRequest, top_output_dir: Path, test_overlay_dir: Path
) -> Iterator[Path]:
"""Create the working directory for an individual test."""
# one directory per test
@@ -3251,6 +3359,43 @@ def test_output_dir(request: FixtureRequest, top_output_dir: Path) -> Iterator[P
allure_attach_from_dir(test_dir)
@pytest.fixture(scope="function")
def test_overlay_dir(request: FixtureRequest, top_output_dir: Path) -> Optional[Path]:
    """
    Idempotently create the directory that holds a test's overlayfs mount state.

    Returns None if the env var NEON_ENV_BUILDER_FROM_REPO_DIR_USE_OVERLAYFS
    is not set. Otherwise, leftovers of previous runs that were aborted
    (e.g. due to Ctrl-C, OOM kills, etc) are cleaned up first and the path of
    a fresh, empty directory is returned.
    """
    use_overlayfs = os.getenv("NEON_ENV_BUILDER_FROM_REPO_DIR_USE_OVERLAYFS")
    if use_overlayfs is None:
        return None

    overlay_dir = get_test_overlay_dir(request, top_output_dir)
    log.info(f"test_overlay_dir is {overlay_dir}")
    overlay_dir.mkdir(exist_ok=True)

    # Unmount stale overlayfs mounts beneath the test's output dir; they use
    # subdirectories of `overlay_dir/*` as their overlayfs `upperdir` and `workdir`.
    output_dir = get_test_output_dir(request, top_output_dir)
    for stale_mountpoint in overlayfs.iter_mounts_beneath(output_dir):
        umount_cmd = ["sudo", "umount", str(stale_mountpoint)]
        log.info(
            f"Unmounting stale overlayfs mount probably created during earlier test run: {umount_cmd}"
        )
        subprocess.run(umount_cmd, capture_output=True, check=True)

    # The overlayfs `workdir` is owned by `root`, so shutil.rmtree won't work.
    remove_cmd = ["sudo", "rm", "-rf", str(overlay_dir)]
    subprocess.run(remove_cmd, capture_output=True, check=True)

    overlay_dir.mkdir()

    # No teardown needed here: on clean shutdown,
    # NeonEnvBuilder.overlay_cleanup_teardown takes care of cleanup,
    # and on unclean shutdown, this function will take care of it
    # on the next test run.
    return overlay_dir
SKIP_DIRS = frozenset(
(
"pg_wal",

View File

@@ -0,0 +1,16 @@
from pathlib import Path
from typing import Iterator
import psutil
def iter_mounts_beneath(topdir: Path) -> Iterator[Path]:
    """
    Yield the mountpoint of every overlayfs mount located beneath `topdir`.

    Only mounts whose filesystem type is "overlay" are reported, and a mount
    at `topdir` itself is not yielded.
    """
    for partition in psutil.disk_partitions(all=True):
        if partition.fstype != "overlay":
            continue
        candidate = Path(partition.mountpoint)
        # `parents` excludes the path itself, so `topdir` is never yielded
        if topdir in candidate.parents:
            yield candidate