many_tenants: create the snapshot using overlayfs if supported

The single-threaded copytree used to create the snapshot was by far the
costliest part of creating the snapshot in my 20k tenants example.

As part of this change, also fix the point at which we do overlayfs teardown.
We want it to run after the log-checking has happened.
Actually, we want it to happen even if `not self.env`.
However, that means the local storage cleanups would do a lot of
useless work if we mounted a large snapshot via overlayfs.
So, make the local storage cleanup skip files on overlayfs mounts.
All storage used by the changes in the overlayfs will be cleared
by overlayfs teardown.
This commit is contained in:
Christian Schwarz
2024-01-09 15:25:22 +00:00
parent a2febc5a90
commit 8c855f4e1f
2 changed files with 112 additions and 26 deletions

View File

@@ -625,6 +625,54 @@ class NeonEnvBuilder:
)
self.overlay_mounts_created_by_us.append((ident, dstdir))
def _overlay_umount(self, mountpoint: Path):
    """
    Unmount `mountpoint` by running `sudo umount <mountpoint>`.

    Asserts that `mountpoint` is currently a mount point before issuing the command.
    The command is run through `subprocess_capture` with `check=True` and with
    stdout/stderr echoing enabled.
    """
    assert mountpoint.is_mount()
    subprocess_capture(
        self.test_output_dir,
        ["sudo", "umount", str(mountpoint)],
        check=True,
        echo_stderr=True,
        echo_stdout=True,
    )
def overlay_unmount_and_move(self, ident: str, dst: Path):
    """
    Unmount the overlayfs mount registered under `ident` and move its upperdir to `dst`.

    If `dst` already exists as a directory it is removed with `rmdir` first, so an
    empty `dst` gets replaced and a non-empty one makes this method raise.
    The caller must ensure the unmount can succeed, i.e., there are no nested mounts.

    Asserts that `self.test_overlay_dir` is set and that the state dir for `ident`
    (with its `upper` and `work` subdirectories) exists.
    Raises RuntimeError if no mount is registered for `ident`.
    """
    assert self.test_overlay_dir is not None

    # --- validation only; nothing is mutated until all checks have passed ---
    state_dir = self.test_overlay_dir / ident
    upper_dir = state_dir / "upper"
    work_dir = state_dir / "work"
    for required_dir in (state_dir, upper_dir, work_dir):
        assert required_dir.is_dir()
    assert (
        self.test_overlay_dir not in dst.parents
    ), "otherwise workdir cleanup below wouldn't work"

    # Locate the registration for `ident`; if it was registered more than once,
    # the last entry wins.
    position = None
    for candidate_pos, (registered_ident, _) in enumerate(self.overlay_mounts_created_by_us):
        if registered_ident == ident:
            position = candidate_pos
    if position is None:
        raise RuntimeError(f"cannot find mount for ident {ident}")

    # --- from here on we mutate state ---
    if dst.is_dir():
        # rmdir refuses to remove a non-empty directory, which is what we want
        dst.rmdir()

    mountpoint = self.overlay_mounts_created_by_us.pop(position)[1]
    self._overlay_umount(mountpoint)
    upper_dir.rename(dst)

    # The upperdir is gone; remove the workdir via sudo, then the state dir itself.
    subprocess_capture(
        self.test_output_dir,
        ["sudo", "rm", "-rf", str(work_dir)],
        check=True,
        echo_stderr=True,
        echo_stdout=True,
    )
    # expected to be empty now that `upper` was moved out and `work` was deleted
    state_dir.rmdir()
def overlay_cleanup_teardown(self):
"""
Unmount the overlayfs mounts created by `self.overlay_mount()`.
@@ -635,12 +683,17 @@ class NeonEnvBuilder:
while len(self.overlay_mounts_created_by_us) > 0:
(ident, mountpoint) = self.overlay_mounts_created_by_us.pop()
ident_state_dir = self.test_overlay_dir / ident
cmd = [ "sudo", "umount", str(mountpoint) ]
log.info(f"Unmounting overlayfs mount created during setup for ident {ident} at {mountpoint}: {cmd}")
subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
log.info(f"Cleaning up overlayfs state dir (owned by root user) for ident {ident} at {ident_state_dir}")
cmd = [ "sudo", "rm", "-rf", str(ident_state_dir)]
subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True)
log.info(
f"Unmounting overlayfs mount created during setup for ident {ident} at {mountpoint}"
)
self._overlay_umount(mountpoint)
log.info(
f"Cleaning up overlayfs state dir (owned by root user) for ident {ident} at {ident_state_dir}"
)
cmd = ["sudo", "rm", "-rf", str(ident_state_dir)]
subprocess_capture(
self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True
)
# assert all overlayfs mounts in our test directory are gone
assert [] == list(overlayfs.iter_mounts_beneath(self.test_overlay_dir))
@@ -711,8 +764,15 @@ class NeonEnvBuilder:
if self.preserve_database_files:
return
overlayfs_mounts = {mountpoint for _, mountpoint in self.overlay_mounts_created_by_us}
directories_to_clean: List[Path] = []
for test_entry in Path(self.repo_dir).glob("**/*"):
if test_entry in overlayfs_mounts:
continue
for parent in test_entry.parents:
if parent in overlayfs_mounts:
continue
if test_entry.is_file():
test_file = test_entry
if ATTACHMENT_NAME_REGEX.fullmatch(test_file.name):
@@ -761,13 +821,6 @@ class NeonEnvBuilder:
log.error(f"Error during remote storage scrub: {e}")
cleanup_error = e
try:
self.overlay_cleanup_teardown()
except Exception as e:
log.error(f"Error cleaning up overlay state: {e}")
if cleanup_error is not None:
cleanup_error = e
try:
self.cleanup_remote_storage()
except Exception as e:
@@ -788,6 +841,13 @@ class NeonEnvBuilder:
for pageserver in self.env.pageservers:
pageserver.assert_no_errors()
try:
self.overlay_cleanup_teardown()
except Exception as e:
log.error(f"Error cleaning up overlay state: {e}")
if cleanup_error is not None:
cleanup_error = e
class NeonEnv:
"""

View File

@@ -47,9 +47,22 @@ def single_timeline(
{TenantId(t.name) for t in (snapshot_dir.path.glob("pageserver_*/tenants/*"))}
)
template_timeline = env.initial_timeline
neon_env_builder.start()
else:
if snapshot_dir.path.exists():
shutil.rmtree(snapshot_dir.path)
if save_snapshot and neon_env_builder.test_overlay_dir is not None:
# Make repo_dir an overlayfs mount with lowerdir being the empty snapshot_dir.
# When we're done filling up repo_dir, tear everything down, unmount the overlayfs, and use
# the upperdir as the snapshot. This is equivalent to docker `FROM scratch`.
assert not neon_env_builder.repo_dir.exists()
assert neon_env_builder.repo_dir.parent.exists()
snapshot_dir.path.mkdir()
neon_env_builder.overlay_mount(
"create-snapshot-repo-dir", snapshot_dir.path, neon_env_builder.repo_dir
)
neon_env_builder.config_init_force = "empty-dir-ok"
env = neon_env_builder.init_start()
remote_storage = env.pageserver_remote_storage
@@ -88,7 +101,7 @@ def single_timeline(
config=template_config.copy(),
)
time.sleep(0.1)
wait_until_tenant_state(ps_http, tenant, "Broken", 3)
wait_until_tenant_state(ps_http, tenant, "Broken", 10)
work_queue.do(22, tenants, attach_broken)
@@ -100,7 +113,29 @@ def single_timeline(
fixtures.pageserver.remote_storage.copy_all_remote_layer_files_to_local_tenant_dir(
env, tenant_timelines
)
env.pageserver.start()
if save_snapshot:
env.stop(immediate=True, ps_assert_metric_no_errors=True)
if neon_env_builder.test_overlay_dir is None:
log.info(f"take snapshot using shutil.copytree")
shutil.copytree(env.repo_dir, snapshot_dir.path)
else:
log.info(f"take snapshot by using overlayfs upperdir")
neon_env_builder.overlay_unmount_and_move(
"create-snapshot-repo-dir", snapshot_dir.path
)
log.info("remove empty repo_dir (previously mountpoint) for snapshot overlay_mount")
env.repo_dir.rmdir()
# TODO from here on, we should be able to reset / goto top where snapshot_dir.is_initialized()
log.info(f"make repo_dir an overlayfs mount of the snapshot we just created")
neon_env_builder.overlay_mount(
"repo-dir-after-taking-snapshot", snapshot_dir.path, env.repo_dir
)
snapshot_dir.set_initialized()
else:
log.info("skip taking snapshot")
env.start()
log.info(f"wait for tenants to become active")
for tenant in tenants:
@@ -113,14 +148,5 @@ def single_timeline(
for layer in info.historic_layers:
assert not layer.remote
# take snapshot after download all layers so tenant dir restoration is fast
# TODO: use overlayfs to make this step less costly; we'd implement half of docker at that point
if save_snapshot:
log.info(f"take snapshot")
shutil.copytree(env.repo_dir, snapshot_dir.path)
snapshot_dir.set_initialized()
else:
log.info("skip taking snapshot")
log.info("ready")
return SingleTimeline(env, template_timeline, tenants)