From 8c855f4e1f7710f8d87562c0d8ed5048339b2ebd Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 9 Jan 2024 15:25:22 +0000 Subject: [PATCH] many_tenants: create the snapshot using overlayfs if supported The single-threaded copytree to create the snapshot was by far the costliest part of creating the snapshot in my 20k tenants example. As part of this change, also fix when we do overlayfs teardown. We want to do it after the log-checking has happened. Actually, we want it to happen even if `not self.env`. However, that means the local storage cleanups would do a lot of useless work if we mounted a large snapshot via overlayfs. So, make the local storage cleanup skip files on overlayfs mounts. All storage used by the changes in the overlayfs will be cleared by overlayfs teardown. --- test_runner/fixtures/neon_fixtures.py | 86 ++++++++++++++++--- .../fixtures/pageserver/many_tenants.py | 52 ++++++++--- 2 files changed, 112 insertions(+), 26 deletions(-) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 969929fd30..45d99f5017 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -625,6 +625,54 @@ class NeonEnvBuilder: ) self.overlay_mounts_created_by_us.append((ident, dstdir)) + def _overlay_umount(self, mountpoint: Path): + cmd = ["sudo", "umount", str(mountpoint)] + assert mountpoint.is_mount() + subprocess_capture( + self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True + ) + + def overlay_unmount_and_move(self, ident: str, dst: Path): + """ + Unmount previously established overlayfs mount at `dstdir` and move the upperdir contents to `dst`. + If `dst` is an empty directory, it gets replaced. + Caller is responsible for ensuring the unmount will succeed, i.e., that there aren't any nested mounts.
+ + Raises exception if self.test_overlay_dir is None + """ + assert self.test_overlay_dir is not None + # not mutating state yet, make checks + ident_state_dir = self.test_overlay_dir / ident + assert ident_state_dir.is_dir() + upper = ident_state_dir / "upper" + work = ident_state_dir / "work" + assert upper.is_dir() + assert work.is_dir() + assert ( + self.test_overlay_dir not in dst.parents + ), "otherwise workdir cleanup below wouldn't work" + # find index, still not mutating state + idxmap = { + existing_ident: idx + for idx, (existing_ident, _) in enumerate(self.overlay_mounts_created_by_us) + } + idx = idxmap.get(ident) + if idx is None: + raise RuntimeError(f"cannot find mount for ident {ident}") + + if dst.is_dir(): + dst.rmdir() # raises exception if not empty, which is what we want + + _, mountpoint = self.overlay_mounts_created_by_us.pop(idx) + self._overlay_umount(mountpoint) + upper.rename(dst) + # we moved the upperdir, clean up workdir and then its parent ident_state_dir + cmd = ["sudo", "rm", "-rf", str(work)] + subprocess_capture( + self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True + ) + ident_state_dir.rmdir() # should be empty since we moved `upper` out + def overlay_cleanup_teardown(self): """ Unmount the overlayfs mounts created by `self.overlay_mount()`. 
@@ -635,12 +683,17 @@ class NeonEnvBuilder: while len(self.overlay_mounts_created_by_us) > 0: (ident, mountpoint) = self.overlay_mounts_created_by_us.pop() ident_state_dir = self.test_overlay_dir / ident - cmd = [ "sudo", "umount", str(mountpoint) ] - log.info(f"Unmounting overlayfs mount created during setup for ident {ident} at {mountpoint}: {cmd}") - subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True) - log.info(f"Cleaning up overlayfs state dir (owned by root user) for ident {ident} at {ident_state_dir}") - cmd = [ "sudo", "rm", "-rf", str(ident_state_dir)] - subprocess_capture(self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True) + log.info( + f"Unmounting overlayfs mount created during setup for ident {ident} at {mountpoint}" + ) + self._overlay_umount(mountpoint) + log.info( + f"Cleaning up overlayfs state dir (owned by root user) for ident {ident} at {ident_state_dir}" + ) + cmd = ["sudo", "rm", "-rf", str(ident_state_dir)] + subprocess_capture( + self.test_output_dir, cmd, check=True, echo_stderr=True, echo_stdout=True + ) # assert all overlayfs mounts in our test directory are gone assert [] == list(overlayfs.iter_mounts_beneath(self.test_overlay_dir)) @@ -711,8 +764,15 @@ class NeonEnvBuilder: if self.preserve_database_files: return + overlayfs_mounts = {mountpoint for _, mountpoint in self.overlay_mounts_created_by_us} + directories_to_clean: List[Path] = [] for test_entry in Path(self.repo_dir).glob("**/*"): + if test_entry in overlayfs_mounts: + continue + for parent in test_entry.parents: + if parent in overlayfs_mounts: + continue if test_entry.is_file(): test_file = test_entry if ATTACHMENT_NAME_REGEX.fullmatch(test_file.name): @@ -761,13 +821,6 @@ class NeonEnvBuilder: log.error(f"Error during remote storage scrub: {e}") cleanup_error = e - try: - self.overlay_cleanup_teardown() - except Exception as e: - log.error(f"Error cleaning up overlay state: {e}") - if cleanup_error is not 
None: - cleanup_error = e - try: self.cleanup_remote_storage() except Exception as e: @@ -788,6 +841,13 @@ class NeonEnvBuilder: for pageserver in self.env.pageservers: pageserver.assert_no_errors() + try: + self.overlay_cleanup_teardown() + except Exception as e: + log.error(f"Error cleaning up overlay state: {e}") + if cleanup_error is not None: + cleanup_error = e + class NeonEnv: """ diff --git a/test_runner/fixtures/pageserver/many_tenants.py b/test_runner/fixtures/pageserver/many_tenants.py index f3b9d563bb..5b44b96069 100644 --- a/test_runner/fixtures/pageserver/many_tenants.py +++ b/test_runner/fixtures/pageserver/many_tenants.py @@ -47,9 +47,22 @@ def single_timeline( {TenantId(t.name) for t in (snapshot_dir.path.glob("pageserver_*/tenants/*"))} ) template_timeline = env.initial_timeline - - neon_env_builder.start() else: + if snapshot_dir.path.exists(): + shutil.rmtree(snapshot_dir.path) + + if save_snapshot and neon_env_builder.test_overlay_dir is not None: + # Make repo_dir an overlayfs mount with lowerdir being the empty snapshot_dir. + # When we're done filling up repo_dir, tear everything down, unmount the overlayfs, and use + # the upperdir as the snapshot. This is equivalent to docker `FROM scratch`. 
+ assert not neon_env_builder.repo_dir.exists() + assert neon_env_builder.repo_dir.parent.exists() + snapshot_dir.path.mkdir() + neon_env_builder.overlay_mount( + "create-snapshot-repo-dir", snapshot_dir.path, neon_env_builder.repo_dir + ) + neon_env_builder.config_init_force = "empty-dir-ok" + env = neon_env_builder.init_start() remote_storage = env.pageserver_remote_storage @@ -88,7 +101,7 @@ def single_timeline( config=template_config.copy(), ) time.sleep(0.1) - wait_until_tenant_state(ps_http, tenant, "Broken", 3) + wait_until_tenant_state(ps_http, tenant, "Broken", 10) work_queue.do(22, tenants, attach_broken) @@ -100,7 +113,29 @@ def single_timeline( fixtures.pageserver.remote_storage.copy_all_remote_layer_files_to_local_tenant_dir( env, tenant_timelines ) - env.pageserver.start() + + if save_snapshot: + env.stop(immediate=True, ps_assert_metric_no_errors=True) + if neon_env_builder.test_overlay_dir is None: + log.info(f"take snapshot using shutil.copytree") + shutil.copytree(env.repo_dir, snapshot_dir.path) + else: + log.info(f"take snapshot by using overlayfs upperdir") + neon_env_builder.overlay_unmount_and_move( + "create-snapshot-repo-dir", snapshot_dir.path + ) + log.info("remove empty repo_dir (previously mountpoint) for snapshot overlay_mount") + env.repo_dir.rmdir() + # TODO from here on, we should be able to reset / goto top where snapshot_dir.is_initialized() + log.info(f"make repo_dir an overlayfs mount of the snapshot we just created") + neon_env_builder.overlay_mount( + "repo-dir-after-taking-snapshot", snapshot_dir.path, env.repo_dir + ) + snapshot_dir.set_initialized() + else: + log.info("skip taking snapshot") + + env.start() log.info(f"wait for tenants to become active") for tenant in tenants: @@ -113,14 +148,5 @@ def single_timeline( for layer in info.historic_layers: assert not layer.remote - # take snapshot after download all layers so tenant dir restoration is fast - # TODO: use overlayfs to make this step less costly; we'd implement 
half of docker at that point - if save_snapshot: - log.info(f"take snapshot") - shutil.copytree(env.repo_dir, snapshot_dir.path) - snapshot_dir.set_initialized() - else: - log.info("skip taking snapshot") - log.info("ready") return SingleTimeline(env, template_timeline, tenants)