diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 4d7c79535d..92dd1b532d 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1488,6 +1488,25 @@ class NeonPageserver(PgProtocol):
         self.running = True
         return self
 
+    def _wait_for_death(self):
+        """
+        Wait for the pageserver process to exit.
+
+        Assumes a kill signal has already been sent. Polls the pid recorded in
+        pageserver.pid every 0.1 seconds, 20 times at most (about 2 seconds).
+
+        Raises AssertionError if the process was still alive at every check.
+        """
+        # NOTE(review): presumably the pid file is still present after the stop
+        # command returns; read_pid() would raise otherwise -- confirm.
+        pid_path = pathlib.Path(self.env.repo_dir) / "pageserver.pid"
+        pid = read_pid(pid_path)
+        for _ in range(20):
+            if not check_pid(pid):
+                return
+            time.sleep(0.1)
+        raise AssertionError("Pageserver failed to die")
+
     def stop(self, immediate=False) -> 'NeonPageserver':
         """
         Stop the page server.
@@ -1495,10 +1514,7 @@ class NeonPageserver(PgProtocol):
         """
         if self.running:
             self.env.neon_cli.pageserver_stop(immediate)
-            # HACK This fixes https://github.com/neondatabase/neon/issues/2247
-            # in most cases, but we should probably wait on some event rather
-            # than wait 0.1 seconds.
-            time.sleep(0.1)
+            self._wait_for_death()
             self.running = False
         return self
 
@@ -2008,6 +2024,25 @@ def read_pid(path: Path) -> int:
     return int(path.read_text())
 
 
+def check_pid(pid: int) -> bool:
+    """
+    Check whether a process with the given pid exists.
+
+    Returns True even when the process belongs to another user and cannot be
+    signalled by us; only "no such process" counts as not running.
+    """
+    try:
+        # If sig is 0, then no signal is sent, but error checking is still performed.
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        # ESRCH: there is no process with this pid.
+        return False
+    except PermissionError:
+        # EPERM: the process exists, but we are not allowed to signal it.
+        return True
+    return True
+
+
 @dataclass
 class SafekeeperPort:
     pg: int