diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 8994db8cf2..49857d5151 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -3446,6 +3446,12 @@ class Endpoint(PgProtocol, LogUtils): self.active_safekeepers: List[int] = list(map(lambda sk: sk.id, env.safekeepers)) # path to conf is /endpoints//pgdata/postgresql.conf + # This lock prevents concurrent start & stop operations, keeping `self.running` consistent + # with whether we're really running. Tests generally wouldn't try and do these concurrently, + # but endpoints are also stopped during test teardown, which might happen concurrently with + # destruction of objects in tests. + self.lock = threading.Lock() + def http_client( self, auth_token: Optional[str] = None, retries: Optional[Retry] = None ) -> EndpointHttpClient: @@ -3516,14 +3522,15 @@ class Endpoint(PgProtocol, LogUtils): log.info(f"Starting postgres endpoint {self.endpoint_id}") - self.env.neon_cli.endpoint_start( - self.endpoint_id, - safekeepers=self.active_safekeepers, - remote_ext_config=remote_ext_config, - pageserver_id=pageserver_id, - allow_multiple=allow_multiple, - ) - self.running = True + with self.lock: + self.env.neon_cli.endpoint_start( + self.endpoint_id, + safekeepers=self.active_safekeepers, + remote_ext_config=remote_ext_config, + pageserver_id=pageserver_id, + allow_multiple=allow_multiple, + ) + self.running = True return self @@ -3615,15 +3622,20 @@ class Endpoint(PgProtocol, LogUtils): def stop(self, mode: str = "fast") -> "Endpoint": """ Stop the Postgres instance if it's running. + + Because test teardown might try and stop an endpoint concurrently with test code + stopping the endpoint, this method is thread safe + Returns self. """ - if self.running: - assert self.endpoint_id is not None - self.env.neon_cli.endpoint_stop( - self.endpoint_id, check_return_code=self.check_stop_result, mode=mode - ) - self.running = False + with self.lock: + if self.running: + assert self.endpoint_id is not None + self.env.neon_cli.endpoint_stop( + self.endpoint_id, check_return_code=self.check_stop_result, mode=mode + ) + self.running = False return self @@ -3633,12 +3645,13 @@ class Endpoint(PgProtocol, LogUtils): Returns self. """ - assert self.endpoint_id is not None - self.env.neon_cli.endpoint_stop( - self.endpoint_id, True, check_return_code=self.check_stop_result, mode=mode - ) - self.endpoint_id = None - self.running = False + with self.lock: + assert self.endpoint_id is not None + self.env.neon_cli.endpoint_stop( + self.endpoint_id, True, check_return_code=self.check_stop_result, mode=mode + ) + self.endpoint_id = None + self.running = False return self