In python tests, skip ports that are already in use.

We've seen some failures with "Address already in use" errors in the tests. It's not clear why; perhaps some server processes are not cleaned up properly after a test, or maybe the socket is still in the TIME_WAIT state. In any case, let's make the tests more robust by checking that the port is free before trying to use it.
2026-01-05 20:42:54 +00:00 · 2021-10-27 00:46:24 +03:00
parent d88377f9f0
commit 41d48719e1
1 changed files with 28 additions and 3 deletions
--- a/test_runner/fixtures/zenith_fixtures.py
+++ b/test_runner/fixtures/zenith_fixtures.py
@@ -12,6 +12,7 @@ import psycopg2
 import pytest
 import shutil
 import signal
+import socket
 import subprocess
 import time
 import filecmp
@@ -157,14 +158,38 @@ def worker_base_port(worker_seq_no: int):
    return BASE_PORT + worker_seq_no * WORKER_PORT_NUM


+def can_bind(host: str, port: int) -> bool:
+    """
+    Check whether a host:port is available to bind for listening
+
+    Inspired by the can_bind() perl function used in Postgres tests, in
+    vendor/postgres/src/test/perl/PostgresNode.pm
+    """
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
+        # TODO: The pageserver and safekeepers don't use SO_REUSEADDR at the
+        # moment. If that changes, we should start using SO_REUSEADDR here
+        # too, to allow reusing ports more quickly.
+        # See https://github.com/zenithdb/zenith/issues/801
+        #sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+
+        try:
+            sock.bind((host, port))
+            sock.listen()
+            return True
+        except socket.error:
+            log.info(f"Port {port} is in use, skipping")
+            return False
+
+
 class PortDistributor:
    def __init__(self, base_port: int, port_number: int) -> None:
        self.iterator = iter(range(base_port, base_port + port_number))

    def get_port(self) -> int:
-        try:
-            return next(self.iterator)
-        except StopIteration:
+        for port in self.iterator:
+            if can_bind("localhost", port):
+                return port
+        else:
            raise RuntimeError(
                'port range configured for test is exhausted, consider enlarging the range')