Merge branch 'main' into amasterov/regress-arm

2026-05-19 22:20:37 +00:00 · 2024-09-10 14:29:04 +02:00
parent b45560db75 26b5fcdc50
commit f8b9ec0dd0
149 changed files with 2062 additions and 1191 deletions
--- a/test_runner/README.md
+++ b/test_runner/README.md
@@ -18,8 +18,7 @@ Prerequisites:

 Regression tests are in the 'regress' directory. They can be run in
 parallel to minimize total runtime. Most regression test sets up their
-environment with its own pageservers and safekeepers (but see
-`TEST_SHARED_FIXTURES`).
+environment with its own pageservers and safekeepers.

 'pg_clients' contains tests for connecting with various client
 libraries. Each client test uses a Dockerfile that pulls an image that
@@ -74,7 +73,6 @@ This is used to construct full path to the postgres binaries.
 Format is 2-digit major version nubmer, i.e. `DEFAULT_PG_VERSION=16`
 `TEST_OUTPUT`: Set the directory where test state and test output files
 should go.
-`TEST_SHARED_FIXTURES`: Try to re-use a single pageserver for all the tests.
 `RUST_LOG`: logging configuration to pass into Neon CLI

 Useful parameters and commands:
@@ -259,11 +257,8 @@ compute Postgres nodes. The connections between them can be configured to use JW
 authentication tokens, and some other configuration options can be tweaked too.

 The easiest way to get access to a Neon Environment is by using the `neon_simple_env`
-fixture. The 'simple' env may be shared across multiple tests, so don't shut down the nodes
-or make other destructive changes in that environment. Also don't assume that
-there are no tenants or branches or data in the cluster. For convenience, there is a
-branch called `empty`, though. The convention is to create a test-specific branch of
-that and load any test data there, instead of the 'main' branch.
+fixture. For convenience, there is a branch called `main` in environments created with
+'neon_simple_env', ready to be used in the test.

 For more complicated cases, you can build a custom Neon Environment, with the `neon_env`
 fixture:
--- a/test_runner/fixtures/common_types.py
+++ b/test_runner/fixtures/common_types.py
@@ -140,6 +140,14 @@ class TenantId(Id):
        return self.id.hex()


+class NodeId(Id):
+    def __repr__(self) -> str:
+        return f'`NodeId("{self.id.hex()}")'
+
+    def __str__(self) -> str:
+        return self.id.hex()
+
+
 class TimelineId(Id):
    def __repr__(self) -> str:
        return f'TimelineId("{self.id.hex()}")'
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -57,12 +57,11 @@ from _pytest.fixtures import FixtureRequest
 from psycopg2.extensions import connection as PgConnection
 from psycopg2.extensions import cursor as PgCursor
 from psycopg2.extensions import make_dsn, parse_dsn
-from typing_extensions import Literal
 from urllib3.util.retry import Retry

 from fixtures import overlayfs
 from fixtures.broker import NeonBroker
-from fixtures.common_types import Lsn, TenantId, TenantShardId, TimelineId
+from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, TimelineId
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.log_helper import log
 from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
@@ -221,33 +220,6 @@ def neon_api(neon_api_key: str, neon_api_base_url: str) -> NeonAPI:
    return NeonAPI(neon_api_key, neon_api_base_url)


-def shareable_scope(fixture_name: str, config: Config) -> Literal["session", "function"]:
-    """Return either session of function scope, depending on TEST_SHARED_FIXTURES envvar.
-
-    This function can be used as a scope like this:
-    @pytest.fixture(scope=shareable_scope)
-    def myfixture(...)
-       ...
-    """
-    scope: Literal["session", "function"]
-
-    if os.environ.get("TEST_SHARED_FIXTURES") is None:
-        # Create the environment in the per-test output directory
-        scope = "function"
-    elif (
-        os.environ.get("BUILD_TYPE") is not None
-        and os.environ.get("DEFAULT_PG_VERSION") is not None
-    ):
-        scope = "session"
-    else:
-        pytest.fail(
-            "Shared environment(TEST_SHARED_FIXTURES) requires BUILD_TYPE and DEFAULT_PG_VERSION to be set",
-            pytrace=False,
-        )
-
-    return scope
-
-
@pytest.fixture(scope="session")
 def worker_port_num():
    return (32768 - BASE_PORT) // int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "1"))
@@ -758,6 +730,9 @@ class NeonEnvBuilder:
        patch_script = ""
        for ps in self.env.pageservers:
            patch_script += f"UPDATE nodes SET listen_http_port={ps.service_port.http}, listen_pg_port={ps.service_port.pg}  WHERE node_id = '{ps.id}';"
+            # This is a temporary to get the backward compat test happy
+            # since the compat snapshot was generated with an older version of neon local
+            patch_script += f"UPDATE nodes SET availability_zone_id='{ps.az_id}'  WHERE node_id = '{ps.id}' AND availability_zone_id IS NULL;"
        patch_script_path.write_text(patch_script)

        # Update the config with info about tenants and timelines
@@ -1428,8 +1403,8 @@ class NeonEnv:
        return "ep-" + str(self.endpoint_counter)


-@pytest.fixture(scope=shareable_scope)
-def _shared_simple_env(
+@pytest.fixture(scope="function")
+def neon_simple_env(
    request: FixtureRequest,
    pytestconfig: Config,
    port_distributor: PortDistributor,
@@ -1447,19 +1422,13 @@ def _shared_simple_env(
    pageserver_io_buffer_alignment: Optional[int],
 ) -> Iterator[NeonEnv]:
    """
-    # Internal fixture backing the `neon_simple_env` fixture. If TEST_SHARED_FIXTURES
-     is set, this is shared by all tests using `neon_simple_env`.
+    Simple Neon environment, with no authentication and no safekeepers.

    This fixture will use RemoteStorageKind.LOCAL_FS with pageserver.
    """

-    if os.environ.get("TEST_SHARED_FIXTURES") is None:
-        # Create the environment in the per-test output directory
-        repo_dir = get_test_repo_dir(request, top_output_dir)
-    else:
-        # We're running shared fixtures. Share a single directory.
-        repo_dir = top_output_dir / "shared_repo"
-        shutil.rmtree(repo_dir, ignore_errors=True)
+    # Create the environment in the per-test output directory
+    repo_dir = get_test_repo_dir(request, top_output_dir)

    with NeonEnvBuilder(
        top_output_dir=top_output_dir,
@@ -1481,27 +1450,9 @@ def _shared_simple_env(
    ) as builder:
        env = builder.init_start()

-        # For convenience in tests, create a branch from the freshly-initialized cluster.
-        env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
-
        yield env


-@pytest.fixture(scope="function")
-def neon_simple_env(_shared_simple_env: NeonEnv) -> Iterator[NeonEnv]:
-    """
-    Simple Neon environment, with no authentication and no safekeepers.
-
-    If TEST_SHARED_FIXTURES environment variable is set, we reuse the same
-    environment for all tests that use 'neon_simple_env', keeping the
-    page server and safekeepers running. Any compute nodes are stopped after
-    each the test, however.
-    """
-    yield _shared_simple_env
-
-    _shared_simple_env.endpoints.stop_all()
-
-
@pytest.fixture(scope="function")
 def neon_env_builder(
    pytestconfig: Config,
@@ -2557,7 +2508,7 @@ class NeonStorageController(MetricsGetter, LogUtils):

    def tenant_describe(self, tenant_id: TenantId):
        """
-        :return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int}
+        :return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int, preferred_az_id: str}
        """
        response = self.request(
            "GET",
@@ -2567,6 +2518,30 @@ class NeonStorageController(MetricsGetter, LogUtils):
        response.raise_for_status()
        return response.json()

+    def nodes(self):
+        """
+        :return: list of {"id": ""}
+        """
+        response = self.request(
+            "GET",
+            f"{self.api}/control/v1/node",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+        return response.json()
+
+    def node_shards(self, node_id: NodeId):
+        """
+        :return: list of {"shard_id": "", "is_secondary": bool}
+        """
+        response = self.request(
+            "GET",
+            f"{self.api}/control/v1/node/{node_id}/shards",
+            headers=self.headers(TokenScope.ADMIN),
+        )
+        response.raise_for_status()
+        return response.json()
+
    def tenant_shard_split(
        self, tenant_id: TenantId, shard_count: int, shard_stripe_size: Optional[int] = None
    ) -> list[TenantShardId]:
@@ -2883,6 +2858,17 @@ class NeonStorageController(MetricsGetter, LogUtils):
                return None
            raise e

+    def set_preferred_azs(self, preferred_azs: dict[TenantShardId, str]) -> list[TenantShardId]:
+        response = self.request(
+            "PUT",
+            f"{self.api}/control/v1/preferred_azs",
+            headers=self.headers(TokenScope.ADMIN),
+            json={str(tid): az for tid, az in preferred_azs.items()},
+        )
+
+        response.raise_for_status()
+        return [TenantShardId.parse(tid) for tid in response.json()["updated"]]
+
    def __enter__(self) -> "NeonStorageController":
        return self

@@ -4860,14 +4846,7 @@ SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile(  # type: ignore[type-arg]


 # This is autouse, so the test output directory always gets created, even
-# if a test doesn't put anything there. It also solves a problem with the
-# neon_simple_env fixture: if TEST_SHARED_FIXTURES is not set, it
-# creates the repo in the test output directory. But it cannot depend on
-# 'test_output_dir' fixture, because when TEST_SHARED_FIXTURES is not set,
-# it has 'session' scope and cannot access fixtures with 'function'
-# scope. So it uses the get_test_output_dir() function to get the path, and
-# this fixture ensures that the directory exists.  That works because
-# 'autouse' fixtures are run before other fixtures.
+# if a test doesn't put anything there.
 #
 # NB: we request the overlay dir fixture so the fixture does its cleanups
@pytest.fixture(scope="function", autouse=True)
--- a/test_runner/fixtures/safekeeper/http.py
+++ b/test_runner/fixtures/safekeeper/http.py
@@ -50,6 +50,19 @@ class SafekeeperMetrics(Metrics):
        ).value


+@dataclass
+class TermBumpResponse:
+    previous_term: int
+    current_term: int
+
+    @classmethod
+    def from_json(cls, d: Dict[str, Any]) -> "TermBumpResponse":
+        return TermBumpResponse(
+            previous_term=d["previous_term"],
+            current_term=d["current_term"],
+        )
+
+
 class SafekeeperHttpClient(requests.Session, MetricsGetter):
    HTTPError = requests.HTTPError

@@ -252,6 +265,22 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter):
        res.raise_for_status()
        return res.json()

+    def term_bump(
+        self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        term: Optional[int],
+    ) -> TermBumpResponse:
+        body = {}
+        if term is not None:
+            body["term"] = term
+        res = self.post(
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/term_bump",
+            json=body,
+        )
+        res.raise_for_status()
+        return TermBumpResponse.from_json(res.json())
+
    def record_safekeeper_info(self, tenant_id: TenantId, timeline_id: TimelineId, body):
        res = self.post(
            f"http://localhost:{self.port}/v1/record_safekeeper_info/{tenant_id}/{timeline_id}",
--- a/test_runner/performance/test_logical_replication.py
+++ b/test_runner/performance/test_logical_replication.py
@@ -22,10 +22,8 @@ if TYPE_CHECKING:
 def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg):
    env = neon_simple_env

-    env.neon_cli.create_branch("test_logical_replication", "empty")
-    endpoint = env.endpoints.create_start("test_logical_replication")
+    endpoint = env.endpoints.create_start("main")

-    log.info("postgres is running on 'test_logical_replication' branch")
    pg_bin.run_capture(["pgbench", "-i", "-s10", endpoint.connstr()])

    endpoint.safe_psql("create publication pub1 for table pgbench_accounts, pgbench_history")
--- a/test_runner/regress/test_basebackup_error.py
+++ b/test_runner/regress/test_basebackup_error.py
@@ -8,11 +8,10 @@ from fixtures.neon_fixtures import NeonEnv
 #
 def test_basebackup_error(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_basebackup_error", "empty")
    pageserver_http = env.pageserver.http_client()

    # Introduce failpoint
    pageserver_http.configure_failpoints(("basebackup-before-control-file", "return"))

    with pytest.raises(Exception, match="basebackup-before-control-file"):
-        env.endpoints.create_start("test_basebackup_error")
+        env.endpoints.create_start("main")
--- a/test_runner/regress/test_clog_truncate.py
+++ b/test_runner/regress/test_clog_truncate.py
@@ -11,7 +11,6 @@ from fixtures.utils import query_scalar
 #
 def test_clog_truncate(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_clog_truncate", "empty")

    # set aggressive autovacuum to make sure that truncation will happen
    config = [
@@ -24,7 +23,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
        "autovacuum_freeze_max_age=100000",
    ]

-    endpoint = env.endpoints.create_start("test_clog_truncate", config_lines=config)
+    endpoint = env.endpoints.create_start("main", config_lines=config)

    # Install extension containing function needed for test
    endpoint.safe_psql("CREATE EXTENSION neon_test_utils")
@@ -58,7 +57,7 @@ def test_clog_truncate(neon_simple_env: NeonEnv):
    # create new branch after clog truncation and start a compute node on it
    log.info(f"create branch at lsn_after_truncation {lsn_after_truncation}")
    env.neon_cli.create_branch(
-        "test_clog_truncate_new", "test_clog_truncate", ancestor_start_lsn=lsn_after_truncation
+        "test_clog_truncate_new", "main", ancestor_start_lsn=lsn_after_truncation
    )
    endpoint2 = env.endpoints.create_start("test_clog_truncate_new")

--- a/test_runner/regress/test_compute_catalog.py
+++ b/test_runner/regress/test_compute_catalog.py
@@ -4,9 +4,8 @@ from fixtures.neon_fixtures import NeonEnv

 def test_compute_catalog(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_config", "empty")

-    endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
+    endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])
    client = endpoint.http_client()

    objects = client.dbs_and_roles()
--- a/test_runner/regress/test_config.py
+++ b/test_runner/regress/test_config.py
@@ -9,10 +9,9 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
 #
 def test_config(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_config", "empty")

    # change config
-    endpoint = env.endpoints.create_start("test_config", config_lines=["log_min_messages=debug1"])
+    endpoint = env.endpoints.create_start("main", config_lines=["log_min_messages=debug1"])

    with closing(endpoint.connect()) as conn:
        with conn.cursor() as cur:
--- a/test_runner/regress/test_createdropdb.py
+++ b/test_runner/regress/test_createdropdb.py
@@ -17,9 +17,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
    if env.pg_version == PgVersion.V14 and strategy == "wal_log":
        pytest.skip("wal_log strategy not supported on PostgreSQL 14")

-    env.neon_cli.create_branch("test_createdb", "empty")
-
-    endpoint = env.endpoints.create_start("test_createdb")
+    endpoint = env.endpoints.create_start("main")

    with endpoint.cursor() as cur:
        # Cause a 'relmapper' change in the original branch
@@ -33,7 +31,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
        lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")

    # Create a branch
-    env.neon_cli.create_branch("test_createdb2", "test_createdb", ancestor_start_lsn=lsn)
+    env.neon_cli.create_branch("test_createdb2", "main", ancestor_start_lsn=lsn)
    endpoint2 = env.endpoints.create_start("test_createdb2")

    # Test that you can connect to the new database on both branches
@@ -62,8 +60,7 @@ def test_createdb(neon_simple_env: NeonEnv, strategy: str):
 #
 def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_dropdb", "empty")
-    endpoint = env.endpoints.create_start("test_dropdb")
+    endpoint = env.endpoints.create_start("main")

    with endpoint.cursor() as cur:
        cur.execute("CREATE DATABASE foodb")
@@ -80,14 +77,10 @@ def test_dropdb(neon_simple_env: NeonEnv, test_output_dir):
        lsn_after_drop = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")

    # Create two branches before and after database drop.
-    env.neon_cli.create_branch(
-        "test_before_dropdb", "test_dropdb", ancestor_start_lsn=lsn_before_drop
-    )
+    env.neon_cli.create_branch("test_before_dropdb", "main", ancestor_start_lsn=lsn_before_drop)
    endpoint_before = env.endpoints.create_start("test_before_dropdb")

-    env.neon_cli.create_branch(
-        "test_after_dropdb", "test_dropdb", ancestor_start_lsn=lsn_after_drop
-    )
+    env.neon_cli.create_branch("test_after_dropdb", "main", ancestor_start_lsn=lsn_after_drop)
    endpoint_after = env.endpoints.create_start("test_after_dropdb")

    # Test that database exists on the branch before drop
--- a/test_runner/regress/test_createuser.py
+++ b/test_runner/regress/test_createuser.py
@@ -7,8 +7,7 @@ from fixtures.utils import query_scalar
 #
 def test_createuser(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_createuser", "empty")
-    endpoint = env.endpoints.create_start("test_createuser")
+    endpoint = env.endpoints.create_start("main")

    with endpoint.cursor() as cur:
        # Cause a 'relmapper' change in the original branch
@@ -19,7 +18,7 @@ def test_createuser(neon_simple_env: NeonEnv):
        lsn = query_scalar(cur, "SELECT pg_current_wal_insert_lsn()")

    # Create a branch
-    env.neon_cli.create_branch("test_createuser2", "test_createuser", ancestor_start_lsn=lsn)
+    env.neon_cli.create_branch("test_createuser2", "main", ancestor_start_lsn=lsn)
    endpoint2 = env.endpoints.create_start("test_createuser2")

    # Test that you can connect to new branch as a new user
--- a/test_runner/regress/test_ddl_forwarding.py
+++ b/test_runner/regress/test_ddl_forwarding.py
@@ -290,9 +290,8 @@ def assert_db_connlimit(endpoint: Any, db_name: str, connlimit: int, msg: str):
 # Here we test the latter. The first one is tested in test_ddl_forwarding
 def test_ddl_forwarding_invalid_db(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_ddl_forwarding_invalid_db", "empty")
    endpoint = env.endpoints.create_start(
-        "test_ddl_forwarding_invalid_db",
+        "main",
        # Some non-existent url
        config_lines=["neon.console_url=http://localhost:9999/unknown/api/v0/roles_and_databases"],
    )
--- a/test_runner/regress/test_explain_with_lfc_stats.py
+++ b/test_runner/regress/test_explain_with_lfc_stats.py
@@ -10,11 +10,9 @@ def test_explain_with_lfc_stats(neon_simple_env: NeonEnv):
    cache_dir = Path(env.repo_dir) / "file_cache"
    cache_dir.mkdir(exist_ok=True)

-    branchname = "test_explain_with_lfc_stats"
-    env.neon_cli.create_branch(branchname, "empty")
-    log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
+    log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
    endpoint = env.endpoints.create_start(
-        branchname,
+        "main",
        config_lines=[
            "shared_buffers='1MB'",
            f"neon.file_cache_path='{cache_dir}/file.cache'",
--- a/test_runner/regress/test_lfc_resize.py
+++ b/test_runner/regress/test_lfc_resize.py
@@ -16,9 +16,8 @@ from fixtures.neon_fixtures import NeonEnv, PgBin
@pytest.mark.timeout(600)
 def test_lfc_resize(neon_simple_env: NeonEnv, pg_bin: PgBin):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_lfc_resize", "empty")
    endpoint = env.endpoints.create_start(
-        "test_lfc_resize",
+        "main",
        config_lines=[
            "neon.file_cache_path='file.cache'",
            "neon.max_file_cache_size=512MB",
--- a/test_runner/regress/test_lfc_working_set_approximation.py
+++ b/test_runner/regress/test_lfc_working_set_approximation.py
@@ -12,11 +12,9 @@ def test_lfc_working_set_approximation(neon_simple_env: NeonEnv):
    cache_dir = Path(env.repo_dir) / "file_cache"
    cache_dir.mkdir(exist_ok=True)

-    branchname = "test_approximate_working_set_size"
-    env.neon_cli.create_branch(branchname, "empty")
-    log.info(f"Creating endopint with 1MB shared_buffers and 64 MB LFC for branch {branchname}")
+    log.info("Creating endpoint with 1MB shared_buffers and 64 MB LFC")
    endpoint = env.endpoints.create_start(
-        branchname,
+        "main",
        config_lines=[
            "shared_buffers='1MB'",
            f"neon.file_cache_path='{cache_dir}/file.cache'",
--- a/test_runner/regress/test_local_file_cache.py
+++ b/test_runner/regress/test_local_file_cache.py
@@ -5,7 +5,7 @@ import threading
 import time
 from typing import List

-from fixtures.neon_fixtures import DEFAULT_BRANCH_NAME, NeonEnvBuilder
+from fixtures.neon_fixtures import NeonEnvBuilder
 from fixtures.utils import query_scalar


@@ -15,11 +15,8 @@ def test_local_file_cache_unlink(neon_env_builder: NeonEnvBuilder):
    cache_dir = os.path.join(env.repo_dir, "file_cache")
    os.mkdir(cache_dir)

-    env.neon_cli.create_branch("empty", ancestor_branch_name=DEFAULT_BRANCH_NAME)
-    env.neon_cli.create_branch("test_local_file_cache_unlink", "empty")
-
    endpoint = env.endpoints.create_start(
-        "test_local_file_cache_unlink",
+        "main",
        config_lines=[
            "shared_buffers='1MB'",
            f"neon.file_cache_path='{cache_dir}/file.cache'",
--- a/test_runner/regress/test_logical_replication.py
+++ b/test_runner/regress/test_logical_replication.py
@@ -36,10 +36,8 @@ def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg):
    env = neon_simple_env

    tenant_id = env.initial_tenant
-    timeline_id = env.neon_cli.create_branch("test_logical_replication", "empty")
-    endpoint = env.endpoints.create_start(
-        "test_logical_replication", config_lines=["log_statement=all"]
-    )
+    timeline_id = env.initial_timeline
+    endpoint = env.endpoints.create_start("main", config_lines=["log_statement=all"])

    pg_conn = endpoint.connect()
    cur = pg_conn.cursor()
@@ -185,10 +183,9 @@ def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg):

    env = neon_simple_env

-    env.neon_cli.create_branch("test_logical_replication", "empty")
    # set low neon.logical_replication_max_snap_files
    endpoint = env.endpoints.create_start(
-        "test_logical_replication",
+        "main",
        config_lines=["log_statement=all", "neon.logical_replication_max_snap_files=1"],
    )

@@ -472,7 +469,7 @@ def test_slots_and_branching(neon_simple_env: NeonEnv):
 def test_replication_shutdown(neon_simple_env: NeonEnv):
    # Ensure Postgres can exit without stuck when a replication job is active + neon extension installed
    env = neon_simple_env
-    env.neon_cli.create_branch("test_replication_shutdown_publisher", "empty")
+    env.neon_cli.create_branch("test_replication_shutdown_publisher", "main")
    pub = env.endpoints.create("test_replication_shutdown_publisher")

    env.neon_cli.create_branch("test_replication_shutdown_subscriber")
--- a/test_runner/regress/test_migrations.py
+++ b/test_runner/regress/test_migrations.py
@@ -9,9 +9,8 @@ if TYPE_CHECKING:

 def test_migrations(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_migrations", "empty")

-    endpoint = env.endpoints.create("test_migrations")
+    endpoint = env.endpoints.create("main")
    endpoint.respec(skip_pg_catalog_updates=False)
    endpoint.start()

--- a/test_runner/regress/test_multixact.py
+++ b/test_runner/regress/test_multixact.py
@@ -14,8 +14,7 @@ from fixtures.utils import query_scalar
 #
 def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_multixact", "empty")
-    endpoint = env.endpoints.create_start("test_multixact")
+    endpoint = env.endpoints.create_start("main")

    cur = endpoint.connect().cursor()
    cur.execute(
@@ -73,7 +72,9 @@ def test_multixact(neon_simple_env: NeonEnv, test_output_dir):
    assert int(next_multixact_id) > int(next_multixact_id_old)

    # Branch at this point
-    env.neon_cli.create_branch("test_multixact_new", "test_multixact", ancestor_start_lsn=lsn)
+    env.neon_cli.create_branch(
+        "test_multixact_new", ancestor_branch_name="main", ancestor_start_lsn=lsn
+    )
    endpoint_new = env.endpoints.create_start("test_multixact_new")

    next_multixact_id_new = endpoint_new.safe_psql(
--- a/test_runner/regress/test_neon_superuser.py
+++ b/test_runner/regress/test_neon_superuser.py
@@ -6,7 +6,7 @@ from fixtures.utils import wait_until

 def test_neon_superuser(neon_simple_env: NeonEnv, pg_version: PgVersion):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_neon_superuser_publisher", "empty")
+    env.neon_cli.create_branch("test_neon_superuser_publisher", "main")
    pub = env.endpoints.create("test_neon_superuser_publisher")

    env.neon_cli.create_branch("test_neon_superuser_subscriber")
--- a/test_runner/regress/test_parallel_copy.py
+++ b/test_runner/regress/test_parallel_copy.py
@@ -41,8 +41,7 @@ async def parallel_load_same_table(endpoint: Endpoint, n_parallel: int):
 # Load data into one table with COPY TO from 5 parallel connections
 def test_parallel_copy(neon_simple_env: NeonEnv, n_parallel=5):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_parallel_copy", "empty")
-    endpoint = env.endpoints.create_start("test_parallel_copy")
+    endpoint = env.endpoints.create_start("main")

    # Create test table
    conn = endpoint.connect()
--- a/test_runner/regress/test_pg_query_cancellation.py
+++ b/test_runner/regress/test_pg_query_cancellation.py
@@ -42,11 +42,9 @@ def test_cancellations(neon_simple_env: NeonEnv):
    ps_http = ps.http_client()
    ps_http.is_testing_enabled_or_skip()

-    env.neon_cli.create_branch("test_config", "empty")
-
    # We don't want to have any racy behaviour with autovacuum IOs
    ep = env.endpoints.create_start(
-        "test_config",
+        "main",
        config_lines=[
            "autovacuum = off",
            "shared_buffers = 128MB",
--- a/test_runner/regress/test_pg_waldump.py
+++ b/test_runner/regress/test_pg_waldump.py
@@ -22,8 +22,8 @@ def check_wal_segment(pg_waldump_path: str, segment_path: str, test_output_dir):
 def test_pg_waldump(neon_simple_env: NeonEnv, test_output_dir, pg_bin: PgBin):
    env = neon_simple_env
    tenant_id = env.initial_tenant
-    timeline_id = env.neon_cli.create_branch("test_pg_waldump", "empty")
-    endpoint = env.endpoints.create_start("test_pg_waldump")
+    timeline_id = env.initial_timeline
+    endpoint = env.endpoints.create_start("main")

    cur = endpoint.connect().cursor()
    cur.execute(
--- a/test_runner/regress/test_read_validation.py
+++ b/test_runner/regress/test_read_validation.py
@@ -15,12 +15,8 @@ extensions = ["pageinspect", "neon_test_utils", "pg_buffercache"]
 #
 def test_read_validation(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_read_validation", "empty")
-
-    endpoint = env.endpoints.create_start(
-        "test_read_validation",
-    )

+    endpoint = env.endpoints.create_start("main")
    with closing(endpoint.connect()) as con:
        with con.cursor() as c:
            for e in extensions:
@@ -131,13 +127,9 @@ def test_read_validation(neon_simple_env: NeonEnv):

 def test_read_validation_neg(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_read_validation_neg", "empty")
-
    env.pageserver.allowed_errors.append(".*invalid LSN\\(0\\) in request.*")

-    endpoint = env.endpoints.create_start(
-        "test_read_validation_neg",
-    )
+    endpoint = env.endpoints.create_start("main")

    with closing(endpoint.connect()) as con:
        with con.cursor() as c:
--- a/test_runner/regress/test_readonly_node.py
+++ b/test_runner/regress/test_readonly_node.py
@@ -22,8 +22,7 @@ from fixtures.utils import query_scalar
 #
 def test_readonly_node(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_readonly_node", "empty")
-    endpoint_main = env.endpoints.create_start("test_readonly_node")
+    endpoint_main = env.endpoints.create_start("main")

    env.pageserver.allowed_errors.extend(
        [
@@ -74,12 +73,12 @@ def test_readonly_node(neon_simple_env: NeonEnv):

    # Create first read-only node at the point where only 100 rows were inserted
    endpoint_hundred = env.endpoints.create_start(
-        branch_name="test_readonly_node", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
+        branch_name="main", endpoint_id="ep-readonly_node_hundred", lsn=lsn_a
    )

    # And another at the point where 200100 rows were inserted
    endpoint_more = env.endpoints.create_start(
-        branch_name="test_readonly_node", endpoint_id="ep-readonly_node_more", lsn=lsn_b
+        branch_name="main", endpoint_id="ep-readonly_node_more", lsn=lsn_b
    )

    # On the 'hundred' node, we should see only 100 rows
@@ -100,7 +99,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):

    # Check creating a node at segment boundary
    endpoint = env.endpoints.create_start(
-        branch_name="test_readonly_node",
+        branch_name="main",
        endpoint_id="ep-branch_segment_boundary",
        lsn=Lsn("0/3000000"),
    )
@@ -112,7 +111,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
    with pytest.raises(Exception, match="invalid basebackup lsn"):
        # compute node startup with invalid LSN should fail
        env.endpoints.create_start(
-            branch_name="test_readonly_node",
+            branch_name="main",
            endpoint_id="ep-readonly_node_preinitdb",
            lsn=Lsn("0/42"),
        )
@@ -218,14 +217,10 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
 # Similar test, but with more data, and we force checkpoints
 def test_timetravel(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    pageserver_http_client = env.pageserver.http_client()
-    env.neon_cli.create_branch("test_timetravel", "empty")
-    endpoint = env.endpoints.create_start("test_timetravel")
-
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
    client = env.pageserver.http_client()
-
-    tenant_id = endpoint.safe_psql("show neon.tenant_id")[0][0]
-    timeline_id = endpoint.safe_psql("show neon.timeline_id")[0][0]
+    endpoint = env.endpoints.create_start("main")

    lsns = []

@@ -249,7 +244,7 @@ def test_timetravel(neon_simple_env: NeonEnv):
        wait_for_last_record_lsn(client, tenant_id, timeline_id, current_lsn)

        # run checkpoint manually to force a new layer file
-        pageserver_http_client.timeline_checkpoint(tenant_id, timeline_id)
+        client.timeline_checkpoint(tenant_id, timeline_id)

    ##### Restart pageserver
    env.endpoints.stop_all()
@@ -258,7 +253,7 @@ def test_timetravel(neon_simple_env: NeonEnv):

    for i, lsn in lsns:
        endpoint_old = env.endpoints.create_start(
-            branch_name="test_timetravel", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
+            branch_name="main", endpoint_id=f"ep-old_lsn_{i}", lsn=lsn
        )
        with endpoint_old.cursor() as cur:
            assert query_scalar(cur, f"select count(*) from testtab where iteration={i}") == 100000
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -1552,6 +1552,12 @@ def test_tenant_import(neon_env_builder: NeonEnvBuilder, shard_count, remote_sto
    literal_shard_count = 1 if shard_count is None else shard_count
    assert len(describe["shards"]) == literal_shard_count

+    nodes = env.storage_controller.nodes()
+    assert len(nodes) == 2
+    describe1 = env.storage_controller.node_shards(nodes[0]["id"])
+    describe2 = env.storage_controller.node_shards(nodes[1]["id"])
+    assert len(describe1["shards"]) + len(describe2["shards"]) == literal_shard_count
+
    # Check the data is still there: this implicitly proves that we recovered generation numbers
    # properly, for the timeline which was written to after a generation bump.
    for timeline, branch, expect_rows in [
@@ -2512,3 +2518,55 @@ def eq_safekeeper_records(a: dict[str, Any], b: dict[str, Any]) -> bool:
                del d[key]

    return compared[0] == compared[1]
+
+
+@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
+def test_shard_preferred_azs(neon_env_builder: NeonEnvBuilder):
+    def assign_az(ps_cfg):
+        az = f"az-{ps_cfg['id']}"
+        ps_cfg["availability_zone"] = az
+
+    neon_env_builder.pageserver_config_override = assign_az
+    neon_env_builder.num_pageservers = 2
+    env = neon_env_builder.init_configs()
+    env.start()
+
+    tids = [TenantId.generate() for _ in range(0, 3)]
+    for tid in tids:
+        env.storage_controller.tenant_create(tid)
+
+        shards = env.storage_controller.tenant_describe(tid)["shards"]
+        assert len(shards) == 1
+        attached_to = shards[0]["node_attached"]
+        expected_az = env.get_pageserver(attached_to).az_id
+
+        assert shards[0]["preferred_az_id"] == expected_az
+
+    updated = env.storage_controller.set_preferred_azs(
+        {TenantShardId(tid, 0, 0): "foo" for tid in tids}
+    )
+
+    assert set(updated) == set([TenantShardId(tid, 0, 0) for tid in tids])
+
+    for tid in tids:
+        shards = env.storage_controller.tenant_describe(tid)["shards"]
+        assert len(shards) == 1
+        assert shards[0]["preferred_az_id"] == "foo"
+
+    # Generate a layer to avoid shard split handling on ps from tripping
+    # up on debug assert.
+    timeline_id = TimelineId.generate()
+    env.neon_cli.create_timeline("bar", tids[0], timeline_id)
+
+    workload = Workload(env, tids[0], timeline_id, branch_name="bar")
+    workload.init()
+    workload.write_rows(256)
+    workload.validate()
+
+    env.storage_controller.tenant_shard_split(tids[0], shard_count=2)
+    shards = env.storage_controller.tenant_describe(tids[0])["shards"]
+    assert len(shards) == 2
+    for shard in shards:
+        attached_to = shard["node_attached"]
+        expected_az = env.get_pageserver(attached_to).az_id
+        assert shard["preferred_az_id"] == expected_az
--- a/test_runner/regress/test_subxacts.py
+++ b/test_runner/regress/test_subxacts.py
@@ -9,8 +9,7 @@ from fixtures.neon_fixtures import NeonEnv, check_restored_datadir_content
 # CLOG.
 def test_subxacts(neon_simple_env: NeonEnv, test_output_dir):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_subxacts", "empty")
-    endpoint = env.endpoints.create_start("test_subxacts")
+    endpoint = env.endpoints.create_start("main")

    pg_conn = endpoint.connect()
    cur = pg_conn.cursor()
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -68,10 +68,13 @@ def test_timeline_delete(neon_simple_env: NeonEnv):

    # construct pair of branches to validate that pageserver prohibits
    # deletion of ancestor timelines when they have child branches
-    parent_timeline_id = env.neon_cli.create_branch("test_ancestor_branch_delete_parent", "empty")
+    parent_timeline_id = env.neon_cli.create_branch(
+        new_branch_name="test_ancestor_branch_delete_parent", ancestor_branch_name="main"
+    )

    leaf_timeline_id = env.neon_cli.create_branch(
-        "test_ancestor_branch_delete_branch1", "test_ancestor_branch_delete_parent"
+        new_branch_name="test_ancestor_branch_delete_branch1",
+        ancestor_branch_name="test_ancestor_branch_delete_parent",
    )

    timeline_path = env.pageserver.timeline_dir(env.initial_tenant, parent_timeline_id)
--- a/test_runner/regress/test_timeline_gc_blocking.py
+++ b/test_runner/regress/test_timeline_gc_blocking.py
@@ -1,17 +1,32 @@
 import time
+from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
+from typing import List, Optional

+import pytest
+from fixtures.log_helper import log
 from fixtures.neon_fixtures import (
+    LogCursor,
    NeonEnvBuilder,
+    NeonPageserver,
 )
 from fixtures.pageserver.utils import wait_timeline_detail_404


-def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
+@pytest.mark.parametrize("sharded", [True, False])
+def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool):
+    neon_env_builder.num_pageservers = 2 if sharded else 1
    env = neon_env_builder.init_start(
-        initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"}
+        initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"},
+        initial_tenant_shard_count=2 if sharded else None,
    )
-    ps = env.pageserver
-    http = ps.http_client()
+
+    if sharded:
+        http = env.storage_controller.pageserver_api()
+    else:
+        http = env.pageserver.http_client()
+
+    pss = ManyPageservers(list(map(lambda ps: ScrollableLog(ps, None), env.pageservers)))

    foo_branch = env.neon_cli.create_branch("foo", "main", env.initial_tenant)

@@ -22,9 +37,8 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
    tenant_before = http.tenant_status(env.initial_tenant)

    wait_for_another_gc_round()
-    _, offset = ps.assert_log_contains(gc_active_line)
-
-    assert ps.log_contains(gc_skipped_line, offset) is None
+    pss.assert_log_contains(gc_active_line)
+    pss.assert_log_does_not_contain(gc_skipped_line)

    http.timeline_block_gc(env.initial_tenant, foo_branch)

@@ -34,34 +48,78 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder):
    assert gc_blocking == "BlockingReasons { timelines: 1, reasons: EnumSet(Manual) }"

    wait_for_another_gc_round()
-    _, offset = ps.assert_log_contains(gc_skipped_line, offset)
+    pss.assert_log_contains(gc_skipped_line)

-    ps.restart()
-    ps.quiesce_tenants()
+    pss.restart()
+    pss.quiesce_tenants()

-    _, offset = env.pageserver.assert_log_contains(init_gc_skipped, offset)
+    pss.assert_log_contains(init_gc_skipped)

    wait_for_another_gc_round()
-    _, offset = ps.assert_log_contains(gc_skipped_line, offset)
+    pss.assert_log_contains(gc_skipped_line)

    # deletion unblocks gc
    http.timeline_delete(env.initial_tenant, foo_branch)
    wait_timeline_detail_404(http, env.initial_tenant, foo_branch, 10, 1.0)

    wait_for_another_gc_round()
-    _, offset = ps.assert_log_contains(gc_active_line, offset)
+    pss.assert_log_contains(gc_active_line)

    http.timeline_block_gc(env.initial_tenant, env.initial_timeline)

    wait_for_another_gc_round()
-    _, offset = ps.assert_log_contains(gc_skipped_line, offset)
+    pss.assert_log_contains(gc_skipped_line)

    # removing the manual block also unblocks gc
    http.timeline_unblock_gc(env.initial_tenant, env.initial_timeline)

    wait_for_another_gc_round()
-    _, offset = ps.assert_log_contains(gc_active_line, offset)
+    pss.assert_log_contains(gc_active_line)


 def wait_for_another_gc_round():
    time.sleep(2)
+
+
+@dataclass
+class ScrollableLog:
+    pageserver: NeonPageserver
+    offset: Optional[LogCursor]
+
+    def assert_log_contains(self, what: str):
+        msg, offset = self.pageserver.assert_log_contains(what, offset=self.offset)
+        old = self.offset
+        self.offset = offset
+        log.info(f"{old} -> {offset}: {msg}")
+
+    def assert_log_does_not_contain(self, what: str):
+        assert self.pageserver.log_contains(what) is None
+
+
+@dataclass(frozen=True)
+class ManyPageservers:
+    many: List[ScrollableLog]
+
+    def assert_log_contains(self, what: str):
+        for one in self.many:
+            one.assert_log_contains(what)
+
+    def assert_log_does_not_contain(self, what: str):
+        for one in self.many:
+            one.assert_log_does_not_contain(what)
+
+    def restart(self):
+        def do_restart(x: ScrollableLog):
+            x.pageserver.restart()
+
+        with ThreadPoolExecutor(max_workers=len(self.many)) as rt:
+            rt.map(do_restart, self.many)
+            rt.shutdown(wait=True)
+
+    def quiesce_tenants(self):
+        def do_quiesce(x: ScrollableLog):
+            x.pageserver.quiesce_tenants()
+
+        with ThreadPoolExecutor(max_workers=len(self.many)) as rt:
+            rt.map(do_quiesce, self.many)
+            rt.shutdown(wait=True)
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -36,7 +36,7 @@ from fixtures.utils import get_timeline_dir_size, wait_until

 def test_timeline_size(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "empty")
+    new_timeline_id = env.neon_cli.create_branch("test_timeline_size", "main")

    client = env.pageserver.http_client()
    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
@@ -68,7 +68,7 @@ def test_timeline_size(neon_simple_env: NeonEnv):

 def test_timeline_size_createdropdb(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "empty")
+    new_timeline_id = env.neon_cli.create_branch("test_timeline_size_createdropdb", "main")

    client = env.pageserver.http_client()
    client.timeline_wait_logical_size(env.initial_tenant, new_timeline_id)
--- a/test_runner/regress/test_twophase.py
+++ b/test_runner/regress/test_twophase.py
@@ -9,10 +9,7 @@ from fixtures.neon_fixtures import NeonEnv, fork_at_current_lsn
 #
 def test_twophase(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_twophase", "empty")
-    endpoint = env.endpoints.create_start(
-        "test_twophase", config_lines=["max_prepared_transactions=5"]
-    )
+    endpoint = env.endpoints.create_start("main", config_lines=["max_prepared_transactions=5"])

    conn = endpoint.connect()
    cur = conn.cursor()
@@ -56,7 +53,7 @@ def test_twophase(neon_simple_env: NeonEnv):
    assert len(twophase_files) == 2

    # Create a branch with the transaction in prepared state
-    fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "test_twophase")
+    fork_at_current_lsn(env, endpoint, "test_twophase_prepared", "main")

    # Start compute on the new branch
    endpoint2 = env.endpoints.create_start(
--- a/test_runner/regress/test_unlogged.py
+++ b/test_runner/regress/test_unlogged.py
@@ -9,8 +9,7 @@ from fixtures.pg_version import PgVersion
 #
 def test_unlogged(neon_simple_env: NeonEnv):
    env = neon_simple_env
-    env.neon_cli.create_branch("test_unlogged", "empty")
-    endpoint = env.endpoints.create_start("test_unlogged")
+    endpoint = env.endpoints.create_start("main")

    conn = endpoint.connect()
    cur = conn.cursor()
@@ -22,7 +21,7 @@ def test_unlogged(neon_simple_env: NeonEnv):
    cur.execute("INSERT INTO iut (id) values (42);")

    # create another compute to fetch inital empty contents from pageserver
-    fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "test_unlogged")
+    fork_at_current_lsn(env, endpoint, "test_unlogged_basebackup", "main")
    endpoint2 = env.endpoints.create_start("test_unlogged_basebackup")

    conn2 = endpoint2.connect()
--- a/test_runner/regress/test_vm_bits.py
+++ b/test_runner/regress/test_vm_bits.py
@@ -13,8 +13,7 @@ from fixtures.utils import query_scalar
 def test_vm_bit_clear(neon_simple_env: NeonEnv):
    env = neon_simple_env

-    env.neon_cli.create_branch("test_vm_bit_clear", "empty")
-    endpoint = env.endpoints.create_start("test_vm_bit_clear")
+    endpoint = env.endpoints.create_start("main")

    pg_conn = endpoint.connect()
    cur = pg_conn.cursor()
@@ -58,7 +57,7 @@ def test_vm_bit_clear(neon_simple_env: NeonEnv):
    cur.execute("UPDATE vmtest_cold_update2 SET id = 5000, filler=repeat('x', 200) WHERE id = 1")

    # Branch at this point, to test that later
-    fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "test_vm_bit_clear")
+    fork_at_current_lsn(env, endpoint, "test_vm_bit_clear_new", "main")

    # Clear the buffer cache, to force the VM page to be re-fetched from
    # the page server
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -1057,6 +1057,24 @@ def test_restart_endpoint(neon_env_builder: NeonEnvBuilder):
        endpoint.start()


+# Try restarting endpoint immediately after xlog switch.
+# https://github.com/neondatabase/neon/issues/8911
+def test_restart_endpoint_after_switch_wal(neon_env_builder: NeonEnvBuilder):
+    env = neon_env_builder.init_start()
+
+    endpoint = env.endpoints.create_start("main")
+
+    endpoint.safe_psql("create table t (i int)")
+
+    endpoint.safe_psql("SELECT pg_switch_wal()")
+
+    # we want immediate shutdown to have endpoint restart on xlog switch record,
+    # so prevent shutdown checkpoint.
+    endpoint.stop(mode="immediate")
+    endpoint = env.endpoints.create_start("main")
+    endpoint.safe_psql("SELECT 'works'")
+
+
 # Context manager which logs passed time on exit.
 class DurationLogger:
    def __init__(self, desc):
@@ -2176,6 +2194,43 @@ def test_patch_control_file(neon_env_builder: NeonEnvBuilder):
    assert res["timelines"][0]["control_file"]["timeline_start_lsn"] == "0/1"


+def test_term_bump(neon_env_builder: NeonEnvBuilder):
+    neon_env_builder.num_safekeepers = 1
+    env = neon_env_builder.init_start()
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    endpoint = env.endpoints.create_start("main")
+    # initialize safekeeper
+    endpoint.safe_psql("create table t(key int, value text)")
+
+    http_cli = env.safekeepers[0].http_client()
+
+    # check that bump up to specific term works
+    curr_term = http_cli.timeline_status(tenant_id, timeline_id).term
+    bump_to = curr_term + 3
+    res = http_cli.term_bump(tenant_id, timeline_id, bump_to)
+    log.info(f"bump to {bump_to} res: {res}")
+    assert res.current_term >= bump_to
+
+    # check that bump to none increments current term
+    res = http_cli.term_bump(tenant_id, timeline_id, None)
+    log.info(f"bump to None res: {res}")
+    assert res.current_term > bump_to
+    assert res.current_term > res.previous_term
+
+    # check that bumping doesn't work downward
+    res = http_cli.term_bump(tenant_id, timeline_id, 2)
+    log.info(f"bump to 2 res: {res}")
+    assert res.current_term > bump_to
+    assert res.current_term == res.previous_term
+
+    # check that this doesn't kill endpoint because last WAL flush was his and
+    # thus its basebackup is still good
+    endpoint.safe_psql("insert into t values (1, 'payload')")
+
+
 # Test disables periodic pushes from safekeeper to the broker and checks that
 # pageserver can still discover safekeepers with discovery requests.
 def test_broker_discovery(neon_env_builder: NeonEnvBuilder):
--- a/test_runner/test_broken.py
+++ b/test_runner/test_broken.py
@@ -23,8 +23,7 @@ run_broken = pytest.mark.skipif(
 def test_broken(neon_simple_env: NeonEnv, pg_bin):
    env = neon_simple_env

-    env.neon_cli.create_branch("test_broken", "empty")
-    env.endpoints.create_start("test_broken")
+    env.endpoints.create_start("main")
    log.info("postgres is running")

    log.info("THIS NEXT COMMAND WILL FAIL:")