Merge branch 'main' into devin/1745492468-add-dev-flag-pr11517

2026-05-18 13:40:37 +00:00 · 2025-06-26 07:32:08 -07:00
parent e8c39d260a 605fb04f89
commit 0618845bbb
425 changed files with 12213 additions and 5436 deletions
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -24,7 +24,7 @@ The value to place in the `aud` claim.

@final
 class ComputeClaimsScope(StrEnum):
-    ADMIN = "admin"
+    ADMIN = "compute_ctl:admin"


@final
@@ -69,15 +69,17 @@ class EndpointHttpClient(requests.Session):
        json: dict[str, str] = res.json()
        return json

-    def prewarm_lfc(self):
-        self.post(f"http://localhost:{self.external_port}/lfc/prewarm").raise_for_status()
+    def prewarm_lfc(self, from_endpoint_id: str | None = None):
+        url = f"http://localhost:{self.external_port}/lfc/prewarm"
+        query = {"from_endpoint": from_endpoint_id} if from_endpoint_id else {}
+        self.post(url, params=query).raise_for_status()

        def prewarmed():
            json = self.prewarm_lfc_status()
            status, err = json["status"], json.get("error")
            assert status == "completed", f"{status}, error {err}"

-        wait_until(prewarmed)
+        wait_until(prewarmed, timeout=60)

    def offload_lfc(self):
        url = f"http://localhost:{self.external_port}/lfc/offload"
--- a/test_runner/fixtures/neon_api.py
+++ b/test_runner/fixtures/neon_api.py
@@ -129,6 +129,18 @@ class NeonAPI:

        return cast("dict[str, Any]", resp.json())

+    def get_project_limits(self, project_id: str) -> dict[str, Any]:
+        """Return the decoded JSON body of GET /projects/{project_id}/limits."""
+        json_headers = {
+            "Accept": "application/json",
+            "Content-Type": "application/json",
+        }
+
+        path = f"/projects/{project_id}/limits"
+        response = self.__request("GET", path, headers=json_headers)
+
+        return cast("dict[str, Any]", response.json())
+
    def delete_project(
        self,
        project_id: str,
--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -497,6 +497,7 @@ class NeonLocalCli(AbstractNeonCli):
        tenant_id: TenantId,
        pg_version: PgVersion,
        endpoint_id: str | None = None,
+        grpc: bool | None = None,
        hot_standby: bool = False,
        lsn: Lsn | None = None,
        pageserver_id: int | None = None,
@@ -521,6 +522,8 @@ class NeonLocalCli(AbstractNeonCli):
            args.extend(["--external-http-port", str(external_http_port)])
        if internal_http_port is not None:
            args.extend(["--internal-http-port", str(internal_http_port)])
+        if grpc:
+            args.append("--grpc")
        if endpoint_id is not None:
            args.append(endpoint_id)
        if hot_standby:
@@ -564,6 +567,7 @@ class NeonLocalCli(AbstractNeonCli):
        basebackup_request_tries: int | None = None,
        timeout: str | None = None,
        env: dict[str, str] | None = None,
+        dev: bool = False,
    ) -> subprocess.CompletedProcess[str]:
        args = [
            "endpoint",
@@ -589,6 +593,8 @@ class NeonLocalCli(AbstractNeonCli):
            args.extend(["--create-test-user"])
        if timeout is not None:
            args.extend(["--start-timeout", str(timeout)])
+        if dev:
+            args.extend(["--dev"])

        res = self.raw_cli(args, extra_env_vars)
        res.check_returncode()
@@ -617,7 +623,7 @@ class NeonLocalCli(AbstractNeonCli):
        destroy=False,
        check_return_code=True,
        mode: str | None = None,
-    ) -> subprocess.CompletedProcess[str]:
+    ) -> tuple[Lsn | None, subprocess.CompletedProcess[str]]:
        args = [
            "endpoint",
            "stop",
@@ -629,7 +635,11 @@ class NeonLocalCli(AbstractNeonCli):
        if endpoint_id is not None:
            args.append(endpoint_id)

-        return self.raw_cli(args, check_return_code=check_return_code)
+        proc = self.raw_cli(args, check_return_code=check_return_code)
+        log.debug(f"endpoint stop stdout: {proc.stdout}")
+        lsn_str = proc.stdout.split()[-1]
+        lsn: Lsn | None = None if lsn_str == "null" else Lsn(lsn_str)
+        return lsn, proc

    def mappings_map_branch(
        self, name: str, tenant_id: TenantId, timeline_id: TimelineId
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -453,6 +453,7 @@ class NeonEnvBuilder:
        pageserver_get_vectored_concurrent_io: str | None = None,
        pageserver_tracing_config: PageserverTracingConfig | None = None,
        pageserver_import_config: PageserverImportConfig | None = None,
+        storcon_kick_secondary_downloads: bool | None = None,
    ):
        self.repo_dir = repo_dir
        self.rust_log_override = rust_log_override
@@ -489,7 +490,9 @@ class NeonEnvBuilder:
        self.config_init_force: str | None = None
        self.top_output_dir = top_output_dir
        self.control_plane_hooks_api: str | None = None
-        self.storage_controller_config: dict[Any, Any] | None = None
+        self.storage_controller_config: dict[Any, Any] | None = {
+            "timelines_onto_safekeepers": True,
+        }

        # Flag to enable https listener in pageserver, generate local ssl certs,
        # and force storage controller to use https for pageserver api.
@@ -512,6 +515,8 @@ class NeonEnvBuilder:
        self.pageserver_tracing_config = pageserver_tracing_config
        self.pageserver_import_config = pageserver_import_config

+        self.storcon_kick_secondary_downloads = storcon_kick_secondary_downloads
+
        self.pageserver_default_tenant_config_compaction_algorithm: dict[str, Any] | None = (
            pageserver_default_tenant_config_compaction_algorithm
        )
@@ -1219,6 +1224,14 @@ class NeonEnv:
            else:
                cfg["storage_controller"] = {"use_local_compute_notifications": False}

+        if config.storcon_kick_secondary_downloads is not None:
+            # Configure whether storage controller should actively kick off secondary downloads
+            if "storage_controller" not in cfg:
+                cfg["storage_controller"] = {}
+            cfg["storage_controller"]["kick_secondary_downloads"] = (
+                config.storcon_kick_secondary_downloads
+            )
+
        # Create config for pageserver
        http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
        pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
@@ -1228,6 +1241,7 @@ class NeonEnv:
        ):
            pageserver_port = PageserverPort(
                pg=self.port_distributor.get_port(),
+                grpc=self.port_distributor.get_port(),
                http=self.port_distributor.get_port(),
                https=self.port_distributor.get_port() if config.use_https_pageserver_api else None,
            )
@@ -1243,13 +1257,14 @@ class NeonEnv:
            ps_cfg: dict[str, Any] = {
                "id": ps_id,
                "listen_pg_addr": f"localhost:{pageserver_port.pg}",
+                "listen_grpc_addr": f"localhost:{pageserver_port.grpc}",
                "listen_http_addr": f"localhost:{pageserver_port.http}",
                "listen_https_addr": f"localhost:{pageserver_port.https}"
                if config.use_https_pageserver_api
                else None,
                "pg_auth_type": pg_auth_type,
-                "http_auth_type": http_auth_type,
                "grpc_auth_type": grpc_auth_type,
+                "http_auth_type": http_auth_type,
                "availability_zone": availability_zone,
                # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids
                # the pageserver taking a long time to start up due to syncfs flushing other tests' data
@@ -1762,6 +1777,7 @@ def neon_env_builder(
@dataclass
 class PageserverPort:
    pg: int
+    grpc: int
    http: int
    https: int | None = None

@@ -2054,6 +2070,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
            headers=self.headers(TokenScope.ADMIN),
        )

+    def tombstone_delete(self, node_id):
+        """Issue DELETE on the debug tombstone endpoint for `node_id`."""
+        log.info(f"tombstone_delete({node_id})")
+        url = f"{self.api}/debug/v1/tombstone/{node_id}"
+        admin_headers = self.headers(TokenScope.ADMIN)
+        # The response object is not inspected or returned here.
+        self.request("DELETE", url, headers=admin_headers)
+
    def node_drain(self, node_id):
        log.info(f"node_drain({node_id})")
        self.request(
@@ -2110,6 +2134,14 @@ class NeonStorageController(MetricsGetter, LogUtils):
        )
        return response.json()

+    def tombstone_list(self):
+        """Return the decoded JSON body of GET /debug/v1/tombstone (admin scope)."""
+        url = f"{self.api}/debug/v1/tombstone"
+        admin_headers = self.headers(TokenScope.ADMIN)
+        # The payload is passed through as-is; its shape is not defined in this method.
+        listing = self.request("GET", url, headers=admin_headers)
+        return listing.json()
+
    def tenant_shard_dump(self):
        """
        Debug listing API: dumps the internal map of tenant shards
@@ -2207,6 +2239,17 @@ class NeonStorageController(MetricsGetter, LogUtils):
        shards: list[dict[str, Any]] = body["shards"]
        return shards

+    def timeline_locate(self, tenant_id: TenantId, timeline_id: TimelineId):
+        """
+        Query the debug `locate` endpoint for one timeline of a tenant.
+
+        :return: dict {"generation": int, "sk_set": [int], "new_sk_set": [int]}
+        """
+        url = f"{self.api}/debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate"
+        admin_headers = self.headers(TokenScope.ADMIN)
+        located = self.request("GET", url, headers=admin_headers)
+        return located.json()
+
    def tenant_describe(self, tenant_id: TenantId):
        """
        :return: list of {"shard_id": "", "node_id": int, "listen_pg_addr": str, "listen_pg_port": int, "listen_http_addr: str, "listen_http_port: int, preferred_az_id: str}
@@ -2333,6 +2376,7 @@ class NeonStorageController(MetricsGetter, LogUtils):
        delay_max = max_interval
        while n > 0:
            n = self.reconcile_all()
+
            if n == 0:
                break
            elif time.time() - start_at > timeout_secs:
@@ -4030,6 +4074,16 @@ def static_proxy(
        "CREATE TABLE neon_control_plane.endpoints (endpoint_id VARCHAR(255) PRIMARY KEY, allowed_ips VARCHAR(255))"
    )

+    vanilla_pg.stop()
+    vanilla_pg.edit_hba(
+        [
+            "local all all              trust",
+            "host  all all 127.0.0.1/32 scram-sha-256",
+            "host  all all ::1/128      scram-sha-256",
+        ]
+    )
+    vanilla_pg.start()
+
    proxy_port = port_distributor.get_port()
    mgmt_port = port_distributor.get_port()
    http_port = port_distributor.get_port()
@@ -4155,6 +4209,8 @@ class Endpoint(PgProtocol, LogUtils):
        self._running = threading.Semaphore(0)
        self.__jwt: str | None = None

+        self.terminate_flush_lsn: Lsn | None = None
+
    def http_client(self, retries: Retry | None = None) -> EndpointHttpClient:
        assert self.__jwt is not None
        return EndpointHttpClient(
@@ -4167,6 +4223,7 @@ class Endpoint(PgProtocol, LogUtils):
        self,
        branch_name: str,
        endpoint_id: str | None = None,
+        grpc: bool | None = None,
        hot_standby: bool = False,
        lsn: Lsn | None = None,
        config_lines: list[str] | None = None,
@@ -4191,6 +4248,7 @@ class Endpoint(PgProtocol, LogUtils):
            endpoint_id=self.endpoint_id,
            tenant_id=self.tenant_id,
            lsn=lsn,
+            grpc=grpc,
            hot_standby=hot_standby,
            pg_port=self.pg_port,
            external_http_port=self.external_http_port,
@@ -4457,9 +4515,10 @@ class Endpoint(PgProtocol, LogUtils):
        running = self._running.acquire(blocking=False)
        if running:
            assert self.endpoint_id is not None
-            self.env.neon_cli.endpoint_stop(
+            lsn, _ = self.env.neon_cli.endpoint_stop(
                self.endpoint_id, check_return_code=self.check_stop_result, mode=mode
            )
+            self.terminate_flush_lsn = lsn

        if sks_wait_walreceiver_gone is not None:
            for sk in sks_wait_walreceiver_gone[0]:
@@ -4477,9 +4536,10 @@ class Endpoint(PgProtocol, LogUtils):
        running = self._running.acquire(blocking=False)
        if running:
            assert self.endpoint_id is not None
-            self.env.neon_cli.endpoint_stop(
+            lsn, _ = self.env.neon_cli.endpoint_stop(
                self.endpoint_id, True, check_return_code=self.check_stop_result, mode=mode
            )
+            self.terminate_flush_lsn = lsn
            self.endpoint_id = None

        return self
@@ -4488,6 +4548,7 @@ class Endpoint(PgProtocol, LogUtils):
        self,
        branch_name: str,
        endpoint_id: str | None = None,
+        grpc: bool | None = None,
        hot_standby: bool = False,
        lsn: Lsn | None = None,
        config_lines: list[str] | None = None,
@@ -4505,6 +4566,7 @@ class Endpoint(PgProtocol, LogUtils):
            branch_name=branch_name,
            endpoint_id=endpoint_id,
            config_lines=config_lines,
+            grpc=grpc,
            hot_standby=hot_standby,
            lsn=lsn,
            pageserver_id=pageserver_id,
@@ -4592,6 +4654,7 @@ class EndpointFactory:
        endpoint_id: str | None = None,
        tenant_id: TenantId | None = None,
        lsn: Lsn | None = None,
+        grpc: bool | None = None,
        hot_standby: bool = False,
        config_lines: list[str] | None = None,
        remote_ext_base_url: str | None = None,
@@ -4611,6 +4674,7 @@ class EndpointFactory:
        return ep.create_start(
            branch_name=branch_name,
            endpoint_id=endpoint_id,
+            grpc=grpc,
            hot_standby=hot_standby,
            config_lines=config_lines,
            lsn=lsn,
@@ -4625,6 +4689,7 @@ class EndpointFactory:
        endpoint_id: str | None = None,
        tenant_id: TenantId | None = None,
        lsn: Lsn | None = None,
+        grpc: bool | None = None,
        hot_standby: bool = False,
        config_lines: list[str] | None = None,
        pageserver_id: int | None = None,
@@ -4647,6 +4712,7 @@ class EndpointFactory:
            branch_name=branch_name,
            endpoint_id=endpoint_id,
            lsn=lsn,
+            grpc=grpc,
            hot_standby=hot_standby,
            config_lines=config_lines,
            pageserver_id=pageserver_id,
@@ -4671,6 +4737,7 @@ class EndpointFactory:
        self,
        origin: Endpoint,
        endpoint_id: str | None = None,
+        grpc: bool | None = None,
        config_lines: list[str] | None = None,
    ) -> Endpoint:
        branch_name = origin.branch_name
@@ -4682,6 +4749,7 @@ class EndpointFactory:
            endpoint_id=endpoint_id,
            tenant_id=origin.tenant_id,
            lsn=None,
+            grpc=grpc,
            hot_standby=True,
            config_lines=config_lines,
        )
@@ -4690,6 +4758,7 @@ class EndpointFactory:
        self,
        origin: Endpoint,
        endpoint_id: str | None = None,
+        grpc: bool | None = None,
        config_lines: list[str] | None = None,
    ) -> Endpoint:
        branch_name = origin.branch_name
@@ -4701,6 +4770,7 @@ class EndpointFactory:
            endpoint_id=endpoint_id,
            tenant_id=origin.tenant_id,
            lsn=None,
+            grpc=grpc,
            hot_standby=True,
            config_lines=config_lines,
        )
@@ -4852,6 +4922,9 @@ class Safekeeper(LogUtils):
        log.info(f"finished pulling timeline from {src_ids} to {self.id}")
        return res

+    def safekeeper_id(self) -> SafekeeperId:  # identity triple built from this safekeeper's own fields
+        return SafekeeperId(self.id, "localhost", self.port.pg_tenant_only)  # host is fixed; port is the tenant-only pg port
+
    @property
    def data_dir(self) -> Path:
        return self.env.repo_dir / "safekeepers" / f"sk{self.id}"
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -1219,3 +1219,31 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
        )
        self.verbose_error(res)
        return res.json()
+
+    def force_override_feature_flag(self, flag: str, value: str | None = None):
+        """
+        PUT `value` to the feature flag endpoint of `flag`; with `value=None` send DELETE instead.
+        """
+        endpoint = f"http://localhost:{self.port}/v1/feature_flag/{flag}"
+        if value is not None:
+            response = self.put(endpoint, params={"value": value})
+        else:
+            # No value supplied: DELETE the same URL.
+            response = self.delete(endpoint)
+        self.verbose_error(response)
+
+    def evaluate_feature_flag_boolean(self, tenant_id: TenantId, flag: str) -> Any:
+        """Evaluate `flag` for `tenant_id` with `as=boolean`; returns the decoded JSON body."""
+        url = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}"
+        query = {"as": "boolean"}
+        response = self.get(url, params=query)
+        self.verbose_error(response)
+        return response.json()
+
+    def evaluate_feature_flag_multivariate(self, tenant_id: TenantId, flag: str) -> Any:
+        """Evaluate `flag` for `tenant_id` with `as=multivariate`; returns the decoded JSON."""
+        url = f"http://localhost:{self.port}/v1/tenant/{tenant_id}/feature_flag/{flag}"
+        query = {"as": "multivariate"}
+        response = self.get(url, params=query)
+        self.verbose_error(response)
+        return response.json()
--- a/test_runner/performance/large_synthetic_oltp/grow_action_blocks.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_action_blocks.sql
@@ -0,0 +1,22 @@
+-- add 100000 rows or approximately 11 MB to the action_blocks table
+-- takes about 1 second; ids are the fixed series 1..100000 rather than sequence-generated
+INSERT INTO workflows.action_blocks (
+    id,
+    uuid,
+    created_at,
+    status,
+    function_signature,
+    reference_id,
+    blocking,
+    run_synchronously
+)
+SELECT
+    id,
+    uuid_generate_v4(),
+    now() - (random() * interval '100 days'), -- Random date within the last 100 days
+    'CONDITIONS_NOT_MET',
+    'function_signature_' || id, -- Distinct function signature per row, derived from id
+    CASE WHEN random() > 0.5 THEN 'reference_' || id ELSE NULL END, -- 50% chance of being NULL
+    true,
+    CASE WHEN random() > 0.5 THEN true ELSE false END -- Random boolean value
+FROM generate_series(1, 100000) AS id;
--- a/test_runner/performance/large_synthetic_oltp/grow_action_kwargs.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_action_kwargs.sql
@@ -0,0 +1,11 @@
+-- add 100000 rows or approximately 10 MB to the action_kwargs table
+-- takes about 5 minutes
+INSERT INTO workflows.action_kwargs (created_at, key, uuid, value_id, state_value_id, action_block_id)
+SELECT 
+    now(),  -- created_at: explicit current timestamp (presumably what the column default would give; verify against schema)
+    'key_' || gs.id,  -- Distinct key per row, derived from the series id
+    uuid_generate_v4(),  -- Generating a new UUID for each row
+    CASE WHEN gs.id % 2 = 0 THEN gs.id ELSE NULL END,  -- Setting value_id for even ids
+    CASE WHEN gs.id % 2 <> 0 THEN gs.id ELSE NULL END,  -- Setting state_value_id for odd ids
+    1  -- Every row references action_block_id 1
+FROM generate_series(1, 100000) AS gs(id);
--- a/test_runner/performance/large_synthetic_oltp/grow_device_fingerprint_event.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_device_fingerprint_event.sql
@@ -0,0 +1,56 @@
+-- add 100000 rows or approx. 30 MB to the device_fingerprint_event table
+-- takes about 4 minutes
+INSERT INTO authentication.device_fingerprint_event (
+    uuid,
+    created_at,
+    identity_uuid,
+    fingerprint_request_id,
+    fingerprint_id,
+    confidence_score,
+    ip_address,
+    url,
+    client_referrer,
+    last_seen_at,
+    raw_fingerprint_response,
+    session_uuid,
+    fingerprint_response,
+    browser_version,
+    browser_name,
+    device,
+    operating_system,
+    operating_system_version,
+    user_agent,
+    ip_address_location_city,
+    ip_address_location_region,
+    ip_address_location_country_code,
+    ip_address_location_latitude,
+    ip_address_location_longitude,
+    is_incognito
+)
+SELECT
+    gen_random_uuid(),  -- Random UUID for the uuid column
+    now() - (random() * interval '10 days'),  -- Random timestamp within the last 10 days
+    gen_random_uuid(),  -- Random UUID for identity
+    md5(gs::text),  -- Simulates unique fingerprint request ID using `md5` hash of series number
+    md5((gs + 10000)::text),  -- Simulates unique fingerprint ID (md5 of series number + 10000)
+    round(CAST(random() AS numeric), 2),  -- Generates a random score between 0 and 1, cast `random()` to numeric
+    '192.168.' || (random() * 255)::int || '.' || (random() * 255)::int,  -- Random 192.168.x.y address
+    'https://example.com/' || (gs % 1000),  -- Deterministic URL cycling through 1000 suffixes (gs % 1000)
+    CASE WHEN random() < 0.5 THEN NULL ELSE 'https://referrer.com/' || (gs % 100)::text END,  -- Referrer cycling through 100 values, 50% chance of being NULL
+    now() - (random() * interval '5 days'),  -- Last seen timestamp within the last 5 days
+    NULL,  -- Keeping raw_fingerprint_response NULL for simplicity
+    CASE WHEN random() < 0.3 THEN gen_random_uuid() ELSE NULL END,  -- Session UUID set for ~30% of rows, i.e. 70% chance of NULL
+    NULL,  -- Keeping fingerprint_response NULL for simplicity
+    CASE WHEN random() < 0.5 THEN '93.0' ELSE '92.0' END,  -- Random browser version
+    CASE WHEN random() < 0.5 THEN 'Firefox' ELSE 'Chrome' END,  -- Random browser name
+    CASE WHEN random() < 0.5 THEN 'Desktop' ELSE 'Mobile' END,  -- Random device type
+    'Windows',  -- Static value for operating system
+    '10.0',  -- Static value for operating system version
+    'Mozilla/5.0',  -- Static value for user agent
+    'City ' || (gs % 1000)::text,  -- City name cycling through 1000 values (not random)
+    'Region ' || (gs % 100)::text,  -- Region name cycling through 100 values (not random)
+    'US',  -- Static country code
+    random() * 180 - 90,  -- Random latitude between -90 and 90
+    random() * 360 - 180,  -- Random longitude between -180 and 180
+    random() < 0.1  -- 10% chance of being incognito
+FROM generate_series(1, 100000) AS gs;
--- a/test_runner/performance/large_synthetic_oltp/grow_edges.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_edges.sql
@@ -0,0 +1,10 @@
+-- add 100000 rows or approximately 11 MB to the edges table
+-- takes about 1 minute; ranges use floor() because a bare ::int cast rounds to nearest and would overshoot by one
+INSERT INTO workflows.edges (created_at, workflow_id, uuid, from_vertex_id, to_vertex_id)
+SELECT 
+    now() - (random() * interval '365 days'), -- Random `created_at` timestamp in the last year
+    floor(random() * 100)::int + 1,           -- Random `workflow_id` between 1 and 100
+    uuid_generate_v4(),                       -- Generate a new UUID for each row
+    floor(random() * 100000)::bigint + 1,     -- Random `from_vertex_id` between 1 and 100,000
+    floor(random() * 100000)::bigint + 1      -- Random `to_vertex_id` between 1 and 100,000
+FROM generate_series(1, 100000) AS gs;        -- One output row per series value; gs itself is unused
--- a/test_runner/performance/large_synthetic_oltp/grow_hotel_rate_mapping.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_hotel_rate_mapping.sql
@@ -0,0 +1,21 @@
+-- add 100000 rows or approximately 10 MB to the hotel_rate_mapping table
+-- takes about 1 second; the SELECT aliases are cosmetic, INSERT ... SELECT matches columns by position
+INSERT INTO booking_inventory.hotel_rate_mapping (
+    uuid,
+    created_at,
+    updated_at,
+    hotel_rate_id,
+    remote_id,
+    source
+)
+SELECT
+    uuid_generate_v4(), -- New UUID for each row
+    now(), -- Created at timestamp
+    now(), -- Updated at timestamp
+    'rate_' || gs AS hotel_rate_id, -- Distinct hotel_rate_id per series value
+    'remote_' || gs AS remote_id, -- Distinct remote_id per series value
+    CASE WHEN gs % 3 = 0 THEN 'source_1'
+         WHEN gs % 3 = 1 THEN 'source_2'
+         ELSE 'source_3'
+    END AS source -- Distributing sources among three options
+FROM generate_series(1, 100000) AS gs;
--- a/test_runner/performance/large_synthetic_oltp/grow_ocr_pipeline_results_version.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_ocr_pipeline_results_version.sql
@@ -0,0 +1,31 @@
+-- add 100000 rows or approximately 20 MB to the ocr_pipeline_results_version table
+-- takes about 1 second; id and transaction_id are the fixed series 1..100000
+INSERT INTO ocr.ocr_pipeline_results_version (
+    id, transaction_id, operation_type, created_at, updated_at, s3_filename, completed_at, result,
+    end_transaction_id, pipeline_type, is_async, callback, callback_kwargs, input, error, file_type, s3_bucket_name, pipeline_kwargs
+)
+SELECT
+    gs.aid,  -- id
+    gs.aid,  -- transaction_id (same as id for simplicity)
+    (gs.aid % 5)::smallint + 1,  -- operation_type (cyclic values from 1 to 5)
+    now() - interval '1 day' * (random() * 30),  -- created_at (random timestamp within the last 30 days)
+    now() - interval '1 day' * (random() * 30),  -- updated_at (independent draw, so it may precede created_at)
+    's3_file_' || gs.aid || '.txt',  -- s3_filename (synthetic filename)
+    now() - interval '1 day' * (random() * 30),  -- completed_at (independent random timestamp within the last 30 days)
+    '{}'::jsonb,  -- result (empty JSON object)
+    NULL,  -- end_transaction_id (NULL)
+    CASE (gs.aid % 3)  -- pipeline_type (cyclic text values)
+        WHEN 0 THEN 'OCR'
+        WHEN 1 THEN 'PDF'
+        ELSE 'Image'
+    END,
+    gs.aid % 2 = 0,  -- is_async (alternating between true and false)
+    'http://callback/' || gs.aid,  -- callback (synthetic URL)
+    '{}'::jsonb,  -- callback_kwargs (empty JSON object)
+    'Input text ' || gs.aid,  -- input (synthetic input text)
+    NULL,  -- error (NULL)
+    'pdf',  -- file_type (constant 'pdf')
+    'bucket_' || gs.aid % 10,  -- s3_bucket_name (% binds tighter than ||, giving bucket_0 .. bucket_9)
+    '{}'::jsonb  -- pipeline_kwargs (empty JSON object)
+FROM
+    generate_series(1, 100000) AS gs(aid);
--- a/test_runner/performance/large_synthetic_oltp/grow_priceline_raw_response.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_priceline_raw_response.sql
@@ -0,0 +1,18 @@
+-- add 100000 rows or approx. 20 MB to the priceline_raw_response table
+-- takes about 20 seconds
+INSERT INTO booking_inventory.priceline_raw_response (
+    uuid, created_at, updated_at, url, base_url, path, method, params, request, response
+)
+SELECT 
+    gen_random_uuid(),  -- Generate random UUIDs
+    now() - (random() * interval '30 days'),  -- Random creation time within the past 30 days
+    now() - (random() * interval '30 days'),  -- Random update time within the past 30 days (independent of created_at)
+    'https://example.com/resource/' || gs,  -- Distinct URL per series value
+    'https://example.com',  -- Base URL for all rows
+    '/resource/' || gs,  -- Path for each row
+    CASE WHEN gs % 2 = 0 THEN 'GET' ELSE 'POST' END,  -- Alternate between GET and POST methods
+    'id=' || gs,  -- Simple parameter pattern for each row
+    '{}'::jsonb,  -- Empty JSON object for request
+    jsonb_build_object('status', 'success', 'data', gs)  -- JSON response carrying the series value as 'data'
+FROM 
+    generate_series(1, 100000) AS gs;
--- a/test_runner/performance/large_synthetic_oltp/grow_relabled_transactions.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_relabled_transactions.sql
@@ -0,0 +1,26 @@
+-- add 100000 rows or approx. 1 MB to the relabeled_transactions table
+-- takes about 1 second; ids are the fixed series 1..100000
+INSERT INTO heron.relabeled_transactions (
+    id, 
+    created_at, 
+    universal_transaction_id, 
+    raw_result, 
+    category, 
+    category_confidence, 
+    merchant, 
+    batch_id
+)
+SELECT 
+    gs.aid AS id, 
+    now() - (gs.aid % 1000) * interval '1 second' AS created_at, -- spread over the last 1000 seconds
+    'txn_' || gs.aid AS universal_transaction_id, 
+    '{}'::jsonb AS raw_result, 
+    CASE WHEN gs.aid % 5 = 0 THEN 'grocery' 
+         WHEN gs.aid % 5 = 1 THEN 'electronics' 
+         WHEN gs.aid % 5 = 2 THEN 'clothing' 
+         WHEN gs.aid % 5 = 3 THEN 'utilities' 
+         ELSE NULL END AS category, -- every fifth row (aid % 5 = 4) has no category
+    ROUND(RANDOM()::numeric, 2) AS category_confidence, -- random value between 0 and 1, two decimals
+    CASE WHEN gs.aid % 2 = 0 THEN 'Merchant_' || gs.aid % 20 ELSE NULL END AS merchant, -- odd rows have no merchant
+    gs.aid % 100 + 1 AS batch_id -- cycles through 1..100
+FROM generate_series(1, 100000) AS gs(aid);
--- a/test_runner/performance/large_synthetic_oltp/grow_state_values.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_state_values.sql
@@ -0,0 +1,9 @@
+-- add 100000 rows or approx. 10 MB to the state_values table
+-- takes about 14 seconds
+INSERT INTO workflows.state_values (key, workflow_id, state_type, value_id)
+SELECT 
+    'key_' || gs::text,               -- Key: Generate as 'key_1', 'key_2', etc.
+    (gs - 1) / 1000 + 1,              -- workflow_id: integer division, so 1..100 with 1000 rows each
+    'STATIC',                         -- state_type: constant 'STATIC'
+    gs::bigint                        -- value_id: Use the same as the series value
+FROM generate_series(1, 100000) AS gs; -- Generate 100,000 rows
--- a/test_runner/performance/large_synthetic_oltp/grow_values.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_values.sql
@@ -0,0 +1,30 @@
+-- add 100000 rows or approx. 24 MB to the values table
+-- takes about 126 seconds; each row fills exactly one of the five *_value columns, chosen by a per-row selector
+INSERT INTO workflows.values (
+    id,
+    type,
+    int_value,
+    string_value,
+    child_type,
+    bool_value,
+    uuid,
+    numeric_value,
+    workflow_id,
+    jsonb_value,
+    parent_value_id
+)
+SELECT
+    gs AS id,
+    'TYPE_A' AS type,
+    CASE WHEN selector = 1 THEN gs ELSE NULL END AS int_value,
+    CASE WHEN selector = 2 THEN 'string_value_' || gs::text ELSE NULL END AS string_value,
+    'CHILD_TYPE_A' AS child_type,  -- Always non-null
+    CASE WHEN selector = 3 THEN (gs % 2 = 0) ELSE NULL END AS bool_value,
+    uuid_generate_v4() AS uuid,  -- Always non-null
+    CASE WHEN selector = 4 THEN gs * 1.0 ELSE NULL END AS numeric_value,
+    (array[1, 2, 3, 4, 5])[gs % 5 + 1] AS workflow_id,  -- Use only existing workflow IDs
+    CASE WHEN selector = 5 THEN ('{"key":' || gs::text || '}')::jsonb ELSE NULL END AS jsonb_value,
+    (gs % 100) + 1 AS parent_value_id  -- Always non-null
+FROM
+    -- selector is drawn in the same SELECT as gs: an uncorrelated FROM subquery is not re-evaluated per row
+    (SELECT gs, floor(random() * 5 + 1)::int AS selector FROM generate_series(1, 100000) AS gs) AS s;
--- a/test_runner/performance/large_synthetic_oltp/grow_vertices.sql
+++ b/test_runner/performance/large_synthetic_oltp/grow_vertices.sql
@@ -0,0 +1,26 @@
+-- add 100000 rows or approx. 18 MB to the vertices table
+-- takes about 90 seconds
+INSERT INTO workflows.vertices(
+  uuid,
+  created_at,
+  condition_block_id,
+  operator,
+  has_been_visited,
+  reference_id,
+  workflow_id,
+  meta_data,
+  -- id,
+  action_block_id
+)
+SELECT
+  uuid_generate_v4() AS uuid,
+  now() AS created_at,
+  CASE WHEN (gs % 2 = 0) THEN gs % 10 ELSE NULL END AS condition_block_id, -- Every even row has a condition_block_id
+  'operator_' || (gs % 10) AS operator, -- Cyclical operator values (e.g., operator_0, operator_1)
+  false AS has_been_visited,
+  'ref_' || gs AS reference_id, -- Distinct reference_id per series value
+  (gs % 1000) + 1 AS workflow_id, -- Cyclic (not random) workflow_id values between 1 and 1000
+  '{}'::jsonb AS meta_data, -- Empty JSON metadata
+  -- gs AS id, -- default from sequence to get unique ID
+  CASE WHEN (gs % 2 = 1) THEN gs ELSE NULL END AS action_block_id -- Complementary to condition_block_id: set on odd rows only
+FROM generate_series(1, 100000) AS gs;
--- a/test_runner/performance/large_synthetic_oltp/update_accounting_coding_body_tracking_category_selection.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_accounting_coding_body_tracking_category_selection.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 200 KB in the accounting_coding_body_tracking_category_selection table
+-- takes about 1 second
+UPDATE  accounting.accounting_coding_body_tracking_category_selection
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM  accounting.accounting_coding_body_tracking_category_selection
+    TABLESAMPLE SYSTEM (0.0005) -- argument is a percentage: samples ~0.0005% of the table's blocks
+);
--- a/test_runner/performance/large_synthetic_oltp/update_action_blocks.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_action_blocks.sql
@@ -0,0 +1,9 @@
+-- update approximately 9000 rows or 1 MB in the action_blocks table
+-- takes about 1 second
+UPDATE  workflows.action_blocks 
+SET run_synchronously = NOT run_synchronously -- flip the boolean on each sampled row
+WHERE ctid in (
+    SELECT ctid
+    FROM  workflows.action_blocks 
+    TABLESAMPLE SYSTEM (0.001) -- argument is a percentage: samples ~0.001% of the table's blocks
+);
--- a/test_runner/performance/large_synthetic_oltp/update_action_kwargs.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_action_kwargs.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the action_kwargs table
+-- takes about 1 second
+UPDATE workflows.action_kwargs
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.action_kwargs
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_denormalized_approval_workflow.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_denormalized_approval_workflow.sql
@@ -0,0 +1,10 @@
+-- update approximately 3000 rows or 500 KB in the denormalized_approval_workflow table
+-- takes about 1 second
+UPDATE  approvals_v2.denormalized_approval_workflow 
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM  approvals_v2.denormalized_approval_workflow 
+    TABLESAMPLE SYSTEM (0.0005) 
+);
+
--- a/test_runner/performance/large_synthetic_oltp/update_device_fingerprint_event.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_device_fingerprint_event.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 1 MB in the device_fingerprint_event table
+-- takes about 5 seconds
+UPDATE authentication.device_fingerprint_event
+SET is_incognito = NOT is_incognito
+WHERE ctid in (
+    SELECT ctid
+    FROM authentication.device_fingerprint_event
+    TABLESAMPLE SYSTEM (0.001) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_edges.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_edges.sql
@@ -0,0 +1,9 @@
+-- update approximately 4000 rows or 600 kb in the edges table
+-- takes about 1 second
+UPDATE workflows.edges
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.edges
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enriched_log.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enriched_log.sql
@@ -0,0 +1,9 @@
+-- update approximately 10000 rows or 200 KB in the heron_transaction_enriched_log table
+-- takes about 1 minute
+UPDATE heron.heron_transaction_enriched_log
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM heron.heron_transaction_enriched_log
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enrichment_requests.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_heron_transaction_enrichment_requests.sql
@@ -0,0 +1,9 @@
+-- update approximately 4000 rows or 1 MB in the heron_transaction_enrichment_requests table
+-- takes about 2 minutes
+UPDATE  heron.heron_transaction_enrichment_requests  
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM  heron.heron_transaction_enrichment_requests  
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_hotel_rate_mapping.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_hotel_rate_mapping.sql
@@ -0,0 +1,9 @@
+-- update approximately 6000 rows or 600 kb in the hotel_rate_mapping table
+-- takes about 1 second
+UPDATE  booking_inventory.hotel_rate_mapping
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM  booking_inventory.hotel_rate_mapping
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_incoming_webhooks.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_incoming_webhooks.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 1 MB in the incoming_webhooks table
+-- takes about 5 seconds
+UPDATE webhook.incoming_webhooks
+SET is_body_encrypted = NOT is_body_encrypted
+WHERE ctid in (
+    SELECT ctid
+    FROM webhook.incoming_webhooks
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_manual_transaction.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_manual_transaction.sql
@@ -0,0 +1,9 @@
+-- update approximately 1000 rows or 200 kb in the manual_transaction table
+-- takes about 2 seconds
+UPDATE banking.manual_transaction
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM  banking.manual_transaction
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_ml_receipt_matching_log.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_ml_receipt_matching_log.sql
@@ -0,0 +1,9 @@
+-- update approximately 1000 rows or 100 kb in the ml_receipt_matching_log table
+-- takes about 1 second
+UPDATE   receipt.ml_receipt_matching_log 
+SET is_shadow_mode = NOT is_shadow_mode
+WHERE ctid in (
+    SELECT ctid
+    FROM   receipt.ml_receipt_matching_log 
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_ocr_pipeine_results_version.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_ocr_pipeine_results_version.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 400 kb in the ocr_pipeline_results_version table
+-- takes about 1 second
+UPDATE   ocr.ocr_pipeline_results_version 
+SET is_async = NOT is_async
+WHERE ctid in (
+    SELECT ctid
+    FROM   ocr.ocr_pipeline_results_version 
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results.sql
@@ -0,0 +1,9 @@
+-- update approximately 3000 rows or 1 MB in the ocr_pipeline_step_results table
+-- takes about 11 seconds
+UPDATE     ocr.ocr_pipeline_step_results 
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM    ocr.ocr_pipeline_step_results 
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results_version.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_orc_pipeline_step_results_version.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the ocr_pipeline_step_results_version table
+-- takes about 40 seconds
+UPDATE    ocr.ocr_pipeline_step_results_version  
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM    ocr.ocr_pipeline_step_results_version  
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_priceline_raw_response.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_priceline_raw_response.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the priceline_raw_response table
+-- takes about 1 second
+UPDATE booking_inventory.priceline_raw_response
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM booking_inventory.priceline_raw_response
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_quickbooks_transactions.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_quickbooks_transactions.sql
@@ -0,0 +1,9 @@
+-- update approximately 5000 rows or 1 MB in the quickbooks_transactions table
+-- takes about 30 seconds
+UPDATE   accounting.quickbooks_transactions 
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM   accounting.quickbooks_transactions 
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_raw_finicity_transaction.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_raw_finicity_transaction.sql
@@ -0,0 +1,15 @@
+-- update approximately 6000 rows or 600 kb in the raw_finicity_transaction table
+-- takes about 1 second
+UPDATE banking.raw_finicity_transaction
+SET raw_data = 
+    jsonb_set(
+        raw_data,
+        '{updated}',
+        to_jsonb(now()),
+        true
+    )
+WHERE ctid IN (
+    SELECT ctid
+    FROM banking.raw_finicity_transaction
+    TABLESAMPLE SYSTEM (0.0005)
+);
--- a/test_runner/performance/large_synthetic_oltp/update_relabeled_transactions.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_relabeled_transactions.sql
@@ -0,0 +1,9 @@
+-- update approximately 8000 rows or 1 MB in the relabeled_transactions table
+-- takes about 1 second
+UPDATE heron.relabeled_transactions
+SET created_at = now()
+WHERE ctid in (
+    SELECT ctid
+    FROM heron.relabeled_transactions
+    TABLESAMPLE SYSTEM (0.0005) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_state_values.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_state_values.sql
@@ -0,0 +1,9 @@
+-- update approximately 8000 rows or 1 MB in the state_values table
+-- takes about 2 minutes
+UPDATE workflows.state_values
+SET state_type = now()::text
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.state_values
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_stripe_authorization_event_log.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_stripe_authorization_event_log.sql
@@ -0,0 +1,9 @@
+-- update approximately 4000 rows or 1 MB in the stripe_authorization_event_log table
+-- takes about 5 minutes
+UPDATE stripe.stripe_authorization_event_log
+SET approved = NOT approved
+WHERE ctid in (
+    SELECT ctid
+    FROM stripe.stripe_authorization_event_log
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_transaction.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_transaction.sql
@@ -0,0 +1,9 @@
+-- update approximately 2000 rows or 301 MB in the transaction table
+-- takes about 90 seconds
+UPDATE transaction.transaction
+SET is_last = NOT is_last
+WHERE ctid in (
+    SELECT ctid
+    FROM transaction.transaction
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/large_synthetic_oltp/update_values.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_values.sql
@@ -0,0 +1,9 @@
+-- update approximately 2500 rows or 1 MB in the values table
+-- takes about 3 minutes
+UPDATE workflows.values
+SET bool_value = NOT bool_value
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.values
+    TABLESAMPLE SYSTEM (0.0002) 
+) AND bool_value IS NOT NULL;
--- a/test_runner/performance/large_synthetic_oltp/update_vertices.sql
+++ b/test_runner/performance/large_synthetic_oltp/update_vertices.sql
@@ -0,0 +1,9 @@
+-- update approximately 10000 rows or 2 MB in the vertices table
+-- takes about 1 minute
+UPDATE workflows.vertices
+SET has_been_visited = NOT has_been_visited
+WHERE ctid in (
+    SELECT ctid
+    FROM workflows.vertices
+    TABLESAMPLE SYSTEM (0.0002) 
+);
--- a/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py
+++ b/test_runner/performance/pageserver/pagebench/test_large_slru_basebackup.py
@@ -146,8 +146,6 @@ def run_benchmark(env: NeonEnv, pg_bin: PgBin, record, duration_secs: int):
        ps_http.base_url,
        "--page-service-connstring",
        env.pageserver.connstr(password=None),
-        "--gzip-probability",
-        "1",
        "--runtime",
        f"{duration_secs}s",
        # don't specify the targets explicitly, let pagebench auto-discover them
--- a/test_runner/performance/test_perf_oltp_large_tenant.py
+++ b/test_runner/performance/test_perf_oltp_large_tenant.py
@@ -31,7 +31,9 @@ def get_custom_scripts(
    return rv


-def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
+def run_test_pgbench(
+    env: PgCompare, custom_scripts: str, duration: int, clients: int = 500, jobs: int = 100
+):
    password = env.pg.default_options.get("password", None)
    options = env.pg.default_options.get("options", "")
    # drop password from the connection string by passing password=None and set password separately
@@ -46,8 +48,8 @@ def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int):
        "-n",  # no explicit vacuum before the test - we want to rely on auto-vacuum
        "-M",
        "prepared",
-        "--client=500",
-        "--jobs=100",
+        f"--client={clients}",
+        f"--jobs={jobs}",
        f"-T{duration}",
        "-P60",  # progress every minute
        "--progress-timestamp",
@@ -164,6 +166,12 @@ def test_perf_oltp_large_tenant_pgbench(
    run_test_pgbench(remote_compare, custom_scripts, duration)


+@pytest.mark.parametrize("duration", get_durations_matrix())
+@pytest.mark.remote_cluster
+def test_perf_oltp_large_tenant_growth(remote_compare: PgCompare, duration: int):
+    run_test_pgbench(remote_compare, " ".join(get_custom_scripts()), duration, 35, 35)
+
+
@pytest.mark.remote_cluster
 def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare):
    # run analyze, vacuum, re-index after the test and measure and report its duration
--- a/test_runner/random_ops/test_random_ops.py
+++ b/test_runner/random_ops/test_random_ops.py
@@ -45,6 +45,8 @@ class NeonEndpoint:
        if self.branch.connect_env:
            self.connect_env = self.branch.connect_env.copy()
            self.connect_env["PGHOST"] = self.host
+        if self.type == "read_only":
+            self.project.read_only_endpoints_total += 1

    def delete(self):
        self.project.delete_endpoint(self.id)
@@ -228,8 +230,13 @@ class NeonProject:
        self.benchmarks: dict[str, subprocess.Popen[Any]] = {}
        self.restore_num: int = 0
        self.restart_pgbench_on_console_errors: bool = False
+        self.limits: dict[str, Any] = self.get_limits()["limits"]
+        self.read_only_endpoints_total: int = 0

-    def delete(self):
+    def get_limits(self) -> dict[str, Any]:
+        return self.neon_api.get_project_limits(self.id)
+
+    def delete(self) -> None:
        self.neon_api.delete_project(self.id)

    def create_branch(self, parent_id: str | None = None) -> NeonBranch | None:
@@ -282,6 +289,7 @@ class NeonProject:
        self.neon_api.delete_endpoint(self.id, endpoint_id)
        self.endpoints[endpoint_id].branch.endpoints.pop(endpoint_id)
        self.endpoints.pop(endpoint_id)
+        self.read_only_endpoints_total -= 1
        self.wait()

    def start_benchmark(self, target: str, clients: int = 10) -> subprocess.Popen[Any]:
@@ -369,49 +377,64 @@ def setup_class(
        print(f"::warning::Retried on 524 error {neon_api.retries524} times")
    if neon_api.retries4xx > 0:
        print(f"::warning::Retried on 4xx error {neon_api.retries4xx} times")
-    log.info("Removing the project")
+    log.info("Removing the project %s", project.id)
    project.delete()


-def do_action(project: NeonProject, action: str) -> None:
+def do_action(project: NeonProject, action: str) -> bool:
    """
    Runs the action
    """
    log.info("Action: %s", action)
    if action == "new_branch":
        log.info("Trying to create a new branch")
+        if 0 <= project.limits["max_branches"] <= len(project.branches):
+            log.info(
+                "Maximum branch limit exceeded (%s of %s)",
+                len(project.branches),
+                project.limits["max_branches"],
+            )
+            return False
        parent = project.branches[
            random.choice(list(set(project.branches.keys()) - project.reset_branches))
        ]
        log.info("Parent: %s", parent)
        child = parent.create_child_branch()
        if child is None:
-            return
+            return False
        log.info("Created branch %s", child)
        child.start_benchmark()
    elif action == "delete_branch":
        if project.leaf_branches:
-            target = random.choice(list(project.leaf_branches.values()))
+            target: NeonBranch = random.choice(list(project.leaf_branches.values()))
            log.info("Trying to delete branch %s", target)
            target.delete()
        else:
            log.info("Leaf branches not found, skipping")
+            return False
    elif action == "new_ro_endpoint":
+        if 0 <= project.limits["max_read_only_endpoints"] <= project.read_only_endpoints_total:
+            log.info(
+                "Maximum read only endpoint limit exceeded (%s of %s)",
+                project.read_only_endpoints_total,
+                project.limits["max_read_only_endpoints"],
+            )
+            return False
        ep = random.choice(
            [br for br in project.branches.values() if br.id not in project.reset_branches]
        ).create_ro_endpoint()
        log.info("Created the RO endpoint with id %s branch: %s", ep.id, ep.branch.id)
        ep.start_benchmark()
    elif action == "delete_ro_endpoint":
+        if project.read_only_endpoints_total == 0:
+            log.info("no read_only endpoints present, skipping")
+            return False
        ro_endpoints: list[NeonEndpoint] = [
            endpoint for endpoint in project.endpoints.values() if endpoint.type == "read_only"
        ]
-        if ro_endpoints:
-            target_ep: NeonEndpoint = random.choice(ro_endpoints)
-            target_ep.delete()
-            log.info("endpoint %s deleted", target_ep.id)
-        else:
-            log.info("no read_only endpoints present, skipping")
+        target_ep: NeonEndpoint = random.choice(ro_endpoints)
+        target_ep.delete()
+        log.info("endpoint %s deleted", target_ep.id)
    elif action == "restore_random_time":
        if project.leaf_branches:
            br: NeonBranch = random.choice(list(project.leaf_branches.values()))
@@ -419,8 +442,10 @@ def do_action(project: NeonProject, action: str) -> None:
            br.restore_random_time()
        else:
            log.info("No leaf branches found")
+            return False
    else:
        raise ValueError(f"The action {action} is unknown")
+    return True


@pytest.mark.timeout(7200)
@@ -457,8 +482,9 @@ def test_api_random(
    pg_bin.run(["pgbench", "-i", "-I", "dtGvp", "-s100"], env=project.main_branch.connect_env)
    for _ in range(num_operations):
        log.info("Starting action #%s", _ + 1)
-        do_action(
+        while not do_action(
            project, random.choices([a[0] for a in ACTIONS], weights=[w[1] for w in ACTIONS])[0]
-        )
+        ):
+            log.info("Retrying...")
        project.check_all_benchmarks()
    assert True
--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -184,7 +184,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
        "timeline_offloading": False,
        "rel_size_v2_enabled": True,
        "relsize_snapshot_cache_capacity": 10000,
-        "gc_compaction_enabled": True,
+        "gc_compaction_enabled": False,
        "gc_compaction_verification": False,
        "gc_compaction_initial_threshold_kb": 1024000,
        "gc_compaction_ratio_percent": 200,
--- a/test_runner/regress/test_basebackup.py
+++ b/test_runner/regress/test_basebackup.py
@@ -26,6 +26,10 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
    ps = env.pageserver
    ps_http = ps.http_client()

+    storcon_managed_timelines = (env.storage_controller_config or {}).get(
+        "timelines_onto_safekeepers", False
+    )
+
    # 1. Check that we always hit the cache after compute restart.
    for i in range(3):
        ep.start()
@@ -33,15 +37,26 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):

        def check_metrics(i=i):
            metrics = ps_http.get_metrics()
-            # Never miss.
-            # The first time compute_ctl sends `get_basebackup` with lsn=None, we do not cache such requests.
-            # All other requests should be a hit
-            assert (
-                metrics.query_one(
-                    "pageserver_basebackup_cache_read_total", {"result": "miss"}
-                ).value
-                == 0
-            )
+            if storcon_managed_timelines:
+                # We do not cache the initial basebackup yet,
+                # so the first compute startup should be a miss.
+                assert (
+                    metrics.query_one(
+                        "pageserver_basebackup_cache_read_total", {"result": "miss"}
+                    ).value
+                    == 1
+                )
+            else:
+                # If the timeline is not initialized on safekeepers,
+                # the compute_ctl sends `get_basebackup` with lsn=None for the first startup.
+                # We do not use cache for such requests, so it's neither a hit nor a miss.
+                assert (
+                    metrics.query_one(
+                        "pageserver_basebackup_cache_read_total", {"result": "miss"}
+                    ).value
+                    == 0
+                )
+
            # All but the first requests are hits.
            assert (
                metrics.query_one("pageserver_basebackup_cache_read_total", {"result": "hit"}).value
@@ -54,6 +69,11 @@ def test_basebackup_cache(neon_env_builder: NeonEnvBuilder):
                ).value
                == i + 1
            )
+            # There should be only one basebackup file in the cache.
+            assert metrics.query_one("pageserver_basebackup_cache_entries_total").value == 1
+            # The size of one basebackup for new DB is ~20KB.
+            size_bytes = metrics.query_one("pageserver_basebackup_cache_size_bytes").value
+            assert 10 * 1024 <= size_bytes <= 100 * 1024

        wait_until(check_metrics)

--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -11,6 +11,7 @@ from fixtures.common_types import Lsn, TimelineId
 from fixtures.log_helper import log
 from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import wait_until_tenant_active
+from fixtures.safekeeper.http import MembershipConfiguration, TimelineCreateRequest
 from fixtures.utils import query_scalar
 from performance.test_perf_pgbench import get_scales_matrix
 from requests import RequestException
@@ -164,6 +165,19 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
    ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
    env.pageserver.tenant_create(env.initial_tenant)

+    sk = env.safekeepers[0]
+    assert sk
+    sk.http_client().timeline_create(
+        TimelineCreateRequest(
+            env.initial_tenant,
+            env.initial_timeline,
+            MembershipConfiguration(generation=1, members=[sk.safekeeper_id()], new_members=None),
+            int(env.pg_version) * 10000,
+            Lsn(0),
+            None,
+        )
+    )
+
    initial_branch = "initial_branch"

    def start_creating_timeline():
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -18,6 +18,8 @@ from fixtures.neon_fixtures import (
    NeonEnv,
    NeonEnvBuilder,
    PgBin,
+    Safekeeper,
+    StorageControllerApiException,
    flush_ep_to_pageserver,
 )
 from fixtures.pageserver.http import PageserverApiException
@@ -26,6 +28,7 @@ from fixtures.pageserver.utils import (
 )
 from fixtures.pg_version import PgVersion
 from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
+from fixtures.safekeeper.http import MembershipConfiguration
 from fixtures.workload import Workload

 if TYPE_CHECKING:
@@ -125,6 +128,12 @@ check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
    reason="CHECK_ONDISK_DATA_COMPATIBILITY env is not set",
 )

+skip_old_debug_versions = pytest.mark.skipif(
+    os.getenv("BUILD_TYPE", "debug") == "debug"
+    and os.getenv("DEFAULT_PG_VERSION") in [PgVersion.V14, PgVersion.V15, PgVersion.V16],
+    reason="compatibility snaphots not available for old versions of debug builds",
+)
+

@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(before="test_forward_compatibility")
@@ -195,6 +204,7 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_


@check_ondisk_data_compatibility_if_enabled
+@skip_old_debug_versions
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
 def test_backward_compatibility(
@@ -222,6 +232,7 @@ def test_backward_compatibility(


@check_ondisk_data_compatibility_if_enabled
+@skip_old_debug_versions
@pytest.mark.xdist_group("compatibility")
@pytest.mark.order(after="test_create_snapshot")
 def test_forward_compatibility(
@@ -291,7 +302,20 @@ def test_forward_compatibility(
 def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
    ep = env.endpoints.create("main")
    ep_env = {"LD_LIBRARY_PATH": str(env.pg_distrib_dir / f"v{env.pg_version}/lib")}
-    ep.start(env=ep_env)
+
+    # If the compatibility snapshot was created with --timelines-onto-safekeepers=false,
+    # we should not pass safekeeper_generation to the endpoint because the compute
+    # will not be able to start.
+    # Zero generation is INVALID_GENERATION.
+    generation = 0
+    try:
+        res = env.storage_controller.timeline_locate(env.initial_tenant, env.initial_timeline)
+        generation = res["generation"]
+    except StorageControllerApiException as e:
+        if e.status_code != 404 or not re.search(r"Timeline .* not found", str(e)):
+            raise e
+
+    ep.start(env=ep_env, safekeeper_generation=generation)

    connstr = ep.connstr()

@@ -341,7 +365,7 @@ def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, r
    )

    # Timeline exists again: restart the endpoint
-    ep.start(env=ep_env)
+    ep.start(env=ep_env, safekeeper_generation=generation)

    pg_bin.run_capture(
        ["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
@@ -542,6 +566,24 @@ def test_historic_storage_formats(
    # All our artifacts should contain at least one timeline
    assert len(timelines) > 0

+    # Import tenant does not create the timeline on safekeepers,
+    # because it is a debug handler and the timeline may have already been
+    # created on some set of safekeepers.
+    # Create the timeline on safekeepers manually.
+    # TODO(diko): when we have the script/storcon handler to migrate
+    # the timeline to storcon, we can replace this code with it.
+    mconf = MembershipConfiguration(
+        generation=1,
+        members=Safekeeper.sks_to_safekeeper_ids([env.safekeepers[0]]),
+        new_members=None,
+    )
+    members_sks = Safekeeper.mconf_sks(env, mconf)
+
+    for timeline in timelines:
+        Safekeeper.create_timeline(
+            dataset.tenant_id, timeline["timeline_id"], env.pageserver, mconf, members_sks
+        )
+
    # TODO: ensure that the snapshots we're importing contain a sensible variety of content, at the very
    # least they should include a mixture of deltas and image layers.  Preferably they should also
    # contain some "exotic" stuff like aux files from logical replication.
@@ -573,6 +615,7 @@ def test_historic_storage_formats(


@check_ondisk_data_compatibility_if_enabled
+@skip_old_debug_versions
@pytest.mark.xdist_group("compatibility")
@pytest.mark.parametrize(
    **fixtures.utils.allpairs_versions(),
--- a/test_runner/regress/test_compute_metrics.py
+++ b/test_runner/regress/test_compute_metrics.py
@@ -418,7 +418,7 @@ def test_sql_exporter_metrics_e2e(
    pg_user = conn_options["user"]
    pg_dbname = conn_options["dbname"]
    pg_application_name = f"sql_exporter{stem_suffix}"
-    connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}"
+    connstr = f"postgresql://{pg_user}@{pg_host}:{pg_port}/{pg_dbname}?sslmode=disable&application_name={pg_application_name}&pgaudit.log=none"

    def escape_go_filepath_match_characters(s: str) -> str:
        """
--- a/test_runner/regress/test_compute_reconfigure.py
+++ b/test_runner/regress/test_compute_reconfigure.py
@@ -9,6 +9,8 @@ from fixtures.utils import wait_until
 if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnv

+from fixtures.log_helper import log
+

 def test_compute_reconfigure(neon_simple_env: NeonEnv):
    """
@@ -85,3 +87,57 @@ def test_compute_reconfigure(neon_simple_env: NeonEnv):
    samples = metrics.query_all("compute_ctl_up", {"build_tag": build_tag})
    assert len(samples) == 1
    assert samples[0].value == 1
+
+
+def test_compute_safekeeper_connstrings_duplicate(neon_simple_env: NeonEnv):
+    """
+    Test that we catch duplicate entries in neon.safekeepers.
+    """
+    env = neon_simple_env
+
+    endpoint = env.endpoints.create_start("main")
+
+    # grab the current value of neon.safekeepers
+    sk_list = []
+    with endpoint.cursor() as cursor:
+        cursor.execute("SHOW neon.safekeepers;")
+        row = cursor.fetchone()
+        assert row is not None
+
+        log.info(f'    initial neon.safekeepers: "{row}"')
+
+        # build a safekeepers list with a duplicate
+        sk_list.append(row[0])
+        sk_list.append(row[0])
+
+    safekeepers = ",".join(sk_list)
+    log.info(f'reconfigure neon.safekeepers: "{safekeepers}"')
+
+    # introduce duplicate entry in neon.safekeepers, on purpose
+    endpoint.respec_deep(
+        **{
+            "spec": {
+                "skip_pg_catalog_updates": True,
+                "cluster": {
+                    "settings": [
+                        {
+                            "name": "neon.safekeepers",
+                            "vartype": "string",
+                            "value": safekeepers,
+                        }
+                    ]
+                },
+            },
+        }
+    )
+
+    try:
+        endpoint.reconfigure()
+
+        # Check that in logs we see that it was actually reconfigured,
+        # not restarted or something else.
+        endpoint.log_contains("INFO request{method=POST uri=/configure")
+
+    except Exception as e:
+        # we expect a failure here; log it instead of failing the test
+        log.info(f"RAISED: {e}")
--- a/test_runner/regress/test_feature_flag.py
+++ b/test_runner/regress/test_feature_flag.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from fixtures.utils import run_only_on_default_postgres
+
+if TYPE_CHECKING:
+    from fixtures.neon_fixtures import NeonEnvBuilder
+
+
+@run_only_on_default_postgres("Pageserver-only test only needs to run on one version")
+def test_feature_flag(neon_env_builder: NeonEnvBuilder):
+    env = neon_env_builder.init_start()
+    env.pageserver.http_client().force_override_feature_flag("test-feature-flag", "true")
+    assert env.pageserver.http_client().evaluate_feature_flag_boolean(
+        env.initial_tenant, "test-feature-flag"
+    )["result"]["Ok"]
+    assert (
+        env.pageserver.http_client().evaluate_feature_flag_multivariate(
+            env.initial_tenant, "test-feature-flag"
+        )["result"]["Ok"]
+        == "true"
+    )
+    # Override "false": boolean evaluation reports Err; multivariate returns the raw "false".
+    env.pageserver.http_client().force_override_feature_flag("test-feature-flag", "false")
+    assert (
+        env.pageserver.http_client().evaluate_feature_flag_boolean(
+            env.initial_tenant, "test-feature-flag"
+        )["result"]["Err"]
+        == "No condition group is matched"
+    )
+    assert (
+        env.pageserver.http_client().evaluate_feature_flag_multivariate(
+            env.initial_tenant, "test-feature-flag"
+        )["result"]["Ok"]
+        == "false"
+    )
+    # Override None (presumably clears it; confirm): both evaluations report an Err result.
+    env.pageserver.http_client().force_override_feature_flag("test-feature-flag", None)
+    assert (
+        "Err"
+        in env.pageserver.http_client().evaluate_feature_flag_boolean(
+            env.initial_tenant, "test-feature-flag"
+        )["result"]
+    )
+    assert (
+        "Err"
+        in env.pageserver.http_client().evaluate_feature_flag_multivariate(
+            env.initial_tenant, "test-feature-flag"
+        )["result"]
+    )
--- a/test_runner/regress/test_import.py
+++ b/test_runner/regress/test_import.py
@@ -87,6 +87,9 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build

    # Set up pageserver for import
    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": True,
+    }
    env = neon_env_builder.init_start()

    env.pageserver.tenant_create(tenant)
--- a/test_runner/regress/test_lfc_prewarm.py
+++ b/test_runner/regress/test_lfc_prewarm.py
@@ -59,7 +59,7 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):

    pg_conn = endpoint.connect()
    pg_cur = pg_conn.cursor()
-    pg_cur.execute("create extension neon version '1.6'")
+    pg_cur.execute("create extension neon")
    pg_cur.execute("create database lfc")

    lfc_conn = endpoint.connect(dbname="lfc")
@@ -84,11 +84,8 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
    endpoint.stop()
    endpoint.start()

-    # wait until compute_ctl completes downgrade of extension to default version
-    time.sleep(1)
    pg_conn = endpoint.connect()
    pg_cur = pg_conn.cursor()
-    pg_cur.execute("alter extension neon update to '1.6'")

    lfc_conn = endpoint.connect(dbname="lfc")
    lfc_cur = lfc_conn.cursor()
@@ -144,7 +141,7 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMet

    pg_conn = endpoint.connect()
    pg_cur = pg_conn.cursor()
-    pg_cur.execute("create extension neon version '1.6'")
+    pg_cur.execute("create extension neon")
    pg_cur.execute("CREATE DATABASE lfc")

    lfc_conn = endpoint.connect(dbname="lfc")
@@ -188,7 +185,8 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMet
            pg_cur.execute("select pg_reload_conf()")

            if query is LfcQueryMethod.COMPUTE_CTL:
-                http_client.prewarm_lfc()
+                # Same thing as prewarm_lfc(), testing other method
+                http_client.prewarm_lfc(endpoint.endpoint_id)
            else:
                pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))

--- a/test_runner/regress/test_neon_extension.py
+++ b/test_runner/regress/test_neon_extension.py
@@ -29,7 +29,7 @@ def test_neon_extension(neon_env_builder: NeonEnvBuilder):
            # IMPORTANT:
            # If the version has changed, the test should be updated.
            # Ensure that the default version is also updated in the neon.control file
-            assert cur.fetchone() == ("1.5",)
+            assert cur.fetchone() == ("1.6",)
            cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
            res = cur.fetchall()
            log.info(res)
@@ -53,10 +53,10 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
            # IMPORTANT:
            # If the version has changed, the test should be updated.
            # Ensure that the default version is also updated in the neon.control file
-            assert cur.fetchone() == ("1.5",)
+            assert cur.fetchone() == ("1.6",)
            cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
-            all_versions = ["1.5", "1.4", "1.3", "1.2", "1.1", "1.0"]
-            current_version = "1.5"
+            all_versions = ["1.6", "1.5", "1.4", "1.3", "1.2", "1.1", "1.0"]
+            current_version = "1.6"
            for idx, begin_version in enumerate(all_versions):
                for target_version in all_versions[idx + 1 :]:
                    if current_version != begin_version:
--- a/test_runner/regress/test_normal_work.py
+++ b/test_runner/regress/test_normal_work.py
@@ -64,6 +64,11 @@ def test_normal_work(
    """

    neon_env_builder.num_safekeepers = num_safekeepers
+
+    if safekeeper_proto_version == 2:
+        neon_env_builder.storage_controller_config = {
+            "timelines_onto_safekeepers": False,
+        }
    env = neon_env_builder.init_start()
    pageserver_http = env.pageserver.http_client()

--- a/test_runner/regress/test_pageserver_restarts_under_workload.py
+++ b/test_runner/regress/test_pageserver_restarts_under_workload.py
@@ -16,7 +16,7 @@ if TYPE_CHECKING:

 # Test restarting page server, while safekeeper and compute node keep
 # running.
-def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgBin):
+def test_pageserver_restarts_under_workload(neon_simple_env: NeonEnv, pg_bin: PgBin):
    env = neon_simple_env
    env.create_branch("test_pageserver_restarts")
    endpoint = env.endpoints.create_start("test_pageserver_restarts")
@@ -28,7 +28,11 @@ def test_pageserver_restarts_under_worload(neon_simple_env: NeonEnv, pg_bin: PgB
        pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", f"-s{scale}", connstr])
        pg_bin.run_capture(["pgbench", f"-T{n_restarts}", connstr])

-    thread = threading.Thread(target=run_pgbench, args=(endpoint.connstr(),), daemon=True)
+    thread = threading.Thread(
+        target=run_pgbench,
+        args=(endpoint.connstr(options="-cstatement_timeout=360s"),),
+        daemon=True,
+    )
    thread.start()

    for _ in range(n_restarts):
--- a/test_runner/regress/test_pg_regress.py
+++ b/test_runner/regress/test_pg_regress.py
@@ -173,7 +173,11 @@ def test_pg_regress(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
-    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/regress"
+    #
+    # XXX: We assume that the `build` directory is a sibling of the
+    # pg_distrib_dir.  That is the default when you check out the
+    # repository; `build` and `pg_install` are created side by side.
+    build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
@@ -250,7 +254,11 @@ def test_isolation(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_isolation_regress will need.
-    build_path = pg_distrib_dir / f"build/{env.pg_version.v_prefixed}/src/test/isolation"
+    #
+    # XXX: We assume that the `build` directory is a sibling of the
+    # pg_distrib_dir.  That is the default when you check out the
+    # repository; `build` and `pg_install` are created side by side.
+    build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/isolation"
    src_path = base_dir / f"vendor/postgres-{env.pg_version.v_prefixed}/src/test/isolation"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "isolation_schedule"
@@ -306,13 +314,7 @@ def test_sql_regress(
    )

    # Connect to postgres and create a database called "regression".
-    endpoint = env.endpoints.create_start(
-        "main",
-        config_lines=[
-            # Enable the test mode, so that we don't need to patch the test cases.
-            "neon.regress_test_mode = true",
-        ],
-    )
+    endpoint = env.endpoints.create_start("main")
    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")

    # Create some local directories for pg_regress to run in.
@@ -320,8 +322,11 @@ def test_sql_regress(
    (runpath / "testtablespace").mkdir(parents=True)

    # Compute all the file locations that pg_regress will need.
-    # This test runs neon specific tests
-    build_path = pg_distrib_dir / f"build/v{env.pg_version}/src/test/regress"
+    #
+    # XXX: We assume that the `build` directory is a sibling of the
+    # pg_distrib_dir.  That is the default when you check out the
+    # repository; `build` and `pg_install` are created side by side.
+    build_path = pg_distrib_dir / f"../build/{env.pg_version.v_prefixed}/src/test/regress"
    src_path = base_dir / "test_runner/sql_regress"
    bindir = pg_distrib_dir / f"v{env.pg_version}/bin"
    schedule = src_path / "parallel_schedule"
--- a/test_runner/regress/test_proxy_allowed_ips.py
+++ b/test_runner/regress/test_proxy_allowed_ips.py
@@ -19,11 +19,15 @@ TABLE_NAME = "neon_control_plane.endpoints"
 async def test_proxy_psql_allowed_ips(static_proxy: NeonProxy, vanilla_pg: VanillaPostgres):
    # Shouldn't be able to connect to this project
    vanilla_pg.safe_psql(
-        f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('private-project', '8.8.8.8')"
+        f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('private-project', '8.8.8.8')",
+        user="proxy",
+        password="password",
    )
    # Should be able to connect to this project
    vanilla_pg.safe_psql(
-        f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('generic-project', '::1,127.0.0.1')"
+        f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('generic-project', '::1,127.0.0.1')",
+        user="proxy",
+        password="password",
    )

    def check_cannot_connect(**kwargs):
@@ -60,7 +64,9 @@ async def test_proxy_http_allowed_ips(static_proxy: NeonProxy, vanilla_pg: Vanil

    # Shouldn't be able to connect to this project
    vanilla_pg.safe_psql(
-        f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('proxy', '8.8.8.8')"
+        f"INSERT INTO {TABLE_NAME} (endpoint_id, allowed_ips) VALUES ('proxy', '8.8.8.8')",
+        user="proxy",
+        password="password",
    )

    def query(status: int, query: str, *args):
@@ -75,6 +81,8 @@ async def test_proxy_http_allowed_ips(static_proxy: NeonProxy, vanilla_pg: Vanil
    query(400, "select 1;")  # ip address is not allowed
    # Should be able to connect to this project
    vanilla_pg.safe_psql(
-        f"UPDATE {TABLE_NAME} SET allowed_ips = '8.8.8.8,127.0.0.1' WHERE endpoint_id = 'proxy'"
+        f"UPDATE {TABLE_NAME} SET allowed_ips = '8.8.8.8,127.0.0.1' WHERE endpoint_id = 'proxy'",
+        user="proxy",
+        password="password",
    )
    query(200, "select 1;")  # should work now
--- a/test_runner/regress/test_replica_promotes.py
+++ b/test_runner/regress/test_replica_promotes.py
@@ -4,13 +4,25 @@ File with secondary->primary promotion testing.
 This far, only contains a test that we don't break and that the data is persisted.
 """

+from typing import cast
+
 import psycopg2
+from fixtures.common_types import Lsn
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import Endpoint, NeonEnv, wait_replica_caughtup
 from fixtures.pg_version import PgVersion
 from pytest import raises


+def stop_and_check_lsn(ep: Endpoint, expected_lsn: Lsn | None) -> None:
+    """
+    Stop `ep` with mode "immediate-terminate" and check `ep.terminate_flush_lsn`.
+
+    If `expected_lsn` is given, the terminate LSN must be set and be at least
+    `expected_lsn`. If `expected_lsn` is None, no terminate LSN is expected.
+    """
+    ep.stop(mode="immediate-terminate")
+    lsn = ep.terminate_flush_lsn
+    if expected_lsn is None:
+        assert lsn is None, f"{expected_lsn=} != {lsn=}"
+    else:
+        # Check for None explicitly so that a missing LSN fails the assertion with a
+        # readable message instead of raising TypeError from the comparison.
+        # The message states the failing condition: the reported LSN is too low.
+        assert lsn is not None and lsn >= expected_lsn, f"{lsn=} < {expected_lsn=}"
+
+
 def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
    """
    Test that a replica safely promotes, and can commit data updates which
@@ -37,7 +49,9 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
                   pg_current_wal_flush_lsn()
            """
        )
-        log.info(f"Primary: Current LSN after workload is {primary_cur.fetchone()}")
+        lsn_triple = cast("tuple[str, str, str]", primary_cur.fetchone())
+        log.info(f"Primary: Current LSN after workload is {lsn_triple}")
+        expected_primary_lsn: Lsn = Lsn(lsn_triple[2])
        primary_cur.execute("show neon.safekeepers")
        safekeepers = primary_cur.fetchall()[0][0]

@@ -57,7 +71,7 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
        secondary_cur.execute("select count(*) from t")
        assert secondary_cur.fetchone() == (100,)

-    primary.stop_and_destroy(mode="immediate")
+    stop_and_check_lsn(primary, expected_primary_lsn)

    # Reconnect to the secondary to make sure we get a read-write connection
    promo_conn = secondary.connect()
@@ -109,9 +123,10 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):

    # wait_for_last_flush_lsn(env, secondary, env.initial_tenant, env.initial_timeline)

-    secondary.stop_and_destroy()
+    # secondaries don't sync safekeepers on finish so LSN will be None
+    stop_and_check_lsn(secondary, None)

-    primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary")
+    primary = env.endpoints.create_start(branch_name="main", endpoint_id="primary2")

    with primary.connect() as new_primary:
        new_primary_cur = new_primary.cursor()
@@ -122,7 +137,9 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
                   pg_current_wal_flush_lsn()
            """
        )
-        log.info(f"New primary: Boot LSN is {new_primary_cur.fetchone()}")
+        lsn_triple = cast("tuple[str, str, str]", new_primary_cur.fetchone())
+        expected_primary_lsn = Lsn(lsn_triple[2])
+        log.info(f"New primary: Boot LSN is {lsn_triple}")

        new_primary_cur.execute("select count(*) from t")
        assert new_primary_cur.fetchone() == (200,)
@@ -130,4 +147,4 @@ def test_replica_promotes(neon_simple_env: NeonEnv, pg_version: PgVersion):
        new_primary_cur.execute("select count(*) from t")
        assert new_primary_cur.fetchone() == (300,)

-    primary.stop(mode="immediate")
+    stop_and_check_lsn(primary, expected_primary_lsn)
--- a/test_runner/regress/test_s3_restore.py
+++ b/test_runner/regress/test_s3_restore.py
@@ -74,7 +74,7 @@ def test_tenant_s3_restore(
            last_flush_lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0])
            last_flush_lsns.append(last_flush_lsn)
        ps_http.timeline_checkpoint(tenant_id, timeline_id)
-        wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn)
+        wait_for_upload(ps_http, tenant_id, timeline_id, last_flush_lsn, timeout=60)
        log.info(f"{timeline} timeline {timeline_id} {last_flush_lsn=}")
        parent = timeline

--- a/test_runner/regress/test_safekeeper_deletion.py
+++ b/test_runner/regress/test_safekeeper_deletion.py
@@ -30,6 +30,7 @@ def test_safekeeper_delete_timeline(neon_env_builder: NeonEnvBuilder, auth_enabl
    env.pageserver.allowed_errors.extend(
        [
            ".*Timeline .* was not found in global map.*",
+            ".*Timeline .* has been deleted.*",
            ".*Timeline .* was cancelled and cannot be used anymore.*",
        ]
    )
@@ -198,6 +199,7 @@ def test_safekeeper_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder)
        env.pageserver.allowed_errors.extend(
            [
                ".*Timeline.*was cancelled.*",
+                ".*Timeline.*has been deleted.*",
                ".*Timeline.*was not found.*",
            ]
        )
--- a/test_runner/regress/test_sharding.py
+++ b/test_runner/regress/test_sharding.py
@@ -1337,7 +1337,7 @@ def test_sharding_split_failures(
    # Create bystander tenants with various shard counts. They should not be affected by the aborted
    # splits. Regression test for https://github.com/neondatabase/cloud/issues/28589.
    bystanders = {}  # id → shard_count
-    for bystander_shard_count in [1, 2, 4, 8]:
+    for bystander_shard_count in [1, 2, 4]:
        id, _ = env.create_tenant(shard_count=bystander_shard_count)
        bystanders[id] = bystander_shard_count

@@ -1358,6 +1358,8 @@ def test_sharding_split_failures(
            ".*Reconcile error.*Cancelled.*",
            # While parent shard's client is stopped during split, flush loop updating LSNs will emit this warning
            ".*Failed to schedule metadata upload after updating disk_consistent_lsn.*",
+            # We didn't identify a secondary to remove.
+            ".*Keeping extra secondaries.*",
        ]
    )

@@ -1388,51 +1390,36 @@ def test_sharding_split_failures(
    with pytest.raises(failure.expect_exception()):
        env.storage_controller.tenant_shard_split(tenant_id, shard_count=4)

+    def assert_shard_count(shard_count: int, exclude_ps_id: int | None = None) -> None:
+        """
+        Assert that every location of `tenant_id` seen on the pageservers has the given
+        shard count, and that there are exactly `shard_count` secondary locations and
+        `shard_count` non-secondary locations in total.
+
+        The pageserver with id `exclude_ps_id`, if given, is not queried.
+        """
+        log.info(f"Iterating over {len(env.pageservers)} pageservers to check shard count")
+        modes = []
+        for ps in env.pageservers:
+            if exclude_ps_id is not None and ps.id == exclude_ps_id:
+                continue
+
+            for loc in ps.http_client().tenant_list_locations()["tenant_shards"]:
+                tenant_shard_id = TenantShardId.parse(loc[0])
+                # Only shards of the tenant under test are checked; bystanders are ignored.
+                if tenant_shard_id.tenant_id == tenant_id:
+                    log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
+                    assert tenant_shard_id.shard_count == shard_count
+                    modes.append(loc[1]["mode"])
+
+        # Any mode other than "Secondary" is counted as attached.
+        secondary_count = modes.count("Secondary")
+        attached_count = len(modes) - secondary_count
+        assert secondary_count == shard_count
+        assert attached_count == shard_count
+
    # We expect that the overall operation will fail, but some split requests
    # will have succeeded: the net result should be to return to a clean state, including
    # detaching any child shards.
    def assert_rolled_back(exclude_ps_id=None) -> None:
-        secondary_count = 0
-        attached_count = 0
-        for ps in env.pageservers:
-            if exclude_ps_id is not None and ps.id == exclude_ps_id:
-                continue
-
-            locations = ps.http_client().tenant_list_locations()["tenant_shards"]
-            for loc in locations:
-                tenant_shard_id = TenantShardId.parse(loc[0])
-                if tenant_shard_id.tenant_id != tenant_id:
-                    continue  # skip bystanders
-                log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
-                assert tenant_shard_id.shard_count == initial_shard_count
-                if loc[1]["mode"] == "Secondary":
-                    secondary_count += 1
-                else:
-                    attached_count += 1
-
-        assert secondary_count == initial_shard_count
-        assert attached_count == initial_shard_count
+        assert_shard_count(initial_shard_count, exclude_ps_id)

    def assert_split_done(exclude_ps_id: int | None = None) -> None:
-        secondary_count = 0
-        attached_count = 0
-        for ps in env.pageservers:
-            if exclude_ps_id is not None and ps.id == exclude_ps_id:
-                continue
-
-            locations = ps.http_client().tenant_list_locations()["tenant_shards"]
-            for loc in locations:
-                tenant_shard_id = TenantShardId.parse(loc[0])
-                if tenant_shard_id.tenant_id != tenant_id:
-                    continue  # skip bystanders
-                log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
-                assert tenant_shard_id.shard_count == split_shard_count
-                if loc[1]["mode"] == "Secondary":
-                    secondary_count += 1
-                else:
-                    attached_count += 1
-        assert attached_count == split_shard_count
-        assert secondary_count == split_shard_count
+        assert_shard_count(split_shard_count, exclude_ps_id)

    def finish_split():
        # Having failed+rolled back, we should be able to split again
@@ -1468,6 +1455,7 @@ def test_sharding_split_failures(

        # The split should appear to be rolled back from the point of view of all pageservers
        # apart from the one that is offline
+        env.storage_controller.reconcile_until_idle(timeout_secs=60, max_interval=2)
        wait_until(lambda: assert_rolled_back(exclude_ps_id=failure.pageserver_id))

        finish_split()
@@ -1482,6 +1470,7 @@ def test_sharding_split_failures(
        log.info("Clearing failure...")
        failure.clear(env)

+        env.storage_controller.reconcile_until_idle(timeout_secs=60, max_interval=2)
        wait_until(assert_rolled_back)

        # Having rolled back, the tenant should be working
@@ -1836,3 +1825,90 @@ def test_sharding_gc(
        shard_gc_cutoff_lsn = Lsn(shard_index["metadata_bytes"]["latest_gc_cutoff_lsn"])
        log.info(f"Shard {shard_number} cutoff LSN: {shard_gc_cutoff_lsn}")
        assert shard_gc_cutoff_lsn == shard_0_gc_cutoff_lsn
+
+
+def test_split_ps_delete_old_shard_after_commit(neon_env_builder: NeonEnvBuilder):
+    """
+    Check that PageServer only deletes old shards after the split is committed such that it doesn't
+    have to download a lot of files during abort.
+
+    The test fails a split through the `shard-split-pre-complete` failpoint, waits until the
+    split is aborted, and then checks that the on-demand download counters did not change.
+    """
+    DBNAME = "regression"
+
+    init_shard_count = 4
+    neon_env_builder.num_pageservers = init_shard_count
+    stripe_size = 32
+
+    env = neon_env_builder.init_start(
+        initial_tenant_shard_count=init_shard_count, initial_tenant_shard_stripe_size=stripe_size
+    )
+
+    env.storage_controller.allowed_errors.extend(
+        [
+            # All split failures log a warning when they enqueue the abort operation
+            ".*Enqueuing background abort.*",
+            # Tolerate any error logs that mention a failpoint
+            ".*failpoint.*",
+        ]
+    )
+
+    endpoint = env.endpoints.create("main")
+    endpoint.respec(skip_pg_catalog_updates=False)
+    endpoint.start()
+
+    # Write some initial data.
+    endpoint.safe_psql(f"CREATE DATABASE {DBNAME}")
+    endpoint.safe_psql("CREATE TABLE usertable ( YCSB_KEY INT, FIELD0 TEXT);")
+
+    for _ in range(1000):
+        endpoint.safe_psql(
+            "INSERT INTO usertable SELECT random(), repeat('a', 1000);", log_query=False
+        )
+
+    # Record how many bytes we've downloaded before the split.
+    def collect_downloaded_bytes() -> list[float | None]:
+        """Return the on-demand downloaded bytes metric of every pageserver, in order."""
+        return [
+            page_server.http_client().get_metric_value(
+                "pageserver_remote_ondemand_downloaded_bytes_total"
+            )
+            for page_server in env.pageservers
+        ]
+
+    downloaded_bytes_before = collect_downloaded_bytes()
+
+    # Attempt to split the tenant, but fail the split before it completes.
+    env.storage_controller.configure_failpoints(("shard-split-pre-complete", "return(1)"))
+    with pytest.raises(StorageControllerApiException):
+        env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=16)
+
+    # Wait until split is aborted.
+    def check_split_is_aborted():
+        tenants = env.storage_controller.tenant_list()
+        assert len(tenants) == 1
+        shards = tenants[0]["shards"]
+        assert len(shards) == init_shard_count
+        for shard in shards:
+            assert not shard["is_splitting"]
+            assert not shard["is_reconciling"]
+
+        # Make sure all new shards have been deleted.
+        valid_shards = 0
+        for ps in env.pageservers:
+            for tenant_dir in os.listdir(ps.workdir / "tenants"):
+                # Keep the try body down to the one call that can raise ValueError.
+                try:
+                    tenant_shard_id = TenantShardId.parse(tenant_dir)
+                except ValueError:
+                    log.info(f"{tenant_dir} is not valid tenant shard id")
+                    continue
+                valid_shards += 1
+                assert tenant_shard_id.shard_count == init_shard_count
+        assert valid_shards >= init_shard_count
+
+    wait_until(check_split_is_aborted)
+
+    endpoint.safe_psql("SELECT count(*) from usertable;", log_query=False)
+
+    # Make sure we didn't download anything following the aborted split.
+    downloaded_bytes_after = collect_downloaded_bytes()
+
+    assert downloaded_bytes_before == downloaded_bytes_after
+    endpoint.stop_and_destroy()
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -88,6 +88,12 @@ def test_storage_controller_smoke(
    neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api
    env = neon_env_builder.init_configs()

+    # These bubble up from safekeepers
+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
+        )
+
    # Start services by hand so that we can skip a pageserver (this will start + register later)
    env.broker.start()
    env.storage_controller.start()
@@ -2956,7 +2962,7 @@ def test_storage_controller_leadership_transfer_during_split(
        env.storage_controller.allowed_errors.extend(
            [".*Unexpected child shard count.*", ".*Enqueuing background abort.*"]
        )
-        pause_failpoint = "shard-split-pre-complete"
+        pause_failpoint = "shard-split-pre-complete-pause"
        env.storage_controller.configure_failpoints((pause_failpoint, "pause"))

        split_fut = executor.submit(
@@ -3003,7 +3009,7 @@ def test_storage_controller_leadership_transfer_during_split(
        env.storage_controller.request(
            "PUT",
            f"http://127.0.0.1:{storage_controller_1_port}/debug/v1/failpoints",
-            json=[{"name": "shard-split-pre-complete", "actions": "off"}],
+            json=[{"name": pause_failpoint, "actions": "off"}],
            headers=env.storage_controller.headers(TokenScope.ADMIN),
        )

@@ -3093,6 +3099,58 @@ def test_storage_controller_ps_restarted_during_drain(neon_env_builder: NeonEnvB
    wait_until(reconfigure_node_again)


+def test_ps_unavailable_after_delete(neon_env_builder: NeonEnvBuilder):
+    """
+    Check how the storage controller treats a deleted pageserver.
+
+    The test deletes one of three pageservers from the storage controller and asserts
+    that the node list shrinks to two and stays at two while the deleted pageserver is
+    being restarted and its tombstone exists. After the tombstone is deleted, the test
+    waits until the node list contains three nodes again.
+    """
+    neon_env_builder.num_pageservers = 3
+
+    env = neon_env_builder.init_start()
+
+    def assert_nodes_count(n: int):
+        # Compare against the node list reported by the storage controller.
+        nodes = env.storage_controller.node_list()
+        assert len(nodes) == n
+
+    # All three pageservers must be listed before the deletion
+    assert_nodes_count(3)
+
+    ps = env.pageservers[0]
+    env.storage_controller.node_delete(ps.id)
+
+    # After deletion, the node count must be reduced
+    assert_nodes_count(2)
+
+    # Start the pageserver from a worker thread so that the checks below
+    # do not have to wait for the start call to return
+    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        log.info("Restarting tombstoned pageserver...")
+        ps.stop()
+        ps_start_fut = executor.submit(lambda: ps.start(await_active=False))
+
+        # After deleted pageserver restart, the node count must remain the same
+        assert_nodes_count(2)
+
+        # Exactly one tombstone is expected, and it must belong to the deleted pageserver
+        tombstones = env.storage_controller.tombstone_list()
+        assert len(tombstones) == 1 and tombstones[0]["id"] == ps.id
+
+        env.storage_controller.tombstone_delete(ps.id)
+
+        tombstones = env.storage_controller.tombstone_list()
+        assert len(tombstones) == 0
+
+        # Wait for the pageserver start operation to complete.
+        # If it fails with an exception, we try restarting the pageserver since the failure
+        # may be due to the storage controller refusing to register the node.
+        # However, if we get a TimeoutError that means the pageserver is completely hung,
+        # which is an unexpected failure mode that we'll let propagate up.
+        try:
+            ps_start_fut.result(timeout=20)
+        except TimeoutError:
+            raise
+        except Exception:
+            log.info("Restarting deleted pageserver...")
+            ps.restart()
+
+        # Finally, the node can be registered again after tombstone is deleted
+        wait_until(lambda: assert_nodes_count(3))
+
+
 def test_storage_controller_timeline_crud_race(neon_env_builder: NeonEnvBuilder):
    """
    The storage controller is meant to handle the case where a timeline CRUD operation races
@@ -3403,7 +3461,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):

    assert target.get_safekeeper(fake_id) is None

-    assert len(target.get_safekeepers()) == 0
+    start_sks = target.get_safekeepers()

    sk_0 = env.safekeepers[0]

@@ -3425,7 +3483,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):

    inserted = target.get_safekeeper(fake_id)
    assert inserted is not None
-    assert target.get_safekeepers() == [inserted]
+    assert target.get_safekeepers() == start_sks + [inserted]
    assert eq_safekeeper_records(body, inserted)

    # error out if pk is changed (unexpected)
@@ -3437,7 +3495,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
    assert exc.value.status_code == 400

    inserted_again = target.get_safekeeper(fake_id)
-    assert target.get_safekeepers() == [inserted_again]
+    assert target.get_safekeepers() == start_sks + [inserted_again]
    assert inserted_again is not None
    assert eq_safekeeper_records(inserted, inserted_again)

@@ -3446,7 +3504,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
    body["version"] += 1
    target.on_safekeeper_deploy(fake_id, body)
    inserted_now = target.get_safekeeper(fake_id)
-    assert target.get_safekeepers() == [inserted_now]
+    assert target.get_safekeepers() == start_sks + [inserted_now]
    assert inserted_now is not None

    assert eq_safekeeper_records(body, inserted_now)
@@ -3455,7 +3513,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
    body["https_port"] = 123
    target.on_safekeeper_deploy(fake_id, body)
    inserted_now = target.get_safekeeper(fake_id)
-    assert target.get_safekeepers() == [inserted_now]
+    assert target.get_safekeepers() == start_sks + [inserted_now]
    assert inserted_now is not None
    assert eq_safekeeper_records(body, inserted_now)
    env.storage_controller.consistency_check()
@@ -3464,7 +3522,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder):
    body["https_port"] = None
    target.on_safekeeper_deploy(fake_id, body)
    inserted_now = target.get_safekeeper(fake_id)
-    assert target.get_safekeepers() == [inserted_now]
+    assert target.get_safekeepers() == start_sks + [inserted_now]
    assert inserted_now is not None
    assert eq_safekeeper_records(body, inserted_now)
    env.storage_controller.consistency_check()
@@ -3583,6 +3641,11 @@ def test_timeline_delete_mid_live_migration(neon_env_builder: NeonEnvBuilder, mi
    env = neon_env_builder.init_configs()
    env.start()

+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
+        )
+
    tenant_id = TenantId.generate()
    timeline_id = TimelineId.generate()
    env.storage_controller.tenant_create(tenant_id, placement_policy={"Attached": 1})
@@ -4373,6 +4436,53 @@ def test_storage_controller_graceful_migration(neon_env_builder: NeonEnvBuilder,
        assert initial_ps.http_client().tenant_list_locations()["tenant_shards"] == []


+def test_attached_0_graceful_migration(neon_env_builder: NeonEnvBuilder):
+    """
+    Migrate a tenant shard that has no secondary locations (`Attached(0)` placement
+    policy) to another pageserver in the same AZ with a prewarm migration, and wait
+    until the shard is attached only on the destination pageserver.
+    """
+    neon_env_builder.num_pageservers = 4
+    neon_env_builder.num_azs = 2
+
+    neon_env_builder.storcon_kick_secondary_downloads = False
+
+    env = neon_env_builder.init_start()
+    storcon = env.storage_controller
+    tenant_id = env.initial_tenant
+
+    # It is default, but we want to ensure that there are no secondary locations requested
+    storcon.tenant_policy_update(tenant_id, {"placement": {"Attached": 0}})
+    storcon.reconcile_until_idle()
+
+    shard_desc = storcon.tenant_describe(tenant_id)["shards"][0]
+    src_ps_id = shard_desc["node_attached"]
+    src_ps = env.get_pageserver(src_ps_id)
+    src_az = shard_desc["preferred_az_id"]
+
+    # There must be no secondary locations with Attached(0) placement policy
+    assert len(shard_desc["node_secondary"]) == 0
+
+    # Migrate tenant shard to the same AZ node
+    same_az_candidates = [
+        ps for ps in env.pageservers if ps.id != src_ps_id and ps.az_id == src_az
+    ]
+    dst_ps = same_az_candidates[0]
+
+    storcon.tenant_shard_migrate(
+        TenantShardId(tenant_id, 0, 0),
+        dst_ps.id,
+        config=StorageControllerMigrationConfig(prewarm=True),
+    )
+
+    def shard_locations(ps):
+        # Tenant shard locations currently reported by the given pageserver.
+        return ps.http_client().tenant_list_locations()["tenant_shards"]
+
+    def tenant_shard_migrated():
+        assert len(shard_locations(src_ps)) == 0
+        log.info(f"Tenant shard migrated from {src_ps.id}")
+        dst_locations = shard_locations(dst_ps)
+        assert len(dst_locations) == 1
+        assert dst_locations[0][1]["mode"] == "AttachedSingle"
+        log.info(f"Tenant shard migrated to {dst_ps.id}")
+
+    # After all we expect that tenant shard exists only on dst node.
+    # We wait so long because [`DEFAULT_HEATMAP_PERIOD`] and [`DEFAULT_DOWNLOAD_INTERVAL`]
+    # are set to 60 seconds by default.
+    #
+    # TODO: we should consider making these configurable, so the test can run faster.
+    wait_until(tenant_shard_migrated, timeout=180, interval=5, status_interval=10)
+    log.info("Tenant shard migrated successfully")
+
+
@run_only_on_default_postgres("this is like a 'unit test' against storcon db")
 def test_storage_controller_migrate_with_pageserver_restart(
    neon_env_builder: NeonEnvBuilder, make_httpserver
--- a/test_runner/regress/test_storage_scrubber.py
+++ b/test_runner/regress/test_storage_scrubber.py
@@ -341,6 +341,11 @@ def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder
    env = neon_env_builder.init_configs()
    env.start()

+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
+        )
+
    tenant_id = TenantId.generate()
    timeline_id = TimelineId.generate()
    env.create_tenant(
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -430,6 +430,7 @@ def test_tenant_delete_stale_shards(neon_env_builder: NeonEnvBuilder, pg_bin: Pg
    workload.init()
    workload.write_rows(256)
    workload.validate()
+    workload.stop()

    assert_prefix_not_empty(
        neon_env_builder.pageserver_remote_storage,
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -12,7 +12,7 @@ from typing import TYPE_CHECKING

 import pytest
 import requests
-from fixtures.common_types import Lsn, TenantId, TimelineId
+from fixtures.common_types import Lsn, TenantId, TimelineArchivalState, TimelineId
 from fixtures.log_helper import log
 from fixtures.metrics import (
    PAGESERVER_GLOBAL_METRICS,
@@ -299,6 +299,65 @@ def test_pageserver_metrics_removed_after_detach(neon_env_builder: NeonEnvBuilde
        assert post_detach_samples == set()


+def test_pageserver_metrics_removed_after_offload(neon_env_builder: NeonEnvBuilder):
+    """Tests that when a timeline is offloaded, its per-timeline metrics are not left behind"""
+
+    neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.MOCK_S3)
+
+    neon_env_builder.num_safekeepers = 3
+
+    env = neon_env_builder.init_start()
+    tenant_1, _ = env.create_tenant()
+
+    timeline_1 = env.create_timeline("test_metrics_removed_after_offload_1", tenant_id=tenant_1)
+    timeline_2 = env.create_timeline("test_metrics_removed_after_offload_2", tenant_id=tenant_1)
+
+    endpoint_tenant1 = env.endpoints.create_start(
+        "test_metrics_removed_after_offload_1", tenant_id=tenant_1
+    )
+    endpoint_tenant2 = env.endpoints.create_start(
+        "test_metrics_removed_after_offload_2", tenant_id=tenant_1
+    )
+
+    for endpoint in [endpoint_tenant1, endpoint_tenant2]:
+        with closing(endpoint.connect()) as conn:
+            with conn.cursor() as cur:
+                cur.execute("CREATE TABLE t(key int primary key, value text)")
+                cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
+                cur.execute("SELECT sum(key) FROM t")
+                assert cur.fetchone() == (5000050000,)
+        endpoint.stop()
+
+    def get_ps_metric_samples_for_timeline(
+        tenant_id: TenantId, timeline_id: TimelineId
+    ) -> list[Sample]:
+        ps_metrics = env.pageserver.http_client().get_metrics()
+        samples = []
+        for metric_name in ps_metrics.metrics:
+            for sample in ps_metrics.query_all(
+                name=metric_name,
+                filter={"tenant_id": str(tenant_id), "timeline_id": str(timeline_id)},
+            ):
+                samples.append(sample)
+        return samples
+
+    for timeline in [timeline_1, timeline_2]:
+        pre_offload_samples = set(
+            [x.name for x in get_ps_metric_samples_for_timeline(tenant_1, timeline)]
+        )
+        assert len(pre_offload_samples) > 0, f"expected at least one sample for {timeline}"
+        env.pageserver.http_client().timeline_archival_config(
+            tenant_1,
+            timeline,
+            state=TimelineArchivalState.ARCHIVED,
+        )
+        env.pageserver.http_client().timeline_offload(tenant_1, timeline)
+        post_offload_samples = set(
+            [x.name for x in get_ps_metric_samples_for_timeline(tenant_1, timeline)]
+        )
+        assert post_offload_samples == set()
+
+
 def test_pageserver_with_empty_tenants(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()

--- a/test_runner/regress/test_timeline_detach_ancestor.py
+++ b/test_runner/regress/test_timeline_detach_ancestor.py
@@ -21,7 +21,10 @@ from fixtures.neon_fixtures import (
    last_flush_lsn_upload,
    wait_for_last_flush_lsn,
 )
-from fixtures.pageserver.http import HistoricLayerInfo, PageserverApiException
+from fixtures.pageserver.http import (
+    HistoricLayerInfo,
+    PageserverApiException,
+)
 from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_timeline_detail_404
 from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
 from fixtures.utils import assert_pageserver_backups_equal, skip_in_debug_build, wait_until
@@ -413,6 +416,7 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots
            "read_only": True,
        },
    )
+
    sk = env.safekeepers[0]
    assert sk
    with pytest.raises(requests.exceptions.HTTPError, match="Not Found"):
@@ -504,8 +508,15 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder, snapshots
            assert len(lineage.get("original_ancestor", [])) == 0
            assert len(lineage.get("reparenting_history", [])) == 0

-    for name, _, _, rows, starts in expected_result:
-        with env.endpoints.create_start(name, tenant_id=env.initial_tenant) as ep:
+    for branch_name, queried_timeline, _, rows, starts in expected_result:
+        details = client.timeline_detail(env.initial_tenant, queried_timeline)
+        log.info(f"reading data from branch {branch_name}")
+        # specifying the lsn makes the endpoint read-only and not connect to safekeepers
+        with env.endpoints.create(
+            branch_name,
+            lsn=Lsn(details["last_record_lsn"]),
+        ) as ep:
+            ep.start(safekeeper_generation=1)
            assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows
            assert ep.safe_psql(f"SELECT count(*) FROM audit WHERE starts = {starts}")[0][0] == 1

@@ -1088,6 +1099,9 @@ def test_timeline_detach_ancestor_interrupted_by_deletion(

    for ps in env.pageservers:
        ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
+        ps.allowed_errors.extend(
+            [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
+        )

    pageservers = dict((int(p.id), p) for p in env.pageservers)

@@ -1209,6 +1223,9 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv

    for ps in env.pageservers:
        ps.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS)
+        ps.allowed_errors.extend(
+            [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
+        )

    pageservers = dict((int(p.id), p) for p in env.pageservers)

--- a/test_runner/regress/test_timeline_gc_blocking.py
+++ b/test_runner/regress/test_timeline_gc_blocking.py
@@ -24,6 +24,10 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
        initial_tenant_conf={"gc_period": "1s", "lsn_lease_length": "0s"},
        initial_tenant_shard_count=2 if sharded else None,
    )
+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [".*Timeline.* has been deleted.*", ".*Timeline.*was cancelled and cannot be used"]
+        )

    if sharded:
        http = env.storage_controller.pageserver_api()
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -229,7 +229,7 @@ def test_many_timelines(neon_env_builder: NeonEnvBuilder):

    # Test timeline_list endpoint.
    http_cli = env.safekeepers[0].http_client()
-    assert len(http_cli.timeline_list()) == 3
+    assert len(http_cli.timeline_list()) == 4


 # Check that dead minority doesn't prevent the commits: execute insert n_inserts
@@ -433,6 +433,7 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder):
    env.pageserver.allowed_errors.extend(
        [
            ".*Timeline .* was not found in global map.*",
+            ".*Timeline .* has been deleted.*",
            ".*Timeline .* was cancelled and cannot be used anymore.*",
        ]
    )
@@ -739,8 +740,8 @@ def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
    env = neon_env_builder.init_start()

    tenant_id = env.initial_tenant
-    timeline_id = env.create_branch("test_timeline_status")
-    endpoint = env.endpoints.create_start("test_timeline_status")
+    timeline_id = env.initial_timeline
+    endpoint = env.endpoints.create_start("main")

    wa = env.safekeepers[0]

@@ -1291,6 +1292,12 @@ def test_lagging_sk(neon_env_builder: NeonEnvBuilder):
 # it works without compute at all.
 def test_peer_recovery(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 3
+
+    # timelines should be created the old way
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
+
    env = neon_env_builder.init_start()

    tenant_id = env.initial_tenant
@@ -1532,6 +1539,11 @@ def test_safekeeper_without_pageserver(


 def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder):
+    # timelines should be created the old way manually until we have migration support
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
+
    def execute_payload(endpoint: Endpoint):
        with closing(endpoint.connect()) as conn:
            with conn.cursor() as cur:
@@ -1661,6 +1673,15 @@ def test_pull_timeline(neon_env_builder: NeonEnvBuilder, live_sk_change: bool):
    res = env.safekeepers[3].pull_timeline(
        [env.safekeepers[0], env.safekeepers[2]], tenant_id, timeline_id
    )
+    sk_id_1 = env.safekeepers[0].safekeeper_id()
+    sk_id_3 = env.safekeepers[2].safekeeper_id()
+    sk_id_4 = env.safekeepers[3].safekeeper_id()
+    new_conf = MembershipConfiguration(
+        generation=2, members=[sk_id_1, sk_id_3, sk_id_4], new_members=None
+    )
+    for i in [0, 2, 3]:
+        env.safekeepers[i].http_client().membership_switch(tenant_id, timeline_id, new_conf)
+
    log.info("Finished pulling timeline")
    log.info(res)

@@ -1705,13 +1726,15 @@ def test_pull_timeline_gc(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 3
    neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage())
    env = neon_env_builder.init_start()
-    tenant_id = env.initial_tenant
-    timeline_id = env.initial_timeline

    (src_sk, dst_sk) = (env.safekeepers[0], env.safekeepers[2])

+    dst_sk.stop()
+
+    [tenant_id, timeline_id] = env.create_tenant()
+
    log.info("use only first 2 safekeepers, 3rd will be seeded")
-    endpoint = env.endpoints.create("main")
+    endpoint = env.endpoints.create("main", tenant_id=tenant_id)
    endpoint.active_safekeepers = [1, 2]
    endpoint.start()
    endpoint.safe_psql("create table t(key int, value text)")
@@ -1723,6 +1746,7 @@ def test_pull_timeline_gc(neon_env_builder: NeonEnvBuilder):
    src_http = src_sk.http_client()
    # run pull_timeline which will halt before downloading files
    src_http.configure_failpoints(("sk-snapshot-after-list-pausable", "pause"))
+    dst_sk.start()
    pt_handle = PropagatingThread(
        target=dst_sk.pull_timeline, args=([src_sk], tenant_id, timeline_id)
    )
@@ -1782,23 +1806,27 @@ def test_pull_timeline_term_change(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage())
    env = neon_env_builder.init_start()
    tenant_id = env.initial_tenant
-    timeline_id = env.initial_timeline

    (src_sk, dst_sk) = (env.safekeepers[0], env.safekeepers[2])
+    dst_sk.stop()

+    src_http = src_sk.http_client()
+    src_http.configure_failpoints(("sk-snapshot-after-list-pausable", "pause"))
+
+    timeline_id = env.create_branch("pull_timeline_term_changes")
+
+    # run pull_timeline which will halt before downloading files
    log.info("use only first 2 safekeepers, 3rd will be seeded")
-    ep = env.endpoints.create("main")
+    ep = env.endpoints.create("pull_timeline_term_changes")
    ep.active_safekeepers = [1, 2]
    ep.start()
    ep.safe_psql("create table t(key int, value text)")
    ep.safe_psql("insert into t select generate_series(1, 1000), 'pear'")

-    src_http = src_sk.http_client()
-    # run pull_timeline which will halt before downloading files
-    src_http.configure_failpoints(("sk-snapshot-after-list-pausable", "pause"))
    pt_handle = PropagatingThread(
        target=dst_sk.pull_timeline, args=([src_sk], tenant_id, timeline_id)
    )
+    dst_sk.start()
    pt_handle.start()
    src_sk.wait_until_paused("sk-snapshot-after-list-pausable")

@@ -1807,7 +1835,7 @@ def test_pull_timeline_term_change(neon_env_builder: NeonEnvBuilder):

    # restart compute to bump term
    ep.stop()
-    ep = env.endpoints.create("main")
+    ep = env.endpoints.create("pull_timeline_term_changes")
    ep.active_safekeepers = [1, 2]
    ep.start()
    ep.safe_psql("insert into t select generate_series(1, 100), 'pear'")
@@ -1929,12 +1957,18 @@ def test_pull_timeline_while_evicted(neon_env_builder: NeonEnvBuilder):
@run_only_on_default_postgres("tests only safekeeper API")
 def test_membership_api(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 1
+    # timelines should be created the old way
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
+
    env = neon_env_builder.init_start()

    # These are expected after timeline deletion on safekeepers.
    env.pageserver.allowed_errors.extend(
        [
            ".*Timeline .* was not found in global map.*",
+            ".*Timeline .* has been deleted.*",
            ".*Timeline .* was cancelled and cannot be used anymore.*",
        ]
    )
@@ -2008,6 +2042,12 @@ def test_explicit_timeline_creation(neon_env_builder: NeonEnvBuilder):
    created manually, later storcon will do that.
    """
    neon_env_builder.num_safekeepers = 3
+
+    # timelines should be created the old way manually
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
+
    env = neon_env_builder.init_start()

    tenant_id = env.initial_tenant
@@ -2063,7 +2103,7 @@ def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):
    env = neon_env_builder.init_start()

    tenant_id = env.initial_tenant
-    timeline_id = env.create_branch("test_idle_reconnections")
+    timeline_id = env.initial_timeline

    def collect_stats() -> dict[str, float]:
        # we need to collect safekeeper_pg_queries_received_total metric from all safekeepers
@@ -2094,7 +2134,7 @@ def test_idle_reconnections(neon_env_builder: NeonEnvBuilder):

    collect_stats()

-    endpoint = env.endpoints.create_start("test_idle_reconnections")
+    endpoint = env.endpoints.create_start("main")
    # just write something to the timeline
    endpoint.safe_psql("create table t(i int)")
    collect_stats()
--- a/test_runner/regress/test_wal_acceptor_async.py
+++ b/test_runner/regress/test_wal_acceptor_async.py
@@ -590,6 +590,13 @@ async def run_wal_truncation(env: NeonEnv, safekeeper_proto_version: int):
@pytest.mark.parametrize("safekeeper_proto_version", [2, 3])
 def test_wal_truncation(neon_env_builder: NeonEnvBuilder, safekeeper_proto_version: int):
    neon_env_builder.num_safekeepers = 3
+    if safekeeper_proto_version == 2:
+        # On the legacy protocol, we don't support generations, which are part of
+        # `timelines_onto_safekeepers`
+        neon_env_builder.storage_controller_config = {
+            "timelines_onto_safekeepers": False,
+        }
+
    env = neon_env_builder.init_start()

    asyncio.run(run_wal_truncation(env, safekeeper_proto_version))
@@ -713,6 +720,11 @@ async def run_quorum_sanity(env: NeonEnv):
 # we don't.
 def test_quorum_sanity(neon_env_builder: NeonEnvBuilder):
    neon_env_builder.num_safekeepers = 4
+
+    # The test fails almost always in the new mode (timelines_onto_safekeepers enabled).
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
    env = neon_env_builder.init_start()

    asyncio.run(run_quorum_sanity(env))
--- a/test_runner/regress/test_wal_receiver.py
+++ b/test_runner/regress/test_wal_receiver.py
@@ -16,6 +16,13 @@ if TYPE_CHECKING:
 # Checks that pageserver's walreceiver state is printed in the logs during WAL wait timeout.
 # Ensures that walreceiver does not run without any data inserted and only starts after the insertion.
 def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
+    # we assert below that the walreceiver is not active before data writes.
+    # with manually created timelines, it is active.
+    # FIXME: remove this test once we remove timelines_onto_safekeepers
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
+
    # Trigger WAL wait timeout faster
    neon_env_builder.pageserver_config_override = "wait_lsn_timeout = '1s'"
    env = neon_env_builder.init_start()
--- a/test_runner/sql_regress/expected/neon-event-triggers.out
+++ b/test_runner/sql_regress/expected/neon-event-triggers.out
@@ -0,0 +1,90 @@
+create or replace function admin_proc()
+    returns event_trigger
+    language plpgsql as
+$$
+begin
+    raise notice 'admin event trigger is executed for %', current_user;
+end;
+$$;
+create role neon_superuser;
+create role neon_admin login inherit createrole createdb in role neon_superuser;
+grant create on schema public to neon_admin;
+create database neondb with owner neon_admin;
+grant all privileges on database neondb to neon_superuser;
+create role neon_user;
+grant create on schema public to neon_user;
+create event trigger on_ddl1 on ddl_command_end
+execute procedure admin_proc();
+set role neon_user;
+-- check that non-privileged user can not change neon.event_triggers
+set neon.event_triggers to false;
+ERROR:  permission denied to set neon.event_triggers
+DETAIL:  Only "neon_superuser" is allowed to set the GUC
+-- Non-privileged neon user should not be able to create event trigers
+create event trigger on_ddl2 on ddl_command_end
+execute procedure admin_proc();
+ERROR:  permission denied to create event trigger "on_ddl2"
+HINT:  Must be superuser to create an event trigger.
+set role neon_admin;
+-- neon_superuser should be able to create event trigers
+create or replace function neon_proc()
+    returns event_trigger
+    language plpgsql as
+$$
+begin
+    raise notice 'neon event trigger is executed for %', current_user;
+end;
+$$;
+NOTICE:  admin event trigger is executed for neon_admin
+create event trigger on_ddl2 on ddl_command_end
+execute procedure neon_proc();
+\c neondb neon_admin
+create or replace function neondb_proc()
+    returns event_trigger
+    language plpgsql as
+$$
+begin
+    raise notice 'neondb event trigger is executed for %', current_user;
+end;
+$$;
+create or replace function neondb_secdef_proc()
+    returns event_trigger
+    language plpgsql
+    SECURITY DEFINER
+as
+$$
+begin
+    raise notice 'neondb secdef event trigger is executed for %', current_user;
+end;
+$$;
+-- neon_admin (neon_superuser member) should be able to create event triggers
+create event trigger on_ddl3 on ddl_command_end
+execute procedure neondb_proc();
+create event trigger on_ddl4 on ddl_command_end
+execute procedure neondb_secdef_proc();
+-- Check that event trigger is fired for neon_admin
+create table t1(x integer);
+NOTICE:  neondb event trigger is executed for neon_admin
+NOTICE:  neondb secdef event trigger is executed for neon_admin
+-- Check that event trigger can be skipped
+set neon.event_triggers to false;
+create table t2(x integer);
+WARNING:  Skipping Event Trigger: neon.event_triggers is false
+WARNING:  Skipping Event Trigger: neon.event_triggers is false
+\c regression cloud_admin
+-- Check that event triggers are not fired for superuser
+create table t3(x integer);
+NOTICE:  admin event trigger is executed for cloud_admin
+WARNING:  Skipping Event Trigger
+DETAIL:  Event Trigger function "neon_proc" is owned by non-superuser role "neon_admin", and current_user "cloud_admin" is superuser
+\c neondb cloud_admin
+-- Check that user-defined event triggers are not fired for superuser
+create table t4(x integer);
+WARNING:  Skipping Event Trigger
+DETAIL:  Event Trigger function "neondb_proc" is owned by non-superuser role "neon_admin", and current_user "cloud_admin" is superuser
+WARNING:  Skipping Event Trigger
+DETAIL:  Event Trigger function "neondb_secdef_proc" is owned by non-superuser role "neon_admin", and current_user "cloud_admin" is superuser
+\c neondb neon_admin
+-- Check that neon_admin can drop event triggers
+drop event trigger on_ddl3;
+drop event trigger on_ddl4;
--- a/test_runner/sql_regress/parallel_schedule
+++ b/test_runner/sql_regress/parallel_schedule
@@ -9,3 +9,4 @@ test: neon-rel-truncate
 test: neon-clog
 test: neon-test-utils
 test: neon-vacuum-full
+test: neon-event-triggers
--- a/test_runner/sql_regress/sql/neon-event-triggers.sql
+++ b/test_runner/sql_regress/sql/neon-event-triggers.sql
@@ -0,0 +1,96 @@
+create or replace function admin_proc()
+    returns event_trigger
+    language plpgsql as
+$$
+begin
+    raise notice 'admin event trigger is executed for %', current_user;
+end;
+$$;
+
+create role neon_superuser;
+create role neon_admin login inherit createrole createdb in role neon_superuser;
+grant create on schema public to neon_admin;
+create database neondb with owner neon_admin;
+grant all privileges on database neondb to neon_superuser;
+
+create role neon_user;
+grant create on schema public to neon_user;
+
+create event trigger on_ddl1 on ddl_command_end
+execute procedure admin_proc();
+
+set role neon_user;
+
+-- check that non-privileged user can not change neon.event_triggers
+set neon.event_triggers to false;
+
+-- Non-privileged neon user should not be able to create event trigers
+create event trigger on_ddl2 on ddl_command_end
+execute procedure admin_proc();
+
+set role neon_admin;
+
+-- neon_superuser should be able to create event trigers
+create or replace function neon_proc()
+    returns event_trigger
+    language plpgsql as
+$$
+begin
+    raise notice 'neon event trigger is executed for %', current_user;
+end;
+$$;
+
+create event trigger on_ddl2 on ddl_command_end
+execute procedure neon_proc();
+
+\c neondb neon_admin
+
+create or replace function neondb_proc()
+    returns event_trigger
+    language plpgsql as
+$$
+begin
+    raise notice 'neondb event trigger is executed for %', current_user;
+end;
+$$;
+
+create or replace function neondb_secdef_proc()
+    returns event_trigger
+    language plpgsql
+    SECURITY DEFINER
+as
+$$
+begin
+    raise notice 'neondb secdef event trigger is executed for %', current_user;
+end;
+$$;
+
+-- neon_admin (neon_superuser member) should be able to create event triggers
+create event trigger on_ddl3 on ddl_command_end
+execute procedure neondb_proc();
+
+create event trigger on_ddl4 on ddl_command_end
+execute procedure neondb_secdef_proc();
+
+-- Check that event trigger is fired for neon_admin
+create table t1(x integer);
+
+-- Check that event trigger can be skipped
+set neon.event_triggers to false;
+create table t2(x integer);
+
+\c regression cloud_admin
+
+-- Check that event triggers are not fired for superuser
+create table t3(x integer);
+
+\c neondb cloud_admin
+
+-- Check that user-defined event triggers are not fired for superuser
+create table t4(x integer);
+
+\c neondb neon_admin
+
+-- Check that neon_admin can drop event triggers
+drop event trigger on_ddl3;
+drop event trigger on_ddl4;