tests: update for tenant generations (#5449)

## Problem

Some existing tests are written in a way that's incompatible with tenant
generations.

## Summary of changes

Update all the tests that need updating. Mostly this means calling
through the NeonPageserver.tenant_attach helper to obtain a generation
number, instead of calling directly into the pageserver API. There are
also various more subtle cases.
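
As an illustration of the mechanical change (a sketch, not code from the diff below; `tenant_id` and the surrounding test are assumed):

```python
from fixtures.neon_fixtures import NeonEnv
from fixtures.types import TenantId


def attach_in_test(env: NeonEnv, tenant_id: TenantId):
    # Before: tests called the pageserver HTTP API directly, bypassing the
    # attachment service, so no generation number was ever issued:
    #   env.pageserver.http_client().tenant_attach(tenant_id)
    #
    # After: tests go through the NeonPageserver helper, which (when the
    # attachment service is enabled) issues a generation via attach_hook_issue
    # and forwards it to the pageserver HTTP API.
    env.pageserver.tenant_attach(tenant_id)
```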
John Spray
2023-12-07 12:27:16 +00:00
committed by GitHub
parent f9401fdd31
commit e89e41f8ba
27 changed files with 424 additions and 317 deletions

View File

@@ -201,6 +201,12 @@ async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiEr
// TODO(sharding): make this shard-aware
if let Some(tenant_state) = locked.tenants.get(&req_tenant.id.tenant_id) {
let valid = tenant_state.generation == req_tenant.gen;
tracing::info!(
"handle_validate: {}(gen {}): valid={valid} (latest {})",
req_tenant.id,
req_tenant.gen,
tenant_state.generation
);
response.tenants.push(ValidateResponseTenant {
id: req_tenant.id,
valid,
@@ -250,6 +256,13 @@ async fn handle_attach_hook(mut req: Request<Body>) -> Result<Response<Body>, Ap
tenant_state.pageserver = attach_req.node_id;
let generation = tenant_state.generation;
tracing::info!(
"handle_attach_hook: tenant {} set generation {}, pageserver {}",
attach_req.tenant_id,
tenant_state.generation,
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);
locked.save().await.map_err(ApiError::InternalServerError)?;
json_response(

View File

@@ -323,6 +323,7 @@ impl TenantConfigRequest {
#[derive(Debug, Deserialize)]
pub struct TenantAttachRequest {
#[serde(default)]
pub config: TenantAttachConfig,
#[serde(default)]
pub generation: Option<u32>,
@@ -330,7 +331,7 @@ pub struct TenantAttachRequest {
/// Newtype to enforce deny_unknown_fields on TenantConfig for
/// its usage inside `TenantAttachRequest`.
#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize, Default)]
#[serde(deny_unknown_fields)]
pub struct TenantAttachConfig {
#[serde(flatten)]
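
For reference, the attach request body that tests end up sending now looks roughly like this; the field names (`config`, `generation`) and the URL path come from this commit, while the host, port and values are invented for illustration:

```python
import requests

tenant_id = "74ee8b079a0e437eb0afea7d26a07209"  # example id, reused from a test below
body = {
    "config": {"compaction_period": "1h"},  # flattened TenantAttachConfig fields
    "generation": 3,                        # issued by the attachment service
}
# Host and port are placeholders for a local pageserver.
requests.post(f"http://localhost:9898/v1/tenant/{tenant_id}/attach", json=body)
```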

View File

@@ -99,27 +99,35 @@ impl LocalFs {
};
// If we were given a directory, we may use it as our starting point.
// Otherwise, we must go up to the parent directory. This is because
// Otherwise, we must go up to the first ancestor dir that exists. This is because
// S3 object list prefixes can be arbitrary strings, but when reading
// the local filesystem we need a directory to start calling read_dir on.
let mut initial_dir = full_path.clone();
match fs::metadata(full_path.clone()).await {
Ok(meta) => {
if !meta.is_dir() {
loop {
// Did we make it to the root?
if initial_dir.parent().is_none() {
anyhow::bail!("list_files: failed to find valid ancestor dir for {full_path}");
}
match fs::metadata(initial_dir.clone()).await {
Ok(meta) if meta.is_dir() => {
// We found a directory, break
break;
}
Ok(_meta) => {
// It's not a directory: strip back to the parent
initial_dir.pop();
}
}
Err(e) if e.kind() == ErrorKind::NotFound => {
// It's not a file that exists: strip the prefix back to the parent directory
initial_dir.pop();
}
Err(e) => {
// Unexpected I/O error
anyhow::bail!(e)
Err(e) if e.kind() == ErrorKind::NotFound => {
// It's not a file that exists: strip the prefix back to the parent directory
initial_dir.pop();
}
Err(e) => {
// Unexpected I/O error
anyhow::bail!(e)
}
}
}
// Note that Utf8PathBuf starts_with only considers full path segments, but
// object prefixes are arbitrary strings, so we need the strings for doing
// starts_with later.

View File

@@ -312,7 +312,18 @@ impl ListWriter {
for (tenant_shard_id, tenant_list) in &mut deletion_list.tenants {
if let Some(attached_gen) = attached_tenants.get(tenant_shard_id) {
if attached_gen.previous() == tenant_list.generation {
info!(
seq=%s, tenant_id=%tenant_shard_id.tenant_id,
shard_id=%tenant_shard_id.shard_slug(),
old_gen=?tenant_list.generation, new_gen=?attached_gen,
"Updating gen on recovered list");
tenant_list.generation = *attached_gen;
} else {
info!(
seq=%s, tenant_id=%tenant_shard_id.tenant_id,
shard_id=%tenant_shard_id.shard_slug(),
old_gen=?tenant_list.generation, new_gen=?attached_gen,
"Encountered stale generation on recovered list");
}
}
}

View File

@@ -266,9 +266,7 @@ class NeonPageserverHttpClient(requests.Session):
def tenant_create(self, new_tenant_id: uuid.UUID, ok_if_exists):
res = self.post(
f"http://{self.host}:{self.port}/v1/tenant",
json={
"new_tenant_id": new_tenant_id.hex,
},
json={"new_tenant_id": new_tenant_id.hex, "generation": 1},
)
if res.status_code == 409:

View File

@@ -455,7 +455,7 @@ class NeonEnvBuilder:
self.preserve_database_files = preserve_database_files
self.initial_tenant = initial_tenant or TenantId.generate()
self.initial_timeline = initial_timeline or TimelineId.generate()
self.enable_generations = False
self.enable_generations = True
self.scrub_on_exit = False
self.test_output_dir = test_output_dir
@@ -1571,6 +1571,20 @@ class NeonAttachmentService:
)
response.raise_for_status()
def inspect(self, tenant_id: TenantId) -> Optional[tuple[int, int]]:
response = requests.post(
f"{self.env.control_plane_api}/inspect",
json={"tenant_id": str(tenant_id)},
)
response.raise_for_status()
json = response.json()
log.info(f"Response: {json}")
if json["attachment"]:
# Explicit int() to make python type linter happy
return (int(json["attachment"][0]), int(json["attachment"][1]))
else:
return None
def __enter__(self) -> "NeonAttachmentService":
return self
@@ -1769,13 +1783,10 @@ class NeonPageserver(PgProtocol):
Tenant attachment passes through here to acquire a generation number before proceeding
to call into the pageserver HTTP client.
"""
if self.env.attachment_service is not None:
generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
else:
generation = None
client = self.http_client()
return client.tenant_attach(tenant_id, config, config_null, generation=generation)
return client.tenant_attach(
tenant_id, config, config_null, generation=self.maybe_get_generation(tenant_id)
)
def tenant_detach(self, tenant_id: TenantId):
if self.env.attachment_service is not None:
@@ -1784,6 +1795,34 @@ class NeonPageserver(PgProtocol):
client = self.http_client()
return client.tenant_detach(tenant_id)
def tenant_create(
self,
tenant_id: TenantId,
conf: Optional[Dict[str, Any]] = None,
auth_token: Optional[str] = None,
) -> TenantId:
client = self.http_client(auth_token=auth_token)
return client.tenant_create(
tenant_id, conf, generation=self.maybe_get_generation(tenant_id)
)
def tenant_load(self, tenant_id: TenantId):
client = self.http_client()
return client.tenant_load(tenant_id, generation=self.maybe_get_generation(tenant_id))
def maybe_get_generation(self, tenant_id: TenantId):
"""
For tests that would like to use an HTTP client directly instead of using
the `tenant_attach` and `tenant_create` helpers here: issue a generation
number for a tenant.
Returns None if the attachment service is not enabled (legacy mode)
"""
if self.env.attachment_service is not None:
return self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
else:
return None
def append_pageserver_param_overrides(
params_to_update: List[str],

View File

@@ -210,16 +210,25 @@ class PageserverHttpClient(requests.Session):
return res_json
def tenant_create(
self, new_tenant_id: TenantId, conf: Optional[Dict[str, Any]] = None
self,
new_tenant_id: TenantId,
conf: Optional[Dict[str, Any]] = None,
generation: Optional[int] = None,
) -> TenantId:
if conf is not None:
assert "new_tenant_id" not in conf.keys()
body: Dict[str, Any] = {
"new_tenant_id": str(new_tenant_id),
**(conf or {}),
}
if generation is not None:
body.update({"generation": generation})
res = self.post(
f"http://localhost:{self.port}/v1/tenant",
json={
"new_tenant_id": str(new_tenant_id),
**(conf or {}),
},
json=body,
)
self.verbose_error(res)
if res.status_code == 409:
@@ -273,8 +282,11 @@ class PageserverHttpClient(requests.Session):
self.verbose_error(res)
return res
def tenant_load(self, tenant_id: TenantId):
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load")
def tenant_load(self, tenant_id: TenantId, generation=None):
body = None
if generation is not None:
body = {"generation": generation}
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/load", json=body)
self.verbose_error(res)
def tenant_ignore(self, tenant_id: TenantId):

View File

@@ -6,9 +6,8 @@ from fixtures.types import KEY_MAX, KEY_MIN, Key, Lsn
@dataclass
class IndexLayerMetadata:
@classmethod
def from_json(cls, d: Dict[str, Any]):
return {}
file_size: int
generation: int
@dataclass(frozen=True)
@@ -139,7 +138,7 @@ class IndexPartDump:
def from_json(cls, d: Dict[str, Any]) -> "IndexPartDump":
return IndexPartDump(
layer_metadata={
parse_layer_file_name(n): IndexLayerMetadata.from_json(v)
parse_layer_file_name(n): IndexLayerMetadata(v["file_size"], v["generation"])
for n, v in d["layer_metadata"].items()
},
disk_consistent_lsn=Lsn(d["disk_consistent_lsn"]),
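
A hedged example of the `layer_metadata` entry shape the updated parser expects, assuming `IndexLayerMetadata` lives in `fixtures.pageserver.types` alongside the other helpers (the numbers are invented; only the field names come from the code above):

```python
from fixtures.pageserver.types import IndexLayerMetadata

# One entry of index_part.json's "layer_metadata" map; values are invented.
raw = {"file_size": 23456789, "generation": 2}
meta = IndexLayerMetadata(raw["file_size"], raw["generation"])
assert meta.generation == 2
```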

View File

@@ -12,7 +12,6 @@ import boto3
from mypy_boto3_s3 import S3Client
from fixtures.log_helper import log
from fixtures.pageserver.types import LayerFileName
from fixtures.types import TenantId, TimelineId
TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
@@ -88,13 +87,46 @@ class LocalFsStorage:
def timeline_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
return self.tenant_path(tenant_id) / "timelines" / str(timeline_id)
def layer_path(
self, tenant_id: TenantId, timeline_id: TimelineId, layer_file_name: LayerFileName
):
return self.timeline_path(tenant_id, timeline_id) / layer_file_name.to_str()
def timeline_latest_generation(self, tenant_id, timeline_id):
timeline_files = os.listdir(self.timeline_path(tenant_id, timeline_id))
index_parts = [f for f in timeline_files if f.startswith("index_part")]
def parse_gen(filename):
log.info(f"parsing index_part '{filename}'")
parts = filename.split("-")
if len(parts) == 2:
return int(parts[1], 16)
else:
return None
generations = sorted([parse_gen(f) for f in index_parts])
if len(generations) == 0:
raise RuntimeError(f"No index_part found for {tenant_id}/{timeline_id}")
return generations[-1]
def index_path(self, tenant_id: TenantId, timeline_id: TimelineId) -> Path:
return self.timeline_path(tenant_id, timeline_id) / TIMELINE_INDEX_PART_FILE_NAME
latest_gen = self.timeline_latest_generation(tenant_id, timeline_id)
if latest_gen is None:
filename = TIMELINE_INDEX_PART_FILE_NAME
else:
filename = f"{TIMELINE_INDEX_PART_FILE_NAME}-{latest_gen:08x}"
return self.timeline_path(tenant_id, timeline_id) / filename
def remote_layer_path(
self,
tenant_id: TenantId,
timeline_id: TimelineId,
local_name: str,
generation: Optional[int] = None,
):
if generation is None:
generation = self.timeline_latest_generation(tenant_id, timeline_id)
assert generation is not None, "Cannot calculate remote layer path without generation"
filename = f"{local_name}-{generation:08x}"
return self.timeline_path(tenant_id, timeline_id) / filename
def index_content(self, tenant_id: TenantId, timeline_id: TimelineId):
with self.index_path(tenant_id, timeline_id).open("r") as f:
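
A short sketch of the naming convention these helpers rely on: remote object names carry the generation as an 8-hex-digit suffix (the helper below is hypothetical; the suffix format matches the `f"{...}-{generation:08x}"` calls above):

```python
def remote_name(local_name: str, generation: int) -> str:
    # Same formatting as remote_layer_path() / index_path() above.
    return f"{local_name}-{generation:08x}"


assert remote_name("index_part.json", 3) == "index_part.json-00000003"
```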

View File

@@ -100,7 +100,6 @@ def test_config_with_unknown_keys_is_bad_request(negative_env: NegativeTests):
env = negative_env.neon_env
tenant_id = negative_env.tenant_id
ps_http = env.pageserver.http_client()
config_with_unknown_keys = {
"compaction_period": "1h",
@@ -108,16 +107,16 @@ def test_config_with_unknown_keys_is_bad_request(negative_env: NegativeTests):
}
with pytest.raises(PageserverApiException) as e:
ps_http.tenant_attach(tenant_id, config=config_with_unknown_keys)
env.pageserver.tenant_attach(tenant_id, config=config_with_unknown_keys)
assert e.type == PageserverApiException
assert e.value.status_code == 400
@pytest.mark.parametrize("content_type", [None, "application/json"])
def test_empty_body(positive_env: NeonEnv, content_type: Optional[str]):
def test_no_config(positive_env: NeonEnv, content_type: Optional[str]):
"""
For backwards-compatibility: if we send an empty body,
the request should be accepted and the config should be the default config.
When the 'config' body attribute is omitted, the request should be accepted
and the tenant should use the default configuration
"""
env = positive_env
ps_http = env.pageserver.http_client()
@@ -128,9 +127,14 @@ def test_empty_body(positive_env: NeonEnv, content_type: Optional[str]):
ps_http.tenant_detach(tenant_id)
assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
body = {}
gen = env.pageserver.maybe_get_generation(tenant_id)
if gen is not None:
body["generation"] = gen
ps_http.post(
f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",
data=b"",
json=body,
headers=None if content_type else {"Content-Type": "application/json"},
).raise_for_status()
@@ -191,7 +195,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
}, "ensure our custom config has different values than the default config for all config options, so we know we overrode everything"
ps_http.tenant_detach(tenant_id)
ps_http.tenant_attach(tenant_id, config=fully_custom_config)
env.pageserver.tenant_attach(tenant_id, config=fully_custom_config)
assert ps_http.tenant_config(tenant_id).tenant_specific_overrides == fully_custom_config
assert set(ps_http.tenant_config(tenant_id).effective_config.keys()) == set(

View File

@@ -60,14 +60,14 @@ def test_pageserver_auth(neon_env_builder: NeonEnvBuilder):
assert_client_authorized(env, invalid_tenant_http_client)
# create tenant using management token
pageserver_http_client.tenant_create(TenantId.generate())
env.pageserver.tenant_create(TenantId.generate(), auth_token=pageserver_token)
# fail to create tenant using tenant token
with pytest.raises(
PageserverApiException,
match="Forbidden: JWT authentication error",
):
tenant_http_client.tenant_create(TenantId.generate())
env.pageserver.tenant_create(TenantId.generate(), auth_token=tenant_token)
def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):

View File

@@ -158,7 +158,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
# pause all uploads
ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
ps_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
initial_branch = "initial_branch"
@@ -200,7 +200,7 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder
# pause all uploads
ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
ps_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
def start_creating_timeline():
with pytest.raises(RequestException):
@@ -257,7 +257,7 @@ def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: N
# pause all uploads
ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
ps_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
def start_creating_timeline():
ps_http.timeline_create(
@@ -343,8 +343,7 @@ def test_non_uploaded_root_timeline_is_deleted_after_restart(neon_env_builder: N
)
ps_http = env.pageserver.http_client()
# pause all uploads
ps_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
# Create a timeline whose creation will succeed. The tenant will need at least one
# timeline to be loadable.
@@ -397,7 +396,7 @@ def test_non_uploaded_branch_is_deleted_after_restart(neon_env_builder: NeonEnvB
)
ps_http = env.pageserver.http_client()
ps_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
ps_http.timeline_create(env.pg_version, env.initial_tenant, env.initial_timeline)
# pause all uploads

View File

@@ -160,7 +160,7 @@ def test_timeline_init_break_before_checkpoint_recreate(
]
)
pageserver_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
tenant_id = env.initial_tenant
timelines_dir = env.pageserver.timeline_dir(tenant_id)

View File

@@ -14,6 +14,11 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
)
env = neon_env_builder.init_start()
for pageserver in env.pageservers:
# This test dual-attaches a tenant, one of the pageservers will therefore
# be running with a stale generation.
pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates.*")
env.neon_cli.create_branch("test_change_pageserver")
endpoint = env.endpoints.create_start("test_change_pageserver")
@@ -79,6 +84,11 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
# Try failing back, and this time we will stop the current pageserver before reconfiguring
# the endpoint. Whereas the previous reconfiguration was like a healthy migration, this
# is more like what happens in an unexpected pageserver failure.
#
# Since we're dual-attached, need to tip-off attachment service to treat the one we're
# about to start as the attached pageserver
assert env.attachment_service is not None
env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
env.pageservers[0].start()
env.pageservers[1].stop()
@@ -88,6 +98,9 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
assert fetchone() == (100000,)
env.pageservers[0].stop()
# Since we're dual-attached, need to tip-off attachment service to treat the one we're
# about to start as the attached pageserver
env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[1].id)
env.pageservers[1].start()
# Test a (former) bug where a child process spins without updating its connection string

View File

@@ -112,7 +112,9 @@ def test_actually_duplicated_l1(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin)
assert l1_found is not None, "failed to find L1 locally"
uploaded = env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) / l1_found.name
uploaded = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, l1_found.name
)
assert not uploaded.exists(), "to-be-overwritten should not yet be uploaded"
env.pageserver.start()
@@ -139,4 +141,7 @@ def test_actually_duplicated_l1(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin)
wait_for_upload_queue_empty(pageserver_http, tenant_id, timeline_id)
uploaded = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, l1_found.name
)
assert uploaded.exists(), "the L1 is uploaded"

View File

@@ -84,8 +84,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
env = neon_env_builder.init_start()
client = env.pageserver.http_client()
client.tenant_create(tenant)
env.pageserver.tenant_create(tenant)
env.pageserver.allowed_errors.extend(
[
@@ -149,6 +148,7 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
".*WARN.*ignored .* unexpected bytes after the tar archive.*"
)
client = env.pageserver.http_client()
timeline_delete_wait_completed(client, tenant, timeline)
# Importing correct backup works
@@ -292,7 +292,7 @@ def _import(
# Import to pageserver
endpoint_id = "ep-import_from_pageserver"
client = env.pageserver.http_client()
client.tenant_create(tenant)
env.pageserver.tenant_create(tenant)
env.neon_cli.raw_cli(
[
"timeline",

View File

@@ -149,19 +149,28 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
f"got layer from the future: lsn={future_layer.lsn} disk_consistent_lsn={ip.disk_consistent_lsn} last_record_lsn={last_record_lsn}"
)
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
future_layer_path = env.pageserver_remote_storage.layer_path(
tenant_id, timeline_id, future_layer
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, future_layer.to_str()
)
log.info(f"future layer path: {future_layer_path}")
pre_stat = future_layer_path.stat()
time.sleep(1.1) # so that we can use change in pre_stat.st_mtime to detect overwrites
def get_generation_number():
assert env.attachment_service is not None
attachment = env.attachment_service.inspect(tenant_id)
assert attachment is not None
return attachment[0]
# force removal of layers from the future
tenant_conf = ps_http.tenant_config(tenant_id)
ps_http.tenant_detach(tenant_id)
generation_before_detach = get_generation_number()
env.pageserver.tenant_detach(tenant_id)
failpoint_name = "before-delete-layer-pausable"
ps_http.configure_failpoints((failpoint_name, "pause"))
ps_http.tenant_attach(tenant_id, tenant_conf.tenant_specific_overrides)
env.pageserver.tenant_attach(tenant_id, tenant_conf.tenant_specific_overrides)
generation_after_reattach = get_generation_number()
wait_until_tenant_active(ps_http, tenant_id)
# Ensure the IndexPart upload that unlinks the layer file finishes, i.e., doesn't clog the queue.
@@ -177,6 +186,10 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
assert env.pageserver.log_contains(f".*{tenant_id}.*at failpoint.*{failpoint_name}")
wait_until(10, 0.5, delete_at_pause_point)
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, future_layer.to_str(), generation=generation_before_detach
)
log.info(f"future layer path: {future_layer_path}")
assert future_layer_path.exists()
# wait for re-ingestion of the WAL from safekeepers into the in-memory layer
@@ -215,12 +228,17 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
# Examine the resulting S3 state.
log.info("integrity-check the remote storage")
ip = get_index_part()
for layer_file_name in ip.layer_metadata.keys():
layer_path = env.pageserver_remote_storage.layer_path(
tenant_id, timeline_id, layer_file_name
for layer_file_name, layer_metadata in ip.layer_metadata.items():
log.info(f"Layer metadata {layer_file_name.to_str()}: {layer_metadata}")
layer_path = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, layer_file_name.to_str(), layer_metadata.generation
)
assert layer_path.exists(), f"{layer_file_name.to_str()}"
log.info("assert that the overwritten layer won")
future_layer_path = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, future_layer.to_str(), generation=generation_after_reattach
)
final_stat = future_layer_path.stat()
log.info(f"future layer path: {future_layer_path}")
assert final_stat.st_mtime != pre_stat.st_mtime

View File

@@ -133,6 +133,7 @@ def test_cli_start_stop(neon_env_builder: NeonEnvBuilder):
# Stop default ps/sk
env.neon_cli.pageserver_stop(env.pageserver.id)
env.neon_cli.safekeeper_stop()
env.neon_cli.attachment_service_stop(False)
# Keep NeonEnv state up to date, it usually owns starting/stopping services
env.pageserver.running = False
@@ -173,6 +174,9 @@ def test_cli_start_stop_multi(neon_env_builder: NeonEnvBuilder):
env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 1)
env.neon_cli.safekeeper_stop(neon_env_builder.safekeepers_id_start + 2)
# Stop this to get out of the way of the following `start`
env.neon_cli.attachment_service_stop(False)
# Default start
res = env.neon_cli.raw_cli(["start"])
res.check_returncode()

View File

@@ -8,7 +8,6 @@ from fixtures.neon_fixtures import (
NeonEnvBuilder,
)
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pg_version import PgVersion
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import wait_until
@@ -62,7 +61,10 @@ def test_pageserver_init_node_id(
assert "has node id already, it cannot be overridden" in bad_update.stderr
def check_client(pg_version: PgVersion, client: PageserverHttpClient, initial_tenant: TenantId):
def check_client(env: NeonEnv, client: PageserverHttpClient):
pg_version = env.pg_version
initial_tenant = env.initial_tenant
client.check_status()
# check initial tenant is there
@@ -70,7 +72,7 @@ def check_client(pg_version: PgVersion, client: PageserverHttpClient, initial_te
# create new tenant and check it is also there
tenant_id = TenantId.generate()
client.tenant_create(tenant_id)
client.tenant_create(tenant_id, generation=env.pageserver.maybe_get_generation(tenant_id))
assert tenant_id in {TenantId(t["id"]) for t in client.tenant_list()}
timelines = client.timeline_list(tenant_id)
@@ -181,7 +183,7 @@ def test_pageserver_http_get_wal_receiver_success(neon_simple_env: NeonEnv):
def test_pageserver_http_api_client(neon_simple_env: NeonEnv):
env = neon_simple_env
with env.pageserver.http_client() as client:
check_client(env.pg_version, client, env.initial_tenant)
check_client(env, client)
def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilder):
@@ -191,4 +193,4 @@ def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilde
pageserver_token = env.auth_keys.generate_pageserver_token()
with env.pageserver.http_client(auth_token=pageserver_token) as client:
check_client(env.pg_version, client, env.initial_tenant)
check_client(env, client)

View File

@@ -23,7 +23,6 @@ from fixtures.pageserver.utils import (
wait_until_tenant_state,
)
from fixtures.remote_storage import (
TIMELINE_INDEX_PART_FILE_NAME,
LocalFsStorage,
RemoteStorageKind,
available_remote_storages,
@@ -350,6 +349,13 @@ def test_remote_storage_upload_queue_retries(
env.pageserver.stop(immediate=True)
env.endpoints.stop_all()
# We are about to forcibly drop local dirs. Attachment service will increment generation in re-attach before
# we later increment when actually attaching it again, leading to skipping a generation and potentially getting
# these warnings if there was a durable but un-executed deletion list at time of restart.
env.pageserver.allowed_errors.extend(
[".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
)
dir_to_clear = env.pageserver.tenant_dir()
shutil.rmtree(dir_to_clear)
os.mkdir(dir_to_clear)
@@ -648,7 +654,7 @@ def test_empty_branch_remote_storage_upload(neon_env_builder: NeonEnvBuilder):
), f"Expected to have an initial timeline and the branch timeline only, but got {timelines_before_detach}"
client.tenant_detach(env.initial_tenant)
client.tenant_attach(env.initial_tenant)
env.pageserver.tenant_attach(env.initial_tenant)
wait_until_tenant_state(client, env.initial_tenant, "Active", 5)
timelines_after_detach = set(
@@ -758,10 +764,11 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv
# this is because creating a timeline always awaits for the uploads to complete
assert_nothing_to_upload(client, env.initial_tenant, new_branch_timeline_id)
assert (
new_branch_on_remote_storage / TIMELINE_INDEX_PART_FILE_NAME
assert env.pageserver_remote_storage.index_path(
env.initial_tenant, new_branch_timeline_id
).is_file(), "uploads scheduled during initial load should had been awaited for"
finally:
barrier.abort()
create_thread.join()

View File

@@ -314,7 +314,7 @@ def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
assert not config_path.exists(), "detach did not remove config file"
http_client.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
wait_until(
number_of_iterations=5,
interval=1,

View File

@@ -380,7 +380,7 @@ def test_tenant_delete_is_resumed_on_attach(
env.pageserver.start()
# now we call attach
ps_http.tenant_attach(tenant_id=tenant_id)
env.pageserver.tenant_attach(tenant_id=tenant_id)
# delete should be resumed
wait_tenant_status_404(ps_http, tenant_id, iterations)
@@ -419,7 +419,7 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
)
pageserver_http.tenant_create(env.initial_tenant)
env.pageserver.tenant_create(env.initial_tenant)
failpoint = "flush-layer-cancel-after-writing-layer-out-pausable"
pageserver_http.configure_failpoints((failpoint, "pause"))

View File

@@ -82,6 +82,10 @@ def test_tenant_reattach(
env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS)
# Our re-attach may race with the deletion queue processing LSN updates
# from the original attachment.
env.pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates.*")
with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
with endpoint.cursor() as cur:
cur.execute("CREATE TABLE t(key int primary key, value text)")
@@ -112,8 +116,8 @@ def test_tenant_reattach(
if mode == ReattachMode.REATTACH_EXPLICIT:
# Explicitly detach then attach the tenant as two separate API calls
pageserver_http.tenant_detach(tenant_id)
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_detach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
elif mode in (ReattachMode.REATTACH_RESET, ReattachMode.REATTACH_RESET_DROP):
# Use the reset API to detach/attach in one shot
pageserver_http.tenant_reset(tenant_id, mode == ReattachMode.REATTACH_RESET_DROP)
@@ -192,6 +196,9 @@ def test_tenant_reattach_while_busy(
updates_finished = 0
updates_to_perform = 0
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start()
# Run random UPDATEs on test table. On failure, try again.
async def update_table(pg_conn: asyncpg.Connection):
nonlocal updates_started, updates_finished, updates_to_perform
@@ -223,7 +230,7 @@ def test_tenant_reattach_while_busy(
pageserver_http.tenant_detach(tenant_id)
await asyncio.sleep(1)
log.info("Re-attaching tenant")
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
log.info("Re-attach finished")
# Continue with 5000 more updates
@@ -244,9 +251,6 @@ def test_tenant_reattach_while_busy(
assert updates_finished == updates_to_perform
neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
env = neon_env_builder.init_start()
pageserver_http = env.pageserver.http_client()
# create new tenant
@@ -454,6 +458,10 @@ def test_detach_while_attaching(
env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS)
# Our re-attach may race with the deletion queue processing LSN updates
# from the original attachment.
env.pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates.*")
# Create table, and insert some rows. Make it big enough that it doesn't fit in
# shared_buffers, otherwise the SELECT after restart will just return answer
# from shared_buffers without hitting the page server, which defeats the point
@@ -487,7 +495,7 @@ def test_detach_while_attaching(
# And re-attach
pageserver_http.configure_failpoints([("attach-before-activate", "return(5000)")])
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
# Before it has chance to finish, detach it again
pageserver_http.tenant_detach(tenant_id)
@@ -497,7 +505,7 @@ def test_detach_while_attaching(
# Attach it again. If the GC and compaction loops from the previous attach/detach
# cycle are still running, things could get really confusing..
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
with endpoint.cursor() as cur:
cur.execute("SELECT COUNT(*) FROM foo")
@@ -556,7 +564,7 @@ def test_ignored_tenant_reattach(neon_env_builder: NeonEnvBuilder):
), "Ignored tenant should not be reloaded after pageserver restart"
# now, load it from the local files and expect it works
pageserver_http.tenant_load(tenant_id=ignored_tenant_id)
env.pageserver.tenant_load(tenant_id=ignored_tenant_id)
wait_until_tenant_state(pageserver_http, ignored_tenant_id, "Active", 5)
tenants_after_attach = [tenant["id"] for tenant in pageserver_http.tenant_list()]
@@ -611,7 +619,7 @@ def test_ignored_tenant_download_missing_layers(neon_env_builder: NeonEnvBuilder
assert layers_removed, f"Found no layers for tenant {timeline_dir}"
# now, load it from the local files and expect it to work due to remote storage restoration
pageserver_http.tenant_load(tenant_id=tenant_id)
env.pageserver.tenant_load(tenant_id=tenant_id)
wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5)
tenants_after_attach = [tenant["id"] for tenant in pageserver_http.tenant_list()]
@@ -645,13 +653,13 @@ def test_load_attach_negatives(neon_env_builder: NeonEnvBuilder):
expected_exception=PageserverApiException,
match=f"tenant {tenant_id} already exists, state: Active",
):
pageserver_http.tenant_load(tenant_id)
env.pageserver.tenant_load(tenant_id)
with pytest.raises(
expected_exception=PageserverApiException,
match=f"tenant {tenant_id} already exists, state: Active",
):
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
pageserver_http.tenant_ignore(tenant_id)
@@ -660,7 +668,7 @@ def test_load_attach_negatives(neon_env_builder: NeonEnvBuilder):
expected_exception=PageserverApiException,
match="tenant directory already exists",
):
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
def test_ignore_while_attaching(
@@ -679,6 +687,10 @@ def test_ignore_while_attaching(
env.pageserver.allowed_errors.extend(PERMIT_PAGE_SERVICE_ERRORS)
# Our re-attach may race with the deletion queue processing LSN updates
# from the original attachment.
env.pageserver.allowed_errors.append(".*Dropped remote consistent LSN updates.*")
data_id = 1
data_secret = "very secret secret"
insert_test_data(pageserver_http, tenant_id, timeline_id, data_id, data_secret, endpoint)
@@ -689,7 +701,7 @@ def test_ignore_while_attaching(
pageserver_http.tenant_detach(tenant_id)
# And re-attach, but stop attach task_mgr task from completing
pageserver_http.configure_failpoints([("attach-before-activate", "return(5000)")])
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
# Run ignore on the task, thereby cancelling the attach.
# XXX This should take priority over attach, i.e., it should cancel the attach task.
# But neither the failpoint, nor the proper remote_timeline_client download functions,
@@ -704,7 +716,7 @@ def test_ignore_while_attaching(
expected_exception=PageserverApiException,
match="tenant directory already exists",
):
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
tenants_after_ignore = [tenant["id"] for tenant in pageserver_http.tenant_list()]
assert tenant_id not in tenants_after_ignore, "Ignored tenant should be missing"
@@ -714,7 +726,7 @@ def test_ignore_while_attaching(
# Calling load will bring the tenant back online
pageserver_http.configure_failpoints([("attach-before-activate", "off")])
pageserver_http.tenant_load(tenant_id)
env.pageserver.tenant_load(tenant_id)
wait_until_tenant_state(pageserver_http, tenant_id, "Active", 5)
@@ -818,7 +830,7 @@ def test_metrics_while_ignoring_broken_tenant_and_reloading(
found_broken
), f"broken should still be in set, but it is not in the tenant state count: broken={broken}, broken_set={broken_set}"
client.tenant_load(env.initial_tenant)
env.pageserver.tenant_load(env.initial_tenant)
found_active = False
active, broken_set = ([], [])

View File

@@ -7,13 +7,8 @@ from pathlib import Path
from typing import Any, Dict, Optional, Tuple
import pytest
from fixtures.broker import NeonBroker
from fixtures.log_helper import log
from fixtures.neon_fixtures import (
Endpoint,
NeonEnv,
NeonEnvBuilder,
)
from fixtures.neon_fixtures import Endpoint, NeonEnvBuilder, NeonPageserver
from fixtures.pageserver.http import PageserverHttpClient
from fixtures.pageserver.utils import (
assert_tenant_state,
@@ -30,7 +25,6 @@ from fixtures.remote_storage import (
from fixtures.types import Lsn, TenantId, TimelineId
from fixtures.utils import (
query_scalar,
start_in_background,
subprocess_capture,
wait_until,
)
@@ -40,58 +34,6 @@ def assert_abs_margin_ratio(a: float, b: float, margin_ratio: float):
assert abs(a - b) / a < margin_ratio, abs(a - b) / a
@contextmanager
def new_pageserver_service(
new_pageserver_dir: Path,
pageserver_bin: Path,
remote_storage_mock_path: Path,
pg_port: int,
http_port: int,
broker: Optional[NeonBroker],
pg_distrib_dir: Path,
):
"""
cannot use NeonPageserver yet because it depends on neon cli
which currently lacks support for multiple pageservers
"""
# actually run new pageserver
cmd = [
str(pageserver_bin),
"--workdir",
str(new_pageserver_dir),
"--update-config",
f"-c listen_pg_addr='localhost:{pg_port}'",
f"-c listen_http_addr='localhost:{http_port}'",
f"-c pg_distrib_dir='{pg_distrib_dir}'",
"-c id=2",
f"-c remote_storage={{local_path='{remote_storage_mock_path}'}}",
]
if broker is not None:
cmd.append(
f"-c broker_endpoint='{broker.client_url()}'",
)
pageserver_client = PageserverHttpClient(
port=http_port,
auth_token=None,
is_testing_enabled_or_skip=lambda: True, # TODO: check if testing really enabled
)
try:
pageserver_process = start_in_background(
cmd, new_pageserver_dir, "pageserver.log", pageserver_client.check_status
)
except Exception as e:
log.error(e)
pageserver_process.kill()
raise Exception(f"Failed to start pageserver as {cmd}, reason: {e}") from e
log.info("new pageserver started")
try:
yield pageserver_process
finally:
log.info("stopping new pageserver")
pageserver_process.kill()
@contextmanager
def pg_cur(endpoint):
with closing(endpoint.connect()) as conn:
@@ -201,7 +143,7 @@ def check_timeline_attached(
def switch_pg_to_new_pageserver(
env: NeonEnv,
origin_ps: NeonPageserver,
endpoint: Endpoint,
new_pageserver_port: int,
tenant_id: TenantId,
@@ -216,7 +158,7 @@ def switch_pg_to_new_pageserver(
endpoint.start()
timeline_to_detach_local_path = env.pageserver.timeline_dir(tenant_id, timeline_id)
timeline_to_detach_local_path = origin_ps.timeline_dir(tenant_id, timeline_id)
files_before_detach = os.listdir(timeline_to_detach_local_path)
assert (
"metadata" in files_before_detach
@@ -269,27 +211,32 @@ def test_tenant_relocation(
with_load: str,
):
neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS)
neon_env_builder.num_pageservers = 2
env = neon_env_builder.init_start()
tenant_id = TenantId("74ee8b079a0e437eb0afea7d26a07209")
# FIXME: Is this expected?
env.pageserver.allowed_errors.append(
env.pageservers[0].allowed_errors.append(
".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
)
# Needed for detach polling.
env.pageserver.allowed_errors.append(f".*NotFound: tenant {tenant_id}.*")
# Needed for detach polling on the original pageserver
env.pageservers[0].allowed_errors.append(f".*NotFound: tenant {tenant_id}.*")
# We will dual-attach in this test, so stale generations are expected
env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates.*")
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
remote_storage_mock_path = env.pageserver_remote_storage.root
# we use two branches to check that they are both relocated
# first branch is used for load, compute for second one is used to
# check that data is not lost
pageserver_http = env.pageserver.http_client()
origin_ps = env.pageservers[0]
destination_ps = env.pageservers[1]
origin_http = origin_ps.http_client()
destination_http = destination_ps.http_client()
_, initial_timeline_id = env.neon_cli.create_tenant(tenant_id)
log.info("tenant to relocate %s initial_timeline_id %s", tenant_id, initial_timeline_id)
@@ -302,7 +249,7 @@ def test_tenant_relocation(
timeline_id_main, current_lsn_main = populate_branch(
ep_main,
tenant_id=tenant_id,
ps_http=pageserver_http,
ps_http=origin_http,
create_table=True,
expected_sum=500500,
)
@@ -320,17 +267,17 @@ def test_tenant_relocation(
timeline_id_second, current_lsn_second = populate_branch(
ep_second,
tenant_id=tenant_id,
ps_http=pageserver_http,
ps_http=origin_http,
create_table=False,
expected_sum=1001000,
)
# wait until pageserver receives that data
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_main, current_lsn_main)
timeline_detail_main = pageserver_http.timeline_detail(tenant_id, timeline_id_main)
wait_for_last_record_lsn(origin_http, tenant_id, timeline_id_main, current_lsn_main)
timeline_detail_main = origin_http.timeline_detail(tenant_id, timeline_id_main)
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id_second, current_lsn_second)
timeline_detail_second = pageserver_http.timeline_detail(tenant_id, timeline_id_second)
wait_for_last_record_lsn(origin_http, tenant_id, timeline_id_second, current_lsn_second)
timeline_detail_second = origin_http.timeline_detail(tenant_id, timeline_id_second)
if with_load == "with_load":
# create load table
@@ -350,170 +297,149 @@ def test_tenant_relocation(
# if user creates a branch during migration
# it wont appear on the new pageserver
ensure_checkpoint(
pageserver_http=pageserver_http,
pageserver_http=origin_http,
tenant_id=tenant_id,
timeline_id=timeline_id_main,
current_lsn=current_lsn_main,
)
ensure_checkpoint(
pageserver_http=pageserver_http,
pageserver_http=origin_http,
tenant_id=tenant_id,
timeline_id=timeline_id_second,
current_lsn=current_lsn_second,
)
log.info("inititalizing new pageserver")
# bootstrap second pageserver
new_pageserver_dir = env.repo_dir / "new_pageserver"
new_pageserver_dir.mkdir()
# Migrate either by attaching from s3 or import/export basebackup
if method == "major":
cmd = [
"poetry",
"run",
"python",
str(base_dir / "scripts/export_import_between_pageservers.py"),
"--tenant-id",
str(tenant_id),
"--from-host",
"localhost",
"--from-http-port",
str(origin_http.port),
"--from-pg-port",
str(origin_ps.service_port.pg),
"--to-host",
"localhost",
"--to-http-port",
str(destination_http.port),
"--to-pg-port",
str(destination_ps.service_port.pg),
"--pg-distrib-dir",
str(neon_env_builder.pg_distrib_dir),
"--work-dir",
str(test_output_dir),
"--tmp-pg-port",
str(port_distributor.get_port()),
]
subprocess_capture(test_output_dir, cmd, check=True)
new_pageserver_pg_port = port_distributor.get_port()
new_pageserver_http_port = port_distributor.get_port()
log.info("new pageserver ports pg %s http %s", new_pageserver_pg_port, new_pageserver_http_port)
pageserver_bin = neon_binpath / "pageserver"
destination_ps.allowed_errors.append(
".*ignored .* unexpected bytes after the tar archive.*"
)
elif method == "minor":
# call to attach timeline to new pageserver
destination_ps.tenant_attach(tenant_id)
new_pageserver_http = PageserverHttpClient(
port=new_pageserver_http_port,
auth_token=None,
is_testing_enabled_or_skip=env.pageserver.is_testing_enabled_or_skip,
)
# wait for tenant to finish attaching
wait_until(
number_of_iterations=10,
interval=1,
func=lambda: assert_tenant_state(destination_http, tenant_id, "Active"),
)
with new_pageserver_service(
new_pageserver_dir,
pageserver_bin,
remote_storage_mock_path,
new_pageserver_pg_port,
new_pageserver_http_port,
neon_env_builder.broker,
neon_env_builder.pg_distrib_dir,
):
# Migrate either by attaching from s3 or import/export basebackup
if method == "major":
cmd = [
"poetry",
"run",
"python",
str(base_dir / "scripts/export_import_between_pageservers.py"),
"--tenant-id",
str(tenant_id),
"--from-host",
"localhost",
"--from-http-port",
str(pageserver_http.port),
"--from-pg-port",
str(env.pageserver.service_port.pg),
"--to-host",
"localhost",
"--to-http-port",
str(new_pageserver_http_port),
"--to-pg-port",
str(new_pageserver_pg_port),
"--pg-distrib-dir",
str(neon_env_builder.pg_distrib_dir),
"--work-dir",
str(test_output_dir),
"--tmp-pg-port",
str(port_distributor.get_port()),
]
subprocess_capture(test_output_dir, cmd, check=True)
elif method == "minor":
# call to attach timeline to new pageserver
new_pageserver_http.tenant_attach(tenant_id)
# wait for tenant to finish attaching
wait_until(
number_of_iterations=10,
interval=1,
func=lambda: assert_tenant_state(new_pageserver_http, tenant_id, "Active"),
)
check_timeline_attached(
new_pageserver_http,
tenant_id,
timeline_id_main,
timeline_detail_main,
current_lsn_main,
)
check_timeline_attached(
new_pageserver_http,
tenant_id,
timeline_id_second,
timeline_detail_second,
current_lsn_second,
)
# rewrite neon cli config to use new pageserver for basebackup to start new compute
lines = (env.repo_dir / "config").read_text().splitlines()
for i, line in enumerate(lines):
if line.startswith("listen_http_addr"):
lines[i] = f"listen_http_addr = 'localhost:{new_pageserver_http_port}'"
if line.startswith("listen_pg_addr"):
lines[i] = f"listen_pg_addr = 'localhost:{new_pageserver_pg_port}'"
(env.repo_dir / "config").write_text("\n".join(lines))
old_local_path_main = switch_pg_to_new_pageserver(
env,
ep_main,
new_pageserver_pg_port,
check_timeline_attached(
destination_http,
tenant_id,
timeline_id_main,
timeline_detail_main,
current_lsn_main,
)
old_local_path_second = switch_pg_to_new_pageserver(
env,
ep_second,
new_pageserver_pg_port,
check_timeline_attached(
destination_http,
tenant_id,
timeline_id_second,
timeline_detail_second,
current_lsn_second,
)
# detach tenant from old pageserver before we check
# that all the data is there to be sure that old pageserver
# is no longer involved, and if it is, we will see the error
pageserver_http.tenant_detach(tenant_id)
# rewrite neon cli config to use new pageserver for basebackup to start new compute
lines = (env.repo_dir / "config").read_text().splitlines()
for i, line in enumerate(lines):
if line.startswith("listen_http_addr"):
lines[i] = f"listen_http_addr = 'localhost:{destination_http.port}'"
if line.startswith("listen_pg_addr"):
lines[i] = f"listen_pg_addr = 'localhost:{destination_ps.service_port.pg}'"
(env.repo_dir / "config").write_text("\n".join(lines))
# Wait a little, so that the detach operation has time to finish.
wait_tenant_status_404(pageserver_http, tenant_id, iterations=100, interval=1)
old_local_path_main = switch_pg_to_new_pageserver(
origin_ps,
ep_main,
destination_ps.service_port.pg,
tenant_id,
timeline_id_main,
)
post_migration_check(ep_main, 500500, old_local_path_main)
post_migration_check(ep_second, 1001000, old_local_path_second)
old_local_path_second = switch_pg_to_new_pageserver(
origin_ps,
ep_second,
destination_ps.service_port.pg,
tenant_id,
timeline_id_second,
)
# ensure that we can successfully read all relations on the new pageserver
with pg_cur(ep_second) as cur:
cur.execute(
"""
DO $$
DECLARE
r RECORD;
BEGIN
FOR r IN
SELECT relname FROM pg_class WHERE relkind='r'
LOOP
RAISE NOTICE '%', r.relname;
EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname;
END LOOP;
END$$;
"""
)
# detach tenant from old pageserver before we check
# that all the data is there to be sure that old pageserver
# is no longer involved, and if it is, we will see the error
origin_http.tenant_detach(tenant_id)
if with_load == "with_load":
assert load_ok_event.wait(3)
log.info("stopping load thread")
load_stop_event.set()
load_thread.join(timeout=10)
log.info("load thread stopped")
# Wait a little, so that the detach operation has time to finish.
wait_tenant_status_404(origin_http, tenant_id, iterations=100, interval=1)
# bring old pageserver back for clean shutdown via neon cli
# new pageserver will be shut down by the context manager
lines = (env.repo_dir / "config").read_text().splitlines()
for i, line in enumerate(lines):
if line.startswith("listen_http_addr"):
lines[i] = f"listen_http_addr = 'localhost:{env.pageserver.service_port.http}'"
if line.startswith("listen_pg_addr"):
lines[i] = f"listen_pg_addr = 'localhost:{env.pageserver.service_port.pg}'"
(env.repo_dir / "config").write_text("\n".join(lines))
post_migration_check(ep_main, 500500, old_local_path_main)
post_migration_check(ep_second, 1001000, old_local_path_second)
# ensure that we can successfully read all relations on the new pageserver
with pg_cur(ep_second) as cur:
cur.execute(
"""
DO $$
DECLARE
r RECORD;
BEGIN
FOR r IN
SELECT relname FROM pg_class WHERE relkind='r'
LOOP
RAISE NOTICE '%', r.relname;
EXECUTE 'SELECT count(*) FROM quote_ident($1)' USING r.relname;
END LOOP;
END$$;
"""
)
if with_load == "with_load":
assert load_ok_event.wait(3)
log.info("stopping load thread")
load_stop_event.set()
load_thread.join(timeout=10)
log.info("load thread stopped")
# bring old pageserver back for clean shutdown via neon cli
# new pageserver will be shut down by the context manager
lines = (env.repo_dir / "config").read_text().splitlines()
for i, line in enumerate(lines):
if line.startswith("listen_http_addr"):
lines[i] = f"listen_http_addr = 'localhost:{origin_ps.service_port.http}'"
if line.startswith("listen_pg_addr"):
lines[i] = f"listen_pg_addr = 'localhost:{origin_ps.service_port.pg}'"
(env.repo_dir / "config").write_text("\n".join(lines))
# Simulate hard crash of pageserver and re-attach a tenant with a branch
@@ -571,7 +497,7 @@ def test_emergency_relocate_with_branches_slow_replay(
# Attach and wait a few seconds to give it time to load the tenants, attach to the
# safekeepers, and to stream and ingest the WAL up to the pause-point.
before_attach_time = time.time()
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
time.sleep(3)
# The wal ingestion on the main timeline should now be paused at the fail point.
@@ -718,7 +644,7 @@ def test_emergency_relocate_with_branches_createdb(
# ingest the WAL, but let's make this less dependent on accidental timing.
pageserver_http.configure_failpoints([("wal-ingest-logical-message-sleep", "return(5000)")])
before_attach_time = time.time()
pageserver_http.tenant_attach(tenant_id)
env.pageserver.tenant_attach(tenant_id)
child_endpoint.start()
with child_endpoint.cursor(dbname="neondb") as cur:

View File

@@ -297,8 +297,8 @@ def test_tenant_redownloads_truncated_file_on_startup(
assert os.stat(path).st_size == expected_size, "truncated layer should had been re-downloaded"
# the remote side of local_layer_truncated
remote_layer_path = (
env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) / path.name
remote_layer_path = env.pageserver_remote_storage.remote_layer_path(
tenant_id, timeline_id, path.name
)
# if the upload ever was ongoing, this check would be racy, but at least one

View File

@@ -396,7 +396,7 @@ def test_timeline_resurrection_on_attach(
##### Second start, restore the data and ensure that we see only timeline that wasnt deleted
env.pageserver.start()
ps_http.tenant_attach(tenant_id=tenant_id)
env.pageserver.tenant_attach(tenant_id=tenant_id)
wait_until_tenant_active(ps_http, tenant_id=tenant_id, iterations=10, period=0.5)
@@ -897,7 +897,7 @@ def test_timeline_delete_resumed_on_attach(
env.pageserver.start()
# now we call attach
ps_http.tenant_attach(tenant_id=tenant_id)
env.pageserver.tenant_attach(tenant_id=tenant_id)
# delete should be resumed
wait_timeline_detail_404(ps_http, env.initial_tenant, timeline_id, iterations=iterations)

View File

@@ -298,17 +298,21 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
# and wait till remote_consistent_lsn propagates to all safekeepers
#
# TODO: this executes long as timeline on safekeeper is immediately
# deactivated once rcl reaches pageserver one, and thus we generally wait
# till pageserver reconnects to all safekeepers one by one here. Timeline
# status on safekeeper should take into account peers state as well.
# This timeout is long: safekeepers learn about remote_consistent_lsn updates when a pageserver
# connects, receives a PrimaryKeepAlive, and sends a PageserverFeedback. So the timeout has to encompass:
# - pageserver deletion_queue to validate + publish the remote_consistent_lsn
# - pageserver to reconnect to all safekeepers one by one, with multi-second delays between
#
# TODO: timeline status on safekeeper should take into account peers state as well.
rcl_propagate_secs = 60
started_at = time.time()
while True:
stat_after = [cli.timeline_status(tenant_id, timeline_id) for cli in clients]
if all([s_after.remote_consistent_lsn >= new_rcl for s_after in stat_after]):
break
elapsed = time.time() - started_at
if elapsed > 30:
if elapsed > rcl_propagate_secs:
raise RuntimeError(
f"timed out waiting {elapsed:.0f}s for remote_consistent_lsn propagation: status before {stat_before}, status current {stat_after}"
)