Compare commits

1 commit

Author: Konstantin Knizhnik
SHA1: 9f1e2aa0da
Message: Implement replica prewarm
Date: 2024-10-21 17:10:48 +03:00

127 changed files with 1747 additions and 2910 deletions

View File

@@ -27,7 +27,7 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

View File

@@ -124,28 +124,28 @@ jobs:
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v17 build
id: cache_pg_17
uses: actions/cache@v4
with:
path: pg_install/v17
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'

View File

@@ -83,7 +83,7 @@ jobs:
runs-on: ${{ matrix.RUNNER }}
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -178,7 +178,7 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -280,7 +280,7 @@ jobs:
region_id_default=${{ env.DEFAULT_REGION_ID }}
runner_default='["self-hosted", "us-east-2", "x64"]'
runner_azure='["self-hosted", "eastus2", "x64"]'
image_default="neondatabase/build-tools:pinned-bookworm"
image_default="neondatabase/build-tools:pinned"
matrix='{
"pg_version" : [
16
@@ -299,9 +299,9 @@ jobs:
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned-bookworm" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb","runner": '"$runner_azure"', "image": "neondatabase/build-tools:pinned" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
}'
@@ -665,7 +665,7 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -772,7 +772,7 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
@@ -877,7 +877,7 @@ jobs:
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

View File

@@ -82,7 +82,7 @@ jobs:
- uses: docker/build-push-action@v6
with:
file: build-tools.Dockerfile
file: Dockerfile.build-tools
context: .
provenance: false
push: true

View File

@@ -683,7 +683,7 @@ jobs:
provenance: false
push: true
pull: true
file: compute/compute-node.Dockerfile
file: compute/Dockerfile.compute-node
cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
tags: |
@@ -703,7 +703,7 @@ jobs:
provenance: false
push: true
pull: true
file: compute/compute-node.Dockerfile
file: compute/Dockerfile.compute-node
target: neon-pg-ext-test
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
@@ -728,7 +728,7 @@ jobs:
provenance: false
push: true
pull: true
file: compute/compute-node.Dockerfile
file: compute/Dockerfile.compute-node
cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-tools-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
tags: |

View File

@@ -31,7 +31,7 @@ jobs:
id: get-build-tools-tag
env:
IMAGE_TAG: |
${{ hashFiles('build-tools.Dockerfile',
${{ hashFiles('Dockerfile.build-tools',
'.github/workflows/check-build-tools-image.yml',
'.github/workflows/build-build-tools-image.yml') }}
run: |

View File

@@ -31,7 +31,7 @@ jobs:
runs-on: us-east-2
container:
image: neondatabase/build-tools:pinned-bookworm
image: neondatabase/build-tools:pinned
options: --init
steps:

View File

@@ -112,7 +112,7 @@ jobs:
# This isn't exhaustive, just the paths that are most directly compute-related.
# For example, compute_ctl also depends on libs/utils, but we don't trigger
# an e2e run on that.
vendor/*|pgxn/*|compute_tools/*|libs/vm_monitor/*|compute/compute-node.Dockerfile)
vendor/*|pgxn/*|compute_tools/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
;;
*)

View File

@@ -142,7 +142,7 @@ RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/sourc
# Use the same version of libicu as the compute nodes so that
# clusters created using initdb on pageserver can be used by computes.
#
# TODO: at this time, compute-node.Dockerfile uses the debian bullseye libicu
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
# Debian has a few patches on top of 67.1 that we're not adding here.
ENV ICU_VERSION=67.1

View File

@@ -297,7 +297,7 @@ clean: postgres-clean neon-pg-clean-ext
# This removes everything
.PHONY: distclean
distclean:
$(RM) -r $(POSTGRES_INSTALL_DIR)
rm -rf $(POSTGRES_INSTALL_DIR)
$(CARGO_CMD_PREFIX) cargo clean
.PHONY: fmt
@@ -329,7 +329,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/pgindent --typedefs postgres-$*-typedefs-full.list \
$(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/ \
--excludes $(ROOT_PROJECT_DIR)/vendor/postgres-$*/src/tools/pgindent/exclude_file_patterns
$(RM) pg*.BAK
rm -f pg*.BAK
# Indent pgxn/neon.
.PHONY: neon-pgindent

View File

@@ -975,8 +975,8 @@ ARG PG_VERSION
RUN case "${PG_VERSION}" in "v17") \
echo "pg_session_jwt does not yet have a release that supports pg17" && exit 0;; \
esac && \
wget https://github.com/neondatabase/pg_session_jwt/archive/e1310b08ba51377a19e0559e4d1194883b9b2ba2.tar.gz -O pg_session_jwt.tar.gz && \
echo "837932a077888d5545fd54b0abcc79e5f8e37017c2769a930afc2f5c94df6f4e pg_session_jwt.tar.gz" | sha256sum --check && \
wget https://github.com/neondatabase/pg_session_jwt/archive/e642528f429dd3f5403845a50191b78d434b84a6.tar.gz -O pg_session_jwt.tar.gz && \
echo "1a69210703cc91224785e59a0a67562dd9eed9a0914ac84b11447582ca0d5b93 pg_session_jwt.tar.gz" | sha256sum --check && \
mkdir pg_session_jwt-src && cd pg_session_jwt-src && tar xzf ../pg_session_jwt.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release
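
The build step above pins the extension source to an exact archive by SHA-256. A hedged Rust equivalent of that integrity check, assuming the sha2 and hex crates (neither is part of this diff):

use sha2::{Digest, Sha256};

fn verify_archive() -> anyhow::Result<()> {
    let bytes = std::fs::read("pg_session_jwt.tar.gz")?;
    let digest = hex::encode(Sha256::digest(&bytes));
    // Expected digest copied from the RUN step above.
    let expected = "1a69210703cc91224785e59a0a67562dd9eed9a0914ac84b11447582ca0d5b93";
    anyhow::ensure!(digest == expected, "pg_session_jwt.tar.gz checksum mismatch");
    Ok(())
}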

View File

@@ -34,7 +34,7 @@ sql_exporter_autoscaling.yml: $(jsonnet_files)
.PHONY: clean
clean:
$(RM) \
rm --force \
etc/neon_collector.yml \
etc/neon_collector_autoscaling.yml \
etc/sql_exporter.yml \

View File

@@ -1,7 +1,7 @@
This directory contains files that are needed to build the compute
images, or included in the compute images.
compute-node.Dockerfile
Dockerfile.compute-node
To build the compute image
vm-image-spec.yaml
@@ -14,8 +14,8 @@ etc/
patches/
Some extensions need to be patched to work with Neon. This
directory contains such patches. They are applied to the extension
sources in compute-node.Dockerfile
sources in Dockerfile.compute-node
In addition to these, postgres itself, the neon postgres extension,
and compute_ctl are built and copied into the compute image by
compute-node.Dockerfile.
Dockerfile.compute-node.

View File

@@ -1,7 +1,7 @@
local neon = import 'neon.libsonnet';
local pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_timed.sql';
local pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_timed.17.sql';
local pg_stat_bgwriter = importstr 'sql_exporter/checkpoints_req.sql';
local pg_stat_checkpointer = importstr 'sql_exporter/checkpoints_req.17.sql';
{
metric_name: 'checkpoints_timed',

View File

@@ -1,10 +1,5 @@
SELECT
slot_name,
pg_wal_lsn_diff(
CASE
WHEN pg_is_in_recovery() THEN pg_last_wal_replay_lsn()
ELSE pg_current_wal_lsn()
END,
restart_lsn)::FLOAT8 AS retained_wal
pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
FROM pg_replication_slots
WHERE active = false;
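
A hedged sketch of consuming this exporter query from Rust with the tokio-postgres crate; the client setup and crate choice are assumptions, not part of this diff:

async fn retained_wal(client: &tokio_postgres::Client) -> Result<(), tokio_postgres::Error> {
    let rows = client
        .query(
            "SELECT slot_name::TEXT AS slot_name, \
                    pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal \
             FROM pg_replication_slots WHERE active = false",
            &[],
        )
        .await?;
    for row in rows {
        let slot: String = row.get("slot_name");
        let retained: f64 = row.get("retained_wal");
        println!("slot {slot} retains {retained} bytes of WAL");
    }
    Ok(())
}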

View File

@@ -20,16 +20,7 @@ use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use postgres_backend::AuthType;
use reqwest::Method;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{
ffi::OsStr,
fs,
net::SocketAddr,
path::PathBuf,
process::ExitStatus,
str::FromStr,
sync::OnceLock,
time::{Duration, Instant},
};
use std::{fs, net::SocketAddr, path::PathBuf, str::FromStr, sync::OnceLock};
use tokio::process::Command;
use tracing::instrument;
use url::Url;
@@ -177,6 +168,16 @@ impl StorageController {
.expect("non-Unicode path")
}
/// PIDFile for the postgres instance used to store storage controller state
fn postgres_pid_file(&self) -> Utf8PathBuf {
Utf8PathBuf::from_path_buf(
self.env
.base_data_dir
.join("storage_controller_postgres.pid"),
)
.expect("non-Unicode path")
}
/// Find the directory containing postgres subdirectories, such as `bin` and `lib`
///
/// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
@@ -295,31 +296,6 @@ impl StorageController {
.map_err(anyhow::Error::new)
}
/// Wrapper for the pg_ctl binary, which we spawn as a short-lived subprocess when starting and stopping postgres
async fn pg_ctl<I, S>(&self, args: I) -> ExitStatus
where
I: IntoIterator<Item = S>,
S: AsRef<OsStr>,
{
let pg_bin_dir = self.get_pg_bin_dir().await.unwrap();
let bin_path = pg_bin_dir.join("pg_ctl");
let pg_lib_dir = self.get_pg_lib_dir().await.unwrap();
let envs = [
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
];
Command::new(bin_path)
.args(args)
.envs(envs)
.spawn()
.expect("Failed to spawn pg_ctl, binary_missing?")
.wait()
.await
.expect("Failed to wait for pg_ctl termination")
}
pub async fn start(&self, start_args: NeonStorageControllerStartArgs) -> anyhow::Result<()> {
let instance_dir = self.storage_controller_instance_dir(start_args.instance_id);
if let Err(err) = tokio::fs::create_dir(&instance_dir).await {
@@ -428,34 +404,20 @@ impl StorageController {
db_start_args
);
let db_start_status = self.pg_ctl(db_start_args).await;
let start_timeout: Duration = start_args.start_timeout.into();
let db_start_deadline = Instant::now() + start_timeout;
if !db_start_status.success() {
return Err(anyhow::anyhow!(
"Failed to start postgres {}",
db_start_status.code().unwrap()
));
}
loop {
if Instant::now() > db_start_deadline {
return Err(anyhow::anyhow!("Timed out waiting for postgres to start"));
}
match self.pg_isready(&pg_bin_dir, postgres_port).await {
Ok(true) => {
tracing::info!("storage controller postgres is now ready");
break;
}
Ok(false) => {
tokio::time::sleep(Duration::from_millis(100)).await;
}
Err(e) => {
tracing::warn!("Failed to check postgres status: {e}")
}
}
}
background_process::start_process(
"storage_controller_db",
&self.env.base_data_dir,
pg_bin_dir.join("pg_ctl").as_std_path(),
db_start_args,
vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
],
background_process::InitialPidFile::Create(self.postgres_pid_file()),
&start_args.start_timeout,
|| self.pg_isready(&pg_bin_dir, postgres_port),
)
.await?;
self.setup_database(postgres_port).await?;
}
@@ -621,10 +583,15 @@ impl StorageController {
}
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
println!("Stopping storage controller database...");
let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
let stop_status = self.pg_ctl(pg_stop_args).await;
let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
.args(pg_stop_args)
.spawn()?
.wait()
.await?;
if !stop_status.success() {
match self.is_postgres_running().await {
Ok(false) => {
@@ -645,9 +612,14 @@ impl StorageController {
async fn is_postgres_running(&self) -> anyhow::Result<bool> {
let pg_data_path = self.env.base_data_dir.join("storage_controller_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
let pg_status_args = ["-D", &pg_data_path.to_string_lossy(), "status"];
let status_exitcode = self.pg_ctl(pg_status_args).await;
let status_exitcode = Command::new(pg_bin_dir.join("pg_ctl"))
.args(pg_status_args)
.spawn()?
.wait()
.await?;
// pg_ctl status returns this exit code if postgres is not running: in this case it is
// fine that stop failed. Otherwise it is an error that stop failed.
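
For reference, a condensed sketch of the poll-until-ready loop that background_process::start_process takes over from the removed code above (names as in this diff, error handling simplified):

let deadline = Instant::now() + start_timeout;
loop {
    if Instant::now() > deadline {
        return Err(anyhow::anyhow!("Timed out waiting for postgres to start"));
    }
    match self.pg_isready(&pg_bin_dir, postgres_port).await {
        // Postgres accepts connections: startup is complete.
        Ok(true) => break,
        // Not ready yet: back off briefly and poll again.
        Ok(false) => tokio::time::sleep(Duration::from_millis(100)).await,
        Err(e) => tracing::warn!("Failed to check postgres status: {e}"),
    }
}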

View File

@@ -5,7 +5,7 @@
Currently we build two main images:
- [neondatabase/neon](https://hub.docker.com/repository/docker/neondatabase/neon) — image with pre-built `pageserver`, `safekeeper` and `proxy` binaries and all the required runtime dependencies. Built from [/Dockerfile](/Dockerfile).
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/compute-node.Dockerfile).
- [neondatabase/compute-node-v16](https://hub.docker.com/repository/docker/neondatabase/compute-node-v16) — compute node image with pre-built Postgres binaries from [neondatabase/postgres](https://github.com/neondatabase/postgres). Similar images exist for v15 and v14. Built from [/compute-node/Dockerfile](/compute/Dockerfile.compute-node).
And additional intermediate image:
@@ -56,7 +56,7 @@ CREATE TABLE
postgres=# insert into t values(1, 1);
INSERT 0 1
postgres=# select * from t;
key | value
key | value
-----+-------
1 | 1
(1 row)
@@ -84,4 +84,4 @@ Access http://localhost:9001 and sign in.
- Username: `minio`
- Password: `password`
You can see durable pages and WAL data in `neon` bucket.
You can see durable pages and WAL data in `neon` bucket.

View File

@@ -684,25 +684,6 @@ pub struct TimelineArchivalConfigRequest {
pub state: TimelineArchivalState,
}
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelinesInfoAndOffloaded {
pub timelines: Vec<TimelineInfo>,
pub offloaded: Vec<OffloadedTimelineInfo>,
}
/// Analog of [`TimelineInfo`] for offloaded timelines.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct OffloadedTimelineInfo {
pub tenant_id: TenantShardId,
pub timeline_id: TimelineId,
/// Whether the timeline has a parent it has been branched off from or not
pub ancestor_timeline_id: Option<TimelineId>,
/// Whether to retain the branch lsn at the ancestor or not
pub ancestor_retain_lsn: Option<Lsn>,
/// The time point when the timeline was archived
pub archived_at: chrono::DateTime<chrono::Utc>,
}
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
#[derive(Debug, Serialize, Deserialize, Clone)]
pub struct TimelineInfo {

View File

@@ -184,6 +184,7 @@ pub const XLOG_NEON_HEAP_UPDATE: u8 = 0x20;
pub const XLOG_NEON_HEAP_HOT_UPDATE: u8 = 0x30;
pub const XLOG_NEON_HEAP_LOCK: u8 = 0x40;
pub const XLOG_NEON_HEAP_MULTI_INSERT: u8 = 0x50;
pub const XLOG_NEON_LFC_PREWARM: u8 = 0x60;
pub const XLOG_NEON_HEAP_VISIBLE: u8 = 0x40;
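
The 0x10-stepped values suggest these opcodes occupy the high nibble of a record's info byte, following the usual Postgres convention (an assumption here, not stated in this diff); a consumer would mask before matching:

// Hedged sketch: dispatch on the high nibble of the info byte.
fn describe_neon_record(xl_info: u8) -> &'static str {
    match xl_info & 0xF0 {
        XLOG_NEON_LFC_PREWARM => "LFC prewarm",
        XLOG_NEON_HEAP_MULTI_INSERT => "heap multi-insert",
        _ => "other",
    }
}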

View File

@@ -164,11 +164,7 @@ fn criterion_benchmark(c: &mut Criterion) {
let conf: &'static PageServerConf = Box::leak(Box::new(
pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
));
virtual_file::init(
16384,
virtual_file::io_engine_for_bench(),
conf.virtual_file_io_mode,
);
virtual_file::init(16384, virtual_file::io_engine_for_bench());
page_cache::init(conf.page_cache_size);
{

View File

@@ -11,7 +11,7 @@ pub(crate) async fn main(cmd: &IndexPartCmd) -> anyhow::Result<()> {
match cmd {
IndexPartCmd::Dump { path } => {
let bytes = tokio::fs::read(path).await.context("read file")?;
let des: IndexPart = IndexPart::from_json_bytes(&bytes).context("deserialize")?;
let des: IndexPart = IndexPart::from_s3_bytes(&bytes).context("deserialize")?;
let output = serde_json::to_string_pretty(&des).context("serialize output")?;
println!("{output}");
Ok(())

View File

@@ -7,7 +7,6 @@ use camino::{Utf8Path, Utf8PathBuf};
use pageserver::context::{DownloadBehavior, RequestContext};
use pageserver::task_mgr::TaskKind;
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::ops::Range;
@@ -153,11 +152,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
// Initialize virtual_file (file descriptor cache) and page cache which are needed to access layer persistent B-Tree.
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
);
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
pageserver::page_cache::init(100);
let mut total_delta_layers = 0usize;

View File

@@ -11,7 +11,6 @@ use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
use pageserver::tenant::storage_layer::{delta_layer, image_layer};
use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer};
use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
use pageserver::virtual_file::api::IoMode;
use pageserver::{page_cache, virtual_file};
use pageserver::{
repository::{Key, KEY_SIZE},
@@ -60,11 +59,7 @@ pub(crate) enum LayerCmd {
async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
);
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
page_cache::init(100);
let file = VirtualFile::open(path, ctx).await?;
let file_id = page_cache::next_file_id();
@@ -195,11 +190,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
new_tenant_id,
new_timeline_id,
} => {
pageserver::virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
);
pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
pageserver::page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

View File

@@ -24,7 +24,7 @@ use pageserver::{
page_cache,
task_mgr::TaskKind,
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
virtual_file::{self, api::IoMode},
virtual_file,
};
use pageserver_api::shard::TenantShardId;
use postgres_ffi::ControlFileData;
@@ -205,11 +205,7 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
// Basic initialization of things that don't change after startup
virtual_file::init(
10,
virtual_file::api::IoEngineKind::StdFs,
IoMode::preferred(),
);
virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
dump_layerfile_from_path(path, true, &ctx).await

View File

@@ -167,11 +167,7 @@ fn main() -> anyhow::Result<()> {
let scenario = failpoint_support::init();
// Basic initialization of things that don't change after startup
virtual_file::init(
conf.max_file_descriptors,
conf.virtual_file_io_engine,
conf.virtual_file_io_mode,
);
virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
page_cache::init(conf.page_cache_size);
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
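
Both init calls are one-shot global initializations. A hedged sketch of the OnceCell pattern behind this style of init, mirroring the static OnceCell that page_cache uses later in this diff (names here are illustrative):

use once_cell::sync::OnceCell;

static BUFFER: OnceCell<Vec<u8>> = OnceCell::new();

fn init(size: usize) {
    // Fails loudly if a second initialization is attempted.
    BUFFER
        .set(vec![0u8; size])
        .expect("init must only be called once");
}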

View File

@@ -26,7 +26,6 @@ use pageserver_api::models::LocationConfigListResponse;
use pageserver_api::models::LocationConfigMode;
use pageserver_api::models::LsnLease;
use pageserver_api::models::LsnLeaseRequest;
use pageserver_api::models::OffloadedTimelineInfo;
use pageserver_api::models::ShardParameters;
use pageserver_api::models::TenantDetails;
use pageserver_api::models::TenantLocationConfigRequest;
@@ -38,7 +37,6 @@ use pageserver_api::models::TenantShardSplitRequest;
use pageserver_api::models::TenantShardSplitResponse;
use pageserver_api::models::TenantSorting;
use pageserver_api::models::TimelineArchivalConfigRequest;
use pageserver_api::models::TimelinesInfoAndOffloaded;
use pageserver_api::models::TopTenantShardItem;
use pageserver_api::models::TopTenantShardsRequest;
use pageserver_api::models::TopTenantShardsResponse;
@@ -83,7 +81,6 @@ use crate::tenant::timeline::CompactFlags;
use crate::tenant::timeline::CompactionError;
use crate::tenant::timeline::Timeline;
use crate::tenant::GetTimelineError;
use crate::tenant::OffloadedTimeline;
use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
use crate::{disk_usage_eviction_task, tenant};
use pageserver_api::models::{
@@ -480,24 +477,6 @@ async fn build_timeline_info_common(
Ok(info)
}
fn build_timeline_offloaded_info(offloaded: &Arc<OffloadedTimeline>) -> OffloadedTimelineInfo {
let &OffloadedTimeline {
tenant_shard_id,
timeline_id,
ancestor_retain_lsn,
ancestor_timeline_id,
archived_at,
..
} = offloaded.as_ref();
OffloadedTimelineInfo {
tenant_id: tenant_shard_id,
timeline_id,
ancestor_retain_lsn,
ancestor_timeline_id,
archived_at: archived_at.and_utc(),
}
}
// healthcheck handler
async fn status_handler(
request: Request<Body>,
@@ -664,7 +643,7 @@ async fn timeline_list_handler(
)
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
.await
.context("Failed to build timeline info")
.context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
.map_err(ApiError::InternalServerError)?;
response_data.push(timeline_info);
@@ -679,62 +658,6 @@ async fn timeline_list_handler(
json_response(StatusCode::OK, response_data)
}
async fn timeline_and_offloaded_list_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let include_non_incremental_logical_size: Option<bool> =
parse_query_param(&request, "include-non-incremental-logical-size")?;
let force_await_initial_logical_size: Option<bool> =
parse_query_param(&request, "force-await-initial-logical-size")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
let state = get_state(&request);
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
let response_data = async {
let tenant = state
.tenant_manager
.get_attached_tenant_shard(tenant_shard_id)?;
tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
let (timelines, offloadeds) = tenant.list_timelines_and_offloaded();
let mut timeline_infos = Vec::with_capacity(timelines.len());
for timeline in timelines {
let timeline_info = build_timeline_info(
&timeline,
include_non_incremental_logical_size.unwrap_or(false),
force_await_initial_logical_size.unwrap_or(false),
&ctx,
)
.instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
.await
.context("Failed to build timeline info")
.map_err(ApiError::InternalServerError)?;
timeline_infos.push(timeline_info);
}
let offloaded_infos = offloadeds
.into_iter()
.map(|offloaded| build_timeline_offloaded_info(&offloaded))
.collect::<Vec<_>>();
let res = TimelinesInfoAndOffloaded {
timelines: timeline_infos,
offloaded: offloaded_infos,
};
Ok::<TimelinesInfoAndOffloaded, ApiError>(res)
}
.instrument(info_span!("timeline_and_offloaded_list",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug()))
.await?;
json_response(StatusCode::OK, response_data)
}
async fn timeline_preserve_initdb_handler(
request: Request<Body>,
_cancel: CancellationToken,
@@ -3070,9 +2993,6 @@ pub fn make_router(
.get("/v1/tenant/:tenant_shard_id/timeline", |r| {
api_handler(r, timeline_list_handler)
})
.get("/v1/tenant/:tenant_shard_id/timeline_and_offloaded", |r| {
api_handler(r, timeline_and_offloaded_list_handler)
})
.post("/v1/tenant/:tenant_shard_id/timeline", |r| {
api_handler(r, timeline_create_handler)
})

View File

@@ -2092,7 +2092,6 @@ pub(crate) struct WalIngestMetrics {
pub(crate) records_received: IntCounter,
pub(crate) records_committed: IntCounter,
pub(crate) records_filtered: IntCounter,
pub(crate) gap_blocks_zeroed_on_rel_extend: IntCounter,
}
pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMetrics {
@@ -2116,11 +2115,6 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
"Number of WAL records filtered out due to sharding"
)
.expect("failed to define a metric"),
gap_blocks_zeroed_on_rel_extend: register_int_counter!(
"pageserver_gap_blocks_zeroed_on_rel_extend",
"Total number of zero gap blocks written on relation extends"
)
.expect("failed to define a metric"),
});
pub(crate) static WAL_REDO_TIME: Lazy<Histogram> = Lazy::new(|| {
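
All the counters in this block follow the same register-once pattern; a hedged, minimal instance (the metric name below is hypothetical, for illustration only):

static EXAMPLE_COUNTER: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "pageserver_example_total", // hypothetical metric name
        "Example counter, registered once and bumped at call sites"
    )
    .expect("failed to define a metric")
});
// Call sites then simply do: EXAMPLE_COUNTER.inc();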

View File

@@ -82,7 +82,6 @@ use once_cell::sync::OnceCell;
use crate::{
context::RequestContext,
metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics},
virtual_file::{IoBufferMut, IoPageSlice},
};
static PAGE_CACHE: OnceCell<PageCache> = OnceCell::new();
@@ -145,7 +144,7 @@ struct SlotInner {
key: Option<CacheKey>,
// for `coalesce_readers_permit`
permit: std::sync::Mutex<Weak<PinnedSlotsPermit>>,
buf: IoPageSlice<'static>,
buf: &'static mut [u8; PAGE_SZ],
}
impl Slot {
@@ -235,13 +234,13 @@ impl std::ops::Deref for PageReadGuard<'_> {
type Target = [u8; PAGE_SZ];
fn deref(&self) -> &Self::Target {
self.slot_guard.buf.deref()
self.slot_guard.buf
}
}
impl AsRef<[u8; PAGE_SZ]> for PageReadGuard<'_> {
fn as_ref(&self) -> &[u8; PAGE_SZ] {
self.slot_guard.buf.as_ref()
self.slot_guard.buf
}
}
@@ -267,7 +266,7 @@ enum PageWriteGuardState<'i> {
impl std::ops::DerefMut for PageWriteGuard<'_> {
fn deref_mut(&mut self) -> &mut Self::Target {
match &mut self.state {
PageWriteGuardState::Invalid { inner, _permit } => inner.buf.deref_mut(),
PageWriteGuardState::Invalid { inner, _permit } => inner.buf,
PageWriteGuardState::Downgraded => unreachable!(),
}
}
@@ -278,7 +277,7 @@ impl std::ops::Deref for PageWriteGuard<'_> {
fn deref(&self) -> &Self::Target {
match &self.state {
PageWriteGuardState::Invalid { inner, _permit } => inner.buf.deref(),
PageWriteGuardState::Invalid { inner, _permit } => inner.buf,
PageWriteGuardState::Downgraded => unreachable!(),
}
}
@@ -644,7 +643,7 @@ impl PageCache {
// We could use Vec::leak here, but that potentially also leaks
// uninitialized reserved capacity. With into_boxed_slice and Box::leak
// this is avoided.
let page_buffer = IoBufferMut::with_capacity_zeroed(num_pages * PAGE_SZ).leak();
let page_buffer = Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
let size_metrics = &crate::metrics::PAGE_CACHE_SIZE;
size_metrics.max_bytes.set_page_sz(num_pages);
@@ -653,8 +652,7 @@ impl PageCache {
let slots = page_buffer
.chunks_exact_mut(PAGE_SZ)
.map(|chunk| {
// SAFETY: Each chunk has `PAGE_SZ` (8192) bytes, greater than 512, still aligned.
let buf = unsafe { IoPageSlice::new_unchecked(chunk.try_into().unwrap()) };
let buf: &mut [u8; PAGE_SZ] = chunk.try_into().unwrap();
Slot {
inner: tokio::sync::RwLock::new(SlotInner {
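
The replacement buffer setup relies on leaking one large allocation and slicing it into fixed-size pages; a self-contained sketch of that pattern, with PAGE_SZ assumed to be 8192:

fn main() {
    const PAGE_SZ: usize = 8192; // assumed page size
    let num_pages = 4;
    let page_buffer: &'static mut [u8] =
        Box::leak(vec![0u8; num_pages * PAGE_SZ].into_boxed_slice());
    let slots: Vec<&'static mut [u8; PAGE_SZ]> = page_buffer
        .chunks_exact_mut(PAGE_SZ)
        .map(|chunk| chunk.try_into().unwrap()) // each chunk is exactly PAGE_SZ bytes
        .collect();
    assert_eq!(slots.len(), num_pages);
}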

View File

@@ -16,7 +16,6 @@ use anyhow::{bail, Context};
use arc_swap::ArcSwap;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use chrono::NaiveDateTime;
use enumset::EnumSet;
use futures::stream::FuturesUnordered;
use futures::StreamExt;
@@ -32,10 +31,6 @@ use pageserver_api::shard::TenantShardId;
use remote_storage::DownloadError;
use remote_storage::GenericRemoteStorage;
use remote_storage::TimeoutOrCancel;
use remote_timeline_client::manifest::{
OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION,
};
use remote_timeline_client::UploadQueueNotReadyError;
use std::collections::BTreeMap;
use std::fmt;
use std::future::Future;
@@ -70,14 +65,13 @@ use self::config::TenantConf;
use self::metadata::TimelineMetadata;
use self::mgr::GetActiveTenantError;
use self::mgr::GetTenantError;
use self::remote_timeline_client::upload::{upload_index_part, upload_tenant_manifest};
use self::remote_timeline_client::upload::upload_index_part;
use self::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError};
use self::timeline::uninit::TimelineCreateGuard;
use self::timeline::uninit::TimelineExclusionError;
use self::timeline::uninit::UninitializedTimeline;
use self::timeline::EvictionTaskTenantState;
use self::timeline::GcCutoffs;
use self::timeline::TimelineDeleteProgress;
use self::timeline::TimelineResources;
use self::timeline::WaitLsnError;
use crate::config::PageServerConf;
@@ -246,7 +240,6 @@ struct TimelinePreload {
}
pub(crate) struct TenantPreload {
tenant_manifest: TenantManifest,
timelines: HashMap<TimelineId, TimelinePreload>,
}
@@ -495,12 +488,6 @@ impl WalRedoManager {
}
}
/// A very lightweight memory representation of an offloaded timeline.
///
/// We need to store the list of offloaded timelines so that we can perform operations on them,
/// like unoffloading them, or (at a later date), decide to perform flattening.
/// This type has a much smaller memory impact than [`Timeline`], and thus we can store many
/// more offloaded timelines than we can manage ones that aren't.
pub struct OffloadedTimeline {
pub tenant_shard_id: TenantShardId,
pub timeline_id: TimelineId,
@@ -508,78 +495,27 @@ pub struct OffloadedTimeline {
/// Whether to retain the branch lsn at the ancestor or not
pub ancestor_retain_lsn: Option<Lsn>,
/// When the timeline was archived.
///
/// Present for future flattening deliberations.
pub archived_at: NaiveDateTime,
/// Lazily constructed remote client for the timeline
///
/// If we offload a timeline, we keep around the remote client
/// for the duration of the process. If we find it through the
/// manifest, we don't construct it up until it's needed (deletion).
pub remote_client: Option<Arc<RemoteTimelineClient>>,
// TODO: once we persist offloaded state, make this lazily constructed
pub remote_client: Arc<RemoteTimelineClient>,
/// Prevent two tasks from deleting the timeline at the same time. If held, the
/// timeline is being deleted. If 'true', the timeline has already been deleted.
pub delete_progress: TimelineDeleteProgress,
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
}
impl OffloadedTimeline {
/// Obtains an offloaded timeline from a given timeline object.
///
/// Returns `None` if the `archived_at` flag couldn't be obtained, i.e.
/// the timeline is not in a stopped state.
/// Panics if the timeline is not archived.
fn from_timeline(timeline: &Timeline) -> Result<Self, UploadQueueNotReadyError> {
fn from_timeline(timeline: &Timeline) -> Self {
let ancestor_retain_lsn = timeline
.get_ancestor_timeline_id()
.map(|_timeline_id| timeline.get_ancestor_lsn());
let archived_at = timeline
.remote_client
.archived_at_stopped_queue()?
.expect("must be called on an archived timeline");
Ok(Self {
Self {
tenant_shard_id: timeline.tenant_shard_id,
timeline_id: timeline.timeline_id,
ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
ancestor_retain_lsn,
archived_at,
remote_client: Some(timeline.remote_client.clone()),
remote_client: timeline.remote_client.clone(),
delete_progress: timeline.delete_progress.clone(),
})
}
fn from_manifest(tenant_shard_id: TenantShardId, manifest: &OffloadedTimelineManifest) -> Self {
let OffloadedTimelineManifest {
timeline_id,
ancestor_timeline_id,
ancestor_retain_lsn,
archived_at,
} = *manifest;
Self {
tenant_shard_id,
timeline_id,
ancestor_timeline_id,
ancestor_retain_lsn,
archived_at,
remote_client: None,
delete_progress: TimelineDeleteProgress::default(),
}
}
fn manifest(&self) -> OffloadedTimelineManifest {
let Self {
timeline_id,
ancestor_timeline_id,
ancestor_retain_lsn,
archived_at,
..
} = self;
OffloadedTimelineManifest {
timeline_id: *timeline_id,
ancestor_timeline_id: *ancestor_timeline_id,
ancestor_retain_lsn: *ancestor_retain_lsn,
archived_at: *archived_at,
}
}
}
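
Note that manifest() and from_manifest() above are inverses; a hedged round-trip check in the spirit of this code:

let manifest_entry = offloaded.manifest();
let restored = OffloadedTimeline::from_manifest(tenant_shard_id, &manifest_entry);
assert_eq!(restored.timeline_id, offloaded.timeline_id);
assert_eq!(restored.archived_at, offloaded.archived_at);
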
@@ -615,19 +551,10 @@ impl TimelineOrOffloaded {
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
}
}
pub fn remote_client_maybe_construct(&self, tenant: &Tenant) -> Arc<RemoteTimelineClient> {
pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
match self {
TimelineOrOffloaded::Timeline(timeline) => timeline.remote_client.clone(),
TimelineOrOffloaded::Offloaded(offloaded) => match offloaded.remote_client.clone() {
Some(remote_client) => remote_client,
None => {
let remote_client = tenant.build_timeline_client(
offloaded.timeline_id,
tenant.remote_storage.clone(),
);
Arc::new(remote_client)
}
},
TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
}
}
}
@@ -869,7 +796,7 @@ impl Tenant {
&self,
timeline_id: TimelineId,
resources: TimelineResources,
index_part: IndexPart,
index_part: Option<IndexPart>,
metadata: TimelineMetadata,
ancestor: Option<Arc<Timeline>>,
_ctx: &RequestContext,
@@ -894,7 +821,24 @@ impl Tenant {
"these are used interchangeably"
);
timeline.remote_client.init_upload_queue(&index_part)?;
if let Some(index_part) = index_part.as_ref() {
timeline.remote_client.init_upload_queue(index_part)?;
} else {
// No data on the remote storage, but we have local metadata file. We can end up
// here with timeline_create being interrupted before finishing index part upload.
// By doing what we do here, the index part upload is retried.
// If control plane retries timeline creation in the meantime, the mgmt API handler
// for timeline creation will coalesce on the upload we queue here.
// FIXME: this branch should be dead code as we no longer write local metadata.
timeline
.remote_client
.init_upload_queue_for_empty_remote(&metadata)?;
timeline
.remote_client
.schedule_index_upload_for_full_metadata_update(&metadata)?;
}
timeline
.load_layer_map(disk_consistent_lsn, index_part)
@@ -1187,35 +1131,14 @@ impl Tenant {
cancel.clone(),
)
.await?;
let (offloaded_add, tenant_manifest) =
match remote_timeline_client::do_download_tenant_manifest(
remote_storage,
&self.tenant_shard_id,
&cancel,
)
.await
{
Ok((tenant_manifest, _generation)) => (
format!("{} offloaded", tenant_manifest.offloaded_timelines.len()),
tenant_manifest,
),
Err(DownloadError::NotFound) => {
("no manifest".to_string(), TenantManifest::empty())
}
Err(e) => Err(e)?,
};
info!(
"found {} timelines, and {offloaded_add}",
remote_timeline_ids.len()
);
info!("found {} timelines", remote_timeline_ids.len(),);
for k in other_keys {
warn!("Unexpected non timeline key {k}");
}
Ok(TenantPreload {
tenant_manifest,
timelines: self
.load_timelines_metadata(remote_timeline_ids, remote_storage, cancel)
.await?,
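
Condensed, the removed preload logic above treats a missing manifest as an empty one rather than an error; a hedged sketch of that fallback:

let tenant_manifest =
    match do_download_tenant_manifest(remote_storage, &self.tenant_shard_id, &cancel).await {
        Ok((manifest, _generation)) => manifest,
        // First load of the tenant: no manifest has been uploaded yet.
        Err(DownloadError::NotFound) => TenantManifest::empty(),
        Err(e) => return Err(e.into()),
    };
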
@@ -1240,26 +1163,12 @@ impl Tenant {
anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624");
};
let mut offloaded_timeline_ids = HashSet::new();
let mut offloaded_timelines_list = Vec::new();
for timeline_manifest in preload.tenant_manifest.offloaded_timelines.iter() {
let timeline_id = timeline_manifest.timeline_id;
let offloaded_timeline =
OffloadedTimeline::from_manifest(self.tenant_shard_id, timeline_manifest);
offloaded_timelines_list.push((timeline_id, Arc::new(offloaded_timeline)));
offloaded_timeline_ids.insert(timeline_id);
}
let mut timelines_to_resume_deletions = vec![];
let mut remote_index_and_client = HashMap::new();
let mut timeline_ancestors = HashMap::new();
let mut existent_timelines = HashSet::new();
for (timeline_id, preload) in preload.timelines {
if offloaded_timeline_ids.remove(&timeline_id) {
// The timeline is offloaded, skip loading it.
continue;
}
let index_part = match preload.index_part {
Ok(i) => {
debug!("remote index part exists for timeline {timeline_id}");
@@ -1363,43 +1272,6 @@ impl Tenant {
.context("resume_deletion")
.map_err(LoadLocalTimelineError::ResumeDeletion)?;
}
// Complete deletions for offloaded timeline id's.
offloaded_timelines_list
.retain(|(offloaded_id, _offloaded)| {
// At this point, offloaded_timeline_ids has the list of all offloaded timelines
// without a prefix in S3, so they are inexistent.
// In the end, existence of a timeline is finally determined by the existence of an index-part.json in remote storage.
// If there is a dangling reference in another location, they need to be cleaned up.
let delete = offloaded_timeline_ids.contains(offloaded_id);
if delete {
tracing::info!("Removing offloaded timeline {offloaded_id} from manifest as no remote prefix was found");
}
!delete
});
if !offloaded_timelines_list.is_empty() {
tracing::info!(
"Tenant has {} offloaded timelines",
offloaded_timelines_list.len()
);
}
{
let mut offloaded_timelines_accessor = self.timelines_offloaded.lock().unwrap();
offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter());
}
if !offloaded_timeline_ids.is_empty() {
let manifest = self.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
upload_tenant_manifest(
&self.remote_storage,
&self.tenant_shard_id,
generation,
&manifest,
&self.cancel,
)
.await
.map_err(TimelineArchivalError::Other)?;
}
// The local filesystem contents are a cache of what's in the remote IndexPart;
// IndexPart is the source of truth.
@@ -1524,7 +1396,7 @@ impl Tenant {
self.timeline_init_and_sync(
timeline_id,
resources,
index_part,
Some(index_part),
remote_metadata,
ancestor,
ctx,
@@ -1571,28 +1443,20 @@ impl Tenant {
Ok(timeline_preloads)
}
fn build_timeline_client(
&self,
timeline_id: TimelineId,
remote_storage: GenericRemoteStorage,
) -> RemoteTimelineClient {
RemoteTimelineClient::new(
remote_storage.clone(),
self.deletion_queue_client.clone(),
self.conf,
self.tenant_shard_id,
timeline_id,
self.generation,
)
}
fn load_timeline_metadata(
self: &Arc<Tenant>,
timeline_id: TimelineId,
remote_storage: GenericRemoteStorage,
cancel: CancellationToken,
) -> impl Future<Output = TimelinePreload> {
let client = self.build_timeline_client(timeline_id, remote_storage);
let client = RemoteTimelineClient::new(
remote_storage.clone(),
self.deletion_queue_client.clone(),
self.conf,
self.tenant_shard_id,
timeline_id,
self.generation,
);
async move {
debug_assert_current_span_has_tenant_and_timeline_id();
debug!("starting index part download");
@@ -1683,7 +1547,7 @@ impl Tenant {
info!("unoffloading timeline");
let cancel = self.cancel.clone();
let timeline_preload = self
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel.clone())
.load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
.await;
let index_part = match timeline_preload.index_part {
@@ -1728,37 +1592,17 @@ impl Tenant {
)
})
.map_err(TimelineArchivalError::Other)?;
let timeline = {
let timelines = self.timelines.lock().unwrap();
let Some(timeline) = timelines.get(&timeline_id) else {
warn!("timeline not available directly after attach");
// This is not a panic because no locks are held between `load_remote_timeline`
// which puts the timeline into timelines, and our look into the timeline map.
return Err(TimelineArchivalError::Other(anyhow::anyhow!(
"timeline not available directly after attach"
)));
};
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
Arc::clone(timeline)
let timelines = self.timelines.lock().unwrap();
let Some(timeline) = timelines.get(&timeline_id) else {
warn!("timeline not available directly after attach");
return Err(TimelineArchivalError::Other(anyhow::anyhow!(
"timeline not available directly after attach"
)));
};
// Upload new list of offloaded timelines to S3
let manifest = self.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
upload_tenant_manifest(
&self.remote_storage,
&self.tenant_shard_id,
generation,
&manifest,
&cancel,
)
.await
.map_err(TimelineArchivalError::Other)?;
let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
if offloaded_timelines.remove(&timeline_id).is_none() {
warn!("timeline already removed from offloaded timelines");
}
// Activate the timeline (if it makes sense)
if !(timeline.is_broken() || timeline.is_stopping()) {
@@ -1772,7 +1616,7 @@ impl Tenant {
}
info!("timeline unoffloading complete");
Ok(timeline)
Ok(Arc::clone(timeline))
}
pub(crate) async fn apply_timeline_archival_config(
@@ -1911,7 +1755,7 @@ impl Tenant {
}
/// Lists timelines the tenant contains.
/// It's up to callers to omit certain timelines that are not considered ready for use.
/// Up to tenant's implementation to omit certain timelines that are not considered ready for use.
pub fn list_timelines(&self) -> Vec<Arc<Timeline>> {
self.timelines
.lock()
@@ -1921,29 +1765,6 @@ impl Tenant {
.collect()
}
/// Lists timelines the tenant manages, including offloaded ones.
///
/// It's up to callers to omit certain timelines that are not considered ready for use.
pub fn list_timelines_and_offloaded(
&self,
) -> (Vec<Arc<Timeline>>, Vec<Arc<OffloadedTimeline>>) {
let timelines = self
.timelines
.lock()
.unwrap()
.values()
.map(Arc::clone)
.collect();
let offloaded = self
.timelines_offloaded
.lock()
.unwrap()
.values()
.map(Arc::clone)
.collect();
(timelines, offloaded)
}
pub fn list_timeline_ids(&self) -> Vec<TimelineId> {
self.timelines.lock().unwrap().keys().cloned().collect()
}
@@ -2949,26 +2770,6 @@ impl Tenant {
}
}
// TODO: also copy index files of offloaded timelines
let tenant_manifest = self.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
for child_shard in child_shards {
tracing::info!(
"Uploading tenant manifest for child {}",
child_shard.to_index()
);
upload_tenant_manifest(
&self.remote_storage,
child_shard,
generation,
&tenant_manifest,
&self.cancel,
)
.await?;
}
Ok(())
}
@@ -3146,22 +2947,6 @@ impl Tenant {
.unwrap_or(self.conf.default_tenant_conf.lsn_lease_length)
}
pub(crate) fn tenant_manifest(&self) -> TenantManifest {
let timelines_offloaded = self.timelines_offloaded.lock().unwrap();
let mut timeline_manifests = timelines_offloaded
.iter()
.map(|(_timeline_id, offloaded)| offloaded.manifest())
.collect::<Vec<_>>();
// Sort the manifests so that our output is deterministic
timeline_manifests.sort_by_key(|timeline_manifest| timeline_manifest.timeline_id);
TenantManifest {
version: LATEST_TENANT_MANIFEST_VERSION,
offloaded_timelines: timeline_manifests,
}
}
pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
// Use read-copy-update in order to avoid overwriting the location config
// state if this races with [`Tenant::set_new_location_config`]. Note that
@@ -4154,21 +3939,18 @@ impl Tenant {
Ok(timeline)
}
fn build_timeline_remote_client(&self, timeline_id: TimelineId) -> RemoteTimelineClient {
RemoteTimelineClient::new(
/// Call this before constructing a timeline, to build its required structures
fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources {
let remote_client = RemoteTimelineClient::new(
self.remote_storage.clone(),
self.deletion_queue_client.clone(),
self.conf,
self.tenant_shard_id,
timeline_id,
self.generation,
)
}
/// Call this before constructing a timeline, to build its required structures
fn build_timeline_resources(&self, timeline_id: TimelineId) -> TimelineResources {
);
TimelineResources {
remote_client: self.build_timeline_remote_client(timeline_id),
remote_client,
timeline_get_throttle: self.timeline_get_throttle.clone(),
l0_flush_global_state: self.l0_flush_global_state.clone(),
}

View File

@@ -5,8 +5,6 @@
use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
use crate::context::RequestContext;
use crate::page_cache::{self, FileId, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ};
#[cfg(test)]
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::VirtualFile;
use bytes::Bytes;
use std::ops::Deref;
@@ -42,7 +40,7 @@ pub enum BlockLease<'a> {
#[cfg(test)]
Arc(std::sync::Arc<[u8; PAGE_SZ]>),
#[cfg(test)]
IoBufferMut(IoBufferMut),
Vec(Vec<u8>),
}
impl From<PageReadGuard<'static>> for BlockLease<'static> {
@@ -69,7 +67,7 @@ impl Deref for BlockLease<'_> {
#[cfg(test)]
BlockLease::Arc(v) => v.deref(),
#[cfg(test)]
BlockLease::IoBufferMut(v) => {
BlockLease::Vec(v) => {
TryFrom::try_from(&v[..]).expect("caller must ensure that v has PAGE_SZ")
}
}
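
The Vec arm depends on a fallible slice-to-array conversion; a minimal standalone sketch of that step, with PAGE_SZ assumed to be 8192:

fn main() {
    const PAGE_SZ: usize = 8192; // assumed
    let v: Vec<u8> = vec![0u8; PAGE_SZ];
    let page: &[u8; PAGE_SZ] =
        TryFrom::try_from(&v[..]).expect("caller must ensure that v has PAGE_SZ bytes");
    assert_eq!(page.len(), PAGE_SZ);
}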

View File

@@ -6,11 +6,10 @@ use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::page_cache;
use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File;
use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
use crate::virtual_file::owned_buffers_io::slice::SliceMutExt;
use crate::virtual_file::owned_buffers_io::util::size_tracking_writer;
use crate::virtual_file::owned_buffers_io::write::Buffer;
use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile};
use crate::virtual_file::{self, owned_buffers_io, VirtualFile};
use bytes::BytesMut;
use camino::Utf8PathBuf;
use num_traits::Num;
@@ -108,18 +107,15 @@ impl EphemeralFile {
self.page_cache_file_id
}
pub(crate) async fn load_to_io_buf(
&self,
ctx: &RequestContext,
) -> Result<IoBufferMut, io::Error> {
pub(crate) async fn load_to_vec(&self, ctx: &RequestContext) -> Result<Vec<u8>, io::Error> {
let size = self.len().into_usize();
let buf = IoBufferMut::with_capacity(size);
let (slice, nread) = self.read_exact_at_eof_ok(0, buf.slice_full(), ctx).await?;
let vec = Vec::with_capacity(size);
let (slice, nread) = self.read_exact_at_eof_ok(0, vec.slice_full(), ctx).await?;
assert_eq!(nread, size);
let buf = slice.into_inner();
assert_eq!(buf.len(), nread);
assert_eq!(buf.capacity(), size, "we shouldn't be reallocating");
Ok(buf)
let vec = slice.into_inner();
assert_eq!(vec.len(), nread);
assert_eq!(vec.capacity(), size, "we shouldn't be reallocating");
Ok(vec)
}
/// Returns the offset at which the first byte of the input was written, for use
@@ -162,7 +158,7 @@ impl EphemeralFile {
}
impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {
async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
async fn read_exact_at_eof_ok<'a, 'b, B: tokio_epoll_uring::IoBufMut + Send>(
&'b self,
start: u64,
dst: tokio_epoll_uring::Slice<B>,
@@ -349,7 +345,7 @@ mod tests {
assert!(file.len() as usize == write_nbytes);
for i in 0..write_nbytes {
assert_eq!(value_offsets[i], i.into_u64());
let buf = IoBufferMut::with_capacity(1);
let buf = Vec::with_capacity(1);
let (buf_slice, nread) = file
.read_exact_at_eof_ok(i.into_u64(), buf.slice_full(), &ctx)
.await
@@ -389,7 +385,7 @@ mod tests {
// assert the state is as this test expects it to be
assert_eq!(
&file.load_to_io_buf(&ctx).await.unwrap(),
&file.load_to_vec(&ctx).await.unwrap(),
&content[0..cap + cap / 2]
);
let md = file
@@ -444,7 +440,7 @@ mod tests {
let (buf, nread) = file
.read_exact_at_eof_ok(
start.into_u64(),
IoBufferMut::with_capacity(len).slice_full(),
Vec::with_capacity(len).slice_full(),
ctx,
)
.await
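
The asserts in load_to_vec encode a no-reallocation invariant: the buffer is sized once and filled exactly. A hedged standalone equivalent using std I/O instead of the dio-read File trait:

use std::io::Read;

fn load_file_to_vec(mut f: std::fs::File) -> std::io::Result<Vec<u8>> {
    let size = f.metadata()?.len() as usize;
    let mut vec = vec![0u8; size]; // allocate exactly once
    f.read_exact(&mut vec)?;       // fill the whole buffer
    assert_eq!(vec.len(), size);
    assert_eq!(vec.capacity(), size, "we shouldn't be reallocating");
    Ok(vec)
}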

View File

@@ -180,7 +180,6 @@
pub(crate) mod download;
pub mod index;
pub mod manifest;
pub(crate) mod upload;
use anyhow::Context;
@@ -192,6 +191,7 @@ use pageserver_api::models::TimelineArchivalState;
use pageserver_api::shard::{ShardIndex, TenantShardId};
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
pub(crate) use upload::upload_initdb_dir;
use utils::backoff::{
self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
};
@@ -245,11 +245,9 @@ use super::upload_queue::{NotInitialized, SetDeletedFlagProgress};
use super::Generation;
pub(crate) use download::{
do_download_tenant_manifest, download_index_part, is_temp_download_file,
list_remote_tenant_shards, list_remote_timelines,
download_index_part, is_temp_download_file, list_remote_tenant_shards, list_remote_timelines,
};
pub(crate) use index::LayerFileMetadata;
pub(crate) use upload::{upload_initdb_dir, upload_tenant_manifest};
// Occasional network issues and such can cause remote operations to fail, and
// that's expected. If a download fails, we log it at info-level, and retry.
@@ -274,12 +272,6 @@ pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
/// which we warn and skip.
const DELETION_QUEUE_FLUSH_TIMEOUT: Duration = Duration::from_secs(10);
/// Hardcode a generation for the tenant manifest for now so that we don't
/// need to deal with generation-less manifests in the future.
///
/// TODO: add proper generation support to all the places that use this.
pub(crate) const TENANT_MANIFEST_GENERATION: Generation = Generation::new(1);
pub enum MaybeDeletedIndexPart {
IndexPart(IndexPart),
Deleted(IndexPart),
@@ -303,10 +295,6 @@ pub enum WaitCompletionError {
UploadQueueShutDownOrStopped,
}
#[derive(Debug, thiserror::Error)]
#[error("Upload queue either in unexpected state or hasn't downloaded manifest yet")]
pub struct UploadQueueNotReadyError;
/// A client for accessing a timeline's data in remote storage.
///
/// This takes care of managing the number of connections, and balancing them
@@ -480,20 +468,6 @@ impl RemoteTimelineClient {
.ok()
}
/// Returns `Ok(Some(timestamp))` if the timeline has been archived, `Ok(None)` if the timeline hasn't been archived.
///
/// Return Err(_) if the remote index_part hasn't been downloaded yet, or the timeline hasn't been stopped yet.
pub(crate) fn archived_at_stopped_queue(
&self,
) -> Result<Option<NaiveDateTime>, UploadQueueNotReadyError> {
self.upload_queue
.lock()
.unwrap()
.stopped_mut()
.map(|q| q.upload_queue_for_deletion.clean.0.archived_at)
.map_err(|_| UploadQueueNotReadyError)
}
fn update_remote_physical_size_gauge(&self, current_remote_index_part: Option<&IndexPart>) {
let size: u64 = if let Some(current_remote_index_part) = current_remote_index_part {
current_remote_index_part
@@ -2224,17 +2198,6 @@ pub fn remote_tenant_path(tenant_shard_id: &TenantShardId) -> RemotePath {
RemotePath::from_string(&path).expect("Failed to construct path")
}
pub fn remote_tenant_manifest_path(
tenant_shard_id: &TenantShardId,
generation: Generation,
) -> RemotePath {
let path = format!(
"tenants/{tenant_shard_id}/tenant-manifest{}.json",
generation.get_suffix()
);
RemotePath::from_string(&path).expect("Failed to construct path")
}
pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
let path = format!("tenants/{tenant_shard_id}/{TIMELINES_SEGMENT_NAME}");
RemotePath::from_string(&path).expect("Failed to construct path")

View File

@@ -34,11 +34,10 @@ use utils::id::{TenantId, TimelineId};
use utils::pausable_failpoint;
use super::index::{IndexPart, LayerFileMetadata};
use super::manifest::TenantManifest;
use super::{
parse_remote_index_path, remote_index_path, remote_initdb_archive_path,
remote_initdb_preserved_archive_path, remote_tenant_manifest_path, remote_tenant_path,
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
remote_initdb_preserved_archive_path, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
};
///
@@ -339,15 +338,19 @@ pub async fn list_remote_timelines(
list_identifiers::<TimelineId>(storage, remote_path, cancel).await
}
async fn do_download_remote_path_retry_forever(
async fn do_download_index_part(
storage: &GenericRemoteStorage,
remote_path: &RemotePath,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
index_generation: Generation,
cancel: &CancellationToken,
) -> Result<(Vec<u8>, SystemTime), DownloadError> {
download_retry_forever(
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
let (index_part_bytes, index_part_mtime) = download_retry_forever(
|| async {
let download = storage
.download(remote_path, &DownloadOpts::default(), cancel)
.download(&remote_path, &DownloadOpts::default(), cancel)
.await?;
let mut bytes = Vec::new();
@@ -362,39 +365,7 @@ async fn do_download_remote_path_retry_forever(
&format!("download {remote_path:?}"),
cancel,
)
.await
}
pub async fn do_download_tenant_manifest(
storage: &GenericRemoteStorage,
tenant_shard_id: &TenantShardId,
cancel: &CancellationToken,
) -> Result<(TenantManifest, Generation), DownloadError> {
// TODO: generation support
let generation = super::TENANT_MANIFEST_GENERATION;
let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);
let (manifest_bytes, _manifest_bytes_mtime) =
do_download_remote_path_retry_forever(storage, &remote_path, cancel).await?;
let tenant_manifest = TenantManifest::from_json_bytes(&manifest_bytes)
.with_context(|| format!("deserialize tenant manifest file at {remote_path:?}"))
.map_err(DownloadError::Other)?;
Ok((tenant_manifest, generation))
}
async fn do_download_index_part(
storage: &GenericRemoteStorage,
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,
index_generation: Generation,
cancel: &CancellationToken,
) -> Result<(IndexPart, Generation, SystemTime), DownloadError> {
let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
let (index_part_bytes, index_part_mtime) =
do_download_remote_path_retry_forever(storage, &remote_path, cancel).await?;
.await?;
let index_part: IndexPart = serde_json::from_slice(&index_part_bytes)
.with_context(|| format!("deserialize index part file at {remote_path:?}"))
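As context for `download_retry_forever`, a hedged, self-contained sketch of the retry-until-success shape it implies (the backoff constants and logging are illustrative assumptions, not the pageserver's actual values, and the real code also checks a CancellationToken; assumes the tokio crate with the rt and macros features):

use std::time::Duration;

// Keep retrying the operation until it succeeds, with capped exponential
// backoff between attempts.
async fn retry_forever<T, E, F, Fut>(mut op: F) -> T
where
    F: FnMut() -> Fut,
    Fut: std::future::Future<Output = Result<T, E>>,
    E: std::fmt::Display,
{
    let mut attempt = 0u32;
    loop {
        match op().await {
            Ok(v) => return v,
            Err(e) => {
                attempt += 1;
                eprintln!("attempt {attempt} failed: {e}; retrying");
                let delay = Duration::from_millis(100 * 2u64.saturating_pow(attempt.min(6)));
                tokio::time::sleep(delay).await;
            }
        }
    }
}

#[tokio::main]
async fn main() {
    let mut tries = 0;
    let v = retry_forever(|| {
        tries += 1;
        let t = tries;
        async move { if t < 3 { Err("transient failure") } else { Ok(42) } }
    })
    .await;
    assert_eq!(v, 42);
}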

View File

@@ -121,11 +121,11 @@ impl IndexPart {
self.disk_consistent_lsn
}
pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
pub fn from_s3_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
serde_json::from_slice::<IndexPart>(bytes)
}
pub fn to_json_bytes(&self) -> serde_json::Result<Vec<u8>> {
pub fn to_s3_bytes(&self) -> serde_json::Result<Vec<u8>> {
serde_json::to_vec(self)
}
@@ -383,7 +383,7 @@ mod tests {
last_aux_file_policy: None,
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -427,7 +427,7 @@ mod tests {
last_aux_file_policy: None,
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -472,7 +472,7 @@ mod tests {
last_aux_file_policy: None,
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -520,7 +520,7 @@ mod tests {
last_aux_file_policy: None,
};
let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap();
let empty_layers_parsed = IndexPart::from_s3_bytes(empty_layers_json.as_bytes()).unwrap();
assert_eq!(empty_layers_parsed, expected);
}
@@ -563,7 +563,7 @@ mod tests {
last_aux_file_policy: None,
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -609,7 +609,7 @@ mod tests {
last_aux_file_policy: None,
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -660,7 +660,7 @@ mod tests {
last_aux_file_policy: Some(AuxFilePolicy::V2),
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -716,7 +716,7 @@ mod tests {
last_aux_file_policy: Default::default(),
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -773,7 +773,7 @@ mod tests {
last_aux_file_policy: Default::default(),
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}
@@ -835,7 +835,7 @@ mod tests {
archived_at: None,
};
let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap();
let part = IndexPart::from_s3_bytes(example.as_bytes()).unwrap();
assert_eq!(part, expected);
}

View File

@@ -1,53 +0,0 @@
use chrono::NaiveDateTime;
use serde::{Deserialize, Serialize};
use utils::{id::TimelineId, lsn::Lsn};
/// Tenant-shard scoped manifest
#[derive(Clone, Serialize, Deserialize)]
pub struct TenantManifest {
/// Debugging aid describing the version of this manifest.
/// Can also be used for distinguishing breaking changes later on.
pub version: usize,
/// The list of offloaded timelines together with enough information
/// to not have to actually load them.
///
/// Note: the timelines mentioned in this list might be deleted, i.e.
/// we don't hold an invariant that the references aren't dangling.
/// Existence of index-part.json is the actual indicator of timeline existence.
pub offloaded_timelines: Vec<OffloadedTimelineManifest>,
}
/// The remote level representation of an offloaded timeline.
///
/// Very similar to [`pageserver_api::models::OffloadedTimelineInfo`],
/// but the two data structures serve different needs: this one is a persistent disk format
/// that must stay backwards compatible, while the other is only informational.
#[derive(Clone, Serialize, Deserialize, Copy)]
pub struct OffloadedTimelineManifest {
pub timeline_id: TimelineId,
/// Whether the timeline has a parent it has been branched off from or not
pub ancestor_timeline_id: Option<TimelineId>,
/// Whether to retain the branch lsn at the ancestor or not
pub ancestor_retain_lsn: Option<Lsn>,
/// The time point when the timeline was archived
pub archived_at: NaiveDateTime,
}
pub const LATEST_TENANT_MANIFEST_VERSION: usize = 1;
impl TenantManifest {
pub(crate) fn empty() -> Self {
Self {
version: LATEST_TENANT_MANIFEST_VERSION,
offloaded_timelines: vec![],
}
}
pub(crate) fn from_json_bytes(bytes: &[u8]) -> Result<Self, serde_json::Error> {
serde_json::from_slice::<Self>(bytes)
}
pub(crate) fn to_json_bytes(&self) -> serde_json::Result<Vec<u8>> {
serde_json::to_vec(self)
}
}
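The manifest's only serialization contract is the JSON round trip through `to_json_bytes`/`from_json_bytes` shown above. A minimal sketch of that round trip, using a simplified stand-in struct (assumes the serde crate with the derive feature plus serde_json):

use serde::{Deserialize, Serialize};

// Simplified stand-in for TenantManifest; the offloaded timeline entries are
// reduced to strings for illustration only.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct TenantManifest {
    version: usize,
    offloaded_timelines: Vec<String>,
}

fn main() -> Result<(), serde_json::Error> {
    let manifest = TenantManifest {
        version: 1,
        offloaded_timelines: vec![],
    };
    // Serialize for the upload path...
    let bytes = serde_json::to_vec(&manifest)?;
    // ...and parse it back on the download path.
    let parsed: TenantManifest = serde_json::from_slice(&bytes)?;
    assert_eq!(parsed, manifest);
    Ok(())
}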

View File

@@ -13,11 +13,9 @@ use tokio_util::sync::CancellationToken;
use utils::{backoff, pausable_failpoint};
use super::index::IndexPart;
use super::manifest::TenantManifest;
use super::Generation;
use crate::tenant::remote_timeline_client::{
remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path,
remote_tenant_manifest_path,
};
use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
use utils::id::{TenantId, TimelineId};
@@ -41,7 +39,7 @@ pub(crate) async fn upload_index_part<'a>(
pausable_failpoint!("before-upload-index-pausable");
// FIXME: this error comes too late
let serialized = index_part.to_json_bytes()?;
let serialized = index_part.to_s3_bytes()?;
let serialized = Bytes::from(serialized);
let index_part_size = serialized.len();
@@ -57,37 +55,6 @@ pub(crate) async fn upload_index_part<'a>(
.await
.with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
}
/// Serializes and uploads the given tenant manifest data to the remote storage.
pub(crate) async fn upload_tenant_manifest(
storage: &GenericRemoteStorage,
tenant_shard_id: &TenantShardId,
generation: Generation,
tenant_manifest: &TenantManifest,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
tracing::trace!("uploading new tenant manifest");
fail_point!("before-upload-manifest", |_| {
bail!("failpoint before-upload-manifest")
});
pausable_failpoint!("before-upload-manifest-pausable");
let serialized = tenant_manifest.to_json_bytes()?;
let serialized = Bytes::from(serialized);
let tenant_manifest_size = serialized.len();
let remote_path = remote_tenant_manifest_path(tenant_shard_id, generation);
storage
.upload_storage_object(
futures::stream::once(futures::future::ready(Ok(serialized))),
tenant_manifest_size,
&remote_path,
cancel,
)
.await
.with_context(|| format!("upload tenant manifest for '{tenant_shard_id}'"))
}
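The removed upload path wraps the serialized manifest in a one-item stream before handing it to remote storage. A hedged sketch of that single-chunk stream shape (the consume loop below stands in for `upload_storage_object`, which is not reproduced here; assumes the bytes, futures, and tokio crates):

use bytes::Bytes;
use futures::StreamExt;

#[tokio::main]
async fn main() {
    let serialized = Bytes::from_static(b"{\"version\":1,\"offloaded_timelines\":[]}");
    let expected = serialized.len();
    // A stream that yields the whole serialized body as a single Ok chunk.
    let mut body =
        futures::stream::once(futures::future::ready(Ok::<Bytes, std::io::Error>(serialized)));
    let mut uploaded = 0;
    while let Some(chunk) = body.next().await {
        uploaded += chunk.expect("single chunk is Ok").len();
    }
    assert_eq!(uploaded, expected);
}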
/// Attempts to upload given layer files.
/// No extra checks for overlapping files are made, and any files that are already present remotely will be overwritten if submitted during the upload.

View File

@@ -44,11 +44,11 @@ use crate::tenant::vectored_blob_io::{
};
use crate::tenant::PageReconstructError;
use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
use crate::{walrecord, TEMP_FILE_SUFFIX};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::BytesMut;
use camino::{Utf8Path, Utf8PathBuf};
use futures::StreamExt;
use itertools::Itertools;
@@ -515,8 +515,8 @@ impl DeltaLayerWriterInner {
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
let temp_path = self.path.clone();
let result = self.finish0(key_end, ctx).await;
if let Err(ref e) = result {
tracing::info!(%temp_path, "cleaning up temporary file after error during writing: {e}");
if result.is_err() {
tracing::info!(%temp_path, "cleaning up temporary file after error during writing");
if let Err(e) = std::fs::remove_file(&temp_path) {
tracing::warn!(error=%e, %temp_path, "error cleaning up temporary layer file after error during writing");
}
@@ -1002,7 +1002,7 @@ impl DeltaLayerInner {
.0
.into();
let buf_size = Self::get_min_read_buffer_size(&reads, max_vectored_read_bytes);
let mut buf = Some(IoBufferMut::with_capacity(buf_size));
let mut buf = Some(BytesMut::with_capacity(buf_size));
// Note that reads are processed in reverse order (from highest key+lsn).
// This is the order that `ReconstructState` requires such that it can
@@ -1029,7 +1029,7 @@ impl DeltaLayerInner {
// We have "lost" the buffer since the lower level IO api
// doesn't return the buffer on error. Allocate a new one.
buf = Some(IoBufferMut::with_capacity(buf_size));
buf = Some(BytesMut::with_capacity(buf_size));
continue;
}
@@ -1203,7 +1203,7 @@ impl DeltaLayerInner {
.map(|x| x.0.get())
.unwrap_or(8192);
let mut buffer = Some(IoBufferMut::with_capacity(max_read_size));
let mut buffer = Some(BytesMut::with_capacity(max_read_size));
// FIXME: buffering of DeltaLayerWriter
let mut per_blob_copy = Vec::new();
@@ -1561,11 +1561,12 @@ impl<'a> DeltaLayerIterator<'a> {
let vectored_blob_reader = VectoredBlobReader::new(&self.delta_layer.file);
let mut next_batch = std::collections::VecDeque::new();
let buf_size = plan.size();
let buf = IoBufferMut::with_capacity(buf_size);
let buf = BytesMut::with_capacity(buf_size);
let blobs_buf = vectored_blob_reader
.read_blobs(&plan, buf, self.ctx)
.await?;
let view = BufView::new_slice(&blobs_buf.buf);
let frozen_buf = blobs_buf.buf.freeze();
let view = BufView::new_bytes(frozen_buf);
for meta in blobs_buf.blobs.iter() {
let blob_read = meta.read(&view).await?;
let value = Value::des(&blob_read)?;
@@ -1940,7 +1941,7 @@ pub(crate) mod test {
&vectored_reads,
constants::MAX_VECTORED_READ_BYTES,
);
let mut buf = Some(IoBufferMut::with_capacity(buf_size));
let mut buf = Some(BytesMut::with_capacity(buf_size));
for read in vectored_reads {
let blobs_buf = vectored_blob_reader

View File

@@ -41,11 +41,10 @@ use crate::tenant::vectored_blob_io::{
};
use crate::tenant::PageReconstructError;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
use anyhow::{anyhow, bail, ensure, Context, Result};
use bytes::Bytes;
use bytes::{Bytes, BytesMut};
use camino::{Utf8Path, Utf8PathBuf};
use hex;
use itertools::Itertools;
@@ -548,10 +547,10 @@ impl ImageLayerInner {
for read in plan.into_iter() {
let buf_size = read.size();
let buf = IoBufferMut::with_capacity(buf_size);
let buf = BytesMut::with_capacity(buf_size);
let blobs_buf = vectored_blob_reader.read_blobs(&read, buf, ctx).await?;
let view = BufView::new_slice(&blobs_buf.buf);
let frozen_buf = blobs_buf.buf.freeze();
let view = BufView::new_bytes(frozen_buf);
for meta in blobs_buf.blobs.iter() {
let img_buf = meta.read(&view).await?;
@@ -610,12 +609,13 @@ impl ImageLayerInner {
}
}
let buf = IoBufferMut::with_capacity(buf_size);
let buf = BytesMut::with_capacity(buf_size);
let res = vectored_blob_reader.read_blobs(&read, buf, ctx).await;
match res {
Ok(blobs_buf) => {
let view = BufView::new_slice(&blobs_buf.buf);
let frozen_buf = blobs_buf.buf.freeze();
let view = BufView::new_bytes(frozen_buf);
for meta in blobs_buf.blobs.iter() {
let img_buf = meta.read(&view).await;
@@ -827,25 +827,6 @@ impl ImageLayerWriterInner {
self,
ctx: &RequestContext,
end_key: Option<Key>,
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
let temp_path = self.path.clone();
let result = self.finish0(ctx, end_key).await;
if let Err(ref e) = result {
tracing::info!(%temp_path, "cleaning up temporary file after error during writing: {e}");
if let Err(e) = std::fs::remove_file(&temp_path) {
tracing::warn!(error=%e, %temp_path, "error cleaning up temporary layer file after error during writing");
}
}
result
}
///
/// Finish writing the image layer.
///
async fn finish0(
self,
ctx: &RequestContext,
end_key: Option<Key>,
) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
let index_start_blk = self.blob_writer.size().div_ceil(PAGE_SZ as u64) as u32;
@@ -1069,11 +1050,12 @@ impl<'a> ImageLayerIterator<'a> {
let vectored_blob_reader = VectoredBlobReader::new(&self.image_layer.file);
let mut next_batch = std::collections::VecDeque::new();
let buf_size = plan.size();
let buf = IoBufferMut::with_capacity(buf_size);
let buf = BytesMut::with_capacity(buf_size);
let blobs_buf = vectored_blob_reader
.read_blobs(&plan, buf, self.ctx)
.await?;
let view = BufView::new_slice(&blobs_buf.buf);
let frozen_buf = blobs_buf.buf.freeze();
let view = BufView::new_bytes(frozen_buf);
for meta in blobs_buf.blobs.iter() {
let img_buf = meta.read(&view).await?;
next_batch.push_back((

View File

@@ -14,6 +14,7 @@ use crate::tenant::PageReconstructError;
use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
use crate::{l0_flush, page_cache};
use anyhow::{anyhow, Context, Result};
use bytes::Bytes;
use camino::Utf8PathBuf;
use pageserver_api::key::CompactKey;
use pageserver_api::keyspace::KeySpace;
@@ -808,8 +809,9 @@ impl InMemoryLayer {
match l0_flush_global_state {
l0_flush::Inner::Direct { .. } => {
let file_contents = inner.file.load_to_io_buf(ctx).await?;
let file_contents = file_contents.freeze();
let file_contents: Vec<u8> = inner.file.load_to_vec(ctx).await?;
let file_contents = Bytes::from(file_contents);
for (key, vec_map) in inner.index.iter() {
// Write all page versions
@@ -823,7 +825,7 @@ impl InMemoryLayer {
len,
will_init,
} = entry;
let buf = file_contents.slice(pos as usize..(pos + len) as usize);
let buf = Bytes::slice(&file_contents, pos as usize..(pos + len) as usize);
let (_buf, res) = delta_layer_writer
.put_value_bytes(
Key::from_compact(*key),

View File

@@ -9,7 +9,6 @@ use tokio_epoll_uring::{BoundedBuf, IoBufMut, Slice};
use crate::{
assert_u64_eq_usize::{U64IsUsize, UsizeIsU64},
context::RequestContext,
virtual_file::{owned_buffers_io::io_buf_aligned::IoBufAlignedMut, IoBufferMut},
};
/// The file interface we require. At runtime, this is a [`crate::tenant::ephemeral_file::EphemeralFile`].
@@ -25,7 +24,7 @@ pub trait File: Send {
/// [`std::io::ErrorKind::UnexpectedEof`] error if the file is shorter than `start+dst.len()`.
///
/// No guarantees are made about the remaining bytes in `dst` in case of a short read.
async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>(
&'b self,
start: u64,
dst: Slice<B>,
@@ -228,7 +227,7 @@ where
// Execute physical reads and fill the logical read buffers
// TODO: pipelined reads; prefetch;
let get_io_buffer = |nchunks| IoBufferMut::with_capacity(nchunks * DIO_CHUNK_SIZE);
let get_io_buffer = |nchunks| Vec::with_capacity(nchunks * DIO_CHUNK_SIZE);
for PhysicalRead {
start_chunk_no,
nchunks,
@@ -460,7 +459,7 @@ mod tests {
let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
let file = InMemoryFile::new_random(10);
let test_read = |pos, len| {
let buf = IoBufferMut::with_capacity_zeroed(len);
let buf = vec![0; len];
let fut = file.read_exact_at_eof_ok(pos, buf.slice_full(), &ctx);
use futures::FutureExt;
let (slice, nread) = fut
@@ -471,9 +470,9 @@ mod tests {
buf.truncate(nread);
buf
};
assert_eq!(&test_read(0, 1), &file.content[0..1]);
assert_eq!(&test_read(1, 2), &file.content[1..3]);
assert_eq!(&test_read(9, 2), &file.content[9..]);
assert_eq!(test_read(0, 1), &file.content[0..1]);
assert_eq!(test_read(1, 2), &file.content[1..3]);
assert_eq!(test_read(9, 2), &file.content[9..]);
assert!(test_read(10, 2).is_empty());
assert!(test_read(11, 2).is_empty());
}
@@ -610,7 +609,7 @@ mod tests {
}
impl<'x> File for RecorderFile<'x> {
async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>(
&'b self,
start: u64,
dst: Slice<B>,
@@ -783,7 +782,7 @@ mod tests {
2048, 1024 => Err("foo".to_owned()),
};
let buf = IoBufferMut::with_capacity(512);
let buf = Vec::with_capacity(512);
let (buf, nread) = mock_file
.read_exact_at_eof_ok(0, buf.slice_full(), &ctx)
.await
@@ -791,7 +790,7 @@ mod tests {
assert_eq!(nread, 512);
assert_eq!(&buf.into_inner()[..nread], &[0; 512]);
let buf = IoBufferMut::with_capacity(512);
let buf = Vec::with_capacity(512);
let (buf, nread) = mock_file
.read_exact_at_eof_ok(512, buf.slice_full(), &ctx)
.await
@@ -799,7 +798,7 @@ mod tests {
assert_eq!(nread, 512);
assert_eq!(&buf.into_inner()[..nread], &[1; 512]);
let buf = IoBufferMut::with_capacity(512);
let buf = Vec::with_capacity(512);
let (buf, nread) = mock_file
.read_exact_at_eof_ok(1024, buf.slice_full(), &ctx)
.await
@@ -807,7 +806,7 @@ mod tests {
assert_eq!(nread, 10);
assert_eq!(&buf.into_inner()[..nread], &[2; 10]);
let buf = IoBufferMut::with_capacity(1024);
let buf = Vec::with_capacity(1024);
let err = mock_file
.read_exact_at_eof_ok(2048, buf.slice_full(), &ctx)
.await

View File

@@ -42,7 +42,7 @@ impl SplitWriterResult {
pub struct SplitImageLayerWriter {
inner: ImageLayerWriter,
target_layer_size: u64,
generated_layer_writers: Vec<(ImageLayerWriter, PersistentLayerKey)>,
generated_layers: Vec<SplitWriterResult>,
conf: &'static PageServerConf,
timeline_id: TimelineId,
tenant_shard_id: TenantShardId,
@@ -71,7 +71,7 @@ impl SplitImageLayerWriter {
ctx,
)
.await?,
generated_layer_writers: Vec::new(),
generated_layers: Vec::new(),
conf,
timeline_id,
tenant_shard_id,
@@ -80,12 +80,18 @@ impl SplitImageLayerWriter {
})
}
pub async fn put_image(
pub async fn put_image_with_discard_fn<D, F>(
&mut self,
key: Key,
img: Bytes,
tline: &Arc<Timeline>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
discard: D,
) -> anyhow::Result<()>
where
D: FnOnce(&PersistentLayerKey) -> F,
F: Future<Output = bool>,
{
// The current estimate is an upper bound on the space the key/image could take,
// because it does not account for compression. The resulting image layer could
// therefore be smaller than the target size.
@@ -102,83 +108,72 @@ impl SplitImageLayerWriter {
ctx,
)
.await?;
let prev_image_writer = std::mem::replace(&mut self.inner, next_image_writer);
let layer_key = PersistentLayerKey {
key_range: self.start_key..key,
lsn_range: PersistentLayerDesc::image_layer_lsn_range(self.lsn),
is_delta: false,
};
let prev_image_writer = std::mem::replace(&mut self.inner, next_image_writer);
self.start_key = key;
self.generated_layer_writers
.push((prev_image_writer, layer_key));
if discard(&layer_key).await {
drop(prev_image_writer);
self.generated_layers
.push(SplitWriterResult::Discarded(layer_key));
} else {
let (desc, path) = prev_image_writer.finish_with_end_key(key, ctx).await?;
let layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
self.generated_layers
.push(SplitWriterResult::Produced(layer));
}
}
self.inner.put_image(key, img, ctx).await
}
#[cfg(test)]
pub async fn put_image(
&mut self,
key: Key,
img: Bytes,
tline: &Arc<Timeline>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
self.put_image_with_discard_fn(key, img, tline, ctx, |_| async { false })
.await
}
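The discard-fn contract used by `put_image_with_discard_fn` and the other `*_with_discard_fn` methods can be modeled in isolation: before a finished layer is materialized, the caller-supplied closure is awaited, and returning true records the layer as discarded instead of producing a file. A minimal sketch under simplified types (the key struct and `finish_layer` helper are hypothetical stand-ins; assumes tokio with the rt and macros features):

use std::future::Future;

// Hypothetical stand-in for PersistentLayerKey.
#[derive(Clone, Debug)]
struct LayerKeyStub(&'static str);

// Models the writers' behavior: await the discard decision first, and only
// materialize ("produce") the layer when the caller keeps it.
async fn finish_layer<D, F>(key: LayerKeyStub, discard: D) -> Option<LayerKeyStub>
where
    D: FnOnce(&LayerKeyStub) -> F,
    F: Future<Output = bool>,
{
    if discard(&key).await {
        None // recorded as SplitWriterResult::Discarded in the real code
    } else {
        Some(key) // recorded as SplitWriterResult::Produced
    }
}

#[tokio::main]
async fn main() {
    // Dry-run style caller: discard everything.
    assert!(finish_layer(LayerKeyStub("img-00-10"), |_| async { true })
        .await
        .is_none());
    // Normal caller: keep everything.
    let kept = finish_layer(LayerKeyStub("img-10-20"), |_| async { false }).await;
    assert_eq!(kept.unwrap().0, "img-10-20");
}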
pub(crate) async fn finish_with_discard_fn<D, F>(
self,
tline: &Arc<Timeline>,
ctx: &RequestContext,
end_key: Key,
discard_fn: D,
discard: D,
) -> anyhow::Result<Vec<SplitWriterResult>>
where
D: Fn(&PersistentLayerKey) -> F,
D: FnOnce(&PersistentLayerKey) -> F,
F: Future<Output = bool>,
{
let Self {
mut generated_layer_writers,
mut generated_layers,
inner,
..
} = self;
if inner.num_keys() != 0 {
let layer_key = PersistentLayerKey {
key_range: self.start_key..end_key,
lsn_range: PersistentLayerDesc::image_layer_lsn_range(self.lsn),
is_delta: false,
};
generated_layer_writers.push((inner, layer_key));
if inner.num_keys() == 0 {
return Ok(generated_layers);
}
let clean_up_layers = |generated_layers: Vec<SplitWriterResult>| {
for produced_layer in generated_layers {
if let SplitWriterResult::Produced(image_layer) = produced_layer {
let layer: Layer = image_layer.into();
layer.delete_on_drop();
}
}
let layer_key = PersistentLayerKey {
key_range: self.start_key..end_key,
lsn_range: PersistentLayerDesc::image_layer_lsn_range(self.lsn),
is_delta: false,
};
// BEGIN: catch every error and do the recovery in the section below
let mut generated_layers = Vec::new();
for (inner, layer_key) in generated_layer_writers {
if discard_fn(&layer_key).await {
generated_layers.push(SplitWriterResult::Discarded(layer_key));
} else {
let layer = match inner
.finish_with_end_key(layer_key.key_range.end, ctx)
.await
{
Ok((desc, path)) => {
match Layer::finish_creating(self.conf, tline, desc, &path) {
Ok(layer) => layer,
Err(e) => {
tokio::fs::remove_file(&path).await.ok();
clean_up_layers(generated_layers);
return Err(e);
}
}
}
Err(e) => {
// ImageLayerWriter::finish will clean up the temporary layer if anything goes wrong,
// so we don't need to remove the layer we just failed to create ourselves.
clean_up_layers(generated_layers);
return Err(e);
}
};
generated_layers.push(SplitWriterResult::Produced(layer));
}
if discard(&layer_key).await {
generated_layers.push(SplitWriterResult::Discarded(layer_key));
} else {
let (desc, path) = inner.finish_with_end_key(end_key, ctx).await?;
let layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
generated_layers.push(SplitWriterResult::Produced(layer));
}
// END: catch every error and do the recovery in the section above
Ok(generated_layers)
}
@@ -192,6 +187,11 @@ impl SplitImageLayerWriter {
self.finish_with_discard_fn(tline, ctx, end_key, |_| async { false })
.await
}
/// This function will be deprecated with #8841.
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, ImageLayerWriter)> {
Ok((self.generated_layers, self.inner))
}
}
/// A delta writer that takes key-lsn-values and produces multiple delta layers.
@@ -206,7 +206,7 @@ impl SplitImageLayerWriter {
pub struct SplitDeltaLayerWriter {
inner: Option<(Key, DeltaLayerWriter)>,
target_layer_size: u64,
generated_layer_writers: Vec<(DeltaLayerWriter, PersistentLayerKey)>,
generated_layers: Vec<SplitWriterResult>,
conf: &'static PageServerConf,
timeline_id: TimelineId,
tenant_shard_id: TenantShardId,
@@ -225,7 +225,7 @@ impl SplitDeltaLayerWriter {
Ok(Self {
target_layer_size,
inner: None,
generated_layer_writers: Vec::new(),
generated_layers: Vec::new(),
conf,
timeline_id,
tenant_shard_id,
@@ -234,13 +234,20 @@ impl SplitDeltaLayerWriter {
})
}
pub async fn put_value(
/// Put a value into the layer writer. If the writer decides to produce a layer and the discard fn returns true, no layer will be written in the end.
pub async fn put_value_with_discard_fn<D, F>(
&mut self,
key: Key,
lsn: Lsn,
val: Value,
tline: &Arc<Timeline>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
discard: D,
) -> anyhow::Result<()>
where
D: FnOnce(&PersistentLayerKey) -> F,
F: Future<Output = bool>,
{
// The current estimate is the key size plus the LSN size plus the estimated value size. It is
// not exact, so the final layer size could be slightly larger or smaller than the target.
//
@@ -284,8 +291,16 @@ impl SplitDeltaLayerWriter {
lsn_range: self.lsn_range.clone(),
is_delta: true,
};
self.generated_layer_writers
.push((prev_delta_writer, layer_key));
if discard(&layer_key).await {
drop(prev_delta_writer);
self.generated_layers
.push(SplitWriterResult::Discarded(layer_key));
} else {
let (desc, path) = prev_delta_writer.finish(key, ctx).await?;
let delta_layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
self.generated_layers
.push(SplitWriterResult::Produced(delta_layer));
}
} else if inner.estimated_size() >= S3_UPLOAD_LIMIT {
// We would have to produce a very large file because a key is updated too often.
anyhow::bail!(
@@ -300,68 +315,52 @@ impl SplitDeltaLayerWriter {
inner.put_value(key, lsn, val, ctx).await
}
pub async fn put_value(
&mut self,
key: Key,
lsn: Lsn,
val: Value,
tline: &Arc<Timeline>,
ctx: &RequestContext,
) -> anyhow::Result<()> {
self.put_value_with_discard_fn(key, lsn, val, tline, ctx, |_| async { false })
.await
}
pub(crate) async fn finish_with_discard_fn<D, F>(
self,
tline: &Arc<Timeline>,
ctx: &RequestContext,
discard_fn: D,
discard: D,
) -> anyhow::Result<Vec<SplitWriterResult>>
where
D: Fn(&PersistentLayerKey) -> F,
D: FnOnce(&PersistentLayerKey) -> F,
F: Future<Output = bool>,
{
let Self {
mut generated_layer_writers,
mut generated_layers,
inner,
..
} = self;
if let Some((start_key, writer)) = inner {
if writer.num_keys() != 0 {
let end_key = self.last_key_written.next();
let layer_key = PersistentLayerKey {
key_range: start_key..end_key,
lsn_range: self.lsn_range.clone(),
is_delta: true,
};
generated_layer_writers.push((writer, layer_key));
}
}
let clean_up_layers = |generated_layers: Vec<SplitWriterResult>| {
for produced_layer in generated_layers {
if let SplitWriterResult::Produced(delta_layer) = produced_layer {
let layer: Layer = delta_layer.into();
layer.delete_on_drop();
}
}
let Some((start_key, inner)) = inner else {
return Ok(generated_layers);
};
// BEGIN: catch every error and do the recovery in the section below
let mut generated_layers = Vec::new();
for (inner, layer_key) in generated_layer_writers {
if discard_fn(&layer_key).await {
generated_layers.push(SplitWriterResult::Discarded(layer_key));
} else {
let layer = match inner.finish(layer_key.key_range.end, ctx).await {
Ok((desc, path)) => {
match Layer::finish_creating(self.conf, tline, desc, &path) {
Ok(layer) => layer,
Err(e) => {
tokio::fs::remove_file(&path).await.ok();
clean_up_layers(generated_layers);
return Err(e);
}
}
}
Err(e) => {
// DeltaLayerWriter::finish will clean up the temporary layer if anything goes wrong,
// so we don't need to remove the layer we just failed to create ourselves.
clean_up_layers(generated_layers);
return Err(e);
}
};
generated_layers.push(SplitWriterResult::Produced(layer));
}
if inner.num_keys() == 0 {
return Ok(generated_layers);
}
let end_key = self.last_key_written.next();
let layer_key = PersistentLayerKey {
key_range: start_key..end_key,
lsn_range: self.lsn_range.clone(),
is_delta: true,
};
if discard(&layer_key).await {
generated_layers.push(SplitWriterResult::Discarded(layer_key));
} else {
let (desc, path) = inner.finish(end_key, ctx).await?;
let delta_layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
generated_layers.push(SplitWriterResult::Produced(delta_layer));
}
// END: catch every error and do the recovery in the section above
Ok(generated_layers)
}
@@ -374,6 +373,11 @@ impl SplitDeltaLayerWriter {
self.finish_with_discard_fn(tline, ctx, |_| async { false })
.await
}
/// This function will be deprecated with #8841.
pub(crate) fn take(self) -> anyhow::Result<(Vec<SplitWriterResult>, Option<DeltaLayerWriter>)> {
Ok((self.generated_layers, self.inner.map(|x| x.1)))
}
}
#[cfg(test)]
@@ -443,7 +447,7 @@ mod tests {
.unwrap();
image_writer
.put_image(get_key(0), get_img(0), &ctx)
.put_image(get_key(0), get_img(0), &tline, &ctx)
.await
.unwrap();
let layers = image_writer
@@ -453,7 +457,13 @@ mod tests {
assert_eq!(layers.len(), 1);
delta_writer
.put_value(get_key(0), Lsn(0x18), Value::Image(get_img(0)), &ctx)
.put_value(
get_key(0),
Lsn(0x18),
Value::Image(get_img(0)),
&tline,
&ctx,
)
.await
.unwrap();
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
@@ -476,18 +486,14 @@ mod tests {
#[tokio::test]
async fn write_split() {
// Test the split writer with retaining all the layers we have produced (discard=false)
write_split_helper("split_writer_write_split", false).await;
}
#[tokio::test]
async fn write_split_discard() {
// Test the split writer with discarding all the layers we have produced (discard=true)
write_split_helper("split_writer_write_split_discard", true).await;
write_split_helper("split_writer_write_split_discard", false).await;
}
/// Test the image+delta writer by writing a large number of images and deltas. If discard is
/// set to true, all layers will be discarded.
async fn write_split_helper(harness_name: &'static str, discard: bool) {
let harness = TenantHarness::create(harness_name).await.unwrap();
let (tenant, ctx) = harness.load().await;
@@ -521,63 +527,69 @@ mod tests {
for i in 0..N {
let i = i as u32;
image_writer
.put_image(get_key(i), get_large_img(), &ctx)
.put_image_with_discard_fn(get_key(i), get_large_img(), &tline, &ctx, |_| async {
discard
})
.await
.unwrap();
delta_writer
.put_value(get_key(i), Lsn(0x20), Value::Image(get_large_img()), &ctx)
.put_value_with_discard_fn(
get_key(i),
Lsn(0x20),
Value::Image(get_large_img()),
&tline,
&ctx,
|_| async { discard },
)
.await
.unwrap();
}
let image_layers = image_writer
.finish_with_discard_fn(&tline, &ctx, get_key(N as u32), |_| async { discard })
.finish(&tline, &ctx, get_key(N as u32))
.await
.unwrap();
let delta_layers = delta_writer
.finish_with_discard_fn(&tline, &ctx, |_| async { discard })
.await
.unwrap();
let image_layers = image_layers
.into_iter()
.map(|x| {
if discard {
x.into_discarded_layer()
} else {
x.into_resident_layer().layer_desc().key()
let delta_layers = delta_writer.finish(&tline, &ctx).await.unwrap();
if discard {
for layer in image_layers {
layer.into_discarded_layer();
}
for layer in delta_layers {
layer.into_discarded_layer();
}
} else {
let image_layers = image_layers
.into_iter()
.map(|x| x.into_resident_layer())
.collect_vec();
let delta_layers = delta_layers
.into_iter()
.map(|x| x.into_resident_layer())
.collect_vec();
assert_eq!(image_layers.len(), N / 512 + 1);
assert_eq!(delta_layers.len(), N / 512 + 1);
assert_eq!(
delta_layers.first().unwrap().layer_desc().key_range.start,
get_key(0)
);
assert_eq!(
delta_layers.last().unwrap().layer_desc().key_range.end,
get_key(N as u32)
);
for idx in 0..image_layers.len() {
assert_ne!(image_layers[idx].layer_desc().key_range.start, Key::MIN);
assert_ne!(image_layers[idx].layer_desc().key_range.end, Key::MAX);
assert_ne!(delta_layers[idx].layer_desc().key_range.start, Key::MIN);
assert_ne!(delta_layers[idx].layer_desc().key_range.end, Key::MAX);
if idx > 0 {
assert_eq!(
image_layers[idx - 1].layer_desc().key_range.end,
image_layers[idx].layer_desc().key_range.start
);
assert_eq!(
delta_layers[idx - 1].layer_desc().key_range.end,
delta_layers[idx].layer_desc().key_range.start
);
}
})
.collect_vec();
let delta_layers = delta_layers
.into_iter()
.map(|x| {
if discard {
x.into_discarded_layer()
} else {
x.into_resident_layer().layer_desc().key()
}
})
.collect_vec();
assert_eq!(image_layers.len(), N / 512 + 1);
assert_eq!(delta_layers.len(), N / 512 + 1);
assert_eq!(delta_layers.first().unwrap().key_range.start, get_key(0));
assert_eq!(
delta_layers.last().unwrap().key_range.end,
get_key(N as u32)
);
for idx in 0..image_layers.len() {
assert_ne!(image_layers[idx].key_range.start, Key::MIN);
assert_ne!(image_layers[idx].key_range.end, Key::MAX);
assert_ne!(delta_layers[idx].key_range.start, Key::MIN);
assert_ne!(delta_layers[idx].key_range.end, Key::MAX);
if idx > 0 {
assert_eq!(
image_layers[idx - 1].key_range.end,
image_layers[idx].key_range.start
);
assert_eq!(
delta_layers[idx - 1].key_range.end,
delta_layers[idx].key_range.start
);
}
}
}
@@ -617,11 +629,11 @@ mod tests {
.unwrap();
image_writer
.put_image(get_key(0), get_img(0), &ctx)
.put_image(get_key(0), get_img(0), &tline, &ctx)
.await
.unwrap();
image_writer
.put_image(get_key(1), get_large_img(), &ctx)
.put_image(get_key(1), get_large_img(), &tline, &ctx)
.await
.unwrap();
let layers = image_writer
@@ -631,11 +643,23 @@ mod tests {
assert_eq!(layers.len(), 2);
delta_writer
.put_value(get_key(0), Lsn(0x18), Value::Image(get_img(0)), &ctx)
.put_value(
get_key(0),
Lsn(0x18),
Value::Image(get_img(0)),
&tline,
&ctx,
)
.await
.unwrap();
delta_writer
.put_value(get_key(1), Lsn(0x1A), Value::Image(get_large_img()), &ctx)
.put_value(
get_key(1),
Lsn(0x1A),
Value::Image(get_large_img()),
&tline,
&ctx,
)
.await
.unwrap();
let layers = delta_writer.finish(&tline, &ctx).await.unwrap();
@@ -699,6 +723,7 @@ mod tests {
get_key(0),
Lsn(i as u64 * 16 + 0x10),
Value::Image(get_large_img()),
&tline,
&ctx,
)
.await

View File

@@ -371,7 +371,7 @@ pub struct Timeline {
/// Prevent two tasks from deleting the timeline at the same time. If held, the
/// timeline is being deleted. If 'true', the timeline has already been deleted.
pub delete_progress: TimelineDeleteProgress,
pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,
@@ -426,8 +426,6 @@ pub struct Timeline {
pub(crate) attach_wal_lag_cooldown: Arc<OnceLock<WalLagCooldown>>,
}
pub type TimelineDeleteProgress = Arc<tokio::sync::Mutex<DeleteTimelineFlow>>;
pub struct WalReceiverInfo {
pub wal_source_connconf: PgConnectionConfig,
pub last_received_msg_lsn: Lsn,
@@ -2252,7 +2250,7 @@ impl Timeline {
eviction_task_timeline_state: tokio::sync::Mutex::new(
EvictionTaskTimelineState::default(),
),
delete_progress: TimelineDeleteProgress::default(),
delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTimelineFlow::default())),
cancel,
gate: Gate::default(),
@@ -2404,7 +2402,7 @@ impl Timeline {
pub(super) async fn load_layer_map(
&self,
disk_consistent_lsn: Lsn,
index_part: IndexPart,
index_part: Option<IndexPart>,
) -> anyhow::Result<()> {
use init::{Decision::*, Discovered, DismissedLayer};
use LayerName::*;
@@ -2468,7 +2466,8 @@ impl Timeline {
);
}
let decided = init::reconcile(discovered_layers, &index_part, disk_consistent_lsn);
let decided =
init::reconcile(discovered_layers, index_part.as_ref(), disk_consistent_lsn);
let mut loaded_layers = Vec::new();
let mut needs_cleanup = Vec::new();

View File

@@ -121,12 +121,18 @@ impl KeyHistoryRetention {
async fn pipe_to(
self,
key: Key,
tline: &Arc<Timeline>,
delta_writer: &mut SplitDeltaLayerWriter,
mut image_writer: Option<&mut SplitImageLayerWriter>,
stat: &mut CompactionStatistics,
dry_run: bool,
ctx: &RequestContext,
) -> anyhow::Result<()> {
let mut first_batch = true;
let discard = |key: &PersistentLayerKey| {
let key = key.clone();
async move { Self::discard_key(&key, tline, dry_run).await }
};
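The `discard` closure above clones the key before entering the async block because a closure returning a future cannot let that future borrow the closure's argument. A self-contained sketch of the same pattern with hypothetical names (assumes tokio with the rt and macros features):

// Hypothetical stand-in types, for illustration only.
#[derive(Clone, Debug)]
struct LayerKey(String);

async fn should_discard(key: &LayerKey, dry_run: bool) -> bool {
    dry_run && !key.0.is_empty()
}

#[tokio::main]
async fn main() {
    let dry_run = true;
    let discard = |key: &LayerKey| {
        let key = key.clone(); // move an owned copy into the future
        async move { should_discard(&key, dry_run).await }
    };
    assert!(discard(&LayerKey("delta".into())).await);
}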
for (cutoff_lsn, KeyLogAtLsn(logs)) in self.below_horizon {
if first_batch {
if logs.len() == 1 && logs[0].1.is_image() {
@@ -135,30 +141,45 @@ impl KeyHistoryRetention {
};
stat.produce_image_key(img);
if let Some(image_writer) = image_writer.as_mut() {
image_writer.put_image(key, img.clone(), ctx).await?;
image_writer
.put_image_with_discard_fn(key, img.clone(), tline, ctx, discard)
.await?;
} else {
delta_writer
.put_value(key, cutoff_lsn, Value::Image(img.clone()), ctx)
.put_value_with_discard_fn(
key,
cutoff_lsn,
Value::Image(img.clone()),
tline,
ctx,
discard,
)
.await?;
}
} else {
for (lsn, val) in logs {
stat.produce_key(&val);
delta_writer.put_value(key, lsn, val, ctx).await?;
delta_writer
.put_value_with_discard_fn(key, lsn, val, tline, ctx, discard)
.await?;
}
}
first_batch = false;
} else {
for (lsn, val) in logs {
stat.produce_key(&val);
delta_writer.put_value(key, lsn, val, ctx).await?;
delta_writer
.put_value_with_discard_fn(key, lsn, val, tline, ctx, discard)
.await?;
}
}
}
let KeyLogAtLsn(above_horizon_logs) = self.above_horizon;
for (lsn, val) in above_horizon_logs {
stat.produce_key(&val);
delta_writer.put_value(key, lsn, val, ctx).await?;
delta_writer
.put_value_with_discard_fn(key, lsn, val, tline, ctx, discard)
.await?;
}
Ok(())
}
@@ -1969,9 +1990,11 @@ impl Timeline {
retention
.pipe_to(
*last_key,
self,
&mut delta_layer_writer,
image_layer_writer.as_mut(),
&mut stat,
dry_run,
ctx,
)
.await?;
@@ -1998,9 +2021,11 @@ impl Timeline {
retention
.pipe_to(
last_key,
self,
&mut delta_layer_writer,
image_layer_writer.as_mut(),
&mut stat,
dry_run,
ctx,
)
.await?;
@@ -2016,7 +2041,8 @@ impl Timeline {
.finish_with_discard_fn(self, ctx, Key::MAX, discard)
.await?
} else {
drop(writer);
let (layers, _) = writer.take()?;
assert!(layers.is_empty(), "image layers produced in dry run mode?");
Vec::new()
}
} else {
@@ -2028,7 +2054,8 @@ impl Timeline {
.finish_with_discard_fn(self, ctx, discard)
.await?
} else {
drop(delta_layer_writer);
let (layers, _) = delta_layer_writer.take()?;
assert!(layers.is_empty(), "delta layers produced in dry run mode?");
Vec::new()
};

View File

@@ -14,9 +14,7 @@ use crate::{
task_mgr::{self, TaskKind},
tenant::{
metadata::TimelineMetadata,
remote_timeline_client::{
self, PersistIndexPartWithDeletedFlagError, RemoteTimelineClient,
},
remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
},
};
@@ -27,9 +25,12 @@ use super::{Timeline, TimelineResources};
/// during attach or pageserver restart.
/// See comment in persist_index_part_with_deleted_flag.
async fn set_deleted_in_remote_index(
remote_client: &Arc<RemoteTimelineClient>,
timeline: &TimelineOrOffloaded,
) -> Result<(), DeleteTimelineError> {
let res = remote_client.persist_index_part_with_deleted_flag().await;
let res = timeline
.remote_client()
.persist_index_part_with_deleted_flag()
.await;
match res {
// If we (now, or already) marked it successfully as deleted, we can proceed
Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
@@ -128,10 +129,12 @@ pub(super) async fn delete_local_timeline_directory(
}
/// Removes the remote layers and then the index file.
async fn delete_remote_layers_and_index(
remote_client: &Arc<RemoteTimelineClient>,
) -> anyhow::Result<()> {
remote_client.delete_all().await.context("delete_all")
async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> {
timeline
.remote_client()
.delete_all()
.await
.context("delete_all")
}
/// It is important that this gets called when DeletionGuard is being held.
@@ -176,32 +179,6 @@ async fn remove_maybe_offloaded_timeline_from_tenant(
Ok(())
}
/// It is important that this gets called when DeletionGuard is being held.
/// For more context see comments in [`DeleteTimelineFlow::prepare`]
async fn upload_new_tenant_manifest(
tenant: &Tenant,
_: &DeletionGuard, // using it as a witness
) -> anyhow::Result<()> {
// This is susceptible to race conditions, i.e. we won't continue deletions if there is a crash
// between the deletion of the index-part.json and reaching this code.
// The tenant manifest might therefore refer to an offloaded timeline which has already been deleted.
// However, we handle this case in the tenant loading code, so the issue is
// resolved the next time we attach.
let manifest = tenant.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
remote_timeline_client::upload_tenant_manifest(
&tenant.remote_storage,
&tenant.tenant_shard_id,
generation,
&manifest,
&tenant.cancel,
)
.await?;
Ok(())
}
/// Orchestrates timeline shut down of all timeline tasks, removes its in-memory structures,
/// and deletes its data from both disk and s3.
/// The sequence of steps:
@@ -258,8 +235,7 @@ impl DeleteTimelineFlow {
))?
});
let remote_client = timeline.remote_client_maybe_construct(tenant);
set_deleted_in_remote_index(&remote_client).await?;
set_deleted_in_remote_index(&timeline).await?;
fail::fail_point!("timeline-delete-before-schedule", |_| {
Err(anyhow::anyhow!(
@@ -267,13 +243,7 @@ impl DeleteTimelineFlow {
))?
});
Self::schedule_background(
guard,
tenant.conf,
Arc::clone(tenant),
timeline,
remote_client,
);
Self::schedule_background(guard, tenant.conf, Arc::clone(tenant), timeline);
Ok(())
}
@@ -331,9 +301,8 @@ impl DeleteTimelineFlow {
guard.mark_in_progress()?;
let remote_client = timeline.remote_client.clone();
let timeline = TimelineOrOffloaded::Timeline(timeline);
Self::schedule_background(guard, tenant.conf, tenant, timeline, remote_client);
Self::schedule_background(guard, tenant.conf, tenant, timeline);
Ok(())
}
@@ -411,7 +380,6 @@ impl DeleteTimelineFlow {
conf: &'static PageServerConf,
tenant: Arc<Tenant>,
timeline: TimelineOrOffloaded,
remote_client: Arc<RemoteTimelineClient>,
) {
let tenant_shard_id = timeline.tenant_shard_id();
let timeline_id = timeline.timeline_id();
@@ -423,7 +391,7 @@ impl DeleteTimelineFlow {
Some(timeline_id),
"timeline_delete",
async move {
if let Err(err) = Self::background(guard, conf, &tenant, &timeline, remote_client).await {
if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
error!("Error: {err:#}");
if let TimelineOrOffloaded::Timeline(timeline) = timeline {
timeline.set_broken(format!("{err:#}"))
@@ -440,7 +408,6 @@ impl DeleteTimelineFlow {
conf: &PageServerConf,
tenant: &Tenant,
timeline: &TimelineOrOffloaded,
remote_client: Arc<RemoteTimelineClient>,
) -> Result<(), DeleteTimelineError> {
// Offloaded timelines have no local state
// TODO: once we persist offloaded information, delete the timeline from there, too
@@ -448,14 +415,12 @@ impl DeleteTimelineFlow {
delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
}
delete_remote_layers_and_index(&remote_client).await?;
delete_remote_layers_and_index(timeline).await?;
pausable_failpoint!("in_progress_delete");
remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;
upload_new_tenant_manifest(tenant, &guard).await?;
*guard = Self::Finished;
Ok(())

View File

@@ -125,9 +125,19 @@ pub(super) enum DismissedLayer {
/// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
pub(super) fn reconcile(
local_layers: Vec<(LayerName, LocalLayerFileMetadata)>,
index_part: &IndexPart,
index_part: Option<&IndexPart>,
disk_consistent_lsn: Lsn,
) -> Vec<(LayerName, Result<Decision, DismissedLayer>)> {
let Some(index_part) = index_part else {
// If we have no remote metadata, no local layer files are considered valid to load
return local_layers
.into_iter()
.map(|(layer_name, local_metadata)| {
(layer_name, Err(DismissedLayer::LocalOnly(local_metadata)))
})
.collect();
};
let mut result = Vec::new();
let mut remote_layers = HashMap::new();

View File

@@ -1,17 +1,17 @@
use std::sync::Arc;
use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
use super::Timeline;
use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::tenant::{remote_timeline_client, OffloadedTimeline, Tenant, TimelineOrOffloaded};
use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};
use super::{
delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
Timeline,
};
pub(crate) async fn offload_timeline(
tenant: &Tenant,
timeline: &Arc<Timeline>,
) -> anyhow::Result<()> {
debug_assert_current_span_has_tenant_and_timeline_id();
tracing::info!("offloading archived timeline");
let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;
let TimelineOrOffloaded::Timeline(timeline) = timeline else {
@@ -19,28 +19,14 @@ pub(crate) async fn offload_timeline(
return Ok(());
};
let is_archived = timeline.is_archived();
match is_archived {
Some(true) => (),
Some(false) => {
tracing::warn!(?is_archived, "tried offloading a non-archived timeline");
anyhow::bail!("timeline isn't archived");
}
None => {
tracing::warn!(
?is_archived,
"tried offloading a timeline where manifest is not yet available"
);
anyhow::bail!("timeline manifest hasn't been loaded yet");
}
}
// Now that the Timeline is in Stopping state, request all the related tasks to shut down.
timeline.shutdown(super::ShutdownMode::Hard).await;
// TODO extend guard mechanism above with method
// to make deletions possible while offloading is in progress
// TODO mark timeline as offloaded in S3
let conf = &tenant.conf;
delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await?;
@@ -50,31 +36,10 @@ pub(crate) async fn offload_timeline(
let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
offloaded_timelines.insert(
timeline.timeline_id,
Arc::new(
OffloadedTimeline::from_timeline(&timeline)
.expect("we checked above that timeline was ready"),
),
Arc::new(OffloadedTimeline::from_timeline(&timeline)),
);
}
// Last step: mark timeline as offloaded in S3
// TODO: maybe move this step above, right above deletion of the local timeline directory,
// so that there is no potential race condition where we partially offload a timeline and
// then attach it again at the next restart.
// For that to happen, we'd need to make the manifest reflect our *intended* state,
// not our actual state of offloaded timelines.
let manifest = tenant.tenant_manifest();
// TODO: generation support
let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
remote_timeline_client::upload_tenant_manifest(
&tenant.remote_storage,
&tenant.tenant_shard_id,
generation,
&manifest,
&tenant.cancel,
)
.await?;
Ok(())
}

View File

@@ -18,7 +18,7 @@
use std::collections::BTreeMap;
use std::ops::Deref;
use bytes::Bytes;
use bytes::{Bytes, BytesMut};
use pageserver_api::key::Key;
use tokio::io::AsyncWriteExt;
use tokio_epoll_uring::BoundedBuf;
@@ -27,7 +27,6 @@ use utils::vec_map::VecMap;
use crate::context::RequestContext;
use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK};
use crate::virtual_file::IoBufferMut;
use crate::virtual_file::{self, VirtualFile};
/// Metadata bundled with the start and end offset of a blob.
@@ -159,7 +158,7 @@ impl std::fmt::Display for VectoredBlob {
/// Return type of [`VectoredBlobReader::read_blobs`]
pub struct VectoredBlobsBuf {
/// Buffer for all blobs in this read
pub buf: IoBufferMut,
pub buf: BytesMut,
/// Offsets into the buffer and metadata for all blobs in this read
pub blobs: Vec<VectoredBlob>,
}
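The `BytesMut` buffer this revert reinstates is filled by the reader and then frozen into an immutable `bytes::Bytes`, which the blob views can slice without copying. A minimal sketch of that flow (assumes only the bytes crate):

use bytes::{BufMut, BytesMut};

fn main() {
    let mut buf = BytesMut::with_capacity(16);
    buf.put_slice(b"hello blobs");
    // Zero-copy conversion into an immutable, cheaply cloneable Bytes.
    let frozen = buf.freeze();
    // Slicing shares the same allocation instead of copying.
    let view = frozen.slice(6..);
    assert_eq!(&view[..], b"blobs");
}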
@@ -442,7 +441,7 @@ impl<'a> VectoredBlobReader<'a> {
pub async fn read_blobs(
&self,
read: &VectoredRead,
buf: IoBufferMut,
buf: BytesMut,
ctx: &RequestContext,
) -> Result<VectoredBlobsBuf, std::io::Error> {
assert!(read.size() > 0);
@@ -917,7 +916,7 @@ mod tests {
// Multiply by two (compressed data might need more space), and add a few bytes for the header
let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;
let mut buf = IoBufferMut::with_capacity(reserved_bytes);
let mut buf = BytesMut::with_capacity(reserved_bytes);
let vectored_blob_reader = VectoredBlobReader::new(&file);
let meta = BlobMeta {

View File

@@ -18,9 +18,6 @@ use crate::page_cache::{PageWriteGuard, PAGE_SZ};
use crate::tenant::TENANTS_SEGMENT_NAME;
use camino::{Utf8Path, Utf8PathBuf};
use once_cell::sync::OnceCell;
use owned_buffers_io::aligned_buffer::buffer::AlignedBuffer;
use owned_buffers_io::aligned_buffer::{AlignedBufferMut, AlignedSlice, ConstAlign};
use owned_buffers_io::io_buf_aligned::IoBufAlignedMut;
use owned_buffers_io::io_buf_ext::FullSlice;
use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
use pageserver_api::shard::TenantShardId;
@@ -58,8 +55,6 @@ pub(crate) mod owned_buffers_io {
//! but for the time being we're proving out the primitives in the neon.git repo
//! for faster iteration.
pub(crate) mod aligned_buffer;
pub(crate) mod io_buf_aligned;
pub(crate) mod io_buf_ext;
pub(crate) mod slice;
pub(crate) mod write;
@@ -201,7 +196,7 @@ impl VirtualFile {
ctx: &RequestContext,
) -> Result<Slice<Buf>, Error>
where
Buf: IoBufAlignedMut + Send,
Buf: IoBufMut + Send,
{
self.inner.read_exact_at(slice, offset, ctx).await
}
@@ -776,7 +771,7 @@ impl VirtualFileInner {
ctx: &RequestContext,
) -> Result<Slice<Buf>, Error>
where
Buf: IoBufAlignedMut + Send,
Buf: IoBufMut + Send,
{
let assert_we_return_original_bounds = if cfg!(debug_assertions) {
Some((slice.stable_ptr() as usize, slice.bytes_total()))
@@ -1227,14 +1222,12 @@ impl VirtualFileInner {
ctx: &RequestContext,
) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
use crate::page_cache::PAGE_SZ;
let slice = IoBufferMut::with_capacity(PAGE_SZ).slice_full();
let slice = Vec::with_capacity(PAGE_SZ).slice_full();
assert_eq!(slice.bytes_total(), PAGE_SZ);
let slice = self
.read_exact_at(slice, blknum as u64 * (PAGE_SZ as u64), ctx)
.await?;
Ok(crate::tenant::block_io::BlockLease::IoBufferMut(
slice.into_inner(),
))
Ok(crate::tenant::block_io::BlockLease::Vec(slice.into_inner()))
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
@@ -1332,11 +1325,10 @@ impl OpenFiles {
/// server startup.
///
#[cfg(not(test))]
pub fn init(num_slots: usize, engine: IoEngineKind, mode: IoMode) {
pub fn init(num_slots: usize, engine: IoEngineKind) {
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
panic!("virtual_file::init called twice");
}
set_io_mode(mode);
io_engine::init(engine);
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
}
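The `OPEN_FILES.set(...)` guard above makes double initialization a hard failure. A reduced sketch of the same init-once pattern (the static and function here are simplified stand-ins; assumes the once_cell crate):

use once_cell::sync::OnceCell;

static OPEN_FILES: OnceCell<usize> = OnceCell::new();

fn init(num_slots: usize) {
    // set() fails if the cell was already initialized, so a second init()
    // panics instead of silently reconfiguring the process.
    if OPEN_FILES.set(num_slots).is_err() {
        panic!("virtual_file::init called twice");
    }
}

fn main() {
    init(100);
    assert_eq!(OPEN_FILES.get(), Some(&100));
    // A second init(200) here would panic.
}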
@@ -1365,11 +1357,6 @@ pub(crate) const fn get_io_buffer_alignment() -> usize {
DEFAULT_IO_BUFFER_ALIGNMENT
}
pub(crate) type IoBufferMut = AlignedBufferMut<ConstAlign<{ get_io_buffer_alignment() }>>;
pub(crate) type IoBuffer = AlignedBuffer<ConstAlign<{ get_io_buffer_alignment() }>>;
pub(crate) type IoPageSlice<'a> =
AlignedSlice<'a, PAGE_SZ, ConstAlign<{ get_io_buffer_alignment() }>>;
static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
pub(crate) fn set_io_mode(mode: IoMode) {
@@ -1408,10 +1395,10 @@ mod tests {
impl MaybeVirtualFile {
async fn read_exact_at(
&self,
mut slice: tokio_epoll_uring::Slice<IoBufferMut>,
mut slice: tokio_epoll_uring::Slice<Vec<u8>>,
offset: u64,
ctx: &RequestContext,
) -> Result<tokio_epoll_uring::Slice<IoBufferMut>, Error> {
) -> Result<tokio_epoll_uring::Slice<Vec<u8>>, Error> {
match self {
MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(slice, offset, ctx).await,
MaybeVirtualFile::File(file) => {
@@ -1479,13 +1466,12 @@ mod tests {
len: usize,
ctx: &RequestContext,
) -> Result<String, Error> {
let slice = IoBufferMut::with_capacity(len).slice_full();
let slice = Vec::with_capacity(len).slice_full();
assert_eq!(slice.bytes_total(), len);
let slice = self.read_exact_at(slice, pos, ctx).await?;
let buf = slice.into_inner();
assert_eq!(buf.len(), len);
Ok(String::from_utf8(buf.to_vec()).unwrap())
let vec = slice.into_inner();
assert_eq!(vec.len(), len);
Ok(String::from_utf8(vec).unwrap())
}
}
@@ -1709,7 +1695,7 @@ mod tests {
let files = files.clone();
let ctx = ctx.detached_child(TaskKind::UnitTest, DownloadBehavior::Error);
let hdl = rt.spawn(async move {
let mut buf = IoBufferMut::with_capacity_zeroed(SIZE);
let mut buf = vec![0u8; SIZE];
let mut rng = rand::rngs::OsRng;
for _ in 1..1000 {
let f = &files[rng.gen_range(0..files.len())];
@@ -1718,7 +1704,7 @@ mod tests {
.await
.unwrap()
.into_inner();
assert!(buf[..] == SAMPLE);
assert!(buf == SAMPLE);
}
});
hdls.push(hdl);

View File

@@ -1,9 +0,0 @@
pub mod alignment;
pub mod buffer;
pub mod buffer_mut;
pub mod raw;
pub mod slice;
pub use alignment::*;
pub use buffer_mut::AlignedBufferMut;
pub use slice::AlignedSlice;

View File

@@ -1,26 +0,0 @@
pub trait Alignment: std::marker::Unpin + 'static {
/// Returns the required alignments.
fn align(&self) -> usize;
}
/// Alignment at compile time.
#[derive(Debug)]
pub struct ConstAlign<const A: usize>;
impl<const A: usize> Alignment for ConstAlign<A> {
fn align(&self) -> usize {
A
}
}
/// Alignment at run time.
#[derive(Debug)]
pub struct RuntimeAlign {
align: usize,
}
impl Alignment for RuntimeAlign {
fn align(&self) -> usize {
self.align
}
}

View File

@@ -1,124 +0,0 @@
use std::{
ops::{Deref, Range, RangeBounds},
sync::Arc,
};
use super::{alignment::Alignment, raw::RawAlignedBuffer};
/// A shared, immutable aligned buffer type.
pub struct AlignedBuffer<A: Alignment> {
/// Shared raw buffer.
raw: Arc<RawAlignedBuffer<A>>,
/// Range that specifies the current slice.
range: Range<usize>,
}
impl<A: Alignment> AlignedBuffer<A> {
/// Creates an immutable `IoBuffer` from the raw buffer
pub(super) fn from_raw(raw: RawAlignedBuffer<A>, range: Range<usize>) -> Self {
AlignedBuffer {
raw: Arc::new(raw),
range,
}
}
/// Returns the number of bytes in the buffer, also referred to as its 'length'.
#[inline]
pub fn len(&self) -> usize {
self.range.len()
}
/// Returns the alignment of the buffer.
#[inline]
pub fn align(&self) -> usize {
self.raw.align()
}
#[inline]
fn as_ptr(&self) -> *const u8 {
// SAFETY: `self.range.start` is guaranteed to be within [0, self.len()).
unsafe { self.raw.as_ptr().add(self.range.start) }
}
/// Extracts a slice containing the entire buffer.
///
/// Equivalent to `&s[..]`.
#[inline]
fn as_slice(&self) -> &[u8] {
&self.raw.as_slice()[self.range.start..self.range.end]
}
/// Returns a slice of self for the index range `[begin..end)`.
pub fn slice(&self, range: impl RangeBounds<usize>) -> Self {
use core::ops::Bound;
let len = self.len();
let begin = match range.start_bound() {
Bound::Included(&n) => n,
Bound::Excluded(&n) => n.checked_add(1).expect("out of range"),
Bound::Unbounded => 0,
};
let end = match range.end_bound() {
Bound::Included(&n) => n.checked_add(1).expect("out of range"),
Bound::Excluded(&n) => n,
Bound::Unbounded => len,
};
assert!(
begin <= end,
"range start must not be greater than end: {:?} <= {:?}",
begin,
end,
);
assert!(
end <= len,
"range end out of bounds: {:?} <= {:?}",
end,
len,
);
let begin = self.range.start + begin;
let end = self.range.start + end;
AlignedBuffer {
raw: Arc::clone(&self.raw),
range: begin..end,
}
}
}
impl<A: Alignment> Deref for AlignedBuffer<A> {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl<A: Alignment> AsRef<[u8]> for AlignedBuffer<A> {
fn as_ref(&self) -> &[u8] {
self.as_slice()
}
}
impl<A: Alignment> PartialEq<[u8]> for AlignedBuffer<A> {
fn eq(&self, other: &[u8]) -> bool {
self.as_slice().eq(other)
}
}
/// SAFETY: the underlying buffer references a stable memory region.
unsafe impl<A: Alignment> tokio_epoll_uring::IoBuf for AlignedBuffer<A> {
fn stable_ptr(&self) -> *const u8 {
self.as_ptr()
}
fn bytes_init(&self) -> usize {
self.len()
}
fn bytes_total(&self) -> usize {
self.len()
}
}

View File

@@ -1,347 +0,0 @@
use std::ops::{Deref, DerefMut};
use super::{
alignment::{Alignment, ConstAlign},
buffer::AlignedBuffer,
raw::RawAlignedBuffer,
};
/// A mutable aligned buffer type.
#[derive(Debug)]
pub struct AlignedBufferMut<A: Alignment> {
raw: RawAlignedBuffer<A>,
}
impl<const A: usize> AlignedBufferMut<ConstAlign<A>> {
/// Constructs a new, empty `IoBufferMut` with at least the specified capacity and alignment.
///
/// The buffer will be able to hold at most `capacity` elements and will never resize.
///
/// # Panics
///
/// Panics if the new capacity exceeds `isize::MAX` _bytes_, or if the following alignment requirements are not met:
/// * `align` must not be zero,
///
/// * `align` must be a power of two,
///
/// * `capacity`, when rounded up to the nearest multiple of `align`,
/// must not overflow isize (i.e., the rounded value must be
/// less than or equal to `isize::MAX`).
pub fn with_capacity(capacity: usize) -> Self {
AlignedBufferMut {
raw: RawAlignedBuffer::with_capacity(capacity),
}
}
/// Constructs a new `IoBufferMut` with at least the specified capacity and alignment, filled with zeros.
pub fn with_capacity_zeroed(capacity: usize) -> Self {
use bytes::BufMut;
let mut buf = Self::with_capacity(capacity);
buf.put_bytes(0, capacity);
// SAFETY: `put_bytes` filled the entire buffer.
unsafe { buf.set_len(capacity) };
buf
}
}
impl<A: Alignment> AlignedBufferMut<A> {
/// Returns the total number of bytes the buffer can hold.
#[inline]
pub fn capacity(&self) -> usize {
self.raw.capacity()
}
/// Returns the alignment of the buffer.
#[inline]
pub fn align(&self) -> usize {
self.raw.align()
}
/// Returns the number of bytes in the buffer, also referred to as its 'length'.
#[inline]
pub fn len(&self) -> usize {
self.raw.len()
}
/// Force the length of the buffer to `new_len`.
#[inline]
unsafe fn set_len(&mut self, new_len: usize) {
self.raw.set_len(new_len)
}
#[inline]
fn as_ptr(&self) -> *const u8 {
self.raw.as_ptr()
}
#[inline]
fn as_mut_ptr(&mut self) -> *mut u8 {
self.raw.as_mut_ptr()
}
/// Extracts a slice containing the entire buffer.
///
/// Equivalent to `&s[..]`.
#[inline]
fn as_slice(&self) -> &[u8] {
self.raw.as_slice()
}
/// Extracts a mutable slice of the entire buffer.
///
/// Equivalent to `&mut s[..]`.
fn as_mut_slice(&mut self) -> &mut [u8] {
self.raw.as_mut_slice()
}
/// Drops all the contents of the buffer, setting its length to `0`.
#[inline]
pub fn clear(&mut self) {
self.raw.clear()
}
/// Reserves capacity for at least `additional` more bytes to be inserted
/// in the given `IoBufferMut`. The collection may reserve more space to
/// speculatively avoid frequent reallocations. After calling `reserve`,
/// capacity will be greater than or equal to `self.len() + additional`.
/// Does nothing if capacity is already sufficient.
///
/// # Panics
///
/// Panics if the new capacity exceeds `isize::MAX` _bytes_.
pub fn reserve(&mut self, additional: usize) {
self.raw.reserve(additional);
}
/// Shortens the buffer, keeping the first len bytes.
pub fn truncate(&mut self, len: usize) {
self.raw.truncate(len);
}
/// Consumes and leaks the `IoBufferMut`, returning a mutable reference to the contents, `&'a mut [u8]`.
pub fn leak<'a>(self) -> &'a mut [u8] {
self.raw.leak()
}
pub fn freeze(self) -> AlignedBuffer<A> {
let len = self.len();
AlignedBuffer::from_raw(self.raw, 0..len)
}
}
impl<A: Alignment> Deref for AlignedBufferMut<A> {
type Target = [u8];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl<A: Alignment> DerefMut for AlignedBufferMut<A> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.as_mut_slice()
}
}
impl<A: Alignment> AsRef<[u8]> for AlignedBufferMut<A> {
fn as_ref(&self) -> &[u8] {
self.as_slice()
}
}
impl<A: Alignment> AsMut<[u8]> for AlignedBufferMut<A> {
fn as_mut(&mut self) -> &mut [u8] {
self.as_mut_slice()
}
}
impl<A: Alignment> PartialEq<[u8]> for AlignedBufferMut<A> {
fn eq(&self, other: &[u8]) -> bool {
self.as_slice().eq(other)
}
}
/// SAFETY: When advancing the internal cursor, the caller needs to make sure the bytes advanced past have been initialized.
unsafe impl<A: Alignment> bytes::BufMut for AlignedBufferMut<A> {
#[inline]
fn remaining_mut(&self) -> usize {
// Although a `Vec` can have at most isize::MAX bytes, we never want to grow `IoBufferMut`.
// Thus, it can have at most `self.capacity` bytes.
self.capacity() - self.len()
}
// SAFETY: Caller needs to make sure the bytes being advanced past have been initialized.
#[inline]
unsafe fn advance_mut(&mut self, cnt: usize) {
let len = self.len();
let remaining = self.remaining_mut();
if remaining < cnt {
panic_advance(cnt, remaining);
}
// Addition will not overflow since the sum is at most the capacity.
self.set_len(len + cnt);
}
#[inline]
fn chunk_mut(&mut self) -> &mut bytes::buf::UninitSlice {
let cap = self.capacity();
let len = self.len();
// SAFETY: Since `self.ptr` is valid for `cap` bytes, `self.ptr.add(len)` must be
// valid for `cap - len` bytes. The subtraction will not underflow since
// `len <= cap`.
unsafe {
bytes::buf::UninitSlice::from_raw_parts_mut(self.as_mut_ptr().add(len), cap - len)
}
}
}
/// Panic with a nice error message.
#[cold]
fn panic_advance(idx: usize, len: usize) -> ! {
panic!(
"advance out of bounds: the len is {} but advancing by {}",
len, idx
);
}
/// Safety: [`AlignedBufferMut`] has exclusive ownership of the io buffer,
/// and the underlying pointer remains stable while io-uring is owning the buffer.
/// The tokio-epoll-uring crate itself will not resize the buffer and will respect
/// [`tokio_epoll_uring::IoBuf::bytes_total`].
unsafe impl<A: Alignment> tokio_epoll_uring::IoBuf for AlignedBufferMut<A> {
fn stable_ptr(&self) -> *const u8 {
self.as_ptr()
}
fn bytes_init(&self) -> usize {
self.len()
}
fn bytes_total(&self) -> usize {
self.capacity()
}
}
// SAFETY: See above.
unsafe impl<A: Alignment> tokio_epoll_uring::IoBufMut for AlignedBufferMut<A> {
fn stable_mut_ptr(&mut self) -> *mut u8 {
self.as_mut_ptr()
}
unsafe fn set_init(&mut self, init_len: usize) {
if self.len() < init_len {
self.set_len(init_len);
}
}
}
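For orientation, a minimal usage sketch (editor's illustration, not part of the diff) showing how the mutable buffer is filled through `bytes::BufMut` and then frozen into the immutable `AlignedBuffer`:

use bytes::BufMut;

fn fill_and_freeze() -> AlignedBuffer<ConstAlign<4096>> {
    // Fixed-capacity, 4 KiB-aligned write buffer (the alignment the tests below use).
    let mut buf: AlignedBufferMut<ConstAlign<4096>> = AlignedBufferMut::with_capacity(4096);
    buf.put_slice(&[0xAB; 4096]); // initializes the bytes and advances `len` to capacity
    assert_eq!(buf.len(), buf.capacity());
    buf.freeze() // immutable view over the same allocation, usable as an io buffer
}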
#[cfg(test)]
mod tests {
use super::*;
const ALIGN: usize = 4 * 1024;
type TestIoBufferMut = AlignedBufferMut<ConstAlign<ALIGN>>;
#[test]
fn test_with_capacity() {
let v = TestIoBufferMut::with_capacity(ALIGN * 4);
assert_eq!(v.len(), 0);
assert_eq!(v.capacity(), ALIGN * 4);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
let v = TestIoBufferMut::with_capacity(ALIGN / 2);
assert_eq!(v.len(), 0);
assert_eq!(v.capacity(), ALIGN / 2);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
}
#[test]
fn test_with_capacity_zeroed() {
let v = TestIoBufferMut::with_capacity_zeroed(ALIGN);
assert_eq!(v.len(), ALIGN);
assert_eq!(v.capacity(), ALIGN);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
assert_eq!(&v[..], &[0; ALIGN])
}
#[test]
fn test_reserve() {
use bytes::BufMut;
let mut v = TestIoBufferMut::with_capacity(ALIGN);
let capacity = v.capacity();
v.reserve(capacity);
assert_eq!(v.capacity(), capacity);
let data = [b'a'; ALIGN];
v.put(&data[..]);
v.reserve(capacity);
assert!(v.capacity() >= capacity * 2);
assert_eq!(&v[..], &data[..]);
let capacity = v.capacity();
v.clear();
v.reserve(capacity);
assert_eq!(capacity, v.capacity());
}
#[test]
fn test_bytes_put() {
use bytes::BufMut;
let mut v = TestIoBufferMut::with_capacity(ALIGN * 4);
let x = [b'a'; ALIGN];
for _ in 0..2 {
for _ in 0..4 {
v.put(&x[..]);
}
assert_eq!(v.len(), ALIGN * 4);
assert_eq!(v.capacity(), ALIGN * 4);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
v.clear()
}
assert_eq!(v.len(), 0);
assert_eq!(v.capacity(), ALIGN * 4);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
}
#[test]
#[should_panic]
fn test_bytes_put_panic() {
use bytes::BufMut;
const ALIGN: usize = 4 * 1024;
let mut v = TestIoBufferMut::with_capacity(ALIGN * 4);
let x = [b'a'; ALIGN];
for _ in 0..5 {
v.put_slice(&x[..]);
}
}
#[test]
fn test_io_buf_put_slice() {
use tokio_epoll_uring::BoundedBufMut;
const ALIGN: usize = 4 * 1024;
let mut v = TestIoBufferMut::with_capacity(ALIGN);
let x = [b'a'; ALIGN];
for _ in 0..2 {
v.put_slice(&x[..]);
assert_eq!(v.len(), ALIGN);
assert_eq!(v.capacity(), ALIGN);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
v.clear()
}
assert_eq!(v.len(), 0);
assert_eq!(v.capacity(), ALIGN);
assert_eq!(v.align(), ALIGN);
assert_eq!(v.as_ptr().align_offset(ALIGN), 0);
}
}

View File

@@ -1,216 +0,0 @@
use core::slice;
use std::{
alloc::{self, Layout},
cmp,
mem::ManuallyDrop,
};
use super::alignment::{Alignment, ConstAlign};
#[derive(Debug)]
struct AlignedBufferPtr(*mut u8);
// SAFETY: We guarantee that no one besides `AlignedBufferPtr` itself has the raw pointer.
unsafe impl Send for AlignedBufferPtr {}
// SAFETY: We guarantee that no one besides `AlignedBufferPtr` itself has the raw pointer.
unsafe impl Sync for AlignedBufferPtr {}
/// An aligned buffer type.
#[derive(Debug)]
pub struct RawAlignedBuffer<A: Alignment> {
ptr: AlignedBufferPtr,
capacity: usize,
len: usize,
align: A,
}
impl<const A: usize> RawAlignedBuffer<ConstAlign<A>> {
/// Constructs a new, empty `IoBufferMut` with at least the specified capacity and alignment.
///
/// The buffer will be able to hold at most `capacity` bytes without reallocating.
///
///
/// # Panics
///
/// Panics if the new capacity exceeds `isize::MAX` _bytes_, or if the following alignment requirements are not met:
/// * `align` must not be zero,
///
/// * `align` must be a power of two,
///
/// * `capacity`, when rounded up to the nearest multiple of `align`,
/// must not overflow isize (i.e., the rounded value must be
/// less than or equal to `isize::MAX`).
pub fn with_capacity(capacity: usize) -> Self {
let align = ConstAlign::<A>;
let layout = Layout::from_size_align(capacity, align.align()).expect("Invalid layout");
// SAFETY: Making an allocation with a sized and aligned layout. The memory is manually freed with the same layout.
let ptr = unsafe {
let ptr = alloc::alloc(layout);
if ptr.is_null() {
alloc::handle_alloc_error(layout);
}
AlignedBufferPtr(ptr)
};
RawAlignedBuffer {
ptr,
capacity,
len: 0,
align,
}
}
}
impl<A: Alignment> RawAlignedBuffer<A> {
/// Returns the total number of bytes the buffer can hold.
#[inline]
pub fn capacity(&self) -> usize {
self.capacity
}
/// Returns the alignment of the buffer.
#[inline]
pub fn align(&self) -> usize {
self.align.align()
}
/// Returns the number of bytes in the buffer, also referred to as its 'length'.
#[inline]
pub fn len(&self) -> usize {
self.len
}
/// Force the length of the buffer to `new_len`.
#[inline]
pub unsafe fn set_len(&mut self, new_len: usize) {
debug_assert!(new_len <= self.capacity());
self.len = new_len;
}
#[inline]
pub fn as_ptr(&self) -> *const u8 {
self.ptr.0
}
#[inline]
pub fn as_mut_ptr(&mut self) -> *mut u8 {
self.ptr.0
}
/// Extracts a slice containing the entire buffer.
///
/// Equivalent to `&s[..]`.
#[inline]
pub fn as_slice(&self) -> &[u8] {
// SAFETY: The pointer is valid and `len` bytes are initialized.
unsafe { slice::from_raw_parts(self.as_ptr(), self.len) }
}
/// Extracts a mutable slice of the entire buffer.
///
/// Equivalent to `&mut s[..]`.
pub fn as_mut_slice(&mut self) -> &mut [u8] {
// SAFETY: The pointer is valid and `len` bytes are initialized.
unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
}
/// Drops all the contents of the buffer, setting its length to `0`.
#[inline]
pub fn clear(&mut self) {
self.len = 0;
}
/// Reserves capacity for at least `additional` more bytes to be inserted
/// in the given `IoBufferMut`. The collection may reserve more space to
/// speculatively avoid frequent reallocations. After calling `reserve`,
/// capacity will be greater than or equal to `self.len() + additional`.
/// Does nothing if capacity is already sufficient.
///
/// # Panics
///
/// Panics if the new capacity exceeds `isize::MAX` _bytes_.
pub fn reserve(&mut self, additional: usize) {
if additional > self.capacity() - self.len() {
self.reserve_inner(additional);
}
}
fn reserve_inner(&mut self, additional: usize) {
let Some(required_cap) = self.len().checked_add(additional) else {
capacity_overflow()
};
let old_capacity = self.capacity();
let align = self.align();
// This guarantees exponential growth. The doubling cannot overflow
// because `cap <= isize::MAX` and the type of `cap` is `usize`.
let cap = cmp::max(old_capacity * 2, required_cap);
if !is_valid_alloc(cap) {
capacity_overflow()
}
let new_layout = Layout::from_size_align(cap, self.align()).expect("Invalid layout");
let old_ptr = self.as_mut_ptr();
// SAFETY: the old allocation was made with std::alloc::alloc with the same layout,
// and we panic on a null pointer.
let (ptr, cap) = unsafe {
let old_layout = Layout::from_size_align_unchecked(old_capacity, align);
let ptr = alloc::realloc(old_ptr, old_layout, new_layout.size());
if ptr.is_null() {
alloc::handle_alloc_error(new_layout);
}
(AlignedBufferPtr(ptr), cap)
};
self.ptr = ptr;
self.capacity = cap;
}
/// Shortens the buffer, keeping the first len bytes.
pub fn truncate(&mut self, len: usize) {
if len > self.len {
return;
}
self.len = len;
}
/// Consumes and leaks the `IoBufferMut`, returning a mutable reference to the contents, `&'a mut [u8]`.
pub fn leak<'a>(self) -> &'a mut [u8] {
let mut buf = ManuallyDrop::new(self);
// SAFETY: leaking the buffer as intended.
unsafe { slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.len) }
}
}
fn capacity_overflow() -> ! {
panic!("capacity overflow")
}
// We need to guarantee the following:
// * We don't ever allocate `> isize::MAX` byte-size objects.
// * We don't overflow `usize::MAX` and actually allocate too little.
//
// On 64-bit we just need to check for overflow since trying to allocate
// `> isize::MAX` bytes will surely fail. On 32-bit and 16-bit we need to add
// an extra guard for this in case we're running on a platform which can use
// all 4GB in user-space, e.g., PAE or x32.
#[inline]
fn is_valid_alloc(alloc_size: usize) -> bool {
!(usize::BITS < 64 && alloc_size > isize::MAX as usize)
}
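// Worked example (editor's note): on a 32-bit target, usize::BITS == 32, so a
// 3 GiB request is rejected here (3 * 2^30 > isize::MAX == 2^31 - 1). On 64-bit
// targets the check compiles away; oversized layouts are instead rejected by
// `Layout::from_size_align` in the callers above.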
impl<A: Alignment> Drop for RawAlignedBuffer<A> {
fn drop(&mut self) {
// SAFETY: memory was allocated with std::alloc::alloc with the same layout.
unsafe {
alloc::dealloc(
self.as_mut_ptr(),
Layout::from_size_align_unchecked(self.capacity, self.align.align()),
)
}
}
}

View File

@@ -1,40 +0,0 @@
use std::ops::{Deref, DerefMut};
use super::alignment::{Alignment, ConstAlign};
/// Newtype for an aligned slice.
pub struct AlignedSlice<'a, const N: usize, A: Alignment> {
/// underlying byte slice
buf: &'a mut [u8; N],
/// alignment marker
_align: A,
}
impl<'a, const N: usize, const A: usize> AlignedSlice<'a, N, ConstAlign<A>> {
/// Create a new aligned slice from a mutable byte slice. The input must already satisfy the alignment.
pub unsafe fn new_unchecked(buf: &'a mut [u8; N]) -> Self {
let _align = ConstAlign::<A>;
assert_eq!(buf.as_ptr().align_offset(_align.align()), 0);
AlignedSlice { buf, _align }
}
}
impl<'a, const N: usize, A: Alignment> Deref for AlignedSlice<'a, N, A> {
type Target = [u8; N];
fn deref(&self) -> &Self::Target {
self.buf
}
}
impl<'a, const N: usize, A: Alignment> DerefMut for AlignedSlice<'a, N, A> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.buf
}
}
impl<'a, const N: usize, A: Alignment> AsRef<[u8; N]> for AlignedSlice<'a, N, A> {
fn as_ref(&self) -> &[u8; N] {
self.buf
}
}
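// Usage sketch (editor's illustration, not part of the diff): wrapping a
// statically aligned array; the caller upholds the alignment contract.
//
//     #[repr(align(4096))]
//     struct Page([u8; 4096]);
//
//     let mut page = Page([0u8; 4096]);
//     // SAFETY: `#[repr(align(4096))]` places the array at a 4096-aligned address.
//     let slice = unsafe { AlignedSlice::<4096, ConstAlign<4096>>::new_unchecked(&mut page.0) };
//     assert_eq!(slice.len(), 4096);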

View File

@@ -1,9 +0,0 @@
use tokio_epoll_uring::IoBufMut;
use crate::virtual_file::{IoBufferMut, PageWriteGuardBuf};
pub trait IoBufAlignedMut: IoBufMut {}
impl IoBufAlignedMut for IoBufferMut {}
impl IoBufAlignedMut for PageWriteGuardBuf {}

View File

@@ -1,6 +1,5 @@
//! See [`FullSlice`].
use crate::virtual_file::{IoBuffer, IoBufferMut};
use bytes::{Bytes, BytesMut};
use std::ops::{Deref, Range};
use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};
@@ -77,5 +76,3 @@ macro_rules! impl_io_buf_ext {
impl_io_buf_ext!(Bytes);
impl_io_buf_ext!(BytesMut);
impl_io_buf_ext!(Vec<u8>);
impl_io_buf_ext!(IoBufferMut);
impl_io_buf_ext!(IoBuffer);

View File

@@ -1159,6 +1159,7 @@ impl WalIngest {
flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
}
}
pg_constants::XLOG_NEON_LFC_PREWARM => {}
info => bail!("Unknown WAL record type for Neon RMGR: {}", info),
}
}
@@ -1915,9 +1916,7 @@ impl WalIngest {
modification.put_rel_extend(rel, new_nblocks, ctx).await?;
let mut key = rel_block_to_key(rel, blknum);
// fill the gap with zeros
let mut gap_blocks_filled: u64 = 0;
for gap_blknum in old_nblocks..blknum {
key.field6 = gap_blknum;
@@ -1926,12 +1925,7 @@ impl WalIngest {
}
modification.put_rel_page_image_zero(rel, gap_blknum)?;
gap_blocks_filled += 1;
}
WAL_INGEST
.gap_blocks_zeroed_on_rel_extend
.inc_by(gap_blocks_filled);
}
Ok(())
}

View File

@@ -54,7 +54,7 @@ walproposer-lib: libwalproposer.a;
.PHONY: libwalproposer.a
libwalproposer.a: $(WALPROP_OBJS)
$(RM) $@
rm -f $@
$(AR) $(AROPT) $@ $^
# needs vars:

View File

@@ -767,7 +767,7 @@ HandleDropRole(DropRoleStmt *stmt)
entry->type = Op_Delete;
entry->password = NULL;
if (!found)
memset(entry->old_name, 0, sizeof(entry->old_name)); /* zero the whole name array, not just a pointer's worth */
}
}

View File

@@ -22,13 +22,17 @@
#include "neon_pgversioncompat.h"
#include "access/parallel.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "pagestore_client.h"
#include "file_cache.h"
#include "common/hashfn.h"
#include "pgstat.h"
#include "port/pg_iovec.h"
#include "postmaster/bgworker.h"
#include "postmaster/interrupt.h"
#include RELFILEINFO_HDR
#include "storage/buf_internals.h"
#include "storage/fd.h"
@@ -39,12 +43,18 @@
#include "utils/builtins.h"
#include "utils/dynahash.h"
#include "utils/guc.h"
#include "../neon_rmgr/neon_rmgr.h"
#include "hll.h"
#include "bitmap.h"
#include "neon.h"
#include "neon_perf_counters.h"
#if PG_VERSION_NUM>=160000
#include "access/neon_xlog.h"
#endif
#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
/*
@@ -78,29 +88,17 @@
* before extending the nominal size of the file.
*/
/* Local file storage allocation chunk.
 * Should be a power of two. Using chunks larger than a single page can:
 * 1. Reduce the hash-map memory footprint: an 8TB database contains a billion pages,
 *    and a hash entry is 40 bytes, so we would need 40GB just for the hash map.
 *    1MB chunks reduce the hash map size to 320MB.
 * 2. Improve access locality: consecutive pages are allocated together, improving seqscan speed.
*/
#define BLOCKS_PER_CHUNK 128 /* 1Mb chunk */
/*
 * A smaller chunk size seems to be better for OLTP workloads
*/
// #define BLOCKS_PER_CHUNK 8 /* 64kb chunk */
#define MB ((uint64)1024*1024)
#define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK))
#define CHUNK_BITMAP_SIZE ((BLOCKS_PER_CHUNK + 31) / 32)
typedef struct FileCacheEntry
{
BufferTag key;
uint32 hash;
uint32 offset;
uint32 access_count;
uint32 access_count : 31;
uint32 synced : 1;
uint32 bitmap[CHUNK_BITMAP_SIZE];
dlist_node list_node; /* LRU/holes list node */
} FileCacheEntry;
@@ -124,11 +122,15 @@ typedef struct FileCacheControl
HyperLogLogState wss_estimation; /* estimation of working set size */
} FileCacheControl;
#define LFC_MAX_PREWARM_SIZE 1024
#define LFC_PREWARM_POLL_INTERVAL 1000000 /* 1 second */
static HTAB *lfc_hash;
static int lfc_desc = 0;
static LWLockId lfc_lock;
static int lfc_max_size;
static int lfc_size_limit;
static int lfc_prewarm_rate;
static char *lfc_path;
static FileCacheControl *lfc_ctl;
static shmem_startup_hook_type prev_shmem_startup_hook;
@@ -374,6 +376,7 @@ lfc_change_limit_hook(int newval, void *extra)
hole->hash = hash;
hole->offset = offset;
hole->access_count = 0;
hole->synced = 0;
CriticalAssert(!found);
dlist_push_tail(&lfc_ctl->holes, &hole->list_node);
@@ -388,6 +391,26 @@ lfc_change_limit_hook(int newval, void *extra)
LWLockRelease(lfc_lock);
}
static void
lfc_register_prewarm_worker()
{
#if PG_MAJORVERSION_NUM >= 16
BackgroundWorker bgw;
memset(&bgw, 0, sizeof(bgw));
bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon");
snprintf(bgw.bgw_function_name, BGW_MAXLEN, "FileCachePrewarmMain");
snprintf(bgw.bgw_name, BGW_MAXLEN, "LFC prewarm");
snprintf(bgw.bgw_type, BGW_MAXLEN, "LFC prewarm");
bgw.bgw_restart_time = 5;
bgw.bgw_notify_pid = 0;
bgw.bgw_main_arg = (Datum) 0;
RegisterBackgroundWorker(&bgw);
#endif
}
void
lfc_init(void)
{
@@ -436,6 +459,19 @@ lfc_init(void)
NULL,
NULL);
DefineCustomIntVariable("neon.file_cache_prewarm_rate",
"Interval of generating prewarm WAL records",
NULL,
&lfc_prewarm_rate,
0, /* disabled by default */
0,
INT_MAX,
PGC_SIGHUP,
GUC_UNIT_MS,
NULL,
NULL,
NULL);
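/*
 * Example (editor's note): enabling prewarm-record generation once per second,
 * e.g. in postgresql.conf (the GUC is declared with GUC_UNIT_MS; 0 disables it):
 *     neon.file_cache_prewarm_rate = '1s'
 */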
if (lfc_max_size == 0)
return;
@@ -447,6 +483,8 @@ lfc_init(void)
#else
lfc_shmem_request();
#endif
lfc_register_prewarm_worker();
}
/*
@@ -693,7 +731,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
/*
* For every chunk that has blocks we're interested in, we
* 1. get the chunk header
* 2. Check if the chunk actually has the blocks we're interested in
@@ -961,6 +999,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
entry->hash = hash;
memset(entry->bitmap, 0, sizeof entry->bitmap);
}
entry->synced = false;
generation = lfc_ctl->generation;
entry_offset = entry->offset;
@@ -1013,6 +1052,57 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
}
}
#if PG_MAJORVERSION_NUM >= 16
PGDLLEXPORT void
FileCachePrewarmMain(Datum main_arg)
{
pqsignal(SIGHUP, SignalHandlerForConfigReload);
pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
BackgroundWorkerUnblockSignals();
while (!ShutdownRequestPending)
{
FileCacheEntryDesc prewarm[LFC_MAX_PREWARM_SIZE];
size_t n_prewarm = 0;
dlist_iter iter;
pg_usleep(lfc_prewarm_rate ? lfc_prewarm_rate*1000 : LFC_PREWARM_POLL_INTERVAL);
CHECK_FOR_INTERRUPTS();
if (lfc_prewarm_rate == 0)
continue;
LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
/* First, send the most recently used entries */
dlist_reverse_foreach(iter, &lfc_ctl->lru)
{
FileCacheEntry* entry = dlist_container(FileCacheEntry, list_node, iter.cur);
if (!entry->synced)
{
prewarm[n_prewarm].key = entry->key;
memcpy(prewarm[n_prewarm].bitmap, entry->bitmap, sizeof(entry->bitmap));
entry->synced = true;
if (++n_prewarm == LFC_MAX_PREWARM_SIZE)
break;
}
}
LWLockRelease(lfc_lock);
if (n_prewarm > 0)
{
XLogBeginInsert();
XLogRegisterData((char *) &prewarm, n_prewarm*sizeof(FileCacheEntryDesc));
XLogFlush(XLogInsert(RM_NEON_ID, XLOG_NEON_LFC_PREWARM));
}
}
}
#endif
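/*
 * Record layout sketch (editor's note): a prewarm record body is a plain array
 * of FileCacheEntryDesc with no extra header, at most LFC_MAX_PREWARM_SIZE
 * entries per record, so the redo side recovers the entry count as
 *     n_entries = XLogRecGetDataLen(record) / sizeof(FileCacheEntryDesc);
 */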
/*
* Admin functions
*/
typedef struct
{
TupleDesc tupdesc;

64
pgxn/neon/file_cache.h Normal file
View File

@@ -0,0 +1,64 @@
/*-------------------------------------------------------------------------
*
* file_cache.h
*
*
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*-------------------------------------------------------------------------
*/
#ifndef file_cache_h
#define file_cache_h
#include "neon_pgversioncompat.h"
/* Local file storage allocation chunk.
 * Should be a power of two. Using chunks larger than a single page can:
 * 1. Reduce the hash-map memory footprint: an 8TB database contains a billion pages,
 *    and a hash entry is 40 bytes, so we would need 40GB just for the hash map.
 *    1MB chunks reduce the hash map size to 320MB.
 * 2. Improve access locality: consecutive pages are allocated together, improving seqscan speed.
*/
#define BLOCKS_PER_CHUNK 128 /* 1Mb chunk */
#define CHUNK_BITMAP_SIZE ((BLOCKS_PER_CHUNK + 31) / 32)
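/*
 * Worked sizing example (editor's note, using the numbers from the comment above):
 *   8TB / 8KB page              = ~10^9 pages
 *   10^9 entries * 40 bytes     = ~40GB hash map with per-page entries
 *   8TB / 1MB chunk             = ~8 * 10^6 chunks
 *   8 * 10^6 entries * 40 bytes = ~320MB hash map with 1MB chunks
 */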
typedef struct
{
BufferTag key;
uint32 bitmap[CHUNK_BITMAP_SIZE];
} FileCacheEntryDesc;
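/*
 * Bit addressing sketch (editor's note): block j of a chunk maps to bit (j & 31)
 * of bitmap word (j >> 5), so a presence test looks like
 *     bool cached = (desc->bitmap[j >> 5] & ((uint32) 1 << (j & 31))) != 0;
 */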
PGDLLEXPORT void FileCachePrewarmMain(Datum main_arg);
extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, const void *const *buffers,
BlockNumber nblocks);
/* returns number of blocks read, with one bit set in *read for each */
extern int lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, void **buffers,
BlockNumber nblocks, bits8 *mask);
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno);
extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, int nblocks, bits8 *bitmap);
extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_init(void);
static inline bool
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
void *buffer)
{
bits8 rv = 0;
return lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1;
}
static inline void
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
const void *buffer)
{
lfc_writev(rinfo, forkNum, blkno, &buffer, 1);
}
#endif

View File

@@ -33,6 +33,7 @@
#include "neon_perf_counters.h"
#include "neon_utils.h"
#include "pagestore_client.h"
#include "file_cache.h"
#include "walproposer.h"
#define PageStoreTrace DEBUG5

View File

@@ -261,35 +261,5 @@ extern void set_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumb
extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber size);
extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);
/* functions for local file cache */
extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, const void *const *buffers,
BlockNumber nblocks);
/* returns number of blocks read, with one bit set in *read for each */
extern int lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, void **buffers,
BlockNumber nblocks, bits8 *mask);
extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno);
extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
BlockNumber blkno, int nblocks, bits8 *bitmap);
extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
extern void lfc_init(void);
static inline bool
lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
void *buffer)
{
bits8 rv = 0;
return lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1;
}
static inline void
lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
const void *buffer)
{
return lfc_writev(rinfo, forkNum, blkno, &buffer, 1);
}
#endif

View File

@@ -68,6 +68,7 @@
#include "neon_perf_counters.h"
#include "pagestore_client.h"
#include "file_cache.h"
#include "bitmap.h"
#if PG_VERSION_NUM >= 150000

View File

@@ -13,9 +13,12 @@
#include "miscadmin.h"
#include "storage/buf.h"
#include "storage/bufmgr.h"
#include "storage/buf_internals.h"
#include "storage/bufpage.h"
#include "storage/freespace.h"
#include "neon_rmgr.h"
#include "../neon/file_cache.h"
#include "../neon/neon_pgversioncompat.h"
PG_MODULE_MAGIC;
void _PG_init(void);
@@ -30,6 +33,7 @@ static void redo_neon_heap_delete(XLogReaderState *record);
static void redo_neon_heap_update(XLogReaderState *record, bool hot_update);
static void redo_neon_heap_lock(XLogReaderState *record);
static void redo_neon_heap_multi_insert(XLogReaderState *record);
static void redo_neon_lfc_prewarm(XLogReaderState *record);
const static RmgrData NeonRmgr = {
.rm_name = "neon",
@@ -76,6 +80,9 @@ neon_rm_redo(XLogReaderState *record)
case XLOG_NEON_HEAP_MULTI_INSERT:
redo_neon_heap_multi_insert(record);
break;
case XLOG_NEON_LFC_PREWARM:
redo_neon_lfc_prewarm(record);
break;
default:
elog(PANIC, "neon_rm_redo: unknown op code %u", info);
}
@@ -882,6 +889,28 @@ redo_neon_heap_multi_insert(XLogReaderState *record)
XLogRecordPageWithFreeSpace(rlocator, blkno, freespace);
}
static void
redo_neon_lfc_prewarm(XLogReaderState *record)
{
FileCacheEntryDesc* entries = (FileCacheEntryDesc*)XLogRecGetData(record);
size_t n_entries = XLogRecGetDataLen(record)/sizeof(FileCacheEntryDesc);
char buf[BLCKSZ];
for (size_t i = 0; i < n_entries; i++)
{
FileCacheEntryDesc* entry = &entries[i];
NRelFileInfo rinfo = BufTagGetNRelFileInfo(entry->key);
SMgrRelation reln = smgropen(rinfo, INVALID_PROC_NUMBER, RELPERSISTENCE_PERMANENT);
for (size_t j = 0; j < BLOCKS_PER_CHUNK; j++)
{
/* block j maps to bit (j & 31) of bitmap word (j >> 5) */
if (entry->bitmap[j >> 5] & ((uint32) 1 << (j & 31)))
{
smgrread(reln, entry->key.forkNum, entry->key.blockNum + j, buf);
}
}
}
}
#else
/* safeguard for older PostgreSQL versions */
PG_MODULE_MAGIC;

View File

@@ -5,6 +5,8 @@
#include "replication/decode.h"
#include "replication/logical.h"
#define XLOG_NEON_LFC_PREWARM 0x60
extern void neon_rm_desc(StringInfo buf, XLogReaderState *record);
extern void neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
extern const char *neon_rm_identify(uint8 info);

View File

@@ -456,6 +456,8 @@ neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
if (SnapBuildProcessChange(builder, xid, buf->origptr))
DecodeNeonMultiInsert(ctx, buf);
break;
case XLOG_NEON_LFC_PREWARM:
break;
default:
elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);
break;

View File

@@ -113,6 +113,10 @@ neon_rm_desc(StringInfo buf, XLogReaderState *record)
xlrec->ntuples, &offset_elem_desc, NULL);
}
}
else if (info == XLOG_NEON_LFC_PREWARM)
{
appendStringInfo(buf, "%u bytes of prewarm chunk descriptors", XLogRecGetDataLen(record));
}
}
const char *
@@ -152,6 +156,9 @@ neon_rm_identify(uint8 info)
case XLOG_NEON_HEAP_MULTI_INSERT | XLOG_NEON_INIT_PAGE:
id = "MULTI_INSERT+INIT";
break;
case XLOG_NEON_LFC_PREWARM:
id = "LFC_PREWARM";
break;
}
return id;

View File

@@ -16,7 +16,7 @@ use crate::context::RequestMonitoring;
use crate::control_plane::errors::GetEndpointJwksError;
use crate::http::parse_json_body_with_limit;
use crate::intern::RoleNameInt;
use crate::types::{EndpointId, RoleName};
use crate::{EndpointId, RoleName};
// TODO(conrad): make these configurable.
const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
@@ -669,7 +669,7 @@ mod tests {
use tokio::net::TcpListener;
use super::*;
use crate::types::RoleName;
use crate::RoleName;
fn new_ec_jwk(kid: String) -> (p256::SecretKey, jose_jwk::Jwk) {
let sk = p256::SecretKey::random(&mut OsRng);

View File

@@ -10,10 +10,9 @@ use crate::compute_ctl::ComputeCtlApi;
use crate::context::RequestMonitoring;
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, MetricsAuxInfo};
use crate::control_plane::NodeInfo;
use crate::http;
use crate::intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag};
use crate::types::EndpointId;
use crate::url::ApiUrl;
use crate::{http, EndpointId};
pub struct LocalBackend {
pub(crate) initialize: Semaphore,

View File

@@ -21,7 +21,10 @@ use crate::auth::{self, validate_password_and_exchange, AuthError, ComputeUserIn
use crate::cache::Cached;
use crate::config::AuthenticationConfig;
use crate::context::RequestMonitoring;
use crate::control_plane::provider::{CachedNodeInfo, ControlPlaneBackend};
use crate::control_plane::errors::GetAuthInfoError;
use crate::control_plane::provider::{
CachedAllowedIps, CachedNodeInfo, CachedRoleSecret, ControlPlaneBackend,
};
use crate::control_plane::{self, Api, AuthSecret};
use crate::intern::EndpointIdInt;
use crate::metrics::Metrics;
@@ -29,21 +32,39 @@ use crate::proxy::connect_compute::ComputeConnectBackend;
use crate::proxy::NeonOptions;
use crate::rate_limiter::{BucketRateLimiter, EndpointRateLimiter, RateBucketInfo};
use crate::stream::Stream;
use crate::types::{EndpointCacheKey, EndpointId, RoleName};
use crate::{scram, stream};
use crate::{scram, stream, EndpointCacheKey, EndpointId, RoleName};
/// The [crate::serverless] module can authenticate either using control-plane
/// to get authentication state, or by using JWKs stored in the filesystem.
#[derive(Clone, Copy)]
pub enum ServerlessBackend<'a> {
/// Cloud API (V2).
ControlPlane(&'a ControlPlaneBackend),
/// Local proxy uses configured auth credentials and does not wake compute
Local(&'a LocalBackend),
/// Alternative to [`std::borrow::Cow`] that doesn't require `T: ToOwned`, since we don't need that functionality
pub enum MaybeOwned<'a, T> {
Owned(T),
Borrowed(&'a T),
}
#[cfg(test)]
use crate::control_plane::provider::{CachedAllowedIps, CachedRoleSecret};
impl<T> std::ops::Deref for MaybeOwned<'_, T> {
type Target = T;
fn deref(&self) -> &Self::Target {
match self {
MaybeOwned::Owned(t) => t,
MaybeOwned::Borrowed(t) => t,
}
}
}
/// This type serves two purposes:
///
/// * When `T` is `()`, it's just a regular auth backend selector
/// which we use in [`crate::config::ProxyConfig`].
///
/// * However, when we substitute `T` with [`ComputeUserInfoMaybeEndpoint`],
/// this helps us provide the credentials only to those auth
/// backends which require them for the authentication process.
pub enum Backend<'a, T> {
/// Cloud API (V2).
ControlPlane(MaybeOwned<'a, ControlPlaneBackend>, T),
/// Local proxy uses configured auth credentials and does not wake compute
Local(MaybeOwned<'a, LocalBackend>),
}
#[cfg(test)]
pub(crate) trait TestBackend: Send + Sync + 'static {
@@ -61,20 +82,56 @@ impl Clone for Box<dyn TestBackend> {
}
}
impl std::fmt::Display for ControlPlaneBackend {
impl std::fmt::Display for Backend<'_, ()> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ControlPlaneBackend::Management(endpoint) => fmt
.debug_tuple("ControlPlane::Management")
.field(&endpoint.url())
.finish(),
#[cfg(any(test, feature = "testing"))]
ControlPlaneBackend::PostgresMock(endpoint) => fmt
.debug_tuple("ControlPlane::PostgresMock")
.field(&endpoint.url())
.finish(),
#[cfg(test)]
ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
Self::ControlPlane(api, ()) => match &**api {
ControlPlaneBackend::Management(endpoint) => fmt
.debug_tuple("ControlPlane::Management")
.field(&endpoint.url())
.finish(),
#[cfg(any(test, feature = "testing"))]
ControlPlaneBackend::PostgresMock(endpoint) => fmt
.debug_tuple("ControlPlane::PostgresMock")
.field(&endpoint.url())
.finish(),
#[cfg(test)]
ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
},
Self::Local(_) => fmt.debug_tuple("Local").finish(),
}
}
}
impl<T> Backend<'_, T> {
/// Very similar to [`std::option::Option::as_ref`].
/// This helps us pass structured config to async tasks.
pub(crate) fn as_ref(&self) -> Backend<'_, &T> {
match self {
Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x),
Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
}
}
}
impl<'a, T> Backend<'a, T> {
/// Very similar to [`std::option::Option::map`].
/// Maps [`Backend<T>`] to [`Backend<R>`] by applying
/// a function to a contained value.
pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R> {
match self {
Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)),
Self::Local(l) => Backend::Local(l),
}
}
}
impl<'a, T, E> Backend<'a, Result<T, E>> {
/// Very similar to [`std::option::Option::transpose`].
/// This is most useful for error handling.
pub(crate) fn transpose(self) -> Result<Backend<'a, T>, E> {
match self {
Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
Self::Local(l) => Ok(Backend::Local(l)),
}
}
}
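// Composition sketch (editor's note; `parse_params` is a hypothetical stand-in
// for credential parsing), mirroring the call site in `proxy::handle_client` below:
//
//     let user_info = auth_backend
//         .as_ref()                     // Backend<'_, &()>
//         .map(|()| parse_params())     // Backend<'_, Result<_, _>>
//         .transpose()?;                // Backend<'_, ComputeUserInfoMaybeEndpoint>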
@@ -341,79 +398,96 @@ async fn authenticate_with_secret(
classic::authenticate(ctx, info, client, config, secret).await
}
impl ControlPlaneBackend {
impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint> {
/// Get username from the credentials.
pub(crate) fn get_user(&self) -> &str {
match self {
Self::ControlPlane(_, user_info) => &user_info.user,
Self::Local(_) => "local",
}
}
/// Authenticate the client via the requested backend, possibly using credentials.
#[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
pub(crate) async fn authenticate(
&self,
self,
ctx: &RequestMonitoring,
user_info: ComputeUserInfoMaybeEndpoint,
client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
allow_cleartext: bool,
config: &'static AuthenticationConfig,
endpoint_rate_limiter: Arc<EndpointRateLimiter>,
) -> auth::Result<ControlPlaneComputeBackend> {
info!(
user = &*user_info.user,
project = user_info.endpoint(),
"performing authentication using the console"
);
) -> auth::Result<Backend<'a, ComputeCredentials>> {
let res = match self {
Self::ControlPlane(api, user_info) => {
info!(
user = &*user_info.user,
project = user_info.endpoint(),
"performing authentication using the console"
);
let credentials = auth_quirks(
ctx,
self,
user_info,
client,
allow_cleartext,
config,
endpoint_rate_limiter,
)
.await?;
let credentials = auth_quirks(
ctx,
&*api,
user_info,
client,
allow_cleartext,
config,
endpoint_rate_limiter,
)
.await?;
Backend::ControlPlane(api, credentials)
}
Self::Local(_) => {
return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
}
};
info!("user successfully authenticated");
Ok(ControlPlaneComputeBackend {
api: self,
creds: credentials,
})
}
pub(crate) fn attach_to_credentials(
&self,
creds: ComputeCredentials,
) -> ControlPlaneComputeBackend {
ControlPlaneComputeBackend { api: self, creds }
Ok(res)
}
}
pub struct ControlPlaneComputeBackend<'a> {
api: &'a ControlPlaneBackend,
creds: ComputeCredentials,
impl Backend<'_, ComputeUserInfo> {
pub(crate) async fn get_role_secret(
&self,
ctx: &RequestMonitoring,
) -> Result<CachedRoleSecret, GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
Self::Local(_) => Ok(Cached::new_uncached(None)),
}
}
pub(crate) async fn get_allowed_ips_and_secret(
&self,
ctx: &RequestMonitoring,
) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
match self {
Self::ControlPlane(api, user_info) => {
api.get_allowed_ips_and_secret(ctx, user_info).await
}
Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
}
}
}
#[async_trait::async_trait]
impl ComputeConnectBackend for ControlPlaneComputeBackend<'static> {
impl ComputeConnectBackend for Backend<'_, ComputeCredentials> {
async fn wake_compute(
&self,
ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
self.api.wake_compute(ctx, &self.creds.info).await
match self {
Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
}
}
fn get_keys(&self) -> &ComputeCredentialKeys {
&self.creds.keys
}
}
#[async_trait::async_trait]
impl ComputeConnectBackend for LocalBackend {
async fn wake_compute(
&self,
_ctx: &RequestMonitoring,
) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
Ok(Cached::new_uncached(self.node_info.clone()))
}
fn get_keys(&self) -> &ComputeCredentialKeys {
&ComputeCredentialKeys::None
match self {
Self::ControlPlane(_, creds) => &creds.keys,
Self::Local(_) => &ComputeCredentialKeys::None,
}
}
}
@@ -477,7 +551,7 @@ mod tests {
async fn get_endpoint_jwks(
&self,
_ctx: &RequestMonitoring,
_endpoint: crate::types::EndpointId,
_endpoint: crate::EndpointId,
) -> Result<Vec<super::jwt::AuthRule>, control_plane::errors::GetEndpointJwksError>
{
unimplemented!()

View File

@@ -15,7 +15,7 @@ use crate::error::{ReportableError, UserFacingError};
use crate::metrics::{Metrics, SniKind};
use crate::proxy::NeonOptions;
use crate::serverless::SERVERLESS_DRIVER_SNI;
use crate::types::{EndpointId, RoleName};
use crate::{EndpointId, RoleName};
#[derive(Debug, Error, PartialEq, Eq, Clone)]
pub(crate) enum ComputeUserInfoParseError {

View File

@@ -1,7 +1,7 @@
//! Client authentication mechanisms.
pub mod backend;
pub use backend::ServerlessBackend;
pub use backend::Backend;
mod credentials;
pub(crate) use credentials::{

View File

@@ -5,7 +5,7 @@
use bstr::ByteSlice;
use crate::types::EndpointId;
use crate::EndpointId;
pub(crate) struct PasswordHackPayload {
pub(crate) endpoint: EndpointId,

View File

@@ -25,8 +25,8 @@ use proxy::rate_limiter::{
use proxy::scram::threadpool::ThreadPool;
use proxy::serverless::cancel_set::CancelSet;
use proxy::serverless::{self, GlobalConnPoolOptions};
use proxy::types::RoleName;
use proxy::url::ApiUrl;
use proxy::RoleName;
project_git_version!(GIT_VERSION);
project_build_tag!(BUILD_TAG);
@@ -177,7 +177,7 @@ async fn main() -> anyhow::Result<()> {
let mut maintenance_tasks = JoinSet::new();
let refresh_config_notify = Arc::new(Notify::new());
maintenance_tasks.spawn(proxy::signals::handle(shutdown.clone(), {
maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), {
let refresh_config_notify = Arc::clone(&refresh_config_notify);
move || {
refresh_config_notify.notify_one();
@@ -203,7 +203,7 @@ async fn main() -> anyhow::Result<()> {
let task = serverless::task_main(
config,
auth::ServerlessBackend::Local(auth_backend),
auth_backend,
http_listener,
shutdown.clone(),
Arc::new(CancellationHandlerMain::new(
@@ -216,7 +216,7 @@ async fn main() -> anyhow::Result<()> {
match futures::future::select(pin!(maintenance_tasks.join_next()), pin!(task)).await {
// exit immediately on maintenance task completion
Either::Left((Some(res), _)) => match proxy::error::flatten_err(res)? {},
Either::Left((Some(res), _)) => match proxy::flatten_err(res)? {},
// exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
// exit immediately on client task error
@@ -295,8 +295,12 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
}
/// auth::Backend is created at proxy startup, and lives forever.
fn build_auth_backend(args: &LocalProxyCliArgs) -> anyhow::Result<&'static LocalBackend> {
let auth_backend = LocalBackend::new(args.postgres, args.compute_ctl.clone());
fn build_auth_backend(
args: &LocalProxyCliArgs,
) -> anyhow::Result<&'static auth::Backend<'static, ()>> {
let auth_backend = proxy::auth::Backend::Local(proxy::auth::backend::MaybeOwned::Owned(
LocalBackend::new(args.postgres, args.compute_ctl.clone()),
));
Ok(Box::leak(Box::new(auth_backend)))
}

View File

@@ -133,14 +133,14 @@ async fn main() -> anyhow::Result<()> {
proxy_listener,
cancellation_token.clone(),
));
let signals_task = tokio::spawn(proxy::signals::handle(cancellation_token, || {}));
let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || {}));
// the signal task cant ever succeed.
// the main task can error, or can succeed on cancellation.
// we want to immediately exit on either of these cases
let signal = match futures::future::select(signals_task, main).await {
Either::Left((res, _)) => proxy::error::flatten_err(res)?,
Either::Right((res, _)) => return proxy::error::flatten_err(res),
Either::Left((res, _)) => proxy::flatten_err(res)?,
Either::Right((res, _)) => return proxy::flatten_err(res),
};
// maintenance tasks return `Infallible` success values, this is an impossible value

View File

@@ -13,14 +13,13 @@ use aws_config::web_identity_token::WebIdentityTokenCredentialsProvider;
use aws_config::Region;
use futures::future::Either;
use proxy::auth::backend::jwt::JwkCache;
use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend};
use proxy::auth::backend::{AuthRateLimiter, ConsoleRedirectBackend, MaybeOwned};
use proxy::cancellation::{CancelMap, CancellationHandler};
use proxy::config::{
self, remote_storage_from_toml, AuthenticationConfig, CacheOptions, HttpConfig,
ProjectInfoCacheOptions, ProxyConfig, ProxyProtocolV2,
};
use proxy::context::parquet::ParquetUploadArgs;
use proxy::control_plane::provider::ControlPlaneBackend;
use proxy::http::health_server::AppMetrics;
use proxy::metrics::Metrics;
use proxy::rate_limiter::{
@@ -468,7 +467,7 @@ async fn main() -> anyhow::Result<()> {
if let Some(serverless_listener) = serverless_listener {
client_tasks.spawn(serverless::task_main(
config,
auth::ServerlessBackend::ControlPlane(auth_backend),
auth_backend,
serverless_listener,
cancellation_token.clone(),
cancellation_handler.clone(),
@@ -496,7 +495,7 @@ async fn main() -> anyhow::Result<()> {
// maintenance tasks. these never return unless there's an error
let mut maintenance_tasks = JoinSet::new();
maintenance_tasks.spawn(proxy::signals::handle(cancellation_token.clone(), || {}));
maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone(), || {}));
maintenance_tasks.spawn(http::health_server::task_main(
http_listener,
AppMetrics {
@@ -516,38 +515,40 @@ async fn main() -> anyhow::Result<()> {
));
}
if let Either::Left(ControlPlaneBackend::Management(api)) = &auth_backend {
match (redis_notifications_client, regional_redis_client.clone()) {
(None, None) => {}
(client1, client2) => {
let cache = api.caches.project_info.clone();
if let Some(client) = client1 {
maintenance_tasks.spawn(notifications::task_main(
client,
cache.clone(),
cancel_map.clone(),
args.region.clone(),
));
if let Either::Left(auth::Backend::ControlPlane(api, _)) = &auth_backend {
if let proxy::control_plane::provider::ControlPlaneBackend::Management(api) = &**api {
match (redis_notifications_client, regional_redis_client.clone()) {
(None, None) => {}
(client1, client2) => {
let cache = api.caches.project_info.clone();
if let Some(client) = client1 {
maintenance_tasks.spawn(notifications::task_main(
client,
cache.clone(),
cancel_map.clone(),
args.region.clone(),
));
}
if let Some(client) = client2 {
maintenance_tasks.spawn(notifications::task_main(
client,
cache.clone(),
cancel_map.clone(),
args.region.clone(),
));
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
if let Some(client) = client2 {
maintenance_tasks.spawn(notifications::task_main(
client,
cache.clone(),
cancel_map.clone(),
args.region.clone(),
));
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
}
if let Some(regional_redis_client) = regional_redis_client {
let cache = api.caches.endpoints_cache.clone();
let con = regional_redis_client;
let span = tracing::info_span!("endpoints_cache");
maintenance_tasks.spawn(
async move { cache.do_read(con, cancellation_token.clone()).await }
.instrument(span),
);
if let Some(regional_redis_client) = regional_redis_client {
let cache = api.caches.endpoints_cache.clone();
let con = regional_redis_client;
let span = tracing::info_span!("endpoints_cache");
maintenance_tasks.spawn(
async move { cache.do_read(con, cancellation_token.clone()).await }
.instrument(span),
);
}
}
}
@@ -560,11 +561,11 @@ async fn main() -> anyhow::Result<()> {
.await
{
// exit immediately on maintenance task completion
Either::Left((Some(res), _)) => break proxy::error::flatten_err(res)?,
Either::Left((Some(res), _)) => break proxy::flatten_err(res)?,
// exit with error immediately if all maintenance tasks have ceased (should be caught by branch above)
Either::Left((None, _)) => bail!("no maintenance tasks running. invalid state"),
// exit immediately on client task error
Either::Right((Some(res), _)) => proxy::error::flatten_err(res)?,
Either::Right((Some(res), _)) => proxy::flatten_err(res)?,
// exit if all our client tasks have shutdown gracefully
Either::Right((None, _)) => return Ok(()),
}
@@ -693,7 +694,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
/// auth::Backend is created at proxy startup, and lives forever.
fn build_auth_backend(
args: &ProxyCliArgs,
) -> anyhow::Result<Either<&'static ControlPlaneBackend, &'static ConsoleRedirectBackend>> {
) -> anyhow::Result<Either<&'static auth::Backend<'static, ()>, &'static ConsoleRedirectBackend>> {
match &args.auth_backend {
AuthBackendType::Console => {
let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
@@ -743,7 +744,8 @@ fn build_auth_backend(
locks,
wake_compute_endpoint_rate_limiter,
);
let auth_backend = control_plane::provider::ControlPlaneBackend::Management(api);
let api = control_plane::provider::ControlPlaneBackend::Management(api);
let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());
let config = Box::leak(Box::new(auth_backend));
@@ -754,7 +756,9 @@ fn build_auth_backend(
AuthBackendType::Postgres => {
let url = args.auth_endpoint.parse()?;
let api = control_plane::provider::mock::Api::new(url, !args.is_private_access_proxy);
let auth_backend = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
let api = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
let auth_backend = auth::Backend::ControlPlane(MaybeOwned::Owned(api), ());
let config = Box::leak(Box::new(auth_backend));

View File

@@ -17,7 +17,7 @@ use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
use crate::metrics::{Metrics, RedisErrors, RedisEventsCount};
use crate::rate_limiter::GlobalRateLimiter;
use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
use crate::types::EndpointId;
use crate::EndpointId;
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct ControlPlaneEventKey {

View File

@@ -17,7 +17,7 @@ use crate::auth::IpPattern;
use crate::config::ProjectInfoCacheOptions;
use crate::control_plane::AuthSecret;
use crate::intern::{EndpointIdInt, ProjectIdInt, RoleNameInt};
use crate::types::{EndpointId, RoleName};
use crate::{EndpointId, RoleName};
#[async_trait]
pub(crate) trait ProjectInfoCache {
@@ -368,7 +368,7 @@ impl Cache for ProjectInfoCacheImpl {
mod tests {
use super::*;
use crate::scram::ServerSecret;
use crate::types::ProjectId;
use crate::ProjectId;
#[tokio::test]
async fn test_project_info_cache_settings() {

View File

@@ -25,7 +25,7 @@ use crate::control_plane::provider::ApiLockError;
use crate::error::{ReportableError, UserFacingError};
use crate::metrics::{Metrics, NumDbConnectionsGuard};
use crate::proxy::neon_option;
use crate::types::Host;
use crate::Host;
pub const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";

View File

@@ -4,9 +4,8 @@ use serde::de::DeserializeOwned;
use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::http;
use crate::types::{DbName, RoleName};
use crate::url::ApiUrl;
use crate::{http, DbName, RoleName};
pub struct ComputeCtlApi {
pub(crate) api: http::Endpoint,

View File

@@ -20,7 +20,7 @@ use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig}
use crate::scram::threadpool::ThreadPool;
use crate::serverless::cancel_set::CancelSet;
use crate::serverless::GlobalConnPoolOptions;
use crate::types::Host;
use crate::Host;
pub struct ProxyConfig {
pub tls_config: Option<TlsConfig>,

View File

@@ -19,7 +19,7 @@ use crate::intern::{BranchIdInt, ProjectIdInt};
use crate::metrics::{
ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting,
};
use crate::types::{DbName, EndpointId, RoleName};
use crate::{DbName, EndpointId, RoleName};
pub mod parquet;

View File

@@ -161,9 +161,6 @@ pub(crate) enum Reason {
/// LockAlreadyTaken indicates that we attempted to take a lock that was already taken.
#[serde(rename = "LOCK_ALREADY_TAKEN")]
LockAlreadyTaken,
/// ActiveEndpointsLimitExceeded indicates that the limit of concurrently active endpoints was exceeded.
#[serde(rename = "ACTIVE_ENDPOINTS_LIMIT_EXCEEDED")]
ActiveEndpointsLimitExceeded,
#[default]
#[serde(other)]
Unknown,
@@ -197,8 +194,7 @@ impl Reason {
| Reason::ComputeTimeQuotaExceeded
| Reason::WrittenDataQuotaExceeded
| Reason::DataTransferQuotaExceeded
| Reason::LogicalSizeQuotaExceeded
| Reason::ActiveEndpointsLimitExceeded => false,
| Reason::LogicalSizeQuotaExceeded => false,
// transitive error. control plane is currently busy
// but might be ready soon
Reason::RunningOperations

View File

@@ -21,9 +21,8 @@ use crate::control_plane::messages::MetricsAuxInfo;
use crate::control_plane::provider::{CachedAllowedIps, CachedRoleSecret};
use crate::error::io_error;
use crate::intern::RoleNameInt;
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
use crate::url::ApiUrl;
use crate::{compute, scram};
use crate::{compute, scram, BranchId, EndpointId, ProjectId, RoleName};
#[derive(Debug, Error)]
enum MockApiError {

View File

@@ -23,8 +23,7 @@ use crate::error::ReportableError;
use crate::intern::ProjectIdInt;
use crate::metrics::ApiLockMetrics;
use crate::rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token};
use crate::types::{EndpointCacheKey, EndpointId};
use crate::{compute, scram};
use crate::{compute, scram, EndpointCacheKey, EndpointId};
pub(crate) mod errors {
use thiserror::Error;
@@ -88,8 +87,36 @@ pub(crate) mod errors {
Reason::ConcurrencyLimitReached => ErrorKind::ControlPlane,
Reason::LockAlreadyTaken => ErrorKind::ControlPlane,
Reason::RunningOperations => ErrorKind::ControlPlane,
Reason::ActiveEndpointsLimitExceeded => ErrorKind::ControlPlane,
Reason::Unknown => ErrorKind::ControlPlane,
Reason::Unknown => match &**e {
ControlPlaneError {
http_status_code:
http::StatusCode::NOT_FOUND | http::StatusCode::NOT_ACCEPTABLE,
..
} => crate::error::ErrorKind::User,
ControlPlaneError {
http_status_code: http::StatusCode::UNPROCESSABLE_ENTITY,
error,
..
} if error
.contains("compute time quota of non-primary branches is exceeded") =>
{
crate::error::ErrorKind::Quota
}
ControlPlaneError {
http_status_code: http::StatusCode::LOCKED,
error,
..
} if error.contains("quota exceeded")
|| error.contains("the limit for current plan reached") =>
{
crate::error::ErrorKind::Quota
}
ControlPlaneError {
http_status_code: http::StatusCode::TOO_MANY_REQUESTS,
..
} => crate::error::ErrorKind::ServiceRateLimit,
ControlPlaneError { .. } => crate::error::ErrorKind::ControlPlane,
},
},
ApiError::Transport(_) => crate::error::ErrorKind::ControlPlane,
}

View File

@@ -24,8 +24,7 @@ use crate::control_plane::errors::GetEndpointJwksError;
use crate::control_plane::messages::{ColdStartInfo, EndpointJwksResponse, Reason};
use crate::metrics::{CacheOutcome, Metrics};
use crate::rate_limiter::WakeComputeRateLimiter;
use crate::types::{EndpointCacheKey, EndpointId};
use crate::{compute, http, scram};
use crate::{compute, http, scram, EndpointCacheKey, EndpointId};
const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");

View File

@@ -1,9 +1,7 @@
use std::error::Error as StdError;
use std::{fmt, io};
use anyhow::Context;
use measured::FixedCardinalityLabel;
use tokio::task::JoinError;
/// Upcast (almost) any error into an opaque [`io::Error`].
pub(crate) fn io_error(e: impl Into<Box<dyn StdError + Send + Sync>>) -> io::Error {
@@ -99,8 +97,3 @@ impl ReportableError for tokio_postgres::error::Error {
}
}
}
/// Flattens `Result<Result<T>>` into `Result<T>`.
pub fn flatten_err<T>(r: Result<anyhow::Result<T>, JoinError>) -> anyhow::Result<T> {
r.context("join error").and_then(|x| x)
}

View File

@@ -7,7 +7,7 @@ use std::sync::OnceLock;
use lasso::{Capacity, MemoryLimits, Spur, ThreadedRodeo};
use rustc_hash::FxHasher;
use crate::types::{BranchId, EndpointId, ProjectId, RoleName};
use crate::{BranchId, EndpointId, ProjectId, RoleName};
pub trait InternId: Sized + 'static {
fn get_interner() -> &'static StringInterner<Self>;

View File

@@ -78,6 +78,14 @@
// List of temporarily allowed lints to unblock beta/nightly.
#![allow(unknown_lints)]
use std::convert::Infallible;
use anyhow::{bail, Context};
use intern::{EndpointIdInt, EndpointIdTag, InternId};
use tokio::task::JoinError;
use tokio_util::sync::CancellationToken;
use tracing::warn;
pub mod auth;
pub mod cache;
pub mod cancellation;
@@ -101,9 +109,165 @@ pub mod redis;
pub mod sasl;
pub mod scram;
pub mod serverless;
pub mod signals;
pub mod stream;
pub mod types;
pub mod url;
pub mod usage_metrics;
pub mod waiters;
/// Handle unix signals appropriately.
pub async fn handle_signals<F>(
token: CancellationToken,
mut refresh_config: F,
) -> anyhow::Result<Infallible>
where
F: FnMut(),
{
use tokio::signal::unix::{signal, SignalKind};
let mut hangup = signal(SignalKind::hangup())?;
let mut interrupt = signal(SignalKind::interrupt())?;
let mut terminate = signal(SignalKind::terminate())?;
loop {
tokio::select! {
// Hangup is commonly used for config reload.
_ = hangup.recv() => {
warn!("received SIGHUP");
refresh_config();
}
// Shut down the whole application.
_ = interrupt.recv() => {
warn!("received SIGINT, exiting immediately");
bail!("interrupted");
}
_ = terminate.recv() => {
warn!("received SIGTERM, shutting down once all existing connections have closed");
token.cancel();
}
}
}
}
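// Usage sketch (editor's note), as spawned by the proxy binaries:
//     maintenance_tasks.spawn(handle_signals(cancellation_token.clone(), || {}));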
/// Flattens `Result<Result<T>>` into `Result<T>`.
pub fn flatten_err<T>(r: Result<anyhow::Result<T>, JoinError>) -> anyhow::Result<T> {
r.context("join error").and_then(|x| x)
}
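// Usage sketch (editor's note): flattening a spawned task's nested result:
//     let handle = tokio::spawn(async { anyhow::Ok(()) });
//     let res: anyhow::Result<()> = flatten_err(handle.await);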
macro_rules! smol_str_wrapper {
($name:ident) => {
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct $name(smol_str::SmolStr);
impl $name {
#[allow(unused)]
pub(crate) fn as_str(&self) -> &str {
self.0.as_str()
}
}
impl std::fmt::Display for $name {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
}
impl<T> std::cmp::PartialEq<T> for $name
where
smol_str::SmolStr: std::cmp::PartialEq<T>,
{
fn eq(&self, other: &T) -> bool {
self.0.eq(other)
}
}
impl<T> From<T> for $name
where
smol_str::SmolStr: From<T>,
{
fn from(x: T) -> Self {
Self(x.into())
}
}
impl AsRef<str> for $name {
fn as_ref(&self) -> &str {
self.0.as_ref()
}
}
impl std::ops::Deref for $name {
type Target = str;
fn deref(&self) -> &str {
&*self.0
}
}
impl<'de> serde::de::Deserialize<'de> for $name {
fn deserialize<D: serde::de::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
<smol_str::SmolStr as serde::de::Deserialize<'de>>::deserialize(d).map(Self)
}
}
impl serde::Serialize for $name {
fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
self.0.serialize(s)
}
}
};
}
const POOLER_SUFFIX: &str = "-pooler";
impl EndpointId {
fn normalize(&self) -> Self {
if let Some(stripped) = self.as_ref().strip_suffix(POOLER_SUFFIX) {
stripped.into()
} else {
self.clone()
}
}
fn normalize_intern(&self) -> EndpointIdInt {
if let Some(stripped) = self.as_ref().strip_suffix(POOLER_SUFFIX) {
EndpointIdTag::get_interner().get_or_intern(stripped)
} else {
self.into()
}
}
}
// 90% of role name strings are 20 characters or less.
smol_str_wrapper!(RoleName);
// 50% of endpoint strings are 23 characters or less.
smol_str_wrapper!(EndpointId);
// 50% of branch strings are 23 characters or less.
smol_str_wrapper!(BranchId);
// 90% of project strings are 23 characters or less.
smol_str_wrapper!(ProjectId);
// will usually equal endpoint ID
smol_str_wrapper!(EndpointCacheKey);
smol_str_wrapper!(DbName);
// postgres hostname, will likely be an ip:port addr
smol_str_wrapper!(Host);
// Endpoints are a bit tricky. Rarely, they might be branches or projects.
impl EndpointId {
pub(crate) fn is_endpoint(&self) -> bool {
self.0.starts_with("ep-")
}
pub(crate) fn is_branch(&self) -> bool {
self.0.starts_with("br-")
}
// pub(crate) fn is_project(&self) -> bool {
// !self.is_endpoint() && !self.is_branch()
// }
pub(crate) fn as_branch(&self) -> BranchId {
BranchId(self.0.clone())
}
pub(crate) fn as_project(&self) -> ProjectId {
ProjectId(self.0.clone())
}
}
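// Behavior sketch (editor's note; the identifier is hypothetical):
//     let ep = EndpointId::from("ep-cool-darkness-123456-pooler");
//     assert!(ep.is_endpoint());
//     assert_eq!(ep.normalize().as_str(), "ep-cool-darkness-123456");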

View File

@@ -14,7 +14,6 @@ use metrics::{CounterPairAssoc, CounterPairVec, HyperLogLog, HyperLogLogVec};
use tokio::time::{self, Instant};
use crate::control_plane::messages::ColdStartInfo;
use crate::error::ErrorKind;
#[derive(MetricGroup)]
#[metric(new(thread_pool: Arc<ThreadPoolMetrics>))]
@@ -326,10 +325,23 @@ pub enum ConnectionFailureKind {
ComputeUncached,
}
#[derive(FixedCardinalityLabel, Copy, Clone)]
#[label(singleton = "kind")]
pub enum WakeupFailureKind {
BadComputeAddress,
ApiTransportError,
QuotaExceeded,
ApiConsoleLocked,
ApiConsoleBadRequest,
ApiConsoleOtherServerError,
ApiConsoleOtherError,
TimeoutError,
}
#[derive(LabelGroup)]
#[label(set = ConnectionFailuresBreakdownSet)]
pub struct ConnectionFailuresBreakdownGroup {
pub kind: ErrorKind,
pub kind: WakeupFailureKind,
pub retry: Bool,
}
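With the label group keyed by `WakeupFailureKind` instead of the generic `ErrorKind`, recording a failure looks roughly like the call in wake_compute.rs further below. A hedged sketch (assuming the metric's `inc` method and a `Bool: From<bool>` conversion, as used elsewhere in this crate):
Metrics::get()
    .proxy
    .connection_failures_breakdown
    .inc(ConnectionFailuresBreakdownGroup {
        kind: WakeupFailureKind::QuotaExceeded,
        retry: true.into(), // whether the failure will be retried
    });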

View File

@@ -17,7 +17,7 @@ use crate::metrics::{
};
use crate::proxy::retry::{retry_after, should_retry, CouldRetry};
use crate::proxy::wake_compute::wake_compute;
use crate::types::Host;
use crate::Host;
const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
@@ -56,7 +56,7 @@ pub(crate) trait ConnectMechanism {
}
#[async_trait]
pub(crate) trait ComputeConnectBackend: Send + Sync + 'static {
pub(crate) trait ComputeConnectBackend {
async fn wake_compute(
&self,
ctx: &RequestMonitoring,
@@ -98,10 +98,10 @@ impl ConnectMechanism for TcpMechanism<'_> {
/// Try to connect to the compute node, retrying if necessary.
#[tracing::instrument(skip_all)]
pub(crate) async fn connect_to_compute<M: ConnectMechanism>(
pub(crate) async fn connect_to_compute<M: ConnectMechanism, B: ComputeConnectBackend>(
ctx: &RequestMonitoring,
mechanism: &M,
user_info: &dyn ComputeConnectBackend,
user_info: &B,
allow_self_signed_compute: bool,
wake_compute_retry_config: RetryConfig,
connect_to_compute_retry_config: RetryConfig,
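Under the new generic signature, call sites pass a concrete backend by reference and the compiler monomorphizes over `B` instead of dispatching through a `&dyn` vtable. A hypothetical call (argument names illustrative; the trailing arguments follow the parameter list shown above):
let node = connect_to_compute(
    ctx,       // &RequestMonitoring
    &mechanism, // any M: ConnectMechanism
    &backend,  // any B: ComputeConnectBackend
    false,     // allow_self_signed_compute
    config.wake_compute_retry_config,
    config.connect_to_compute_retry_config,
)
.await?;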

View File

@@ -26,15 +26,13 @@ use self::passthrough::ProxyPassthrough;
use crate::cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal};
use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
use crate::context::RequestMonitoring;
use crate::control_plane::provider::ControlPlaneBackend;
use crate::error::ReportableError;
use crate::metrics::{Metrics, NumClientConnectionsGuard};
use crate::protocol2::read_proxy_protocol;
use crate::proxy::handshake::{handshake, HandshakeData};
use crate::rate_limiter::EndpointRateLimiter;
use crate::stream::{PqStream, Stream};
use crate::types::EndpointCacheKey;
use crate::{auth, compute};
use crate::{auth, compute, EndpointCacheKey};
const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
@@ -55,7 +53,7 @@ pub async fn run_until_cancelled<F: std::future::Future>(
pub async fn task_main(
config: &'static ProxyConfig,
auth_backend: &'static ControlPlaneBackend,
auth_backend: &'static auth::Backend<'static, ()>,
listener: tokio::net::TcpListener,
cancellation_token: CancellationToken,
cancellation_handler: Arc<CancellationHandlerMain>,
@@ -242,7 +240,7 @@ impl ReportableError for ClientRequestError {
#[allow(clippy::too_many_arguments)]
pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
config: &'static ProxyConfig,
auth_backend: &'static ControlPlaneBackend,
auth_backend: &'static auth::Backend<'static, ()>,
ctx: &RequestMonitoring,
cancellation_handler: Arc<CancellationHandlerMain>,
stream: S,
@@ -283,17 +281,20 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
let common_names = tls.map(|tls| &tls.common_names);
// Extract credentials which we're going to use for auth.
let result = auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names);
let result = auth_backend
.as_ref()
.map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names))
.transpose();
let user_info = match result {
Ok(user_info) => user_info,
Err(e) => stream.throw_error(e).await?,
};
let user = user_info.user.clone();
let user_info = match auth_backend
let user = user_info.get_user().to_owned();
let user_info = match user_info
.authenticate(
ctx,
user_info,
&mut stream,
mode.allow_cleartext(),
&config.authentication_config,
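The `as_ref().map(|()| ...).transpose()` chain reads naturally once you model `auth::Backend` as a container mapped over its payload. A simplified toy model (hypothetical; the real type also carries the control-plane handle in its `ControlPlane` variant):
enum Backend<T> {
    ControlPlane(T),
    Local(T),
}

impl<T> Backend<T> {
    // Borrow the payload without consuming the backend.
    fn as_ref(&self) -> Backend<&T> {
        match self {
            Backend::ControlPlane(t) => Backend::ControlPlane(t),
            Backend::Local(t) => Backend::Local(t),
        }
    }
    // Replace the payload, preserving the variant.
    fn map<U>(self, f: impl FnOnce(T) -> U) -> Backend<U> {
        match self {
            Backend::ControlPlane(t) => Backend::ControlPlane(f(t)),
            Backend::Local(t) => Backend::Local(f(t)),
        }
    }
}

impl<T, E> Backend<Result<T, E>> {
    // Turn Backend<Result<T, E>> inside out into Result<Backend<T>, E>,
    // so the error can short-circuit before authentication proceeds.
    fn transpose(self) -> Result<Backend<T>, E> {
        match self {
            Backend::ControlPlane(r) => r.map(Backend::ControlPlane),
            Backend::Local(r) => r.map(Backend::Local),
        }
    }
}

fn main() {
    let b: Backend<()> = Backend::ControlPlane(());
    let parsed = b.as_ref().map(|_: &()| Ok::<_, &str>("info")).transpose();
    assert!(matches!(parsed, Ok(Backend::ControlPlane("info"))));
}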

View File

@@ -6,7 +6,6 @@ use std::time::Duration;
use anyhow::{bail, Context};
use async_trait::async_trait;
use auth::backend::ControlPlaneComputeBackend;
use http::StatusCode;
use retry::{retry_after, ShouldRetryWakeCompute};
use rstest::rstest;
@@ -20,7 +19,7 @@ use super::connect_compute::ConnectMechanism;
use super::retry::CouldRetry;
use super::*;
use crate::auth::backend::{
ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, TestBackend,
ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend,
};
use crate::config::{CertResolver, RetryConfig};
use crate::control_plane::messages::{ControlPlaneError, Details, MetricsAuxInfo, Status};
@@ -29,8 +28,7 @@ use crate::control_plane::provider::{
};
use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
use crate::error::ErrorKind;
use crate::types::{BranchId, EndpointId, ProjectId};
use crate::{sasl, scram};
use crate::{sasl, scram, BranchId, EndpointId, ProjectId};
/// Generate a set of TLS certificates: CA + server.
fn generate_certs(
@@ -567,21 +565,19 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn
fn helper_create_connect_info(
mechanism: &TestConnectMechanism,
) -> ControlPlaneComputeBackend<'static> {
let api = Box::leak(Box::new(ControlPlaneBackend::Test(Box::new(
mechanism.clone(),
))));
let creds = ComputeCredentials {
info: ComputeUserInfo {
endpoint: "endpoint".into(),
user: "user".into(),
options: NeonOptions::parse_options_raw(""),
) -> auth::Backend<'static, ComputeCredentials> {
let user_info = auth::Backend::ControlPlane(
MaybeOwned::Owned(ControlPlaneBackend::Test(Box::new(mechanism.clone()))),
ComputeCredentials {
info: ComputeUserInfo {
endpoint: "endpoint".into(),
user: "user".into(),
options: NeonOptions::parse_options_raw(""),
},
keys: ComputeCredentialKeys::Password("password".into()),
},
keys: ComputeCredentialKeys::Password("password".into()),
};
api.attach_to_credentials(creds)
);
user_info
}
#[tokio::test]

View File

@@ -1,20 +1,22 @@
use hyper::StatusCode;
use tracing::{error, info, warn};
use super::connect_compute::ComputeConnectBackend;
use crate::config::RetryConfig;
use crate::context::RequestMonitoring;
use crate::control_plane::errors::WakeComputeError;
use crate::control_plane::messages::{ControlPlaneError, Reason};
use crate::control_plane::provider::CachedNodeInfo;
use crate::error::ReportableError;
use crate::metrics::{
ConnectOutcome, ConnectionFailuresBreakdownGroup, Metrics, RetriesMetricGroup, RetryType,
WakeupFailureKind,
};
use crate::proxy::retry::{retry_after, should_retry};
pub(crate) async fn wake_compute(
pub(crate) async fn wake_compute<B: ComputeConnectBackend>(
num_retries: &mut u32,
ctx: &RequestMonitoring,
api: &dyn ComputeConnectBackend,
api: &B,
config: RetryConfig,
) -> Result<CachedNodeInfo, WakeComputeError> {
let retry_type = RetryType::WakeCompute;
@@ -58,8 +60,62 @@ pub(crate) async fn wake_compute(
}
fn report_error(e: &WakeComputeError, retry: bool) {
let kind = e.get_error_kind();
use crate::control_plane::errors::ApiError;
let kind = match e {
WakeComputeError::BadComputeAddress(_) => WakeupFailureKind::BadComputeAddress,
WakeComputeError::ApiError(ApiError::Transport(_)) => WakeupFailureKind::ApiTransportError,
WakeComputeError::ApiError(ApiError::ControlPlane(e)) => match e.get_reason() {
Reason::RoleProtected => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ResourceNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::ProjectNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::EndpointNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::BranchNotFound => WakeupFailureKind::ApiConsoleBadRequest,
Reason::RateLimitExceeded => WakeupFailureKind::ApiConsoleLocked,
Reason::NonDefaultBranchComputeTimeExceeded => WakeupFailureKind::QuotaExceeded,
Reason::ActiveTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::ComputeTimeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::WrittenDataQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::DataTransferQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::LogicalSizeQuotaExceeded => WakeupFailureKind::QuotaExceeded,
Reason::ConcurrencyLimitReached => WakeupFailureKind::ApiConsoleLocked,
Reason::LockAlreadyTaken => WakeupFailureKind::ApiConsoleLocked,
Reason::RunningOperations => WakeupFailureKind::ApiConsoleLocked,
Reason::Unknown => match **e {
ControlPlaneError {
http_status_code: StatusCode::LOCKED,
ref error,
..
} if error.contains("written data quota exceeded")
|| error.contains("the limit for current plan reached") =>
{
WakeupFailureKind::QuotaExceeded
}
ControlPlaneError {
http_status_code: StatusCode::UNPROCESSABLE_ENTITY,
ref error,
..
} if error.contains("compute time quota of non-primary branches is exceeded") => {
WakeupFailureKind::QuotaExceeded
}
ControlPlaneError {
http_status_code: StatusCode::LOCKED,
..
} => WakeupFailureKind::ApiConsoleLocked,
ControlPlaneError {
http_status_code: StatusCode::BAD_REQUEST,
..
} => WakeupFailureKind::ApiConsoleBadRequest,
ControlPlaneError {
http_status_code, ..
} if http_status_code.is_server_error() => {
WakeupFailureKind::ApiConsoleOtherServerError
}
ControlPlaneError { .. } => WakeupFailureKind::ApiConsoleOtherError,
},
},
WakeComputeError::TooManyConnections => WakeupFailureKind::ApiConsoleLocked,
WakeComputeError::TooManyConnectionAttempts(_) => WakeupFailureKind::TimeoutError,
};
Metrics::get()
.proxy
.connection_failures_breakdown

View File

@@ -250,7 +250,7 @@ mod tests {
use super::{BucketRateLimiter, WakeComputeRateLimiter};
use crate::intern::EndpointIdInt;
use crate::rate_limiter::RateBucketInfo;
use crate::types::EndpointId;
use crate::EndpointId;
#[test]
fn rate_bucket_rpi() {

View File

@@ -271,7 +271,7 @@ mod tests {
use serde_json::json;
use super::*;
use crate::types::{ProjectId, RoleName};
use crate::{ProjectId, RoleName};
#[test]
fn parse_allowed_ips() -> anyhow::Result<()> {

View File

@@ -62,7 +62,7 @@ mod tests {
use super::{Exchange, ServerSecret};
use crate::intern::EndpointIdInt;
use crate::sasl::{Mechanism, Step};
use crate::types::EndpointId;
use crate::EndpointId;
#[test]
fn snapshot() {

View File

@@ -189,7 +189,7 @@ impl Drop for JobHandle {
#[cfg(test)]
mod tests {
use super::*;
use crate::types::EndpointId;
use crate::EndpointId;
#[tokio::test]
async fn hash_is_correct() {

View File

@@ -15,10 +15,9 @@ use super::conn_pool::poll_client;
use super::conn_pool_lib::{Client, ConnInfo, GlobalConnPool};
use super::http_conn_pool::{self, poll_http2_client, Send};
use super::local_conn_pool::{self, LocalClient, LocalConnPool, EXT_NAME, EXT_SCHEMA, EXT_VERSION};
use crate::auth::backend::local::{LocalBackend, StaticAuthRules};
use crate::auth::backend::local::StaticAuthRules;
use crate::auth::backend::{ComputeCredentials, ComputeUserInfo};
use crate::auth::{check_peer_addr_is_in_list, AuthError, ServerlessBackend};
use crate::compute;
use crate::auth::{self, check_peer_addr_is_in_list, AuthError};
use crate::compute_ctl::{
ComputeCtlError, ExtensionInstallRequest, Privilege, SetRoleGrantsRequest,
};
@@ -26,14 +25,14 @@ use crate::config::ProxyConfig;
use crate::context::RequestMonitoring;
use crate::control_plane::errors::{GetAuthInfoError, WakeComputeError};
use crate::control_plane::locks::ApiLocks;
use crate::control_plane::provider::{ApiLockError, ControlPlaneBackend};
use crate::control_plane::{Api, CachedNodeInfo};
use crate::control_plane::provider::ApiLockError;
use crate::control_plane::CachedNodeInfo;
use crate::error::{ErrorKind, ReportableError, UserFacingError};
use crate::intern::EndpointIdInt;
use crate::proxy::connect_compute::{ComputeConnectBackend, ConnectMechanism};
use crate::proxy::connect_compute::ConnectMechanism;
use crate::proxy::retry::{CouldRetry, ShouldRetryWakeCompute};
use crate::rate_limiter::EndpointRateLimiter;
use crate::types::{EndpointId, Host};
use crate::{compute, EndpointId, Host};
pub(crate) struct PoolingBackend {
pub(crate) http_conn_pool: Arc<super::http_conn_pool::GlobalConnPool<Send>>,
@@ -41,6 +40,7 @@ pub(crate) struct PoolingBackend {
pub(crate) pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
pub(crate) config: &'static ProxyConfig,
pub(crate) auth_backend: &'static crate::auth::Backend<'static, ()>,
pub(crate) endpoint_rate_limiter: Arc<EndpointRateLimiter>,
}
@@ -48,13 +48,12 @@ impl PoolingBackend {
pub(crate) async fn authenticate_with_password(
&self,
ctx: &RequestMonitoring,
auth_backend: &ControlPlaneBackend,
user_info: &ComputeUserInfo,
password: &[u8],
) -> Result<ComputeCredentials, AuthError> {
let (allowed_ips, maybe_secret) = auth_backend
.get_allowed_ips_and_secret(ctx, user_info)
.await?;
let user_info = user_info.clone();
let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?;
if self.config.authentication_config.ip_allowlist_check_enabled
&& !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
{
@@ -68,7 +67,7 @@ impl PoolingBackend {
}
let cached_secret = match maybe_secret {
Some(secret) => secret,
None => auth_backend.get_role_secret(ctx, user_info).await?,
None => backend.get_role_secret(ctx).await?,
};
let secret = match cached_secret.value.clone() {
@@ -103,7 +102,7 @@ impl PoolingBackend {
}
};
res.map(|key| ComputeCredentials {
info: user_info.clone(),
info: user_info,
keys: key,
})
}
@@ -111,12 +110,11 @@ impl PoolingBackend {
pub(crate) async fn authenticate_with_jwt(
&self,
ctx: &RequestMonitoring,
auth_backend: ServerlessBackend<'static>,
user_info: &ComputeUserInfo,
jwt: String,
) -> Result<ComputeCredentials, AuthError> {
match auth_backend {
ServerlessBackend::ControlPlane(console) => {
match &self.auth_backend {
crate::auth::Backend::ControlPlane(console, ()) => {
self.config
.authentication_config
.jwks_cache
@@ -124,7 +122,7 @@ impl PoolingBackend {
ctx,
user_info.endpoint.clone(),
&user_info.user,
console,
&**console,
&jwt,
)
.await
@@ -135,7 +133,7 @@ impl PoolingBackend {
keys: crate::auth::backend::ComputeCredentialKeys::None,
})
}
ServerlessBackend::Local(_) => {
crate::auth::Backend::Local(_) => {
let keys = self
.config
.authentication_config
@@ -165,7 +163,6 @@ impl PoolingBackend {
pub(crate) async fn connect_to_compute(
&self,
ctx: &RequestMonitoring,
auth_backend: ServerlessBackend<'static>,
conn_info: ConnInfo,
keys: ComputeCredentials,
force_new: bool,
@@ -184,14 +181,7 @@ impl PoolingBackend {
let conn_id = uuid::Uuid::new_v4();
tracing::Span::current().record("conn_id", display(conn_id));
info!(%conn_id, "pool: opening a new connection '{conn_info}'");
let api = match auth_backend {
ServerlessBackend::ControlPlane(cplane) => {
&cplane.attach_to_credentials(keys) as &dyn ComputeConnectBackend
}
ServerlessBackend::Local(local_proxy) => local_proxy as &dyn ComputeConnectBackend,
};
let backend = self.auth_backend.as_ref().map(|()| keys);
crate::proxy::connect_compute::connect_to_compute(
ctx,
&TokioMechanism {
@@ -200,7 +190,7 @@ impl PoolingBackend {
pool: self.pool.clone(),
locks: &self.config.connect_compute_locks,
},
api,
&backend,
false, // do not allow self signed compute for http flow
self.config.wake_compute_retry_config,
self.config.connect_to_compute_retry_config,
@@ -213,7 +203,6 @@ impl PoolingBackend {
pub(crate) async fn connect_to_local_proxy(
&self,
ctx: &RequestMonitoring,
auth_backend: &'static ControlPlaneBackend,
conn_info: ConnInfo,
) -> Result<http_conn_pool::Client<Send>, HttpConnError> {
info!("pool: looking for an existing connection");
@@ -224,8 +213,7 @@ impl PoolingBackend {
let conn_id = uuid::Uuid::new_v4();
tracing::Span::current().record("conn_id", display(conn_id));
info!(%conn_id, "pool: opening a new connection '{conn_info}'");
let backend = auth_backend.attach_to_credentials(ComputeCredentials {
let backend = self.auth_backend.as_ref().map(|()| ComputeCredentials {
info: ComputeUserInfo {
user: conn_info.user_info.user.clone(),
endpoint: EndpointId::from(format!("{}-local-proxy", conn_info.user_info.endpoint)),
@@ -260,20 +248,26 @@ impl PoolingBackend {
pub(crate) async fn connect_to_local_postgres(
&self,
ctx: &RequestMonitoring,
auth_backend: &LocalBackend,
conn_info: ConnInfo,
) -> Result<LocalClient<tokio_postgres::Client>, HttpConnError> {
if let Some(client) = self.local_pool.get(ctx, &conn_info)? {
return Ok(client);
}
let local_backend = match &self.auth_backend {
auth::Backend::ControlPlane(_, ()) => {
unreachable!("only local_proxy can connect to local postgres")
}
auth::Backend::Local(local) => local,
};
if !self.local_pool.initialized(&conn_info) {
// only install and grant usage one at a time.
let _permit = auth_backend.initialize.acquire().await.unwrap();
let _permit = local_backend.initialize.acquire().await.unwrap();
// check again for race
if !self.local_pool.initialized(&conn_info) {
auth_backend
local_backend
.compute_ctl
.install_extension(&ExtensionInstallRequest {
extension: EXT_NAME,
@@ -282,7 +276,7 @@ impl PoolingBackend {
})
.await?;
auth_backend
local_backend
.compute_ctl
.grant_role(&SetRoleGrantsRequest {
schema: EXT_SCHEMA,
@@ -300,7 +294,7 @@ impl PoolingBackend {
tracing::Span::current().record("conn_id", display(conn_id));
info!(%conn_id, "local_pool: opening a new connection '{conn_info}'");
let mut node_info = auth_backend.node_info.clone();
let mut node_info = local_backend.node_info.clone();
let (key, jwk) = create_random_jwk();
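The initialization guard in `connect_to_local_postgres` above is a classic double-checked pattern: a cheap check, a single-permit semaphore to serialize installers, then a re-check after acquiring the permit. A standalone sketch of the shape (hypothetical type, assuming `tokio::sync::Semaphore`; the real pool tracks initialization per `conn_info`):
use std::sync::atomic::{AtomicBool, Ordering};
use tokio::sync::Semaphore;

struct LocalPool {
    initialized: AtomicBool,
    init_lock: Semaphore, // constructed with a single permit
}

impl LocalPool {
    async fn ensure_initialized(&self) {
        if !self.initialized.load(Ordering::Acquire) {
            let _permit = self.init_lock.acquire().await.unwrap();
            // Re-check: another task may have initialized while we waited.
            if !self.initialized.load(Ordering::Acquire) {
                // ... install the extension and grant usage here ...
                self.initialized.store(true, Ordering::Release);
            }
        }
    }
}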

Some files were not shown because too many files have changed in this diff.