diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 5ccfe48684..69b8bc5d70 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -671,6 +671,10 @@ jobs: password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init + # Increase timeout to 12h, default timeout is 6h + # we have regression in clickbench causing it to run 2-3x longer + timeout-minutes: 720 + steps: - uses: actions/checkout@v4 @@ -716,7 +720,7 @@ jobs: test_selection: performance/test_perf_olap.py run_in_parallel: false save_perf_report: ${{ env.SAVE_PERF_REPORT }} - extra_params: -m remote_cluster --timeout 21600 -k test_clickbench + extra_params: -m remote_cluster --timeout 43200 -k test_clickbench pg_version: ${{ env.DEFAULT_PG_VERSION }} env: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 0d3ea7db28..bba51ddc92 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -839,6 +839,7 @@ jobs: - name: Build vm image run: | ./vm-builder \ + -size=2G \ -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \ -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} @@ -1116,7 +1117,10 @@ jobs: gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \ -f deployPgSniRouter=true \ - -f deployProxy=true \ + -f deployProxyLink=true \ + -f deployPrivatelinkProxy=true \ + -f deployProxyScram=true \ + -f deployProxyAuthBroker=true \ -f branch=main \ -f dockerTag=${{needs.tag.outputs.build-tag}} else diff --git a/Cargo.lock b/Cargo.lock index 7fa5df29fd..c5af247e8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3749,6 +3749,7 @@ dependencies = [ "tracing", "url", "utils", + "wal_decoder", "walkdir", "workspace_hack", ] @@ -4186,6 +4187,7 @@ dependencies = [ "regex", "serde", "thiserror", + "tracing", "utils", ] @@ -6272,7 +6274,7 @@ dependencies = [ [[package]] name = "tokio-epoll-uring" version = "0.1.0" -source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#cb2dcea2058034bc209e7917b01c5097712a3168" +source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#33e00106a268644d02ba0461bbd64476073b0ee1" dependencies = [ "futures", "nix 0.26.4", @@ -6788,7 +6790,7 @@ dependencies = [ [[package]] name = "uring-common" version = "0.1.0" -source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#cb2dcea2058034bc209e7917b01c5097712a3168" +source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#33e00106a268644d02ba0461bbd64476073b0ee1" dependencies = [ "bytes", "io-uring", @@ -6954,6 +6956,20 @@ dependencies = [ "utils", ] +[[package]] +name = "wal_decoder" +version = "0.1.0" +dependencies = [ + "anyhow", + "bytes", + "pageserver_api", + "postgres_ffi", + "serde", + "tracing", + "utils", + "workspace_hack", +] + [[package]] name = "walkdir" version = "2.3.3" diff --git a/Cargo.toml b/Cargo.toml index 4c6a24ecde..7f9a766ff9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ members = [ "libs/postgres_ffi/wal_craft", "libs/vm_monitor", "libs/walproposer", + "libs/wal_decoder", ] [workspace.package] @@ -238,6 +239,7 @@ tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" } utils = { version = "0.1", path = "./libs/utils/" } vm_monitor = { version = "0.1", path = 
"./libs/vm_monitor/" } walproposer = { version = "0.1", path = "./libs/walproposer/" } +wal_decoder = { version = "0.1", path = "./libs/wal_decoder" } ## Common library dependency workspace_hack = { version = "0.1", path = "./workspace_hack/" } diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 6451e309f0..85fb9c441d 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -666,7 +666,7 @@ RUN apt-get update && \ # # Use new version only for v17 # because Release_2024_09_1 has some backward incompatible changes -# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1 +# https://github.com/rdkit/rdkit/releases/tag/Release_2024_09_1 ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH" RUN case "${PG_VERSION}" in \ "v17") \ @@ -860,18 +860,98 @@ ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH" USER nonroot WORKDIR /home/nonroot -RUN case "${PG_VERSION}" in "v17") \ - echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \ - esac && \ - curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \ +RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \ chmod +x rustup-init && \ ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \ rm rustup-init && \ + case "${PG_VERSION}" in \ + 'v17') \ + echo 'v17 is not supported yet by pgrx. Quit' && exit 0;; \ + esac && \ cargo install --locked --version 0.11.3 cargo-pgrx && \ /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config' USER root +######################################################################################### +# +# Layer "rust extensions pgrx12" +# +# pgrx started to support Postgres 17 since version 12, +# but some older extension aren't compatible with it. +# This layer should be used as a base for new pgrx extensions, +# and eventually get merged with `rust-extensions-build` +# +######################################################################################### +FROM build-deps AS rust-extensions-build-pgrx12 +ARG PG_VERSION +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +RUN apt-get update && \ + apt-get install --no-install-recommends -y curl libclang-dev && \ + useradd -ms /bin/bash nonroot -b /home + +ENV HOME=/home/nonroot +ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH" +USER nonroot +WORKDIR /home/nonroot + +RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \ + chmod +x rustup-init && \ + ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \ + rm rustup-init && \ + cargo install --locked --version 0.12.6 cargo-pgrx && \ + /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config' + +USER root + +######################################################################################### +# +# Layers "pg-onnx-build" and "pgrag-pg-build" +# Compile "pgrag" extensions +# +######################################################################################### + +FROM rust-extensions-build-pgrx12 AS pg-onnx-build + +# cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25). +# Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise +RUN apt-get update && apt-get install -y python3 python3-pip python3-venv && \ + python3 -m venv venv && \ + . 
venv/bin/activate && \ + python3 -m pip install cmake==3.30.5 && \ + wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \ + mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \ + ./build.sh --config Release --parallel --skip_submodule_sync --skip_tests --allow_running_as_root + + +FROM pg-onnx-build AS pgrag-pg-build + +RUN apt-get install -y protobuf-compiler && \ + wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz && \ + echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \ + mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . && \ + \ + cd exts/rag && \ + sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ + cargo pgrx install --release && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/rag.control && \ + \ + cd ../rag_bge_small_en_v15 && \ + sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ + ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \ + REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/bge_small_en_v15.onnx \ + cargo pgrx install --release --features remote_onnx && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_bge_small_en_v15.control && \ + \ + cd ../rag_jina_reranker_v1_tiny_en && \ + sed -i 's/pgrx = "0.12.6"/pgrx = { version = "0.12.6", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \ + ORT_LIB_LOCATION=/home/nonroot/onnxruntime-src/build/Linux \ + REMOTE_ONNX_URL=http://pg-ext-s3-gateway/pgrag-data/jina_reranker_v1_tiny_en.onnx \ + cargo pgrx install --release --features remote_onnx && \ + echo "trusted = true" >> /usr/local/pgsql/share/extension/rag_jina_reranker_v1_tiny_en.control + + ######################################################################################### # # Layer "pg-jsonschema-pg-build" @@ -1041,6 +1121,31 @@ RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.1.0.tar.gz make -j $(getconf _NPROCESSORS_ONLN) install && \ echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control +######################################################################################### +# +# Layer "pg_mooncake" +# compile pg_mooncake extension +# +######################################################################################### +FROM rust-extensions-build AS pg-mooncake-build +ARG PG_VERSION +COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/ + +ENV PG_MOONCAKE_VERSION=882175dbba07ba2e6e59b1088d61bf325b910b9e +ENV PATH="/usr/local/pgsql/bin/:$PATH" + +RUN case "${PG_VERSION}" in \ + 'v14') \ + echo "pg_mooncake is not supported on Postgres ${PG_VERSION}" && exit 0;; \ + esac && \ + git clone --depth 1 --branch neon https://github.com/kelvich/pg_mooncake.git pg_mooncake-src && \ + cd pg_mooncake-src && \ + git checkout "${PG_MOONCAKE_VERSION}" && \ + git submodule update --init --depth 1 --recursive && \ + make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) && \ + make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \ + echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control + ######################################################################################### # # Layer "neon-pg-ext-build" @@ -1059,6 +1164,7 @@ COPY --from=h3-pg-build /h3/usr / COPY --from=unit-pg-build 
/usr/local/pgsql/ /usr/local/pgsql/ COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-tiktoken-pg-build /usr/local/pgsql/ /usr/local/pgsql/ @@ -1084,6 +1190,7 @@ COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/ +COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/ COPY pgxn/ pgxn/ RUN make -j $(getconf _NPROCESSORS_ONLN) \ @@ -1247,6 +1354,7 @@ COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/ COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/ COPY --from=vector-pg-build /pgvector.patch /ext-src/ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src +#COPY --from=pgrag-pg-build /usr/local/pgsql/ /usr/local/pgsql/ #COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src #COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src #COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index 51a55b513f..79f894c289 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -18,7 +18,7 @@ commands: - name: pgbouncer user: postgres sysvInitAction: respawn - shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' + shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0' - name: local_proxy user: postgres sysvInitAction: respawn diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index 43e57a4ed5..ff04b9e4c6 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -18,7 +18,7 @@ commands: - name: pgbouncer user: postgres sysvInitAction: respawn - shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini' + shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini 2>&1 > /dev/virtio-ports/tech.neon.log.0' - name: local_proxy user: postgres sysvInitAction: respawn diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 5b5828c6ed..8df0a714ec 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -17,7 +17,7 @@ use std::time::Duration; use anyhow::{bail, Context}; use camino::Utf8PathBuf; -use pageserver_api::models::{self, AuxFilePolicy, TenantInfo, TimelineInfo}; +use pageserver_api::models::{self, TenantInfo, TimelineInfo}; use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use postgres_backend::AuthType; @@ -399,11 +399,6 @@ impl PageServerNode { .map(serde_json::from_str) .transpose() .context("parse `timeline_get_throttle` from json")?, - switch_aux_file_policy: settings - .remove("switch_aux_file_policy") - .map(|x| x.parse::()) - .transpose() - .context("Failed to parse 'switch_aux_file_policy'")?, lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()), lsn_lease_length_for_ts: settings .remove("lsn_lease_length_for_ts") @@ -499,11 +494,6 @@ impl PageServerNode { .map(serde_json::from_str) .transpose() .context("parse `timeline_get_throttle` from json")?, - switch_aux_file_policy: settings - 
.remove("switch_aux_file_policy") - .map(|x| x.parse::()) - .transpose() - .context("Failed to parse 'switch_aux_file_policy'")?, lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()), lsn_lease_length_for_ts: settings .remove("lsn_lease_length_for_ts") diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index 73d89699ed..b7f38c6286 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -111,6 +111,11 @@ enum Command { #[arg(long)] node: NodeId, }, + /// Cancel any ongoing reconciliation for this shard + TenantShardCancelReconcile { + #[arg(long)] + tenant_shard_id: TenantShardId, + }, /// Modify the pageserver tenant configuration of a tenant: this is the configuration structure /// that is passed through to pageservers, and does not affect storage controller behavior. TenantConfig { @@ -535,6 +540,15 @@ async fn main() -> anyhow::Result<()> { ) .await?; } + Command::TenantShardCancelReconcile { tenant_shard_id } => { + storcon_client + .dispatch::<(), ()>( + Method::PUT, + format!("control/v1/tenant/{tenant_shard_id}/cancel_reconcile"), + None, + ) + .await?; + } Command::TenantConfig { tenant_id, config } => { let tenant_conf = serde_json::from_str(&config)?; diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 896a5d8069..6b2d6cf625 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -250,12 +250,6 @@ pub struct TenantConfigToml { // Expresed in multiples of checkpoint distance. pub image_layer_creation_check_threshold: u8, - /// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into - /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions. - /// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux - /// file is written. - pub switch_aux_file_policy: crate::models::AuxFilePolicy, - /// The length for an explicit LSN lease request. /// Layers needed to reconstruct pages at LSN will not be GC-ed during this interval. 
#[serde(with = "humantime_serde")] @@ -475,7 +469,6 @@ impl Default for TenantConfigToml { lazy_slru_download: false, timeline_get_throttle: crate::models::ThrottleConfig::disabled(), image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD, - switch_aux_file_policy: crate::models::AuxFilePolicy::default_tenant_config(), lsn_lease_length: LsnLease::DEFAULT_LENGTH, lsn_lease_length_for_ts: LsnLease::DEFAULT_LENGTH_FOR_TS, } diff --git a/libs/pageserver_api/src/lib.rs b/libs/pageserver_api/src/lib.rs index 532185a366..ff705e79cd 100644 --- a/libs/pageserver_api/src/lib.rs +++ b/libs/pageserver_api/src/lib.rs @@ -5,9 +5,11 @@ pub mod controller_api; pub mod key; pub mod keyspace; pub mod models; +pub mod record; pub mod reltag; pub mod shard; /// Public API types pub mod upcall_api; +pub mod value; pub mod config; diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index d37f62185c..0a4992aea4 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -10,7 +10,6 @@ use std::{ io::{BufRead, Read}, num::{NonZeroU32, NonZeroU64, NonZeroUsize}, str::FromStr, - sync::atomic::AtomicUsize, time::{Duration, SystemTime}, }; @@ -309,7 +308,6 @@ pub struct TenantConfig { pub lazy_slru_download: Option, pub timeline_get_throttle: Option, pub image_layer_creation_check_threshold: Option, - pub switch_aux_file_policy: Option, pub lsn_lease_length: Option, pub lsn_lease_length_for_ts: Option, } @@ -350,68 +348,6 @@ pub enum AuxFilePolicy { CrossValidation, } -impl AuxFilePolicy { - pub fn is_valid_migration_path(from: Option, to: Self) -> bool { - matches!( - (from, to), - (None, _) | (Some(AuxFilePolicy::CrossValidation), AuxFilePolicy::V2) - ) - } - - /// If a tenant writes aux files without setting `switch_aux_policy`, this value will be used. - pub fn default_tenant_config() -> Self { - Self::V2 - } -} - -/// The aux file policy memory flag. Users can store `Option` into this atomic flag. 0 == unspecified. -pub struct AtomicAuxFilePolicy(AtomicUsize); - -impl AtomicAuxFilePolicy { - pub fn new(policy: Option) -> Self { - Self(AtomicUsize::new( - policy.map(AuxFilePolicy::to_usize).unwrap_or_default(), - )) - } - - pub fn load(&self) -> Option { - match self.0.load(std::sync::atomic::Ordering::Acquire) { - 0 => None, - other => Some(AuxFilePolicy::from_usize(other)), - } - } - - pub fn store(&self, policy: Option) { - self.0.store( - policy.map(AuxFilePolicy::to_usize).unwrap_or_default(), - std::sync::atomic::Ordering::Release, - ); - } -} - -impl AuxFilePolicy { - pub fn to_usize(self) -> usize { - match self { - Self::V1 => 1, - Self::CrossValidation => 2, - Self::V2 => 3, - } - } - - pub fn try_from_usize(this: usize) -> Option { - match this { - 1 => Some(Self::V1), - 2 => Some(Self::CrossValidation), - 3 => Some(Self::V2), - _ => None, - } - } - - pub fn from_usize(this: usize) -> Self { - Self::try_from_usize(this).unwrap() - } -} - #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(tag = "kind")] pub enum EvictionPolicy { @@ -1633,71 +1569,6 @@ mod tests { } } - #[test] - fn test_aux_file_migration_path() { - assert!(AuxFilePolicy::is_valid_migration_path( - None, - AuxFilePolicy::V1 - )); - assert!(AuxFilePolicy::is_valid_migration_path( - None, - AuxFilePolicy::V2 - )); - assert!(AuxFilePolicy::is_valid_migration_path( - None, - AuxFilePolicy::CrossValidation - )); - // Self-migration is not a valid migration path, and the caller should handle it by itself. 
- assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::V1), - AuxFilePolicy::V1 - )); - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::V2), - AuxFilePolicy::V2 - )); - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::CrossValidation), - AuxFilePolicy::CrossValidation - )); - // Migrations not allowed - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::CrossValidation), - AuxFilePolicy::V1 - )); - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::V1), - AuxFilePolicy::V2 - )); - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::V2), - AuxFilePolicy::V1 - )); - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::V2), - AuxFilePolicy::CrossValidation - )); - assert!(!AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::V1), - AuxFilePolicy::CrossValidation - )); - // Migrations allowed - assert!(AuxFilePolicy::is_valid_migration_path( - Some(AuxFilePolicy::CrossValidation), - AuxFilePolicy::V2 - )); - } - - #[test] - fn test_aux_parse() { - assert_eq!(AuxFilePolicy::from_str("V2").unwrap(), AuxFilePolicy::V2); - assert_eq!(AuxFilePolicy::from_str("v2").unwrap(), AuxFilePolicy::V2); - assert_eq!( - AuxFilePolicy::from_str("cross-validation").unwrap(), - AuxFilePolicy::CrossValidation - ); - } - #[test] fn test_image_compression_algorithm_parsing() { use ImageCompressionAlgorithm::*; diff --git a/libs/pageserver_api/src/record.rs b/libs/pageserver_api/src/record.rs new file mode 100644 index 0000000000..b80ed2f203 --- /dev/null +++ b/libs/pageserver_api/src/record.rs @@ -0,0 +1,113 @@ +//! This module defines the WAL record format used within the pageserver. + +use bytes::Bytes; +use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember}; +use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId}; +use serde::{Deserialize, Serialize}; +use utils::bin_ser::DeserializeError; + +/// Each update to a page is represented by a NeonWalRecord. It can be a wrapper +/// around a PostgreSQL WAL record, or a custom neon-specific "record". +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum NeonWalRecord { + /// Native PostgreSQL WAL record + Postgres { will_init: bool, rec: Bytes }, + + /// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear) + ClearVisibilityMapFlags { + new_heap_blkno: Option, + old_heap_blkno: Option, + flags: u8, + }, + /// Mark transaction IDs as committed on a CLOG page + ClogSetCommitted { + xids: Vec, + timestamp: TimestampTz, + }, + /// Mark transaction IDs as aborted on a CLOG page + ClogSetAborted { xids: Vec }, + /// Extend multixact offsets SLRU + MultixactOffsetCreate { + mid: MultiXactId, + moff: MultiXactOffset, + }, + /// Extend multixact members SLRU. + MultixactMembersCreate { + moff: MultiXactOffset, + members: Vec, + }, + /// Update the map of AUX files, either writing or dropping an entry + AuxFile { + file_path: String, + content: Option, + }, + + /// A testing record for unit testing purposes. It supports append data to an existing image, or clear it. + #[cfg(feature = "testing")] + Test { + /// Append a string to the image. + append: String, + /// Clear the image before appending. + clear: bool, + /// Treat this record as an init record. `clear` should be set to true if this field is set + /// to true. This record does not need the history WALs to reconstruct. See [`NeonWalRecord::will_init`] and + /// its references in `timeline.rs`. 
+ will_init: bool, + }, +} + +impl NeonWalRecord { + /// Does replaying this WAL record initialize the page from scratch, or does + /// it need to be applied over the previous image of the page? + pub fn will_init(&self) -> bool { + // If you change this function, you'll also need to change ValueBytes::will_init + match self { + NeonWalRecord::Postgres { will_init, rec: _ } => *will_init, + #[cfg(feature = "testing")] + NeonWalRecord::Test { will_init, .. } => *will_init, + // None of the special neon record types currently initialize the page + _ => false, + } + } + + #[cfg(feature = "testing")] + pub fn wal_append(s: impl AsRef) -> Self { + Self::Test { + append: s.as_ref().to_string(), + clear: false, + will_init: false, + } + } + + #[cfg(feature = "testing")] + pub fn wal_clear() -> Self { + Self::Test { + append: "".to_string(), + clear: true, + will_init: false, + } + } + + #[cfg(feature = "testing")] + pub fn wal_init() -> Self { + Self::Test { + append: "".to_string(), + clear: true, + will_init: true, + } + } +} + +/// Build a human-readable string to describe a WAL record +/// +/// For debugging purposes +pub fn describe_wal_record(rec: &NeonWalRecord) -> Result { + match rec { + NeonWalRecord::Postgres { will_init, rec } => Ok(format!( + "will_init: {}, {}", + will_init, + describe_postgres_wal_record(rec)? + )), + _ => Ok(format!("{:?}", rec)), + } +} diff --git a/pageserver/src/repository.rs b/libs/pageserver_api/src/value.rs similarity index 73% rename from pageserver/src/repository.rs rename to libs/pageserver_api/src/value.rs index e4ebafd927..1f8ed30a9a 100644 --- a/pageserver/src/repository.rs +++ b/libs/pageserver_api/src/value.rs @@ -1,13 +1,16 @@ -use crate::walrecord::NeonWalRecord; -use anyhow::Result; +//! This module defines the value type used by the storage engine. +//! +//! A [`Value`] represents either a completely new value for one Key ([`Value::Image`]), +//! or a "delta" of how to get from previous version of the value to the new one +//! ([`Value::WalRecord`]]) +//! +//! Note that the [`Value`] type is used for the permananent storage format, so any +//! changes to it must be backwards compatible. + +use crate::record::NeonWalRecord; use bytes::Bytes; use serde::{Deserialize, Serialize}; -use std::ops::AddAssign; -use std::time::Duration; -pub use pageserver_api::key::{Key, KEY_SIZE}; - -/// A 'value' stored for a one Key. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum Value { /// An Image value contains a full copy of the value @@ -20,10 +23,12 @@ pub enum Value { } impl Value { + #[inline(always)] pub fn is_image(&self) -> bool { matches!(self, Value::Image(_)) } + #[inline(always)] pub fn will_init(&self) -> bool { match self { Value::Image(_) => true, @@ -33,17 +38,18 @@ impl Value { } #[derive(Debug, PartialEq)] -pub(crate) enum InvalidInput { +pub enum InvalidInput { TooShortValue, TooShortPostgresRecord, } /// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, lets /// use this type for querying if a slice looks some particular way. -pub(crate) struct ValueBytes; +pub struct ValueBytes; impl ValueBytes { - pub(crate) fn will_init(raw: &[u8]) -> Result { + #[inline(always)] + pub fn will_init(raw: &[u8]) -> Result { if raw.len() < 12 { return Err(InvalidInput::TooShortValue); } @@ -79,6 +85,7 @@ impl ValueBytes { mod test { use super::*; + use bytes::Bytes; use utils::bin_ser::BeSer; macro_rules! 
roundtrip { @@ -229,56 +236,3 @@ mod test { assert!(!ValueBytes::will_init(&expected).unwrap()); } } - -/// -/// Result of performing GC -/// -#[derive(Default, Serialize, Debug)] -pub struct GcResult { - pub layers_total: u64, - pub layers_needed_by_cutoff: u64, - pub layers_needed_by_pitr: u64, - pub layers_needed_by_branches: u64, - pub layers_needed_by_leases: u64, - pub layers_not_updated: u64, - pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files. - - #[serde(serialize_with = "serialize_duration_as_millis")] - pub elapsed: Duration, - - /// The layers which were garbage collected. - /// - /// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be - /// dropped in tests. - #[cfg(feature = "testing")] - #[serde(skip)] - pub(crate) doomed_layers: Vec, -} - -// helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds -fn serialize_duration_as_millis(d: &Duration, serializer: S) -> Result -where - S: serde::Serializer, -{ - d.as_millis().serialize(serializer) -} - -impl AddAssign for GcResult { - fn add_assign(&mut self, other: Self) { - self.layers_total += other.layers_total; - self.layers_needed_by_pitr += other.layers_needed_by_pitr; - self.layers_needed_by_cutoff += other.layers_needed_by_cutoff; - self.layers_needed_by_branches += other.layers_needed_by_branches; - self.layers_needed_by_leases += other.layers_needed_by_leases; - self.layers_not_updated += other.layers_not_updated; - self.layers_removed += other.layers_removed; - - self.elapsed += other.elapsed; - - #[cfg(feature = "testing")] - { - let mut other = other; - self.doomed_layers.append(&mut other.doomed_layers); - } - } -} diff --git a/libs/postgres_ffi/Cargo.toml b/libs/postgres_ffi/Cargo.toml index ef17833a48..e1f5443cbe 100644 --- a/libs/postgres_ffi/Cargo.toml +++ b/libs/postgres_ffi/Cargo.toml @@ -15,6 +15,7 @@ memoffset.workspace = true thiserror.workspace = true serde.workspace = true utils.workspace = true +tracing.workspace = true [dev-dependencies] env_logger.workspace = true diff --git a/libs/postgres_ffi/src/lib.rs b/libs/postgres_ffi/src/lib.rs index 0d46ed6aac..6b219488ac 100644 --- a/libs/postgres_ffi/src/lib.rs +++ b/libs/postgres_ffi/src/lib.rs @@ -217,6 +217,7 @@ macro_rules! enum_pgversion { pub mod pg_constants; pub mod relfile_utils; +pub mod walrecord; // Export some widely used datatypes that are unlikely to change across Postgres versions pub use v14::bindings::RepOriginId; diff --git a/pageserver/src/walrecord.rs b/libs/postgres_ffi/src/walrecord.rs similarity index 88% rename from pageserver/src/walrecord.rs rename to libs/postgres_ffi/src/walrecord.rs index dd199e2c55..dedbaef64d 100644 --- a/pageserver/src/walrecord.rs +++ b/libs/postgres_ffi/src/walrecord.rs @@ -1,107 +1,144 @@ +//! This module houses types used in decoding of PG WAL +//! records. //! -//! Functions for parsing WAL records. -//! +//! 
TODO: Generate separate types for each supported PG version -use anyhow::Result; +use crate::pg_constants; +use crate::XLogRecord; +use crate::{ + BlockNumber, MultiXactId, MultiXactOffset, MultiXactStatus, Oid, RepOriginId, TimestampTz, + TransactionId, +}; +use crate::{BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD}; use bytes::{Buf, Bytes}; -use postgres_ffi::dispatch_pgversion; -use postgres_ffi::pg_constants; -use postgres_ffi::BLCKSZ; -use postgres_ffi::{BlockNumber, TimestampTz}; -use postgres_ffi::{MultiXactId, MultiXactOffset, MultiXactStatus, Oid, TransactionId}; -use postgres_ffi::{RepOriginId, XLogRecord, XLOG_SIZE_OF_XLOG_RECORD}; use serde::{Deserialize, Serialize}; -use tracing::*; -use utils::{bin_ser::DeserializeError, lsn::Lsn}; +use utils::bin_ser::DeserializeError; +use utils::lsn::Lsn; -/// Each update to a page is represented by a NeonWalRecord. It can be a wrapper -/// around a PostgreSQL WAL record, or a custom neon-specific "record". -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub enum NeonWalRecord { - /// Native PostgreSQL WAL record - Postgres { will_init: bool, rec: Bytes }, - - /// Clear bits in heap visibility map. ('flags' is bitmap of bits to clear) - ClearVisibilityMapFlags { - new_heap_blkno: Option, - old_heap_blkno: Option, - flags: u8, - }, - /// Mark transaction IDs as committed on a CLOG page - ClogSetCommitted { - xids: Vec, - timestamp: TimestampTz, - }, - /// Mark transaction IDs as aborted on a CLOG page - ClogSetAborted { xids: Vec }, - /// Extend multixact offsets SLRU - MultixactOffsetCreate { - mid: MultiXactId, - moff: MultiXactOffset, - }, - /// Extend multixact members SLRU. - MultixactMembersCreate { - moff: MultiXactOffset, - members: Vec, - }, - /// Update the map of AUX files, either writing or dropping an entry - AuxFile { - file_path: String, - content: Option, - }, - - /// A testing record for unit testing purposes. It supports append data to an existing image, or clear it. - #[cfg(test)] - Test { - /// Append a string to the image. - append: String, - /// Clear the image before appending. - clear: bool, - /// Treat this record as an init record. `clear` should be set to true if this field is set - /// to true. This record does not need the history WALs to reconstruct. See [`NeonWalRecord::will_init`] and - /// its references in `timeline.rs`. - will_init: bool, - }, +#[repr(C)] +#[derive(Debug)] +pub struct XlMultiXactCreate { + pub mid: MultiXactId, + /* new MultiXact's ID */ + pub moff: MultiXactOffset, + /* its starting offset in members file */ + pub nmembers: u32, + /* number of member XIDs */ + pub members: Vec, } -impl NeonWalRecord { - /// Does replaying this WAL record initialize the page from scratch, or does - /// it need to be applied over the previous image of the page? - pub fn will_init(&self) -> bool { - // If you change this function, you'll also need to change ValueBytes::will_init - match self { - NeonWalRecord::Postgres { will_init, rec: _ } => *will_init, - #[cfg(test)] - NeonWalRecord::Test { will_init, .. 
} => *will_init, - // None of the special neon record types currently initialize the page - _ => false, +impl XlMultiXactCreate { + pub fn decode(buf: &mut Bytes) -> XlMultiXactCreate { + let mid = buf.get_u32_le(); + let moff = buf.get_u32_le(); + let nmembers = buf.get_u32_le(); + let mut members = Vec::new(); + for _ in 0..nmembers { + members.push(MultiXactMember::decode(buf)); + } + XlMultiXactCreate { + mid, + moff, + nmembers, + members, } } +} - #[cfg(test)] - pub(crate) fn wal_append(s: impl AsRef) -> Self { - Self::Test { - append: s.as_ref().to_string(), - clear: false, - will_init: false, +#[repr(C)] +#[derive(Debug)] +pub struct XlMultiXactTruncate { + pub oldest_multi_db: Oid, + /* to-be-truncated range of multixact offsets */ + pub start_trunc_off: MultiXactId, + /* just for completeness' sake */ + pub end_trunc_off: MultiXactId, + + /* to-be-truncated range of multixact members */ + pub start_trunc_memb: MultiXactOffset, + pub end_trunc_memb: MultiXactOffset, +} + +impl XlMultiXactTruncate { + pub fn decode(buf: &mut Bytes) -> XlMultiXactTruncate { + XlMultiXactTruncate { + oldest_multi_db: buf.get_u32_le(), + start_trunc_off: buf.get_u32_le(), + end_trunc_off: buf.get_u32_le(), + start_trunc_memb: buf.get_u32_le(), + end_trunc_memb: buf.get_u32_le(), } } +} - #[cfg(test)] - pub(crate) fn wal_clear() -> Self { - Self::Test { - append: "".to_string(), - clear: true, - will_init: false, +#[repr(C)] +#[derive(Debug)] +pub struct XlRelmapUpdate { + pub dbid: Oid, /* database ID, or 0 for shared map */ + pub tsid: Oid, /* database's tablespace, or pg_global */ + pub nbytes: i32, /* size of relmap data */ +} + +impl XlRelmapUpdate { + pub fn decode(buf: &mut Bytes) -> XlRelmapUpdate { + XlRelmapUpdate { + dbid: buf.get_u32_le(), + tsid: buf.get_u32_le(), + nbytes: buf.get_i32_le(), } } +} - #[cfg(test)] - pub(crate) fn wal_init() -> Self { - Self::Test { - append: "".to_string(), - clear: true, - will_init: true, +#[repr(C)] +#[derive(Debug)] +pub struct XlReploriginDrop { + pub node_id: RepOriginId, +} + +impl XlReploriginDrop { + pub fn decode(buf: &mut Bytes) -> XlReploriginDrop { + XlReploriginDrop { + node_id: buf.get_u16_le(), + } + } +} + +#[repr(C)] +#[derive(Debug)] +pub struct XlReploriginSet { + pub remote_lsn: Lsn, + pub node_id: RepOriginId, +} + +impl XlReploriginSet { + pub fn decode(buf: &mut Bytes) -> XlReploriginSet { + XlReploriginSet { + remote_lsn: Lsn(buf.get_u64_le()), + node_id: buf.get_u16_le(), + } + } +} + +#[repr(C)] +#[derive(Debug, Clone, Copy)] +pub struct RelFileNode { + pub spcnode: Oid, /* tablespace */ + pub dbnode: Oid, /* database */ + pub relnode: Oid, /* relation */ +} + +#[repr(C)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct MultiXactMember { + pub xid: TransactionId, + pub status: MultiXactStatus, +} + +impl MultiXactMember { + pub fn decode(buf: &mut Bytes) -> MultiXactMember { + MultiXactMember { + xid: buf.get_u32_le(), + status: buf.get_u32_le(), } } } @@ -164,17 +201,17 @@ impl DecodedWALRecord { /// Check if this WAL record represents a legacy "copy" database creation, which populates new relations /// by reading other existing relations' data blocks. This is more complex to apply than new-style database /// creations which simply include all the desired blocks in the WAL, so we need a helper function to detect this case. 
- pub(crate) fn is_dbase_create_copy(&self, pg_version: u32) -> bool { + pub fn is_dbase_create_copy(&self, pg_version: u32) -> bool { if self.xl_rmid == pg_constants::RM_DBASE_ID { let info = self.xl_info & pg_constants::XLR_RMGR_INFO_MASK; match pg_version { 14 => { // Postgres 14 database creations are always the legacy kind - info == postgres_ffi::v14::bindings::XLOG_DBASE_CREATE + info == crate::v14::bindings::XLOG_DBASE_CREATE } - 15 => info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY, - 16 => info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY, - 17 => info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY, + 15 => info == crate::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY, + 16 => info == crate::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY, + 17 => info == crate::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY, _ => { panic!("Unsupported postgres version {pg_version}") } @@ -185,35 +222,294 @@ impl DecodedWALRecord { } } -#[repr(C)] -#[derive(Debug, Clone, Copy)] -pub struct RelFileNode { - pub spcnode: Oid, /* tablespace */ - pub dbnode: Oid, /* database */ - pub relnode: Oid, /* relation */ -} +/// Main routine to decode a WAL record and figure out which blocks are modified +// +// See xlogrecord.h for details +// The overall layout of an XLOG record is: +// Fixed-size header (XLogRecord struct) +// XLogRecordBlockHeader struct +// If pg_constants::BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows +// If pg_constants::BKPIMAGE_HAS_HOLE and pg_constants::BKPIMAGE_IS_COMPRESSED, an +// XLogRecordBlockCompressHeader struct follows. +// If pg_constants::BKPBLOCK_SAME_REL is not set, a RelFileNode follows +// BlockNumber follows +// XLogRecordBlockHeader struct +// ... +// XLogRecordDataHeader[Short|Long] struct +// block data +// block data +// ... +// main data +// +// +// For performance reasons, the caller provides the DecodedWALRecord struct and the function just fills it in. +// It would be more natural for this function to return a DecodedWALRecord as return value, +// but reusing the caller-supplied struct avoids an allocation. +// This code is in the hot path for digesting incoming WAL, and is very performance sensitive. +// +pub fn decode_wal_record( + record: Bytes, + decoded: &mut DecodedWALRecord, + pg_version: u32, +) -> anyhow::Result<()> { + let mut rnode_spcnode: u32 = 0; + let mut rnode_dbnode: u32 = 0; + let mut rnode_relnode: u32 = 0; + let mut got_rnode = false; + let mut origin_id: u16 = 0; -#[repr(C)] -#[derive(Debug)] -pub struct XlRelmapUpdate { - pub dbid: Oid, /* database ID, or 0 for shared map */ - pub tsid: Oid, /* database's tablespace, or pg_global */ - pub nbytes: i32, /* size of relmap data */ -} + let mut buf = record.clone(); -impl XlRelmapUpdate { - pub fn decode(buf: &mut Bytes) -> XlRelmapUpdate { - XlRelmapUpdate { - dbid: buf.get_u32_le(), - tsid: buf.get_u32_le(), - nbytes: buf.get_i32_le(), + // 1. Parse XLogRecord struct + + // FIXME: assume little-endian here + let xlogrec = XLogRecord::from_bytes(&mut buf)?; + + tracing::trace!( + "decode_wal_record xl_rmid = {} xl_info = {}", + xlogrec.xl_rmid, + xlogrec.xl_info + ); + + let remaining: usize = xlogrec.xl_tot_len as usize - XLOG_SIZE_OF_XLOG_RECORD; + + if buf.remaining() != remaining { + //TODO error + } + + let mut max_block_id = 0; + let mut blocks_total_len: u32 = 0; + let mut main_data_len = 0; + let mut datatotal: u32 = 0; + decoded.blocks.clear(); + + // 2. Decode the headers. 
+ // XLogRecordBlockHeaders if any, + // XLogRecordDataHeader[Short|Long] + while buf.remaining() > datatotal as usize { + let block_id = buf.get_u8(); + + match block_id { + pg_constants::XLR_BLOCK_ID_DATA_SHORT => { + /* XLogRecordDataHeaderShort */ + main_data_len = buf.get_u8() as u32; + datatotal += main_data_len; + } + + pg_constants::XLR_BLOCK_ID_DATA_LONG => { + /* XLogRecordDataHeaderLong */ + main_data_len = buf.get_u32_le(); + datatotal += main_data_len; + } + + pg_constants::XLR_BLOCK_ID_ORIGIN => { + // RepOriginId is uint16 + origin_id = buf.get_u16_le(); + } + + pg_constants::XLR_BLOCK_ID_TOPLEVEL_XID => { + // TransactionId is uint32 + buf.advance(4); + } + + 0..=pg_constants::XLR_MAX_BLOCK_ID => { + /* XLogRecordBlockHeader */ + let mut blk = DecodedBkpBlock::new(); + + if block_id <= max_block_id { + // TODO + //report_invalid_record(state, + // "out-of-order block_id %u at %X/%X", + // block_id, + // (uint32) (state->ReadRecPtr >> 32), + // (uint32) state->ReadRecPtr); + // goto err; + } + max_block_id = block_id; + + let fork_flags: u8 = buf.get_u8(); + blk.forknum = fork_flags & pg_constants::BKPBLOCK_FORK_MASK; + blk.flags = fork_flags; + blk.has_image = (fork_flags & pg_constants::BKPBLOCK_HAS_IMAGE) != 0; + blk.has_data = (fork_flags & pg_constants::BKPBLOCK_HAS_DATA) != 0; + blk.will_init = (fork_flags & pg_constants::BKPBLOCK_WILL_INIT) != 0; + blk.data_len = buf.get_u16_le(); + + /* TODO cross-check that the HAS_DATA flag is set iff data_length > 0 */ + + datatotal += blk.data_len as u32; + blocks_total_len += blk.data_len as u32; + + if blk.has_image { + blk.bimg_len = buf.get_u16_le(); + blk.hole_offset = buf.get_u16_le(); + blk.bimg_info = buf.get_u8(); + + blk.apply_image = dispatch_pgversion!( + pg_version, + (blk.bimg_info & pgv::bindings::BKPIMAGE_APPLY) != 0 + ); + + let blk_img_is_compressed = + crate::bkpimage_is_compressed(blk.bimg_info, pg_version); + + if blk_img_is_compressed { + tracing::debug!("compressed block image , pg_version = {}", pg_version); + } + + if blk_img_is_compressed { + if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0 { + blk.hole_length = buf.get_u16_le(); + } else { + blk.hole_length = 0; + } + } else { + blk.hole_length = BLCKSZ - blk.bimg_len; + } + datatotal += blk.bimg_len as u32; + blocks_total_len += blk.bimg_len as u32; + + /* + * cross-check that hole_offset > 0, hole_length > 0 and + * bimg_len < BLCKSZ if the HAS_HOLE flag is set. + */ + if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0 + && (blk.hole_offset == 0 || blk.hole_length == 0 || blk.bimg_len == BLCKSZ) + { + // TODO + /* + report_invalid_record(state, + "pg_constants::BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X", + (unsigned int) blk->hole_offset, + (unsigned int) blk->hole_length, + (unsigned int) blk->bimg_len, + (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); + goto err; + */ + } + + /* + * cross-check that hole_offset == 0 and hole_length == 0 if + * the HAS_HOLE flag is not set. + */ + if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0 + && (blk.hole_offset != 0 || blk.hole_length != 0) + { + // TODO + /* + report_invalid_record(state, + "pg_constants::BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X", + (unsigned int) blk->hole_offset, + (unsigned int) blk->hole_length, + (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); + goto err; + */ + } + + /* + * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED + * flag is set. 
+ */ + if !blk_img_is_compressed && blk.bimg_len == BLCKSZ { + // TODO + /* + report_invalid_record(state, + "pg_constants::BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X", + (unsigned int) blk->bimg_len, + (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); + goto err; + */ + } + + /* + * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor + * IS_COMPRESSED flag is set. + */ + if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0 + && !blk_img_is_compressed + && blk.bimg_len != BLCKSZ + { + // TODO + /* + report_invalid_record(state, + "neither pg_constants::BKPIMAGE_HAS_HOLE nor pg_constants::BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X", + (unsigned int) blk->data_len, + (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); + goto err; + */ + } + } + if fork_flags & pg_constants::BKPBLOCK_SAME_REL == 0 { + rnode_spcnode = buf.get_u32_le(); + rnode_dbnode = buf.get_u32_le(); + rnode_relnode = buf.get_u32_le(); + got_rnode = true; + } else if !got_rnode { + // TODO + /* + report_invalid_record(state, + "pg_constants::BKPBLOCK_SAME_REL set but no previous rel at %X/%X", + (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); + goto err; */ + } + + blk.rnode_spcnode = rnode_spcnode; + blk.rnode_dbnode = rnode_dbnode; + blk.rnode_relnode = rnode_relnode; + + blk.blkno = buf.get_u32_le(); + tracing::trace!( + "this record affects {}/{}/{} blk {}", + rnode_spcnode, + rnode_dbnode, + rnode_relnode, + blk.blkno + ); + + decoded.blocks.push(blk); + } + + _ => { + // TODO: invalid block_id + } } } + + // 3. Decode blocks. + let mut ptr = record.len() - buf.remaining(); + for blk in decoded.blocks.iter_mut() { + if blk.has_image { + blk.bimg_offset = ptr as u32; + ptr += blk.bimg_len as usize; + } + if blk.has_data { + ptr += blk.data_len as usize; + } + } + // We don't need them, so just skip blocks_total_len bytes + buf.advance(blocks_total_len as usize); + assert_eq!(ptr, record.len() - buf.remaining()); + + let main_data_offset = (xlogrec.xl_tot_len - main_data_len) as usize; + + // 4. Decode main_data + if main_data_len > 0 { + assert_eq!(buf.remaining(), main_data_len as usize); + } + + decoded.xl_xid = xlogrec.xl_xid; + decoded.xl_info = xlogrec.xl_info; + decoded.xl_rmid = xlogrec.xl_rmid; + decoded.record = record; + decoded.origin_id = origin_id; + decoded.main_data_offset = main_data_offset; + + Ok(()) } pub mod v14 { + use crate::{OffsetNumber, TransactionId}; use bytes::{Buf, Bytes}; - use postgres_ffi::{OffsetNumber, TransactionId}; #[repr(C)] #[derive(Debug)] @@ -383,8 +679,8 @@ pub mod v15 { pub mod v16 { pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange}; + use crate::{OffsetNumber, TransactionId}; use bytes::{Buf, Bytes}; - use postgres_ffi::{OffsetNumber, TransactionId}; pub struct XlHeapDelete { pub xmax: TransactionId, @@ -450,8 +746,8 @@ pub mod v16 { /* Since PG16, we have the Neon RMGR (RM_NEON_ID) to manage Neon-flavored WAL. 
*/ pub mod rm_neon { + use crate::{OffsetNumber, TransactionId}; use bytes::{Buf, Bytes}; - use postgres_ffi::{OffsetNumber, TransactionId}; #[repr(C)] #[derive(Debug)] @@ -563,8 +859,8 @@ pub mod v16 { pub mod v17 { pub use super::v14::XlHeapLockUpdated; + pub use crate::{TimeLineID, TimestampTz}; use bytes::{Buf, Bytes}; - pub use postgres_ffi::{TimeLineID, TimestampTz}; pub use super::v16::rm_neon; pub use super::v16::{ @@ -742,7 +1038,7 @@ impl XlXactParsedRecord { let spcnode = buf.get_u32_le(); let dbnode = buf.get_u32_le(); let relnode = buf.get_u32_le(); - trace!( + tracing::trace!( "XLOG_XACT_COMMIT relfilenode {}/{}/{}", spcnode, dbnode, @@ -756,9 +1052,9 @@ impl XlXactParsedRecord { } } - if xinfo & postgres_ffi::v15::bindings::XACT_XINFO_HAS_DROPPED_STATS != 0 { + if xinfo & crate::v15::bindings::XACT_XINFO_HAS_DROPPED_STATS != 0 { let nitems = buf.get_i32_le(); - debug!( + tracing::debug!( "XLOG_XACT_COMMIT-XACT_XINFO_HAS_DROPPED_STAT nitems {}", nitems ); @@ -778,7 +1074,7 @@ impl XlXactParsedRecord { if xinfo & pg_constants::XACT_XINFO_HAS_TWOPHASE != 0 { xid = buf.get_u32_le(); - debug!("XLOG_XACT_COMMIT-XACT_XINFO_HAS_TWOPHASE xid {}", xid); + tracing::debug!("XLOG_XACT_COMMIT-XACT_XINFO_HAS_TWOPHASE xid {}", xid); } let origin_lsn = if xinfo & pg_constants::XACT_XINFO_HAS_ORIGIN != 0 { @@ -822,78 +1118,6 @@ impl XlClogTruncate { } } -#[repr(C)] -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] -pub struct MultiXactMember { - pub xid: TransactionId, - pub status: MultiXactStatus, -} - -impl MultiXactMember { - pub fn decode(buf: &mut Bytes) -> MultiXactMember { - MultiXactMember { - xid: buf.get_u32_le(), - status: buf.get_u32_le(), - } - } -} - -#[repr(C)] -#[derive(Debug)] -pub struct XlMultiXactCreate { - pub mid: MultiXactId, - /* new MultiXact's ID */ - pub moff: MultiXactOffset, - /* its starting offset in members file */ - pub nmembers: u32, - /* number of member XIDs */ - pub members: Vec, -} - -impl XlMultiXactCreate { - pub fn decode(buf: &mut Bytes) -> XlMultiXactCreate { - let mid = buf.get_u32_le(); - let moff = buf.get_u32_le(); - let nmembers = buf.get_u32_le(); - let mut members = Vec::new(); - for _ in 0..nmembers { - members.push(MultiXactMember::decode(buf)); - } - XlMultiXactCreate { - mid, - moff, - nmembers, - members, - } - } -} - -#[repr(C)] -#[derive(Debug)] -pub struct XlMultiXactTruncate { - pub oldest_multi_db: Oid, - /* to-be-truncated range of multixact offsets */ - pub start_trunc_off: MultiXactId, - /* just for completeness' sake */ - pub end_trunc_off: MultiXactId, - - /* to-be-truncated range of multixact members */ - pub start_trunc_memb: MultiXactOffset, - pub end_trunc_memb: MultiXactOffset, -} - -impl XlMultiXactTruncate { - pub fn decode(buf: &mut Bytes) -> XlMultiXactTruncate { - XlMultiXactTruncate { - oldest_multi_db: buf.get_u32_le(), - start_trunc_off: buf.get_u32_le(), - end_trunc_off: buf.get_u32_le(), - start_trunc_memb: buf.get_u32_le(), - end_trunc_memb: buf.get_u32_le(), - } - } -} - #[repr(C)] #[derive(Debug)] pub struct XlLogicalMessage { @@ -950,337 +1174,7 @@ impl XlRunningXacts { } } -#[repr(C)] -#[derive(Debug)] -pub struct XlReploriginDrop { - pub node_id: RepOriginId, -} - -impl XlReploriginDrop { - pub fn decode(buf: &mut Bytes) -> XlReploriginDrop { - XlReploriginDrop { - node_id: buf.get_u16_le(), - } - } -} - -#[repr(C)] -#[derive(Debug)] -pub struct XlReploriginSet { - pub remote_lsn: Lsn, - pub node_id: RepOriginId, -} - -impl XlReploriginSet { - pub fn decode(buf: &mut Bytes) -> XlReploriginSet 
{ - XlReploriginSet { - remote_lsn: Lsn(buf.get_u64_le()), - node_id: buf.get_u16_le(), - } - } -} - -/// Main routine to decode a WAL record and figure out which blocks are modified -// -// See xlogrecord.h for details -// The overall layout of an XLOG record is: -// Fixed-size header (XLogRecord struct) -// XLogRecordBlockHeader struct -// If pg_constants::BKPBLOCK_HAS_IMAGE, an XLogRecordBlockImageHeader struct follows -// If pg_constants::BKPIMAGE_HAS_HOLE and pg_constants::BKPIMAGE_IS_COMPRESSED, an -// XLogRecordBlockCompressHeader struct follows. -// If pg_constants::BKPBLOCK_SAME_REL is not set, a RelFileNode follows -// BlockNumber follows -// XLogRecordBlockHeader struct -// ... -// XLogRecordDataHeader[Short|Long] struct -// block data -// block data -// ... -// main data -// -// -// For performance reasons, the caller provides the DecodedWALRecord struct and the function just fills it in. -// It would be more natural for this function to return a DecodedWALRecord as return value, -// but reusing the caller-supplied struct avoids an allocation. -// This code is in the hot path for digesting incoming WAL, and is very performance sensitive. -// -pub fn decode_wal_record( - record: Bytes, - decoded: &mut DecodedWALRecord, - pg_version: u32, -) -> Result<()> { - let mut rnode_spcnode: u32 = 0; - let mut rnode_dbnode: u32 = 0; - let mut rnode_relnode: u32 = 0; - let mut got_rnode = false; - let mut origin_id: u16 = 0; - - let mut buf = record.clone(); - - // 1. Parse XLogRecord struct - - // FIXME: assume little-endian here - let xlogrec = XLogRecord::from_bytes(&mut buf)?; - - trace!( - "decode_wal_record xl_rmid = {} xl_info = {}", - xlogrec.xl_rmid, - xlogrec.xl_info - ); - - let remaining: usize = xlogrec.xl_tot_len as usize - XLOG_SIZE_OF_XLOG_RECORD; - - if buf.remaining() != remaining { - //TODO error - } - - let mut max_block_id = 0; - let mut blocks_total_len: u32 = 0; - let mut main_data_len = 0; - let mut datatotal: u32 = 0; - decoded.blocks.clear(); - - // 2. Decode the headers. 
- // XLogRecordBlockHeaders if any, - // XLogRecordDataHeader[Short|Long] - while buf.remaining() > datatotal as usize { - let block_id = buf.get_u8(); - - match block_id { - pg_constants::XLR_BLOCK_ID_DATA_SHORT => { - /* XLogRecordDataHeaderShort */ - main_data_len = buf.get_u8() as u32; - datatotal += main_data_len; - } - - pg_constants::XLR_BLOCK_ID_DATA_LONG => { - /* XLogRecordDataHeaderLong */ - main_data_len = buf.get_u32_le(); - datatotal += main_data_len; - } - - pg_constants::XLR_BLOCK_ID_ORIGIN => { - // RepOriginId is uint16 - origin_id = buf.get_u16_le(); - } - - pg_constants::XLR_BLOCK_ID_TOPLEVEL_XID => { - // TransactionId is uint32 - buf.advance(4); - } - - 0..=pg_constants::XLR_MAX_BLOCK_ID => { - /* XLogRecordBlockHeader */ - let mut blk = DecodedBkpBlock::new(); - - if block_id <= max_block_id { - // TODO - //report_invalid_record(state, - // "out-of-order block_id %u at %X/%X", - // block_id, - // (uint32) (state->ReadRecPtr >> 32), - // (uint32) state->ReadRecPtr); - // goto err; - } - max_block_id = block_id; - - let fork_flags: u8 = buf.get_u8(); - blk.forknum = fork_flags & pg_constants::BKPBLOCK_FORK_MASK; - blk.flags = fork_flags; - blk.has_image = (fork_flags & pg_constants::BKPBLOCK_HAS_IMAGE) != 0; - blk.has_data = (fork_flags & pg_constants::BKPBLOCK_HAS_DATA) != 0; - blk.will_init = (fork_flags & pg_constants::BKPBLOCK_WILL_INIT) != 0; - blk.data_len = buf.get_u16_le(); - - /* TODO cross-check that the HAS_DATA flag is set iff data_length > 0 */ - - datatotal += blk.data_len as u32; - blocks_total_len += blk.data_len as u32; - - if blk.has_image { - blk.bimg_len = buf.get_u16_le(); - blk.hole_offset = buf.get_u16_le(); - blk.bimg_info = buf.get_u8(); - - blk.apply_image = dispatch_pgversion!( - pg_version, - (blk.bimg_info & pgv::bindings::BKPIMAGE_APPLY) != 0 - ); - - let blk_img_is_compressed = - postgres_ffi::bkpimage_is_compressed(blk.bimg_info, pg_version); - - if blk_img_is_compressed { - debug!("compressed block image , pg_version = {}", pg_version); - } - - if blk_img_is_compressed { - if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0 { - blk.hole_length = buf.get_u16_le(); - } else { - blk.hole_length = 0; - } - } else { - blk.hole_length = BLCKSZ - blk.bimg_len; - } - datatotal += blk.bimg_len as u32; - blocks_total_len += blk.bimg_len as u32; - - /* - * cross-check that hole_offset > 0, hole_length > 0 and - * bimg_len < BLCKSZ if the HAS_HOLE flag is set. - */ - if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE != 0 - && (blk.hole_offset == 0 || blk.hole_length == 0 || blk.bimg_len == BLCKSZ) - { - // TODO - /* - report_invalid_record(state, - "pg_constants::BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X", - (unsigned int) blk->hole_offset, - (unsigned int) blk->hole_length, - (unsigned int) blk->bimg_len, - (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); - goto err; - */ - } - - /* - * cross-check that hole_offset == 0 and hole_length == 0 if - * the HAS_HOLE flag is not set. - */ - if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0 - && (blk.hole_offset != 0 || blk.hole_length != 0) - { - // TODO - /* - report_invalid_record(state, - "pg_constants::BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X", - (unsigned int) blk->hole_offset, - (unsigned int) blk->hole_length, - (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); - goto err; - */ - } - - /* - * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED - * flag is set. 
- */ - if !blk_img_is_compressed && blk.bimg_len == BLCKSZ { - // TODO - /* - report_invalid_record(state, - "pg_constants::BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X", - (unsigned int) blk->bimg_len, - (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); - goto err; - */ - } - - /* - * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor - * IS_COMPRESSED flag is set. - */ - if blk.bimg_info & pg_constants::BKPIMAGE_HAS_HOLE == 0 - && !blk_img_is_compressed - && blk.bimg_len != BLCKSZ - { - // TODO - /* - report_invalid_record(state, - "neither pg_constants::BKPIMAGE_HAS_HOLE nor pg_constants::BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X", - (unsigned int) blk->data_len, - (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); - goto err; - */ - } - } - if fork_flags & pg_constants::BKPBLOCK_SAME_REL == 0 { - rnode_spcnode = buf.get_u32_le(); - rnode_dbnode = buf.get_u32_le(); - rnode_relnode = buf.get_u32_le(); - got_rnode = true; - } else if !got_rnode { - // TODO - /* - report_invalid_record(state, - "pg_constants::BKPBLOCK_SAME_REL set but no previous rel at %X/%X", - (uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr); - goto err; */ - } - - blk.rnode_spcnode = rnode_spcnode; - blk.rnode_dbnode = rnode_dbnode; - blk.rnode_relnode = rnode_relnode; - - blk.blkno = buf.get_u32_le(); - trace!( - "this record affects {}/{}/{} blk {}", - rnode_spcnode, - rnode_dbnode, - rnode_relnode, - blk.blkno - ); - - decoded.blocks.push(blk); - } - - _ => { - // TODO: invalid block_id - } - } - } - - // 3. Decode blocks. - let mut ptr = record.len() - buf.remaining(); - for blk in decoded.blocks.iter_mut() { - if blk.has_image { - blk.bimg_offset = ptr as u32; - ptr += blk.bimg_len as usize; - } - if blk.has_data { - ptr += blk.data_len as usize; - } - } - // We don't need them, so just skip blocks_total_len bytes - buf.advance(blocks_total_len as usize); - assert_eq!(ptr, record.len() - buf.remaining()); - - let main_data_offset = (xlogrec.xl_tot_len - main_data_len) as usize; - - // 4. Decode main_data - if main_data_len > 0 { - assert_eq!(buf.remaining(), main_data_len as usize); - } - - decoded.xl_xid = xlogrec.xl_xid; - decoded.xl_info = xlogrec.xl_info; - decoded.xl_rmid = xlogrec.xl_rmid; - decoded.record = record; - decoded.origin_id = origin_id; - decoded.main_data_offset = main_data_offset; - - Ok(()) -} - -/// -/// Build a human-readable string to describe a WAL record -/// -/// For debugging purposes -pub fn describe_wal_record(rec: &NeonWalRecord) -> Result { - match rec { - NeonWalRecord::Postgres { will_init, rec } => Ok(format!( - "will_init: {}, {}", - will_init, - describe_postgres_wal_record(rec)? - )), - _ => Ok(format!("{:?}", rec)), - } -} - -fn describe_postgres_wal_record(record: &Bytes) -> Result { +pub fn describe_postgres_wal_record(record: &Bytes) -> Result { // TODO: It would be nice to use the PostgreSQL rmgrdesc infrastructure for this. // Maybe use the postgres wal redo process, the same used for replaying WAL records? 
// Or could we compile the rmgrdesc routines into the dump_layer_file() binary directly, diff --git a/libs/wal_decoder/Cargo.toml b/libs/wal_decoder/Cargo.toml new file mode 100644 index 0000000000..3f80f8fcdb --- /dev/null +++ b/libs/wal_decoder/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "wal_decoder" +version = "0.1.0" +edition.workspace = true +license.workspace = true + +[features] +testing = [] + +[dependencies] +anyhow.workspace = true +bytes.workspace = true +pageserver_api.workspace = true +postgres_ffi.workspace = true +serde.workspace = true +tracing.workspace = true +utils.workspace = true +workspace_hack = { version = "0.1", path = "../../workspace_hack" } diff --git a/libs/wal_decoder/src/decoder.rs b/libs/wal_decoder/src/decoder.rs new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/libs/wal_decoder/src/decoder.rs @@ -0,0 +1 @@ + diff --git a/libs/wal_decoder/src/lib.rs b/libs/wal_decoder/src/lib.rs new file mode 100644 index 0000000000..05349d17c9 --- /dev/null +++ b/libs/wal_decoder/src/lib.rs @@ -0,0 +1,2 @@ +pub mod decoder; +pub mod models; diff --git a/libs/wal_decoder/src/models.rs b/libs/wal_decoder/src/models.rs new file mode 100644 index 0000000000..58f8e1b2da --- /dev/null +++ b/libs/wal_decoder/src/models.rs @@ -0,0 +1,167 @@ +//! This module houses types which represent decoded PG WAL records +//! ready for the pageserver to interpret. They are derived from the original +//! WAL records, so that each struct corresponds closely to one WAL record of +//! a specific kind. They contain the same information as the original WAL records, +//! just decoded into structs and fields for easier access. +//! +//! The ingestion code uses these structs to help with parsing the WAL records, +//! and it splits them into a stream of modifications to the key-value pairs that +//! are ultimately stored in delta layers. See also the split-out counterparts in +//! [`postgres_ffi::walrecord`]. +//! +//! The pipeline which processes WAL records is not super obvious, so let's follow +//! the flow of an example XACT_COMMIT Postgres record: +//! +//! (Postgres XACT_COMMIT record) +//! | +//! |--> pageserver::walingest::WalIngest::decode_xact_record +//! | +//! |--> ([`XactRecord::Commit`]) +//! | +//! |--> pageserver::walingest::WalIngest::ingest_xact_record +//! | +//! |--> (NeonWalRecord::ClogSetCommitted) +//! | +//! 
|--> write to KV store within the pageserver + +use bytes::Bytes; +use pageserver_api::reltag::{RelTag, SlruKind}; +use postgres_ffi::walrecord::{ + XlMultiXactCreate, XlMultiXactTruncate, XlRelmapUpdate, XlReploriginDrop, XlReploriginSet, + XlSmgrTruncate, XlXactParsedRecord, +}; +use postgres_ffi::{Oid, TransactionId}; +use utils::lsn::Lsn; + +pub enum HeapamRecord { + ClearVmBits(ClearVmBits), +} + +pub struct ClearVmBits { + pub new_heap_blkno: Option, + pub old_heap_blkno: Option, + pub vm_rel: RelTag, + pub flags: u8, +} + +pub enum NeonrmgrRecord { + ClearVmBits(ClearVmBits), +} + +pub enum SmgrRecord { + Create(SmgrCreate), + Truncate(XlSmgrTruncate), +} + +pub struct SmgrCreate { + pub rel: RelTag, +} + +pub enum DbaseRecord { + Create(DbaseCreate), + Drop(DbaseDrop), +} + +pub struct DbaseCreate { + pub db_id: Oid, + pub tablespace_id: Oid, + pub src_db_id: Oid, + pub src_tablespace_id: Oid, +} + +pub struct DbaseDrop { + pub db_id: Oid, + pub tablespace_ids: Vec, +} + +pub enum ClogRecord { + ZeroPage(ClogZeroPage), + Truncate(ClogTruncate), +} + +pub struct ClogZeroPage { + pub segno: u32, + pub rpageno: u32, +} + +pub struct ClogTruncate { + pub pageno: u32, + pub oldest_xid: TransactionId, + pub oldest_xid_db: Oid, +} + +pub enum XactRecord { + Commit(XactCommon), + Abort(XactCommon), + CommitPrepared(XactCommon), + AbortPrepared(XactCommon), + Prepare(XactPrepare), +} + +pub struct XactCommon { + pub parsed: XlXactParsedRecord, + pub origin_id: u16, + // Fields below are only used for logging + pub xl_xid: TransactionId, + pub lsn: Lsn, +} + +pub struct XactPrepare { + pub xl_xid: TransactionId, + pub data: Bytes, +} + +pub enum MultiXactRecord { + ZeroPage(MultiXactZeroPage), + Create(XlMultiXactCreate), + Truncate(XlMultiXactTruncate), +} + +pub struct MultiXactZeroPage { + pub slru_kind: SlruKind, + pub segno: u32, + pub rpageno: u32, +} + +pub enum RelmapRecord { + Update(RelmapUpdate), +} + +pub struct RelmapUpdate { + pub update: XlRelmapUpdate, + pub buf: Bytes, +} + +pub enum XlogRecord { + Raw(RawXlogRecord), +} + +pub struct RawXlogRecord { + pub info: u8, + pub lsn: Lsn, + pub buf: Bytes, +} + +pub enum LogicalMessageRecord { + Put(PutLogicalMessage), + #[cfg(feature = "testing")] + Failpoint, +} + +pub struct PutLogicalMessage { + pub path: String, + pub buf: Bytes, +} + +pub enum StandbyRecord { + RunningXacts(StandbyRunningXacts), +} + +pub struct StandbyRunningXacts { + pub oldest_running_xid: TransactionId, +} + +pub enum ReploriginRecord { + Set(XlReploriginSet), + Drop(XlReploriginDrop), +} diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 2531abc7a1..ecb8fa7491 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -8,7 +8,7 @@ license.workspace = true default = [] # Enables test-only APIs, incuding failpoints. 
In particular, enables the `fail_point!` macro, # which adds some runtime cost to run tests on outage conditions -testing = ["fail/failpoints", "pageserver_api/testing" ] +testing = ["fail/failpoints", "pageserver_api/testing", "wal_decoder/testing"] [dependencies] anyhow.workspace = true @@ -83,6 +83,7 @@ enum-map.workspace = true enumset = { workspace = true, features = ["serde"]} strum.workspace = true strum_macros.workspace = true +wal_decoder.workspace = true [target.'cfg(target_os = "linux")'.dependencies] procfs.workspace = true diff --git a/pageserver/benches/bench_ingest.rs b/pageserver/benches/bench_ingest.rs index d98b23acce..0a1ad9cd6b 100644 --- a/pageserver/benches/bench_ingest.rs +++ b/pageserver/benches/bench_ingest.rs @@ -8,13 +8,12 @@ use pageserver::{ context::{DownloadBehavior, RequestContext}, l0_flush::{L0FlushConfig, L0FlushGlobalState}, page_cache, - repository::Value, task_mgr::TaskKind, tenant::storage_layer::inmemory_layer::SerializedBatch, tenant::storage_layer::InMemoryLayer, virtual_file, }; -use pageserver_api::{key::Key, shard::TenantShardId}; +use pageserver_api::{key::Key, shard::TenantShardId, value::Value}; use utils::{ bin_ser::BeSer, id::{TenantId, TimelineId}, diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 1353e79f7c..5c5b52db44 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -1,9 +1,9 @@ use criterion::measurement::WallTime; use pageserver::keyspace::{KeyPartitioning, KeySpace}; -use pageserver::repository::Key; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::storage_layer::PersistentLayerDesc; +use pageserver_api::key::Key; use pageserver_api::shard::TenantShardId; use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use std::cmp::{max, min}; diff --git a/pageserver/benches/bench_walredo.rs b/pageserver/benches/bench_walredo.rs index 45936cb3fa..d3551b56e1 100644 --- a/pageserver/benches/bench_walredo.rs +++ b/pageserver/benches/bench_walredo.rs @@ -60,7 +60,8 @@ use anyhow::Context; use bytes::{Buf, Bytes}; use criterion::{BenchmarkId, Criterion}; use once_cell::sync::Lazy; -use pageserver::{config::PageServerConf, walrecord::NeonWalRecord, walredo::PostgresRedoManager}; +use pageserver::{config::PageServerConf, walredo::PostgresRedoManager}; +use pageserver_api::record::NeonWalRecord; use pageserver_api::{key::Key, shard::TenantShardId}; use std::{ future::Future, diff --git a/pageserver/ctl/src/draw_timeline_dir.rs b/pageserver/ctl/src/draw_timeline_dir.rs index bc939f9688..177e65ef79 100644 --- a/pageserver/ctl/src/draw_timeline_dir.rs +++ b/pageserver/ctl/src/draw_timeline_dir.rs @@ -51,7 +51,7 @@ //! 
use anyhow::{Context, Result}; -use pageserver::repository::Key; +use pageserver_api::key::Key; use std::cmp::Ordering; use std::io::{self, BufRead}; use std::path::PathBuf; diff --git a/pageserver/ctl/src/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs index 7dd2a5d05c..451d2a1d69 100644 --- a/pageserver/ctl/src/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -14,12 +14,12 @@ use std::ops::Range; use std::{fs, str}; use pageserver::page_cache::{self, PAGE_SZ}; -use pageserver::repository::{Key, KEY_SIZE}; use pageserver::tenant::block_io::FileBlockReader; use pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection}; use pageserver::tenant::storage_layer::delta_layer::{Summary, DELTA_KEY_SIZE}; use pageserver::tenant::storage_layer::range_overlaps; use pageserver::virtual_file::{self, VirtualFile}; +use pageserver_api::key::{Key, KEY_SIZE}; use utils::{bin_ser::BeSer, lsn::Lsn}; diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index c0b2b6ae89..22627d72c8 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -14,13 +14,13 @@ use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use pageserver::virtual_file::api::IoMode; use pageserver::{page_cache, virtual_file}; use pageserver::{ - repository::{Key, KEY_SIZE}, tenant::{ block_io::FileBlockReader, disk_btree::VisitDirection, storage_layer::delta_layer::DELTA_KEY_SIZE, }, virtual_file::VirtualFile, }; +use pageserver_api::key::{Key, KEY_SIZE}; use std::fs; use utils::bin_ser::BeSer; use utils::id::{TenantId, TimelineId}; diff --git a/pageserver/pagebench/src/cmd/aux_files.rs b/pageserver/pagebench/src/cmd/aux_files.rs index bce3285606..923a7f1f18 100644 --- a/pageserver/pagebench/src/cmd/aux_files.rs +++ b/pageserver/pagebench/src/cmd/aux_files.rs @@ -1,4 +1,4 @@ -use pageserver_api::models::{AuxFilePolicy, TenantConfig, TenantConfigRequest}; +use pageserver_api::models::{TenantConfig, TenantConfigRequest}; use pageserver_api::shard::TenantShardId; use utils::id::TenantTimelineId; use utils::lsn::Lsn; @@ -66,10 +66,7 @@ async fn main_impl(args: Args) -> anyhow::Result<()> { mgmt_api_client .tenant_config(&TenantConfigRequest { tenant_id: timeline.tenant_id, - config: TenantConfig { - switch_aux_file_policy: Some(AuxFilePolicy::V2), - ..Default::default() - }, + config: TenantConfig::default(), }) .await?; diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index 73bdc90213..7733bdb640 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -696,7 +696,7 @@ impl DeletionQueue { mod test { use camino::Utf8Path; use hex_literal::hex; - use pageserver_api::{shard::ShardIndex, upcall_api::ReAttachResponseTenant}; + use pageserver_api::{key::Key, shard::ShardIndex, upcall_api::ReAttachResponseTenant}; use std::{io::ErrorKind, time::Duration}; use tracing::info; @@ -705,7 +705,6 @@ mod test { use crate::{ controller_upcall_client::RetryForeverError, - repository::Key, tenant::{harness::TenantHarness, storage_layer::DeltaLayerName}, }; diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 3943f62ac0..2d8f4309ca 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2232,13 +2232,13 @@ async fn getpage_at_lsn_handler( check_permission(&request, Some(tenant_shard_id.tenant_id))?; let state = get_state(&request); - struct Key(crate::repository::Key); + struct Key(pageserver_api::key::Key); impl std::str::FromStr for Key { 
type Err = anyhow::Error; fn from_str(s: &str) -> std::result::Result { - crate::repository::Key::from_hex(s).map(Key) + pageserver_api::key::Key::from_hex(s).map(Key) } } diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index ca87f1d080..530c91c4da 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -19,12 +19,11 @@ use crate::metrics::WAL_INGEST; use crate::pgdatadir_mapping::*; use crate::tenant::Timeline; use crate::walingest::WalIngest; -use crate::walrecord::decode_wal_record; -use crate::walrecord::DecodedWALRecord; use pageserver_api::reltag::{RelTag, SlruKind}; use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::*; use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::walrecord::{decode_wal_record, DecodedWALRecord}; use postgres_ffi::ControlFileData; use postgres_ffi::DBState_DB_SHUTDOWNED; use postgres_ffi::Oid; diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index d51931c768..ef6711397a 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -24,7 +24,6 @@ pub mod metrics; pub mod page_cache; pub mod page_service; pub mod pgdatadir_mapping; -pub mod repository; pub mod span; pub(crate) mod statvfs; pub mod task_mgr; @@ -32,7 +31,6 @@ pub mod tenant; pub mod utilization; pub mod virtual_file; pub mod walingest; -pub mod walrecord; pub mod walredo; use camino::Utf8Path; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 19233a28cc..dc2dc08b53 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -7,14 +7,14 @@ //! Clarify that) //! use super::tenant::{PageReconstructError, Timeline}; +use crate::aux_file; use crate::context::RequestContext; use crate::keyspace::{KeySpace, KeySpaceAccum}; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id; -use crate::walrecord::NeonWalRecord; -use crate::{aux_file, repository::*}; use anyhow::{ensure, Context}; use bytes::{Buf, Bytes, BytesMut}; use enum_map::Enum; +use pageserver_api::key::Key; use pageserver_api::key::{ dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key, relmap_file_key, repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key, @@ -22,7 +22,9 @@ use pageserver_api::key::{ CompactKey, AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY, }; use pageserver_api::keyspace::SparseKeySpace; +use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; +use pageserver_api::value::Value; use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; use postgres_ffi::BLCKSZ; use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId}; diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index f846e145c5..64e4eb46ce 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -92,11 +92,11 @@ use crate::metrics::{ remove_tenant_metrics, BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC, }; -use crate::repository::GcResult; use crate::task_mgr; use crate::task_mgr::TaskKind; use crate::tenant::config::LocationMode; use crate::tenant::config::TenantConfOpt; +use crate::tenant::gc_result::GcResult; pub use crate::tenant::remote_timeline_client::index::IndexPart; use crate::tenant::remote_timeline_client::remote_initdb_archive_path; use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart; 
@@ -160,6 +160,7 @@ pub(crate) mod timeline; pub mod size; mod gc_block; +mod gc_result; pub(crate) mod throttle; pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; @@ -301,6 +302,13 @@ pub struct Tenant { /// **Lock order**: if acquiring all (or a subset), acquire them in order `timelines`, `timelines_offloaded`, `timelines_creating` timelines_offloaded: Mutex>>, + /// Serialize writes of the tenant manifest to remote storage. If there are concurrent operations + /// affecting the manifest, such as timeline deletion and timeline offload, they must wait for + /// each other (this could be optimized to coalesce writes if necessary). + /// + /// The contents of the Mutex are the last manifest we successfully uploaded + tenant_manifest_upload: tokio::sync::Mutex>, + // This mutex prevents creation of new timelines during GC. // Adding yet another mutex (in addition to `timelines`) is needed because holding // `timelines` mutex during all GC iteration @@ -467,10 +475,10 @@ impl WalRedoManager { /// This method is cancellation-safe. pub async fn request_redo( &self, - key: crate::repository::Key, + key: pageserver_api::key::Key, lsn: Lsn, base_img: Option<(Lsn, bytes::Bytes)>, - records: Vec<(Lsn, crate::walrecord::NeonWalRecord)>, + records: Vec<(Lsn, pageserver_api::record::NeonWalRecord)>, pg_version: u32, ) -> Result { match self { @@ -625,19 +633,10 @@ impl TimelineOrOffloaded { TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress, } } - fn remote_client_maybe_construct(&self, tenant: &Tenant) -> Arc { + fn maybe_remote_client(&self) -> Option> { match self { - TimelineOrOffloaded::Timeline(timeline) => timeline.remote_client.clone(), - TimelineOrOffloaded::Offloaded(offloaded) => match offloaded.remote_client.clone() { - Some(remote_client) => remote_client, - None => { - let remote_client = tenant.build_timeline_client( - offloaded.timeline_id, - tenant.remote_storage.clone(), - ); - Arc::new(remote_client) - } - }, + TimelineOrOffloaded::Timeline(timeline) => Some(timeline.remote_client.clone()), + TimelineOrOffloaded::Offloaded(offloaded) => offloaded.remote_client.clone(), } } } @@ -749,6 +748,24 @@ pub enum TimelineArchivalError { Other(anyhow::Error), } +#[derive(thiserror::Error, Debug)] +pub(crate) enum TenantManifestError { + #[error("Remote storage error: {0}")] + RemoteStorage(anyhow::Error), + + #[error("Cancelled")] + Cancelled, +} + +impl From for TimelineArchivalError { + fn from(e: TenantManifestError) -> Self { + match e { + TenantManifestError::RemoteStorage(e) => Self::Other(e), + TenantManifestError::Cancelled => Self::Cancelled, + } + } +} + impl Debug for TimelineArchivalError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -1534,18 +1551,7 @@ impl Tenant { offloaded_timelines_accessor.extend(offloaded_timelines_list.into_iter()); } if !offloaded_timeline_ids.is_empty() { - let manifest = self.tenant_manifest(); - // TODO: generation support - let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION; - upload_tenant_manifest( - &self.remote_storage, - &self.tenant_shard_id, - generation, - &manifest, - &self.cancel, - ) - .await - .map_err(TimelineArchivalError::Other)?; + self.store_tenant_manifest().await?; } // The local filesystem contents are a cache of what's in the remote IndexPart; @@ -1830,6 +1836,18 @@ impl Tenant { ctx: RequestContext, ) -> Result, TimelineArchivalError> { info!("unoffloading timeline"); + + // We activate the timeline below manually, so 
this must be called on an active timeline.
+        // We expect callers of this function to ensure this.
+        match self.current_state() {
+            TenantState::Activating { .. }
+            | TenantState::Attaching
+            | TenantState::Broken { .. } => {
+                panic!("Timeline expected to be active")
+            }
+            TenantState::Stopping { .. } => return Err(TimelineArchivalError::Cancelled),
+            TenantState::Active => {}
+        }
         let cancel = self.cancel.clone();
         // Protect against concurrent attempts to use this TimelineId
@@ -1914,18 +1932,7 @@ impl Tenant {
         };
         // Upload new list of offloaded timelines to S3
-        let manifest = self.tenant_manifest();
-        // TODO: generation support
-        let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
-        upload_tenant_manifest(
-            &self.remote_storage,
-            &self.tenant_shard_id,
-            generation,
-            &manifest,
-            &cancel,
-        )
-        .await
-        .map_err(TimelineArchivalError::Other)?;
+        self.store_tenant_manifest().await?;
         // Activate the timeline (if it makes sense)
         if !(timeline.is_broken() || timeline.is_stopping()) {
@@ -3122,7 +3129,7 @@ impl Tenant {
             }
         }
-        let tenant_manifest = self.tenant_manifest();
+        let tenant_manifest = self.build_tenant_manifest();
         // TODO: generation support
         let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION;
         for child_shard in child_shards {
@@ -3317,7 +3324,8 @@ impl Tenant {
             .unwrap_or(self.conf.default_tenant_conf.lsn_lease_length)
     }
-    pub(crate) fn tenant_manifest(&self) -> TenantManifest {
+    /// Generate an up-to-date TenantManifest based on the state of this Tenant.
+    fn build_tenant_manifest(&self) -> TenantManifest {
         let timelines_offloaded = self.timelines_offloaded.lock().unwrap();
         let mut timeline_manifests = timelines_offloaded
@@ -3525,6 +3533,7 @@ impl Tenant {
             timelines: Mutex::new(HashMap::new()),
             timelines_creating: Mutex::new(HashSet::new()),
             timelines_offloaded: Mutex::new(HashMap::new()),
+            tenant_manifest_upload: Default::default(),
             gc_cs: tokio::sync::Mutex::new(()),
             walredo_mgr,
             remote_storage,
@@ -4704,6 +4713,49 @@ impl Tenant {
             .max()
             .unwrap_or(0)
     }
+
+    /// Serialize and write the latest TenantManifest to remote storage.
+    pub(crate) async fn store_tenant_manifest(&self) -> Result<(), TenantManifestError> {
+        // Only one manifest write may be done at a time, and the contents of the manifest
+        // must be loaded while holding this lock. This makes it safe to call this function
+        // from anywhere without worrying about colliding updates.
+        let mut guard = tokio::select! {
+            g = self.tenant_manifest_upload.lock() => {
+                g
+            },
+            _ = self.cancel.cancelled() => {
+                return Err(TenantManifestError::Cancelled);
+            }
+        };
+
+        let manifest = self.build_tenant_manifest();
+        if Some(&manifest) == (*guard).as_ref() {
+            // Optimisation: skip uploads that don't change anything.
+            return Ok(());
+        }
+
+        upload_tenant_manifest(
+            &self.remote_storage,
+            &self.tenant_shard_id,
+            self.generation,
+            &manifest,
+            &self.cancel,
+        )
+        .await
+        .map_err(|e| {
+            if self.cancel.is_cancelled() {
+                TenantManifestError::Cancelled
+            } else {
+                TenantManifestError::RemoteStorage(e)
+            }
+        })?;
+
+        // Store the successfully uploaded manifest, so that future callers can avoid
+        // re-uploading the same thing.
+ *guard = Some(manifest); + + Ok(()) + } } /// Create the cluster temporarily in 'initdbpath' directory inside the repository @@ -4806,7 +4858,8 @@ pub(crate) mod harness { use crate::deletion_queue::mock::MockDeletionQueue; use crate::l0_flush::L0FlushConfig; use crate::walredo::apply_neon; - use crate::{repository::Key, walrecord::NeonWalRecord}; + use pageserver_api::key::Key; + use pageserver_api::record::NeonWalRecord; use super::*; use hex_literal::hex; @@ -4853,7 +4906,6 @@ pub(crate) mod harness { image_layer_creation_check_threshold: Some( tenant_conf.image_layer_creation_check_threshold, ), - switch_aux_file_policy: Some(tenant_conf.switch_aux_file_policy), lsn_lease_length: Some(tenant_conf.lsn_lease_length), lsn_lease_length_for_ts: Some(tenant_conf.lsn_lease_length_for_ts), } @@ -5076,25 +5128,30 @@ mod tests { use super::*; use crate::keyspace::KeySpaceAccum; - use crate::repository::{Key, Value}; use crate::tenant::harness::*; use crate::tenant::timeline::CompactFlags; - use crate::walrecord::NeonWalRecord; use crate::DEFAULT_PG_VERSION; use bytes::{Bytes, BytesMut}; use hex_literal::hex; use itertools::Itertools; - use pageserver_api::key::{AUX_KEY_PREFIX, NON_INHERITED_RANGE}; + use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings}; + use pageserver_api::value::Value; use rand::{thread_rng, Rng}; use storage_layer::PersistentLayerKey; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; - use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; - use timeline::{DeltaLayerTestDesc, GcInfo}; + use timeline::DeltaLayerTestDesc; use utils::id::TenantId; + #[cfg(feature = "testing")] + use pageserver_api::record::NeonWalRecord; + #[cfg(feature = "testing")] + use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; + #[cfg(feature = "testing")] + use timeline::GcInfo; + static TEST_KEY: Lazy = Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001"))); @@ -7659,6 +7716,7 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] #[tokio::test] async fn test_neon_test_record() -> anyhow::Result<()> { let harness = TenantHarness::create("test_neon_test_record").await?; @@ -7850,6 +7908,7 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] #[tokio::test] async fn test_simple_bottom_most_compaction_deltas() -> anyhow::Result<()> { let harness = TenantHarness::create("test_simple_bottom_most_compaction_deltas").await?; @@ -8046,6 +8105,7 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] #[tokio::test] async fn test_generate_key_retention() -> anyhow::Result<()> { let harness = TenantHarness::create("test_generate_key_retention").await?; @@ -8393,6 +8453,7 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] #[tokio::test] async fn test_simple_bottom_most_compaction_with_retain_lsns() -> anyhow::Result<()> { let harness = @@ -8633,6 +8694,7 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] #[tokio::test] async fn test_simple_bottom_most_compaction_with_retain_lsns_single_key() -> anyhow::Result<()> { @@ -8841,6 +8903,7 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] #[tokio::test] async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> { let harness = TenantHarness::create("test_simple_bottom_most_compaction_on_branch").await?; @@ -9042,6 +9105,7 @@ mod tests { // // When querying the key range [A, B) we need to read at different LSN 
ranges // for [A, C) and [C, B). This test checks that the described edge case is handled correctly. + #[cfg(feature = "testing")] #[tokio::test] async fn test_vectored_read_with_nested_image_layer() -> anyhow::Result<()> { let harness = TenantHarness::create("test_vectored_read_with_nested_image_layer").await?; diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 502cb62fe8..ce686c89ef 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -9,7 +9,6 @@ //! may lead to a data loss. //! pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf; -use pageserver_api::models::AuxFilePolicy; use pageserver_api::models::CompactionAlgorithmSettings; use pageserver_api::models::EvictionPolicy; use pageserver_api::models::{self, ThrottleConfig}; @@ -341,10 +340,6 @@ pub struct TenantConfOpt { #[serde(skip_serializing_if = "Option::is_none")] pub image_layer_creation_check_threshold: Option, - #[serde(skip_serializing_if = "Option::is_none")] - #[serde(default)] - pub switch_aux_file_policy: Option, - #[serde(skip_serializing_if = "Option::is_none")] #[serde(with = "humantime_serde")] #[serde(default)] @@ -410,9 +405,6 @@ impl TenantConfOpt { image_layer_creation_check_threshold: self .image_layer_creation_check_threshold .unwrap_or(global_conf.image_layer_creation_check_threshold), - switch_aux_file_policy: self - .switch_aux_file_policy - .unwrap_or(global_conf.switch_aux_file_policy), lsn_lease_length: self .lsn_lease_length .unwrap_or(global_conf.lsn_lease_length), @@ -470,7 +462,6 @@ impl From for models::TenantConfig { lazy_slru_download: value.lazy_slru_download, timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from), image_layer_creation_check_threshold: value.image_layer_creation_check_threshold, - switch_aux_file_policy: value.switch_aux_file_policy, lsn_lease_length: value.lsn_lease_length.map(humantime), lsn_lease_length_for_ts: value.lsn_lease_length_for_ts.map(humantime), } diff --git a/pageserver/src/tenant/gc_result.rs b/pageserver/src/tenant/gc_result.rs new file mode 100644 index 0000000000..c805aafeab --- /dev/null +++ b/pageserver/src/tenant/gc_result.rs @@ -0,0 +1,57 @@ +use anyhow::Result; +use serde::Serialize; +use std::ops::AddAssign; +use std::time::Duration; + +/// +/// Result of performing GC +/// +#[derive(Default, Serialize, Debug)] +pub struct GcResult { + pub layers_total: u64, + pub layers_needed_by_cutoff: u64, + pub layers_needed_by_pitr: u64, + pub layers_needed_by_branches: u64, + pub layers_needed_by_leases: u64, + pub layers_not_updated: u64, + pub layers_removed: u64, // # of layer files removed because they have been made obsolete by newer ondisk files. + + #[serde(serialize_with = "serialize_duration_as_millis")] + pub elapsed: Duration, + + /// The layers which were garbage collected. + /// + /// Used in `/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc` to wait for the layers to be + /// dropped in tests. 
+ #[cfg(feature = "testing")] + #[serde(skip)] + pub(crate) doomed_layers: Vec, +} + +// helper function for `GcResult`, serializing a `Duration` as an integer number of milliseconds +fn serialize_duration_as_millis(d: &Duration, serializer: S) -> Result +where + S: serde::Serializer, +{ + d.as_millis().serialize(serializer) +} + +impl AddAssign for GcResult { + fn add_assign(&mut self, other: Self) { + self.layers_total += other.layers_total; + self.layers_needed_by_pitr += other.layers_needed_by_pitr; + self.layers_needed_by_cutoff += other.layers_needed_by_cutoff; + self.layers_needed_by_branches += other.layers_needed_by_branches; + self.layers_needed_by_leases += other.layers_needed_by_leases; + self.layers_not_updated += other.layers_not_updated; + self.layers_removed += other.layers_removed; + + self.elapsed += other.elapsed; + + #[cfg(feature = "testing")] + { + let mut other = other; + self.doomed_layers.append(&mut other.doomed_layers); + } + } +} diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 707233b003..7f15baed10 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -48,9 +48,9 @@ mod layer_coverage; use crate::context::RequestContext; use crate::keyspace::KeyPartitioning; -use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use anyhow::Result; +use pageserver_api::key::Key; use pageserver_api::keyspace::{KeySpace, KeySpaceAccum}; use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze}; use std::collections::{HashMap, VecDeque}; diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 0567f8f3a7..a4c458b737 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -2811,7 +2811,7 @@ where } use { - crate::repository::GcResult, pageserver_api::models::TimelineGcRequest, + crate::tenant::gc_result::GcResult, pageserver_api::models::TimelineGcRequest, utils::http::error::ApiError, }; diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 1c72c7fff8..19e762b9fa 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -249,7 +249,7 @@ pub(crate) use download::{ list_remote_tenant_shards, list_remote_timelines, }; pub(crate) use index::LayerFileMetadata; -pub(crate) use upload::{upload_initdb_dir, upload_tenant_manifest}; +pub(crate) use upload::upload_initdb_dir; // Occasional network issues and such can cause remote operations to fail, and // that's expected. If a download fails, we log it at info-level, and retry. diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 95f8f026d4..8679c68a27 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -403,59 +403,79 @@ async fn do_download_index_part( Ok((index_part, index_generation, index_part_mtime)) } -/// index_part.json objects are suffixed with a generation number, so we cannot -/// directly GET the latest index part without doing some probing. +/// Metadata objects are "generationed", meaning that they include a generation suffix. This +/// function downloads the object with the highest generation <= `my_generation`. /// -/// In this function we probe for the most recent index in a generation <= our current generation. 
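// Illustrative sketch (not part of the patch): the AddAssign impl in gc_result.rs above exists so
// that per-timeline GC results can be rolled up into one tenant-wide total. Assumes GcResult is in
// scope (it lives in pageserver::tenant::gc_result inside the crate); the function is hypothetical.
fn total_gc_result(per_timeline: Vec<GcResult>) -> GcResult {
    let mut total = GcResult::default();
    for timeline_result in per_timeline {
        // Layer counters and elapsed durations accumulate; with the "testing"
        // feature enabled, doomed_layers are appended as well.
        total += timeline_result;
    }
    total
}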
-/// See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md +/// Data objects (layer files) also include a generation in their path, but there is no equivalent +/// search process, because their reference from an index includes the generation. +/// +/// An expensive object listing operation is only done if necessary: the typical fast path is to issue two +/// GET operations, one to our own generation (stale attachment case), and one to the immediately preceding +/// generation (normal case when migrating/restarting). Only if both of these return 404 do we fall back +/// to listing objects. +/// +/// * `my_generation`: the value of `[crate::tenant::Tenant::generation]` +/// * `what`: for logging, what object are we downloading +/// * `prefix`: when listing objects, use this prefix (i.e. the part of the object path before the generation) +/// * `do_download`: a GET of the object in a particular generation, which should **retry indefinitely** unless +/// `cancel`` has fired. This function does not do its own retries of GET operations, and relies +/// on the function passed in to do so. +/// * `parse_path`: parse a fully qualified remote storage path to get the generation of the object. +#[allow(clippy::too_many_arguments)] #[tracing::instrument(skip_all, fields(generation=?my_generation))] -pub(crate) async fn download_index_part( - storage: &GenericRemoteStorage, - tenant_shard_id: &TenantShardId, - timeline_id: &TimelineId, +pub(crate) async fn download_generation_object<'a, T, DF, DFF, PF>( + storage: &'a GenericRemoteStorage, + tenant_shard_id: &'a TenantShardId, + timeline_id: &'a TimelineId, my_generation: Generation, - cancel: &CancellationToken, -) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { + what: &str, + prefix: RemotePath, + do_download: DF, + parse_path: PF, + cancel: &'a CancellationToken, +) -> Result<(T, Generation, SystemTime), DownloadError> +where + DF: Fn( + &'a GenericRemoteStorage, + &'a TenantShardId, + &'a TimelineId, + Generation, + &'a CancellationToken, + ) -> DFF, + DFF: Future>, + PF: Fn(RemotePath) -> Option, + T: 'static, +{ debug_assert_current_span_has_tenant_and_timeline_id(); if my_generation.is_none() { // Operating without generations: just fetch the generation-less path - return do_download_index_part( - storage, - tenant_shard_id, - timeline_id, - my_generation, - cancel, - ) - .await; + return do_download(storage, tenant_shard_id, timeline_id, my_generation, cancel).await; } - // Stale case: If we were intentionally attached in a stale generation, there may already be a remote - // index in our generation. + // Stale case: If we were intentionally attached in a stale generation, the remote object may already + // exist in our generation. // // This is an optimization to avoid doing the listing for the general case below. - let res = - do_download_index_part(storage, tenant_shard_id, timeline_id, my_generation, cancel).await; + let res = do_download(storage, tenant_shard_id, timeline_id, my_generation, cancel).await; match res { - Ok(index_part) => { - tracing::debug!( - "Found index_part from current generation (this is a stale attachment)" - ); - return Ok(index_part); + Ok(decoded) => { + tracing::debug!("Found {what} from current generation (this is a stale attachment)"); + return Ok(decoded); } Err(DownloadError::NotFound) => {} Err(e) => return Err(e), }; - // Typical case: the previous generation of this tenant was running healthily, and had uploaded - // and index part. 
We may safely start from this index without doing a listing, because: + // Typical case: the previous generation of this tenant was running healthily, and had uploaded the object + // we are seeking in that generation. We may safely start from this index without doing a listing, because: // - We checked for current generation case above // - generations > my_generation are to be ignored - // - any other indices that exist would have an older generation than `previous_gen`, and - // we want to find the most recent index from a previous generation. + // - any other objects that exist would have an older generation than `previous_gen`, and + // we want to find the most recent object from a previous generation. // // This is an optimization to avoid doing the listing for the general case below. - let res = do_download_index_part( + let res = do_download( storage, tenant_shard_id, timeline_id, @@ -464,14 +484,12 @@ pub(crate) async fn download_index_part( ) .await; match res { - Ok(index_part) => { - tracing::debug!("Found index_part from previous generation"); - return Ok(index_part); + Ok(decoded) => { + tracing::debug!("Found {what} from previous generation"); + return Ok(decoded); } Err(DownloadError::NotFound) => { - tracing::debug!( - "No index_part found from previous generation, falling back to listing" - ); + tracing::debug!("No {what} found from previous generation, falling back to listing"); } Err(e) => { return Err(e); @@ -481,12 +499,10 @@ pub(crate) async fn download_index_part( // General case/fallback: if there is no index at my_generation or prev_generation, then list all index_part.json // objects, and select the highest one with a generation <= my_generation. Constructing the prefix is equivalent // to constructing a full index path with no generation, because the generation is a suffix. - let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none()); - - let indices = download_retry( + let paths = download_retry( || async { storage - .list(Some(&index_prefix), ListingMode::NoDelimiter, None, cancel) + .list(Some(&prefix), ListingMode::NoDelimiter, None, cancel) .await }, "list index_part files", @@ -497,22 +513,22 @@ pub(crate) async fn download_index_part( // General case logic for which index to use: the latest index whose generation // is <= our own. See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md - let max_previous_generation = indices + let max_previous_generation = paths .into_iter() - .filter_map(|o| parse_remote_index_path(o.key)) + .filter_map(|o| parse_path(o.key)) .filter(|g| g <= &my_generation) .max(); match max_previous_generation { Some(g) => { - tracing::debug!("Found index_part in generation {g:?}"); - do_download_index_part(storage, tenant_shard_id, timeline_id, g, cancel).await + tracing::debug!("Found {what} in generation {g:?}"); + do_download(storage, tenant_shard_id, timeline_id, g, cancel).await } None => { // Migration from legacy pre-generation state: we have a generation but no prior // attached pageservers did. Try to load from a no-generation path. - tracing::debug!("No index_part.json* found"); - do_download_index_part( + tracing::debug!("No {what}* found"); + do_download( storage, tenant_shard_id, timeline_id, @@ -524,6 +540,33 @@ pub(crate) async fn download_index_part( } } +/// index_part.json objects are suffixed with a generation number, so we cannot +/// directly GET the latest index part without doing some probing. 
+/// +/// In this function we probe for the most recent index in a generation <= our current generation. +/// See "Finding the remote indices for timelines" in docs/rfcs/025-generation-numbers.md +pub(crate) async fn download_index_part( + storage: &GenericRemoteStorage, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + my_generation: Generation, + cancel: &CancellationToken, +) -> Result<(IndexPart, Generation, SystemTime), DownloadError> { + let index_prefix = remote_index_path(tenant_shard_id, timeline_id, Generation::none()); + download_generation_object( + storage, + tenant_shard_id, + timeline_id, + my_generation, + "index_part", + index_prefix, + do_download_index_part, + parse_remote_index_path, + cancel, + ) + .await +} + pub(crate) async fn download_initdb_tar_zst( conf: &'static PageServerConf, storage: &GenericRemoteStorage, diff --git a/pageserver/src/tenant/remote_timeline_client/manifest.rs b/pageserver/src/tenant/remote_timeline_client/manifest.rs index 7d92d45146..c4382cb648 100644 --- a/pageserver/src/tenant/remote_timeline_client/manifest.rs +++ b/pageserver/src/tenant/remote_timeline_client/manifest.rs @@ -3,7 +3,7 @@ use serde::{Deserialize, Serialize}; use utils::{id::TimelineId, lsn::Lsn}; /// Tenant-shard scoped manifest -#[derive(Clone, Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct TenantManifest { /// Debugging aid describing the version of this manifest. /// Can also be used for distinguishing breaking changes later on. @@ -23,7 +23,7 @@ pub struct TenantManifest { /// Very similar to [`pageserver_api::models::OffloadedTimelineInfo`], /// but the two datastructures serve different needs, this is for a persistent disk format /// that must be backwards compatible, while the other is only for informative purposes. -#[derive(Clone, Serialize, Deserialize, Copy)] +#[derive(Clone, Serialize, Deserialize, Copy, PartialEq, Eq)] pub struct OffloadedTimelineManifest { pub timeline_id: TimelineId, /// Whether the timeline has a parent it has been branched off from or not diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index 4a4c698b56..6c3276ea3c 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -187,6 +187,8 @@ pub(super) async fn gather_inputs( // but it is unlikely to cause any issues. In the worst case, // the calculation will error out. timelines.retain(|t| t.is_active()); + // Also filter out archived timelines. + timelines.retain(|t| t.is_archived() != Some(true)); // Build a map of branch points. 
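// Illustrative sketch (not part of the patch): the probe order implemented by
// download_generation_object above, with `get` and `list_generations` as simplified stand-ins for
// the `do_download` and listing calls, and generations reduced to plain u32s.
fn find_latest_generationed_object<T>(
    my_generation: u32,
    get: &dyn Fn(Option<u32>) -> Option<T>, // GET at a generation; None = legacy, generation-less path
    list_generations: &dyn Fn() -> Vec<u32>, // expensive listing fallback
) -> Option<T> {
    // 1. Stale-attachment fast path: the object may already exist in our own generation.
    if let Some(found) = get(Some(my_generation)) {
        return Some(found);
    }
    // 2. Typical fast path: the immediately preceding generation.
    if my_generation > 0 {
        if let Some(found) = get(Some(my_generation - 1)) {
            return Some(found);
        }
    }
    // 3. General case: list everything and take the highest generation <= ours.
    if let Some(best) = list_generations()
        .into_iter()
        .filter(|g| *g <= my_generation)
        .max()
    {
        return get(Some(best));
    }
    // 4. Legacy case: no prior attachment wrote a generation suffix at all.
    get(None)
}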
let mut branchpoints: HashMap> = HashMap::new(); diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 4a63491e90..8f4219bbbc 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -11,11 +11,11 @@ mod layer_name; pub mod merge_iterator; use crate::context::{AccessStatsBehavior, RequestContext}; -use crate::repository::Value; -use crate::walrecord::NeonWalRecord; use bytes::Bytes; use pageserver_api::key::Key; use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; +use pageserver_api::record::NeonWalRecord; +use pageserver_api::value::Value; use std::cmp::{Ordering, Reverse}; use std::collections::hash_map::Entry; use std::collections::{BinaryHeap, HashMap}; diff --git a/pageserver/src/tenant/storage_layer/batch_split_writer.rs b/pageserver/src/tenant/storage_layer/batch_split_writer.rs index 272e422c90..8a397ceb7a 100644 --- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs +++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs @@ -5,7 +5,8 @@ use pageserver_api::key::{Key, KEY_SIZE}; use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId}; use crate::tenant::storage_layer::Layer; -use crate::{config::PageServerConf, context::RequestContext, repository::Value, tenant::Timeline}; +use crate::{config::PageServerConf, context::RequestContext, tenant::Timeline}; +use pageserver_api::value::Value; use super::layer::S3_UPLOAD_LIMIT; use super::{ diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 641729d681..10165b1d06 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -30,7 +30,6 @@ use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::{self, FileId, PAGE_SZ}; -use crate::repository::{Key, Value, KEY_SIZE}; use crate::tenant::blob_io::BlobWriter; use crate::tenant::block_io::{BlockBuf, BlockCursor, BlockLease, BlockReader, FileBlockReader}; use crate::tenant::disk_btree::{ @@ -46,7 +45,7 @@ use crate::tenant::PageReconstructError; use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; use crate::virtual_file::IoBufferMut; use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; -use crate::{walrecord, TEMP_FILE_SUFFIX}; +use crate::TEMP_FILE_SUFFIX; use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION}; use anyhow::{anyhow, bail, ensure, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; @@ -54,9 +53,11 @@ use futures::StreamExt; use itertools::Itertools; use pageserver_api::config::MaxVectoredReadBytes; use pageserver_api::key::DBDIR_KEY; +use pageserver_api::key::{Key, KEY_SIZE}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::ImageCompressionAlgorithm; use pageserver_api::shard::TenantShardId; +use pageserver_api::value::Value; use rand::{distributions::Alphanumeric, Rng}; use serde::{Deserialize, Serialize}; use std::collections::VecDeque; @@ -1293,7 +1294,7 @@ impl DeltaLayerInner { // is it an image or will_init walrecord? 
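// Illustrative sketch (not part of the patch): the two Value variants that the will_init check and
// the dump code above distinguish, now imported from pageserver_api::value / pageserver_api::record.
// The summarize function itself is hypothetical.
use pageserver_api::record::NeonWalRecord;
use pageserver_api::value::Value;

fn summarize(value: &Value) -> String {
    match value {
        Value::Image(img) => format!("full page image, {} bytes", img.len()),
        Value::WalRecord(NeonWalRecord::Postgres { will_init, rec }) => {
            format!("postgres wal record, {} bytes, will_init: {}", rec.len(), will_init)
        }
        Value::WalRecord(other) => format!("neon-specific wal record: {:?}", other),
    }
}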
// FIXME: this could be handled by threading the BlobRef to the // VectoredReadBuilder - let will_init = crate::repository::ValueBytes::will_init(&data) + let will_init = pageserver_api::value::ValueBytes::will_init(&data) .inspect_err(|_e| { #[cfg(feature = "testing")] tracing::error!(data=?utils::Hex(&data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value"); @@ -1356,7 +1357,7 @@ impl DeltaLayerInner { format!(" img {} bytes", img.len()) } Value::WalRecord(rec) => { - let wal_desc = walrecord::describe_wal_record(&rec)?; + let wal_desc = pageserver_api::record::describe_wal_record(&rec)?; format!( " rec {} bytes will_init: {} {}", buf.len(), @@ -1610,7 +1611,6 @@ pub(crate) mod test { use rand::RngCore; use super::*; - use crate::repository::Value; use crate::tenant::harness::TIMELINE_ID; use crate::tenant::storage_layer::{Layer, ResidentLayer}; use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner; @@ -1622,6 +1622,7 @@ pub(crate) mod test { DEFAULT_PG_VERSION, }; use bytes::Bytes; + use pageserver_api::value::Value; /// Construct an index for a fictional delta layer and and then /// traverse in order to plan vectored reads for a query. Finally, @@ -1974,8 +1975,8 @@ pub(crate) mod test { #[tokio::test] async fn copy_delta_prefix_smoke() { - use crate::walrecord::NeonWalRecord; use bytes::Bytes; + use pageserver_api::record::NeonWalRecord; let h = crate::tenant::harness::TenantHarness::create("truncate_delta_smoke") .await @@ -2198,6 +2199,7 @@ pub(crate) mod test { (k1, l1).cmp(&(k2, l2)) } + #[cfg(feature = "testing")] pub(crate) fn sort_delta_value( (k1, l1, v1): &(Key, Lsn, Value), (k2, l2, v2): &(Key, Lsn, Value), diff --git a/pageserver/src/tenant/storage_layer/filter_iterator.rs b/pageserver/src/tenant/storage_layer/filter_iterator.rs index f45dd4b801..ccfcf68e8f 100644 --- a/pageserver/src/tenant/storage_layer/filter_iterator.rs +++ b/pageserver/src/tenant/storage_layer/filter_iterator.rs @@ -7,7 +7,7 @@ use pageserver_api::{ }; use utils::lsn::Lsn; -use crate::repository::Value; +use pageserver_api::value::Value; use super::merge_iterator::MergeIterator; @@ -121,8 +121,8 @@ mod tests { #[tokio::test] async fn filter_keyspace_iterator() { - use crate::repository::Value; use bytes::Bytes; + use pageserver_api::value::Value; let harness = TenantHarness::create("filter_iterator_filter_keyspace_iterator") .await diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 3f90df312d..c0d183dc08 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -28,7 +28,6 @@ use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::{self, FileId, PAGE_SZ}; -use crate::repository::{Key, Value, KEY_SIZE}; use crate::tenant::blob_io::BlobWriter; use crate::tenant::block_io::{BlockBuf, FileBlockReader}; use crate::tenant::disk_btree::{ @@ -51,8 +50,10 @@ use hex; use itertools::Itertools; use pageserver_api::config::MaxVectoredReadBytes; use pageserver_api::key::DBDIR_KEY; +use pageserver_api::key::{Key, KEY_SIZE}; use pageserver_api::keyspace::KeySpace; use pageserver_api::shard::{ShardIdentity, TenantShardId}; +use pageserver_api::value::Value; use rand::{distributions::Alphanumeric, Rng}; use serde::{Deserialize, Serialize}; use std::collections::VecDeque; @@ -1125,6 +1126,7 @@ mod test { use pageserver_api::{ key::Key, shard::{ShardCount, ShardIdentity, 
ShardNumber, ShardStripeSize}, + value::Value, }; use utils::{ generation::Generation, @@ -1134,7 +1136,6 @@ mod test { use crate::{ context::RequestContext, - repository::Value, tenant::{ config::TenantConf, harness::{TenantHarness, TIMELINE_ID}, diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 7573ddb5cc..df448a0963 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -7,7 +7,6 @@ use crate::assert_u64_eq_usize::{u64_to_usize, U64IsUsize, UsizeIsU64}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; -use crate::repository::{Key, Value}; use crate::tenant::ephemeral_file::EphemeralFile; use crate::tenant::timeline::GetVectoredError; use crate::tenant::PageReconstructError; @@ -16,9 +15,11 @@ use crate::{l0_flush, page_cache}; use anyhow::{anyhow, Context, Result}; use camino::Utf8PathBuf; use pageserver_api::key::CompactKey; +use pageserver_api::key::Key; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::InMemoryLayerInfo; use pageserver_api::shard::TenantShardId; +use pageserver_api::value::Value; use std::collections::{BTreeMap, HashMap}; use std::sync::{Arc, OnceLock}; use std::time::Instant; diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index 9de70f14ee..36dcc8d805 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -760,8 +760,8 @@ async fn evict_and_wait_does_not_wait_for_download() { /// Also checks that the same does not happen on a non-evicted layer (regression test). #[tokio::test(start_paused = true)] async fn eviction_cancellation_on_drop() { - use crate::repository::Value; use bytes::Bytes; + use pageserver_api::value::Value; // this is the runtime on which Layer spawns the blocking tasks on let handle = tokio::runtime::Handle::current(); @@ -782,7 +782,7 @@ async fn eviction_cancellation_on_drop() { let mut writer = timeline.writer().await; writer .put( - crate::repository::Key::from_i128(5), + pageserver_api::key::Key::from_i128(5), Lsn(0x20), &Value::Image(Bytes::from_static(b"this does not matter either")), &ctx, diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index a30c25d780..2097e90764 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -3,7 +3,7 @@ use pageserver_api::shard::TenantShardId; use std::ops::Range; use utils::{id::TimelineId, lsn::Lsn}; -use crate::repository::Key; +use pageserver_api::key::Key; use super::{DeltaLayerName, ImageLayerName, LayerName}; diff --git a/pageserver/src/tenant/storage_layer/layer_name.rs b/pageserver/src/tenant/storage_layer/layer_name.rs index 8e750e1187..2b98d74f9f 100644 --- a/pageserver/src/tenant/storage_layer/layer_name.rs +++ b/pageserver/src/tenant/storage_layer/layer_name.rs @@ -1,7 +1,7 @@ //! //! Helper functions for dealing with filenames of the image and delta layer files. //! 
-use crate::repository::Key; +use pageserver_api::key::Key; use std::borrow::Cow; use std::cmp::Ordering; use std::fmt; diff --git a/pageserver/src/tenant/storage_layer/merge_iterator.rs b/pageserver/src/tenant/storage_layer/merge_iterator.rs index f91e27241d..980202f12c 100644 --- a/pageserver/src/tenant/storage_layer/merge_iterator.rs +++ b/pageserver/src/tenant/storage_layer/merge_iterator.rs @@ -7,7 +7,8 @@ use anyhow::bail; use pageserver_api::key::Key; use utils::lsn::Lsn; -use crate::{context::RequestContext, repository::Value}; +use crate::context::RequestContext; +use pageserver_api::value::Value; use super::{ delta_layer::{DeltaLayerInner, DeltaLayerIterator}, @@ -291,12 +292,16 @@ mod tests { use crate::{ tenant::{ harness::{TenantHarness, TIMELINE_ID}, - storage_layer::delta_layer::test::{produce_delta_layer, sort_delta, sort_delta_value}, + storage_layer::delta_layer::test::{produce_delta_layer, sort_delta}, }, - walrecord::NeonWalRecord, DEFAULT_PG_VERSION, }; + #[cfg(feature = "testing")] + use crate::tenant::storage_layer::delta_layer::test::sort_delta_value; + #[cfg(feature = "testing")] + use pageserver_api::record::NeonWalRecord; + async fn assert_merge_iter_equal( merge_iter: &mut MergeIterator<'_>, expect: &[(Key, Lsn, Value)], @@ -319,8 +324,8 @@ mod tests { #[tokio::test] async fn merge_in_between() { - use crate::repository::Value; use bytes::Bytes; + use pageserver_api::value::Value; let harness = TenantHarness::create("merge_iterator_merge_in_between") .await @@ -384,8 +389,8 @@ mod tests { #[tokio::test] async fn delta_merge() { - use crate::repository::Value; use bytes::Bytes; + use pageserver_api::value::Value; let harness = TenantHarness::create("merge_iterator_delta_merge") .await @@ -458,10 +463,11 @@ mod tests { // TODO: test layers are loaded only when needed, reducing num of active iterators in k-merge } + #[cfg(feature = "testing")] #[tokio::test] async fn delta_image_mixed_merge() { - use crate::repository::Value; use bytes::Bytes; + use pageserver_api::value::Value; let harness = TenantHarness::create("merge_iterator_delta_image_mixed_merge") .await @@ -586,5 +592,6 @@ mod tests { is_send(merge_iter); } + #[cfg(feature = "testing")] fn is_send(_: impl Send) {} } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index f8d61dac5e..d765a7c987 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -125,11 +125,12 @@ use utils::{ simple_rcu::{Rcu, RcuReadGuard}, }; -use crate::repository::GcResult; -use crate::repository::{Key, Value}; use crate::task_mgr; use crate::task_mgr::TaskKind; +use crate::tenant::gc_result::GcResult; use crate::ZERO_PAGE; +use pageserver_api::key::Key; +use pageserver_api::value::Value; use self::delete::DeleteTimelineFlow; pub(super) use self::eviction_task::EvictionTaskTenantState; @@ -5822,17 +5823,15 @@ fn is_send() { #[cfg(test)] mod tests { use pageserver_api::key::Key; + use pageserver_api::value::Value; use utils::{id::TimelineId, lsn::Lsn}; - use crate::{ - repository::Value, - tenant::{ - harness::{test_img, TenantHarness}, - layer_map::LayerMap, - storage_layer::{Layer, LayerName}, - timeline::{DeltaLayerTestDesc, EvictionError}, - Timeline, - }, + use crate::tenant::{ + harness::{test_img, TenantHarness}, + layer_map::LayerMap, + storage_layer::{Layer, LayerName}, + timeline::{DeltaLayerTestDesc, EvictionError}, + Timeline, }; #[tokio::test] diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 
73e4f0e87c..70f93656cd 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -49,9 +49,10 @@ use pageserver_api::config::tenant_conf_defaults::{ DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD, }; -use crate::keyspace::KeySpace; -use crate::repository::{Key, Value}; -use crate::walrecord::NeonWalRecord; +use pageserver_api::key::Key; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::record::NeonWalRecord; +use pageserver_api::value::Value; use utils::lsn::Lsn; @@ -2148,7 +2149,7 @@ struct ResidentDeltaLayer(ResidentLayer); struct ResidentImageLayer(ResidentLayer); impl CompactionJobExecutor for TimelineAdaptor { - type Key = crate::repository::Key; + type Key = pageserver_api::key::Key; type Layer = OwnArc; type DeltaLayer = ResidentDeltaLayer; diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index a664bb59e1..2c6161da15 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -6,7 +6,7 @@ use std::{ use anyhow::Context; use pageserver_api::{models::TimelineState, shard::TenantShardId}; use tokio::sync::OwnedMutexGuard; -use tracing::{error, info, instrument, Instrument}; +use tracing::{error, info, info_span, instrument, Instrument}; use utils::{crashsafe, fs_ext, id::TimelineId, pausable_failpoint}; use crate::{ @@ -14,10 +14,9 @@ use crate::{ task_mgr::{self, TaskKind}, tenant::{ metadata::TimelineMetadata, - remote_timeline_client::{ - self, PersistIndexPartWithDeletedFlagError, RemoteTimelineClient, - }, - CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded, + remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient}, + CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, + TimelineOrOffloaded, }, }; @@ -176,32 +175,6 @@ async fn remove_maybe_offloaded_timeline_from_tenant( Ok(()) } -/// It is important that this gets called when DeletionGuard is being held. -/// For more context see comments in [`DeleteTimelineFlow::prepare`] -async fn upload_new_tenant_manifest( - tenant: &Tenant, - _: &DeletionGuard, // using it as a witness -) -> anyhow::Result<()> { - // This is susceptible to race conditions, i.e. we won't continue deletions if there is a crash - // between the deletion of the index-part.json and reaching of this code. - // So indeed, the tenant manifest might refer to an offloaded timeline which has already been deleted. - // However, we handle this case in tenant loading code so the next time we attach, the issue is - // resolved. - let manifest = tenant.tenant_manifest(); - // TODO: generation support - let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION; - remote_timeline_client::upload_tenant_manifest( - &tenant.remote_storage, - &tenant.tenant_shard_id, - generation, - &manifest, - &tenant.cancel, - ) - .await?; - - Ok(()) -} - /// Orchestrates timeline shut down of all timeline tasks, removes its in-memory structures, /// and deletes its data from both disk and s3. /// The sequence of steps: @@ -258,7 +231,32 @@ impl DeleteTimelineFlow { ))? 
}); - let remote_client = timeline.remote_client_maybe_construct(tenant); + let remote_client = match timeline.maybe_remote_client() { + Some(remote_client) => remote_client, + None => { + let remote_client = tenant + .build_timeline_client(timeline.timeline_id(), tenant.remote_storage.clone()); + let result = remote_client + .download_index_file(&tenant.cancel) + .instrument(info_span!("download_index_file")) + .await + .map_err(|e| DeleteTimelineError::Other(anyhow::anyhow!("error: {:?}", e)))?; + let index_part = match result { + MaybeDeletedIndexPart::Deleted(p) => { + tracing::info!("Timeline already set as deleted in remote index"); + p + } + MaybeDeletedIndexPart::IndexPart(p) => p, + }; + let remote_client = Arc::new(remote_client); + + remote_client + .init_upload_queue(&index_part) + .map_err(DeleteTimelineError::Other)?; + remote_client.shutdown().await; + remote_client + } + }; set_deleted_in_remote_index(&remote_client).await?; fail::fail_point!("timeline-delete-before-schedule", |_| { @@ -455,7 +453,15 @@ impl DeleteTimelineFlow { remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?; - upload_new_tenant_manifest(tenant, &guard).await?; + // This is susceptible to race conditions, i.e. we won't continue deletions if there is a crash + // between the deletion of the index-part.json and reaching of this code. + // So indeed, the tenant manifest might refer to an offloaded timeline which has already been deleted. + // However, we handle this case in tenant loading code so the next time we attach, the issue is + // resolved. + tenant + .store_tenant_manifest() + .await + .map_err(|e| DeleteTimelineError::Other(anyhow::anyhow!(e)))?; *guard = Self::Finished; diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 8e6eceb084..305c139b54 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard}; use super::Timeline; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::{remote_timeline_client, OffloadedTimeline, Tenant, TimelineOrOffloaded}; +use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded}; pub(crate) async fn offload_timeline( tenant: &Tenant, @@ -63,17 +63,10 @@ pub(crate) async fn offload_timeline( // at the next restart attach it again. // For that to happen, we'd need to make the manifest reflect our *intended* state, // not our actual state of offloaded timelines. 
- let manifest = tenant.tenant_manifest(); - // TODO: generation support - let generation = remote_timeline_client::TENANT_MANIFEST_GENERATION; - remote_timeline_client::upload_tenant_manifest( - &tenant.remote_storage, - &tenant.tenant_shard_id, - generation, - &manifest, - &tenant.cancel, - ) - .await?; + tenant + .store_tenant_manifest() + .await + .map_err(|e| anyhow::anyhow!(e))?; Ok(()) } diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index cee259e2e0..739fadbc6b 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -31,11 +31,11 @@ use crate::{ task_mgr::{TaskKind, WALRECEIVER_RUNTIME}, tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline, WalReceiverInfo}, walingest::WalIngest, - walrecord::{decode_wal_record, DecodedWALRecord}, }; use postgres_backend::is_expected_io_error; use postgres_connection::PgConnectionConfig; use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::walrecord::{decode_wal_record, DecodedWALRecord}; use utils::{id::NodeId, lsn::Lsn}; use utils::{pageserver_feedback::PageserverFeedback, sync::gate::GateError}; diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 9e43e10801..27b3f93845 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -29,8 +29,10 @@ use std::time::Instant; use std::time::SystemTime; use pageserver_api::shard::ShardIdentity; +use postgres_ffi::walrecord::*; use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz}; use postgres_ffi::{fsm_logical_to_physical, page_is_new, page_set_lsn}; +use wal_decoder::models::*; use anyhow::{bail, Context, Result}; use bytes::{Buf, Bytes, BytesMut}; @@ -44,9 +46,9 @@ use crate::pgdatadir_mapping::{DatadirModification, Version}; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::PageReconstructError; use crate::tenant::Timeline; -use crate::walrecord::*; use crate::ZERO_PAGE; use pageserver_api::key::rel_block_to_key; +use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM}; @@ -108,143 +110,6 @@ struct WarnIngestLag { timestamp_invalid_msg_ratelimit: RateLimit, } -// These structs are an intermediary representation of the PostgreSQL WAL records. -// The ones prefixed with `Xl` are lower level, while the ones that are not have -// all the required context to be acted upon by the pageserver. 
- -enum HeapamRecord { - ClearVmBits(ClearVmBits), -} - -struct ClearVmBits { - new_heap_blkno: Option, - old_heap_blkno: Option, - vm_rel: RelTag, - flags: u8, -} - -enum NeonrmgrRecord { - ClearVmBits(ClearVmBits), -} - -enum SmgrRecord { - Create(SmgrCreate), - Truncate(XlSmgrTruncate), -} - -struct SmgrCreate { - rel: RelTag, -} - -enum DbaseRecord { - Create(DbaseCreate), - Drop(DbaseDrop), -} - -struct DbaseCreate { - db_id: u32, - tablespace_id: u32, - src_db_id: u32, - src_tablespace_id: u32, -} - -struct DbaseDrop { - db_id: u32, - tablespace_ids: Vec, -} - -enum ClogRecord { - ZeroPage(ClogZeroPage), - Truncate(ClogTruncate), -} - -struct ClogZeroPage { - segno: u32, - rpageno: u32, -} - -struct ClogTruncate { - pageno: u32, - oldest_xid: u32, - oldest_xid_db: u32, -} - -enum XactRecord { - Commit(XactCommon), - Abort(XactCommon), - CommitPrepared(XactCommon), - AbortPrepared(XactCommon), - Prepare(XactPrepare), -} - -struct XactCommon { - parsed: XlXactParsedRecord, - origin_id: u16, - // Fields below are only used for logging - xl_xid: u32, - lsn: Lsn, -} - -struct XactPrepare { - xl_xid: u32, - data: Bytes, -} - -enum MultiXactRecord { - ZeroPage(MultiXactZeroPage), - Create(XlMultiXactCreate), - Truncate(XlMultiXactTruncate), -} - -struct MultiXactZeroPage { - slru_kind: SlruKind, - segno: u32, - rpageno: u32, -} - -enum RelmapRecord { - Update(RelmapUpdate), -} - -struct RelmapUpdate { - update: XlRelmapUpdate, - buf: Bytes, -} - -enum XlogRecord { - Raw(RawXlogRecord), -} - -struct RawXlogRecord { - info: u8, - lsn: Lsn, - buf: Bytes, -} - -enum LogicalMessageRecord { - Put(PutLogicalMessage), - #[cfg(feature = "testing")] - Failpoint, -} - -struct PutLogicalMessage { - path: String, - buf: Bytes, -} - -enum StandbyRecord { - RunningXacts(StandbyRunningXacts), -} - -struct StandbyRunningXacts { - oldest_running_xid: u32, -} - -enum ReploriginRecord { - Set(XlReploriginSet), - Drop(XlReploriginDrop), -} - impl WalIngest { pub async fn new( timeline: &Timeline, @@ -284,7 +149,6 @@ impl WalIngest { /// relations/pages that the record affects. 
/// /// This function returns `true` if the record was ingested, and `false` if it was filtered out - /// pub async fn ingest_record( &mut self, decoded: DecodedWALRecord, @@ -2218,7 +2082,7 @@ impl WalIngest { ) -> anyhow::Result> { let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; if info == pg_constants::XLOG_LOGICAL_MESSAGE { - let xlrec = crate::walrecord::XlLogicalMessage::decode(buf); + let xlrec = XlLogicalMessage::decode(buf); let prefix = std::str::from_utf8(&buf[0..xlrec.prefix_size - 1])?; #[cfg(feature = "testing")] @@ -2246,7 +2110,7 @@ impl WalIngest { ) -> anyhow::Result> { let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; if info == pg_constants::XLOG_RUNNING_XACTS { - let xlrec = crate::walrecord::XlRunningXacts::decode(buf); + let xlrec = XlRunningXacts::decode(buf); return Ok(Some(StandbyRecord::RunningXacts(StandbyRunningXacts { oldest_running_xid: xlrec.oldest_running_xid, }))); @@ -2276,10 +2140,10 @@ impl WalIngest { ) -> anyhow::Result> { let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK; if info == pg_constants::XLOG_REPLORIGIN_SET { - let xlrec = crate::walrecord::XlReploriginSet::decode(buf); + let xlrec = XlReploriginSet::decode(buf); return Ok(Some(ReploriginRecord::Set(xlrec))); } else if info == pg_constants::XLOG_REPLORIGIN_DROP { - let xlrec = crate::walrecord::XlReploriginDrop::decode(buf); + let xlrec = XlReploriginDrop::decode(buf); return Ok(Some(ReploriginRecord::Drop(xlrec))); } @@ -3146,6 +3010,7 @@ mod tests { async fn test_ingest_real_wal() { use crate::tenant::harness::*; use postgres_ffi::waldecoder::WalStreamDecoder; + use postgres_ffi::walrecord::decode_wal_record; use postgres_ffi::WAL_SEGMENT_SIZE; // Define test data path and constants. diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index a1c9fc5651..027a6eb7d7 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -29,11 +29,11 @@ use crate::metrics::{ WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME, }; -use crate::repository::Key; -use crate::walrecord::NeonWalRecord; use anyhow::Context; use bytes::{Bytes, BytesMut}; +use pageserver_api::key::Key; use pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus}; +use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::TenantShardId; use std::future::Future; use std::sync::Arc; @@ -548,9 +548,10 @@ impl PostgresRedoManager { #[cfg(test)] mod tests { use super::PostgresRedoManager; - use crate::repository::Key; - use crate::{config::PageServerConf, walrecord::NeonWalRecord}; + use crate::config::PageServerConf; use bytes::Bytes; + use pageserver_api::key::Key; + use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::TenantShardId; use std::str::FromStr; use tracing::Instrument; diff --git a/pageserver/src/walredo/apply_neon.rs b/pageserver/src/walredo/apply_neon.rs index c067787f97..7aaa357318 100644 --- a/pageserver/src/walredo/apply_neon.rs +++ b/pageserver/src/walredo/apply_neon.rs @@ -1,8 +1,8 @@ -use crate::walrecord::NeonWalRecord; use anyhow::Context; use byteorder::{ByteOrder, LittleEndian}; use bytes::BytesMut; use pageserver_api::key::Key; +use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::SlruKind; use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM; @@ -238,7 +238,7 @@ pub(crate) fn apply_in_neon( // No-op: this record will never be created in aux v2. 
warn!("AuxFile record should not be created in aux v2"); } - #[cfg(test)] + #[cfg(feature = "testing")] NeonWalRecord::Test { append, clear, diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index f3197e68b5..7e9477cfbc 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -8,10 +8,10 @@ use crate::{ metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER}, page_cache::PAGE_SZ, span::debug_assert_current_span_has_tenant_id, - walrecord::NeonWalRecord, }; use anyhow::Context; use bytes::Bytes; +use pageserver_api::record::NeonWalRecord; use pageserver_api::{reltag::RelTag, shard::TenantShardId}; use postgres_ffi::BLCKSZ; #[cfg(feature = "testing")] diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile index 42f2a8efda..c87ae59fd6 100644 --- a/pgxn/neon/Makefile +++ b/pgxn/neon/Makefile @@ -16,6 +16,7 @@ OBJS = \ neon_walreader.o \ pagestore_smgr.o \ relsize_cache.o \ + unstable_extensions.o \ walproposer.o \ walproposer_pg.o \ control_plane_connector.o \ diff --git a/pgxn/neon/control_plane_connector.c b/pgxn/neon/control_plane_connector.c index 4713103909..b47b22cd20 100644 --- a/pgxn/neon/control_plane_connector.c +++ b/pgxn/neon/control_plane_connector.c @@ -18,6 +18,7 @@ * *------------------------------------------------------------------------- */ + #include "postgres.h" #include @@ -508,6 +509,8 @@ NeonXactCallback(XactEvent event, void *arg) static bool RoleIsNeonSuperuser(const char *role_name) { + Assert(role_name); + return strcmp(role_name, "neon_superuser") == 0; } @@ -670,7 +673,7 @@ HandleCreateRole(CreateRoleStmt *stmt) static void HandleAlterRole(AlterRoleStmt *stmt) { - const char *role_name = stmt->role->rolename; + char *role_name; DefElem *dpass; ListCell *option; bool found = false; @@ -678,6 +681,7 @@ HandleAlterRole(AlterRoleStmt *stmt) InitRoleTableIfNeeded(); + role_name = get_rolespec_name(stmt->role); if (RoleIsNeonSuperuser(role_name) && !superuser()) elog(ERROR, "can't ALTER neon_superuser"); @@ -689,9 +693,13 @@ HandleAlterRole(AlterRoleStmt *stmt) if (strcmp(defel->defname, "password") == 0) dpass = defel; } + /* We only care about updates to the password */ if (!dpass) + { + pfree(role_name); return; + } entry = hash_search(CurrentDdlTable->role_table, role_name, @@ -704,6 +712,8 @@ HandleAlterRole(AlterRoleStmt *stmt) else entry->password = NULL; entry->type = Op_Set; + + pfree(role_name); } static void diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index f8ec725c18..dc87d79e87 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -30,6 +30,7 @@ #include "neon.h" #include "control_plane_connector.h" #include "logical_replication_monitor.h" +#include "unstable_extensions.h" #include "walsender_hooks.h" #if PG_MAJORVERSION_NUM >= 16 #include "storage/ipc.h" @@ -424,6 +425,7 @@ _PG_init(void) LogicalFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; SlotFuncs_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; + InitUnstableExtensionsSupport(); InitLogicalReplicationMonitor(); InitControlPlaneConnector(); diff --git a/pgxn/neon/neon_pgversioncompat.c b/pgxn/neon/neon_pgversioncompat.c index a0dbddde4b..7c404fb5a9 100644 --- a/pgxn/neon/neon_pgversioncompat.c +++ b/pgxn/neon/neon_pgversioncompat.c @@ -42,3 +42,4 @@ InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags) MemoryContextSwitchTo(old_context); } #endif + diff --git a/pgxn/neon/unstable_extensions.c b/pgxn/neon/unstable_extensions.c new file mode 100644 index 0000000000..72de2871f4 
--- /dev/null +++ b/pgxn/neon/unstable_extensions.c @@ -0,0 +1,129 @@ +#include +#include + +#include "postgres.h" + +#include "nodes/plannodes.h" +#include "nodes/parsenodes.h" +#include "tcop/utility.h" +#include "utils/errcodes.h" +#include "utils/guc.h" + +#include "neon_pgversioncompat.h" +#include "unstable_extensions.h" + +static bool allow_unstable_extensions = false; +static char *unstable_extensions = NULL; + +static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; + +static bool +list_contains(char const* comma_separated_list, char const* val) +{ + char const* occ = comma_separated_list; + size_t val_len = strlen(val); + + if (val_len == 0) + return false; + + while ((occ = strstr(occ, val)) != NULL) + { + if ((occ == comma_separated_list || occ[-1] == ',') + && (occ[val_len] == '\0' || occ[val_len] == ',')) + { + return true; + } + occ += val_len; + } + + return false; +} + + +static void +CheckUnstableExtension( + PlannedStmt *pstmt, + const char *queryString, + bool readOnlyTree, + ProcessUtilityContext context, + ParamListInfo params, + QueryEnvironment *queryEnv, + DestReceiver *dest, + QueryCompletion *qc) +{ + Node *parseTree = pstmt->utilityStmt; + + if (allow_unstable_extensions || unstable_extensions == NULL) + goto process; + + switch (nodeTag(parseTree)) + { + case T_CreateExtensionStmt: + { + CreateExtensionStmt *stmt = castNode(CreateExtensionStmt, parseTree); + if (list_contains(unstable_extensions, stmt->extname)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("%s extension is in beta and may be unstable or introduce backward-incompatible changes.\nWe recommend testing it in a separate, dedicated Neon project.", stmt->extname), + errhint("to proceed with installation, run SET neon.allow_unstable_extensions='true'"))); + } + break; + } + default: + goto process; + } + +process: + if (PreviousProcessUtilityHook) + { + PreviousProcessUtilityHook( + pstmt, + queryString, + readOnlyTree, + context, + params, + queryEnv, + dest, + qc); + } + else + { + standard_ProcessUtility( + pstmt, + queryString, + readOnlyTree, + context, + params, + queryEnv, + dest, + qc); + } +} + +void +InitUnstableExtensionsSupport(void) +{ + DefineCustomBoolVariable( + "neon.allow_unstable_extensions", + "Allow unstable extensions to be installed and used", + NULL, + &allow_unstable_extensions, + false, + PGC_USERSET, + 0, + NULL, NULL, NULL); + + DefineCustomStringVariable( + "neon.unstable_extensions", + "List of unstable extensions", + NULL, + &unstable_extensions, + NULL, + PGC_SUSET, + 0, + NULL, NULL, NULL); + + PreviousProcessUtilityHook = ProcessUtility_hook; + ProcessUtility_hook = CheckUnstableExtension; +} diff --git a/pgxn/neon/unstable_extensions.h b/pgxn/neon/unstable_extensions.h new file mode 100644 index 0000000000..3c695e9fb2 --- /dev/null +++ b/pgxn/neon/unstable_extensions.h @@ -0,0 +1,6 @@ +#ifndef __NEON_UNSTABLE_EXTENSIONS_H__ +#define __NEON_UNSTABLE_EXTENSIONS_H__ + +void InitUnstableExtensionsSupport(void); + +#endif diff --git a/poetry.lock b/poetry.lock index e307b873f3..36ea82a446 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -1521,6 +1521,21 @@ files = [ [package.dependencies] six = "*" +[[package]] +name = "jwcrypto" +version = "1.5.6" +description = "Implementation of JOSE Web standards" +optional = false +python-versions = ">= 3.8" +files = [ + {file = "jwcrypto-1.5.6-py3-none-any.whl", hash = "sha256:150d2b0ebbdb8f40b77f543fb44ffd2baeff48788be71f67f03566692fd55789"}, + {file = "jwcrypto-1.5.6.tar.gz", hash = "sha256:771a87762a0c081ae6166958a954f80848820b2ab066937dc8b8379d65b1b039"}, +] + +[package.dependencies] +cryptography = ">=3.4" +typing-extensions = ">=4.5.0" + [[package]] name = "kafka-python" version = "2.0.2" @@ -2111,7 +2126,6 @@ files = [ {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"}, {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"}, @@ -2120,8 +2134,6 @@ files = [ {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"}, {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"}, - {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"}, {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"}, @@ -2603,7 +2615,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = 
"PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2912,6 +2923,20 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "types-jwcrypto" +version = "1.5.0.20240925" +description = "Typing stubs for jwcrypto" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-jwcrypto-1.5.0.20240925.tar.gz", hash = "sha256:50e17b790378c96239344476c7bd13b52d0c7eeb6d16c2d53723e48cc6bbf4fe"}, + {file = "types_jwcrypto-1.5.0.20240925-py3-none-any.whl", hash = "sha256:2d12a2d528240d326075e896aafec7056b9136bf3207fa6ccf3fcb8fbf9e11a1"}, +] + +[package.dependencies] +cryptography = "*" + [[package]] name = "types-psutil" version = "5.9.5.12" @@ -3118,13 +3143,13 @@ files = [ [[package]] name = "werkzeug" -version = "3.0.3" +version = "3.0.6" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.8" files = [ - {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"}, - {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"}, + {file = "werkzeug-3.0.6-py3-none-any.whl", hash = "sha256:1bc0c2310d2fbb07b1dd1105eba2f7af72f322e1e455f2f93c993bee8c8a5f17"}, + {file = "werkzeug-3.0.6.tar.gz", hash = "sha256:a8dd59d4de28ca70471a34cba79bed5f7ef2e036a76b3ab0835474246eb41f8d"}, ] [package.dependencies] @@ -3159,16 +3184,6 @@ files = [ {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"}, {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"}, {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"}, - {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"}, - {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = 
"sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"}, - {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"}, - {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"}, - {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"}, {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"}, @@ -3406,4 +3421,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "f52632571e34b0e51b059c280c35d6ff6f69f6a8c9586caca78282baf635be91" +content-hash = "ad5c9ee7723359af22bbd7fa41538dcf78913c02e947a13a8f9a87eb3a59039e" diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs index 8ab8d5d37f..28bdacd769 100644 --- a/proxy/src/auth/backend/hacks.rs +++ b/proxy/src/auth/backend/hacks.rs @@ -1,5 +1,5 @@ use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{info, warn}; +use tracing::{debug, info}; use super::{ComputeCredentials, ComputeUserInfo, ComputeUserInfoNoEndpoint}; use crate::auth::{self, AuthFlow}; @@ -21,7 +21,7 @@ pub(crate) async fn authenticate_cleartext( secret: AuthSecret, config: &'static AuthenticationConfig, ) -> auth::Result { - warn!("cleartext auth flow override is enabled, proceeding"); + debug!("cleartext auth flow override is enabled, proceeding"); ctx.set_auth_method(crate::context::AuthMethod::Cleartext); // pause the timer while we communicate with the client @@ -61,7 +61,7 @@ pub(crate) async fn password_hack_no_authentication( info: ComputeUserInfoNoEndpoint, client: &mut stream::PqStream>, ) -> auth::Result<(ComputeUserInfo, Vec)> { - warn!("project not specified, resorting to the password hack auth flow"); + debug!("project not specified, resorting to the password hack auth flow"); ctx.set_auth_method(crate::context::AuthMethod::Cleartext); // pause the timer while we communicate with the client diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs index 69ab4b8ccb..83c3617612 100644 --- a/proxy/src/auth/backend/jwt.rs +++ b/proxy/src/auth/backend/jwt.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::future::Future; use std::sync::Arc; use std::time::{Duration, SystemTime}; @@ -45,6 +46,7 @@ pub(crate) enum FetchAuthRulesError { RoleJwksNotConfigured, } +#[derive(Clone)] pub(crate) struct AuthRule { pub(crate) id: String, pub(crate) jwks_url: url::Url, @@ -277,7 +279,7 @@ impl JwkCacheEntryLock { // get the key from the JWKs if possible. If not, wait for the keys to update. 
let (jwk, expected_audience) = loop { - match guard.find_jwk_and_audience(kid, role_name) { + match guard.find_jwk_and_audience(&kid, role_name) { Some(jwk) => break jwk, None if guard.last_retrieved.elapsed() > MIN_RENEW => { let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); @@ -312,7 +314,9 @@ impl JwkCacheEntryLock { if let Some(aud) = expected_audience { if payload.audience.0.iter().all(|s| s != aud) { - return Err(JwtError::InvalidJwtTokenAudience); + return Err(JwtError::InvalidClaims( + JwtClaimsError::InvalidJwtTokenAudience, + )); } } @@ -320,13 +324,15 @@ impl JwkCacheEntryLock { if let Some(exp) = payload.expiration { if now >= exp + CLOCK_SKEW_LEEWAY { - return Err(JwtError::JwtTokenHasExpired); + return Err(JwtError::InvalidClaims(JwtClaimsError::JwtTokenHasExpired)); } } if let Some(nbf) = payload.not_before { if nbf >= now + CLOCK_SKEW_LEEWAY { - return Err(JwtError::JwtTokenNotYetReadyToUse); + return Err(JwtError::InvalidClaims( + JwtClaimsError::JwtTokenNotYetReadyToUse, + )); } } @@ -420,8 +426,8 @@ struct JwtHeader<'a> { #[serde(rename = "alg")] algorithm: jose_jwa::Algorithm, /// key id, must be provided for our usecase - #[serde(rename = "kid")] - key_id: Option<&'a str>, + #[serde(rename = "kid", borrow)] + key_id: Option>, } /// @@ -440,17 +446,17 @@ struct JwtPayload<'a> { // the following entries are only extracted for the sake of debug logging. /// Issuer of the JWT - #[serde(rename = "iss")] - issuer: Option<&'a str>, + #[serde(rename = "iss", borrow)] + issuer: Option>, /// Subject of the JWT (the user) - #[serde(rename = "sub")] - subject: Option<&'a str>, + #[serde(rename = "sub", borrow)] + subject: Option>, /// Unique token identifier - #[serde(rename = "jti")] - jwt_id: Option<&'a str>, + #[serde(rename = "jti", borrow)] + jwt_id: Option>, /// Unique session identifier - #[serde(rename = "sid")] - session_id: Option<&'a str>, + #[serde(rename = "sid", borrow)] + session_id: Option>, } /// `OneOrMany` supports parsing either a single item or an array of items. 
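Context for the hunk above: the header and payload fields move from `&'a str` to `Cow<'a, str>` with `#[serde(borrow)]` because serde_json can only borrow strings that contain no escape sequences; an issuer like the escaped Cognito URL exercised in the tests further down would otherwise fail to deserialize. A minimal standalone sketch of that behaviour (the `Claims` struct and URL here are illustrative only, not proxy code):

use serde::Deserialize;
use std::borrow::Cow;

#[derive(Deserialize)]
struct Claims<'a> {
    // Borrows from the input when possible, allocates when the JSON string is escaped.
    #[serde(rename = "iss", borrow)]
    issuer: Option<Cow<'a, str>>,
}

fn main() {
    // The escaped slashes force an owned String; a plain `&str` field would fail here
    // with "invalid type: string, expected a borrowed string".
    let json = r#"{"iss":"https:\/\/cognito-idp.example.com\/pool"}"#;
    let claims: Claims = serde_json::from_str(json).unwrap();
    assert!(matches!(claims.issuer, Some(Cow::Owned(_))));
}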
@@ -585,14 +591,8 @@ pub(crate) enum JwtError { #[error("Provided authentication token is not a valid JWT encoding")] JwtEncoding(#[from] JwtEncodingError), - #[error("invalid JWT token audience")] - InvalidJwtTokenAudience, - - #[error("JWT token has expired")] - JwtTokenHasExpired, - - #[error("JWT token is not yet ready to use")] - JwtTokenNotYetReadyToUse, + #[error(transparent)] + InvalidClaims(#[from] JwtClaimsError), #[error("invalid P256 key")] InvalidP256Key(jose_jwk::crypto::Error), @@ -644,6 +644,19 @@ pub enum JwtEncodingError { InvalidCompactForm, } +#[derive(Error, Debug, PartialEq)] +#[non_exhaustive] +pub enum JwtClaimsError { + #[error("invalid JWT token audience")] + InvalidJwtTokenAudience, + + #[error("JWT token has expired")] + JwtTokenHasExpired, + + #[error("JWT token is not yet ready to use")] + JwtTokenNotYetReadyToUse, +} + #[allow(dead_code, reason = "Debug use only")] #[derive(Debug)] pub(crate) enum KeyType { @@ -680,6 +693,8 @@ mod tests { use hyper_util::rt::TokioIo; use rand::rngs::OsRng; use rsa::pkcs8::DecodePrivateKey; + use serde::Serialize; + use serde_json::json; use signature::Signer; use tokio::net::TcpListener; @@ -693,6 +708,7 @@ mod tests { key: jose_jwk::Key::Ec(pk), prm: jose_jwk::Parameters { kid: Some(kid), + alg: Some(jose_jwa::Algorithm::Signing(jose_jwa::Signing::Es256)), ..Default::default() }, }; @@ -706,24 +722,47 @@ mod tests { key: jose_jwk::Key::Rsa(pk), prm: jose_jwk::Parameters { kid: Some(kid), + alg: Some(jose_jwa::Algorithm::Signing(jose_jwa::Signing::Rs256)), ..Default::default() }, }; (sk, jwk) } + fn now() -> u64 { + SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs() + } + fn build_jwt_payload(kid: String, sig: jose_jwa::Signing) -> String { + let now = now(); + let body = typed_json::json! {{ + "exp": now + 3600, + "nbf": now, + "aud": ["audience1", "neon", "audience2"], + "sub": "user1", + "sid": "session1", + "jti": "token1", + "iss": "neon-testing", + }}; + build_custom_jwt_payload(kid, body, sig) + } + + fn build_custom_jwt_payload( + kid: String, + body: impl Serialize, + sig: jose_jwa::Signing, + ) -> String { let header = JwtHeader { algorithm: jose_jwa::Algorithm::Signing(sig), - key_id: Some(&kid), + key_id: Some(Cow::Owned(kid)), }; - let body = typed_json::json! 
{{ - "exp": SystemTime::now().duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs() + 3600, - }}; let header = base64::encode_config(serde_json::to_string(&header).unwrap(), URL_SAFE_NO_PAD); - let body = base64::encode_config(body.to_string(), URL_SAFE_NO_PAD); + let body = base64::encode_config(serde_json::to_string(&body).unwrap(), URL_SAFE_NO_PAD); format!("{header}.{body}") } @@ -738,6 +777,16 @@ mod tests { format!("{payload}.{sig}") } + fn new_custom_ec_jwt(kid: String, key: &p256::SecretKey, body: impl Serialize) -> String { + use p256::ecdsa::{Signature, SigningKey}; + + let payload = build_custom_jwt_payload(kid, body, jose_jwa::Signing::Es256); + let sig: Signature = SigningKey::from(key).sign(payload.as_bytes()); + let sig = base64::encode_config(sig.to_bytes(), URL_SAFE_NO_PAD); + + format!("{payload}.{sig}") + } + fn new_rsa_jwt(kid: String, key: rsa::RsaPrivateKey) -> String { use rsa::pkcs1v15::SigningKey; use rsa::signature::SignatureEncoding; @@ -809,37 +858,34 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL -----END PRIVATE KEY----- "; - #[tokio::test] - async fn renew() { - let (rs1, jwk1) = new_rsa_jwk(RS1, "1".into()); - let (rs2, jwk2) = new_rsa_jwk(RS2, "2".into()); - let (ec1, jwk3) = new_ec_jwk("3".into()); - let (ec2, jwk4) = new_ec_jwk("4".into()); + #[derive(Clone)] + struct Fetch(Vec); - let foo_jwks = jose_jwk::JwkSet { - keys: vec![jwk1, jwk3], - }; - let bar_jwks = jose_jwk::JwkSet { - keys: vec![jwk2, jwk4], - }; + impl FetchAuthRules for Fetch { + async fn fetch_auth_rules( + &self, + _ctx: &RequestMonitoring, + _endpoint: EndpointId, + ) -> Result, FetchAuthRulesError> { + Ok(self.0.clone()) + } + } + async fn jwks_server( + router: impl for<'a> Fn(&'a str) -> Option> + Send + Sync + 'static, + ) -> SocketAddr { + let router = Arc::new(router); let service = service_fn(move |req| { - let foo_jwks = foo_jwks.clone(); - let bar_jwks = bar_jwks.clone(); + let router = Arc::clone(&router); async move { - let jwks = match req.uri().path() { - "/foo" => &foo_jwks, - "/bar" => &bar_jwks, - _ => { - return Response::builder() - .status(404) - .body(Full::new(Bytes::new())); - } - }; - let body = serde_json::to_vec(jwks).unwrap(); - Response::builder() - .status(200) - .body(Full::new(Bytes::from(body))) + match router(req.uri().path()) { + Some(body) => Response::builder() + .status(200) + .body(Full::new(Bytes::from(body))), + None => Response::builder() + .status(404) + .body(Full::new(Bytes::new())), + } } }); @@ -854,84 +900,61 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL } }); - let client = reqwest::Client::new(); + addr + } - #[derive(Clone)] - struct Fetch(SocketAddr, Vec); + #[tokio::test] + async fn check_jwt_happy_path() { + let (rs1, jwk1) = new_rsa_jwk(RS1, "rs1".into()); + let (rs2, jwk2) = new_rsa_jwk(RS2, "rs2".into()); + let (ec1, jwk3) = new_ec_jwk("ec1".into()); + let (ec2, jwk4) = new_ec_jwk("ec2".into()); - impl FetchAuthRules for Fetch { - async fn fetch_auth_rules( - &self, - _ctx: &RequestMonitoring, - _endpoint: EndpointId, - ) -> Result, FetchAuthRulesError> { - Ok(vec![ - AuthRule { - id: "foo".to_owned(), - jwks_url: format!("http://{}/foo", self.0).parse().unwrap(), - audience: None, - role_names: self.1.clone(), - }, - AuthRule { - id: "bar".to_owned(), - jwks_url: format!("http://{}/bar", self.0).parse().unwrap(), - audience: None, - role_names: self.1.clone(), - }, - ]) - } - } + let foo_jwks = jose_jwk::JwkSet { + keys: vec![jwk1, jwk3], + }; + let bar_jwks = jose_jwk::JwkSet { + 
keys: vec![jwk2, jwk4], + }; + + let jwks_addr = jwks_server(move |path| match path { + "/foo" => Some(serde_json::to_vec(&foo_jwks).unwrap()), + "/bar" => Some(serde_json::to_vec(&bar_jwks).unwrap()), + _ => None, + }) + .await; let role_name1 = RoleName::from("anonymous"); let role_name2 = RoleName::from("authenticated"); - let fetch = Fetch( - addr, - vec![ - RoleNameInt::from(&role_name1), - RoleNameInt::from(&role_name2), - ], - ); + let roles = vec![ + RoleNameInt::from(&role_name1), + RoleNameInt::from(&role_name2), + ]; + let rules = vec![ + AuthRule { + id: "foo".to_owned(), + jwks_url: format!("http://{jwks_addr}/foo").parse().unwrap(), + audience: None, + role_names: roles.clone(), + }, + AuthRule { + id: "bar".to_owned(), + jwks_url: format!("http://{jwks_addr}/bar").parse().unwrap(), + audience: None, + role_names: roles.clone(), + }, + ]; + + let fetch = Fetch(rules); + let jwk_cache = JwkCache::default(); let endpoint = EndpointId::from("ep"); - let jwk_cache = Arc::new(JwkCacheEntryLock::default()); - - let jwt1 = new_rsa_jwt("1".into(), rs1); - let jwt2 = new_rsa_jwt("2".into(), rs2); - let jwt3 = new_ec_jwt("3".into(), &ec1); - let jwt4 = new_ec_jwt("4".into(), &ec2); - - // had the wrong kid, therefore will have the wrong ecdsa signature - let bad_jwt = new_ec_jwt("3".into(), &ec2); - // this role_name is not accepted - let bad_role_name = RoleName::from("cloud_admin"); - - let err = jwk_cache - .check_jwt( - &RequestMonitoring::test(), - &bad_jwt, - &client, - endpoint.clone(), - &role_name1, - &fetch, - ) - .await - .unwrap_err(); - assert!(err.to_string().contains("signature error")); - - let err = jwk_cache - .check_jwt( - &RequestMonitoring::test(), - &jwt1, - &client, - endpoint.clone(), - &bad_role_name, - &fetch, - ) - .await - .unwrap_err(); - assert!(err.to_string().contains("jwk not found")); + let jwt1 = new_rsa_jwt("rs1".into(), rs1); + let jwt2 = new_rsa_jwt("rs2".into(), rs2); + let jwt3 = new_ec_jwt("ec1".into(), &ec1); + let jwt4 = new_ec_jwt("ec2".into(), &ec2); let tokens = [jwt1, jwt2, jwt3, jwt4]; let role_names = [role_name1, role_name2]; @@ -940,15 +963,250 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL jwk_cache .check_jwt( &RequestMonitoring::test(), - token, - &client, endpoint.clone(), role, &fetch, + token, ) .await .unwrap(); } } } + + /// AWS Cognito escapes the `/` in the URL. + #[tokio::test] + async fn check_jwt_regression_cognito_issuer() { + let (key, jwk) = new_ec_jwk("key".into()); + + let now = now(); + let token = new_custom_ec_jwt( + "key".into(), + &key, + typed_json::json! {{ + "sub": "dd9a73fd-e785-4a13-aae1-e691ce43e89d", + // cognito uses `\/`. I cannot replicated that easily here as serde_json will refuse + // to write that escape character. instead I will make a bogus URL using `\` instead. 
+ "iss": "https:\\\\cognito-idp.us-west-2.amazonaws.com\\us-west-2_abcdefgh", + "client_id": "abcdefghijklmnopqrstuvwxyz", + "origin_jti": "6759d132-3fe7-446e-9e90-2fe7e8017893", + "event_id": "ec9c36ab-b01d-46a0-94e4-87fde6767065", + "token_use": "access", + "scope": "aws.cognito.signin.user.admin", + "auth_time":now, + "exp":now + 60, + "iat":now, + "jti": "b241614b-0b93-4bdc-96db-0a3c7061d9c0", + "username": "dd9a73fd-e785-4a13-aae1-e691ce43e89d", + }}, + ); + + let jwks = jose_jwk::JwkSet { keys: vec![jwk] }; + + let jwks_addr = jwks_server(move |_path| Some(serde_json::to_vec(&jwks).unwrap())).await; + + let role_name = RoleName::from("anonymous"); + let rules = vec![AuthRule { + id: "aws-cognito".to_owned(), + jwks_url: format!("http://{jwks_addr}/").parse().unwrap(), + audience: None, + role_names: vec![RoleNameInt::from(&role_name)], + }]; + + let fetch = Fetch(rules); + let jwk_cache = JwkCache::default(); + + let endpoint = EndpointId::from("ep"); + + jwk_cache + .check_jwt( + &RequestMonitoring::test(), + endpoint.clone(), + &role_name, + &fetch, + &token, + ) + .await + .unwrap(); + } + + #[tokio::test] + async fn check_jwt_invalid_signature() { + let (_, jwk) = new_ec_jwk("1".into()); + let (key, _) = new_ec_jwk("1".into()); + + // has a matching kid, but signed by the wrong key + let bad_jwt = new_ec_jwt("1".into(), &key); + + let jwks = jose_jwk::JwkSet { keys: vec![jwk] }; + let jwks_addr = jwks_server(move |path| match path { + "/" => Some(serde_json::to_vec(&jwks).unwrap()), + _ => None, + }) + .await; + + let role = RoleName::from("authenticated"); + + let rules = vec![AuthRule { + id: String::new(), + jwks_url: format!("http://{jwks_addr}/").parse().unwrap(), + audience: None, + role_names: vec![RoleNameInt::from(&role)], + }]; + + let fetch = Fetch(rules); + let jwk_cache = JwkCache::default(); + + let ep = EndpointId::from("ep"); + + let ctx = RequestMonitoring::test(); + let err = jwk_cache + .check_jwt(&ctx, ep, &role, &fetch, &bad_jwt) + .await + .unwrap_err(); + assert!( + matches!(err, JwtError::Signature(_)), + "expected \"signature error\", got {err:?}" + ); + } + + #[tokio::test] + async fn check_jwt_unknown_role() { + let (key, jwk) = new_rsa_jwk(RS1, "1".into()); + let jwt = new_rsa_jwt("1".into(), key); + + let jwks = jose_jwk::JwkSet { keys: vec![jwk] }; + let jwks_addr = jwks_server(move |path| match path { + "/" => Some(serde_json::to_vec(&jwks).unwrap()), + _ => None, + }) + .await; + + let role = RoleName::from("authenticated"); + let rules = vec![AuthRule { + id: String::new(), + jwks_url: format!("http://{jwks_addr}/").parse().unwrap(), + audience: None, + role_names: vec![RoleNameInt::from(&role)], + }]; + + let fetch = Fetch(rules); + let jwk_cache = JwkCache::default(); + + let ep = EndpointId::from("ep"); + + // this role_name is not accepted + let bad_role_name = RoleName::from("cloud_admin"); + + let ctx = RequestMonitoring::test(); + let err = jwk_cache + .check_jwt(&ctx, ep, &bad_role_name, &fetch, &jwt) + .await + .unwrap_err(); + + assert!( + matches!(err, JwtError::JwkNotFound), + "expected \"jwk not found\", got {err:?}" + ); + } + + #[tokio::test] + async fn check_jwt_invalid_claims() { + let (key, jwk) = new_ec_jwk("1".into()); + + let jwks = jose_jwk::JwkSet { keys: vec![jwk] }; + let jwks_addr = jwks_server(move |path| match path { + "/" => Some(serde_json::to_vec(&jwks).unwrap()), + _ => None, + }) + .await; + + let now = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(); + + struct Test { + body: 
serde_json::Value, + error: JwtClaimsError, + } + + let table = vec![ + Test { + body: json! {{ + "nbf": now + 60, + "aud": "neon", + }}, + error: JwtClaimsError::JwtTokenNotYetReadyToUse, + }, + Test { + body: json! {{ + "exp": now - 60, + "aud": ["neon"], + }}, + error: JwtClaimsError::JwtTokenHasExpired, + }, + Test { + body: json! {{ + }}, + error: JwtClaimsError::InvalidJwtTokenAudience, + }, + Test { + body: json! {{ + "aud": [], + }}, + error: JwtClaimsError::InvalidJwtTokenAudience, + }, + Test { + body: json! {{ + "aud": "foo", + }}, + error: JwtClaimsError::InvalidJwtTokenAudience, + }, + Test { + body: json! {{ + "aud": ["foo"], + }}, + error: JwtClaimsError::InvalidJwtTokenAudience, + }, + Test { + body: json! {{ + "aud": ["foo", "bar"], + }}, + error: JwtClaimsError::InvalidJwtTokenAudience, + }, + ]; + + let role = RoleName::from("authenticated"); + + let rules = vec![AuthRule { + id: String::new(), + jwks_url: format!("http://{jwks_addr}/").parse().unwrap(), + audience: Some("neon".to_string()), + role_names: vec![RoleNameInt::from(&role)], + }]; + + let fetch = Fetch(rules); + let jwk_cache = JwkCache::default(); + + let ep = EndpointId::from("ep"); + + let ctx = RequestMonitoring::test(); + for test in table { + let jwt = new_custom_ec_jwt("1".into(), &key, test.body); + + match jwk_cache + .check_jwt(&ctx, ep.clone(), &role, &fetch, &jwt) + .await + { + Err(JwtError::InvalidClaims(error)) if error == test.error => {} + Err(err) => { + panic!("expected {:?}, got {err:?}", test.error) + } + Ok(_payload) => { + panic!("expected {:?}, got ok", test.error) + } + } + } + } } diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs index 6e190029aa..82c259efc8 100644 --- a/proxy/src/bin/proxy.rs +++ b/proxy/src/bin/proxy.rs @@ -137,9 +137,6 @@ struct ProxyCliArgs { /// size of the threadpool for password hashing #[clap(long, default_value_t = 4)] scram_thread_pool_size: u8, - /// Disable dynamic rate limiter and store the metrics to ensure its production behaviour. - #[clap(long, default_value_t = true, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)] - disable_dynamic_rate_limiter: bool, /// Endpoint rate limiter max number of requests per second. /// /// Provided in the form `@`. 
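The rate limiter option above takes a request count and an interval joined by `@` (read here as `<requests>@<interval>`; those placeholders are an assumption, and the authoritative parsing is the proxy's RateBucketInfo FromStr impl, which is not part of this diff). A rough, hypothetical sketch of that shape:

use std::time::Duration;

// Hypothetical parser for a `<requests>@<interval>` spec such as "100@1s";
// it only handles whole-second intervals and is not the proxy's real parser.
fn parse_rate_spec(spec: &str) -> Option<(u32, Duration)> {
    let (rps, interval) = spec.split_once('@')?;
    let rps: u32 = rps.parse().ok()?;
    let secs: u64 = interval.strip_suffix('s')?.parse().ok()?;
    Some((rps, Duration::from_secs(secs)))
}

fn main() {
    assert_eq!(parse_rate_spec("100@1s"), Some((100, Duration::from_secs(1))));
}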
@@ -615,9 +612,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { and metric-collection-interval must be specified" ), }; - if !args.disable_dynamic_rate_limiter { - bail!("dynamic rate limiter should be disabled"); - } let config::ConcurrencyLockOptions { shards, diff --git a/proxy/src/serverless/error.rs b/proxy/src/serverless/error.rs new file mode 100644 index 0000000000..323c91baa5 --- /dev/null +++ b/proxy/src/serverless/error.rs @@ -0,0 +1,5 @@ +use http::StatusCode; + +pub trait HttpCodeError { + fn get_http_status_code(&self) -> StatusCode; +} diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index 8fb7a771d9..edbb0347d3 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -6,6 +6,7 @@ mod backend; pub mod cancel_set; mod conn_pool; mod conn_pool_lib; +mod error; mod http_conn_pool; mod http_util; mod json; diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 1f3eec6d19..0713c27d65 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -28,6 +28,7 @@ use uuid::Uuid; use super::backend::{LocalProxyConnError, PoolingBackend}; use super::conn_pool::{AuthData, ConnInfoWithAuth}; use super::conn_pool_lib::{self, ConnInfo}; +use super::error::HttpCodeError; use super::http_util::json_response; use super::json::{json_to_pg_text, pg_text_row_to_json, JsonConversionError}; use super::local_conn_pool; @@ -238,7 +239,6 @@ fn get_conn_info( Ok(ConnInfoWithAuth { conn_info, auth }) } -// TODO: return different http error codes pub(crate) async fn handle( config: &'static ProxyConfig, ctx: RequestMonitoring, @@ -319,9 +319,8 @@ pub(crate) async fn handle( "forwarding error to user" ); - // TODO: this shouldn't always be bad request. 
json_response( - StatusCode::BAD_REQUEST, + e.get_http_status_code(), json!({ "message": message, "code": code, @@ -405,6 +404,25 @@ impl UserFacingError for SqlOverHttpError { } } +impl HttpCodeError for SqlOverHttpError { + fn get_http_status_code(&self) -> StatusCode { + match self { + SqlOverHttpError::ReadPayload(_) => StatusCode::BAD_REQUEST, + SqlOverHttpError::ConnectCompute(h) => match h.get_error_kind() { + ErrorKind::User => StatusCode::BAD_REQUEST, + _ => StatusCode::INTERNAL_SERVER_ERROR, + }, + SqlOverHttpError::ConnInfo(_) => StatusCode::BAD_REQUEST, + SqlOverHttpError::RequestTooLarge(_) => StatusCode::PAYLOAD_TOO_LARGE, + SqlOverHttpError::ResponseTooLarge(_) => StatusCode::INSUFFICIENT_STORAGE, + SqlOverHttpError::InvalidIsolationLevel => StatusCode::BAD_REQUEST, + SqlOverHttpError::Postgres(_) => StatusCode::BAD_REQUEST, + SqlOverHttpError::JsonConversion(_) => StatusCode::INTERNAL_SERVER_ERROR, + SqlOverHttpError::Cancelled(_) => StatusCode::INTERNAL_SERVER_ERROR, + } + } +} + #[derive(Debug, thiserror::Error)] pub(crate) enum ReadPayloadError { #[error("could not read the HTTP request body: {0}")] diff --git a/pyproject.toml b/pyproject.toml index 862ed49638..faa5f9123c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ backoff = "^2.2.1" pytest-lazy-fixture = "^0.6.3" prometheus-client = "^0.14.1" pytest-timeout = "^2.1.0" -Werkzeug = "^3.0.3" +Werkzeug = "^3.0.6" pytest-order = "^1.1.0" allure-pytest = "^2.13.2" pytest-asyncio = "^0.21.0" @@ -42,6 +42,9 @@ pytest-repeat = "^0.9.3" websockets = "^12.0" clickhouse-connect = "^0.7.16" kafka-python = "^2.0.2" +jwcrypto = "^1.5.6" +h2 = "^4.1.0" +types-jwcrypto = "^1.5.0.20240925" [tool.poetry.group.dev.dependencies] mypy = "==1.3.0" diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index b4590fe3e5..df68f8a68e 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -262,14 +262,6 @@ async fn timeline_snapshot_handler(request: Request) -> Result, // so create the chan and write to it in another task. diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index c7f5165f90..c700e18cc7 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize}; use std::{ cmp::min, io::{self, ErrorKind}, + sync::Arc, }; use tokio::{fs::OpenOptions, io::AsyncWrite, sync::mpsc, task}; use tokio_tar::{Archive, Builder, Header}; @@ -25,8 +26,8 @@ use crate::{ routes::TimelineStatus, }, safekeeper::Term, - state::TimelinePersistentState, - timeline::WalResidentTimeline, + state::{EvictionState, TimelinePersistentState}, + timeline::{Timeline, WalResidentTimeline}, timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, wal_backup, wal_storage::open_wal_file, @@ -43,18 +44,33 @@ use utils::{ /// Stream tar archive of timeline to tx. #[instrument(name = "snapshot", skip_all, fields(ttid = %tli.ttid))] pub async fn stream_snapshot( - tli: WalResidentTimeline, + tli: Arc, source: NodeId, destination: NodeId, tx: mpsc::Sender>, ) { - if let Err(e) = stream_snapshot_guts(tli, source, destination, tx.clone()).await { - // Error type/contents don't matter as they won't can't reach the client - // (hyper likely doesn't do anything with it), but http stream will be - // prematurely terminated. It would be nice to try to send the error in - // trailers though. 
- tx.send(Err(anyhow!("snapshot failed"))).await.ok(); - error!("snapshot failed: {:#}", e); + match tli.try_wal_residence_guard().await { + Err(e) => { + tx.send(Err(anyhow!("Error checking residence: {:#}", e))) + .await + .ok(); + } + Ok(maybe_resident_tli) => { + if let Err(e) = match maybe_resident_tli { + Some(resident_tli) => { + stream_snapshot_resident_guts(resident_tli, source, destination, tx.clone()) + .await + } + None => stream_snapshot_offloaded_guts(tli, source, destination, tx.clone()).await, + } { + // Error type/contents don't matter as they won't can't reach the client + // (hyper likely doesn't do anything with it), but http stream will be + // prematurely terminated. It would be nice to try to send the error in + // trailers though. + tx.send(Err(anyhow!("snapshot failed"))).await.ok(); + error!("snapshot failed: {:#}", e); + } + } } } @@ -80,12 +96,10 @@ impl Drop for SnapshotContext { } } -pub async fn stream_snapshot_guts( - tli: WalResidentTimeline, - source: NodeId, - destination: NodeId, +/// Build a tokio_tar stream that sends encoded bytes into a Bytes channel. +fn prepare_tar_stream( tx: mpsc::Sender>, -) -> Result<()> { +) -> tokio_tar::Builder { // tokio-tar wants Write implementor, but we have mpsc tx >; // use SinkWriter as a Write impl. That is, // - create Sink from the tx. It returns PollSendError if chan is closed. @@ -100,12 +114,38 @@ pub async fn stream_snapshot_guts( // - SinkWriter (not surprisingly) wants sink of &[u8], not bytes, so wrap // into CopyToBytes. This is a data copy. let copy_to_bytes = CopyToBytes::new(oksink); - let mut writer = SinkWriter::new(copy_to_bytes); - let pinned_writer = std::pin::pin!(writer); + let writer = SinkWriter::new(copy_to_bytes); + let pinned_writer = Box::pin(writer); // Note that tokio_tar append_* funcs use tokio::io::copy with 8KB buffer // which is also likely suboptimal. 
- let mut ar = Builder::new_non_terminated(pinned_writer); + Builder::new_non_terminated(pinned_writer) +} + +/// Implementation of snapshot for an offloaded timeline, only reads control file +pub(crate) async fn stream_snapshot_offloaded_guts( + tli: Arc, + source: NodeId, + destination: NodeId, + tx: mpsc::Sender>, +) -> Result<()> { + let mut ar = prepare_tar_stream(tx); + + tli.snapshot_offloaded(&mut ar, source, destination).await?; + + ar.finish().await?; + + Ok(()) +} + +/// Implementation of snapshot for a timeline which is resident (includes some segment data) +pub async fn stream_snapshot_resident_guts( + tli: WalResidentTimeline, + source: NodeId, + destination: NodeId, + tx: mpsc::Sender>, +) -> Result<()> { + let mut ar = prepare_tar_stream(tx); let bctx = tli.start_snapshot(&mut ar, source, destination).await?; pausable_failpoint!("sk-snapshot-after-list-pausable"); @@ -138,6 +178,70 @@ pub async fn stream_snapshot_guts( Ok(()) } +impl Timeline { + /// Simple snapshot for an offloaded timeline: we will only upload a renamed partial segment and + /// pass a modified control file into the provided tar stream (nothing with data segments on disk, since + /// we are offloaded and there aren't any) + async fn snapshot_offloaded( + self: &Arc, + ar: &mut tokio_tar::Builder, + source: NodeId, + destination: NodeId, + ) -> Result<()> { + // Take initial copy of control file, then release state lock + let mut control_file = { + let shared_state = self.write_shared_state().await; + + let control_file = TimelinePersistentState::clone(shared_state.sk.state()); + + // Rare race: we got unevicted between entering function and reading control file. + // We error out and let API caller retry. + if !matches!(control_file.eviction_state, EvictionState::Offloaded(_)) { + bail!("Timeline was un-evicted during snapshot, please retry"); + } + + control_file + }; + + // Modify the partial segment of the in-memory copy for the control file to + // point to the destination safekeeper. + let replace = control_file + .partial_backup + .replace_uploaded_segment(source, destination)?; + + let Some(replace) = replace else { + // In Manager:: ready_for_eviction, we do not permit eviction unless the timeline + // has a partial segment. It is unexpected that an offloaded timeline would have none. + anyhow::bail!("Timeline has no partial segment, cannot generate snapshot"); + }; + + tracing::info!("Replacing uploaded partial segment in in-mem control file: {replace:?}"); + + // Optimistically try to copy the partial segment to the destination's path: this + // can fail if the timeline was un-evicted and modified in the background. + let remote_timeline_path = &self.remote_path; + wal_backup::copy_partial_segment( + &replace.previous.remote_path(remote_timeline_path), + &replace.current.remote_path(remote_timeline_path), + ) + .await?; + + // Since the S3 copy succeeded with the path given in our control file snapshot, and + // we are sending that snapshot in our response, we are giving the caller a consistent + // snapshot even if our local Timeline was unevicted or otherwise modified in the meantime. 
+ let buf = control_file + .write_to_buf() + .with_context(|| "failed to serialize control store")?; + let mut header = Header::new_gnu(); + header.set_size(buf.len().try_into().expect("never breaches u64")); + ar.append_data(&mut header, CONTROL_FILE_NAME, buf.as_slice()) + .await + .with_context(|| "failed to append to archive")?; + + Ok(()) + } +} + impl WalResidentTimeline { /// Start streaming tar archive with timeline: /// 1) stream control file under lock; diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index 3dbf72298f..f97e127a17 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -21,18 +21,15 @@ use postgres_backend::QueryError; use pq_proto::BeMessage; use serde::Deserialize; use serde::Serialize; +use std::future; use std::net::SocketAddr; use std::sync::Arc; use tokio::io::AsyncRead; use tokio::io::AsyncWrite; -use tokio::sync::mpsc::channel; -use tokio::sync::mpsc::error::TryRecvError; -use tokio::sync::mpsc::Receiver; -use tokio::sync::mpsc::Sender; +use tokio::sync::mpsc::{channel, Receiver, Sender}; use tokio::task; use tokio::task::JoinHandle; -use tokio::time::Duration; -use tokio::time::Instant; +use tokio::time::{Duration, MissedTickBehavior}; use tracing::*; use utils::id::TenantTimelineId; use utils::lsn::Lsn; @@ -444,9 +441,9 @@ async fn network_write( } } -// Send keepalive messages to walproposer, to make sure it receives updates -// even when it writes a steady stream of messages. -const KEEPALIVE_INTERVAL: Duration = Duration::from_secs(1); +/// The WAL flush interval. This ensures we periodically flush the WAL and send AppendResponses to +/// walproposer, even when it's writing a steady stream of messages. +const FLUSH_INTERVAL: Duration = Duration::from_secs(1); /// Encapsulates a task which takes messages from msg_rx, processes and pushes /// replies to reply_tx. @@ -494,67 +491,76 @@ impl WalAcceptor { async fn run(&mut self) -> anyhow::Result<()> { let walreceiver_guard = self.tli.get_walreceivers().register(self.conn_id); - // After this timestamp we will stop processing AppendRequests and send a response - // to the walproposer. walproposer sends at least one AppendRequest per second, - // we will send keepalives by replying to these requests once per second. - let mut next_keepalive = Instant::now(); + // Periodically flush the WAL. + let mut flush_ticker = tokio::time::interval(FLUSH_INTERVAL); + flush_ticker.set_missed_tick_behavior(MissedTickBehavior::Delay); + flush_ticker.tick().await; // skip the initial, immediate tick - while let Some(mut next_msg) = self.msg_rx.recv().await { - // Update walreceiver state in shmem for reporting. - if let ProposerAcceptorMessage::Elected(_) = &next_msg { - walreceiver_guard.get().status = WalReceiverStatus::Streaming; - } + // Tracks unflushed appends. + let mut dirty = false; - let reply_msg = if matches!(next_msg, ProposerAcceptorMessage::AppendRequest(_)) { - // Loop through AppendRequests while available to write as many WAL records as - // possible without fsyncing. - // - // Make sure the WAL is flushed before returning, see: - // https://github.com/neondatabase/neon/issues/9259 - // - // Note: this will need to be rewritten if we want to read non-AppendRequest messages here. - // Otherwise, we might end up in a situation where we read a message, but don't - // process it. 
- while let ProposerAcceptorMessage::AppendRequest(append_request) = next_msg { - let noflush_msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request); - - if let Some(reply) = self.tli.process_msg(&noflush_msg).await? { - if self.reply_tx.send(reply).await.is_err() { - break; // disconnected, flush WAL and return on next send/recv - } - } - - // get out of this loop if keepalive time is reached - if Instant::now() >= next_keepalive { + loop { + let reply = tokio::select! { + // Process inbound message. + msg = self.msg_rx.recv() => { + // If disconnected, break to flush WAL and return. + let Some(mut msg) = msg else { break; + }; + + // Update walreceiver state in shmem for reporting. + if let ProposerAcceptorMessage::Elected(_) = &msg { + walreceiver_guard.get().status = WalReceiverStatus::Streaming; } - // continue pulling AppendRequests if available - match self.msg_rx.try_recv() { - Ok(msg) => next_msg = msg, - Err(TryRecvError::Empty) => break, - // on disconnect, flush WAL and return on next send/recv - Err(TryRecvError::Disconnected) => break, - }; + // Don't flush the WAL on every append, only periodically via flush_ticker. + // This batches multiple appends per fsync. If the channel is empty after + // sending the reply, we'll schedule an immediate flush. + if let ProposerAcceptorMessage::AppendRequest(append_request) = msg { + msg = ProposerAcceptorMessage::NoFlushAppendRequest(append_request); + dirty = true; + } + + self.tli.process_msg(&msg).await? } - // flush all written WAL to the disk - self.tli - .process_msg(&ProposerAcceptorMessage::FlushWAL) - .await? - } else { - // process message other than AppendRequest - self.tli.process_msg(&next_msg).await? + // While receiving AppendRequests, flush the WAL periodically and respond with an + // AppendResponse to let walproposer know we're still alive. + _ = flush_ticker.tick(), if dirty => { + dirty = false; + self.tli + .process_msg(&ProposerAcceptorMessage::FlushWAL) + .await? + } + + // If there are no pending messages, flush the WAL immediately. + // + // TODO: this should be done via flush_ticker.reset_immediately(), but that's always + // delayed by 1ms due to this bug: https://github.com/tokio-rs/tokio/issues/6866. + _ = future::ready(()), if dirty && self.msg_rx.is_empty() => { + dirty = false; + flush_ticker.reset(); + self.tli + .process_msg(&ProposerAcceptorMessage::FlushWAL) + .await? + } }; - if let Some(reply) = reply_msg { + // Send reply, if any. + if let Some(reply) = reply { if self.reply_tx.send(reply).await.is_err() { - return Ok(()); // chan closed, streaming terminated + break; // disconnected, break to flush WAL and return } - // reset keepalive time - next_keepalive = Instant::now() + KEEPALIVE_INTERVAL; } } + + // Flush WAL on disconnect, see https://github.com/neondatabase/neon/issues/9259. + if dirty { + self.tli + .process_msg(&ProposerAcceptorMessage::FlushWAL) + .await?; + } + Ok(()) } } diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index c737dfcf9b..f0113978c4 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -797,14 +797,17 @@ impl Timeline { state.sk.term_bump(to).await } - /// Get the timeline guard for reading/writing WAL files. - /// If WAL files are not present on disk (evicted), they will be automatically - /// downloaded from remote storage. This is done in the manager task, which is - /// responsible for issuing all guards. - /// - /// NB: don't use this function from timeline_manager, it will deadlock. 
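The rewritten WalAcceptor::run above batches appends: it stages them as NoFlushAppendRequest and only issues FlushWAL on the one-second flush_ticker or as soon as the inbound channel drains, plus a final flush on disconnect. A rough asyncio analogue of the same batching policy, offered as a sketch rather than the safekeeper code; `process_noflush` and `flush` stand in for `process_msg` with NoFlushAppendRequest and FlushWAL, and None is used as a disconnect sentinel.

import asyncio

FLUSH_INTERVAL = 1.0  # seconds; mirrors FLUSH_INTERVAL above


async def wal_acceptor_loop(msgs: asyncio.Queue, process_noflush, flush):
    loop = asyncio.get_running_loop()
    dirty = False  # tracks unflushed appends
    deadline = loop.time() + FLUSH_INTERVAL
    while True:
        try:
            timeout = max(0.0, deadline - loop.time()) if dirty else None
            msg = await asyncio.wait_for(msgs.get(), timeout)
        except asyncio.TimeoutError:
            await flush()  # periodic flush under a sustained stream of appends
            dirty, deadline = False, loop.time() + FLUSH_INTERVAL
            continue
        if msg is None:  # disconnect sentinel
            break
        if not dirty:
            deadline = loop.time() + FLUSH_INTERVAL
        await process_noflush(msg)  # stage the append without fsync
        dirty = True
        if msgs.empty():  # no backlog: flush and reply right away
            await flush()
            dirty, deadline = False, loop.time() + FLUSH_INTERVAL
    if dirty:
        await flush()  # flush on disconnect, cf. the issue 9259 reference above

As in the Rust loop, the point of the final flush is that any WAL staged without fsync is made durable before the task returns.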
- /// NB: don't use this function while holding shared_state lock. - pub async fn wal_residence_guard(self: &Arc) -> Result { + /// Guts of [`Self::wal_residence_guard`] and [`Self::try_wal_residence_guard`] + async fn do_wal_residence_guard( + self: &Arc, + block: bool, + ) -> Result> { + let op_label = if block { + "wal_residence_guard" + } else { + "try_wal_residence_guard" + }; + if self.is_cancelled() { bail!(TimelineError::Cancelled(self.ttid)); } @@ -816,10 +819,13 @@ impl Timeline { // Wait 30 seconds for the guard to be acquired. It can time out if someone is // holding the lock (e.g. during `SafeKeeper::process_msg()`) or manager task // is stuck. - let res = tokio::time::timeout_at( - started_at + Duration::from_secs(30), - self.manager_ctl.wal_residence_guard(), - ) + let res = tokio::time::timeout_at(started_at + Duration::from_secs(30), async { + if block { + self.manager_ctl.wal_residence_guard().await.map(Some) + } else { + self.manager_ctl.try_wal_residence_guard().await + } + }) .await; let guard = match res { @@ -827,14 +833,14 @@ impl Timeline { let finished_at = Instant::now(); let elapsed = finished_at - started_at; MISC_OPERATION_SECONDS - .with_label_values(&["wal_residence_guard"]) + .with_label_values(&[op_label]) .observe(elapsed.as_secs_f64()); guard } Ok(Err(e)) => { warn!( - "error while acquiring WalResidentTimeline guard, statuses {:?} => {:?}", + "error acquiring in {op_label}, statuses {:?} => {:?}", status_before, self.mgr_status.get() ); @@ -842,7 +848,7 @@ impl Timeline { } Err(_) => { warn!( - "timeout while acquiring WalResidentTimeline guard, statuses {:?} => {:?}", + "timeout acquiring in {op_label} guard, statuses {:?} => {:?}", status_before, self.mgr_status.get() ); @@ -850,7 +856,28 @@ impl Timeline { } }; - Ok(WalResidentTimeline::new(self.clone(), guard)) + Ok(guard.map(|g| WalResidentTimeline::new(self.clone(), g))) + } + + /// Get the timeline guard for reading/writing WAL files. + /// If WAL files are not present on disk (evicted), they will be automatically + /// downloaded from remote storage. This is done in the manager task, which is + /// responsible for issuing all guards. + /// + /// NB: don't use this function from timeline_manager, it will deadlock. + /// NB: don't use this function while holding shared_state lock. + pub async fn wal_residence_guard(self: &Arc) -> Result { + self.do_wal_residence_guard(true) + .await + .map(|m| m.expect("Always get Some in block=true mode")) + } + + /// Get the timeline guard for reading/writing WAL files if the timeline is resident, + /// else return None + pub(crate) async fn try_wal_residence_guard( + self: &Arc, + ) -> Result> { + self.do_wal_residence_guard(false).await } pub async fn backup_partial_reset(self: &Arc) -> Result> { diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index f5363ae9b0..303421c837 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -56,6 +56,9 @@ impl Manager { // This also works for the first segment despite last_removed_segno // being 0 on init because this 0 triggers run of wal_removal_task // on success of which manager updates the horizon. + // + // **Note** pull_timeline functionality assumes that evicted timelines always have + // a partial segment: if we ever change this condition, must also update that code. 
&& self .partial_backup_uploaded .as_ref() diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index f0583dd3ff..79200fff8d 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -100,6 +100,8 @@ const REFRESH_INTERVAL: Duration = Duration::from_millis(300); pub enum ManagerCtlMessage { /// Request to get a guard for WalResidentTimeline, with WAL files available locally. GuardRequest(tokio::sync::oneshot::Sender>), + /// Get a guard for WalResidentTimeline if the timeline is not currently offloaded, else None + TryGuardRequest(tokio::sync::oneshot::Sender>), /// Request to drop the guard. GuardDrop(GuardId), /// Request to reset uploaded partial backup state. @@ -110,6 +112,7 @@ impl std::fmt::Debug for ManagerCtlMessage { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { ManagerCtlMessage::GuardRequest(_) => write!(f, "GuardRequest"), + ManagerCtlMessage::TryGuardRequest(_) => write!(f, "TryGuardRequest"), ManagerCtlMessage::GuardDrop(id) => write!(f, "GuardDrop({:?})", id), ManagerCtlMessage::BackupPartialReset(_) => write!(f, "BackupPartialReset"), } @@ -152,6 +155,19 @@ impl ManagerCtl { .and_then(std::convert::identity) } + /// Issue a new guard if the timeline is currently not offloaded, else return None + /// Sends a message to the manager and waits for the response. + /// Can be blocked indefinitely if the manager is stuck. + pub async fn try_wal_residence_guard(&self) -> anyhow::Result> { + let (tx, rx) = tokio::sync::oneshot::channel(); + self.manager_tx + .send(ManagerCtlMessage::TryGuardRequest(tx))?; + + // wait for the manager to respond with the guard + rx.await + .map_err(|e| anyhow::anyhow!("response read fail: {:?}", e)) + } + /// Request timeline manager to reset uploaded partial segment state and /// wait for the result. 
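try_wal_residence_guard follows the same oneshot request/response shape as the existing guard request, but it never blocks on un-eviction: the manager answers None while the timeline is offloaded. A small asyncio sketch of that round trip; `state.is_offloaded` and `state.create_guard()` are assumed stand-ins for the manager's bookkeeping, not real APIs.

import asyncio


async def manager_loop(rx: asyncio.Queue, state) -> None:
    # Manager side: hand out a guard only while resident, never trigger a download.
    while True:
        kind, reply = await rx.get()
        if kind == "try_guard":
            reply.set_result(None if state.is_offloaded else state.create_guard())


async def try_wal_residence_guard(tx: asyncio.Queue):
    # Caller side: oneshot-style request over the manager channel, then await the answer.
    reply = asyncio.get_running_loop().create_future()
    await tx.put(("try_guard", reply))
    return await reply  # None means the timeline is currently offloaded

This is what lets the snapshot path serve an evicted timeline without promoting its segments back to local disk, which the new safekeeper test further down relies on.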
pub async fn backup_partial_reset(&self) -> anyhow::Result> { @@ -674,6 +690,17 @@ impl Manager { warn!("failed to reply with a guard, receiver dropped"); } } + Some(ManagerCtlMessage::TryGuardRequest(tx)) => { + let result = if self.is_offloaded { + None + } else { + Some(self.access_service.create_guard()) + }; + + if tx.send(result).is_err() { + warn!("failed to reply with a guard, receiver dropped"); + } + } Some(ManagerCtlMessage::GuardDrop(guard_id)) => { self.access_service.drop_guard(guard_id); } diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index afefe8598c..face3d2c2d 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -968,6 +968,28 @@ async fn handle_tenant_shard_migrate( ) } +async fn handle_tenant_shard_cancel_reconcile( + service: Arc, + req: Request, +) -> Result, ApiError> { + check_permissions(&req, Scope::Admin)?; + + let req = match maybe_forward(req).await { + ForwardOutcome::Forwarded(res) => { + return res; + } + ForwardOutcome::NotForwarded(req) => req, + }; + + let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?; + json_response( + StatusCode::OK, + service + .tenant_shard_cancel_reconcile(tenant_shard_id) + .await?, + ) +} + async fn handle_tenant_update_policy(req: Request) -> Result, ApiError> { check_permissions(&req, Scope::Admin)?; @@ -1776,6 +1798,16 @@ pub fn make_router( RequestName("control_v1_tenant_migrate"), ) }) + .put( + "/control/v1/tenant/:tenant_shard_id/cancel_reconcile", + |r| { + tenant_service_handler( + r, + handle_tenant_shard_cancel_reconcile, + RequestName("control_v1_tenant_cancel_reconcile"), + ) + }, + ) .put("/control/v1/tenant/:tenant_id/shard_split", |r| { tenant_service_handler( r, diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index a2a6e63dd2..3f6cbfef59 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -4834,6 +4834,43 @@ impl Service { Ok(TenantShardMigrateResponse {}) } + /// 'cancel' in this context means cancel any ongoing reconcile + pub(crate) async fn tenant_shard_cancel_reconcile( + &self, + tenant_shard_id: TenantShardId, + ) -> Result<(), ApiError> { + // Take state lock and fire the cancellation token, after which we drop lock and wait for any ongoing reconcile to complete + let waiter = { + let locked = self.inner.write().unwrap(); + let Some(shard) = locked.tenants.get(&tenant_shard_id) else { + return Err(ApiError::NotFound( + anyhow::anyhow!("Tenant shard not found").into(), + )); + }; + + let waiter = shard.get_waiter(); + match waiter { + None => { + tracing::info!("Shard does not have an ongoing Reconciler"); + return Ok(()); + } + Some(waiter) => { + tracing::info!("Cancelling Reconciler"); + shard.cancel_reconciler(); + waiter + } + } + }; + + // Cancellation should be prompt. If this fails we have still done our job of firing the + // cancellation token, but by returning an ApiError we will indicate to the caller that + // the Reconciler is misbehaving and not respecting the cancellation token + self.await_waiters(vec![waiter], SHORT_RECONCILE_TIMEOUT) + .await?; + + Ok(()) + } + /// This is for debug/support only: we simply drop all state for a tenant, without /// detaching or deleting it on pageservers. 
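The new cancel endpoint is also reachable with a plain HTTP client. A minimal sketch, assuming the admin-scoped bearer token used elsewhere in the test suite; the URL matches the route registered in http.rs above, and the shard-id format mirrors the regression test below.

import requests


def cancel_reconcile(api: str, admin_jwt: str, tenant_shard_id: str) -> None:
    # PUT /control/v1/tenant/{tenant_shard_id}/cancel_reconcile, admin scope required.
    resp = requests.put(
        f"{api}/control/v1/tenant/{tenant_shard_id}/cancel_reconcile",
        headers={"Authorization": f"Bearer {admin_jwt}"},
        timeout=30,
    )
    resp.raise_for_status()


# e.g. shard 1 of a 2-shard tenant: cancel_reconcile(api, token, f"{tenant_id}-0102")

A 200 is returned even when no reconcile is in flight; an error from the endpoint means the Reconciler did not wind down within SHORT_RECONCILE_TIMEOUT despite its cancellation token being fired.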
pub(crate) async fn tenant_drop(&self, tenant_id: TenantId) -> Result<(), ApiError> { @@ -4921,16 +4958,7 @@ impl Service { stripe_size, }, placement_policy: Some(PlacementPolicy::Attached(0)), // No secondaries, for convenient debug/hacking - - // There is no way to know what the tenant's config was: revert to defaults - // - // TODO: remove `switch_aux_file_policy` once we finish auxv2 migration - // - // we write to both v1+v2 storage, so that the test case can use either storage format for testing - config: TenantConfig { - switch_aux_file_policy: Some(models::AuxFilePolicy::CrossValidation), - ..TenantConfig::default() - }, + config: TenantConfig::default(), }) .await?; diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index e696c72ba7..27c97d3b86 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -1317,6 +1317,12 @@ impl TenantShard { }) } + pub(crate) fn cancel_reconciler(&self) { + if let Some(handle) = self.reconciler.as_ref() { + handle.cancel.cancel() + } + } + /// Get a waiter for any reconciliation in flight, but do not start reconciliation /// if it is not already running pub(crate) fn get_waiter(&self) -> Option { diff --git a/test_runner/conftest.py b/test_runner/conftest.py index 4a3194c691..84eda52d33 100644 --- a/test_runner/conftest.py +++ b/test_runner/conftest.py @@ -3,6 +3,7 @@ from __future__ import annotations pytest_plugins = ( "fixtures.pg_version", "fixtures.parametrize", + "fixtures.h2server", "fixtures.httpserver", "fixtures.compute_reconfigure", "fixtures.storage_controller_proxy", diff --git a/test_runner/fixtures/h2server.py b/test_runner/fixtures/h2server.py new file mode 100644 index 0000000000..92783e1fb2 --- /dev/null +++ b/test_runner/fixtures/h2server.py @@ -0,0 +1,198 @@ +""" +https://python-hyper.org/projects/hyper-h2/en/stable/asyncio-example.html + +auth-broker -> local-proxy needs a h2 connection, so we need a h2 server :) +""" + +import asyncio +import collections +import io +import json +from collections.abc import AsyncIterable + +import pytest_asyncio +from h2.config import H2Configuration +from h2.connection import H2Connection +from h2.errors import ErrorCodes +from h2.events import ( + ConnectionTerminated, + DataReceived, + RemoteSettingsChanged, + RequestReceived, + StreamEnded, + StreamReset, + WindowUpdated, +) +from h2.exceptions import ProtocolError, StreamClosedError +from h2.settings import SettingCodes + +RequestData = collections.namedtuple("RequestData", ["headers", "data"]) + + +class H2Server: + def __init__(self, host, port) -> None: + self.host = host + self.port = port + + +class H2Protocol(asyncio.Protocol): + def __init__(self): + config = H2Configuration(client_side=False, header_encoding="utf-8") + self.conn = H2Connection(config=config) + self.transport = None + self.stream_data = {} + self.flow_control_futures = {} + + def connection_made(self, transport: asyncio.Transport): # type: ignore[override] + self.transport = transport + self.conn.initiate_connection() + self.transport.write(self.conn.data_to_send()) + + def connection_lost(self, _exc): + for future in self.flow_control_futures.values(): + future.cancel() + self.flow_control_futures = {} + + def data_received(self, data: bytes): + assert self.transport is not None + try: + events = self.conn.receive_data(data) + except ProtocolError: + self.transport.write(self.conn.data_to_send()) + self.transport.close() + else: + self.transport.write(self.conn.data_to_send()) + for 
event in events: + if isinstance(event, RequestReceived): + self.request_received(event.headers, event.stream_id) + elif isinstance(event, DataReceived): + self.receive_data(event.data, event.stream_id) + elif isinstance(event, StreamEnded): + self.stream_complete(event.stream_id) + elif isinstance(event, ConnectionTerminated): + self.transport.close() + elif isinstance(event, StreamReset): + self.stream_reset(event.stream_id) + elif isinstance(event, WindowUpdated): + self.window_updated(event.stream_id, event.delta) + elif isinstance(event, RemoteSettingsChanged): + if SettingCodes.INITIAL_WINDOW_SIZE in event.changed_settings: + self.window_updated(None, 0) + + self.transport.write(self.conn.data_to_send()) + + def request_received(self, headers: list[tuple[str, str]], stream_id: int): + headers_map = collections.OrderedDict(headers) + + # Store off the request data. + request_data = RequestData(headers_map, io.BytesIO()) + self.stream_data[stream_id] = request_data + + def stream_complete(self, stream_id: int): + """ + When a stream is complete, we can send our response. + """ + try: + request_data = self.stream_data[stream_id] + except KeyError: + # Just return, we probably 405'd this already + return + + headers = request_data.headers + body = request_data.data.getvalue().decode("utf-8") + + data = json.dumps({"headers": headers, "body": body}, indent=4).encode("utf8") + + response_headers = ( + (":status", "200"), + ("content-type", "application/json"), + ("content-length", str(len(data))), + ) + self.conn.send_headers(stream_id, response_headers) + asyncio.ensure_future(self.send_data(data, stream_id)) + + def receive_data(self, data: bytes, stream_id: int): + """ + We've received some data on a stream. If that stream is one we're + expecting data on, save it off. Otherwise, reset the stream. + """ + try: + stream_data = self.stream_data[stream_id] + except KeyError: + self.conn.reset_stream(stream_id, error_code=ErrorCodes.PROTOCOL_ERROR) + else: + stream_data.data.write(data) + + def stream_reset(self, stream_id): + """ + A stream reset was sent. Stop sending data. + """ + if stream_id in self.flow_control_futures: + future = self.flow_control_futures.pop(stream_id) + future.cancel() + + async def send_data(self, data, stream_id): + """ + Send data according to the flow control rules. + """ + while data: + while self.conn.local_flow_control_window(stream_id) < 1: + try: + await self.wait_for_flow_control(stream_id) + except asyncio.CancelledError: + return + + chunk_size = min( + self.conn.local_flow_control_window(stream_id), + len(data), + self.conn.max_outbound_frame_size, + ) + + try: + self.conn.send_data( + stream_id, data[:chunk_size], end_stream=(chunk_size == len(data)) + ) + except (StreamClosedError, ProtocolError): + # The stream got closed and we didn't get told. We're done + # here. + break + + assert self.transport is not None + self.transport.write(self.conn.data_to_send()) + data = data[chunk_size:] + + async def wait_for_flow_control(self, stream_id): + """ + Waits for a Future that fires when the flow control window is opened. + """ + f: asyncio.Future[None] = asyncio.Future() + self.flow_control_futures[stream_id] = f + await f + + def window_updated(self, stream_id, delta): + """ + A window update frame was received. Unblock some number of flow control + Futures. 
+ """ + if stream_id and stream_id in self.flow_control_futures: + f = self.flow_control_futures.pop(stream_id) + f.set_result(delta) + elif not stream_id: + for f in self.flow_control_futures.values(): + f.set_result(delta) + + self.flow_control_futures = {} + + +@pytest_asyncio.fixture(scope="function") +async def http2_echoserver() -> AsyncIterable[H2Server]: + loop = asyncio.get_event_loop() + serve = await loop.create_server(H2Protocol, "127.0.0.1", 0) + (host, port) = serve.sockets[0].getsockname() + + asyncio.create_task(serve.wait_closed()) + + server = H2Server(host, port) + yield server + + serve.close() diff --git a/test_runner/fixtures/neon_cli.py b/test_runner/fixtures/neon_cli.py index 1b2767e296..d220ea57a2 100644 --- a/test_runner/fixtures/neon_cli.py +++ b/test_runner/fixtures/neon_cli.py @@ -16,7 +16,6 @@ from fixtures.common_types import Lsn, TenantId, TimelineId from fixtures.log_helper import log from fixtures.pageserver.common_types import IndexPartDump from fixtures.pg_version import PgVersion -from fixtures.utils import AuxFileStore if TYPE_CHECKING: from typing import ( @@ -201,7 +200,6 @@ class NeonLocalCli(AbstractNeonCli): shard_stripe_size: Optional[int] = None, placement_policy: Optional[str] = None, set_default: bool = False, - aux_file_policy: Optional[AuxFileStore] = None, ): """ Creates a new tenant, returns its id and its initial timeline's id. @@ -223,13 +221,6 @@ class NeonLocalCli(AbstractNeonCli): ) ) - if aux_file_policy is AuxFileStore.V2: - args.extend(["-c", "switch_aux_file_policy:v2"]) - elif aux_file_policy is AuxFileStore.V1: - args.extend(["-c", "switch_aux_file_policy:v1"]) - elif aux_file_policy is AuxFileStore.CrossValidation: - args.extend(["-c", "switch_aux_file_policy:cross-validation"]) - if set_default: args.append("--set-default") diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 6491069f20..1b9bc873f4 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -35,11 +35,13 @@ import toml from _pytest.config import Config from _pytest.config.argparsing import Parser from _pytest.fixtures import FixtureRequest +from jwcrypto import jwk # Type-related stuff from psycopg2.extensions import connection as PgConnection from psycopg2.extensions import cursor as PgCursor from psycopg2.extensions import make_dsn, parse_dsn +from pytest_httpserver import HTTPServer from urllib3.util.retry import Retry from fixtures import overlayfs @@ -53,6 +55,7 @@ from fixtures.common_types import ( TimelineId, ) from fixtures.endpoint.http import EndpointHttpClient +from fixtures.h2server import H2Server from fixtures.log_helper import log from fixtures.metrics import Metrics, MetricsGetter, parse_metrics from fixtures.neon_cli import NeonLocalCli, Pagectl @@ -94,7 +97,6 @@ from fixtures.utils import ( subprocess_capture, wait_until, ) -from fixtures.utils import AuxFileStore as AuxFileStore # reexport from .neon_api import NeonAPI, NeonApiEndpoint @@ -353,7 +355,6 @@ class NeonEnvBuilder: initial_tenant: Optional[TenantId] = None, initial_timeline: Optional[TimelineId] = None, pageserver_virtual_file_io_engine: Optional[str] = None, - pageserver_aux_file_policy: Optional[AuxFileStore] = None, pageserver_default_tenant_config_compaction_algorithm: Optional[dict[str, Any]] = None, safekeeper_extra_opts: Optional[list[str]] = None, storage_controller_port_override: Optional[int] = None, @@ -405,8 +406,6 @@ class NeonEnvBuilder: f"Overriding pageserver default compaction 
algorithm to {self.pageserver_default_tenant_config_compaction_algorithm}" ) - self.pageserver_aux_file_policy = pageserver_aux_file_policy - self.safekeeper_extra_opts = safekeeper_extra_opts self.storage_controller_port_override = storage_controller_port_override @@ -467,7 +466,6 @@ class NeonEnvBuilder: timeline_id=env.initial_timeline, shard_count=initial_tenant_shard_count, shard_stripe_size=initial_tenant_shard_stripe_size, - aux_file_policy=self.pageserver_aux_file_policy, ) assert env.initial_tenant == initial_tenant assert env.initial_timeline == initial_timeline @@ -1027,7 +1025,6 @@ class NeonEnv: self.control_plane_compute_hook_api = config.control_plane_compute_hook_api self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine - self.pageserver_aux_file_policy = config.pageserver_aux_file_policy self.pageserver_virtual_file_io_mode = config.pageserver_virtual_file_io_mode # Create the neon_local's `NeonLocalInitConf` @@ -1323,7 +1320,6 @@ class NeonEnv: shard_stripe_size: Optional[int] = None, placement_policy: Optional[str] = None, set_default: bool = False, - aux_file_policy: Optional[AuxFileStore] = None, ) -> tuple[TenantId, TimelineId]: """ Creates a new tenant, returns its id and its initial timeline's id. @@ -1340,7 +1336,6 @@ class NeonEnv: shard_stripe_size=shard_stripe_size, placement_policy=placement_policy, set_default=set_default, - aux_file_policy=aux_file_policy, ) return tenant_id, timeline_id @@ -1398,7 +1393,6 @@ def neon_simple_env( compatibility_pg_distrib_dir: Path, pg_version: PgVersion, pageserver_virtual_file_io_engine: str, - pageserver_aux_file_policy: Optional[AuxFileStore], pageserver_default_tenant_config_compaction_algorithm: Optional[dict[str, Any]], pageserver_virtual_file_io_mode: Optional[str], ) -> Iterator[NeonEnv]: @@ -1431,7 +1425,6 @@ def neon_simple_env( test_name=request.node.name, test_output_dir=test_output_dir, pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine, - pageserver_aux_file_policy=pageserver_aux_file_policy, pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, pageserver_virtual_file_io_mode=pageserver_virtual_file_io_mode, combination=combination, @@ -1458,7 +1451,6 @@ def neon_env_builder( top_output_dir: Path, pageserver_virtual_file_io_engine: str, pageserver_default_tenant_config_compaction_algorithm: Optional[dict[str, Any]], - pageserver_aux_file_policy: Optional[AuxFileStore], record_property: Callable[[str, object], None], pageserver_virtual_file_io_mode: Optional[str], ) -> Iterator[NeonEnvBuilder]: @@ -1501,7 +1493,6 @@ def neon_env_builder( test_name=request.node.name, test_output_dir=test_output_dir, test_overlay_dir=test_overlay_dir, - pageserver_aux_file_policy=pageserver_aux_file_policy, pageserver_default_tenant_config_compaction_algorithm=pageserver_default_tenant_config_compaction_algorithm, pageserver_virtual_file_io_mode=pageserver_virtual_file_io_mode, ) as builder: @@ -3092,6 +3083,31 @@ class PSQL: ) +def generate_proxy_tls_certs(common_name: str, key_path: Path, crt_path: Path): + if not key_path.exists(): + r = subprocess.run( + [ + "openssl", + "req", + "-new", + "-x509", + "-days", + "365", + "-nodes", + "-text", + "-out", + str(crt_path), + "-keyout", + str(key_path), + "-subj", + f"/CN={common_name}", + "-addext", + f"subjectAltName = DNS:{common_name}", + ] + ) + assert r.returncode == 0 + + class NeonProxy(PgProtocol): link_auth_uri: str = "http://dummy-uri" @@ -3190,29 +3206,7 @@ class 
NeonProxy(PgProtocol): # generate key of it doesn't exist crt_path = self.test_output_dir / "proxy.crt" key_path = self.test_output_dir / "proxy.key" - - if not key_path.exists(): - r = subprocess.run( - [ - "openssl", - "req", - "-new", - "-x509", - "-days", - "365", - "-nodes", - "-text", - "-out", - str(crt_path), - "-keyout", - str(key_path), - "-subj", - "/CN=*.localtest.me", - "-addext", - "subjectAltName = DNS:*.localtest.me", - ] - ) - assert r.returncode == 0 + generate_proxy_tls_certs("*.localtest.me", key_path, crt_path) args = [ str(self.neon_binpath / "proxy"), @@ -3392,6 +3386,125 @@ class NeonProxy(PgProtocol): assert out == "ok" +class NeonAuthBroker: + class ControlPlane: + def __init__(self, endpoint: str): + self.endpoint = endpoint + + def extra_args(self) -> list[str]: + args = [ + *["--auth-backend", "console"], + *["--auth-endpoint", self.endpoint], + ] + return args + + def __init__( + self, + neon_binpath: Path, + test_output_dir: Path, + http_port: int, + mgmt_port: int, + external_http_port: int, + auth_backend: NeonAuthBroker.ControlPlane, + ): + self.domain = "apiauth.localtest.me" # resolves to 127.0.0.1 + self.host = "127.0.0.1" + self.http_port = http_port + self.external_http_port = external_http_port + self.neon_binpath = neon_binpath + self.test_output_dir = test_output_dir + self.mgmt_port = mgmt_port + self.auth_backend = auth_backend + self.http_timeout_seconds = 15 + self._popen: Optional[subprocess.Popen[bytes]] = None + + def start(self) -> NeonAuthBroker: + assert self._popen is None + + # generate key of it doesn't exist + crt_path = self.test_output_dir / "proxy.crt" + key_path = self.test_output_dir / "proxy.key" + generate_proxy_tls_certs("apiauth.localtest.me", key_path, crt_path) + + args = [ + str(self.neon_binpath / "proxy"), + *["--http", f"{self.host}:{self.http_port}"], + *["--mgmt", f"{self.host}:{self.mgmt_port}"], + *["--wss", f"{self.host}:{self.external_http_port}"], + *["-c", str(crt_path)], + *["-k", str(key_path)], + *["--sql-over-http-pool-opt-in", "false"], + *["--is-auth-broker", "true"], + *self.auth_backend.extra_args(), + ] + + logfile = open(self.test_output_dir / "proxy.log", "w") + self._popen = subprocess.Popen(args, stdout=logfile, stderr=logfile) + self._wait_until_ready() + return self + + # Sends SIGTERM to the proxy if it has been started + def terminate(self): + if self._popen: + self._popen.terminate() + + # Waits for proxy to exit if it has been opened with a default timeout of + # two seconds. Raises subprocess.TimeoutExpired if the proxy does not exit in time. + def wait_for_exit(self, timeout=2): + if self._popen: + self._popen.wait(timeout=timeout) + + @backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_time=10) + def _wait_until_ready(self): + assert ( + self._popen and self._popen.poll() is None + ), "Proxy exited unexpectedly. Check test log." 
+ requests.get(f"http://{self.host}:{self.http_port}/v1/status") + + async def query(self, query, args, **kwargs): + user = kwargs["user"] + token = kwargs["token"] + expected_code = kwargs.get("expected_code") + + log.info(f"Executing http query: {query}") + + connstr = f"postgresql://{user}@{self.domain}/postgres" + async with httpx.AsyncClient(verify=str(self.test_output_dir / "proxy.crt")) as client: + response = await client.post( + f"https://{self.domain}:{self.external_http_port}/sql", + json={"query": query, "params": args}, + headers={ + "Neon-Connection-String": connstr, + "Authorization": f"Bearer {token}", + }, + ) + + if expected_code is not None: + assert response.status_code == expected_code, f"response: {response.json()}" + return response.json() + + def get_metrics(self) -> str: + request_result = requests.get(f"http://{self.host}:{self.http_port}/metrics") + return request_result.text + + def __enter__(self) -> NeonAuthBroker: + return self + + def __exit__( + self, + _exc_type: Optional[type[BaseException]], + _exc_value: Optional[BaseException], + _traceback: Optional[TracebackType], + ): + if self._popen is not None: + self._popen.terminate() + try: + self._popen.wait(timeout=5) + except subprocess.TimeoutExpired: + log.warning("failed to gracefully terminate proxy; killing") + self._popen.kill() + + @pytest.fixture(scope="function") def link_proxy( port_distributor: PortDistributor, neon_binpath: Path, test_output_dir: Path @@ -3456,6 +3569,74 @@ def static_proxy( yield proxy +@pytest.fixture(scope="function") +def neon_authorize_jwk() -> jwk.JWK: + kid = str(uuid.uuid4()) + key = jwk.JWK.generate(kty="RSA", size=2048, alg="RS256", use="sig", kid=kid) + assert isinstance(key, jwk.JWK) + return key + + +@pytest.fixture(scope="function") +def static_auth_broker( + port_distributor: PortDistributor, + neon_binpath: Path, + test_output_dir: Path, + httpserver: HTTPServer, + neon_authorize_jwk: jwk.JWK, + http2_echoserver: H2Server, +) -> Iterable[NeonAuthBroker]: + """Neon Auth Broker that routes to a mocked local_proxy and a mocked cplane HTTP API.""" + + local_proxy_addr = f"{http2_echoserver.host}:{http2_echoserver.port}" + + # return local_proxy addr on ProxyWakeCompute. + httpserver.expect_request("/cplane/proxy_wake_compute").respond_with_json( + { + "address": local_proxy_addr, + "aux": { + "endpoint_id": "ep-foo-bar-1234", + "branch_id": "br-foo-bar", + "project_id": "foo-bar", + }, + } + ) + + # return jwks mock addr on GetEndpointJwks + httpserver.expect_request(re.compile("^/cplane/endpoints/.+/jwks$")).respond_with_json( + { + "jwks": [ + { + "id": "foo", + "jwks_url": httpserver.url_for("/authorize/jwks.json"), + "provider_name": "test", + "jwt_audience": None, + "role_names": ["anonymous", "authenticated"], + } + ] + } + ) + + # return static fixture jwks. 
+ jwk = neon_authorize_jwk.export_public(as_dict=True) + httpserver.expect_request("/authorize/jwks.json").respond_with_json({"keys": [jwk]}) + + mgmt_port = port_distributor.get_port() + http_port = port_distributor.get_port() + external_http_port = port_distributor.get_port() + + with NeonAuthBroker( + neon_binpath=neon_binpath, + test_output_dir=test_output_dir, + http_port=http_port, + mgmt_port=mgmt_port, + external_http_port=external_http_port, + auth_backend=NeonAuthBroker.ControlPlane(httpserver.url_for("/cplane")), + ) as proxy: + proxy.start() + yield proxy + + class Endpoint(PgProtocol, LogUtils): """An object representing a Postgres compute endpoint managed by the control plane.""" diff --git a/test_runner/fixtures/parametrize.py b/test_runner/fixtures/parametrize.py index 4114c2fcb3..1131bf090f 100644 --- a/test_runner/fixtures/parametrize.py +++ b/test_runner/fixtures/parametrize.py @@ -10,12 +10,6 @@ from _pytest.python import Metafunc from fixtures.pg_version import PgVersion -if TYPE_CHECKING: - from typing import Any, Optional - - from fixtures.utils import AuxFileStore - - if TYPE_CHECKING: from typing import Any, Optional @@ -50,11 +44,6 @@ def pageserver_virtual_file_io_mode() -> Optional[str]: return os.getenv("PAGESERVER_VIRTUAL_FILE_IO_MODE") -@pytest.fixture(scope="function", autouse=True) -def pageserver_aux_file_policy() -> Optional[AuxFileStore]: - return None - - def get_pageserver_default_tenant_config_compaction_algorithm() -> Optional[dict[str, Any]]: toml_table = os.getenv("PAGESERVER_DEFAULT_TENANT_CONFIG_COMPACTION_ALGORITHM") if toml_table is None: diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index d12fa59abc..01b7cf1026 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -1,7 +1,6 @@ from __future__ import annotations import contextlib -import enum import json import os import re @@ -515,21 +514,6 @@ def assert_no_errors(log_file: Path, service: str, allowed_errors: list[str]): assert not errors, f"First log error on {service}: {errors[0]}\nHint: use scripts/check_allowed_errors.sh to test any new allowed_error you add" -@enum.unique -class AuxFileStore(str, enum.Enum): - V1 = "v1" - V2 = "v2" - CrossValidation = "cross-validation" - - @override - def __repr__(self) -> str: - return f"'aux-{self.value}'" - - @override - def __str__(self) -> str: - return f"'aux-{self.value}'" - - def assert_pageserver_backups_equal(left: Path, right: Path, skip_files: set[str]): """ This is essentially: diff --git a/test_runner/performance/test_logical_replication.py b/test_runner/performance/test_logical_replication.py index 815d186ab9..8b2a296bdd 100644 --- a/test_runner/performance/test_logical_replication.py +++ b/test_runner/performance/test_logical_replication.py @@ -9,7 +9,7 @@ import pytest from fixtures.benchmark_fixture import MetricReport from fixtures.common_types import Lsn from fixtures.log_helper import log -from fixtures.neon_fixtures import AuxFileStore, logical_replication_sync +from fixtures.neon_fixtures import logical_replication_sync if TYPE_CHECKING: from fixtures.benchmark_fixture import NeonBenchmarker @@ -17,7 +17,6 @@ if TYPE_CHECKING: from fixtures.neon_fixtures import NeonEnv, PgBin -@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.V2]) @pytest.mark.timeout(1000) def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg): env = neon_simple_env diff --git a/test_runner/regress/test_attach_tenant_config.py 
b/test_runner/regress/test_attach_tenant_config.py index 4a7017994d..83d003a5cc 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -172,7 +172,6 @@ def test_fully_custom_config(positive_env: NeonEnv): }, "walreceiver_connect_timeout": "13m", "image_layer_creation_check_threshold": 1, - "switch_aux_file_policy": "cross-validation", "lsn_lease_length": "1m", "lsn_lease_length_for_ts": "5s", } diff --git a/test_runner/regress/test_auth_broker.py b/test_runner/regress/test_auth_broker.py new file mode 100644 index 0000000000..11dc7d56b5 --- /dev/null +++ b/test_runner/regress/test_auth_broker.py @@ -0,0 +1,37 @@ +import json + +import pytest +from fixtures.neon_fixtures import NeonAuthBroker +from jwcrypto import jwk, jwt + + +@pytest.mark.asyncio +async def test_auth_broker_happy( + static_auth_broker: NeonAuthBroker, + neon_authorize_jwk: jwk.JWK, +): + """ + Signs a JWT and uses it to authorize a query to local_proxy. + """ + + token = jwt.JWT( + header={"kid": neon_authorize_jwk.key_id, "alg": "RS256"}, claims={"sub": "user1"} + ) + token.make_signed_token(neon_authorize_jwk) + res = await static_auth_broker.query("foo", ["arg1"], user="anonymous", token=token.serialize()) + + # local proxy mock just echos back the request + # check that we forward the correct data + + assert ( + res["headers"]["authorization"] == f"Bearer {token.serialize()}" + ), "JWT should be forwarded" + + assert ( + "anonymous" in res["headers"]["neon-connection-string"] + ), "conn string should be forwarded" + + assert json.loads(res["body"]) == { + "query": "foo", + "params": ["arg1"], + }, "Query body should be forwarded" diff --git a/test_runner/regress/test_ddl_forwarding.py b/test_runner/regress/test_ddl_forwarding.py index 96657b3ce4..e517e83e6f 100644 --- a/test_runner/regress/test_ddl_forwarding.py +++ b/test_runner/regress/test_ddl_forwarding.py @@ -7,6 +7,7 @@ import psycopg2 import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnv, VanillaPostgres +from psycopg2.errors import UndefinedObject from pytest_httpserver import HTTPServer from werkzeug.wrappers.request import Request from werkzeug.wrappers.response import Response @@ -335,3 +336,34 @@ def test_ddl_forwarding_invalid_db(neon_simple_env: NeonEnv): if not result: raise AssertionError("Could not count databases") assert result[0] == 0, "Database 'failure' still exists after restart" + + +def test_ddl_forwarding_role_specs(neon_simple_env: NeonEnv): + """ + Postgres has a concept of role specs: + + ROLESPEC_CSTRING: ALTER ROLE xyz + ROLESPEC_CURRENT_USER: ALTER ROLE current_user + ROLESPEC_CURRENT_ROLE: ALTER ROLE current_role + ROLESPEC_SESSION_USER: ALTER ROLE session_user + ROLESPEC_PUBLIC: ALTER ROLE public + + The extension is required to serialize these special role spec into + usernames for the purpose of DDL forwarding. 
+ """ + env = neon_simple_env + + endpoint = env.endpoints.create_start("main") + + with endpoint.cursor() as cur: + # ROLESPEC_CSTRING + cur.execute("ALTER ROLE cloud_admin WITH PASSWORD 'york'") + # ROLESPEC_CURRENT_USER + cur.execute("ALTER ROLE current_user WITH PASSWORD 'pork'") + # ROLESPEC_CURRENT_ROLE + cur.execute("ALTER ROLE current_role WITH PASSWORD 'cork'") + # ROLESPEC_SESSION_USER + cur.execute("ALTER ROLE session_user WITH PASSWORD 'bork'") + # ROLESPEC_PUBLIC + with pytest.raises(UndefinedObject): + cur.execute("ALTER ROLE public WITH PASSWORD 'dork'") diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py index c26bf058e2..30027463df 100644 --- a/test_runner/regress/test_logical_replication.py +++ b/test_runner/regress/test_logical_replication.py @@ -5,11 +5,9 @@ from functools import partial from random import choice from string import ascii_lowercase -import pytest from fixtures.common_types import Lsn from fixtures.log_helper import log from fixtures.neon_fixtures import ( - AuxFileStore, NeonEnv, NeonEnvBuilder, PgProtocol, @@ -23,17 +21,6 @@ def random_string(n: int): return "".join([choice(ascii_lowercase) for _ in range(n)]) -@pytest.mark.parametrize( - "pageserver_aux_file_policy", [AuxFileStore.V2, AuxFileStore.CrossValidation] -) -def test_aux_file_v2_flag(neon_simple_env: NeonEnv, pageserver_aux_file_policy: AuxFileStore): - env = neon_simple_env - with env.pageserver.http_client() as client: - tenant_config = client.tenant_config(env.initial_tenant).effective_config - assert pageserver_aux_file_policy == tenant_config["switch_aux_file_policy"] - - -@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_logical_replication(neon_simple_env: NeonEnv, vanilla_pg): env = neon_simple_env @@ -173,7 +160,6 @@ COMMIT; # Test that neon.logical_replication_max_snap_files works -@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_obsolete_slot_drop(neon_simple_env: NeonEnv, vanilla_pg): def slot_removed(ep): assert ( @@ -350,7 +336,6 @@ FROM generate_series(1, 16384) AS seq; -- Inserts enough rows to exceed 16MB of # # Most pages start with a contrecord, so we don't do anything special # to ensure that. -@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg): env = neon_simple_env @@ -395,7 +380,6 @@ def test_restart_endpoint(neon_simple_env: NeonEnv, vanilla_pg): # logical replication bug as such, but without logical replication, # records passed ot the WAL redo process are never large enough to hit # the bug. 
-@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_large_records(neon_simple_env: NeonEnv, vanilla_pg): env = neon_simple_env @@ -467,7 +451,6 @@ def test_slots_and_branching(neon_simple_env: NeonEnv): ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')") -@pytest.mark.parametrize("pageserver_aux_file_policy", [AuxFileStore.CrossValidation]) def test_replication_shutdown(neon_simple_env: NeonEnv): # Ensure Postgres can exit without stuck when a replication job is active + neon extension installed env = neon_simple_env diff --git a/test_runner/regress/test_proxy.py b/test_runner/regress/test_proxy.py index f598900af9..e59d46e352 100644 --- a/test_runner/regress/test_proxy.py +++ b/test_runner/regress/test_proxy.py @@ -561,7 +561,7 @@ def test_sql_over_http_pool_dos(static_proxy: NeonProxy): # query generates a million rows - should hit the 10MB reponse limit quickly response = query( - 400, + 507, "select * from generate_series(1, 5000) a cross join generate_series(1, 5000) b cross join (select 'foo'::foo) c;", ) assert "response is too large (max is 10485760 bytes)" in response["message"] diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index d4bc4b1a4f..c8de292588 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -18,6 +18,7 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import ( NeonEnv, NeonEnvBuilder, + NeonPageserver, PageserverAvailability, PageserverSchedulingPolicy, PgBin, @@ -298,17 +299,20 @@ def test_storage_controller_restart(neon_env_builder: NeonEnvBuilder): env.storage_controller.consistency_check() -@pytest.mark.parametrize("warm_up", [True, False]) -def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: bool): +def prepare_onboarding_env( + neon_env_builder: NeonEnvBuilder, +) -> tuple[NeonEnv, NeonPageserver, TenantId, int]: """ - We onboard tenants to the sharding service by treating it as a 'virtual pageserver' - which provides the /location_config API. This is similar to creating a tenant, - but imports the generation number. + For tests that do onboarding of a tenant to the storage controller, a small dance to + set up one pageserver that won't be managed by the storage controller and create + a tenant there. 
""" - # One pageserver to simulate legacy environment, two to be managed by storage controller neon_env_builder.num_pageservers = 3 + # Enable tests to use methods that require real S3 API + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + # Start services by hand so that we can skip registration on one of the pageservers env = neon_env_builder.init_configs() env.broker.start() @@ -329,7 +333,6 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up # will be attached after onboarding env.pageservers[1].start() env.pageservers[2].start() - virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) for sk in env.safekeepers: sk.start() @@ -339,6 +342,23 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up generation = 123 origin_ps.tenant_create(tenant_id, generation=generation) + origin_ps.http_client().timeline_create(PgVersion.NOT_SET, tenant_id, TimelineId.generate()) + + return (env, origin_ps, tenant_id, generation) + + +@pytest.mark.parametrize("warm_up", [True, False]) +def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up: bool): + """ + We onboard tenants to the sharding service by treating it as a 'virtual pageserver' + which provides the /location_config API. This is similar to creating a tenant, + but imports the generation number. + """ + + env, origin_ps, tenant_id, generation = prepare_onboarding_env(neon_env_builder) + + virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) + # As if doing a live migration, first configure origin into stale mode r = origin_ps.http_client().tenant_location_conf( tenant_id, @@ -475,6 +495,70 @@ def test_storage_controller_onboarding(neon_env_builder: NeonEnvBuilder, warm_up env.storage_controller.consistency_check() +@run_only_on_default_postgres("this test doesn't start an endpoint") +def test_storage_controller_onboard_detached(neon_env_builder: NeonEnvBuilder): + """ + Sometimes, the control plane wants to delete a tenant that wasn't attached to any pageserver, + and also wasn't ever registered with the storage controller. + + It may do this by calling /location_conf in mode Detached and then calling the delete API + as normal. + """ + + env, origin_ps, tenant_id, generation = prepare_onboarding_env(neon_env_builder) + + remote_prefix = "/".join( + ( + "tenants", + str(tenant_id), + ) + ) + + # Detach it from its original pageserver. 
+ origin_ps.http_client().tenant_location_conf( + tenant_id, + { + "mode": "Detached", + "secondary_conf": None, + "tenant_conf": {}, + "generation": None, + }, + ) + + # Since we will later assert that remote data is gone, as a control also check it was ever there + assert_prefix_not_empty( + neon_env_builder.pageserver_remote_storage, + prefix=remote_prefix, + ) + + # Register with storage controller in Detached state + virtual_ps_http = PageserverHttpClient(env.storage_controller_port, lambda: True) + generation += 1 + r = virtual_ps_http.tenant_location_conf( + tenant_id, + { + "mode": "Detached", + "secondary_conf": None, + "tenant_conf": {}, + "generation": generation, + }, + ) + assert len(r["shards"]) == 0 # location_conf tells us there are no attached shards + + # Onboarding in Detached state shouldn't have attached it to any pageserver + for ps in env.pageservers: + assert ps.http_client().tenant_list() == [] + + # Delete it via the storage controller + virtual_ps_http.tenant_delete(tenant_id) + + # Check that we really deleted it + assert_prefix_empty( + neon_env_builder.pageserver_remote_storage, + prefix=remote_prefix, + ) + + def test_storage_controller_compute_hook( httpserver: HTTPServer, neon_env_builder: NeonEnvBuilder, @@ -872,6 +956,14 @@ def test_storage_controller_debug_apis(neon_env_builder: NeonEnvBuilder): assert sum(v["shard_count"] for v in response.json()["nodes"].values()) == 3 assert all(v["may_schedule"] for v in response.json()["nodes"].values()) + # Reconciler cancel API should be a no-op when nothing is in flight + env.storage_controller.request( + "PUT", + f"{env.storage_controller_api}/control/v1/tenant/{tenant_id}-0102/cancel_reconcile", + headers=env.storage_controller.headers(TokenScope.ADMIN), + ) + + # Node unclean drop API response = env.storage_controller.request( "POST", f"{env.storage_controller_api}/debug/v1/node/{env.pageservers[1].id}/drop", @@ -879,6 +971,7 @@ def test_storage_controller_debug_apis(neon_env_builder: NeonEnvBuilder): ) assert len(env.storage_controller.node_list()) == 1 + # Tenant unclean drop API response = env.storage_controller.request( "POST", f"{env.storage_controller_api}/debug/v1/tenant/{tenant_id}/drop", @@ -892,7 +985,6 @@ def test_storage_controller_debug_apis(neon_env_builder: NeonEnvBuilder): headers=env.storage_controller.headers(TokenScope.ADMIN), ) assert len(response.json()) == 1 - # Check that the 'drop' APIs didn't leave things in a state that would fail a consistency check: they're # meant to be unclean wrt the pageserver state, but not leave a broken storage controller behind. 
env.storage_controller.consistency_check() @@ -1660,6 +1752,11 @@ def test_storcon_cli(neon_env_builder: NeonEnvBuilder): storcon_cli(["tenant-policy", "--tenant-id", str(env.initial_tenant), "--scheduling", "stop"]) assert "Stop" in storcon_cli(["tenants"])[3] + # Cancel ongoing reconcile on a tenant + storcon_cli( + ["tenant-shard-cancel-reconcile", "--tenant-shard-id", f"{env.initial_tenant}-0104"] + ) + # Change a tenant's placement storcon_cli( ["tenant-policy", "--tenant-id", str(env.initial_tenant), "--placement", "secondary"] diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index cb8724dd1c..77efd7b749 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -137,14 +137,17 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b } ) - # Create two branches and archive them - parent_timeline_id = env.create_branch("test_ancestor_branch_archive_parent", tenant_id) - leaf_timeline_id = env.create_branch( - "test_ancestor_branch_archive_branch1", tenant_id, "test_ancestor_branch_archive_parent" + # Create three branches that depend on each other, starting with two + grandparent_timeline_id = env.create_branch( + "test_ancestor_branch_archive_grandparent", tenant_id + ) + parent_timeline_id = env.create_branch( + "test_ancestor_branch_archive_parent", tenant_id, "test_ancestor_branch_archive_grandparent" ) + # write some stuff to the parent with env.endpoints.create_start( - "test_ancestor_branch_archive_branch1", tenant_id=tenant_id + "test_ancestor_branch_archive_parent", tenant_id=tenant_id ) as endpoint: endpoint.safe_psql_many( [ @@ -154,6 +157,11 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b ) sum = endpoint.safe_psql("SELECT sum(key) from foo where key > 50") + # create the third branch + leaf_timeline_id = env.create_branch( + "test_ancestor_branch_archive_branch1", tenant_id, "test_ancestor_branch_archive_parent" + ) + ps_http.timeline_archival_config( tenant_id, leaf_timeline_id, @@ -171,6 +179,12 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b state=TimelineArchivalState.ARCHIVED, ) + ps_http.timeline_archival_config( + tenant_id, + grandparent_timeline_id, + state=TimelineArchivalState.ARCHIVED, + ) + def timeline_offloaded_logged(timeline_id: TimelineId) -> bool: return ( env.pageserver.log_contains(f".*{timeline_id}.* offloading archived timeline.*") @@ -201,30 +215,34 @@ def test_timeline_offloading(neon_env_builder: NeonEnvBuilder, manual_offload: b ps_http.timeline_archival_config( tenant_id, - parent_timeline_id, + grandparent_timeline_id, state=TimelineArchivalState.UNARCHIVED, ) ps_http.timeline_archival_config( tenant_id, - leaf_timeline_id, + parent_timeline_id, state=TimelineArchivalState.UNARCHIVED, ) - leaf_detail = ps_http.timeline_detail( + parent_detail = ps_http.timeline_detail( tenant_id, - leaf_timeline_id, + parent_timeline_id, ) - assert leaf_detail["is_archived"] is False + assert parent_detail["is_archived"] is False with env.endpoints.create_start( - "test_ancestor_branch_archive_branch1", tenant_id=tenant_id + "test_ancestor_branch_archive_parent", tenant_id=tenant_id ) as endpoint: sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key > 50") assert sum == sum_again + # Test that deletion of offloaded timelines works + ps_http.timeline_delete(tenant_id, leaf_timeline_id) + assert not 
timeline_offloaded_logged(initial_timeline_id) -def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder): +@pytest.mark.parametrize("delete_timeline", [False, True]) +def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder, delete_timeline: bool): """ Test for persistence of timeline offload state """ @@ -306,27 +324,35 @@ def test_timeline_offload_persist(neon_env_builder: NeonEnvBuilder): assert timeline_offloaded_api(child_timeline_id) assert not timeline_offloaded_api(root_timeline_id) - ps_http.timeline_archival_config( - tenant_id, - child_timeline_id, - state=TimelineArchivalState.UNARCHIVED, - ) - child_detail = ps_http.timeline_detail( - tenant_id, - child_timeline_id, - ) - assert child_detail["is_archived"] is False + if delete_timeline: + ps_http.timeline_delete(tenant_id, child_timeline_id) + with pytest.raises(PageserverApiException, match="not found"): + ps_http.timeline_detail( + tenant_id, + child_timeline_id, + ) + else: + ps_http.timeline_archival_config( + tenant_id, + child_timeline_id, + state=TimelineArchivalState.UNARCHIVED, + ) + child_detail = ps_http.timeline_detail( + tenant_id, + child_timeline_id, + ) + assert child_detail["is_archived"] is False - with env.endpoints.create_start( - "test_archived_branch_persisted", tenant_id=tenant_id - ) as endpoint: - sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key < 500") - assert sum == sum_again + with env.endpoints.create_start( + "test_archived_branch_persisted", tenant_id=tenant_id + ) as endpoint: + sum_again = endpoint.safe_psql("SELECT sum(key) from foo where key < 500") + assert sum == sum_again - assert_prefix_empty( - neon_env_builder.pageserver_remote_storage, - prefix=f"tenants/{str(env.initial_tenant)}/tenant-manifest", - ) + assert_prefix_empty( + neon_env_builder.pageserver_remote_storage, + prefix=f"tenants/{str(env.initial_tenant)}/tenant-manifest", + ) assert not timeline_offloaded_api(root_timeline_id) diff --git a/test_runner/regress/test_unstable_extensions.py b/test_runner/regress/test_unstable_extensions.py new file mode 100644 index 0000000000..06a62ccfd8 --- /dev/null +++ b/test_runner/regress/test_unstable_extensions.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +import pytest +from psycopg2.errors import InsufficientPrivilege + +if TYPE_CHECKING: + from fixtures.neon_fixtures import NeonEnv + + +def test_unstable_extensions_installation(neon_simple_env: NeonEnv): + """ + Test that the unstable extension support within the neon extension can + block extension installation. 
+ """ + env = neon_simple_env + + neon_unstable_extensions = "pg_prewarm,amcheck" + + endpoint = env.endpoints.create( + "main", + config_lines=[ + "neon.allow_unstable_extensions=false", + f"neon.unstable_extensions='{neon_unstable_extensions}'", + ], + ) + endpoint.respec(skip_pg_catalog_updates=False) + endpoint.start() + + with endpoint.cursor() as cursor: + cursor.execute("SELECT current_setting('neon.unstable_extensions')") + result = cursor.fetchone() + assert result is not None + setting = cast("str", result[0]) + assert setting == neon_unstable_extensions + + with pytest.raises(InsufficientPrivilege): + cursor.execute("CREATE EXTENSION pg_prewarm") + + with pytest.raises(InsufficientPrivilege): + cursor.execute("CREATE EXTENSION amcheck") + + # Make sure that we can install a "stable" extension + cursor.execute("CREATE EXTENSION pageinspect") + + cursor.execute("BEGIN") + cursor.execute("SET neon.allow_unstable_extensions TO true") + cursor.execute("CREATE EXTENSION pg_prewarm") + cursor.execute("COMMIT") diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index d803cd7c78..157390c01c 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1998,6 +1998,109 @@ def test_pull_timeline_term_change(neon_env_builder: NeonEnvBuilder): pt_handle.join() +def test_pull_timeline_while_evicted(neon_env_builder: NeonEnvBuilder): + """ + Verify that when pull_timeline is used on an evicted timeline, it does not result in + promoting any segments to local disk on the source, and the timeline is correctly instantiated + in evicted state on the destination. This behavior is important to avoid ballooning disk + usage when doing mass migration of timelines. + """ + neon_env_builder.num_safekeepers = 4 + neon_env_builder.enable_safekeeper_remote_storage(default_remote_storage()) + + # Configure safekeepers with ultra-fast eviction policy + neon_env_builder.safekeeper_extra_opts = [ + "--enable-offload", + "--partial-backup-timeout", + "50ms", + "--control-file-save-interval", + "1s", + # Safekeepers usually wait a while before evicting something: for this test we want them to + # evict things as soon as they are inactive. 
+ "--eviction-min-resident=100ms", + "--delete-offloaded-wal", + ] + + initial_tenant_conf = {"lagging_wal_timeout": "1s", "checkpoint_timeout": "100ms"} + env = neon_env_builder.init_start(initial_tenant_conf=initial_tenant_conf) + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + (src_sk, dst_sk) = (env.safekeepers[0], env.safekeepers[-1]) + log.info(f"Will pull_timeline on destination {dst_sk.id} from source {src_sk.id}") + + ep = env.endpoints.create("main") + ep.active_safekeepers = [s.id for s in env.safekeepers if s.id != dst_sk.id] + log.info(f"Compute writing initially to safekeepers: {ep.active_safekeepers}") + ep.active_safekeepers = [1, 2, 3] # Exclude dst_sk from set written by compute initially + ep.start() + ep.safe_psql("CREATE TABLE t(i int)") + ep.safe_psql("INSERT INTO t VALUES (0)") + ep.stop() + + wait_lsn_force_checkpoint_at_sk(src_sk, tenant_id, timeline_id, env.pageserver) + + src_http = src_sk.http_client() + dst_http = dst_sk.http_client() + + def evicted_on_source(): + # Wait for timeline to go into evicted state + assert src_http.get_eviction_state(timeline_id) != "Present" + assert ( + src_http.get_metric_value( + "safekeeper_eviction_events_completed_total", {"kind": "evict"} + ) + or 0 > 0 + ) + assert src_http.get_metric_value("safekeeper_evicted_timelines") or 0 > 0 + # Check that on source no segment files are present + assert src_sk.list_segments(tenant_id, timeline_id) == [] + + wait_until(60, 1, evicted_on_source) + + # Invoke pull_timeline: source should serve snapshot request without promoting anything to local disk, + # destination should import the control file only & go into evicted mode immediately + dst_sk.pull_timeline([src_sk], tenant_id, timeline_id) + + # Check that on source and destination no segment files are present + assert src_sk.list_segments(tenant_id, timeline_id) == [] + assert dst_sk.list_segments(tenant_id, timeline_id) == [] + + # Check that the timeline on the destination is in the expected evicted state. + evicted_on_source() # It should still be evicted on the source + + def evicted_on_destination(): + assert dst_http.get_eviction_state(timeline_id) != "Present" + assert dst_http.get_metric_value("safekeeper_evicted_timelines") or 0 > 0 + + # This should be fast, it is a wait_until because eviction state is updated + # in the background wrt pull_timeline. + wait_until(10, 0.1, evicted_on_destination) + + # Delete the timeline on the source, to prove that deletion works on an + # evicted timeline _and_ that the final compute test is really not using + # the original location + src_sk.http_client().timeline_delete(tenant_id, timeline_id, only_local=True) + + # Check that using the timeline correctly un-evicts it on the new location + ep.active_safekeepers = [2, 3, 4] + ep.start() + ep.safe_psql("INSERT INTO t VALUES (0)") + ep.stop() + + def unevicted_on_dest(): + assert ( + dst_http.get_metric_value( + "safekeeper_eviction_events_completed_total", {"kind": "restore"} + ) + or 0 > 0 + ) + n_evicted = dst_sk.http_client().get_metric_value("safekeeper_evicted_timelines") + assert n_evicted == 0 + + wait_until(10, 1, unevicted_on_dest) + + # In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries # when compute is active, but there are no writes to the timeline. 
In that case # pageserver should maintain a single connection to safekeeper and don't attempt diff --git a/test_runner/stubs/h2/README.md b/test_runner/stubs/h2/README.md new file mode 100644 index 0000000000..cdf181ff80 --- /dev/null +++ b/test_runner/stubs/h2/README.md @@ -0,0 +1 @@ +generated via `poetry run stubgen -p h2 -o test_runner/stubs` diff --git a/test_runner/stubs/h2/__init__.pyi b/test_runner/stubs/h2/__init__.pyi new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test_runner/stubs/h2/config.pyi b/test_runner/stubs/h2/config.pyi new file mode 100644 index 0000000000..710005db69 --- /dev/null +++ b/test_runner/stubs/h2/config.pyi @@ -0,0 +1,42 @@ +from _typeshed import Incomplete + +class _BooleanConfigOption: + name: Incomplete + attr_name: Incomplete + def __init__(self, name) -> None: ... + def __get__(self, instance, owner): ... + def __set__(self, instance, value) -> None: ... + +class DummyLogger: + def __init__(self, *vargs) -> None: ... + def debug(self, *vargs, **kwargs) -> None: ... + def trace(self, *vargs, **kwargs) -> None: ... + +class OutputLogger: + file: Incomplete + trace_level: Incomplete + def __init__(self, file: Incomplete | None = ..., trace_level: bool = ...) -> None: ... + def debug(self, fmtstr, *args) -> None: ... + def trace(self, fmtstr, *args) -> None: ... + +class H2Configuration: + client_side: Incomplete + validate_outbound_headers: Incomplete + normalize_outbound_headers: Incomplete + validate_inbound_headers: Incomplete + normalize_inbound_headers: Incomplete + logger: Incomplete + def __init__( + self, + client_side: bool = ..., + header_encoding: Incomplete | None = ..., + validate_outbound_headers: bool = ..., + normalize_outbound_headers: bool = ..., + validate_inbound_headers: bool = ..., + normalize_inbound_headers: bool = ..., + logger: Incomplete | None = ..., + ) -> None: ... + @property + def header_encoding(self): ... + @header_encoding.setter + def header_encoding(self, value) -> None: ... 
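The h2 stubs above are generated with stubgen (see stubs/h2/README.md) and only declare the library's API. As orientation for reviewers, a minimal, illustrative sketch of the kind of call site that h2/config.pyi now lets the type checker verify; the argument values here are made up and this snippet is not part of the diff:

```python
import sys

from h2.config import H2Configuration, OutputLogger

# Hypothetical example values; only the class and keyword names come from the stub above.
config = H2Configuration(
    client_side=True,
    header_encoding="utf-8",          # decode received header bytes to str
    validate_inbound_headers=True,
    logger=OutputLogger(file=sys.stderr, trace_level=True),  # per-frame debug output
)
```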
diff --git a/test_runner/stubs/h2/connection.pyi b/test_runner/stubs/h2/connection.pyi new file mode 100644 index 0000000000..04be18ca74 --- /dev/null +++ b/test_runner/stubs/h2/connection.pyi @@ -0,0 +1,142 @@ +from enum import Enum, IntEnum + +from _typeshed import Incomplete + +from .config import H2Configuration as H2Configuration +from .errors import ErrorCodes as ErrorCodes +from .events import AlternativeServiceAvailable as AlternativeServiceAvailable +from .events import ConnectionTerminated as ConnectionTerminated +from .events import PingAckReceived as PingAckReceived +from .events import PingReceived as PingReceived +from .events import PriorityUpdated as PriorityUpdated +from .events import RemoteSettingsChanged as RemoteSettingsChanged +from .events import SettingsAcknowledged as SettingsAcknowledged +from .events import UnknownFrameReceived as UnknownFrameReceived +from .events import WindowUpdated as WindowUpdated +from .exceptions import DenialOfServiceError as DenialOfServiceError +from .exceptions import FlowControlError as FlowControlError +from .exceptions import FrameTooLargeError as FrameTooLargeError +from .exceptions import NoAvailableStreamIDError as NoAvailableStreamIDError +from .exceptions import NoSuchStreamError as NoSuchStreamError +from .exceptions import ProtocolError as ProtocolError +from .exceptions import RFC1122Error as RFC1122Error +from .exceptions import StreamClosedError as StreamClosedError +from .exceptions import StreamIDTooLowError as StreamIDTooLowError +from .exceptions import TooManyStreamsError as TooManyStreamsError +from .frame_buffer import FrameBuffer as FrameBuffer +from .settings import SettingCodes as SettingCodes +from .settings import Settings as Settings +from .stream import H2Stream as H2Stream +from .stream import StreamClosedBy as StreamClosedBy +from .utilities import guard_increment_window as guard_increment_window +from .windows import WindowManager as WindowManager + +class ConnectionState(Enum): + IDLE: int + CLIENT_OPEN: int + SERVER_OPEN: int + CLOSED: int + +class ConnectionInputs(Enum): + SEND_HEADERS: int + SEND_PUSH_PROMISE: int + SEND_DATA: int + SEND_GOAWAY: int + SEND_WINDOW_UPDATE: int + SEND_PING: int + SEND_SETTINGS: int + SEND_RST_STREAM: int + SEND_PRIORITY: int + RECV_HEADERS: int + RECV_PUSH_PROMISE: int + RECV_DATA: int + RECV_GOAWAY: int + RECV_WINDOW_UPDATE: int + RECV_PING: int + RECV_SETTINGS: int + RECV_RST_STREAM: int + RECV_PRIORITY: int + SEND_ALTERNATIVE_SERVICE: int + RECV_ALTERNATIVE_SERVICE: int + +class AllowedStreamIDs(IntEnum): + EVEN: int + ODD: int + +class H2ConnectionStateMachine: + state: Incomplete + def __init__(self) -> None: ... + def process_input(self, input_): ... + +class H2Connection: + DEFAULT_MAX_OUTBOUND_FRAME_SIZE: int + DEFAULT_MAX_INBOUND_FRAME_SIZE: Incomplete + HIGHEST_ALLOWED_STREAM_ID: Incomplete + MAX_WINDOW_INCREMENT: Incomplete + DEFAULT_MAX_HEADER_LIST_SIZE: Incomplete + MAX_CLOSED_STREAMS: Incomplete + state_machine: Incomplete + streams: Incomplete + highest_inbound_stream_id: int + highest_outbound_stream_id: int + encoder: Incomplete + decoder: Incomplete + config: Incomplete + local_settings: Incomplete + remote_settings: Incomplete + outbound_flow_control_window: Incomplete + max_outbound_frame_size: Incomplete + max_inbound_frame_size: Incomplete + incoming_buffer: Incomplete + def __init__(self, config: Incomplete | None = ...) -> None: ... + @property + def open_outbound_streams(self): ... + @property + def open_inbound_streams(self): ... 
+ @property + def inbound_flow_control_window(self): ... + def initiate_connection(self) -> None: ... + def initiate_upgrade_connection(self, settings_header: Incomplete | None = ...): ... + def get_next_available_stream_id(self): ... + def send_headers( + self, + stream_id, + headers, + end_stream: bool = ..., + priority_weight: Incomplete | None = ..., + priority_depends_on: Incomplete | None = ..., + priority_exclusive: Incomplete | None = ..., + ) -> None: ... + def send_data( + self, stream_id, data, end_stream: bool = ..., pad_length: Incomplete | None = ... + ) -> None: ... + def end_stream(self, stream_id) -> None: ... + def increment_flow_control_window( + self, increment, stream_id: Incomplete | None = ... + ) -> None: ... + def push_stream(self, stream_id, promised_stream_id, request_headers) -> None: ... + def ping(self, opaque_data) -> None: ... + def reset_stream(self, stream_id, error_code: int = ...) -> None: ... + def close_connection( + self, + error_code: int = ..., + additional_data: Incomplete | None = ..., + last_stream_id: Incomplete | None = ..., + ) -> None: ... + def update_settings(self, new_settings) -> None: ... + def advertise_alternative_service( + self, field_value, origin: Incomplete | None = ..., stream_id: Incomplete | None = ... + ) -> None: ... + def prioritize( + self, + stream_id, + weight: Incomplete | None = ..., + depends_on: Incomplete | None = ..., + exclusive: Incomplete | None = ..., + ) -> None: ... + def local_flow_control_window(self, stream_id): ... + def remote_flow_control_window(self, stream_id): ... + def acknowledge_received_data(self, acknowledged_size, stream_id) -> None: ... + def data_to_send(self, amount: Incomplete | None = ...): ... + def clear_outbound_data_buffer(self) -> None: ... + def receive_data(self, data): ... diff --git a/test_runner/stubs/h2/errors.pyi b/test_runner/stubs/h2/errors.pyi new file mode 100644 index 0000000000..b70c632f8c --- /dev/null +++ b/test_runner/stubs/h2/errors.pyi @@ -0,0 +1,17 @@ +import enum + +class ErrorCodes(enum.IntEnum): + NO_ERROR: int + PROTOCOL_ERROR: int + INTERNAL_ERROR: int + FLOW_CONTROL_ERROR: int + SETTINGS_TIMEOUT: int + STREAM_CLOSED: int + FRAME_SIZE_ERROR: int + REFUSED_STREAM: int + CANCEL: int + COMPRESSION_ERROR: int + CONNECT_ERROR: int + ENHANCE_YOUR_CALM: int + INADEQUATE_SECURITY: int + HTTP_1_1_REQUIRED: int diff --git a/test_runner/stubs/h2/events.pyi b/test_runner/stubs/h2/events.pyi new file mode 100644 index 0000000000..75d0a9e53b --- /dev/null +++ b/test_runner/stubs/h2/events.pyi @@ -0,0 +1,106 @@ +from _typeshed import Incomplete + +from .settings import ChangedSetting as ChangedSetting + +class Event: ... + +class RequestReceived(Event): + stream_id: Incomplete + headers: Incomplete + stream_ended: Incomplete + priority_updated: Incomplete + def __init__(self) -> None: ... + +class ResponseReceived(Event): + stream_id: Incomplete + headers: Incomplete + stream_ended: Incomplete + priority_updated: Incomplete + def __init__(self) -> None: ... + +class TrailersReceived(Event): + stream_id: Incomplete + headers: Incomplete + stream_ended: Incomplete + priority_updated: Incomplete + def __init__(self) -> None: ... + +class _HeadersSent(Event): ... +class _ResponseSent(_HeadersSent): ... +class _RequestSent(_HeadersSent): ... +class _TrailersSent(_HeadersSent): ... +class _PushedRequestSent(_HeadersSent): ... + +class InformationalResponseReceived(Event): + stream_id: Incomplete + headers: Incomplete + priority_updated: Incomplete + def __init__(self) -> None: ... 
+ +class DataReceived(Event): + stream_id: Incomplete + data: Incomplete + flow_controlled_length: Incomplete + stream_ended: Incomplete + def __init__(self) -> None: ... + +class WindowUpdated(Event): + stream_id: Incomplete + delta: Incomplete + def __init__(self) -> None: ... + +class RemoteSettingsChanged(Event): + changed_settings: Incomplete + def __init__(self) -> None: ... + @classmethod + def from_settings(cls, old_settings, new_settings): ... + +class PingReceived(Event): + ping_data: Incomplete + def __init__(self) -> None: ... + +class PingAckReceived(Event): + ping_data: Incomplete + def __init__(self) -> None: ... + +class StreamEnded(Event): + stream_id: Incomplete + def __init__(self) -> None: ... + +class StreamReset(Event): + stream_id: Incomplete + error_code: Incomplete + remote_reset: bool + def __init__(self) -> None: ... + +class PushedStreamReceived(Event): + pushed_stream_id: Incomplete + parent_stream_id: Incomplete + headers: Incomplete + def __init__(self) -> None: ... + +class SettingsAcknowledged(Event): + changed_settings: Incomplete + def __init__(self) -> None: ... + +class PriorityUpdated(Event): + stream_id: Incomplete + weight: Incomplete + depends_on: Incomplete + exclusive: Incomplete + def __init__(self) -> None: ... + +class ConnectionTerminated(Event): + error_code: Incomplete + last_stream_id: Incomplete + additional_data: Incomplete + def __init__(self) -> None: ... + +class AlternativeServiceAvailable(Event): + origin: Incomplete + field_value: Incomplete + def __init__(self) -> None: ... + +class UnknownFrameReceived(Event): + frame: Incomplete + def __init__(self) -> None: ... diff --git a/test_runner/stubs/h2/exceptions.pyi b/test_runner/stubs/h2/exceptions.pyi new file mode 100644 index 0000000000..82019d5ec1 --- /dev/null +++ b/test_runner/stubs/h2/exceptions.pyi @@ -0,0 +1,48 @@ +from _typeshed import Incomplete + +class H2Error(Exception): ... + +class ProtocolError(H2Error): + error_code: Incomplete + +class FrameTooLargeError(ProtocolError): + error_code: Incomplete + +class FrameDataMissingError(ProtocolError): + error_code: Incomplete + +class TooManyStreamsError(ProtocolError): ... + +class FlowControlError(ProtocolError): + error_code: Incomplete + +class StreamIDTooLowError(ProtocolError): + stream_id: Incomplete + max_stream_id: Incomplete + def __init__(self, stream_id, max_stream_id) -> None: ... + +class NoAvailableStreamIDError(ProtocolError): ... + +class NoSuchStreamError(ProtocolError): + stream_id: Incomplete + def __init__(self, stream_id) -> None: ... + +class StreamClosedError(NoSuchStreamError): + stream_id: Incomplete + error_code: Incomplete + def __init__(self, stream_id) -> None: ... + +class InvalidSettingsValueError(ProtocolError, ValueError): + error_code: Incomplete + def __init__(self, msg, error_code) -> None: ... + +class InvalidBodyLengthError(ProtocolError): + expected_length: Incomplete + actual_length: Incomplete + def __init__(self, expected, actual) -> None: ... + +class UnsupportedFrameError(ProtocolError): ... +class RFC1122Error(H2Error): ... 
+ +class DenialOfServiceError(ProtocolError): + error_code: Incomplete diff --git a/test_runner/stubs/h2/frame_buffer.pyi b/test_runner/stubs/h2/frame_buffer.pyi new file mode 100644 index 0000000000..f47adab704 --- /dev/null +++ b/test_runner/stubs/h2/frame_buffer.pyi @@ -0,0 +1,19 @@ +from .exceptions import ( + FrameDataMissingError as FrameDataMissingError, +) +from .exceptions import ( + FrameTooLargeError as FrameTooLargeError, +) +from .exceptions import ( + ProtocolError as ProtocolError, +) + +CONTINUATION_BACKLOG: int + +class FrameBuffer: + data: bytes + max_frame_size: int + def __init__(self, server: bool = ...) -> None: ... + def add_data(self, data) -> None: ... + def __iter__(self): ... + def __next__(self): ... diff --git a/test_runner/stubs/h2/settings.pyi b/test_runner/stubs/h2/settings.pyi new file mode 100644 index 0000000000..a352abe53e --- /dev/null +++ b/test_runner/stubs/h2/settings.pyi @@ -0,0 +1,61 @@ +import enum +from collections.abc import MutableMapping +from typing import Any + +from _typeshed import Incomplete +from h2.errors import ErrorCodes as ErrorCodes +from h2.exceptions import InvalidSettingsValueError as InvalidSettingsValueError + +class SettingCodes(enum.IntEnum): + HEADER_TABLE_SIZE: Incomplete + ENABLE_PUSH: Incomplete + MAX_CONCURRENT_STREAMS: Incomplete + INITIAL_WINDOW_SIZE: Incomplete + MAX_FRAME_SIZE: Incomplete + MAX_HEADER_LIST_SIZE: Incomplete + ENABLE_CONNECT_PROTOCOL: Incomplete + +class ChangedSetting: + setting: Incomplete + original_value: Incomplete + new_value: Incomplete + def __init__(self, setting, original_value, new_value) -> None: ... + +class Settings(MutableMapping[str, Any]): + def __init__(self, client: bool = ..., initial_values: Incomplete | None = ...) -> None: ... + def acknowledge(self): ... + @property + def header_table_size(self): ... + @header_table_size.setter + def header_table_size(self, value) -> None: ... + @property + def enable_push(self): ... + @enable_push.setter + def enable_push(self, value) -> None: ... + @property + def initial_window_size(self): ... + @initial_window_size.setter + def initial_window_size(self, value) -> None: ... + @property + def max_frame_size(self): ... + @max_frame_size.setter + def max_frame_size(self, value) -> None: ... + @property + def max_concurrent_streams(self): ... + @max_concurrent_streams.setter + def max_concurrent_streams(self, value) -> None: ... + @property + def max_header_list_size(self): ... + @max_header_list_size.setter + def max_header_list_size(self, value) -> None: ... + @property + def enable_connect_protocol(self): ... + @enable_connect_protocol.setter + def enable_connect_protocol(self, value) -> None: ... + def __getitem__(self, key): ... + def __setitem__(self, key, value) -> None: ... + def __delitem__(self, key) -> None: ... + def __iter__(self): ... + def __len__(self) -> int: ... + def __eq__(self, other): ... + def __ne__(self, other): ... 
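A short sketch of how the Settings mapping declared in h2/settings.pyi behaves in the real library, as I understand it (writes are staged and only become visible after acknowledge()); illustrative only, not part of this change:

```python
from h2.settings import ChangedSetting, SettingCodes, Settings

# Initial values are applied immediately.
s = Settings(client=True, initial_values={SettingCodes.MAX_CONCURRENT_STREAMS: 100})
assert s.max_concurrent_streams == 100

# Item assignment is staged until the peer ACKs our SETTINGS frame.
s[SettingCodes.INITIAL_WINDOW_SIZE] = 1 << 20
changed = s.acknowledge()  # dict of SettingCodes -> ChangedSetting for applied changes
assert isinstance(changed[SettingCodes.INITIAL_WINDOW_SIZE], ChangedSetting)
assert s.initial_window_size == 1 << 20
```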
diff --git a/test_runner/stubs/h2/stream.pyi b/test_runner/stubs/h2/stream.pyi new file mode 100644 index 0000000000..d52ab8e72b --- /dev/null +++ b/test_runner/stubs/h2/stream.pyi @@ -0,0 +1,184 @@ +from enum import Enum, IntEnum + +from _typeshed import Incomplete + +from .errors import ErrorCodes as ErrorCodes +from .events import ( + AlternativeServiceAvailable as AlternativeServiceAvailable, +) +from .events import ( + DataReceived as DataReceived, +) +from .events import ( + InformationalResponseReceived as InformationalResponseReceived, +) +from .events import ( + PushedStreamReceived as PushedStreamReceived, +) +from .events import ( + RequestReceived as RequestReceived, +) +from .events import ( + ResponseReceived as ResponseReceived, +) +from .events import ( + StreamEnded as StreamEnded, +) +from .events import ( + StreamReset as StreamReset, +) +from .events import ( + TrailersReceived as TrailersReceived, +) +from .events import ( + WindowUpdated as WindowUpdated, +) +from .exceptions import ( + FlowControlError as FlowControlError, +) +from .exceptions import ( + InvalidBodyLengthError as InvalidBodyLengthError, +) +from .exceptions import ( + ProtocolError as ProtocolError, +) +from .exceptions import ( + StreamClosedError as StreamClosedError, +) +from .utilities import ( + HeaderValidationFlags as HeaderValidationFlags, +) +from .utilities import ( + authority_from_headers as authority_from_headers, +) +from .utilities import ( + extract_method_header as extract_method_header, +) +from .utilities import ( + guard_increment_window as guard_increment_window, +) +from .utilities import ( + is_informational_response as is_informational_response, +) +from .utilities import ( + normalize_inbound_headers as normalize_inbound_headers, +) +from .utilities import ( + normalize_outbound_headers as normalize_outbound_headers, +) +from .utilities import ( + validate_headers as validate_headers, +) +from .utilities import ( + validate_outbound_headers as validate_outbound_headers, +) +from .windows import WindowManager as WindowManager + +class StreamState(IntEnum): + IDLE: int + RESERVED_REMOTE: int + RESERVED_LOCAL: int + OPEN: int + HALF_CLOSED_REMOTE: int + HALF_CLOSED_LOCAL: int + CLOSED: int + +class StreamInputs(Enum): + SEND_HEADERS: int + SEND_PUSH_PROMISE: int + SEND_RST_STREAM: int + SEND_DATA: int + SEND_WINDOW_UPDATE: int + SEND_END_STREAM: int + RECV_HEADERS: int + RECV_PUSH_PROMISE: int + RECV_RST_STREAM: int + RECV_DATA: int + RECV_WINDOW_UPDATE: int + RECV_END_STREAM: int + RECV_CONTINUATION: int + SEND_INFORMATIONAL_HEADERS: int + RECV_INFORMATIONAL_HEADERS: int + SEND_ALTERNATIVE_SERVICE: int + RECV_ALTERNATIVE_SERVICE: int + UPGRADE_CLIENT: int + UPGRADE_SERVER: int + +class StreamClosedBy(Enum): + SEND_END_STREAM: int + RECV_END_STREAM: int + SEND_RST_STREAM: int + RECV_RST_STREAM: int + +STREAM_OPEN: Incomplete + +class H2StreamStateMachine: + state: Incomplete + stream_id: Incomplete + client: Incomplete + headers_sent: Incomplete + trailers_sent: Incomplete + headers_received: Incomplete + trailers_received: Incomplete + stream_closed_by: Incomplete + def __init__(self, stream_id) -> None: ... + def process_input(self, input_): ... + def request_sent(self, previous_state): ... + def response_sent(self, previous_state): ... + def request_received(self, previous_state): ... + def response_received(self, previous_state): ... + def data_received(self, previous_state): ... + def window_updated(self, previous_state): ... 
+ def stream_half_closed(self, previous_state): ... + def stream_ended(self, previous_state): ... + def stream_reset(self, previous_state): ... + def send_new_pushed_stream(self, previous_state): ... + def recv_new_pushed_stream(self, previous_state): ... + def send_push_promise(self, previous_state): ... + def recv_push_promise(self, previous_state): ... + def send_end_stream(self, previous_state) -> None: ... + def send_reset_stream(self, previous_state) -> None: ... + def reset_stream_on_error(self, previous_state) -> None: ... + def recv_on_closed_stream(self, previous_state) -> None: ... + def send_on_closed_stream(self, previous_state) -> None: ... + def recv_push_on_closed_stream(self, previous_state) -> None: ... + def send_push_on_closed_stream(self, previous_state) -> None: ... + def send_informational_response(self, previous_state): ... + def recv_informational_response(self, previous_state): ... + def recv_alt_svc(self, previous_state): ... + def send_alt_svc(self, previous_state) -> None: ... + +class H2Stream: + state_machine: Incomplete + stream_id: Incomplete + max_outbound_frame_size: Incomplete + request_method: Incomplete + outbound_flow_control_window: Incomplete + config: Incomplete + def __init__(self, stream_id, config, inbound_window_size, outbound_window_size) -> None: ... + @property + def inbound_flow_control_window(self): ... + @property + def open(self): ... + @property + def closed(self): ... + @property + def closed_by(self): ... + def upgrade(self, client_side) -> None: ... + def send_headers(self, headers, encoder, end_stream: bool = ...): ... + def push_stream_in_band(self, related_stream_id, headers, encoder): ... + def locally_pushed(self): ... + def send_data(self, data, end_stream: bool = ..., pad_length: Incomplete | None = ...): ... + def end_stream(self): ... + def advertise_alternative_service(self, field_value): ... + def increase_flow_control_window(self, increment): ... + def receive_push_promise_in_band(self, promised_stream_id, headers, header_encoding): ... + def remotely_pushed(self, pushed_headers): ... + def receive_headers(self, headers, end_stream, header_encoding): ... + def receive_data(self, data, end_stream, flow_control_len): ... + def receive_window_update(self, increment): ... + def receive_continuation(self) -> None: ... + def receive_alt_svc(self, frame): ... + def reset_stream(self, error_code: int = ...): ... + def stream_reset(self, frame): ... + def acknowledge_received_data(self, acknowledged_size): ... diff --git a/test_runner/stubs/h2/utilities.pyi b/test_runner/stubs/h2/utilities.pyi new file mode 100644 index 0000000000..e0a8d55d1d --- /dev/null +++ b/test_runner/stubs/h2/utilities.pyi @@ -0,0 +1,25 @@ +from typing import NamedTuple + +from _typeshed import Incomplete + +from .exceptions import FlowControlError as FlowControlError +from .exceptions import ProtocolError as ProtocolError + +UPPER_RE: Incomplete +CONNECTION_HEADERS: Incomplete + +def extract_method_header(headers): ... +def is_informational_response(headers): ... +def guard_increment_window(current, increment): ... +def authority_from_headers(headers): ... + +class HeaderValidationFlags(NamedTuple): + is_client: Incomplete + is_trailer: Incomplete + is_response_header: Incomplete + is_push_promise: Incomplete + +def validate_headers(headers, hdr_validation_flags): ... +def normalize_outbound_headers(headers, hdr_validation_flags): ... +def normalize_inbound_headers(headers, hdr_validation_flags): ... 
+def validate_outbound_headers(headers, hdr_validation_flags): ... diff --git a/test_runner/stubs/h2/windows.pyi b/test_runner/stubs/h2/windows.pyi new file mode 100644 index 0000000000..7dc78e431c --- /dev/null +++ b/test_runner/stubs/h2/windows.pyi @@ -0,0 +1,13 @@ +from _typeshed import Incomplete + +from .exceptions import FlowControlError as FlowControlError + +LARGEST_FLOW_CONTROL_WINDOW: Incomplete + +class WindowManager: + max_window_size: Incomplete + current_window_size: Incomplete + def __init__(self, max_window_size) -> None: ... + def window_consumed(self, size) -> None: ... + def window_opened(self, size) -> None: ... + def process_bytes(self, size): ...
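Finally, h2 is a sans-IO state machine, so the connection, events, and stream modules stubbed above can be exercised without any sockets. The standalone sketch below (illustrative only, not part of this diff; the request headers are made up) shows the API surface these stubs cover being driven end to end:

```python
from h2.config import H2Configuration
from h2.connection import H2Connection
from h2.events import RequestReceived, StreamEnded

# Two sans-IO state machines wired back to back, standing in for a real transport.
client = H2Connection(config=H2Configuration(client_side=True, header_encoding="utf-8"))
server = H2Connection(config=H2Configuration(client_side=False, header_encoding="utf-8"))

# Each side queues its connection preface / SETTINGS frames.
client.initiate_connection()
server.initiate_connection()

# Shuttle the queued bytes across, as a socket would.
server.receive_data(client.data_to_send())
client.receive_data(server.data_to_send())

# Open a stream and send a trivial GET request (hypothetical authority and path).
stream_id = client.get_next_available_stream_id()
client.send_headers(
    stream_id,
    [
        (":method", "GET"),
        (":path", "/"),
        (":scheme", "http"),
        (":authority", "example.com"),
    ],
    end_stream=True,
)
events = server.receive_data(client.data_to_send())
assert any(isinstance(e, RequestReceived) for e in events)
assert any(isinstance(e, StreamEnded) for e in events)
```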