lint

SK: re-elect leader when backup lag is high (#781)
We observed that the offloader fails to upload a segment due to a race condition between XLOG SWITCH and PG starting to stream WAL. The wal_backup task continuously fails to upload a full segment while the segment remains partial on disk. The consequence is that commit_lsn on all SKs moves forward but backup_lsn stays the same. Then, all SKs run out of disk space. See go/sk-ood-xlog-switch for more details. To mitigate this issue, we will re-elect a new offloader if the current offloader is lagging behind too much. Each SK makes the decision locally, but they are aware of each other's commit and backup LSNs. The new algorithm is: - determine_offloader will pick a SK, say SK-1. - Each SK checks -- if commit_lsn - backup_lsn > threshold, -- -- remove SK-1 from the candidates and call determine_offloader again. SK-1 will step down and all SKs will once more converge on the same leader. After the backup has caught up, the leader will become SK-1 again. This also helps when SK-1 is slow to back up. I'll set the re-elect backup lag to 4 GB later. It is set to 128 MB in dev to trigger the code more frequently. DEV. (cherry picked from commit 7286f79f9536380d321e2442318bd8a631269499)
2026-05-22 23:50:39 +00:00 · 2025-07-02 13:44:13 +01:00 · 2025-07-02 08:32:45 +01:00
115 changed files with 1552 additions and 3835 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -4,7 +4,6 @@
 !Cargo.lock
 !Cargo.toml
 !Makefile
-!postgres.mk
 !rust-toolchain.toml
 !scripts/ninstall.sh
 !docker-compose/run-tests.sh
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -94,6 +94,11 @@ jobs:
        run: |
          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)

+      - name: Get postgres headers ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-headers-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
      - name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
        with:
@@ -135,12 +140,6 @@ jobs:
          name: pg_install--v17
          path: pg_install/v17

-      # `actions/download-artifact` doesn't preserve permissions:
-      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
-      - name: Make pg_install/v*/bin/* executable
-        run: |
-          chmod +x pg_install/v*/bin/*
-
      - name: Cache walproposer-lib
        id: cache_walproposer_lib
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
@@ -168,7 +167,7 @@ jobs:
      - name: Build walproposer-lib (only for v17)
        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
        run:
-          make walproposer-lib -j$(sysctl -n hw.ncpu) PG_INSTALL_CACHED=1
+          make walproposer-lib -j$(sysctl -n hw.ncpu)

      - name: Upload "build/walproposer-lib" artifact
        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -69,7 +69,7 @@ jobs:
          submodules: true

      - name: Check for file changes
-        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36  # v3.0.2
+        uses: step-security/paths-filter@v3
        id: files-changed
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -153,7 +153,7 @@ jobs:
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"

    - name: Benchmark database maintenance
-      if: ${{ matrix.test_maintenance }}
+      if: ${{ matrix.test_maintenance == 'true' }}
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -53,7 +53,7 @@ jobs:
          submodules: true

      - name: Check for Postgres changes
-        uses: dorny/paths-filter@1441771bbfdd59dcd748680ee64ebd8faab1a242  #v3
+        uses: step-security/paths-filter@v3
        id: files_changed
        with:
          token: ${{ github.token }}
--- a/.github/workflows/pre-merge-checks.yml
+++ b/.github/workflows/pre-merge-checks.yml
@@ -34,7 +34,7 @@ jobs:

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-      - uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
+      - uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
        id: python-src
        with:
          files: |
@@ -45,7 +45,7 @@ jobs:
            poetry.lock
            pyproject.toml

-      - uses: tj-actions/changed-files@ed68ef82c095e0d48ec87eccea555d944a631a4c # v46.0.5
+      - uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1
        id: rust-src
        with:
          files: |
--- a/.github/workflows/proxy-benchmark.yml
+++ b/.github/workflows/proxy-benchmark.yml
@@ -60,23 +60,22 @@ jobs:
        } >> "$GITHUB_ENV"

    - name: Run proxy-bench
-      run: ${PROXY_BENCH_PATH}/run.sh
+      run: ./${PROXY_BENCH_PATH}/run.sh

    - name: Ingest Bench Results # neon repo script
-      if: always()
+      if: success()
      run: |
        mkdir -p $TEST_OUTPUT
        python $NEON_DIR/scripts/proxy_bench_results_ingest.py --out $TEST_OUTPUT

    - name: Push Metrics to Proxy perf database
-      if: always()
+      if: success()
      env:
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PROXY_TEST_RESULT_CONNSTR }}"
        REPORT_FROM: $TEST_OUTPUT
      run: $NEON_DIR/scripts/generate_and_push_perf_report.sh

    - name: Docker cleanup
-      if: always()
      run: docker compose down

    - name: Notify Failure
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1279,7 +1279,6 @@ dependencies = [
 "remote_storage",
 "serde",
 "serde_json",
- "url",
 "utils",
 ]

@@ -1317,7 +1316,6 @@ dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",
 "p256 0.13.2",
- "pageserver_page_api",
 "postgres",
 "postgres_initdb",
 "postgres_versioninfo",
@@ -1337,7 +1335,6 @@ dependencies = [
 "tokio-postgres",
 "tokio-stream",
 "tokio-util",
- "tonic 0.13.1",
 "tower 0.5.2",
 "tower-http",
 "tower-otel",
@@ -4411,7 +4408,6 @@ dependencies = [
 "postgres_backend",
 "postgres_ffi_types",
 "postgres_versioninfo",
- "posthog_client_lite",
 "rand 0.8.5",
 "remote_storage",
 "reqwest",
@@ -4422,7 +4418,6 @@ dependencies = [
 "strum",
 "strum_macros",
 "thiserror 1.0.69",
- "tracing",
 "tracing-utils",
 "utils",
 ]
@@ -4479,14 +4474,12 @@ dependencies = [
 "bytes",
 "futures",
 "pageserver_api",
- "postgres_ffi_types",
+ "postgres_ffi",
 "prost 0.13.5",
- "prost-types 0.13.5",
 "strum",
 "strum_macros",
 "thiserror 1.0.69",
 "tokio",
- "tokio-util",
 "tonic 0.13.1",
 "tonic-build",
 "utils",
@@ -5159,7 +5152,7 @@ dependencies = [
 "petgraph",
 "prettyplease",
 "prost 0.13.5",
- "prost-types 0.13.5",
+ "prost-types 0.13.3",
 "regex",
 "syn 2.0.100",
 "tempfile",
@@ -5202,9 +5195,9 @@ dependencies = [

 [[package]]
 name = "prost-types"
-version = "0.13.5"
+version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
+checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670"
 dependencies = [
 "prost 0.13.5",
 ]
@@ -6811,7 +6804,6 @@ dependencies = [
 "chrono",
 "clap",
 "clashmap",
- "compute_api",
 "control_plane",
 "cron",
 "diesel",
@@ -7645,7 +7637,7 @@ dependencies = [
 "prettyplease",
 "proc-macro2",
 "prost-build 0.13.3",
- "prost-types 0.13.5",
+ "prost-types 0.13.3",
 "quote",
 "syn 2.0.100",
 ]
@@ -7657,7 +7649,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f9687bd5bfeafebdded2356950f278bba8226f0b32109537c4253406e09aafe1"
 dependencies = [
 "prost 0.13.5",
- "prost-types 0.13.5",
+ "prost-types 0.13.3",
 "tokio",
 "tokio-stream",
 "tonic 0.13.1",
@@ -8686,6 +8678,7 @@ dependencies = [
 "num-iter",
 "num-rational",
 "num-traits",
+ "once_cell",
 "p256 0.13.2",
 "parquet",
 "prettyplease",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -152,7 +152,6 @@ pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointe
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.13.5"
-prost-types = "0.13.5"
 rand = "0.8"
 redis = { version = "0.29.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
@@ -200,7 +199,7 @@ tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
 tokio-stream = "0.1"
 tokio-tar = "0.3"
-tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
+tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
 tonic = { version = "0.13.1", default-features = false, features = ["channel", "codegen", "gzip", "prost", "router", "server", "tls-ring", "tls-native-roots", "zstd"] }
--- a/1
+++ b/1
@@ -40,7 +40,6 @@ COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
 COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
 COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
-COPY --chown=nonroot postgres.mk postgres.mk
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh

 ENV BUILD_TYPE=release
--- a/129
+++ b/129
@@ -4,14 +4,11 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 # managers.
 POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/

-# Supported PostgreSQL versions
-POSTGRES_VERSIONS = v17 v16 v15 v14
-
 # CARGO_BUILD_FLAGS: Extra flags to pass to `cargo build`. `--locked`
 # and `--features testing` are popular examples.
 #
-# CARGO_PROFILE: Set to override the cargo profile to use. By default,
-# it is derived from BUILD_TYPE.
+# CARGO_PROFILE: You can also set to override the cargo profile to
+# use. By default, it is derived from BUILD_TYPE.

 # All intermediate build artifacts are stored here.
 BUILD_DIR := build
@@ -98,24 +95,91 @@ CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
 # Top level Makefile to build Neon and PostgreSQL
 #
 .PHONY: all
-all: neon postgres-install neon-pg-ext
+all: neon postgres neon-pg-ext

 ### Neon Rust bits
 #
 # The 'postgres_ffi' depends on the Postgres headers.
 .PHONY: neon
-neon: postgres-headers-install walproposer-lib cargo-target-dir
+neon: postgres-headers walproposer-lib cargo-target-dir
 	+@echo "Compiling Neon"
 	$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE)
-
 .PHONY: cargo-target-dir
 cargo-target-dir:
 	# https://github.com/rust-lang/cargo/issues/14281
 	mkdir -p target
 	test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG

+### PostgreSQL parts
+# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
+# to avoid the duplication in the future, but it's tolerable for now.
+#
+$(BUILD_DIR)/%/config.status:
+	mkdir -p $(BUILD_DIR)
+	test -e $(BUILD_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(BUILD_DIR)/CACHEDIR.TAG
+
+	+@echo "Configuring Postgres $* build"
+	@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
+		echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
+		echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
+		exit 1; }
+	mkdir -p $(BUILD_DIR)/$*
+
+	VERSION=$*; \
+	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
+	(cd $(BUILD_DIR)/$$VERSION && \
+	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
+		CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
+		$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
+		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
+
+# nicer alias to run 'configure'
+# Note: I've been unable to use templates for this part of our configuration.
+# I'm not sure why it wouldn't work, but this is the only place (apart from
+# the "build-all-versions" entry points) where direct mention of PostgreSQL
+# versions is used.
+.PHONY: postgres-configure-v17
+postgres-configure-v17: $(BUILD_DIR)/v17/config.status
+.PHONY: postgres-configure-v16
+postgres-configure-v16: $(BUILD_DIR)/v16/config.status
+.PHONY: postgres-configure-v15
+postgres-configure-v15: $(BUILD_DIR)/v15/config.status
+.PHONY: postgres-configure-v14
+postgres-configure-v14: $(BUILD_DIR)/v14/config.status
+
+# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
+.PHONY: postgres-headers-%
+postgres-headers-%: postgres-configure-%
+	+@echo "Installing PostgreSQL $* headers"
+	$(MAKE) -C $(BUILD_DIR)/$*/src/include MAKELEVEL=0 install
+
+# Compile and install PostgreSQL
+.PHONY: postgres-%
+postgres-%: postgres-configure-% \
+		  postgres-headers-% # to prevent `make install` conflicts with neon's `postgres-headers`
+	+@echo "Compiling PostgreSQL $*"
+	$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 install
+	+@echo "Compiling pg_prewarm $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_prewarm install
+	+@echo "Compiling pg_buffercache $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_buffercache install
+	+@echo "Compiling pg_visibility $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_visibility install
+	+@echo "Compiling pageinspect $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pageinspect install
+	+@echo "Compiling pg_trgm $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_trgm install
+	+@echo "Compiling amcheck $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/amcheck install
+	+@echo "Compiling test_decoding $*"
+	$(MAKE) -C $(BUILD_DIR)/$*/contrib/test_decoding install
+
+.PHONY: postgres-check-%
+postgres-check-%: postgres-%
+	$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 check
+
 .PHONY: neon-pg-ext-%
-neon-pg-ext-%: postgres-install-%
+neon-pg-ext-%: postgres-%
 	+@echo "Compiling neon-specific Postgres extensions for $*"
 	mkdir -p $(BUILD_DIR)/pgxn-$*
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
@@ -154,14 +218,39 @@ ifeq ($(UNAME_S),Linux)
 		pg_crc32c.o
 endif

-# Shorthand to call neon-pg-ext-% target for all Postgres versions
 .PHONY: neon-pg-ext
-neon-pg-ext: $(foreach pg_version,$(POSTGRES_VERSIONS),neon-pg-ext-$(pg_version))
+neon-pg-ext: \
+	neon-pg-ext-v14 \
+	neon-pg-ext-v15 \
+	neon-pg-ext-v16 \
+	neon-pg-ext-v17
+
+# shorthand to build all Postgres versions
+.PHONY: postgres
+postgres: \
+	postgres-v14 \
+	postgres-v15 \
+	postgres-v16 \
+	postgres-v17
+
+.PHONY: postgres-headers
+postgres-headers: \
+	postgres-headers-v14 \
+	postgres-headers-v15 \
+	postgres-headers-v16 \
+	postgres-headers-v17
+
+.PHONY: postgres-check
+postgres-check: \
+	postgres-check-v14 \
+	postgres-check-v15 \
+	postgres-check-v16 \
+	postgres-check-v17

 # This removes everything
 .PHONY: distclean
 distclean:
-	$(RM) -r $(POSTGRES_INSTALL_DIR) $(BUILD_DIR)
+	$(RM) -r $(POSTGRES_INSTALL_DIR)
 	$(CARGO_CMD_PREFIX) cargo clean

 .PHONY: fmt
@@ -209,19 +298,3 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 .PHONY: setup-pre-commit-hook
 setup-pre-commit-hook:
 	ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit
-
-# Targets for building PostgreSQL are defined in postgres.mk.
-#
-# But if the caller has indicated that PostgreSQL is already
-# installed, by setting the PG_INSTALL_CACHED variable, skip it.
-ifdef PG_INSTALL_CACHED
-postgres-install: skip-install
-$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-install-$(pg_version)): skip-install
-postgres-headers-install:
-	+@echo "Skipping installation of PostgreSQL headers because PG_INSTALL_CACHED is set"
-skip-install:
-	+@echo "Skipping PostgreSQL installation because PG_INSTALL_CACHED is set"
-
-else
-include postgres.mk
-endif
--- a/build-tools.Dockerfile
+++ b/build-tools.Dockerfile
@@ -165,7 +165,6 @@ RUN curl -fsSL \
    && rm sql_exporter.tar.gz

 # protobuf-compiler (protoc)
-# Keep the version the same as in compute/compute-node.Dockerfile
 ENV PROTOC_VERSION=25.1
 RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
    && unzip -q protoc.zip -d protoc \
@@ -180,7 +179,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
    && mv s5cmd /usr/local/bin/s5cmd

 # LLVM
-ENV LLVM_VERSION=20
+ENV LLVM_VERSION=19
 RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
    && echo "deb http://apt.llvm.org/${DEBIAN_VERSION}/ llvm-toolchain-${DEBIAN_VERSION}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
    && apt update \
@@ -293,7 +292,7 @@ WORKDIR /home/nonroot

 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.88.0
+ENV RUSTC_VERSION=1.87.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -115,9 +115,6 @@ ARG EXTENSIONS=all
 FROM $BASE_IMAGE_SHA AS build-deps
 ARG DEBIAN_VERSION

-# Keep in sync with build-tools.Dockerfile
-ENV PROTOC_VERSION=25.1
-
 # Use strict mode for bash to catch errors early
 SHELL ["/bin/bash", "-euo", "pipefail", "-c"]

@@ -152,14 +149,8 @@ RUN case $DEBIAN_VERSION in \
    libclang-dev \
    jsonnet \
    $VERSION_INSTALLS \
-    && apt clean && rm -rf /var/lib/apt/lists/* \
-    && useradd -ms /bin/bash nonroot -b /home \
-    # Install protoc from binary release, since Debian's versions are too old.
-    && curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
-    && unzip -q protoc.zip -d protoc \
-    && mv protoc/bin/protoc /usr/local/bin/protoc \
-    && mv protoc/include/google /usr/local/include/google \
-    && rm -rf protoc.zip protoc
+    && apt clean && rm -rf /var/lib/apt/lists/* && \
+    useradd -ms /bin/bash nonroot -b /home

 #########################################################################################
 #
@@ -1179,7 +1170,7 @@ COPY --from=pgrag-src /ext-src/ /ext-src/
 # Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise
 WORKDIR /ext-src/onnxruntime-src
 RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
-    python3 python3-pip python3-venv && \
+    python3 python3-pip python3-venv protobuf-compiler && \
    apt clean && rm -rf /var/lib/apt/lists/* && \
    python3 -m venv venv && \
    . venv/bin/activate && \
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -38,7 +38,6 @@ once_cell.workspace = true
 opentelemetry.workspace = true
 opentelemetry_sdk.workspace = true
 p256 = { version = "0.13", features = ["pem"] }
-pageserver_page_api.workspace = true
 postgres.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["json"] }
@@ -54,7 +53,6 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
-tonic.workspace = true
 tower-otel.workspace = true
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -36,8 +36,6 @@
 use std::ffi::OsString;
 use std::fs::File;
 use std::process::exit;
-use std::sync::Arc;
-use std::sync::atomic::AtomicU64;
 use std::sync::mpsc;
 use std::thread;
 use std::time::Duration;
@@ -192,9 +190,7 @@ fn main() -> Result<()> {
            cgroup: cli.cgroup,
            #[cfg(target_os = "linux")]
            vm_monitor_addr: cli.vm_monitor_addr,
-            installed_extensions_collection_interval: Arc::new(AtomicU64::new(
-                cli.installed_extensions_collection_interval,
-            )),
+            installed_extensions_collection_interval: cli.installed_extensions_collection_interval,
        },
        config,
    )?;
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -6,7 +6,7 @@ use compute_api::responses::{
    LfcPrewarmState, TlsConfig,
 };
 use compute_api::spec::{
-    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
+    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent,
 };
 use futures::StreamExt;
 use futures::future::join_all;
@@ -15,17 +15,17 @@ use itertools::Itertools;
 use nix::sys::signal::{Signal, kill};
 use nix::unistd::Pid;
 use once_cell::sync::Lazy;
-use pageserver_page_api::{self as page_api, BaseBackupCompression};
 use postgres;
 use postgres::NoTls;
 use postgres::error::SqlState;
 use remote_storage::{DownloadError, RemotePath};
 use std::collections::{HashMap, HashSet};
+use std::net::SocketAddr;
 use std::os::unix::fs::{PermissionsExt, symlink};
 use std::path::Path;
 use std::process::{Command, Stdio};
 use std::str::FromStr;
-use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
+use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
@@ -36,7 +36,6 @@ use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;
 use utils::measured_stream::MeasuredReader;
 use utils::pid_file;
-use utils::shard::{ShardCount, ShardIndex, ShardNumber};

 use crate::configurator::launch_configurator;
 use crate::disk_quota::set_disk_quota;
@@ -70,7 +69,6 @@ pub static BUILD_TAG: Lazy<String> = Lazy::new(|| {
        .unwrap_or(BUILD_TAG_DEFAULT)
        .to_string()
 });
-const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;

 /// Static configuration params that don't change after startup. These mostly
 /// come from the CLI args, or are derived from them.
@@ -104,7 +102,7 @@ pub struct ComputeNodeParams {
    pub remote_ext_base_url: Option<Url>,

    /// Interval for installed extensions collection
-    pub installed_extensions_collection_interval: Arc<AtomicU64>,
+    pub installed_extensions_collection_interval: u64,
 }

 /// Compute node info shared across several `compute_ctl` threads.
@@ -127,9 +125,6 @@ pub struct ComputeNode {
    // key: ext_archive_name, value: started download time, download_completed?
    pub ext_download_progress: RwLock<HashMap<String, (DateTime<Utc>, bool)>>,
    pub compute_ctl_config: ComputeCtlConfig,
-
-    /// Handle to the extension stats collection task
-    extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
 }

 // store some metrics about download size that might impact startup time
@@ -223,8 +218,7 @@ pub struct ParsedSpec {
    pub pageserver_connstr: String,
    pub safekeeper_connstrings: Vec<String>,
    pub storage_auth_token: Option<String>,
-    /// k8s dns name and port
-    pub endpoint_storage_addr: Option<String>,
+    pub endpoint_storage_addr: Option<SocketAddr>,
    pub endpoint_storage_token: Option<String>,
 }

@@ -319,10 +313,13 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
                .or(Err("invalid timeline id"))?
        };

-        let endpoint_storage_addr: Option<String> = spec
+        let endpoint_storage_addr: Option<SocketAddr> = spec
            .endpoint_storage_addr
            .clone()
-            .or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"));
+            .or_else(|| spec.cluster.settings.find("neon.endpoint_storage_addr"))
+            .unwrap_or_default()
+            .parse()
+            .ok();
        let endpoint_storage_token = spec
            .endpoint_storage_token
            .clone()
@@ -432,7 +429,6 @@ impl ComputeNode {
            state_changed: Condvar::new(),
            ext_download_progress: RwLock::new(HashMap::new()),
            compute_ctl_config: config.compute_ctl_config,
-            extension_stats_task: Mutex::new(None),
        })
    }

@@ -520,9 +516,6 @@ impl ComputeNode {
            None
        };

-        // Terminate the extension stats collection task
-        this.terminate_extension_stats_task();
-
        // Terminate the vm_monitor so it releases the file watcher on
        // /sys/fs/cgroup/neon-postgres.
        // Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -1005,80 +998,13 @@ impl ComputeNode {
        Ok(())
    }

-    /// Fetches a basebackup from the Pageserver using the compute state's Pageserver connstring and
-    /// unarchives it to `pgdata` directory, replacing any existing contents.
+    // Get basebackup from the libpq connection to pageserver using `connstr` and
+    // unarchive it to `pgdata` directory overriding all its previous content.
    #[instrument(skip_all, fields(%lsn))]
    fn try_get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
+        let start_time = Instant::now();

-        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
-        let started = Instant::now();
-
-        let (connected, size) = match PageserverProtocol::from_connstring(shard0_connstr)? {
-            PageserverProtocol::Libpq => self.try_get_basebackup_libpq(spec, lsn)?,
-            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
-        };
-
-        let mut state = self.state.lock().unwrap();
-        state.metrics.pageserver_connect_micros =
-            connected.duration_since(started).as_micros() as u64;
-        state.metrics.basebackup_bytes = size as u64;
-        state.metrics.basebackup_ms = started.elapsed().as_millis() as u64;
-
-        Ok(())
-    }
-
-    /// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
-    /// the connection was established, and the (compressed) size of the basebackup.
-    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
-        let shard0_connstr = spec
-            .pageserver_connstr
-            .split(',')
-            .next()
-            .unwrap()
-            .to_string();
-        let shard_index = match spec.pageserver_connstr.split(',').count() as u8 {
-            0 | 1 => ShardIndex::unsharded(),
-            count => ShardIndex::new(ShardNumber(0), ShardCount(count)),
-        };
-
-        let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
-            let mut client = page_api::Client::new(
-                shard0_connstr,
-                spec.tenant_id,
-                spec.timeline_id,
-                shard_index,
-                spec.storage_auth_token.clone(),
-                None, // NB: base backups use payload compression
-            )
-            .await?;
-            let connected = Instant::now();
-            let reader = client
-                .get_base_backup(page_api::GetBaseBackupRequest {
-                    lsn: (lsn != Lsn(0)).then_some(lsn),
-                    compression: BaseBackupCompression::Gzip,
-                    replica: spec.spec.mode != ComputeMode::Primary,
-                    full: false,
-                })
-                .await?;
-            anyhow::Ok((reader, connected))
-        })?;
-
-        let mut reader = MeasuredReader::new(tokio_util::io::SyncIoBridge::new(reader));
-
-        // Set `ignore_zeros` so that unpack() reads the entire stream and doesn't just stop at the
-        // end-of-archive marker. If the server errors, the tar::Builder drop handler will write an
-        // end-of-archive marker before the error is emitted, and we would not see the error.
-        let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut reader));
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.params.pgdata)?;
-
-        Ok((connected, reader.get_byte_count()))
-    }
-
-    /// Fetches a basebackup via libpq. The connstring must use postgresql://. Returns the timestamp
-    /// when the connection was established, and the (compressed) size of the basebackup.
-    fn try_get_basebackup_libpq(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
        let shard0_connstr = spec.pageserver_connstr.split(',').next().unwrap();
        let mut config = postgres::Config::from_str(shard0_connstr)?;

@@ -1092,14 +1018,16 @@ impl ComputeNode {
        }

        config.application_name("compute_ctl");
-        config.options(&format!(
-            "-c neon.compute_mode={}",
-            spec.spec.mode.to_type_str()
-        ));
+        if let Some(spec) = &compute_state.pspec {
+            config.options(&format!(
+                "-c neon.compute_mode={}",
+                spec.spec.mode.to_type_str()
+            ));
+        }

        // Connect to pageserver
        let mut client = config.connect(NoTls)?;
-        let connected = Instant::now();
+        let pageserver_connect_micros = start_time.elapsed().as_micros() as u64;

        let basebackup_cmd = match lsn {
            Lsn(0) => {
@@ -1136,13 +1064,16 @@ impl ComputeNode {
        // Set `ignore_zeros` so that unpack() reads all the Copy data and
        // doesn't stop at the end-of-archive marker. Otherwise, if the server
        // sends an Error after finishing the tarball, we will not notice it.
-        // The tar::Builder drop handler will write an end-of-archive marker
-        // before emitting the error, and we would not see it otherwise.
        let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
        ar.set_ignore_zeros(true);
        ar.unpack(&self.params.pgdata)?;

-        Ok((connected, measured_reader.get_byte_count()))
+        // Report metrics
+        let mut state = self.state.lock().unwrap();
+        state.metrics.pageserver_connect_micros = pageserver_connect_micros;
+        state.metrics.basebackup_bytes = measured_reader.get_byte_count() as u64;
+        state.metrics.basebackup_ms = start_time.elapsed().as_millis() as u64;
+        Ok(())
    }

    // Gets the basebackup in a retry loop
@@ -1679,8 +1610,6 @@ impl ComputeNode {
            tls_config = self.compute_ctl_config.tls.clone();
        }

-        self.update_installed_extensions_collection_interval(&spec);
-
        let max_concurrent_connections = self.max_service_connections(compute_state, &spec);

        // Merge-apply spec & changes to PostgreSQL state.
@@ -1745,8 +1674,6 @@ impl ComputeNode {

        let tls_config = self.tls_config(&spec);

-        self.update_installed_extensions_collection_interval(&spec);
-
        if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings {
            info!("tuning pgbouncer");

@@ -2351,20 +2278,10 @@ LIMIT 100",
    }

    pub fn spawn_extension_stats_task(&self) {
-        // Cancel any existing task
-        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
-            handle.abort();
-        }
-
        let conf = self.tokio_conn_conf.clone();
-        let atomic_interval = self.params.installed_extensions_collection_interval.clone();
-        let mut installed_extensions_collection_interval =
-            2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst);
-        info!(
-            "[NEON_EXT_SPAWN] Spawning background installed extensions worker with Timeout: {}",
-            installed_extensions_collection_interval
-        );
-        let handle = tokio::spawn(async move {
+        let installed_extensions_collection_interval =
+            self.params.installed_extensions_collection_interval;
+        tokio::spawn(async move {
            // An initial sleep is added to ensure that two collections don't happen at the same time.
            // The first collection happens during compute startup.
            tokio::time::sleep(tokio::time::Duration::from_secs(
@@ -2377,48 +2294,8 @@ LIMIT 100",
            loop {
                interval.tick().await;
                let _ = installed_extensions(conf.clone()).await;
-                // Acquire a read lock on the compute spec and then update the interval if necessary
-                interval = tokio::time::interval(tokio::time::Duration::from_secs(std::cmp::max(
-                    installed_extensions_collection_interval,
-                    2 * atomic_interval.load(std::sync::atomic::Ordering::SeqCst),
-                )));
-                installed_extensions_collection_interval = interval.period().as_secs();
            }
        });
-
-        // Store the new task handle
-        *self.extension_stats_task.lock().unwrap() = Some(handle);
-    }
-
-    fn terminate_extension_stats_task(&self) {
-        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
-            handle.abort();
-        }
-    }
-
-    fn update_installed_extensions_collection_interval(&self, spec: &ComputeSpec) {
-        // Update the interval for collecting installed extensions statistics
-        // If the value is -1, we never suspend so set the value to default collection.
-        // If the value is 0, it means default, we will just continue to use the default.
-        if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {
-            info!(
-                "[NEON_EXT_INT_UPD] Spec Timeout: {}, New Timeout: {}",
-                spec.suspend_timeout_seconds, DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL
-            );
-            self.params.installed_extensions_collection_interval.store(
-                DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,
-                std::sync::atomic::Ordering::SeqCst,
-            );
-        } else {
-            info!(
-                "[NEON_EXT_INT_UPD] Spec Timeout: {}",
-                spec.suspend_timeout_seconds
-            );
-            self.params.installed_extensions_collection_interval.store(
-                spec.suspend_timeout_seconds as u64,
-                std::sync::atomic::Ordering::SeqCst,
-            );
-        }
    }
 }

--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -4,9 +4,7 @@ use std::thread;
 use std::time::{Duration, SystemTime};

 use anyhow::{Result, bail};
-use compute_api::spec::{ComputeMode, PageserverProtocol};
-use itertools::Itertools as _;
-use pageserver_page_api as page_api;
+use compute_api::spec::ComputeMode;
 use postgres::{NoTls, SimpleQueryMessage};
 use tracing::{info, warn};
 use utils::id::{TenantId, TimelineId};
@@ -78,17 +76,25 @@ fn acquire_lsn_lease_with_retry(

    loop {
        // Note: List of pageservers is dynamic, need to re-read configs before each attempt.
-        let (connstrings, auth) = {
+        let configs = {
            let state = compute.state.lock().unwrap();
+
            let spec = state.pspec.as_ref().expect("spec must be set");
-            (
-                spec.pageserver_connstr.clone(),
-                spec.storage_auth_token.clone(),
-            )
+
+            let conn_strings = spec.pageserver_connstr.split(',');
+
+            conn_strings
+                .map(|connstr| {
+                    let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
+                    if let Some(storage_auth_token) = &spec.storage_auth_token {
+                        config.password(storage_auth_token.clone());
+                    }
+                    config
+                })
+                .collect::<Vec<_>>()
        };

-        let result =
-            try_acquire_lsn_lease(&connstrings, auth.as_deref(), tenant_id, timeline_id, lsn);
+        let result = try_acquire_lsn_lease(tenant_id, timeline_id, lsn, &configs);
        match result {
            Ok(Some(res)) => {
                return Ok(res);
@@ -110,104 +116,68 @@ fn acquire_lsn_lease_with_retry(
    }
 }

-/// Tries to acquire LSN leases on all Pageserver shards.
+/// Tries to acquire an LSN lease through PS page_service API.
 fn try_acquire_lsn_lease(
-    connstrings: &str,
-    auth: Option<&str>,
    tenant_id: TenantId,
    timeline_id: TimelineId,
    lsn: Lsn,
+    configs: &[postgres::Config],
 ) -> Result<Option<SystemTime>> {
-    let connstrings = connstrings.split(',').collect_vec();
-    let shard_count = connstrings.len();
-    let mut leases = Vec::new();
-
-    for (shard_number, &connstring) in connstrings.iter().enumerate() {
-        let tenant_shard_id = match shard_count {
-            0 | 1 => TenantShardId::unsharded(tenant_id),
-            shard_count => TenantShardId {
-                tenant_id,
-                shard_number: ShardNumber(shard_number as u8),
-                shard_count: ShardCount::new(shard_count as u8),
-            },
+    fn get_valid_until(
+        config: &postgres::Config,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+        lsn: Lsn,
+    ) -> Result<Option<SystemTime>> {
+        let mut client = config.connect(NoTls)?;
+        let cmd = format!("lease lsn {tenant_shard_id} {timeline_id} {lsn} ");
+        let res = client.simple_query(&cmd)?;
+        let msg = match res.first() {
+            Some(msg) => msg,
+            None => bail!("empty response"),
+        };
+        let row = match msg {
+            SimpleQueryMessage::Row(row) => row,
+            _ => bail!("error parsing lsn lease response"),
        };

-        let lease = match PageserverProtocol::from_connstring(connstring)? {
-            PageserverProtocol::Libpq => {
-                acquire_lsn_lease_libpq(connstring, auth, tenant_shard_id, timeline_id, lsn)?
-            }
-            PageserverProtocol::Grpc => {
-                acquire_lsn_lease_grpc(connstring, auth, tenant_shard_id, timeline_id, lsn)?
-            }
-        };
-        leases.push(lease);
+        // Note: this will be None if a lease is explicitly not granted.
+        let valid_until_str = row.get("valid_until");
+
+        let valid_until = valid_until_str.map(|s| {
+            SystemTime::UNIX_EPOCH
+                .checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
+                .expect("Time larger than max SystemTime could handle")
+        });
+        Ok(valid_until)
    }

-    Ok(leases.into_iter().min().flatten())
-}
+    let shard_count = configs.len();

-/// Acquires an LSN lease on a single shard, using the libpq API. The connstring must use a
-/// postgresql:// scheme.
-fn acquire_lsn_lease_libpq(
-    connstring: &str,
-    auth: Option<&str>,
-    tenant_shard_id: TenantShardId,
-    timeline_id: TimelineId,
-    lsn: Lsn,
-) -> Result<Option<SystemTime>> {
-    let mut config = postgres::Config::from_str(connstring)?;
-    if let Some(auth) = auth {
-        config.password(auth);
-    }
-    let mut client = config.connect(NoTls)?;
-    let cmd = format!("lease lsn {tenant_shard_id} {timeline_id} {lsn} ");
-    let res = client.simple_query(&cmd)?;
-    let msg = match res.first() {
-        Some(msg) => msg,
-        None => bail!("empty response"),
-    };
-    let row = match msg {
-        SimpleQueryMessage::Row(row) => row,
-        _ => bail!("error parsing lsn lease response"),
+    let valid_until = if shard_count > 1 {
+        configs
+            .iter()
+            .enumerate()
+            .map(|(shard_number, config)| {
+                let tenant_shard_id = TenantShardId {
+                    tenant_id,
+                    shard_count: ShardCount::new(shard_count as u8),
+                    shard_number: ShardNumber(shard_number as u8),
+                };
+                get_valid_until(config, tenant_shard_id, timeline_id, lsn)
+            })
+            .collect::<Result<Vec<Option<SystemTime>>>>()?
+            .into_iter()
+            .min()
+            .unwrap()
+    } else {
+        get_valid_until(
+            &configs[0],
+            TenantShardId::unsharded(tenant_id),
+            timeline_id,
+            lsn,
+        )?
    };

-    // Note: this will be None if a lease is explicitly not granted.
-    let valid_until_str = row.get("valid_until");
-
-    let valid_until = valid_until_str.map(|s| {
-        SystemTime::UNIX_EPOCH
-            .checked_add(Duration::from_millis(u128::from_str(s).unwrap() as u64))
-            .expect("Time larger than max SystemTime could handle")
-    });
    Ok(valid_until)
 }
-
-/// Acquires an LSN lease on a single shard, using the gRPC API. The connstring must use a
-/// grpc:// scheme.
-fn acquire_lsn_lease_grpc(
-    connstring: &str,
-    auth: Option<&str>,
-    tenant_shard_id: TenantShardId,
-    timeline_id: TimelineId,
-    lsn: Lsn,
-) -> Result<Option<SystemTime>> {
-    tokio::runtime::Handle::current().block_on(async move {
-        let mut client = page_api::Client::new(
-            connstring.to_string(),
-            tenant_shard_id.tenant_id,
-            timeline_id,
-            tenant_shard_id.to_index(),
-            auth.map(String::from),
-            None,
-        )
-        .await?;
-
-        let req = page_api::LeaseLsnRequest { lsn };
-        match client.lease_lsn(req).await {
-            Ok(expires) => Ok(Some(expires)),
-            // Lease couldn't be acquired because the LSN has been garbage collected.
-            Err(err) if err.code() == tonic::Code::FailedPrecondition => Ok(None),
-            Err(err) => Err(err.into()),
-        }
-    })
-}
--- a/compute_tools/tests/cluster_spec.json
+++ b/compute_tools/tests/cluster_spec.json
@@ -3,8 +3,7 @@

  "timestamp": "2021-05-23T18:25:43.511Z",
  "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
-  "suspend_timeout_seconds": 3600,
-  
+
  "cluster": {
    "cluster_id": "test-cluster-42",
    "name": "Zenith Test",
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -16,9 +16,9 @@ use std::time::Duration;
 use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::requests::ComputeClaimsScope;
-use compute_api::spec::{ComputeMode, PageserverProtocol};
+use compute_api::spec::ComputeMode;
 use control_plane::broker::StorageBroker;
-use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode};
+use control_plane::endpoint::{ComputeControlPlane, EndpointTerminateMode, PageserverProtocol};
 use control_plane::endpoint_storage::{ENDPOINT_STORAGE_DEFAULT_ADDR, EndpointStorage};
 use control_plane::local_env;
 use control_plane::local_env::{
@@ -1649,9 +1649,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            // If --safekeepers argument is given, use only the listed
            // safekeeper nodes; otherwise all from the env.
            let safekeepers = parse_safekeepers(&args.safekeepers)?;
-            endpoint
-                .reconfigure(Some(pageservers), None, safekeepers, None)
-                .await?;
+            endpoint.reconfigure(pageservers, None, safekeepers).await?;
        }
        EndpointCmd::Stop(args) => {
            let endpoint_id = &args.endpoint_id;
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -56,8 +56,8 @@ use compute_api::responses::{
    TlsConfig,
 };
 use compute_api::spec::{
-    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PageserverProtocol,
-    PgIdent, RemoteExtSpec, Role,
+    Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent,
+    RemoteExtSpec, Role,
 };
 use jsonwebtoken::jwk::{
    AlgorithmParameters, CommonParameters, EllipticCurve, Jwk, JwkSet, KeyAlgorithm, KeyOperations,
@@ -373,6 +373,29 @@ impl std::fmt::Display for EndpointTerminateMode {
    }
 }

+/// Protocol used to connect to a Pageserver.
+#[derive(Clone, Copy, Debug)]
+pub enum PageserverProtocol {
+    Libpq,
+    Grpc,
+}
+
+impl PageserverProtocol {
+    /// Returns the URL scheme for the protocol, used in connstrings.
+    pub fn scheme(&self) -> &'static str {
+        match self {
+            Self::Libpq => "postgresql",
+            Self::Grpc => "grpc",
+        }
+    }
+}
+
+impl Display for PageserverProtocol {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.scheme())
+    }
+}
+
 impl Endpoint {
    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
        if !entry.file_type()?.is_dir() {
@@ -780,7 +803,6 @@ impl Endpoint {
                endpoint_storage_addr: Some(endpoint_storage_addr),
                endpoint_storage_token: Some(endpoint_storage_token),
                autoprewarm: false,
-                suspend_timeout_seconds: -1, // Only used in neon_local.
            };

            // this strange code is needed to support respec() in tests
@@ -975,11 +997,12 @@ impl Endpoint {

    pub async fn reconfigure(
        &self,
-        pageservers: Option<Vec<(PageserverProtocol, Host, u16)>>,
+        pageservers: Vec<(PageserverProtocol, Host, u16)>,
        stripe_size: Option<ShardStripeSize>,
        safekeepers: Option<Vec<NodeId>>,
-        safekeeper_generation: Option<SafekeeperGeneration>,
    ) -> Result<()> {
+        anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
+
        let (mut spec, compute_ctl_config) = {
            let config_path = self.endpoint_path().join("config.json");
            let file = std::fs::File::open(config_path)?;
@@ -991,24 +1014,16 @@ impl Endpoint {
        let postgresql_conf = self.read_postgresql_conf()?;
        spec.cluster.postgresql_conf = Some(postgresql_conf);

-        // If pageservers are not specified, don't change them.
-        if let Some(pageservers) = pageservers {
-            anyhow::ensure!(!pageservers.is_empty(), "no pageservers provided");
-
-            let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
-            spec.pageserver_connstring = Some(pageserver_connstr);
-            if stripe_size.is_some() {
-                spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
-            }
+        let pageserver_connstr = Self::build_pageserver_connstr(&pageservers);
+        spec.pageserver_connstring = Some(pageserver_connstr);
+        if stripe_size.is_some() {
+            spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
        }

        // If safekeepers are not specified, don't change them.
        if let Some(safekeepers) = safekeepers {
            let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
            spec.safekeeper_connstrings = safekeeper_connstrings;
-            if let Some(g) = safekeeper_generation {
-                spec.safekeepers_generation = Some(g.into_inner());
-            }
        }

        let client = reqwest::Client::builder()
@@ -1046,24 +1061,6 @@ impl Endpoint {
        }
    }

-    pub async fn reconfigure_pageservers(
-        &self,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
-        stripe_size: Option<ShardStripeSize>,
-    ) -> Result<()> {
-        self.reconfigure(Some(pageservers), stripe_size, None, None)
-            .await
-    }
-
-    pub async fn reconfigure_safekeepers(
-        &self,
-        safekeepers: Vec<NodeId>,
-        generation: SafekeeperGeneration,
-    ) -> Result<()> {
-        self.reconfigure(None, None, Some(safekeepers), Some(generation))
-            .await
-    }
-
    pub async fn stop(
        &self,
        mode: EndpointTerminateMode,
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -212,7 +212,7 @@ pub struct NeonStorageControllerConf {

    pub use_local_compute_notifications: bool,

-    pub timeline_safekeeper_count: Option<usize>,
+    pub timeline_safekeeper_count: Option<i64>,

    pub posthog_config: Option<PostHogConfig>,

--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -638,13 +638,7 @@ impl StorageController {
            args.push("--timelines-onto-safekeepers".to_string());
        }

-        // neon_local is used in test environments where we often have less than 3 safekeepers.
-        if self.config.timeline_safekeeper_count.is_some() || self.env.safekeepers.len() < 3 {
-            let sk_cnt = self
-                .config
-                .timeline_safekeeper_count
-                .unwrap_or(self.env.safekeepers.len());
-
+        if let Some(sk_cnt) = self.config.timeline_safekeeper_count {
            args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
        }

--- a/docker-compose/compute_wrapper/var/db/postgres/configs/config.json
+++ b/docker-compose/compute_wrapper/var/db/postgres/configs/config.json
@@ -4,7 +4,6 @@

        "timestamp": "2022-10-12T18:00:00.000Z",
        "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8c",
-        "suspend_timeout_seconds": -1,

        "cluster": {
            "cluster_id": "docker_compose",
--- a/docs/rfcs/040-Endpoint-Persistent-Unlogged-Files-Storage.md
+++ b/docs/rfcs/040-Endpoint-Persistent-Unlogged-Files-Storage.md
@@ -1,396 +0,0 @@
-# Memo: Endpoint Persistent Unlogged Files Storage
-Created on 2024-11-05
-Implemented on N/A
-
-## Summary
-A design for a storage system that allows storage of files required to make
-Neon's Endpoints have a better experience at or after a reboot.
-
-## Motivation
-Several systems inside PostgreSQL (and Neon) need some persistent storage for
-optimal workings across reboots and restarts, but still work without.
-Examples are the query-level statistics files of `pg_stat_statements` in
-`pg_stat/pg_stat_statements.stat`, and `pg_prewarm`'s `autoprewarm.blocks`.
-We need a storage system that can store and manage these files for each
-Endpoint, without necessarily granting users access to an unlimited storage
-device.
-
-## Goals
- Store known files for Endpoints with reasonable persistence.  
-  _Data loss in this service, while annoying and bad for UX, won't lose any
-  customer's data._
-
-## Non Goals (if relevant)
- This storage system does not need branching, file versioning, or other such
-  features. The files are as ephemeral to the timeline of the data as the
-  Endpoints that host the data.
- This storage system does not need to store _all_ user files, only 'known'
-  user files.
- This storage system does not need to be hosted fully inside Computes.  
-  _Instead, this will be a separate component similar to Pageserver,
-  SafeKeeper, the S3 proxy used for dynamically loaded extensions, etc._
-
-## Impacted components
- Compute needs new code to load and store these files in its lifetime.
- Control Plane needs to consider this new storage system when signalling
-  the deletion of an Endpoint, Timeline, or Tenant.
- Control Plane needs to consider this new storage system when it resets
-  or re-assigns an endpoint's timeline/branch state.
-
-A new service is created: the Endpoint Persistent Unlogged Files Storage
-service.  This could be integrated in e.g. Pageserver or Control Plane, or a
-separately hosted service.
-
-## Proposed implementation
-Endpoint-related data files are managed by a newly designed service (which
-optionally is integrated in an existing service like Pageserver or Control
-Plane), which stores data directly into S3 or any blob storage of choice.
-
-Upon deletion of the Endpoint, or reassignment of the endpoint to a different
-branch, this ephemeral data is dropped: the data stored may not match the
-state of the branch's data after reassignment, and on endpoint deletion the
-data won't have any use to the user.
-
-Compute gets credentials (JWT token with Tenant, Timeline & Endpoint claims)
-which it can use to authenticate to this new service and retrieve and store
-data associated with this endpoint.  This limited scope reduces leaks of data
-across endpoints and timeline resets, and limits the ability of endpoints to
-mess with other endpoints' data.
-
-The path of this endpoint data in S3 is initially as follows:
-
-    s3://<regional-epufs-bucket>/
-      tenants/
-        <hex-tenant-id>/
-          tenants/
-            <hex-timeline-id>/
-              endpoints/
-                <endpoint-id>/
-                  pgdata/
-                    <file_path_in_pgdatadir>
-
-For other blob storages an equivalent or similar path can be constructed.
-
-### Reliability, failure modes and corner cases (if relevant)
-Reliability is important, but not critical to the workings of Neon.  The data
-stored in this service will, when lost, reduce performance, but won't be a
-cause of permanent data loss - only operational metadata is stored.
-
-Most, if not all, blob storage services have sufficiently high persistence
-guarantees to cater our need for persistence and uptime. The only concern with
-blob storages is that the access latency is generally higher than local disk,
-but for the object types stored (cache state, ...) I don't think this will be
-much of an issue.
-
-### Interaction/Sequence diagram (if relevant)
-
-In these diagrams you can replace S3 with any persistent storage device of
-choice, but S3 is chosen as representative name: The well-known and short name
-of AWS' blob storage. Azure Blob Storage should work too, but it has a much
-longer name making it less practical for the diagrams.
-
-Write data:
-
-```http
-POST /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>
-Host: epufs.svc.neon.local
-
-<<<
-
-200 OK
-{
-  "version": "<opaque>", # opaque file version token, changes when the file contents change
-  "size": <bytes>,
-}
-```
-
-```mermaid
-sequenceDiagram
-    autonumber
-    participant co as Compute
-    participant ep as EPUFS
-    participant s3 as Blob Storage
-
-    co-->ep: Connect with credentials
-    co->>+ep: Store Unlogged Persistent File
-    opt is authenticated
-        ep->>s3: Write UPF to S3
-    end
-    ep->>-co: OK / Failure / Auth Failure
-    co-->ep: Cancel connection
-```
-
-Read data: (optional with cache-relevant request parameters, e.g. If-Modified-Since)
-```http
-GET /tenants/<tenant-id>/timelines/<tl-id>/endpoints/<endpoint-id>/pgdata/<the-pgdata-path>
-Host: epufs.svc.neon.local
-
-<<<
-
-200 OK
-
-<file data>
-```
-
-```mermaid
-sequenceDiagram
-    autonumber
-    participant co as Compute
-    participant ep as EPUFS
-    participant s3 as Blob Storage
-
-    co->>+ep: Read Unlogged Persistent File
-    opt is authenticated
-        ep->>+s3: Request UPF from storage
-        s3->>-ep: Receive UPF from storage
-    end
-    ep->>-co: OK(response) / Failure(storage, auth, ...)
-```
-
-Compute Startup:
-```mermaid
-sequenceDiagram
-    autonumber
-    participant co as Compute
-    participant ps as Pageserver
-    participant ep as EPUFS
-    participant es as Extension server
-
-    note over co: Bind endpoint ep-xxx
-    par Get basebackup
-        co->>+ps: Request basebackup @ LSN
-        ps-)ps: Construct basebackup
-        ps->>-co: Receive basebackup TAR @ LSN
-    and Get startup-critical Unlogged Persistent Files
-        co->>+ep: Get all UPFs of endpoint ep-xxx
-        ep-)ep: Retrieve and gather all UPFs
-        ep->>-co: TAR of UPFs
-    and Get startup-critical extensions
-        loop For every startup-critical extension
-            co->>es: Get critical extension
-            es->>co: Receive critical extension
-        end
-    end
-    note over co: Start compute
-```
-
-CPlane ops:
-```http
-DELETE /tenants/<tenant-id>/timelines/<timeline-id>/endpoints/<endpoint-id>
-Host: epufs.svc.neon.local
-
-<<<
-
-200 OK
-{
-  "tenant": "<tenant-id>",
-  "timeline": "<timeline-id>",
-  "endpoint": "<endpoint-id>",
-  "deleted": {
-    "files": <count>,
-    "bytes": <count>,
-  },
-}
-```
-
-```http
-DELETE /tenants/<tenant-id>/timelines/<timeline-id>
-Host: epufs.svc.neon.local
-
-<<<
-
-200 OK
-{
-  "tenant": "<tenant-id>",
-  "timeline": "<timeline-id>",
-  "deleted": {
-    "files": <count>,
-    "bytes": <count>,
-  },
-}
-```
-
-```http
-DELETE /tenants/<tenant-id>
-Host: epufs.svc.neon.local
-
-<<<
-
-200 OK
-{
-  "tenant": "<tenant-id>",
-  "deleted": {
-    "files": <count>,
-    "bytes": <count>,
-  },
-}
-```
-
-```mermaid
-sequenceDiagram
-    autonumber
-    participant cp as Control Plane
-    participant ep as EPUFS
-    participant s3 as Blob Storage
-
-    alt Tenant deleted
-        cp-)ep: Tenant deleted
-        loop For every object associated with removed tenant
-            ep->>s3: Remove data of deleted tenant from Storage
-        end
-        opt
-            ep-)cp: Tenant cleanup complete
-        end
-    alt Timeline deleted
-        cp-)ep: Timeline deleted
-        loop For every object associated with removed timeline
-            ep->>s3: Remove data of deleted timeline from Storage
-        end
-        opt
-            ep-)cp: Timeline cleanup complete
-        end
-    else Endpoint reassigned or removed
-        cp->>+ep: Endpoint reassigned
-        loop For every object associated with reassigned/removed endpoint
-            ep->>s3: Remove data from Storage
-        end
-        ep->>-cp: Cleanup complete
-    end
-```
-
-### Scalability (if relevant)
-
-Provisionally:  As this service is going to be part of compute startup, this
-service should be able to quickly respond to all requests.  Therefore this
-service is deployed to every AZ we host Computes in, and Computes communicate
-(generally) only to the EPUFS endpoint of the AZ they're hosted in.
-
-Local caching of frequently restarted endpoints' data or metadata may be
-needed for best performance.  However, due to the regional nature of stored
-data but zonal nature of the service deployment, we should be careful when we
-implement any local caching, as it is possible that computes in AZ 1 will
-update data originally written and thus cached by AZ 2.  Cache version tests
-and invalidation is therefore required if we want to roll out caching to this
-service, which is too broad a scope for an MVC.  This is why caching is left
-out of scope for this RFC, and should be considered separately after this RFC
-is implemented.
-
-### Security implications (if relevant)
-This service must be able to authenticate users at least by Tenant ID,
-Timeline ID and Endpoint ID. This will use the existing JWT infrastructure of
-Compute, which will be upgraded to the extent needed to support Timeline- and
-Endpoint-based claims.
-
-The service requires unlimited access to (a prefix of) a blob storage bucket,
-and thus must be hosted outside the Compute VM sandbox.
-
-A service that generates pre-signed request URLs for Compute to download the
-data from that URL is likely problematic, too:  Compute would be able to write
-unlimited data to the bucket, or exfiltrate this signed URL to get read/write
-access to specific objects in this bucket, which would still effectively give
-users access to the S3 bucket (but with improved access logging).
-
-There may be a use case for transferring data associated with one endpoint to
-another endpoint (e.g. to make one endpoint warm its caches with the state of
-another endpoint), but that's not currently in scope, and specific needs may
-be solved through out-of-line communication of data or pre-signed URLs.
-
-### Unresolved questions (if relevant)
-Caching of files is not in the implementation scope of the document, but
-should at some future point be considered to maximize performance.
-
-## Alternative implementation (if relevant)
-Several ideas have come up to solve this issue:
-
-### Use AUXfile
-One prevalent idea was to WAL-log the files using our AUXfile mechanism.
-
-Benefits:
-
-+ We already have this storage mechanism
-
-Demerits:
-
- It isn't available on read replicas
- Additional WAL will be consumed during shutdown and after the shutdown
-  checkpoint, which needs PG modifications to work without panics.
- It increases the data we need to manage in our versioned storage, thus
-  causing higher storage costs with higher retention due to duplication at
-  the storage layer.
-
-### Sign URLs for read/write operations, instead of proxying them
-
-Benefits:
-
-+ The service can be implemented with a much reduced IO budget
-
-Demerits:
-
- Users could get access to these signed credentials
- Not all blob storage services may implement URL signing
-
-### Give endpoints each their own directly accessed block volume
-
-Benefits:
-
-+ Easier to integrate for PostgreSQL
-
-Demerits:
-
- Little control on data size and contents
- Potentially problematic as we'd need to store data all across the pgdata
-  directory.
- EBS is not a good candidate
-   - Attaches in 10s of seconds, if not more; i.e. too cold to start
-   - Shared EBS volumes are a no-go, as you'd have to schedule the endpoint
-     with users of the same EBS volumes, which can't work with VM migration
-   - EBS storage costs are very high (>80$/kilotenant when using a
-     volume/tenant)
-   - EBS volumes can't be mounted across AZ boundaries
- Bucket per endpoint is unfeasible
-   - S3 buckets are priced at $20/month per 1k, which we could better spend
-     on developers.
-   - Allocating service accounts takes time (100s of ms), and service accounts
-     are a limited resource, too; so they're not a good candidate to allocate
-     on a per-endpoint basis.
-   - Giving credentials limited to prefix has similar issues as the pre-signed
-     URL approach.
-   - Bucket DNS lookup will fill DNS caches and put pressure on DNS lookup
-     much more than our current systems would.
- Volumes bound by hypervisor are unlikely
-   - This requires significant investment and increased software on the
-     hypervisor.
-   - It is unclear if we can attach volumes after boot, i.e. for pooled
-     instances.
-
-### Put the files into a table
-
-Benefits:
-
- + Mostly already available in PostgreSQL
-
-Demerits:
-
- - Uses WAL
-   - Can't be used after shutdown checkpoint
-   - Needs a RW endpoint, and table & catalog access to write to this data
- - Gets hit with DB size limitations
- - Depending on user acces:
-   - Inaccessible:  
-     The user doesn't have control over database size caused by
-     these systems.
-   - Accessible:  
-     The user can corrupt these files and cause the system to crash while
-     user-corrupted files are present, thus increasing on-call overhead.
-
-## Definition of Done (if relevant)
-
-This project is done if we have:
-
- One S3 bucket equivalent per region, which stores this per-endpoint data.
- A new service endpoint in at least every AZ, which indirectly grants
-  endpoints access to the data stored for these endpoints in these buckets.
- Compute writes & reads temp-data at shutdown and startup, respectively, for
-  at least the pg_prewarm or lfc_prewarm state files.
- Cleanup of endpoint data is triggered when the endpoint is deleted or is
-  detached from its current timeline.
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -12,7 +12,6 @@ jsonwebtoken.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 regex.workspace = true
-url.workspace = true

 utils = { path = "../utils" }
 remote_storage = { version = "0.1", path = "../remote_storage/" }
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -4,14 +4,11 @@
 //! provide it by calling the compute_ctl's `/compute_ctl` endpoint, or
 //! compute_ctl can fetch it by calling the control plane's API.
 use std::collections::HashMap;
-use std::fmt::Display;

-use anyhow::anyhow;
 use indexmap::IndexMap;
 use regex::Regex;
 use remote_storage::RemotePath;
 use serde::{Deserialize, Serialize};
-use url::Url;
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

@@ -184,11 +181,6 @@ pub struct ComputeSpec {
    /// Download LFC state from endpoint_storage and pass it to Postgres on startup
    #[serde(default)]
    pub autoprewarm: bool,
-
-    /// Suspend timeout in seconds.
-    ///
-    /// We use this value to derive other values, such as the installed extensions metric.
-    pub suspend_timeout_seconds: i64,
 }

 /// Feature flag to signal `compute_ctl` to enable certain experimental functionality.
@@ -437,47 +429,6 @@ pub struct JwksSettings {
    pub jwt_audience: Option<String>,
 }

-/// Protocol used to connect to a Pageserver. Parsed from the connstring scheme.
-#[derive(Clone, Copy, Debug, Default)]
-pub enum PageserverProtocol {
-    /// The original protocol based on libpq and COPY. Uses postgresql:// or postgres:// scheme.
-    #[default]
-    Libpq,
-    /// A newer, gRPC-based protocol. Uses grpc:// scheme.
-    Grpc,
-}
-
-impl PageserverProtocol {
-    /// Parses the protocol from a connstring scheme. Defaults to Libpq if no scheme is given.
-    /// Errors if the connstring is an invalid URL.
-    pub fn from_connstring(connstring: &str) -> anyhow::Result<Self> {
-        let scheme = match Url::parse(connstring) {
-            Ok(url) => url.scheme().to_lowercase(),
-            Err(url::ParseError::RelativeUrlWithoutBase) => return Ok(Self::default()),
-            Err(err) => return Err(anyhow!("invalid connstring URL: {err}")),
-        };
-        match scheme.as_str() {
-            "postgresql" | "postgres" => Ok(Self::Libpq),
-            "grpc" => Ok(Self::Grpc),
-            scheme => Err(anyhow!("invalid protocol scheme: {scheme}")),
-        }
-    }
-
-    /// Returns the URL scheme for the protocol, for use in connstrings.
-    pub fn scheme(&self) -> &'static str {
-        match self {
-            Self::Libpq => "postgresql",
-            Self::Grpc => "grpc",
-        }
-    }
-}
-
-impl Display for PageserverProtocol {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.scheme())
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use std::fs::File;
--- a/libs/compute_api/tests/cluster_spec.json
+++ b/libs/compute_api/tests/cluster_spec.json
@@ -3,7 +3,6 @@

    "timestamp": "2021-05-23T18:25:43.511Z",
    "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
-    "suspend_timeout_seconds": 3600,

    "cluster": {
        "cluster_id": "test-cluster-42",
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -19,7 +19,6 @@ byteorder.workspace = true
 utils.workspace = true
 postgres_ffi_types.workspace = true
 postgres_versioninfo.workspace = true
-posthog_client_lite.workspace = true
 enum-map.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
@@ -30,13 +29,12 @@ humantime-serde.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 itertools.workspace = true
 storage_broker.workspace = true
-camino = { workspace = true, features = ["serde1"] }
+camino = {workspace = true, features = ["serde1"]}
 remote_storage.workspace = true
 postgres_backend.workspace = true
-nix = { workspace = true, optional = true }
+nix = {workspace = true, optional = true}
 reqwest.workspace = true
 rand.workspace = true
-tracing.workspace = true
 tracing-utils.workspace = true
 once_cell.workspace = true

--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -4,7 +4,6 @@ use camino::Utf8PathBuf;
 mod tests;

 use const_format::formatcp;
-use posthog_client_lite::PostHogClientConfig;
 pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
 pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
 pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
@@ -69,61 +68,23 @@ impl Display for NodeMetadata {
 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub struct PostHogConfig {
    /// PostHog project ID
-    #[serde(default)]
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub project_id: Option<String>,
+    pub project_id: String,
    /// Server-side (private) API key
-    #[serde(default)]
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub server_api_key: Option<String>,
+    pub server_api_key: String,
    /// Client-side (public) API key
-    #[serde(default)]
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub client_api_key: Option<String>,
+    pub client_api_key: String,
    /// Private API URL
-    #[serde(default)]
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub private_api_url: Option<String>,
+    pub private_api_url: String,
    /// Public API URL
-    #[serde(default)]
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub public_api_url: Option<String>,
+    pub public_api_url: String,
    /// Refresh interval for the feature flag spec.
    /// The storcon will push the feature flag spec to the pageserver. If the pageserver does not receive
    /// the spec for `refresh_interval`, it will fetch the spec from the PostHog API.
-    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
    pub refresh_interval: Option<Duration>,
 }

-impl PostHogConfig {
-    pub fn try_into_posthog_config(self) -> Result<PostHogClientConfig, &'static str> {
-        let Some(project_id) = self.project_id else {
-            return Err("project_id is required");
-        };
-        let Some(server_api_key) = self.server_api_key else {
-            return Err("server_api_key is required");
-        };
-        let Some(client_api_key) = self.client_api_key else {
-            return Err("client_api_key is required");
-        };
-        let Some(private_api_url) = self.private_api_url else {
-            return Err("private_api_url is required");
-        };
-        let Some(public_api_url) = self.public_api_url else {
-            return Err("public_api_url is required");
-        };
-        Ok(PostHogClientConfig {
-            project_id,
-            server_api_key,
-            client_api_key,
-            private_api_url,
-            public_api_url,
-        })
-    }
-}
-
 /// `pageserver.toml`
 ///
 /// We use serde derive with `#[serde(default)]` to generate a deserializer
@@ -409,9 +370,6 @@ pub struct BasebackupCacheConfig {
    // TODO(diko): support max_entry_size_bytes.
    // pub max_entry_size_bytes: u64,
    pub max_size_entries: usize,
-    /// Size of the channel used to send prepare requests to the basebackup cache worker.
-    /// If exceeded, new prepare requests will be dropped.
-    pub prepare_channel_size: usize,
 }

 impl Default for BasebackupCacheConfig {
@@ -420,8 +378,7 @@ impl Default for BasebackupCacheConfig {
            cleanup_period: Duration::from_secs(60),
            max_total_size_bytes: 1024 * 1024 * 1024, // 1 GiB
            // max_entry_size_bytes: 16 * 1024 * 1024,   // 16 MiB
-            max_size_entries: 10000,
-            prepare_channel_size: 100,
+            max_size_entries: 1000,
        }
    }
 }
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -546,11 +546,6 @@ pub struct TimelineImportRequest {
    pub sk_set: Vec<NodeId>,
 }

-#[derive(serde::Serialize, serde::Deserialize, Clone)]
-pub struct TimelineSafekeeperMigrateRequest {
-    pub new_sk_set: Vec<NodeId>,
-}
-
 #[cfg(test)]
 mod test {
    use serde_json;
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -21,9 +21,7 @@ use utils::{completion, serde_system_time};

 use crate::config::Ratio;
 use crate::key::{CompactKey, Key};
-use crate::shard::{
-    DEFAULT_STRIPE_SIZE, ShardCount, ShardIdentity, ShardStripeSize, TenantShardId,
-};
+use crate::shard::{DEFAULT_STRIPE_SIZE, ShardCount, ShardStripeSize, TenantShardId};

 /// The state of a tenant in this pageserver.
 ///
@@ -477,7 +475,7 @@ pub struct TenantShardSplitResponse {
 }

 /// Parameters that apply to all shards in a tenant.  Used during tenant creation.
-#[derive(Clone, Copy, Serialize, Deserialize, Debug)]
+#[derive(Serialize, Deserialize, Debug)]
 #[serde(deny_unknown_fields)]
 pub struct ShardParameters {
    pub count: ShardCount,
@@ -499,15 +497,6 @@ impl Default for ShardParameters {
    }
 }

-impl From<ShardIdentity> for ShardParameters {
-    fn from(identity: ShardIdentity) -> Self {
-        Self {
-            count: identity.count,
-            stripe_size: identity.stripe_size,
-        }
-    }
-}
-
 #[derive(Debug, Default, Clone, Eq, PartialEq)]
 pub enum FieldPatch<T> {
    Upsert(T),
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -37,7 +37,6 @@ use std::hash::{Hash, Hasher};
 pub use ::utils::shard::*;
 use postgres_ffi_types::forknum::INIT_FORKNUM;
 use serde::{Deserialize, Serialize};
-use utils::critical;

 use crate::key::Key;
 use crate::models::ShardParameters;
@@ -180,7 +179,7 @@ impl ShardIdentity {

    /// For use when creating ShardIdentity instances for new shards, where a creation request
    /// specifies the ShardParameters that apply to all shards.
-    pub fn from_params(number: ShardNumber, params: ShardParameters) -> Self {
+    pub fn from_params(number: ShardNumber, params: &ShardParameters) -> Self {
        Self {
            number,
            count: params.count,
@@ -189,17 +188,6 @@ impl ShardIdentity {
        }
    }

-    /// Asserts that the given shard identities are equal. Changes to shard parameters will likely
-    /// result in data corruption.
-    pub fn assert_equal(&self, other: ShardIdentity) {
-        if self != &other {
-            // TODO: for now, we're conservative and just log errors in production. Turn this into a
-            // real assertion when we're confident it doesn't misfire, and also reject requests that
-            // attempt to change it with an error response.
-            critical!("shard identity mismatch: {self:?} != {other:?}");
-        }
-    }
-
    fn is_broken(&self) -> bool {
        self.layout == LAYOUT_BROKEN
    }
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -210,7 +210,7 @@ pub struct TimelineStatus {
 }

 /// Request to switch membership configuration.
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Serialize, Deserialize)]
 #[serde(transparent)]
 pub struct TimelineMembershipSwitchRequest {
    pub mconf: Configuration,
@@ -221,8 +221,6 @@ pub struct TimelineMembershipSwitchRequest {
 pub struct TimelineMembershipSwitchResponse {
    pub previous_conf: Configuration,
    pub current_conf: Configuration,
-    pub term: Term,
-    pub flush_lsn: Lsn,
 }

 #[derive(Clone, Copy, Serialize, Deserialize)]
--- a/libs/utils/src/sync/gate.rs
+++ b/libs/utils/src/sync/gate.rs
@@ -86,14 +86,6 @@ pub enum GateError {
    GateClosed,
 }

-impl GateError {
-    pub fn is_cancel(&self) -> bool {
-        match self {
-            GateError::GateClosed => true,
-        }
-    }
-}
-
 impl Default for Gate {
    fn default() -> Self {
        Self {
--- a/pageserver/page_api/Cargo.toml
+++ b/pageserver/page_api/Cargo.toml
@@ -9,14 +9,12 @@ anyhow.workspace = true
 bytes.workspace = true
 futures.workspace = true
 pageserver_api.workspace = true
-postgres_ffi_types.workspace = true
+postgres_ffi.workspace = true
 prost.workspace = true
-prost-types.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
-tokio-util.workspace = true
 tonic.workspace = true
 utils.workspace = true
 workspace_hack.workspace = true
--- a/pageserver/page_api/proto/page_service.proto
+++ b/pageserver/page_api/proto/page_service.proto
@@ -35,8 +35,6 @@
 syntax = "proto3";
 package page_api;

-import "google/protobuf/timestamp.proto";
-
 service PageService {
  // Returns whether a relation exists.
  rpc CheckRelExists(CheckRelExistsRequest) returns (CheckRelExistsResponse);
@@ -66,10 +64,6 @@ service PageService {

  // Fetches an SLRU segment.
  rpc GetSlruSegment (GetSlruSegmentRequest) returns (GetSlruSegmentResponse);
-
-  // Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't garbage
-  // collect the LSN until the lease expires. Must be acquired on all relevant shards.
-  rpc LeaseLsn (LeaseLsnRequest) returns (LeaseLsnResponse);
 }

 // The LSN a request should read at.
@@ -116,19 +110,6 @@ message GetBaseBackupRequest {
  bool replica = 2;
  // If true, include relation files in the base backup. Mainly for debugging and tests.
  bool full = 3;
-  // Compression algorithm to use. Base backups send a compressed payload instead of using gRPC
-  // compression, so that we can cache compressed backups on the server.
-  BaseBackupCompression compression = 4;
-}
-
-// Base backup compression algorithms.
-enum BaseBackupCompression {
-  // Unknown algorithm. Used when clients send an unsupported algorithm.
-  BASE_BACKUP_COMPRESSION_UNKNOWN = 0;
-  // No compression.
-  BASE_BACKUP_COMPRESSION_NONE = 1;
-  // GZIP compression.
-  BASE_BACKUP_COMPRESSION_GZIP = 2;
 }

 // Base backup response chunk, returned as an ordered stream.
@@ -258,17 +239,3 @@ message GetSlruSegmentRequest {
 message GetSlruSegmentResponse {
  bytes segment = 1;
 }
-
-// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't garbage
-// collect the LSN until the lease expires. Must be acquired on all relevant shards.
-message LeaseLsnRequest {
-  // The LSN to lease. Can't be 0 or below the current GC cutoff.
-  uint64 lsn = 1;
-}
-
-// Lease acquisition response. If the lease could not be granted because the LSN has already been
-// garbage collected, a FailedPrecondition status will be returned instead.
-message LeaseLsnResponse {
-  // The lease expiration time.
-  google.protobuf.Timestamp expires = 1;
-}
--- a/pageserver/page_api/src/client.rs
+++ b/pageserver/page_api/src/client.rs
@@ -1,7 +1,8 @@
-use anyhow::Result;
-use futures::{Stream, StreamExt as _, TryStreamExt as _};
-use tokio::io::AsyncRead;
-use tokio_util::io::StreamReader;
+use std::convert::TryInto;
+
+use bytes::Bytes;
+use futures::TryStreamExt;
+use futures::{Stream, StreamExt};
 use tonic::metadata::AsciiMetadataValue;
 use tonic::metadata::errors::InvalidMetadataValue;
 use tonic::transport::Channel;
@@ -11,6 +12,8 @@ use utils::id::TenantId;
 use utils::id::TimelineId;
 use utils::shard::ShardIndex;

+use anyhow::Result;
+
 use crate::model;
 use crate::proto;

@@ -66,7 +69,6 @@ impl tonic::service::Interceptor for AuthInterceptor {
        Ok(req)
    }
 }
-
 #[derive(Clone)]
 pub struct Client {
    client: proto::PageServiceClient<
@@ -93,6 +95,7 @@ impl Client {

        if let Some(compression) = compression {
            // TODO: benchmark this (including network latency).
+            // TODO: consider enabling compression by default.
            client = client
                .accept_compressed(compression)
                .send_compressed(compression);
@@ -118,15 +121,22 @@ impl Client {
    pub async fn get_base_backup(
        &mut self,
        req: model::GetBaseBackupRequest,
-    ) -> Result<impl AsyncRead + use<>, tonic::Status> {
-        let req = proto::GetBaseBackupRequest::from(req);
-        let chunks = self.client.get_base_backup(req).await?.into_inner();
-        let reader = StreamReader::new(
-            chunks
-                .map_ok(|resp| resp.chunk)
-                .map_err(std::io::Error::other),
-        );
-        Ok(reader)
+    ) -> Result<impl Stream<Item = Result<Bytes, tonic::Status>> + 'static, tonic::Status> {
+        let proto_req = proto::GetBaseBackupRequest::from(req);
+
+        let response_stream: Streaming<proto::GetBaseBackupResponseChunk> =
+            self.client.get_base_backup(proto_req).await?.into_inner();
+
+        // TODO: Consider dechunking internally
+        let domain_stream = response_stream.map(|chunk_res| {
+            chunk_res.and_then(|proto_chunk| {
+                proto_chunk.try_into().map_err(|e| {
+                    tonic::Status::internal(format!("Failed to convert response chunk: {e}"))
+                })
+            })
+        });
+
+        Ok(domain_stream)
    }

    /// Returns the total size of a database, as # of bytes.
@@ -187,17 +197,4 @@ impl Client {
        let response = self.client.get_slru_segment(proto_req).await?;
        Ok(response.into_inner().try_into()?)
    }
-
-    /// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't
-    /// garbage collect the LSN until the lease expires. Must be acquired on all relevant shards.
-    ///
-    /// Returns the lease expiration time, or a FailedPrecondition status if the lease could not be
-    /// acquired because the LSN has already been garbage collected.
-    pub async fn lease_lsn(
-        &mut self,
-        req: model::LeaseLsnRequest,
-    ) -> Result<model::LeaseLsnResponse, tonic::Status> {
-        let req = proto::LeaseLsnRequest::from(req);
-        Ok(self.client.lease_lsn(req).await?.into_inner().try_into()?)
-    }
 }
--- a/pageserver/page_api/src/model.rs
+++ b/pageserver/page_api/src/model.rs
@@ -16,11 +16,10 @@
 //! stream combinators without dealing with errors, and avoids validating the same message twice.

 use std::fmt::Display;
-use std::time::{Duration, SystemTime, UNIX_EPOCH};

 use bytes::Bytes;
-use postgres_ffi_types::Oid;
-// TODO: split out Lsn, RelTag, SlruKind and other basic types to a separate crate, to avoid
+use postgres_ffi::Oid;
+// TODO: split out Lsn, RelTag, SlruKind, Oid and other basic types to a separate crate, to avoid
 // pulling in all of their other crate dependencies when building the client.
 use utils::lsn::Lsn;

@@ -192,21 +191,15 @@ pub struct GetBaseBackupRequest {
    pub replica: bool,
    /// If true, include relation files in the base backup. Mainly for debugging and tests.
    pub full: bool,
-    /// Compression algorithm to use. Base backups send a compressed payload instead of using gRPC
-    /// compression, so that we can cache compressed backups on the server.
-    pub compression: BaseBackupCompression,
 }

-impl TryFrom<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
-    type Error = ProtocolError;
-
-    fn try_from(pb: proto::GetBaseBackupRequest) -> Result<Self, Self::Error> {
-        Ok(Self {
+impl From<proto::GetBaseBackupRequest> for GetBaseBackupRequest {
+    fn from(pb: proto::GetBaseBackupRequest) -> Self {
+        Self {
            lsn: (pb.lsn != 0).then_some(Lsn(pb.lsn)),
            replica: pb.replica,
            full: pb.full,
-            compression: pb.compression.try_into()?,
-        })
+        }
    }
 }

@@ -216,55 +209,10 @@ impl From<GetBaseBackupRequest> for proto::GetBaseBackupRequest {
            lsn: request.lsn.unwrap_or_default().0,
            replica: request.replica,
            full: request.full,
-            compression: request.compression.into(),
        }
    }
 }

-/// Base backup compression algorithm.
-#[derive(Clone, Copy, Debug)]
-pub enum BaseBackupCompression {
-    None,
-    Gzip,
-}
-
-impl TryFrom<proto::BaseBackupCompression> for BaseBackupCompression {
-    type Error = ProtocolError;
-
-    fn try_from(pb: proto::BaseBackupCompression) -> Result<Self, Self::Error> {
-        match pb {
-            proto::BaseBackupCompression::Unknown => Err(ProtocolError::invalid("compression", pb)),
-            proto::BaseBackupCompression::None => Ok(Self::None),
-            proto::BaseBackupCompression::Gzip => Ok(Self::Gzip),
-        }
-    }
-}
-
-impl TryFrom<i32> for BaseBackupCompression {
-    type Error = ProtocolError;
-
-    fn try_from(compression: i32) -> Result<Self, Self::Error> {
-        proto::BaseBackupCompression::try_from(compression)
-            .map_err(|_| ProtocolError::invalid("compression", compression))
-            .and_then(Self::try_from)
-    }
-}
-
-impl From<BaseBackupCompression> for proto::BaseBackupCompression {
-    fn from(compression: BaseBackupCompression) -> Self {
-        match compression {
-            BaseBackupCompression::None => Self::None,
-            BaseBackupCompression::Gzip => Self::Gzip,
-        }
-    }
-}
-
-impl From<BaseBackupCompression> for i32 {
-    fn from(compression: BaseBackupCompression) -> Self {
-        proto::BaseBackupCompression::from(compression).into()
-    }
-}
-
 pub type GetBaseBackupResponseChunk = Bytes;

 impl TryFrom<proto::GetBaseBackupResponseChunk> for GetBaseBackupResponseChunk {
@@ -704,54 +652,3 @@ impl From<GetSlruSegmentResponse> for proto::GetSlruSegmentResponse {

 // SlruKind is defined in pageserver_api::reltag.
 pub type SlruKind = pageserver_api::reltag::SlruKind;
-
-/// Acquires or extends a lease on the given LSN. This guarantees that the Pageserver won't garbage
-/// collect the LSN until the lease expires.
-pub struct LeaseLsnRequest {
-    /// The LSN to lease.
-    pub lsn: Lsn,
-}
-
-impl TryFrom<proto::LeaseLsnRequest> for LeaseLsnRequest {
-    type Error = ProtocolError;
-
-    fn try_from(pb: proto::LeaseLsnRequest) -> Result<Self, Self::Error> {
-        if pb.lsn == 0 {
-            return Err(ProtocolError::Missing("lsn"));
-        }
-        Ok(Self { lsn: Lsn(pb.lsn) })
-    }
-}
-
-impl From<LeaseLsnRequest> for proto::LeaseLsnRequest {
-    fn from(request: LeaseLsnRequest) -> Self {
-        Self { lsn: request.lsn.0 }
-    }
-}
-
-/// Lease expiration time. If the lease could not be granted because the LSN has already been
-/// garbage collected, a FailedPrecondition status will be returned instead.
-pub type LeaseLsnResponse = SystemTime;
-
-impl TryFrom<proto::LeaseLsnResponse> for LeaseLsnResponse {
-    type Error = ProtocolError;
-
-    fn try_from(pb: proto::LeaseLsnResponse) -> Result<Self, Self::Error> {
-        let expires = pb.expires.ok_or(ProtocolError::Missing("expires"))?;
-        UNIX_EPOCH
-            .checked_add(Duration::new(expires.seconds as u64, expires.nanos as u32))
-            .ok_or_else(|| ProtocolError::invalid("expires", expires))
-    }
-}
-
-impl From<LeaseLsnResponse> for proto::LeaseLsnResponse {
-    fn from(response: LeaseLsnResponse) -> Self {
-        let expires = response.duration_since(UNIX_EPOCH).unwrap_or_default();
-        Self {
-            expires: Some(prost_types::Timestamp {
-                seconds: expires.as_secs() as i64,
-                nanos: expires.subsec_nanos() as i32,
-            }),
-        }
-    }
-}
--- a/pageserver/pagebench/src/cmd/basebackup.rs
+++ b/pageserver/pagebench/src/cmd/basebackup.rs
@@ -317,7 +317,6 @@ impl Client for LibpqClient {
 /// A gRPC Pageserver client.
 struct GrpcClient {
    inner: page_api::Client,
-    compression: page_api::BaseBackupCompression,
 }

 impl GrpcClient {
@@ -332,14 +331,10 @@ impl GrpcClient {
            ttid.timeline_id,
            ShardIndex::unsharded(),
            None,
-            None, // NB: uses payload compression
+            compression.then_some(tonic::codec::CompressionEncoding::Zstd),
        )
        .await?;
-        let compression = match compression {
-            true => page_api::BaseBackupCompression::Gzip,
-            false => page_api::BaseBackupCompression::None,
-        };
-        Ok(Self { inner, compression })
+        Ok(Self { inner })
    }
 }

@@ -353,8 +348,10 @@ impl Client for GrpcClient {
            lsn,
            replica: false,
            full: false,
-            compression: self.compression,
        };
-        Ok(Box::pin(self.inner.get_base_backup(req).await?))
+        let stream = self.inner.get_base_backup(req).await?;
+        Ok(Box::pin(StreamReader::new(
+            stream.map_err(std::io::Error::other),
+        )))
    }
 }
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -14,7 +14,6 @@ use std::fmt::Write as FmtWrite;
 use std::time::{Instant, SystemTime};

 use anyhow::{Context, anyhow};
-use async_compression::tokio::write::GzipEncoder;
 use bytes::{BufMut, Bytes, BytesMut};
 use fail::fail_point;
 use pageserver_api::key::{Key, rel_block_to_key};
@@ -26,7 +25,8 @@ use postgres_ffi::{
 };
 use postgres_ffi_types::constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
 use postgres_ffi_types::forknum::{INIT_FORKNUM, MAIN_FORKNUM};
-use tokio::io::{self, AsyncWrite, AsyncWriteExt as _};
+use tokio::io;
+use tokio::io::AsyncWrite;
 use tokio_tar::{Builder, EntryType, Header};
 use tracing::*;
 use utils::lsn::Lsn;
@@ -97,7 +97,6 @@ impl From<BasebackupError> for tonic::Status {
 ///  * When working without safekeepers. In this situation it is important to match the lsn
 ///    we are taking basebackup on with the lsn that is used in pageserver's walreceiver
 ///    to start the replication.
-#[allow(clippy::too_many_arguments)]
 pub async fn send_basebackup_tarball<'a, W>(
    write: &'a mut W,
    timeline: &'a Timeline,
@@ -105,7 +104,6 @@ pub async fn send_basebackup_tarball<'a, W>(
    prev_lsn: Option<Lsn>,
    full_backup: bool,
    replica: bool,
-    gzip_level: Option<async_compression::Level>,
    ctx: &'a RequestContext,
 ) -> Result<(), BasebackupError>
 where
@@ -124,7 +122,7 @@ where
    // prev_lsn value; that happens if the timeline was just branched from
    // an old LSN and it doesn't have any WAL of its own yet. We will set
    // prev_lsn to Lsn(0) if we cannot provide the correct value.
-    let (backup_prev, lsn) = if let Some(req_lsn) = req_lsn {
+    let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
        // Backup was requested at a particular LSN. The caller should've
        // already checked that it's a valid LSN.

@@ -145,7 +143,7 @@ where
    };

    // Consolidate the derived and the provided prev_lsn values
-    let prev_record_lsn = if let Some(provided_prev_lsn) = prev_lsn {
+    let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
        if backup_prev != Lsn(0) && backup_prev != provided_prev_lsn {
            return Err(BasebackupError::Server(anyhow!(
                "backup_prev {backup_prev} != provided_prev_lsn {provided_prev_lsn}"
@@ -157,55 +155,30 @@ where
    };

    info!(
-        "taking basebackup lsn={lsn}, prev_lsn={prev_record_lsn} \
-        (full_backup={full_backup}, replica={replica}, gzip={gzip_level:?})",
-    );
-    let span = info_span!("send_tarball", backup_lsn=%lsn);
-
-    let io_concurrency = IoConcurrency::spawn_from_conf(
-        timeline.conf.get_vectored_concurrent_io,
-        timeline
-            .gate
-            .enter()
-            .map_err(|_| BasebackupError::Shutdown)?,
+        "taking basebackup lsn={}, prev_lsn={} (full_backup={}, replica={})",
+        backup_lsn, prev_lsn, full_backup, replica
    );

-    if let Some(gzip_level) = gzip_level {
-        let mut encoder = GzipEncoder::with_quality(write, gzip_level);
-        Basebackup {
-            ar: Builder::new_non_terminated(&mut encoder),
-            timeline,
-            lsn,
-            prev_record_lsn,
-            full_backup,
-            replica,
-            ctx,
-            io_concurrency,
-        }
+    let basebackup = Basebackup {
+        ar: Builder::new_non_terminated(write),
+        timeline,
+        lsn: backup_lsn,
+        prev_record_lsn: prev_lsn,
+        full_backup,
+        replica,
+        ctx,
+        io_concurrency: IoConcurrency::spawn_from_conf(
+            timeline.conf.get_vectored_concurrent_io,
+            timeline
+                .gate
+                .enter()
+                .map_err(|_| BasebackupError::Shutdown)?,
+        ),
+    };
+    basebackup
        .send_tarball()
-        .instrument(span)
-        .await?;
-        encoder
-            .shutdown()
-            .await
-            .map_err(|err| BasebackupError::Client(err, "gzip"))?;
-    } else {
-        Basebackup {
-            ar: Builder::new_non_terminated(write),
-            timeline,
-            lsn,
-            prev_record_lsn,
-            full_backup,
-            replica,
-            ctx,
-            io_concurrency,
-        }
-        .send_tarball()
-        .instrument(span)
-        .await?;
-    }
-
-    Ok(())
+        .instrument(info_span!("send_tarball", backup_lsn=%backup_lsn))
+        .await
 }

 /// This is short-living object only for the time of tarball creation,
--- a/pageserver/src/basebackup_cache.rs
+++ b/pageserver/src/basebackup_cache.rs
@@ -1,12 +1,13 @@
 use std::{collections::HashMap, sync::Arc};

 use anyhow::Context;
+use async_compression::tokio::write::GzipEncoder;
 use camino::{Utf8Path, Utf8PathBuf};
 use metrics::core::{AtomicU64, GenericCounter};
 use pageserver_api::{config::BasebackupCacheConfig, models::TenantState};
 use tokio::{
    io::{AsyncWriteExt, BufWriter},
-    sync::mpsc::{Receiver, Sender, error::TrySendError},
+    sync::mpsc::{UnboundedReceiver, UnboundedSender},
 };
 use tokio_util::sync::CancellationToken;
 use utils::{
@@ -19,8 +20,8 @@ use crate::{
    basebackup::send_basebackup_tarball,
    context::{DownloadBehavior, RequestContext},
    metrics::{
-        BASEBACKUP_CACHE_ENTRIES, BASEBACKUP_CACHE_PREPARE, BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE,
-        BASEBACKUP_CACHE_READ, BASEBACKUP_CACHE_SIZE,
+        BASEBACKUP_CACHE_ENTRIES, BASEBACKUP_CACHE_PREPARE, BASEBACKUP_CACHE_READ,
+        BASEBACKUP_CACHE_SIZE,
    },
    task_mgr::TaskKind,
    tenant::{
@@ -35,8 +36,8 @@ pub struct BasebackupPrepareRequest {
    pub lsn: Lsn,
 }

-pub type BasebackupPrepareSender = Sender<BasebackupPrepareRequest>;
-pub type BasebackupPrepareReceiver = Receiver<BasebackupPrepareRequest>;
+pub type BasebackupPrepareSender = UnboundedSender<BasebackupPrepareRequest>;
+pub type BasebackupPrepareReceiver = UnboundedReceiver<BasebackupPrepareRequest>;

 #[derive(Clone)]
 struct CacheEntry {
@@ -60,65 +61,40 @@ struct CacheEntry {
 /// and ~1 RPS for get requests.
 pub struct BasebackupCache {
    data_dir: Utf8PathBuf,
-    config: Option<BasebackupCacheConfig>,

    entries: std::sync::Mutex<HashMap<TenantTimelineId, CacheEntry>>,

-    prepare_sender: BasebackupPrepareSender,
-
    read_hit_count: GenericCounter<AtomicU64>,
    read_miss_count: GenericCounter<AtomicU64>,
    read_err_count: GenericCounter<AtomicU64>,
-
-    prepare_skip_count: GenericCounter<AtomicU64>,
 }

 impl BasebackupCache {
-    /// Create a new BasebackupCache instance.
-    /// Also returns a BasebackupPrepareReceiver which is needed to start
-    /// the background task.
-    /// The cache is initialized from the data_dir in the background task.
-    /// The cache will return `None` for any get requests until the initialization is complete.
-    /// The background task is spawned separately using [`Self::spawn_background_task`]
-    /// to avoid a circular dependency between the cache and the tenant manager.
-    pub fn new(
+    /// Creates a BasebackupCache and, if `config` is `Some`, spawns its background task.
+    /// Cache initialization runs in that background task and does not block the caller;
+    /// the cache returns `None` for any get requests until initialization is complete.
+    /// If `config` is `None`, no background task is spawned.
+    pub fn spawn(
+        runtime_handle: &tokio::runtime::Handle,
        data_dir: Utf8PathBuf,
        config: Option<BasebackupCacheConfig>,
-    ) -> (Arc<Self>, BasebackupPrepareReceiver) {
-        let chan_size = config.as_ref().map(|c| c.max_size_entries).unwrap_or(1);
-
-        let (prepare_sender, prepare_receiver) = tokio::sync::mpsc::channel(chan_size);
-
+        prepare_receiver: BasebackupPrepareReceiver,
+        tenant_manager: Arc<TenantManager>,
+        cancel: CancellationToken,
+    ) -> Arc<Self> {
        let cache = Arc::new(BasebackupCache {
            data_dir,
-            config,
+
            entries: std::sync::Mutex::new(HashMap::new()),
-            prepare_sender,

            read_hit_count: BASEBACKUP_CACHE_READ.with_label_values(&["hit"]),
            read_miss_count: BASEBACKUP_CACHE_READ.with_label_values(&["miss"]),
            read_err_count: BASEBACKUP_CACHE_READ.with_label_values(&["error"]),
-
-            prepare_skip_count: BASEBACKUP_CACHE_PREPARE.with_label_values(&["skip"]),
        });

-        (cache, prepare_receiver)
-    }
-
-    /// Spawns the background task.
-    /// The background task initializes the cache from the disk,
-    /// processes prepare requests, and cleans up outdated cache entries.
-    /// Noop if the cache is disabled (config is None).
-    pub fn spawn_background_task(
-        self: Arc<Self>,
-        runtime_handle: &tokio::runtime::Handle,
-        prepare_receiver: BasebackupPrepareReceiver,
-        tenant_manager: Arc<TenantManager>,
-        cancel: CancellationToken,
-    ) {
-        if let Some(config) = self.config.clone() {
+        if let Some(config) = config {
            let background = BackgroundTask {
-                c: self,
+                c: cache.clone(),

                config,
                tenant_manager,
@@ -133,45 +109,8 @@ impl BasebackupCache {
            };
            runtime_handle.spawn(background.run(prepare_receiver));
        }
-    }

-    /// Send a basebackup prepare request to the background task.
-    /// The basebackup will be prepared asynchronously, it does not block the caller.
-    /// The request will be skipped if any cache limits are exceeded.
-    pub fn send_prepare(&self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, lsn: Lsn) {
-        let req = BasebackupPrepareRequest {
-            tenant_shard_id,
-            timeline_id,
-            lsn,
-        };
-
-        BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE.inc();
-        let res = self.prepare_sender.try_send(req);
-
-        if let Err(e) = res {
-            BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE.dec();
-            self.prepare_skip_count.inc();
-            match e {
-                TrySendError::Full(_) => {
-                    // Basebackup prepares are pretty rare, normally we should not hit this.
-                    tracing::info!(
-                        tenant_id = %tenant_shard_id.tenant_id,
-                        %timeline_id,
-                        %lsn,
-                        "Basebackup prepare channel is full, skipping the request"
-                    );
-                }
-                TrySendError::Closed(_) => {
-                    // Normal during shutdown, not critical.
-                    tracing::info!(
-                        tenant_id = %tenant_shard_id.tenant_id,
-                        %timeline_id,
-                        %lsn,
-                        "Basebackup prepare channel is closed, skipping the request"
-                    );
-                }
-            }
-        }
+        cache
    }

    /// Gets a basebackup entry from the cache.
@@ -184,10 +123,6 @@ impl BasebackupCache {
        timeline_id: TimelineId,
        lsn: Lsn,
    ) -> Option<tokio::fs::File> {
-        if !self.is_enabled() {
-            return None;
-        }
-
        // Fast path. Check if the entry exists using the in-memory state.
        let tti = TenantTimelineId::new(tenant_id, timeline_id);
        if self.entries.lock().unwrap().get(&tti).map(|e| e.lsn) != Some(lsn) {
@@ -215,10 +150,6 @@ impl BasebackupCache {
        }
    }

-    pub fn is_enabled(&self) -> bool {
-        self.config.is_some()
-    }
-
    // Private methods.

    fn entry_filename(tenant_id: TenantId, timeline_id: TimelineId, lsn: Lsn) -> String {
@@ -436,7 +367,6 @@ impl BackgroundTask {
        loop {
            tokio::select! {
                Some(req) = prepare_receiver.recv() => {
-                    BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE.dec();
                    if let Err(err) = self.prepare_basebackup(
                        req.tenant_shard_id,
                        req.timeline_id,
@@ -664,6 +594,13 @@ impl BackgroundTask {
        let file = tokio::fs::File::create(entry_tmp_path).await?;
        let mut writer = BufWriter::new(file);

+        let mut encoder = GzipEncoder::with_quality(
+            &mut writer,
+            // Use Level::Best because compression is not on the hot path of basebackup requests.
+            // Decompression is almost unaffected by the compression level.
+            async_compression::Level::Best,
+        );
+
        // We may receive a request before the WAL record is applied to the timeline.
        // Wait for the requested LSN to be applied.
        timeline
@@ -676,19 +613,17 @@ impl BackgroundTask {
            .await?;

        send_basebackup_tarball(
-            &mut writer,
+            &mut encoder,
            timeline,
            Some(req_lsn),
            None,
            false,
            false,
-            // Level::Best because compression is not on the hot path of basebackup requests.
-            // The decompression is almost not affected by the compression level.
-            Some(async_compression::Level::Best),
            &ctx,
        )
        .await?;

+        encoder.shutdown().await?;
        writer.flush().await?;
        writer.into_inner().sync_all().await?;

--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -569,10 +569,8 @@ fn start_pageserver(
        pageserver::l0_flush::L0FlushGlobalState::new(conf.l0_flush.clone());

    // Scan the local 'tenants/' directory and start loading the tenants
-    let (basebackup_cache, basebackup_prepare_receiver) = BasebackupCache::new(
-        conf.basebackup_cache_dir(),
-        conf.basebackup_cache_config.clone(),
-    );
+    let (basebackup_prepare_sender, basebackup_prepare_receiver) =
+        tokio::sync::mpsc::unbounded_channel();
    let deletion_queue_client = deletion_queue.new_client();
    let background_purges = mgr::BackgroundPurges::default();

@@ -584,7 +582,7 @@ fn start_pageserver(
            remote_storage: remote_storage.clone(),
            deletion_queue_client,
            l0_flush_global_state,
-            basebackup_cache: Arc::clone(&basebackup_cache),
+            basebackup_prepare_sender,
            feature_resolver: feature_resolver.clone(),
        },
        shutdown_pageserver.clone(),
@@ -592,8 +590,10 @@ fn start_pageserver(
    let tenant_manager = Arc::new(tenant_manager);
    BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(tenant_manager.clone(), order))?;

-    basebackup_cache.spawn_background_task(
+    let basebackup_cache = BasebackupCache::spawn(
        BACKGROUND_RUNTIME.handle(),
+        conf.basebackup_cache_dir(),
+        conf.basebackup_cache_config.clone(),
        basebackup_prepare_receiver,
        Arc::clone(&tenant_manager),
        shutdown_pageserver.child_token(),
@@ -806,6 +806,7 @@ fn start_pageserver(
        } else {
            None
        },
+        basebackup_cache,
    );

    // Spawn a Pageserver gRPC server task. It will spawn separate tasks for
--- a/pageserver/src/bin/test_helper_slow_client_reads.rs
+++ b/pageserver/src/bin/test_helper_slow_client_reads.rs
@@ -37,7 +37,7 @@ async fn main() -> anyhow::Result<()> {
                not_modified_since: Lsn(23),
            },
            batch_key: 42,
-            message: format!("message {msg}"),
+            message: format!("message {}", msg),
        }));
        let Ok(res) = tokio::time::timeout(Duration::from_secs(10), fut).await else {
            eprintln!("pipe seems full");
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -762,40 +762,4 @@ mod tests {
        let result = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir);
        assert_eq!(result.is_ok(), is_valid);
    }
-
-    #[test]
-    fn test_config_posthog_config_is_valid() {
-        let input = r#"
-            control_plane_api = "http://localhost:6666"
-
-            [posthog_config]
-            server_api_key = "phs_AAA"
-            client_api_key = "phc_BBB"
-            project_id = "000"
-            private_api_url = "https://us.posthog.com"
-            public_api_url = "https://us.i.posthog.com"
-        "#;
-        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
-            .expect("posthogconfig is valid");
-        let workdir = Utf8PathBuf::from("/nonexistent");
-        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
-            .expect("parse_and_validate");
-    }
-
-    #[test]
-    fn test_config_posthog_incomplete_config_is_valid() {
-        let input = r#"
-            control_plane_api = "http://localhost:6666"
-
-            [posthog_config]
-            server_api_key = "phs_AAA"
-            private_api_url = "https://us.posthog.com"
-            public_api_url = "https://us.i.posthog.com"
-        "#;
-        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
-            .expect("posthogconfig is valid");
-        let workdir = Utf8PathBuf::from("/nonexistent");
-        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
-            .expect("parse_and_validate");
-    }
 }
--- a/pageserver/src/feature_resolver.rs
+++ b/pageserver/src/feature_resolver.rs
@@ -3,7 +3,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration};
 use arc_swap::ArcSwap;
 use pageserver_api::config::NodeMetadata;
 use posthog_client_lite::{
-    CaptureEvent, FeatureResolverBackgroundLoop, PostHogEvaluationError,
+    CaptureEvent, FeatureResolverBackgroundLoop, PostHogClientConfig, PostHogEvaluationError,
    PostHogFlagFilterPropertyValue,
 };
 use remote_storage::RemoteStorageKind;
@@ -45,24 +45,16 @@ impl FeatureResolver {
    ) -> anyhow::Result<Self> {
        // DO NOT block in this function: make it return as fast as possible to avoid startup delays.
        if let Some(posthog_config) = &conf.posthog_config {
-            let posthog_client_config = match posthog_config.clone().try_into_posthog_config() {
-                Ok(config) => config,
-                Err(e) => {
-                    tracing::warn!(
-                        "invalid posthog config, skipping posthog integration: {}",
-                        e
-                    );
-                    return Ok(FeatureResolver {
-                        inner: None,
-                        internal_properties: None,
-                        force_overrides_for_testing: Arc::new(ArcSwap::new(Arc::new(
-                            HashMap::new(),
-                        ))),
-                    });
-                }
-            };
-            let inner =
-                FeatureResolverBackgroundLoop::new(posthog_client_config, shutdown_pageserver);
+            let inner = FeatureResolverBackgroundLoop::new(
+                PostHogClientConfig {
+                    server_api_key: posthog_config.server_api_key.clone(),
+                    client_api_key: posthog_config.client_api_key.clone(),
+                    project_id: posthog_config.project_id.clone(),
+                    private_api_url: posthog_config.private_api_url.clone(),
+                    public_api_url: posthog_config.public_api_url.clone(),
+                },
+                shutdown_pageserver,
+            );
            let inner = Arc::new(inner);

            // The properties shared by all tenants on this pageserver.
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1893,13 +1893,9 @@ async fn update_tenant_config_handler(
    let location_conf = LocationConf::attached_single(
        new_tenant_conf.clone(),
        tenant.get_generation(),
-        ShardParameters::from(tenant.get_shard_identity()),
+        &ShardParameters::default(),
    );

-    tenant
-        .get_shard_identity()
-        .assert_equal(location_conf.shard); // not strictly necessary since we construct it above
-
    crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
        .await
        .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
@@ -1941,13 +1937,9 @@ async fn patch_tenant_config_handler(
    let location_conf = LocationConf::attached_single(
        updated,
        tenant.get_generation(),
-        ShardParameters::from(tenant.get_shard_identity()),
+        &ShardParameters::default(),
    );

-    tenant
-        .get_shard_identity()
-        .assert_equal(location_conf.shard); // not strictly necessary since we construct it above
-
    crate::tenant::TenantShard::persist_tenant_config(state.conf, &tenant_shard_id, &location_conf)
        .await
        .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -4439,14 +4439,6 @@ pub(crate) static BASEBACKUP_CACHE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

-pub(crate) static BASEBACKUP_CACHE_PREPARE_QUEUE_SIZE: Lazy<UIntGauge> = Lazy::new(|| {
-    register_uint_gauge!(
-        "pageserver_basebackup_cache_prepare_queue_size",
-        "Number of requests in the basebackup prepare channel"
-    )
-    .expect("failed to define a metric")
-});
-
 static PAGESERVER_CONFIG_IGNORED_ITEMS: Lazy<UIntGaugeVec> = Lazy::new(|| {
    register_uint_gauge_vec!(
        "pageserver_config_ignored_items",
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -12,9 +12,9 @@ use std::task::{Context, Poll};
 use std::time::{Duration, Instant, SystemTime};
 use std::{io, str};

-use anyhow::{Context as _, bail};
+use anyhow::{Context as _, anyhow, bail};
+use async_compression::tokio::write::GzipEncoder;
 use bytes::{Buf as _, BufMut as _, BytesMut};
-use chrono::Utc;
 use futures::future::BoxFuture;
 use futures::{FutureExt, Stream};
 use itertools::Itertools;
@@ -63,6 +63,7 @@ use utils::{failpoint_support, span_record};

 use crate::auth::check_permission;
 use crate::basebackup::{self, BasebackupError};
+use crate::basebackup_cache::BasebackupCache;
 use crate::config::PageServerConf;
 use crate::context::{
    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
@@ -137,6 +138,7 @@ pub fn spawn(
    perf_trace_dispatch: Option<Dispatch>,
    tcp_listener: tokio::net::TcpListener,
    tls_config: Option<Arc<rustls::ServerConfig>>,
+    basebackup_cache: Arc<BasebackupCache>,
 ) -> Listener {
    let cancel = CancellationToken::new();
    let libpq_ctx = RequestContext::todo_child(
@@ -158,6 +160,7 @@ pub fn spawn(
            conf.pg_auth_type,
            tls_config,
            conf.page_service_pipelining.clone(),
+            basebackup_cache,
            libpq_ctx,
            cancel.clone(),
        )
@@ -216,6 +219,7 @@ pub async fn libpq_listener_main(
    auth_type: AuthType,
    tls_config: Option<Arc<rustls::ServerConfig>>,
    pipelining_config: PageServicePipeliningConfig,
+    basebackup_cache: Arc<BasebackupCache>,
    listener_ctx: RequestContext,
    listener_cancel: CancellationToken,
 ) -> Connections {
@@ -259,6 +263,7 @@ pub async fn libpq_listener_main(
                    auth_type,
                    tls_config.clone(),
                    pipelining_config.clone(),
+                    Arc::clone(&basebackup_cache),
                    connection_ctx,
                    connections_cancel.child_token(),
                    gate_guard,
@@ -301,6 +306,7 @@ async fn page_service_conn_main(
    auth_type: AuthType,
    tls_config: Option<Arc<rustls::ServerConfig>>,
    pipelining_config: PageServicePipeliningConfig,
+    basebackup_cache: Arc<BasebackupCache>,
    connection_ctx: RequestContext,
    cancel: CancellationToken,
    gate_guard: GateGuard,
@@ -366,6 +372,7 @@ async fn page_service_conn_main(
        pipelining_config,
        conf.get_vectored_concurrent_io,
        perf_span_fields,
+        basebackup_cache,
        connection_ctx,
        cancel.clone(),
        gate_guard,
@@ -419,6 +426,8 @@ struct PageServerHandler {
    pipelining_config: PageServicePipeliningConfig,
    get_vectored_concurrent_io: GetVectoredConcurrentIo,

+    basebackup_cache: Arc<BasebackupCache>,
+
    gate_guard: GateGuard,
 }

@@ -904,6 +913,7 @@ impl PageServerHandler {
        pipelining_config: PageServicePipeliningConfig,
        get_vectored_concurrent_io: GetVectoredConcurrentIo,
        perf_span_fields: ConnectionPerfSpanFields,
+        basebackup_cache: Arc<BasebackupCache>,
        connection_ctx: RequestContext,
        cancel: CancellationToken,
        gate_guard: GateGuard,
@@ -917,6 +927,7 @@ impl PageServerHandler {
            cancel,
            pipelining_config,
            get_vectored_concurrent_io,
+            basebackup_cache,
            gate_guard,
        }
    }
@@ -2602,16 +2613,26 @@ impl PageServerHandler {
                prev_lsn,
                full_backup,
                replica,
-                None,
                &ctx,
            )
            .await?;
        } else {
            let mut writer = BufWriter::new(pgb.copyout_writer());

-            let cached = timeline
-                .get_cached_basebackup_if_enabled(lsn, prev_lsn, full_backup, replica, gzip)
-                .await;
+            let cached = {
+                // Basebackup is cached only for this combination of parameters.
+                if timeline.is_basebackup_cache_enabled()
+                    && gzip
+                    && lsn.is_some()
+                    && prev_lsn.is_none()
+                {
+                    self.basebackup_cache
+                        .get(tenant_id, timeline_id, lsn.unwrap())
+                        .await
+                } else {
+                    None
+                }
+            };

            if let Some(mut cached) = cached {
                from_cache = true;
@@ -2620,6 +2641,31 @@ impl PageServerHandler {
                    .map_err(|err| {
                        BasebackupError::Client(err, "handle_basebackup_request,cached,copy")
                    })?;
+            } else if gzip {
+                let mut encoder = GzipEncoder::with_quality(
+                    &mut writer,
+                    // NOTE using fast compression because it's on the critical path
+                    //      for compute startup. For an empty database, we get
+                    //      <100KB with this method. The Level::Best compression method
+                    //      gives us <20KB, but maybe we should add basebackup caching
+                    //      on compute shutdown first.
+                    async_compression::Level::Fastest,
+                );
+                basebackup::send_basebackup_tarball(
+                    &mut encoder,
+                    &timeline,
+                    lsn,
+                    prev_lsn,
+                    full_backup,
+                    replica,
+                    &ctx,
+                )
+                .await?;
+                // Shut down the encoder to ensure the gzip footer is written.
+                encoder
+                    .shutdown()
+                    .await
+                    .map_err(|e| QueryError::Disconnected(ConnectionError::Io(e)))?;
            } else {
                basebackup::send_basebackup_tarball(
                    &mut writer,
@@ -2628,11 +2674,6 @@ impl PageServerHandler {
                    prev_lsn,
                    full_backup,
                    replica,
-                    // NB: using fast compression because it's on the critical path for compute
-                    // startup. For an empty database, we get <100KB with this method. The
-                    // Level::Best compression method gives us <20KB, but maybe we should add
-                    // basebackup caching on compute shutdown first.
-                    gzip.then_some(async_compression::Level::Fastest),
                    &ctx,
                )
                .await?;
@@ -3512,7 +3553,7 @@ impl proto::PageService for GrpcPageServiceHandler {
        if timeline.is_archived() == Some(true) {
            return Err(tonic::Status::failed_precondition("timeline is archived"));
        }
-        let req: page_api::GetBaseBackupRequest = req.into_inner().try_into()?;
+        let req: page_api::GetBaseBackupRequest = req.into_inner().into();

        span_record!(lsn=?req.lsn);

@@ -3538,50 +3579,20 @@ impl proto::PageService for GrpcPageServiceHandler {
        let span = Span::current();
        let (mut simplex_read, mut simplex_write) = tokio::io::simplex(CHUNK_SIZE);
        let jh = tokio::spawn(async move {
-            let gzip_level = match req.compression {
-                page_api::BaseBackupCompression::None => None,
-                // NB: using fast compression because it's on the critical path for compute
-                // startup. For an empty database, we get <100KB with this method. The
-                // Level::Best compression method gives us <20KB, but maybe we should add
-                // basebackup caching on compute shutdown first.
-                page_api::BaseBackupCompression::Gzip => Some(async_compression::Level::Fastest),
-            };
-
-            // Check for a cached basebackup.
-            let cached = timeline
-                .get_cached_basebackup_if_enabled(
-                    req.lsn,
-                    None,
-                    req.full,
-                    req.replica,
-                    gzip_level.is_some(),
-                )
-                .await;
-
-            let result = if let Some(mut cached) = cached {
-                // If we have a cached basebackup, send it.
-                tokio::io::copy(&mut cached, &mut simplex_write)
-                    .await
-                    .map(|_| ())
-                    .map_err(|err| BasebackupError::Client(err, "cached,copy"))
-            } else {
-                basebackup::send_basebackup_tarball(
-                    &mut simplex_write,
-                    &timeline,
-                    req.lsn,
-                    None,
-                    req.full,
-                    req.replica,
-                    gzip_level,
-                    &ctx,
-                )
-                .instrument(span) // propagate request span
-                .await
-            };
-            simplex_write
-                .shutdown()
-                .await
-                .map_err(|err| BasebackupError::Client(err, "simplex_write"))?;
+            let result = basebackup::send_basebackup_tarball(
+                &mut simplex_write,
+                &timeline,
+                req.lsn,
+                None,
+                req.full,
+                req.replica,
+                &ctx,
+            )
+            .instrument(span) // propagate request span
+            .await;
+            simplex_write.shutdown().await.map_err(|err| {
+                BasebackupError::Server(anyhow!("simplex shutdown failed: {err}"))
+            })?;
            result
        });

@@ -3761,36 +3772,6 @@ impl proto::PageService for GrpcPageServiceHandler {
        let resp: page_api::GetSlruSegmentResponse = resp.segment;
        Ok(tonic::Response::new(resp.into()))
    }
-
-    #[instrument(skip_all, fields(lsn))]
-    async fn lease_lsn(
-        &self,
-        req: tonic::Request<proto::LeaseLsnRequest>,
-    ) -> Result<tonic::Response<proto::LeaseLsnResponse>, tonic::Status> {
-        let timeline = self.get_request_timeline(&req).await?;
-        let ctx = self.ctx.with_scope_timeline(&timeline);
-
-        // Validate and convert the request, and decorate the span.
-        let req: page_api::LeaseLsnRequest = req.into_inner().try_into()?;
-
-        span_record!(lsn=%req.lsn);
-
-        // Attempt to acquire a lease. Return FailedPrecondition if the lease could not be granted.
-        let lease_length = timeline.get_lsn_lease_length();
-        let expires = match timeline.renew_lsn_lease(req.lsn, lease_length, &ctx) {
-            Ok(lease) => lease.valid_until,
-            Err(err) => return Err(tonic::Status::failed_precondition(format!("{err}"))),
-        };
-
-        // TODO: is this spammy? Move it compute-side?
-        info!(
-            "acquired lease for {} until {}",
-            req.lsn,
-            chrono::DateTime::<Utc>::from(expires).to_rfc3339()
-        );
-
-        Ok(tonic::Response::new(expires.into()))
-    }
 }

 /// gRPC middleware layer that handles observability concerns:
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -3015,7 +3015,7 @@ mod tests {
        // This shard will get the even blocks
        let shard = ShardIdentity::from_params(
            ShardNumber(0),
-            ShardParameters {
+            &ShardParameters {
                count: ShardCount(2),
                stripe_size: ShardStripeSize(1),
            },
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -80,7 +80,7 @@ use self::timeline::uninit::{TimelineCreateGuard, TimelineExclusionError, Uninit
 use self::timeline::{
    EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError,
 };
-use crate::basebackup_cache::BasebackupCache;
+use crate::basebackup_cache::BasebackupPrepareSender;
 use crate::config::PageServerConf;
 use crate::context;
 use crate::context::RequestContextBuilder;
@@ -162,7 +162,7 @@ pub struct TenantSharedResources {
    pub remote_storage: GenericRemoteStorage,
    pub deletion_queue_client: DeletionQueueClient,
    pub l0_flush_global_state: L0FlushGlobalState,
-    pub basebackup_cache: Arc<BasebackupCache>,
+    pub basebackup_prepare_sender: BasebackupPrepareSender,
    pub feature_resolver: FeatureResolver,
 }

@@ -331,7 +331,7 @@ pub struct TenantShard {
    deletion_queue_client: DeletionQueueClient,

    /// A channel to send async requests to prepare a basebackup for the basebackup cache.
-    basebackup_cache: Arc<BasebackupCache>,
+    basebackup_prepare_sender: BasebackupPrepareSender,

    /// Cached logical sizes updated updated on each [`TenantShard::gather_size_inputs`].
    cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
@@ -1363,7 +1363,7 @@ impl TenantShard {
            remote_storage,
            deletion_queue_client,
            l0_flush_global_state,
-            basebackup_cache,
+            basebackup_prepare_sender,
            feature_resolver,
        } = resources;

@@ -1380,7 +1380,7 @@ impl TenantShard {
            remote_storage.clone(),
            deletion_queue_client,
            l0_flush_global_state,
-            basebackup_cache,
+            basebackup_prepare_sender,
            feature_resolver,
        ));

@@ -3872,10 +3872,6 @@ impl TenantShard {
        &self.tenant_shard_id
    }

-    pub(crate) fn get_shard_identity(&self) -> ShardIdentity {
-        self.shard_identity
-    }
-
    pub(crate) fn get_shard_stripe_size(&self) -> ShardStripeSize {
        self.shard_identity.stripe_size
    }
@@ -4384,7 +4380,7 @@ impl TenantShard {
        remote_storage: GenericRemoteStorage,
        deletion_queue_client: DeletionQueueClient,
        l0_flush_global_state: L0FlushGlobalState,
-        basebackup_cache: Arc<BasebackupCache>,
+        basebackup_prepare_sender: BasebackupPrepareSender,
        feature_resolver: FeatureResolver,
    ) -> TenantShard {
        assert!(!attached_conf.location.generation.is_none());
@@ -4489,7 +4485,7 @@ impl TenantShard {
            ongoing_timeline_detach: std::sync::Mutex::default(),
            gc_block: Default::default(),
            l0_flush_global_state,
-            basebackup_cache,
+            basebackup_prepare_sender,
            feature_resolver,
        }
    }
@@ -4529,10 +4525,6 @@ impl TenantShard {
        Ok(toml_edit::de::from_str::<LocationConf>(&config)?)
    }

-    /// Stores a tenant location config to disk.
-    ///
-    /// NB: make sure to call `ShardIdentity::assert_equal` before persisting a new config, to avoid
-    /// changes to shard parameters that may result in data corruption.
    #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
    pub(super) async fn persist_tenant_config(
        conf: &'static PageServerConf,
@@ -5422,7 +5414,7 @@ impl TenantShard {
            pagestream_throttle_metrics: self.pagestream_throttle_metrics.clone(),
            l0_compaction_trigger: self.l0_compaction_trigger.clone(),
            l0_flush_global_state: self.l0_flush_global_state.clone(),
-            basebackup_cache: self.basebackup_cache.clone(),
+            basebackup_prepare_sender: self.basebackup_prepare_sender.clone(),
            feature_resolver: self.feature_resolver.clone(),
        }
    }
@@ -6008,7 +6000,7 @@ pub(crate) mod harness {
        ) -> anyhow::Result<Arc<TenantShard>> {
            let walredo_mgr = Arc::new(WalRedoManager::from(TestRedoManager));

-            let (basebackup_cache, _) = BasebackupCache::new(Utf8PathBuf::new(), None);
+            let (basebackup_requst_sender, _) = tokio::sync::mpsc::unbounded_channel();

            let tenant = Arc::new(TenantShard::new(
                TenantState::Attaching,
@@ -6016,7 +6008,7 @@ pub(crate) mod harness {
                AttachedTenantConf::try_from(LocationConf::attached_single(
                    self.tenant_conf.clone(),
                    self.generation,
-                    ShardParameters::default(),
+                    &ShardParameters::default(),
                ))
                .unwrap(),
                self.shard_identity,
@@ -6026,7 +6018,7 @@ pub(crate) mod harness {
                self.deletion_queue.new_client(),
                // TODO: ideally we should run all unit tests with both configs
                L0FlushGlobalState::new(L0FlushConfig::default()),
-                basebackup_cache,
+                basebackup_requst_sender,
                FeatureResolver::new_disabled(),
            ));

@@ -11437,11 +11429,11 @@ mod tests {
        if left != right {
            eprintln!("---LEFT---");
            for left in left.iter() {
-                eprintln!("{left}");
+                eprintln!("{}", left);
            }
            eprintln!("---RIGHT---");
            for right in right.iter() {
-                eprintln!("{right}");
+                eprintln!("{}", right);
            }
            assert_eq!(left, right);
        }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -12,7 +12,6 @@
 use pageserver_api::models;
 use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize};
 use serde::{Deserialize, Serialize};
-use utils::critical;
 use utils::generation::Generation;

 #[derive(Debug, Copy, Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -137,7 +136,7 @@ impl LocationConf {
    pub(crate) fn attached_single(
        tenant_conf: pageserver_api::models::TenantConfig,
        generation: Generation,
-        shard_params: models::ShardParameters,
+        shard_params: &models::ShardParameters,
    ) -> Self {
        Self {
            mode: LocationMode::Attached(AttachedLocationConfig {
@@ -172,16 +171,6 @@ impl LocationConf {
            }
        }

-        // This should never happen.
-        // TODO: turn this into a proper assertion.
-        if stripe_size != self.shard.stripe_size {
-            critical!(
-                "stripe size mismatch: {} != {}",
-                self.shard.stripe_size,
-                stripe_size,
-            );
-        }
-
        self.shard.stripe_size = stripe_size;
    }

--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -880,9 +880,6 @@ impl TenantManager {
        // phase of writing config and/or waiting for flush, before returning.
        match fast_path_taken {
            Some(FastPathModified::Attached(tenant)) => {
-                tenant
-                    .shard_identity
-                    .assert_equal(new_location_config.shard);
                TenantShard::persist_tenant_config(
                    self.conf,
                    &tenant_shard_id,
@@ -917,10 +914,7 @@ impl TenantManager {

                return Ok(Some(tenant));
            }
-            Some(FastPathModified::Secondary(secondary_tenant)) => {
-                secondary_tenant
-                    .shard_identity
-                    .assert_equal(new_location_config.shard);
+            Some(FastPathModified::Secondary(_secondary_tenant)) => {
                TenantShard::persist_tenant_config(
                    self.conf,
                    &tenant_shard_id,
@@ -954,10 +948,6 @@ impl TenantManager {

        match slot_guard.get_old_value() {
            Some(TenantSlot::Attached(tenant)) => {
-                tenant
-                    .shard_identity
-                    .assert_equal(new_location_config.shard);
-
                // The case where we keep a Tenant alive was covered above in the special case
                // for Attached->Attached transitions in the same generation.  By this point,
                // if we see an attached tenant we know it will be discarded and should be
@@ -991,13 +981,9 @@ impl TenantManager {
                // rather than assuming it to be empty.
                spawn_mode = SpawnMode::Eager;
            }
-            Some(TenantSlot::Secondary(secondary_tenant)) => {
-                secondary_tenant
-                    .shard_identity
-                    .assert_equal(new_location_config.shard);
-
+            Some(TenantSlot::Secondary(state)) => {
                info!("Shutting down secondary tenant");
-                secondary_tenant.shutdown().await;
+                state.shutdown().await;
            }
            Some(TenantSlot::InProgress(_)) => {
                // This should never happen: acquire_slot should error out
@@ -2214,7 +2200,7 @@ impl TenantManager {
        selector: ShardSelector,
    ) -> ShardResolveResult {
        let tenants = self.tenants.read().unwrap();
-        let mut want_shard: Option<ShardIndex> = None;
+        let mut want_shard = None;
        let mut any_in_progress = None;

        match &*tenants {
@@ -2239,23 +2225,14 @@ impl TenantManager {
                            return ShardResolveResult::Found(tenant.clone());
                        }
                        ShardSelector::Page(key) => {
-                            // Each time we find an attached slot with a different shard count,
-                            // recompute the expected shard number: during shard splits we might
-                            // have multiple shards with the old shard count.
-                            if want_shard.is_none()
-                                || want_shard.unwrap().shard_count != tenant.shard_identity.count
-                            {
-                                want_shard = Some(ShardIndex {
-                                    shard_number: tenant.shard_identity.get_shard_number(&key),
-                                    shard_count: tenant.shard_identity.count,
-                                });
+                            // First slot we see for this tenant, calculate the expected shard number
+                            // for the key: we will use this for checking if this and subsequent
+                            // slots contain the key, rather than recalculating the hash each time.
+                            if want_shard.is_none() {
+                                want_shard = Some(tenant.shard_identity.get_shard_number(&key));
                            }

-                            if Some(ShardIndex {
-                                shard_number: tenant.shard_identity.number,
-                                shard_count: tenant.shard_identity.count,
-                            }) == want_shard
-                            {
+                            if Some(tenant.shard_identity.number) == want_shard {
                                return ShardResolveResult::Found(tenant.clone());
                            }
                        }
@@ -2914,18 +2891,14 @@ mod tests {
    use std::collections::BTreeMap;
    use std::sync::Arc;

-    use camino::Utf8PathBuf;
    use storage_broker::BrokerClientChannel;
    use tracing::Instrument;

    use super::super::harness::TenantHarness;
    use super::TenantsMap;
-    use crate::{
-        basebackup_cache::BasebackupCache,
-        tenant::{
-            TenantSharedResources,
-            mgr::{BackgroundPurges, TenantManager, TenantSlot},
-        },
+    use crate::tenant::{
+        TenantSharedResources,
+        mgr::{BackgroundPurges, TenantManager, TenantSlot},
    };

    #[tokio::test(start_paused = true)]
@@ -2951,7 +2924,9 @@ mod tests {
        // Invoke remove_tenant_from_memory with a cleanup hook that blocks until we manually
        // permit it to proceed: that will stick the tenant in InProgress

-        let (basebackup_cache, _) = BasebackupCache::new(Utf8PathBuf::new(), None);
+        let (basebackup_prepare_sender, _) = tokio::sync::mpsc::unbounded_channel::<
+            crate::basebackup_cache::BasebackupPrepareRequest,
+        >();

        let tenant_manager = TenantManager {
            tenants: std::sync::RwLock::new(TenantsMap::Open(tenants)),
@@ -2965,7 +2940,7 @@ mod tests {
                l0_flush_global_state: crate::l0_flush::L0FlushGlobalState::new(
                    h.conf.l0_flush.clone(),
                ),
-                basebackup_cache,
+                basebackup_prepare_sender,
                feature_resolver: crate::feature_resolver::FeatureResolver::new_disabled(),
            },
            cancel: tokio_util::sync::CancellationToken::new(),
--- a/pageserver/src/tenant/secondary.rs
+++ b/pageserver/src/tenant/secondary.rs
@@ -101,7 +101,7 @@ pub(crate) struct SecondaryTenant {
    // Secondary mode does not need the full shard identity or the pageserver_api::models::TenantConfig.  However,
    // storing these enables us to report our full LocationConf, enabling convenient reconciliation
    // by the control plane (see [`Self::get_location_conf`])
-    pub(crate) shard_identity: ShardIdentity,
+    shard_identity: ShardIdentity,
    tenant_conf: std::sync::Mutex<pageserver_api::models::TenantConfig>,

    // Internal state used by the Downloader.
--- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs
+++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs
@@ -55,11 +55,11 @@ pub struct BatchLayerWriter {
 }

 impl BatchLayerWriter {
-    pub fn new(conf: &'static PageServerConf) -> Self {
-        Self {
+    pub async fn new(conf: &'static PageServerConf) -> anyhow::Result<Self> {
+        Ok(Self {
            generated_layer_writers: Vec::new(),
            conf,
-        }
+        })
    }

    pub fn add_unfinished_image_writer(
@@ -209,7 +209,6 @@ impl<'a> SplitImageLayerWriter<'a> {
    ) -> anyhow::Result<Self> {
        Ok(Self {
            target_layer_size,
-            // XXX make this lazy like in SplitDeltaLayerWriter?
            inner: ImageLayerWriter::new(
                conf,
                timeline_id,
@@ -224,7 +223,7 @@ impl<'a> SplitImageLayerWriter<'a> {
            conf,
            timeline_id,
            tenant_shard_id,
-            batches: BatchLayerWriter::new(conf),
+            batches: BatchLayerWriter::new(conf).await?,
            lsn,
            start_key,
            gate,
@@ -320,7 +319,7 @@ pub struct SplitDeltaLayerWriter<'a> {
 }

 impl<'a> SplitDeltaLayerWriter<'a> {
-    pub fn new(
+    pub async fn new(
        conf: &'static PageServerConf,
        timeline_id: TimelineId,
        tenant_shard_id: TenantShardId,
@@ -328,8 +327,8 @@ impl<'a> SplitDeltaLayerWriter<'a> {
        target_layer_size: u64,
        gate: &'a utils::sync::gate::Gate,
        cancel: CancellationToken,
-    ) -> Self {
-        Self {
+    ) -> anyhow::Result<Self> {
+        Ok(Self {
            target_layer_size,
            inner: None,
            conf,
@@ -337,10 +336,10 @@ impl<'a> SplitDeltaLayerWriter<'a> {
            tenant_shard_id,
            lsn_range,
            last_key_written: Key::MIN,
-            batches: BatchLayerWriter::new(conf),
+            batches: BatchLayerWriter::new(conf).await?,
            gate,
            cancel,
-        }
+        })
    }

    pub async fn put_value(
@@ -511,7 +510,9 @@ mod tests {
            4 * 1024 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        );
+        )
+        .await
+        .unwrap();

        image_writer
            .put_image(get_key(0), get_img(0), &ctx)
@@ -589,7 +590,9 @@ mod tests {
            4 * 1024 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        );
+        )
+        .await
+        .unwrap();
        const N: usize = 2000;
        for i in 0..N {
            let i = i as u32;
@@ -689,7 +692,9 @@ mod tests {
            4 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        );
+        )
+        .await
+        .unwrap();

        image_writer
            .put_image(get_key(0), get_img(0), &ctx)
@@ -765,7 +770,9 @@ mod tests {
            4 * 1024 * 1024,
            &tline.gate,
            tline.cancel.clone(),
-        );
+        )
+        .await
+        .unwrap();

        for i in 0..N {
            let i = i as u32;
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -17,17 +17,14 @@ use tracing::*;
 use utils::backoff::exponential_backoff_duration;
 use utils::completion::Barrier;
 use utils::pausable_failpoint;
-use utils::sync::gate::GateError;

 use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS};
 use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind};
-use crate::tenant::blob_io::WriteBlobError;
 use crate::tenant::throttle::Stats;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::timeline::compaction::CompactionOutcome;
 use crate::tenant::{TenantShard, TenantState};
-use crate::virtual_file::owned_buffers_io::write::FlushTaskError;

 /// Semaphore limiting concurrent background tasks (across all tenants).
 ///
@@ -316,20 +313,7 @@ pub(crate) fn log_compaction_error(
            let timeline = root_cause
                .downcast_ref::<PageReconstructError>()
                .is_some_and(|e| e.is_stopping());
-            let buffered_writer_flush_task_canelled = root_cause
-                .downcast_ref::<FlushTaskError>()
-                .is_some_and(|e| e.is_cancel());
-            let write_blob_cancelled = root_cause
-                .downcast_ref::<WriteBlobError>()
-                .is_some_and(|e| e.is_cancel());
-            let gate_closed = root_cause
-                .downcast_ref::<GateError>()
-                .is_some_and(|e| e.is_cancel());
-            let is_stopping = upload_queue
-                || timeline
-                || buffered_writer_flush_task_canelled
-                || write_blob_cancelled
-                || gate_closed;
+            let is_stopping = upload_queue || timeline;

            if is_stopping {
                Level::INFO
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -95,12 +95,12 @@ use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer};
 use super::tasks::log_compaction_error;
 use super::upload_queue::NotInitialized;
 use super::{
-    AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded,
+    AttachedTenantConf, BasebackupPrepareSender, GcError, HeatMapTimeline, MaybeOffloaded,
    debug_assert_current_span_has_tenant_and_timeline_id,
 };
 use crate::PERF_TRACE_TARGET;
 use crate::aux_file::AuxFileSizeEstimator;
-use crate::basebackup_cache::BasebackupCache;
+use crate::basebackup_cache::BasebackupPrepareRequest;
 use crate::config::PageServerConf;
 use crate::context::{
    DownloadBehavior, PerfInstrumentFutureExt, RequestContext, RequestContextBuilder,
@@ -201,7 +201,7 @@ pub struct TimelineResources {
    pub pagestream_throttle_metrics: Arc<crate::metrics::tenant_throttling::Pagestream>,
    pub l0_compaction_trigger: Arc<Notify>,
    pub l0_flush_global_state: l0_flush::L0FlushGlobalState,
-    pub basebackup_cache: Arc<BasebackupCache>,
+    pub basebackup_prepare_sender: BasebackupPrepareSender,
    pub feature_resolver: FeatureResolver,
 }

@@ -448,7 +448,7 @@ pub struct Timeline {
    wait_lsn_log_slow: tokio::sync::Semaphore,

    /// A channel to send async requests to prepare a basebackup for the basebackup cache.
-    basebackup_cache: Arc<BasebackupCache>,
+    basebackup_prepare_sender: BasebackupPrepareSender,

    feature_resolver: FeatureResolver,
 }
@@ -763,7 +763,7 @@ pub(crate) enum CreateImageLayersError {
    PageReconstructError(#[source] PageReconstructError),

    #[error(transparent)]
-    Other(anyhow::Error),
+    Other(#[from] anyhow::Error),
 }

 impl From<layer_manager::Shutdown> for CreateImageLayersError {
@@ -2500,37 +2500,6 @@ impl Timeline {
            .unwrap_or(self.conf.default_tenant_conf.basebackup_cache_enabled)
    }

-    /// Try to get a basebackup from the on-disk cache.
-    pub(crate) async fn get_cached_basebackup(&self, lsn: Lsn) -> Option<tokio::fs::File> {
-        self.basebackup_cache
-            .get(self.tenant_shard_id.tenant_id, self.timeline_id, lsn)
-            .await
-    }
-
-    /// Convenience method to attempt fetching a basebackup for the timeline if enabled and safe for
-    /// the given request parameters.
-    ///
-    /// TODO: consider moving this onto GrpcPageServiceHandler once the libpq handler is gone.
-    pub async fn get_cached_basebackup_if_enabled(
-        &self,
-        lsn: Option<Lsn>,
-        prev_lsn: Option<Lsn>,
-        full: bool,
-        replica: bool,
-        gzip: bool,
-    ) -> Option<tokio::fs::File> {
-        if !self.is_basebackup_cache_enabled() || !self.basebackup_cache.is_enabled() {
-            return None;
-        }
-        // We have to know which LSN to fetch the basebackup for.
-        let lsn = lsn?;
-        // We only cache gzipped, non-full basebackups for primary computes with automatic prev_lsn.
-        if prev_lsn.is_some() || full || replica || !gzip {
-            return None;
-        }
-        self.get_cached_basebackup(lsn).await
-    }
-
    /// Prepare basebackup for the given LSN and store it in the basebackup cache.
    /// The method is asynchronous and returns immediately.
    /// The actual basebackup preparation is performed in the background
@@ -2552,8 +2521,17 @@ impl Timeline {
            return;
        }

-        self.basebackup_cache
-            .send_prepare(self.tenant_shard_id, self.timeline_id, lsn);
+        let res = self
+            .basebackup_prepare_sender
+            .send(BasebackupPrepareRequest {
+                tenant_shard_id: self.tenant_shard_id,
+                timeline_id: self.timeline_id,
+                lsn,
+            });
+        if let Err(e) = res {
+            // May happen during shutdown, it's not critical.
+            info!("Failed to send basebackup prepare request: {e:#}");
+        }
    }
 }

@@ -3110,7 +3088,7 @@ impl Timeline {

                wait_lsn_log_slow: tokio::sync::Semaphore::new(1),

-                basebackup_cache: resources.basebackup_cache,
+                basebackup_prepare_sender: resources.basebackup_prepare_sender,

                feature_resolver: resources.feature_resolver,
            };
@@ -4680,16 +4658,6 @@ impl Timeline {
        mut layer_flush_start_rx: tokio::sync::watch::Receiver<(u64, Lsn)>,
        ctx: &RequestContext,
    ) {
-        // Always notify waiters about the flush loop exiting since the loop might stop
-        // when the timeline hasn't been cancelled.
-        let scopeguard_rx = layer_flush_start_rx.clone();
-        scopeguard::defer! {
-            let (flush_counter, _) = *scopeguard_rx.borrow();
-            let _ = self
-                .layer_flush_done_tx
-                .send_replace((flush_counter, Err(FlushLayerError::Cancelled)));
-        }
-
        // Subscribe to L0 delta layer updates, for compaction backpressure.
        let mut watch_l0 = match self
            .layers
@@ -4719,6 +4687,9 @@ impl Timeline {
            let result = loop {
                if self.cancel.is_cancelled() {
                    info!("dropping out of flush loop for timeline shutdown");
+                    // Note: we do not bother transmitting into [`layer_flush_done_tx`], because
+                    // anyone waiting on that will respect self.cancel as well: they will stop
+                    // waiting at the same time as we drop out of this loop.
                    return;
                }

@@ -5590,7 +5561,7 @@ impl Timeline {
                self.should_check_if_image_layers_required(lsn)
            };

-        let mut batch_image_writer = BatchLayerWriter::new(self.conf);
+        let mut batch_image_writer = BatchLayerWriter::new(self.conf).await?;

        let mut all_generated = true;

@@ -5694,8 +5665,7 @@ impl Timeline {
                self.cancel.clone(),
                ctx,
            )
-            .await
-            .map_err(CreateImageLayersError::Other)?;
+            .await?;

            fail_point!("image-layer-writer-fail-before-finish", |_| {
                Err(CreateImageLayersError::Other(anyhow::anyhow!(
@@ -5790,10 +5760,7 @@ impl Timeline {
            }
        }

-        let image_layers = batch_image_writer
-            .finish(self, ctx)
-            .await
-            .map_err(CreateImageLayersError::Other)?;
+        let image_layers = batch_image_writer.finish(self, ctx).await?;

        let mut guard = self.layers.write(LayerManagerLockHolder::Compaction).await;

--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -3531,7 +3531,10 @@ impl Timeline {
            self.get_compaction_target_size(),
            &self.gate,
            self.cancel.clone(),
-        );
+        )
+        .await
+        .context("failed to create delta layer writer")
+        .map_err(CompactionError::Other)?;

        #[derive(Default)]
        struct RewritingLayers {
@@ -4327,8 +4330,7 @@ impl TimelineAdaptor {
            self.timeline.cancel.clone(),
            ctx,
        )
-        .await
-        .map_err(CreateImageLayersError::Other)?;
+        .await?;

        fail_point!("image-layer-writer-fail-before-finish", |_| {
            Err(CreateImageLayersError::Other(anyhow::anyhow!(
@@ -4337,10 +4339,7 @@ impl TimelineAdaptor {
        });

        let keyspace = KeySpace {
-            ranges: self
-                .get_keyspace(key_range, lsn, ctx)
-                .await
-                .map_err(CreateImageLayersError::Other)?,
+            ranges: self.get_keyspace(key_range, lsn, ctx).await?,
        };
        // TODO set proper (stateful) start. The create_image_layer_for_rel_blocks function mostly
        let outcome = self
@@ -4359,13 +4358,9 @@ impl TimelineAdaptor {
            unfinished_image_layer,
        } = outcome
        {
-            let (desc, path) = unfinished_image_layer
-                .finish(ctx)
-                .await
-                .map_err(CreateImageLayersError::Other)?;
+            let (desc, path) = unfinished_image_layer.finish(ctx).await?;
            let image_layer =
-                Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)
-                    .map_err(CreateImageLayersError::Other)?;
+                Layer::finish_creating(self.timeline.conf, &self.timeline, desc, &path)?;
            self.new_images.push(image_layer);
        }

--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -241,17 +241,8 @@ impl DeleteTimelineFlow {
                {
                    Ok(r) => r,
                    Err(DownloadError::NotFound) => {
-                        // Deletion is already complete.
-                        // As we came here, we will need to remove the timeline from the tenant though.
+                        // Deletion is already complete
                        tracing::info!("Timeline already deleted in remote storage");
-                        if let TimelineOrOffloaded::Offloaded(_) = &timeline {
-                            // We only supoprt this for offloaded timelines, as we don't know which state non-offloaded timelines are in.
-                            tracing::info!(
-                                "Timeline with gone index part is offloaded timeline. Removing from tenant."
-                            );
-                            remove_maybe_offloaded_timeline_from_tenant(tenant, &timeline, &guard)
-                                .await?;
-                        }
                        return Ok(());
                    }
                    Err(e) => {
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -885,7 +885,7 @@ async fn remote_copy(
                }
                tracing::info!("Deleting orphan layer file to make way for hard linking");
                // Delete orphan layer file and try again, to ensure this layer has a well understood source
-                std::fs::remove_file(&adoptee_path)
+                std::fs::remove_file(&adoptee_path)
                    .map_err(|e| Error::launder(e.into(), Error::Prepare))?;
                std::fs::hard_link(adopted_path, &adoptee_path)
                    .map_err(|e| Error::launder(e.into(), Error::Prepare))?;
--- a/pageserver/src/tenant/timeline/handle.rs
+++ b/pageserver/src/tenant/timeline/handle.rs
@@ -887,7 +887,7 @@ mod tests {
            .expect("we still have it");
    }

-    fn make_relation_key_for_shard(shard: ShardNumber, params: ShardParameters) -> Key {
+    fn make_relation_key_for_shard(shard: ShardNumber, params: &ShardParameters) -> Key {
        rel_block_to_key(
            RelTag {
                spcnode: 1663,
@@ -917,14 +917,14 @@ mod tests {
        let child0 = Arc::new_cyclic(|myself| StubTimeline {
            gate: Default::default(),
            id: timeline_id,
-            shard: ShardIdentity::from_params(ShardNumber(0), child_params),
+            shard: ShardIdentity::from_params(ShardNumber(0), &child_params),
            per_timeline_state: PerTimelineState::default(),
            myself: myself.clone(),
        });
        let child1 = Arc::new_cyclic(|myself| StubTimeline {
            gate: Default::default(),
            id: timeline_id,
-            shard: ShardIdentity::from_params(ShardNumber(1), child_params),
+            shard: ShardIdentity::from_params(ShardNumber(1), &child_params),
            per_timeline_state: PerTimelineState::default(),
            myself: myself.clone(),
        });
@@ -937,7 +937,7 @@ mod tests {
            let handle = cache
                .get(
                    timeline_id,
-                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), child_params)),
+                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), &child_params)),
                    &StubManager {
                        shards: vec![parent.clone()],
                    },
@@ -961,7 +961,7 @@ mod tests {
            let handle = cache
                .get(
                    timeline_id,
-                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), child_params)),
+                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), &child_params)),
                    &StubManager {
                        shards: vec![], // doesn't matter what's in here, the cache is fully loaded
                    },
@@ -978,7 +978,7 @@ mod tests {
        let parent_handle = cache
            .get(
                timeline_id,
-                ShardSelector::Page(make_relation_key_for_shard(ShardNumber(0), child_params)),
+                ShardSelector::Page(make_relation_key_for_shard(ShardNumber(0), &child_params)),
                &StubManager {
                    shards: vec![parent.clone()],
                },
@@ -995,7 +995,7 @@ mod tests {
            let handle = cache
                .get(
                    timeline_id,
-                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), child_params)),
+                    ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), &child_params)),
                    &StubManager {
                        shards: vec![child0.clone(), child1.clone()], // <====== this changed compared to previous loop
                    },
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -1295,8 +1295,7 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 		if (iteration_hits != 0)
 		{
-			/* chunk offset (#
-			   of pages) into the LFC file */
+			/* chunk offset (# of pages) into the LFC file */
 			off_t	first_read_offset = (off_t) entry_offset * lfc_blocks_per_chunk;
 			int		nwrite = iov_last_used - first_block_in_chunk_read;
 			/* offset of first IOV */
@@ -1314,6 +1313,16 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				lfc_disable("read");
 				return -1;
 			}
+
+			/*
+			 * We successfully read the pages we know were valid when we
+			 * started reading; now mark those pages as read
+			 */
+			for (int i = first_block_in_chunk_read; i < iov_last_used; i++)
+			{
+				if (BITMAP_ISSET(chunk_mask, i))
+					BITMAP_SET(mask, buf_offset + i);
+			}
 		}

 		/* Place entry to the head of LRU list */
@@ -1331,15 +1340,6 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			{
 				lfc_ctl->time_read += io_time_us;
 				inc_page_cache_read_wait(io_time_us);
-				/*
-				 * We successfully read the pages we know were valid when we
-				 * started reading; now mark those pages as read
-				 */
-				for (int i = first_block_in_chunk_read; i < iov_last_used; i++)
-				{
-					if (BITMAP_ISSET(chunk_mask, i))
-						BITMAP_SET(mask, buf_offset + i);
-				}
 			}

 			CriticalAssert(entry->access_count > 0);
--- a/postgres.mk
+++ b/postgres.mk
@@ -1,121 +0,0 @@
-# Sub-makefile for compiling PostgreSQL as part of Neon. This is
-# included from the main Makefile, and is not meant to be called
-# directly.
-#
-# CI workflows and Dockerfiles can take advantage of the following
-# properties for caching:
-#
-# - Compiling the targets in this file only builds the PostgreSQL sources
-#   under the vendor/ subdirectory, nothing else from the repository.
-# - All outputs go to POSTGRES_INSTALL_DIR (by default 'pg_install',
-#   see parent Makefile)
-# - intermediate build artifacts go to BUILD_DIR
-#
-#
-# Variables passed from the parent Makefile that control what gets
-# installed and where:
-# - POSTGRES_VERSIONS
-# - POSTGRES_INSTALL_DIR
-# - BUILD_DIR
-#
-# Variables passed from the parent Makefile that affect the build
-# process and the resulting binaries:
-# - PG_CONFIGURE_OPTS
-# - PG_CFLAGS
-# - PG_LDFLAGS
-# - EXTRA_PATH_OVERRIDES
-
-###
-### Main targets
-###
-### These are called from the main Makefile, and can also be called
-### directly from command line
-
-# Compile and install a specific PostgreSQL version
-postgres-install-%: postgres-configure-% \
-		  postgres-headers-install-% # to prevent `make install` conflicts with neon's `postgres-headers`
-
-# Install the PostgreSQL header files into $(POSTGRES_INSTALL_DIR)/<version>/include
-#
-# This is implicitly part of the 'postgres-install-%' target, but this can be handy
-# if you want to install just the headers without building PostgreSQL, e.g. for building
-# extensions.
-postgres-headers-install-%: postgres-configure-%
-	+@echo "Installing PostgreSQL $* headers"
-	$(MAKE) -C $(BUILD_DIR)/$*/src/include MAKELEVEL=0 install
-
-# Run Postgres regression tests
-postgres-check-%: postgres-install-%
-	$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 check
-
-###
-### Shorthands for the main targets, for convenience
-###
-
-# Same as the above main targets, but for all supported PostgreSQL versions
-# For example, 'make postgres-install' is equivalent to
-# 'make postgres-install-v14 postgres-install-v15 postgres-install-v16 postgres-install-v17'
-all_version_targets=postgres-install postgres-headers-install postgres-check
-.PHONY: $(all_version_targets)
-$(all_version_targets): postgres-%: $(foreach pg_version,$(POSTGRES_VERSIONS),postgres-%-$(pg_version))
-
-.PHONY: postgres
-postgres: postgres-install
-
-.PHONY: postgres-headers
-postgres-headers: postgres-headers-install
-
-# 'postgres-v17' is an alias for 'postgres-install-v17' etc.
-$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-$(pg_version)): postgres-%: postgres-install-%
-
-###
-### Intermediate targets
-###
-### These are not intended to be called directly, but are dependencies for the
-### main targets.
-
-# Run 'configure'
-$(BUILD_DIR)/%/config.status:
-	mkdir -p $(BUILD_DIR)
-	test -e $(BUILD_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(BUILD_DIR)/CACHEDIR.TAG
-
-	+@echo "Configuring Postgres $* build"
-	@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
-		echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
-		echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
-		exit 1; }
-	mkdir -p $(BUILD_DIR)/$*
-
-	VERSION=$*; \
-	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
-	(cd $(BUILD_DIR)/$$VERSION && \
-	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
-		CFLAGS='$(PG_CFLAGS)' LDFLAGS='$(PG_LDFLAGS)' \
-		$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
-		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
-
-# nicer alias to run 'configure'.
-#
-# This tries to accomplish this rule:
-#
-# postgres-configure-%: $(BUILD_DIR)/%/config.status
-#
-# XXX: I'm not sure why the above rule doesn't work directly. But this accomplishses
-# the same thing
-$(foreach pg_version,$(POSTGRES_VERSIONS),postgres-configure-$(pg_version)): postgres-configure-%: FORCE $(BUILD_DIR)/%/config.status
-
-# Compile and install PostgreSQL (and a few contrib modules used in tests)
-postgres-install-%: postgres-configure-% \
-		  postgres-headers-install-% # to prevent `make install` conflicts with neon's `postgres-headers-install`
-	+@echo "Compiling PostgreSQL $*"
-	$(MAKE) -C $(BUILD_DIR)/$* MAKELEVEL=0 install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_prewarm install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_buffercache install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_visibility install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pageinspect install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/pg_trgm install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/amcheck install
-	$(MAKE) -C $(BUILD_DIR)/$*/contrib/test_decoding install
-
-.PHONY: FORCE
-FORCE:
--- a/proxy/src/binary/local_proxy.rs
+++ b/proxy/src/binary/local_proxy.rs
@@ -279,6 +279,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
        },
        proxy_protocol_v2: config::ProxyProtocolV2::Rejected,
        handshake_timeout: Duration::from_secs(10),
+        region: "local".into(),
        wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?,
        connect_compute_locks,
        connect_to_compute: compute_config,
--- a/proxy/src/binary/pg_sni_router.rs
+++ b/proxy/src/binary/pg_sni_router.rs
@@ -26,10 +26,9 @@ use utils::sentry_init::init_sentry;

 use crate::context::RequestContext;
 use crate::metrics::{Metrics, ThreadPoolMetrics};
-use crate::pglb::TlsRequired;
 use crate::pqproto::FeStartupPacket;
 use crate::protocol2::ConnectionInfo;
-use crate::proxy::{ErrorSource, copy_bidirectional_client_compute};
+use crate::proxy::{ErrorSource, TlsRequired, copy_bidirectional_client_compute};
 use crate::stream::{PqStream, Stream};
 use crate::util::run_until_cancelled;

@@ -237,6 +236,7 @@ pub(super) async fn task_main(
                        extra: None,
                    },
                    crate::metrics::Protocol::SniRouter,
+                    "sni",
                );
                handle_client(ctx, dest_suffix, tls_config, compute_tls_config, socket).await
            }
--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -123,6 +123,12 @@ struct ProxyCliArgs {
    /// timeout for the TLS handshake
    #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
    handshake_timeout: tokio::time::Duration,
+    /// http endpoint to receive periodic metric updates
+    #[clap(long)]
+    metric_collection_endpoint: Option<String>,
+    /// how often metrics should be sent to a collection endpoint
+    #[clap(long)]
+    metric_collection_interval: Option<String>,
    /// cache for `wake_compute` api method (use `size=0` to disable)
    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
    wake_compute_cache: String,
@@ -149,31 +155,40 @@ struct ProxyCliArgs {
    /// Wake compute rate limiter max number of requests per second.
    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
    wake_compute_limit: Vec<RateBucketInfo>,
+    /// Redis rate limiter max number of requests per second.
+    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_REDIS_SET)]
+    redis_rps_limit: Vec<RateBucketInfo>,
    /// Cancellation channel size (max queue size for redis kv client)
    #[clap(long, default_value_t = 1024)]
    cancellation_ch_size: usize,
    /// Cancellation ops batch size for redis
    #[clap(long, default_value_t = 8)]
    cancellation_batch_size: usize,
-    /// redis url for plain authentication
-    #[clap(long, alias("redis-notifications"))]
-    redis_plain: Option<String>,
-    /// what from the available authentications type to use for redis. Supported are "irsa" and "plain".
+    /// cache for `allowed_ips` (use `size=0` to disable)
+    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
+    allowed_ips_cache: String,
+    /// cache for `role_secret` (use `size=0` to disable)
+    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
+    role_secret_cache: String,
+    /// redis url for notifications (if empty, redis_host:port will be used for both notifications and streaming connections)
+    #[clap(long)]
+    redis_notifications: Option<String>,
+    /// which of the available authentication types to use for the regional redis. Supported are "irsa" and "plain".
    #[clap(long, default_value = "irsa")]
    redis_auth_type: String,
-    /// redis host for irsa authentication
+    /// redis host for streaming connections (might be different from the notifications host)
    #[clap(long)]
    redis_host: Option<String>,
-    /// redis port for irsa authentication
+    /// redis port for streaming connections (might be different from the notifications host)
    #[clap(long)]
    redis_port: Option<u16>,
-    /// redis cluster name for irsa authentication
+    /// redis cluster name, used in aws elasticache
    #[clap(long)]
    redis_cluster_name: Option<String>,
-    /// redis user_id for irsa authentication
+    /// redis user_id, used in aws elasticache
    #[clap(long)]
    redis_user_id: Option<String>,
-    /// aws region for irsa authentication
+    /// aws region to retrieve credentials
    #[clap(long, default_value_t = String::new())]
    aws_region: String,
    /// cache for `project_info` (use `size=0` to disable)
@@ -185,12 +200,6 @@ struct ProxyCliArgs {
    #[clap(flatten)]
    parquet_upload: ParquetUploadArgs,

-    /// http endpoint to receive periodic metric updates
-    #[clap(long)]
-    metric_collection_endpoint: Option<String>,
-    /// how often metrics should be sent to a collection endpoint
-    #[clap(long)]
-    metric_collection_interval: Option<String>,
    /// interval for backup metric collection
    #[clap(long, default_value = "10m", value_parser = humantime::parse_duration)]
    metric_backup_collection_interval: std::time::Duration,
@@ -203,7 +212,6 @@ struct ProxyCliArgs {
    /// Size of each event is no more than 400 bytes, so 2**22 is about 200MB before the compression.
    #[clap(long, default_value = "4194304")]
    metric_backup_collection_chunk_size: usize,
-
    /// Whether to retry the connection to the compute node
    #[clap(long, default_value = config::RetryConfig::CONNECT_TO_COMPUTE_DEFAULT_VALUES)]
    connect_to_compute_retry: String,
@@ -323,7 +331,7 @@ pub async fn run() -> anyhow::Result<()> {
        Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"),
    }
    info!("Using region: {}", args.aws_region);
-    let redis_client = configure_redis(&args).await?;
+    let (regional_redis_client, redis_notifications_client) = configure_redis(&args).await?;

    // Check that we can bind to address before further initialization
    info!("Starting http on {}", args.http);
@@ -378,6 +386,13 @@ pub async fn run() -> anyhow::Result<()> {

    let cancellation_token = CancellationToken::new();

+    let redis_rps_limit = Vec::leak(args.redis_rps_limit.clone());
+    RateBucketInfo::validate(redis_rps_limit)?;
+
+    let redis_kv_client = regional_redis_client
+        .as_ref()
+        .map(|redis_publisher| RedisKVClient::new(redis_publisher.clone(), redis_rps_limit));
+
    let cancellation_handler = Arc::new(CancellationHandler::new(&config.connect_to_compute));

    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new_with_shards(
@@ -392,7 +407,7 @@ pub async fn run() -> anyhow::Result<()> {
    match auth_backend {
        Either::Left(auth_backend) => {
            if let Some(proxy_listener) = proxy_listener {
-                client_tasks.spawn(crate::pglb::task_main(
+                client_tasks.spawn(crate::proxy::task_main(
                    config,
                    auth_backend,
                    proxy_listener,
@@ -457,7 +472,6 @@ pub async fn run() -> anyhow::Result<()> {
    client_tasks.spawn(crate::context::parquet::worker(
        cancellation_token.clone(),
        args.parquet_upload,
-        args.region,
    ));

    // maintenance tasks. these never return unless there's an error
@@ -481,17 +495,32 @@ pub async fn run() -> anyhow::Result<()> {
    #[cfg_attr(not(any(test, feature = "testing")), expect(irrefutable_let_patterns))]
    if let Either::Left(auth::Backend::ControlPlane(api, ())) = &auth_backend {
        if let crate::control_plane::client::ControlPlaneClient::ProxyV1(api) = &**api {
-            if let Some(client) = redis_client {
-                // project info cache and invalidation of that cache.
-                let cache = api.caches.project_info.clone();
-                maintenance_tasks.spawn(notifications::task_main(client.clone(), cache.clone()));
-                maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
+            match (redis_notifications_client, regional_redis_client.clone()) {
+                (None, None) => {}
+                (client1, client2) => {
+                    let cache = api.caches.project_info.clone();
+                    if let Some(client) = client1 {
+                        maintenance_tasks.spawn(notifications::task_main(
+                            client,
+                            cache.clone(),
+                            args.region.clone(),
+                        ));
+                    }
+                    if let Some(client) = client2 {
+                        maintenance_tasks.spawn(notifications::task_main(
+                            client,
+                            cache.clone(),
+                            args.region.clone(),
+                        ));
+                    }
+                    maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
+                }
+            }

-                // Try to connect to Redis 3 times with 1 + (0..0.1) second interval.
-                // This prevents immediate exit and pod restart,
-                // which can cause hammering of the redis in case of connection issues.
-                // cancellation key management
-                let mut redis_kv_client = RedisKVClient::new(client.clone());
+            // Try to connect to Redis 3 times with 1 + (0..0.1) second interval.
+            // This prevents immediate exit and pod restart,
+            // which can cause hammering of the redis in case of connection issues.
+            if let Some(mut redis_kv_client) = redis_kv_client {
                for attempt in (0..3).with_position() {
                    match redis_kv_client.try_connect().await {
                        Ok(()) => {
@@ -516,12 +545,14 @@ pub async fn run() -> anyhow::Result<()> {
                        }
                    }
                }
+            }

-                // listen for notifications of new projects/endpoints/branches
+            if let Some(regional_redis_client) = regional_redis_client {
                let cache = api.caches.endpoints_cache.clone();
+                let con = regional_redis_client;
                let span = tracing::info_span!("endpoints_cache");
                maintenance_tasks.spawn(
-                    async move { cache.do_read(client, cancellation_token.clone()).await }
+                    async move { cache.do_read(con, cancellation_token.clone()).await }
                        .instrument(span),
                );
            }
@@ -650,6 +681,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        authentication_config,
        proxy_protocol_v2: args.proxy_protocol_v2,
        handshake_timeout: args.handshake_timeout,
+        region: args.region.clone(),
        wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
        connect_compute_locks,
        connect_to_compute: compute_config,
@@ -811,18 +843,21 @@ fn build_auth_backend(

 async fn configure_redis(
    args: &ProxyCliArgs,
-) -> anyhow::Result<Option<ConnectionWithCredentialsProvider>> {
+) -> anyhow::Result<(
+    Option<ConnectionWithCredentialsProvider>,
+    Option<ConnectionWithCredentialsProvider>,
+)> {
    // TODO: untangle the config args
-    let redis_client = match &*args.redis_auth_type {
-        "plain" => match &args.redis_plain {
+    let regional_redis_client = match (args.redis_auth_type.as_str(), &args.redis_notifications) {
+        ("plain", redis_url) => match redis_url {
            None => {
-                bail!("plain auth requires redis_plain to be set");
+                bail!("plain auth requires redis_notifications to be set");
            }
            Some(url) => {
                Some(ConnectionWithCredentialsProvider::new_with_static_credentials(url.clone()))
            }
        },
-        "irsa" => match (&args.redis_host, args.redis_port) {
+        ("irsa", _) => match (&args.redis_host, args.redis_port) {
            (Some(host), Some(port)) => Some(
                ConnectionWithCredentialsProvider::new_with_credentials_provider(
                    host.clone(),
@@ -846,12 +881,18 @@ async fn configure_redis(
                bail!("redis-host and redis-port must be specified together");
            }
        },
-        auth_type => {
-            bail!("unknown auth type {auth_type:?} given")
+        _ => {
+            bail!("unknown auth type given");
        }
    };

-    Ok(redis_client)
+    let redis_notifications_client = if let Some(url) = &args.redis_notifications {
+        Some(ConnectionWithCredentialsProvider::new_with_static_credentials(&**url))
+    } else {
+        regional_redis_client.clone()
+    };
+
+    Ok((regional_redis_client, redis_notifications_client))
 }

 #[cfg(test)]
--- a/proxy/src/cache/timed_lru.rs
+++ b/proxy/src/cache/timed_lru.rs
@@ -30,7 +30,7 @@ use super::{Cache, timed_lru};
 ///
 /// * There's an API for immediate invalidation (removal) of a cache entry;
 ///   It's useful in case we know for sure that the entry is no longer correct.
-///   See [`timed_lru::Cached`] for more information.
+///   See [`timed_lru::LookupInfo`] & [`timed_lru::Cached`] for more information.
 ///
 /// * Expired entries are kept in the cache, until they are evicted by the LRU policy,
 ///   or by a successful lookup (i.e. the entry hasn't expired yet).
@@ -54,7 +54,7 @@ pub(crate) struct TimedLru<K, V> {
 impl<K: Hash + Eq, V> Cache for TimedLru<K, V> {
    type Key = K;
    type Value = V;
-    type LookupInfo<Key> = Key;
+    type LookupInfo<Key> = LookupInfo<Key>;

    fn invalidate(&self, info: &Self::LookupInfo<K>) {
        self.invalidate_raw(info);
@@ -87,24 +87,30 @@ impl<K: Hash + Eq, V> TimedLru<K, V> {

    /// Drop an entry from the cache if it's outdated.
    #[tracing::instrument(level = "debug", fields(cache = self.name), skip_all)]
-    fn invalidate_raw(&self, key: &K) {
+    fn invalidate_raw(&self, info: &LookupInfo<K>) {
+        let now = Instant::now();
+
        // Do costly things before taking the lock.
        let mut cache = self.cache.lock();
-        let entry = match cache.raw_entry_mut().from_key(key) {
+        let raw_entry = match cache.raw_entry_mut().from_key(&info.key) {
            RawEntryMut::Vacant(_) => return,
-            RawEntryMut::Occupied(x) => x.remove(),
+            RawEntryMut::Occupied(x) => x,
        };
+
+        // Remove the entry if it's no newer than the looked-up one, or if it has expired.
+        let entry = raw_entry.get();
+        let (created_at, expires_at) = (entry.created_at, entry.expires_at);
+        let should_remove = created_at <= info.created_at || expires_at <= now;
+
+        if should_remove {
+            raw_entry.remove();
+        }
+
        drop(cache); // drop lock before logging
-
-        let Entry {
-            created_at,
-            expires_at,
-            ..
-        } = entry;
-
        debug!(
-            ?created_at,
-            ?expires_at,
+            created_at = format_args!("{created_at:?}"),
+            expires_at = format_args!("{expires_at:?}"),
+            entry_removed = should_remove,
            "processed a cache entry invalidation event"
        );
    }
@@ -205,10 +211,10 @@ impl<K: Hash + Eq + Clone, V: Clone> TimedLru<K, V> {
    }

    pub(crate) fn insert_unit(&self, key: K, value: V) -> (Option<V>, Cached<&Self, ()>) {
-        let (_, old) = self.insert_raw(key.clone(), value);
+        let (created_at, old) = self.insert_raw(key.clone(), value);

        let cached = Cached {
-            token: Some((self, key)),
+            token: Some((self, LookupInfo { created_at, key })),
            value: (),
        };

@@ -223,9 +229,28 @@ impl<K: Hash + Eq, V: Clone> TimedLru<K, V> {
        K: Borrow<Q> + Clone,
        Q: Hash + Eq + ?Sized,
    {
-        self.get_raw(key, |key, entry| Cached {
-            token: Some((self, key.clone())),
-            value: entry.value.clone(),
+        self.get_raw(key, |key, entry| {
+            let info = LookupInfo {
+                created_at: entry.created_at,
+                key: key.clone(),
+            };
+
+            Cached {
+                token: Some((self, info)),
+                value: entry.value.clone(),
+            }
        })
    }
 }
+
+/// Lookup information for key invalidation.
+pub(crate) struct LookupInfo<K> {
+    /// Time of creation of a cache [`Entry`].
+    /// We use this during invalidation lookups to prevent eviction of a newer
+    /// entry sharing the same key (it might've been inserted by a different
+    /// task after we got the entry we're trying to invalidate now).
+    created_at: Instant,
+
+    /// Search by this key.
+    key: K,
+}
--- a/proxy/src/cancellation.rs
+++ b/proxy/src/cancellation.rs
@@ -64,13 +64,6 @@ impl Pipeline {
        let responses = self.replies;
        let batch_size = self.inner.len();

-        if !client.credentials_refreshed() {
-            tracing::debug!(
-                "Redis credentials are not refreshed. Sleeping for 5 seconds before retrying..."
-            );
-            tokio::time::sleep(Duration::from_secs(5)).await;
-        }
-
        match client.query(&self.inner).await {
            // for each reply, we expect that many values.
            Ok(Value::Array(values)) if values.len() == responses => {
@@ -134,14 +127,6 @@ impl QueueProcessing for CancellationProcessor {
    }

    async fn apply(&mut self, batch: Vec<Self::Req>) -> Vec<Self::Res> {
-        if !self.client.credentials_refreshed() {
-            // this will cause a timeout for cancellation operations
-            tracing::debug!(
-                "Redis credentials are not refreshed. Sleeping for 5 seconds before retrying..."
-            );
-            tokio::time::sleep(Duration::from_secs(5)).await;
-        }
-
        let mut pipeline = Pipeline::with_capacity(batch.len());

        let batch_size = batch.len();
--- a/proxy/src/compute/mod.rs
+++ b/proxy/src/compute/mod.rs
@@ -236,7 +236,7 @@ impl AuthInfo {
        &self,
        ctx: &RequestContext,
        compute: &mut ComputeConnection,
-        user_info: &ComputeUserInfo,
+        user_info: ComputeUserInfo,
    ) -> Result<PostgresSettings, PostgresError> {
        // client config with stubbed connect info.
        // TODO(conrad): should we rewrite this to bypass tokio-postgres2 entirely,
@@ -272,7 +272,7 @@ impl AuthInfo {
                secret_key,
            },
            compute.hostname.to_string(),
-            user_info.clone(),
+            user_info,
        );

        Ok(PostgresSettings {
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -22,6 +22,7 @@ pub struct ProxyConfig {
    pub http_config: HttpConfig,
    pub authentication_config: AuthenticationConfig,
    pub proxy_protocol_v2: ProxyProtocolV2,
+    pub region: String,
    pub handshake_timeout: Duration,
    pub wake_compute_retry_config: RetryConfig,
    pub connect_compute_locks: ApiLocks<Host>,
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -11,12 +11,11 @@ use crate::config::{ProxyConfig, ProxyProtocolV2};
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::{Metrics, NumClientConnectionsGuard};
-use crate::pglb::ClientRequestError;
 use crate::pglb::handshake::{HandshakeData, handshake};
 use crate::pglb::passthrough::ProxyPassthrough;
 use crate::protocol2::{ConnectHeader, ConnectionInfo, read_proxy_protocol};
 use crate::proxy::connect_compute::{TcpMechanism, connect_to_compute};
-use crate::proxy::{ErrorSource, finish_client_init};
+use crate::proxy::{ClientRequestError, ErrorSource, prepare_client_connection};
 use crate::util::run_until_cancelled;

 pub async fn task_main(
@@ -90,7 +89,12 @@ pub async fn task_main(
                }
            }

-            let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Tcp);
+            let ctx = RequestContext::new(
+                session_id,
+                conn_info,
+                crate::metrics::Protocol::Tcp,
+                &config.region,
+            );

            let res = handle_client(
                config,
@@ -227,13 +231,13 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
    .await?;

    let pg_settings = auth_info
-        .authenticate(ctx, &mut node, &user_info)
+        .authenticate(ctx, &mut node, user_info)
        .or_else(|e| async { Err(stream.throw_error(e, Some(ctx)).await) })
        .await?;

    let session = cancellation_handler.get_key();

-    finish_client_init(&pg_settings, *session.key(), &mut stream);
+    prepare_client_connection(&pg_settings, *session.key(), &mut stream);
    let stream = stream.flush_and_into_inner().await?;

    let session_id = ctx.session_id();
--- a/proxy/src/context/mod.rs
+++ b/proxy/src/context/mod.rs
@@ -46,6 +46,7 @@ struct RequestContextInner {
    pub(crate) session_id: Uuid,
    pub(crate) protocol: Protocol,
    first_packet: chrono::DateTime<Utc>,
+    region: &'static str,
    pub(crate) span: Span,

    // filled in as they are discovered
@@ -93,6 +94,7 @@ impl Clone for RequestContext {
            session_id: inner.session_id,
            protocol: inner.protocol,
            first_packet: inner.first_packet,
+            region: inner.region,
            span: info_span!("background_task"),

            project: inner.project,
@@ -122,7 +124,12 @@ impl Clone for RequestContext {
 }

 impl RequestContext {
-    pub fn new(session_id: Uuid, conn_info: ConnectionInfo, protocol: Protocol) -> Self {
+    pub fn new(
+        session_id: Uuid,
+        conn_info: ConnectionInfo,
+        protocol: Protocol,
+        region: &'static str,
+    ) -> Self {
        // TODO: be careful with long lived spans
        let span = info_span!(
            "connect_request",
@@ -138,6 +145,7 @@ impl RequestContext {
            session_id,
            protocol,
            first_packet: Utc::now(),
+            region,
            span,

            project: None,
@@ -171,7 +179,7 @@ impl RequestContext {
        let ip = IpAddr::from([127, 0, 0, 1]);
        let addr = SocketAddr::new(ip, 5432);
        let conn_info = ConnectionInfo { addr, extra: None };
-        RequestContext::new(Uuid::now_v7(), conn_info, Protocol::Tcp)
+        RequestContext::new(Uuid::now_v7(), conn_info, Protocol::Tcp, "test")
    }

    pub(crate) fn console_application_name(&self) -> String {
--- a/proxy/src/context/parquet.rs
+++ b/proxy/src/context/parquet.rs
@@ -74,7 +74,7 @@ pub(crate) const FAILED_UPLOAD_MAX_RETRIES: u32 = 10;

 #[derive(parquet_derive::ParquetRecordWriter)]
 pub(crate) struct RequestData {
-    region: String,
+    region: &'static str,
    protocol: &'static str,
    /// Must be UTC. The derive macro doesn't like the timezones
    timestamp: chrono::NaiveDateTime,
@@ -147,7 +147,7 @@ impl From<&RequestContextInner> for RequestData {
            }),
            jwt_issuer: value.jwt_issuer.clone(),
            protocol: value.protocol.as_str(),
-            region: String::new(),
+            region: value.region,
            error: value.error_kind.as_ref().map(|e| e.to_metric_label()),
            success: value.success,
            cold_start_info: value.cold_start_info.as_str(),
@@ -167,7 +167,6 @@ impl From<&RequestContextInner> for RequestData {
 pub async fn worker(
    cancellation_token: CancellationToken,
    config: ParquetUploadArgs,
-    region: String,
 ) -> anyhow::Result<()> {
    let Some(remote_storage_config) = config.parquet_upload_remote_storage else {
        tracing::warn!("parquet request upload: no s3 bucket configured");
@@ -233,17 +232,12 @@ pub async fn worker(
                .context("remote storage for disconnect events init")?;
        let parquet_config_disconnect = parquet_config.clone();
        tokio::try_join!(
-            worker_inner(storage, rx, parquet_config, &region),
-            worker_inner(
-                storage_disconnect,
-                rx_disconnect,
-                parquet_config_disconnect,
-                &region
-            )
+            worker_inner(storage, rx, parquet_config),
+            worker_inner(storage_disconnect, rx_disconnect, parquet_config_disconnect)
        )
        .map(|_| ())
    } else {
-        worker_inner(storage, rx, parquet_config, &region).await
+        worker_inner(storage, rx, parquet_config).await
    }
 }

@@ -263,7 +257,6 @@ async fn worker_inner(
    storage: GenericRemoteStorage,
    rx: impl Stream<Item = RequestData>,
    config: ParquetConfig,
-    region: &str,
 ) -> anyhow::Result<()> {
    #[cfg(any(test, feature = "testing"))]
    let storage = if config.test_remote_failures > 0 {
@@ -284,8 +277,7 @@ async fn worker_inner(
    let mut last_upload = time::Instant::now();

    let mut len = 0;
-    while let Some(mut row) = rx.next().await {
-        region.clone_into(&mut row.region);
+    while let Some(row) = rx.next().await {
        rows.push(row);
        let force = last_upload.elapsed() > config.max_duration;
        if rows.len() == config.rows_per_group || force {
@@ -541,7 +533,7 @@ mod tests {
            auth_method: None,
            jwt_issuer: None,
            protocol: ["tcp", "ws", "http"][rng.gen_range(0..3)],
-            region: String::new(),
+            region: "us-east-1",
            error: None,
            success: rng.r#gen(),
            cold_start_info: "no",
@@ -573,9 +565,7 @@ mod tests {
            .await
            .unwrap();

-        worker_inner(storage, rx, config, "us-east-1")
-            .await
-            .unwrap();
+        worker_inner(storage, rx, config).await.unwrap();

        let mut files = WalkDir::new(tmpdir.as_std_path())
            .into_iter()
--- a/proxy/src/pglb/handshake.rs
+++ b/proxy/src/pglb/handshake.rs
@@ -8,10 +8,10 @@ use crate::config::TlsConfig;
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::Metrics;
-use crate::pglb::TlsRequired;
 use crate::pqproto::{
    BeMessage, CancelKeyData, FeStartupPacket, ProtocolVersion, StartupMessageParams,
 };
+use crate::proxy::TlsRequired;
 use crate::stream::{PqStream, Stream, StreamUpgradeError};
 use crate::tls::PG_ALPN_PROTOCOL;

--- a/proxy/src/pglb/mod.rs
+++ b/proxy/src/pglb/mod.rs
@@ -2,332 +2,3 @@ pub mod copy_bidirectional;
 pub mod handshake;
 pub mod inprocess;
 pub mod passthrough;
-
-use std::sync::Arc;
-
-use futures::FutureExt;
-use smol_str::ToSmolStr;
-use thiserror::Error;
-use tokio::io::{AsyncRead, AsyncWrite};
-use tokio_util::sync::CancellationToken;
-use tracing::{Instrument, debug, error, info, warn};
-
-use crate::auth;
-use crate::cancellation::{self, CancellationHandler};
-use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
-use crate::context::RequestContext;
-use crate::error::{ReportableError, UserFacingError};
-use crate::metrics::{Metrics, NumClientConnectionsGuard};
-pub use crate::pglb::copy_bidirectional::ErrorSource;
-use crate::pglb::handshake::{HandshakeData, HandshakeError, handshake};
-use crate::pglb::passthrough::ProxyPassthrough;
-use crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol};
-use crate::proxy::handle_client;
-use crate::rate_limiter::EndpointRateLimiter;
-use crate::stream::Stream;
-use crate::util::run_until_cancelled;
-
-pub const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
-
-#[derive(Error, Debug)]
-#[error("{ERR_INSECURE_CONNECTION}")]
-pub struct TlsRequired;
-
-impl ReportableError for TlsRequired {
-    fn get_error_kind(&self) -> crate::error::ErrorKind {
-        crate::error::ErrorKind::User
-    }
-}
-
-impl UserFacingError for TlsRequired {}
-
-pub async fn task_main(
-    config: &'static ProxyConfig,
-    auth_backend: &'static auth::Backend<'static, ()>,
-    listener: tokio::net::TcpListener,
-    cancellation_token: CancellationToken,
-    cancellation_handler: Arc<CancellationHandler>,
-    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-) -> anyhow::Result<()> {
-    scopeguard::defer! {
-        info!("proxy has shut down");
-    }
-
-    // When set for the server socket, the keepalive setting
-    // will be inherited by all accepted client sockets.
-    socket2::SockRef::from(&listener).set_keepalive(true)?;
-
-    let connections = tokio_util::task::task_tracker::TaskTracker::new();
-    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();
-
-    while let Some(accept_result) =
-        run_until_cancelled(listener.accept(), &cancellation_token).await
-    {
-        let (socket, peer_addr) = accept_result?;
-
-        let conn_gauge = Metrics::get()
-            .proxy
-            .client_connections
-            .guard(crate::metrics::Protocol::Tcp);
-
-        let session_id = uuid::Uuid::new_v4();
-        let cancellation_handler = Arc::clone(&cancellation_handler);
-        let cancellations = cancellations.clone();
-
-        debug!(protocol = "tcp", %session_id, "accepted new TCP connection");
-        let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();
-
-        connections.spawn(async move {
-            let (socket, conn_info) = match config.proxy_protocol_v2 {
-                ProxyProtocolV2::Required => {
-                    match read_proxy_protocol(socket).await {
-                        Err(e) => {
-                            warn!("per-client task finished with an error: {e:#}");
-                            return;
-                        }
-                        // our load balancers will not send any more data. let's just exit immediately
-                        Ok((_socket, ConnectHeader::Local)) => {
-                            debug!("healthcheck received");
-                            return;
-                        }
-                        Ok((socket, ConnectHeader::Proxy(info))) => (socket, info),
-                    }
-                }
-                // ignore the header - it cannot be confused for a postgres or http connection so will
-                // error later.
-                ProxyProtocolV2::Rejected => (
-                    socket,
-                    ConnectionInfo {
-                        addr: peer_addr,
-                        extra: None,
-                    },
-                ),
-            };
-
-            match socket.set_nodelay(true) {
-                Ok(()) => {}
-                Err(e) => {
-                    error!(
-                        "per-client task finished with an error: failed to set socket option: {e:#}"
-                    );
-                    return;
-                }
-            }
-
-            let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Tcp);
-
-            let res = handle_connection(
-                config,
-                auth_backend,
-                &ctx,
-                cancellation_handler,
-                socket,
-                ClientMode::Tcp,
-                endpoint_rate_limiter2,
-                conn_gauge,
-                cancellations,
-            )
-            .instrument(ctx.span())
-            .boxed()
-            .await;
-
-            match res {
-                Err(e) => {
-                    ctx.set_error_kind(e.get_error_kind());
-                    warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
-                }
-                Ok(None) => {
-                    ctx.set_success();
-                }
-                Ok(Some(p)) => {
-                    ctx.set_success();
-                    let _disconnect = ctx.log_connect();
-                    match p.proxy_pass().await {
-                        Ok(()) => {}
-                        Err(ErrorSource::Client(e)) => {
-                            warn!(
-                                ?session_id,
-                                "per-client task finished with an IO error from the client: {e:#}"
-                            );
-                        }
-                        Err(ErrorSource::Compute(e)) => {
-                            error!(
-                                ?session_id,
-                                "per-client task finished with an IO error from the compute: {e:#}"
-                            );
-                        }
-                    }
-                }
-            }
-        });
-    }
-
-    connections.close();
-    cancellations.close();
-    drop(listener);
-
-    // Drain connections
-    connections.wait().await;
-    cancellations.wait().await;
-
-    Ok(())
-}
-
-pub(crate) enum ClientMode {
-    Tcp,
-    Websockets { hostname: Option<String> },
-}
-
-/// Abstracts the logic of handling TCP vs WS clients
-impl ClientMode {
-    pub fn allow_cleartext(&self) -> bool {
-        match self {
-            ClientMode::Tcp => false,
-            ClientMode::Websockets { .. } => true,
-        }
-    }
-
-    pub fn hostname<'a, S>(&'a self, s: &'a Stream<S>) -> Option<&'a str> {
-        match self {
-            ClientMode::Tcp => s.sni_hostname(),
-            ClientMode::Websockets { hostname } => hostname.as_deref(),
-        }
-    }
-
-    pub fn handshake_tls<'a>(&self, tls: Option<&'a TlsConfig>) -> Option<&'a TlsConfig> {
-        match self {
-            ClientMode::Tcp => tls,
-            // TLS is None here if using websockets, because the connection is already encrypted.
-            ClientMode::Websockets { .. } => None,
-        }
-    }
-}
-
-#[derive(Debug, Error)]
-// almost all errors should be reported to the user, but there's a few cases where we cannot
-// 1. Cancellation: we are not allowed to tell the client any cancellation statuses for security reasons
-// 2. Handshake: handshake reports errors if it can, otherwise if the handshake fails due to protocol violation,
-//    we cannot be sure the client even understands our error message
-// 3. PrepareClient: The client disconnected, so we can't tell them anyway...
-pub(crate) enum ClientRequestError {
-    #[error("{0}")]
-    Cancellation(#[from] cancellation::CancelError),
-    #[error("{0}")]
-    Handshake(#[from] HandshakeError),
-    #[error("{0}")]
-    HandshakeTimeout(#[from] tokio::time::error::Elapsed),
-    #[error("{0}")]
-    PrepareClient(#[from] std::io::Error),
-    #[error("{0}")]
-    ReportedError(#[from] crate::stream::ReportedError),
-}
-
-impl ReportableError for ClientRequestError {
-    fn get_error_kind(&self) -> crate::error::ErrorKind {
-        match self {
-            ClientRequestError::Cancellation(e) => e.get_error_kind(),
-            ClientRequestError::Handshake(e) => e.get_error_kind(),
-            ClientRequestError::HandshakeTimeout(_) => crate::error::ErrorKind::RateLimit,
-            ClientRequestError::ReportedError(e) => e.get_error_kind(),
-            ClientRequestError::PrepareClient(_) => crate::error::ErrorKind::ClientDisconnect,
-        }
-    }
-}
-
-#[allow(clippy::too_many_arguments)]
-pub(crate) async fn handle_connection<S: AsyncRead + AsyncWrite + Unpin + Send>(
-    config: &'static ProxyConfig,
-    auth_backend: &'static auth::Backend<'static, ()>,
-    ctx: &RequestContext,
-    cancellation_handler: Arc<CancellationHandler>,
-    client: S,
-    mode: ClientMode,
-    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    conn_gauge: NumClientConnectionsGuard<'static>,
-    cancellations: tokio_util::task::task_tracker::TaskTracker,
-) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
-    debug!(
-        protocol = %ctx.protocol(),
-        "handling interactive connection from client"
-    );
-
-    let metrics = &Metrics::get().proxy;
-    let proto = ctx.protocol();
-    let request_gauge = metrics.connection_requests.guard(proto);
-
-    let tls = config.tls_config.load();
-    let tls = tls.as_deref();
-
-    let record_handshake_error = !ctx.has_private_peer_addr();
-    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-    let do_handshake = handshake(ctx, client, mode.handshake_tls(tls), record_handshake_error);
-
-    let (mut client, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)
-        .await??
-    {
-        HandshakeData::Startup(client, params) => (client, params),
-        HandshakeData::Cancel(cancel_key_data) => {
-            // spawn a task to cancel the session, but don't wait for it
-            cancellations.spawn({
-                let cancellation_handler_clone = Arc::clone(&cancellation_handler);
-                let ctx = ctx.clone();
-                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
-                cancel_span.follows_from(tracing::Span::current());
-                async move {
-                    cancellation_handler_clone
-                        .cancel_session(
-                            cancel_key_data,
-                            ctx,
-                            config.authentication_config.ip_allowlist_check_enabled,
-                            config.authentication_config.is_vpc_acccess_proxy,
-                            auth_backend.get_api(),
-                        )
-                        .await
-                        .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
-                }.instrument(cancel_span)
-            });
-
-            return Ok(None);
-        }
-    };
-    drop(pause);
-
-    ctx.set_db_options(params.clone());
-
-    let common_names = tls.map(|tls| &tls.common_names);
-
-    let (node, cancel_on_shutdown) = handle_client(
-        config,
-        auth_backend,
-        ctx,
-        cancellation_handler,
-        &mut client,
-        &mode,
-        endpoint_rate_limiter,
-        common_names,
-        &params,
-    )
-    .await?;
-
-    let client = client.flush_and_into_inner().await?;
-
-    let private_link_id = match ctx.extra() {
-        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
-        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
-        None => None,
-    };
-
-    Ok(Some(ProxyPassthrough {
-        client,
-        compute: node.stream,
-
-        aux: node.aux,
-        private_link_id,
-
-        _cancel_on_shutdown: cancel_on_shutdown,
-
-        _req: request_gauge,
-        _conn: conn_gauge,
-        _db_conn: node.guage,
-    }))
-}
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -112,7 +112,7 @@ where
    let node_info = if !node_info.cached() || !err.should_retry_wake_compute() {
        // If we just recieved this from cplane and didn't get it from cache, we shouldn't retry.
        // Do not need to retrieve a new node_info, just return the old one.
-        if !should_retry(&err, num_retries, compute.retry) {
+        if should_retry(&err, num_retries, compute.retry) {
            Metrics::get().proxy.retries_metric.observe(
                RetriesMetricGroup {
                    outcome: ConnectOutcome::Failed,
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -5,64 +5,328 @@ pub(crate) mod connect_compute;
 pub(crate) mod retry;
 pub(crate) mod wake_compute;

-use std::collections::HashSet;
-use std::convert::Infallible;
 use std::sync::Arc;

+use futures::FutureExt;
 use itertools::Itertools;
 use once_cell::sync::OnceCell;
 use regex::Regex;
 use serde::{Deserialize, Serialize};
-use smol_str::{SmolStr, format_smolstr};
+use smol_str::{SmolStr, ToSmolStr, format_smolstr};
+use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
-use tokio::sync::oneshot;
-use tracing::Instrument;
+use tokio_util::sync::CancellationToken;
+use tracing::{Instrument, debug, error, info, warn};

-use crate::cache::Cache;
-use crate::cancellation::CancellationHandler;
-use crate::compute::ComputeConnection;
-use crate::config::ProxyConfig;
+use crate::cancellation::{self, CancellationHandler};
+use crate::config::{ProxyConfig, ProxyProtocolV2, TlsConfig};
 use crate::context::RequestContext;
-use crate::control_plane::client::ControlPlaneClient;
+use crate::error::{ReportableError, UserFacingError};
+use crate::metrics::{Metrics, NumClientConnectionsGuard};
 pub use crate::pglb::copy_bidirectional::{ErrorSource, copy_bidirectional_client_compute};
-use crate::pglb::{ClientMode, ClientRequestError};
+use crate::pglb::handshake::{HandshakeData, HandshakeError, handshake};
+use crate::pglb::passthrough::ProxyPassthrough;
 use crate::pqproto::{BeMessage, CancelKeyData, StartupMessageParams};
+use crate::protocol2::{ConnectHeader, ConnectionInfo, ConnectionInfoExtra, read_proxy_protocol};
 use crate::proxy::connect_compute::{TcpMechanism, connect_to_compute};
-use crate::proxy::retry::ShouldRetryWakeCompute;
 use crate::rate_limiter::EndpointRateLimiter;
 use crate::stream::{PqStream, Stream};
 use crate::types::EndpointCacheKey;
+use crate::util::run_until_cancelled;
 use crate::{auth, compute};

+const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
+
+#[derive(Error, Debug)]
+#[error("{ERR_INSECURE_CONNECTION}")]
+pub struct TlsRequired;
+
+impl ReportableError for TlsRequired {
+    fn get_error_kind(&self) -> crate::error::ErrorKind {
+        crate::error::ErrorKind::User
+    }
+}
+
+impl UserFacingError for TlsRequired {}
+
+pub async fn task_main(
+    config: &'static ProxyConfig,
+    auth_backend: &'static auth::Backend<'static, ()>,
+    listener: tokio::net::TcpListener,
+    cancellation_token: CancellationToken,
+    cancellation_handler: Arc<CancellationHandler>,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
+) -> anyhow::Result<()> {
+    scopeguard::defer! {
+        info!("proxy has shut down");
+    }
+
+    // When set for the server socket, the keepalive setting
+    // will be inherited by all accepted client sockets.
+    socket2::SockRef::from(&listener).set_keepalive(true)?;
+
+    let connections = tokio_util::task::task_tracker::TaskTracker::new();
+    let cancellations = tokio_util::task::task_tracker::TaskTracker::new();
+
+    while let Some(accept_result) =
+        run_until_cancelled(listener.accept(), &cancellation_token).await
+    {
+        let (socket, peer_addr) = accept_result?;
+
+        let conn_gauge = Metrics::get()
+            .proxy
+            .client_connections
+            .guard(crate::metrics::Protocol::Tcp);
+
+        let session_id = uuid::Uuid::new_v4();
+        let cancellation_handler = Arc::clone(&cancellation_handler);
+        let cancellations = cancellations.clone();
+
+        debug!(protocol = "tcp", %session_id, "accepted new TCP connection");
+        let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();
+
+        connections.spawn(async move {
+            let (socket, conn_info) = match config.proxy_protocol_v2 {
+                ProxyProtocolV2::Required => {
+                    match read_proxy_protocol(socket).await {
+                        Err(e) => {
+                            warn!("per-client task finished with an error: {e:#}");
+                            return;
+                        }
+                        // our load balancers will not send any more data. let's just exit immediately
+                        Ok((_socket, ConnectHeader::Local)) => {
+                            debug!("healthcheck received");
+                            return;
+                        }
+                        Ok((socket, ConnectHeader::Proxy(info))) => (socket, info),
+                    }
+                }
+                // Do not look for a header: it cannot be mistaken for a postgres or http connection,
+                // so a client that sends one anyway will simply error later.
+                ProxyProtocolV2::Rejected => (
+                    socket,
+                    ConnectionInfo {
+                        addr: peer_addr,
+                        extra: None,
+                    },
+                ),
+            };
+
+            match socket.set_nodelay(true) {
+                Ok(()) => {}
+                Err(e) => {
+                    error!(
+                        "per-client task finished with an error: failed to set socket option: {e:#}"
+                    );
+                    return;
+                }
+            }
+
+            let ctx = RequestContext::new(
+                session_id,
+                conn_info,
+                crate::metrics::Protocol::Tcp,
+                &config.region,
+            );
+
+            let res = handle_client(
+                config,
+                auth_backend,
+                &ctx,
+                cancellation_handler,
+                socket,
+                ClientMode::Tcp,
+                endpoint_rate_limiter2,
+                conn_gauge,
+                cancellations,
+            )
+            .instrument(ctx.span())
+            .boxed()
+            .await;
+
+            match res {
+                Err(e) => {
+                    ctx.set_error_kind(e.get_error_kind());
+                    warn!(parent: &ctx.span(), "per-client task finished with an error: {e:#}");
+                }
+                Ok(None) => {
+                    ctx.set_success();
+                }
+                Ok(Some(p)) => {
+                    ctx.set_success();
+                    let _disconnect = ctx.log_connect();
+                    match p.proxy_pass().await {
+                        Ok(()) => {}
+                        Err(ErrorSource::Client(e)) => {
+                            warn!(
+                                ?session_id,
+                                "per-client task finished with an IO error from the client: {e:#}"
+                            );
+                        }
+                        Err(ErrorSource::Compute(e)) => {
+                            error!(
+                                ?session_id,
+                                "per-client task finished with an IO error from the compute: {e:#}"
+                            );
+                        }
+                    }
+                }
+            }
+        });
+    }
+
+    connections.close();
+    cancellations.close();
+    drop(listener);
+
+    // Drain connections
+    connections.wait().await;
+    cancellations.wait().await;
+
+    Ok(())
+}
+
+pub(crate) enum ClientMode {
+    Tcp,
+    Websockets { hostname: Option<String> },
+}
+
+/// Abstracts the logic of handling TCP vs WS clients
+impl ClientMode {
+    pub(crate) fn allow_cleartext(&self) -> bool {
+        match self {
+            ClientMode::Tcp => false,
+            ClientMode::Websockets { .. } => true,
+        }
+    }
+
+    fn hostname<'a, S>(&'a self, s: &'a Stream<S>) -> Option<&'a str> {
+        match self {
+            ClientMode::Tcp => s.sni_hostname(),
+            ClientMode::Websockets { hostname } => hostname.as_deref(),
+        }
+    }
+
+    fn handshake_tls<'a>(&self, tls: Option<&'a TlsConfig>) -> Option<&'a TlsConfig> {
+        match self {
+            ClientMode::Tcp => tls,
+            // TLS is None here if using websockets, because the connection is already encrypted.
+            ClientMode::Websockets { .. } => None,
+        }
+    }
+}
+
+#[derive(Debug, Error)]
+// almost all errors should be reported to the user, but there are a few cases where we cannot:
+// 1. Cancellation: we are not allowed to tell the client any cancellation statuses for security reasons
+// 2. Handshake: handshake reports errors if it can, otherwise if the handshake fails due to protocol violation,
+//    we cannot be sure the client even understands our error message
+// 3. PrepareClient: The client disconnected, so we can't tell them anyway...
+pub(crate) enum ClientRequestError {
+    #[error("{0}")]
+    Cancellation(#[from] cancellation::CancelError),
+    #[error("{0}")]
+    Handshake(#[from] HandshakeError),
+    #[error("{0}")]
+    HandshakeTimeout(#[from] tokio::time::error::Elapsed),
+    #[error("{0}")]
+    PrepareClient(#[from] std::io::Error),
+    #[error("{0}")]
+    ReportedError(#[from] crate::stream::ReportedError),
+}
+
+impl ReportableError for ClientRequestError {
+    fn get_error_kind(&self) -> crate::error::ErrorKind {
+        match self {
+            ClientRequestError::Cancellation(e) => e.get_error_kind(),
+            ClientRequestError::Handshake(e) => e.get_error_kind(),
+            ClientRequestError::HandshakeTimeout(_) => crate::error::ErrorKind::RateLimit,
+            ClientRequestError::ReportedError(e) => e.get_error_kind(),
+            ClientRequestError::PrepareClient(_) => crate::error::ErrorKind::ClientDisconnect,
+        }
+    }
+}
+
 #[allow(clippy::too_many_arguments)]
 pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
    config: &'static ProxyConfig,
    auth_backend: &'static auth::Backend<'static, ()>,
    ctx: &RequestContext,
    cancellation_handler: Arc<CancellationHandler>,
-    client: &mut PqStream<Stream<S>>,
-    mode: &ClientMode,
+    stream: S,
+    mode: ClientMode,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    common_names: Option<&HashSet<String>>,
-    params: &StartupMessageParams,
-) -> Result<(ComputeConnection, oneshot::Sender<Infallible>), ClientRequestError> {
-    let hostname = mode.hostname(client.get_ref());
+    conn_gauge: NumClientConnectionsGuard<'static>,
+    cancellations: tokio_util::task::task_tracker::TaskTracker,
+) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
+    debug!(
+        protocol = %ctx.protocol(),
+        "handling interactive connection from client"
+    );
+
+    let metrics = &Metrics::get().proxy;
+    let proto = ctx.protocol();
+    let request_gauge = metrics.connection_requests.guard(proto);
+
+    let tls = config.tls_config.load();
+    let tls = tls.as_deref();
+
+    let record_handshake_error = !ctx.has_private_peer_addr();
+    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
+    let do_handshake = handshake(ctx, stream, mode.handshake_tls(tls), record_handshake_error);
+
+    let (mut stream, params) = match tokio::time::timeout(config.handshake_timeout, do_handshake)
+        .await??
+    {
+        HandshakeData::Startup(stream, params) => (stream, params),
+        HandshakeData::Cancel(cancel_key_data) => {
+            // spawn a task to cancel the session, but don't wait for it
+            cancellations.spawn({
+                let cancellation_handler_clone = Arc::clone(&cancellation_handler);
+                let ctx = ctx.clone();
+                let cancel_span = tracing::span!(parent: None, tracing::Level::INFO, "cancel_session", session_id = ?ctx.session_id());
+                cancel_span.follows_from(tracing::Span::current());
+                async move {
+                    cancellation_handler_clone
+                        .cancel_session(
+                            cancel_key_data,
+                            ctx,
+                            config.authentication_config.ip_allowlist_check_enabled,
+                            config.authentication_config.is_vpc_acccess_proxy,
+                            auth_backend.get_api(),
+                        )
+                        .await
+                        .inspect_err(|e | debug!(error = ?e, "cancel_session failed")).ok();
+                }.instrument(cancel_span)
+            });
+
+            return Ok(None);
+        }
+    };
+    drop(pause);
+
+    ctx.set_db_options(params.clone());
+
+    let hostname = mode.hostname(stream.get_ref());
+
+    let common_names = tls.map(|tls| &tls.common_names);
+
    // Extract credentials which we're going to use for auth.
    let result = auth_backend
        .as_ref()
-        .map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, params, hostname, common_names))
+        .map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names))
        .transpose();

    let user_info = match result {
        Ok(user_info) => user_info,
-        Err(e) => Err(client.throw_error(e, Some(ctx)).await)?,
+        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };

    let user = user_info.get_user().to_owned();
    let user_info = match user_info
        .authenticate(
            ctx,
-            client,
+            &mut stream,
            mode.allow_cleartext(),
            &config.authentication_config,
            endpoint_rate_limiter,
@@ -75,7 +339,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
            let app = params.get("application_name");
            let params_span = tracing::info_span!("", ?user, ?db, ?app);

-            return Err(client
+            return Err(stream
                .throw_error(e, Some(ctx))
                .instrument(params_span)
                .await)?;
@@ -88,67 +352,37 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
    };
    let params_compat = creds.info.options.get(NeonOptions::PARAMS_COMPAT).is_some();
    let mut auth_info = compute::AuthInfo::with_auth_keys(creds.keys);
-    auth_info.set_startup_params(params, params_compat);
+    auth_info.set_startup_params(&params, params_compat);

-    let mut node;
-    let mut attempt = 0;
-    let connect = TcpMechanism {
-        locks: &config.connect_compute_locks,
+    let res = connect_to_compute(
+        ctx,
+        &TcpMechanism {
+            locks: &config.connect_compute_locks,
+        },
+        &auth::Backend::ControlPlane(cplane, creds.info.clone()),
+        config.wake_compute_retry_config,
+        &config.connect_to_compute,
+    )
+    .await;
+
+    let mut node = match res {
+        Ok(node) => node,
+        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };
-    let backend = auth::Backend::ControlPlane(cplane, creds.info);

-    // NOTE: This is messy, but should hopefully be detangled with PGLB.
-    // We wanted to separate the concerns of **connect** to compute (a PGLB operation),
-    // from **authenticate** to compute (a NeonKeeper operation).
-    //
-    // This unfortunately removed retry handling for one error case where
-    // the compute was cached, and we connected, but the compute cache was actually stale
-    // and is associated with the wrong endpoint. We detect this when the **authentication** fails.
-    // As such, we retry once here if the `authenticate` function fails and the error is valid to retry.
-    let pg_settings = loop {
-        attempt += 1;
-
-        // TODO: callback to pglb
-        let res = connect_to_compute(
-            ctx,
-            &connect,
-            &backend,
-            config.wake_compute_retry_config,
-            &config.connect_to_compute,
-        )
-        .await;
-
-        match res {
-            Ok(n) => node = n,
-            Err(e) => return Err(client.throw_error(e, Some(ctx)).await)?,
-        }
-
-        let auth::Backend::ControlPlane(cplane, user_info) = &backend else {
-            unreachable!("ensured above");
-        };
-
-        let res = auth_info.authenticate(ctx, &mut node, user_info).await;
-        match res {
-            Ok(pg_settings) => break pg_settings,
-            Err(e) if attempt < 2 && e.should_retry_wake_compute() => {
-                tracing::warn!(error = ?e, "retrying wake compute");
-
-                #[allow(irrefutable_let_patterns)]
-                if let ControlPlaneClient::ProxyV1(cplane_proxy_v1) = &**cplane {
-                    let key = user_info.endpoint_cache_key();
-                    cplane_proxy_v1.caches.node_info.invalidate(&key);
-                }
-            }
-            Err(e) => Err(client.throw_error(e, Some(ctx)).await)?,
-        }
+    let pg_settings = auth_info.authenticate(ctx, &mut node, creds.info).await;
+    let pg_settings = match pg_settings {
+        Ok(pg_settings) => pg_settings,
+        Err(e) => Err(stream.throw_error(e, Some(ctx)).await)?,
    };

    let session = cancellation_handler.get_key();

-    finish_client_init(&pg_settings, *session.key(), client);
+    prepare_client_connection(&pg_settings, *session.key(), &mut stream);
+    let stream = stream.flush_and_into_inner().await?;

    let session_id = ctx.session_id();
-    let (cancel_on_shutdown, cancel) = oneshot::channel();
+    let (cancel_on_shutdown, cancel) = tokio::sync::oneshot::channel();
    tokio::spawn(async move {
        session
            .maintain_cancel_key(
@@ -160,32 +394,50 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin + Send>(
            .await;
    });

-    Ok((node, cancel_on_shutdown))
+    let private_link_id = match ctx.extra() {
+        Some(ConnectionInfoExtra::Aws { vpce_id }) => Some(vpce_id.clone()),
+        Some(ConnectionInfoExtra::Azure { link_id }) => Some(link_id.to_smolstr()),
+        None => None,
+    };
+
+    Ok(Some(ProxyPassthrough {
+        client: stream,
+        compute: node.stream,
+
+        aux: node.aux,
+        private_link_id,
+
+        _cancel_on_shutdown: cancel_on_shutdown,
+
+        _req: request_gauge,
+        _conn: conn_gauge,
+        _db_conn: node.guage,
+    }))
 }

 /// Finish client connection initialization: confirm auth success, send params, etc.
-pub(crate) fn finish_client_init(
+pub(crate) fn prepare_client_connection(
    settings: &compute::PostgresSettings,
    cancel_key_data: CancelKeyData,
-    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
+    stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
 ) {
    // Forward all deferred notices to the client.
    for notice in &settings.delayed_notice {
-        client.write_raw(notice.as_bytes().len(), b'N', |buf| {
+        stream.write_raw(notice.as_bytes().len(), b'N', |buf| {
            buf.extend_from_slice(notice.as_bytes());
        });
    }

    // Forward all postgres connection params to the client.
    for (name, value) in &settings.params {
-        client.write_message(BeMessage::ParameterStatus {
+        stream.write_message(BeMessage::ParameterStatus {
            name: name.as_bytes(),
            value: value.as_bytes(),
        });
    }

-    client.write_message(BeMessage::BackendKeyData(cancel_key_data));
-    client.write_message(BeMessage::ReadyForQuery);
+    stream.write_message(BeMessage::BackendKeyData(cancel_key_data));
+    stream.write_message(BeMessage::ReadyForQuery);
 }

 #[derive(Debug, Clone, PartialEq, Eq, Default, Serialize, Deserialize)]
@@ -195,7 +447,7 @@ impl NeonOptions {
    // proxy options:

    /// `PARAMS_COMPAT` allows opting in to forwarding all startup parameters from client to compute.
-    pub const PARAMS_COMPAT: &str = "proxy_params_compat";
+    const PARAMS_COMPAT: &str = "proxy_params_compat";

    // cplane options:

--- a/proxy/src/proxy/retry.rs
+++ b/proxy/src/proxy/retry.rs
@@ -3,7 +3,7 @@ use std::io;

 use tokio::time;

-use crate::compute::{self, PostgresError};
+use crate::compute;
 use crate::config::RetryConfig;

 pub(crate) trait CouldRetry {
@@ -115,14 +115,6 @@ impl ShouldRetryWakeCompute for compute::ConnectionError {
    }
 }

-impl ShouldRetryWakeCompute for PostgresError {
-    fn should_retry_wake_compute(&self) -> bool {
-        match self {
-            PostgresError::Postgres(error) => error.should_retry_wake_compute(),
-        }
-    }
-}
-
 pub(crate) fn retry_after(num_retries: u32, config: RetryConfig) -> time::Duration {
    config
        .base_delay
--- a/proxy/src/proxy/tests/mitm.rs
+++ b/proxy/src/proxy/tests/mitm.rs
@@ -14,9 +14,6 @@ use tokio::io::{AsyncReadExt, AsyncWriteExt, DuplexStream};
 use tokio_util::codec::{Decoder, Encoder};

 use super::*;
-use crate::config::TlsConfig;
-use crate::context::RequestContext;
-use crate::pglb::handshake::{HandshakeData, handshake};

 enum Intercept {
    None,
--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -3,7 +3,6 @@

 mod mitm;

-use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, bail};
@@ -11,31 +10,26 @@ use async_trait::async_trait;
 use http::StatusCode;
 use postgres_client::config::SslMode;
 use postgres_client::tls::{MakeTlsConnect, NoTls};
+use retry::{ShouldRetryWakeCompute, retry_after};
 use rstest::rstest;
 use rustls::crypto::ring;
 use rustls::pki_types;
-use tokio::io::{AsyncRead, AsyncWrite, DuplexStream};
+use tokio::io::DuplexStream;
 use tracing_test::traced_test;

 use super::retry::CouldRetry;
+use super::*;
 use crate::auth::backend::{ComputeUserInfo, MaybeOwned};
-use crate::config::{ComputeConfig, RetryConfig, TlsConfig};
-use crate::context::RequestContext;
+use crate::config::{ComputeConfig, RetryConfig};
 use crate::control_plane::client::{ControlPlaneClient, TestControlPlaneClient};
 use crate::control_plane::messages::{ControlPlaneErrorMessage, Details, MetricsAuxInfo, Status};
 use crate::control_plane::{self, CachedNodeInfo, NodeInfo, NodeInfoCache};
-use crate::error::{ErrorKind, ReportableError};
-use crate::pglb::ERR_INSECURE_CONNECTION;
-use crate::pglb::handshake::{HandshakeData, handshake};
-use crate::pqproto::BeMessage;
-use crate::proxy::NeonOptions;
-use crate::proxy::connect_compute::{ConnectMechanism, connect_to_compute};
-use crate::proxy::retry::{ShouldRetryWakeCompute, retry_after};
-use crate::stream::{PqStream, Stream};
+use crate::error::ErrorKind;
+use crate::proxy::connect_compute::ConnectMechanism;
 use crate::tls::client_config::compute_client_config_with_certs;
 use crate::tls::server_config::CertResolver;
 use crate::types::{BranchId, EndpointId, ProjectId};
-use crate::{auth, compute, sasl, scram};
+use crate::{sasl, scram};

 /// Generate a set of TLS certificates: CA + server.
 fn generate_certs(
@@ -380,7 +374,6 @@ fn connect_compute_total_wait() {
 #[derive(Clone, Copy, Debug)]
 enum ConnectAction {
    Wake,
-    WakeCold,
    WakeFail,
    WakeRetry,
    Connect,
@@ -511,9 +504,6 @@ impl TestControlPlaneClient for TestConnectMechanism {
        *counter += 1;
        match action {
            ConnectAction::Wake => Ok(helper_create_cached_node_info(self.cache)),
-            ConnectAction::WakeCold => Ok(CachedNodeInfo::new_uncached(
-                helper_create_uncached_node_info(),
-            )),
            ConnectAction::WakeFail => {
                let err = control_plane::errors::ControlPlaneError::Message(Box::new(
                    ControlPlaneErrorMessage {
@@ -561,8 +551,8 @@ impl TestControlPlaneClient for TestConnectMechanism {
    }
 }

-fn helper_create_uncached_node_info() -> NodeInfo {
-    NodeInfo {
+fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
+    let node = NodeInfo {
        conn_info: compute::ConnectInfo {
            host: "test".into(),
            port: 5432,
@@ -576,11 +566,7 @@ fn helper_create_uncached_node_info() -> NodeInfo {
            compute_id: "compute".into(),
            cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm,
        },
-    }
-}
-
-fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
-    let node = helper_create_uncached_node_info();
+    };
    let (_, node2) = cache.insert_unit("key".into(), Ok(node.clone()));
    node2.map(|()| node)
 }
@@ -756,7 +742,7 @@ async fn fail_no_wake_skips_cache_invalidation() {
    let ctx = RequestContext::test();
    let mech = TestConnectMechanism::new(vec![
        ConnectAction::Wake,
-        ConnectAction::RetryNoWake,
+        ConnectAction::FailNoWake,
        ConnectAction::Connect,
    ]);
    let user = helper_create_connect_info(&mech);
@@ -802,7 +788,7 @@ async fn retry_no_wake_skips_invalidation() {

    let ctx = RequestContext::test();
    // Wake → RetryNoWake (retryable + NOT wakeable)
-    let mechanism = TestConnectMechanism::new(vec![Wake, RetryNoWake, Fail]);
+    let mechanism = TestConnectMechanism::new(vec![Wake, RetryNoWake]);
    let user_info = helper_create_connect_info(&mechanism);
    let cfg = config();

@@ -816,44 +802,3 @@ async fn retry_no_wake_skips_invalidation() {
        "invalidating stalled compute node info cache entry"
    ));
 }
-
-#[tokio::test]
-#[traced_test]
-async fn retry_no_wake_error_fast() {
-    let _ = env_logger::try_init();
-    use ConnectAction::*;
-
-    let ctx = RequestContext::test();
-    // Wake → FailNoWake (not retryable + NOT wakeable)
-    let mechanism = TestConnectMechanism::new(vec![Wake, FailNoWake]);
-    let user_info = helper_create_connect_info(&mechanism);
-    let cfg = config();
-
-    connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
-        .await
-        .unwrap_err();
-    mechanism.verify();
-
-    // Because FailNoWake has wakeable=false, we must NOT see invalidate_cache
-    assert!(!logs_contain(
-        "invalidating stalled compute node info cache entry"
-    ));
-}
-
-#[tokio::test]
-#[traced_test]
-async fn retry_cold_wake_skips_invalidation() {
-    let _ = env_logger::try_init();
-    use ConnectAction::*;
-
-    let ctx = RequestContext::test();
-    // WakeCold → FailNoWake (not retryable + NOT wakeable)
-    let mechanism = TestConnectMechanism::new(vec![WakeCold, Retry, Connect]);
-    let user_info = helper_create_connect_info(&mechanism);
-    let cfg = config();
-
-    connect_to_compute(&ctx, &mechanism, &user_info, cfg.retry, &cfg)
-        .await
-        .unwrap();
-    mechanism.verify();
-}
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -139,6 +139,12 @@ impl RateBucketInfo {
        Self::new(200, Duration::from_secs(600)),
    ];

+    // Every session has a cancel key, so this limit is essentially a global proxy limit.
+    pub const DEFAULT_REDIS_SET: [Self; 2] = [
+        Self::new(100_000, Duration::from_secs(1)),
+        Self::new(50_000, Duration::from_secs(10)),
+    ];
+
    pub fn rps(&self) -> f64 {
        (self.max_rpi as f64) / self.interval.as_secs_f64()
    }
--- a/proxy/src/redis/connection_with_credentials_provider.rs
+++ b/proxy/src/redis/connection_with_credentials_provider.rs
@@ -1,12 +1,11 @@
-use std::convert::Infallible;
-use std::sync::{Arc, atomic::AtomicBool, atomic::Ordering};
+use std::sync::Arc;
 use std::time::Duration;

 use futures::FutureExt;
 use redis::aio::{ConnectionLike, MultiplexedConnection};
 use redis::{ConnectionInfo, IntoConnectionInfo, RedisConnectionInfo, RedisResult};
 use tokio::task::JoinHandle;
-use tracing::{error, info, warn};
+use tracing::{debug, error, info, warn};

 use super::elasticache::CredentialsProvider;

@@ -32,9 +31,8 @@ pub struct ConnectionWithCredentialsProvider {
    credentials: Credentials,
    // TODO: with more load on the connection, we should consider using a connection pool
    con: Option<MultiplexedConnection>,
-    refresh_token_task: Option<JoinHandle<Infallible>>,
+    refresh_token_task: Option<JoinHandle<()>>,
    mutex: tokio::sync::Mutex<()>,
-    credentials_refreshed: Arc<AtomicBool>,
 }

 impl Clone for ConnectionWithCredentialsProvider {
@@ -44,7 +42,6 @@ impl Clone for ConnectionWithCredentialsProvider {
            con: None,
            refresh_token_task: None,
            mutex: tokio::sync::Mutex::new(()),
-            credentials_refreshed: Arc::new(AtomicBool::new(false)),
        }
    }
 }
@@ -68,7 +65,6 @@ impl ConnectionWithCredentialsProvider {
            con: None,
            refresh_token_task: None,
            mutex: tokio::sync::Mutex::new(()),
-            credentials_refreshed: Arc::new(AtomicBool::new(false)),
        }
    }

@@ -82,7 +78,6 @@ impl ConnectionWithCredentialsProvider {
            con: None,
            refresh_token_task: None,
            mutex: tokio::sync::Mutex::new(()),
-            credentials_refreshed: Arc::new(AtomicBool::new(true)),
        }
    }

@@ -90,10 +85,6 @@ impl ConnectionWithCredentialsProvider {
        redis::cmd("PING").query_async(con).await
    }

-    pub(crate) fn credentials_refreshed(&self) -> bool {
-        self.credentials_refreshed.load(Ordering::Relaxed)
-    }
-
    pub(crate) async fn connect(&mut self) -> anyhow::Result<()> {
        let _guard = self.mutex.lock().await;
        if let Some(con) = self.con.as_mut() {
@@ -121,12 +112,12 @@ impl ConnectionWithCredentialsProvider {
        if let Credentials::Dynamic(credentials_provider, _) = &self.credentials {
            let credentials_provider = credentials_provider.clone();
            let con2 = con.clone();
-            let credentials_refreshed = self.credentials_refreshed.clone();
-            let f = tokio::spawn(Self::keep_connection(
-                con2,
-                credentials_provider,
-                credentials_refreshed,
-            ));
+            let f = tokio::spawn(async move {
+                Self::keep_connection(con2, credentials_provider)
+                    .await
+                    .inspect_err(|e| debug!("keep_connection failed: {e}"))
+                    .ok();
+            });
            self.refresh_token_task = Some(f);
        }
        match Self::ping(&mut con).await {
@@ -162,7 +153,6 @@ impl ConnectionWithCredentialsProvider {

    async fn get_client(&self) -> anyhow::Result<redis::Client> {
        let client = redis::Client::open(self.get_connection_info().await?)?;
-        self.credentials_refreshed.store(true, Ordering::Relaxed);
        Ok(client)
    }

@@ -178,19 +168,16 @@ impl ConnectionWithCredentialsProvider {
    async fn keep_connection(
        mut con: MultiplexedConnection,
        credentials_provider: Arc<CredentialsProvider>,
-        credentials_refreshed: Arc<AtomicBool>,
-    ) -> Infallible {
+    ) -> anyhow::Result<()> {
        loop {
            // The connection lives for 12h, for the sanity check we refresh it every hour.
            tokio::time::sleep(Duration::from_secs(60 * 60)).await;
            match Self::refresh_token(&mut con, credentials_provider.clone()).await {
                Ok(()) => {
                    info!("Token refreshed");
-                    credentials_refreshed.store(true, Ordering::Relaxed);
                }
                Err(e) => {
                    error!("Error during token refresh: {e:?}");
-                    credentials_refreshed.store(false, Ordering::Relaxed);
                }
            }
        }
--- a/proxy/src/redis/kv_ops.rs
+++ b/proxy/src/redis/kv_ops.rs
@@ -5,9 +5,11 @@ use redis::aio::ConnectionLike;
 use redis::{Cmd, FromRedisValue, Pipeline, RedisResult};

 use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
+use crate::rate_limiter::{GlobalRateLimiter, RateBucketInfo};

 pub struct RedisKVClient {
    client: ConnectionWithCredentialsProvider,
+    limiter: GlobalRateLimiter,
 }

 #[allow(async_fn_in_trait)]
@@ -28,8 +30,11 @@ impl Queryable for Cmd {
 }

 impl RedisKVClient {
-    pub fn new(client: ConnectionWithCredentialsProvider) -> Self {
-        Self { client }
+    pub fn new(client: ConnectionWithCredentialsProvider, info: &'static [RateBucketInfo]) -> Self {
+        Self {
+            client,
+            limiter: GlobalRateLimiter::new(info.into()),
+        }
    }

    pub async fn try_connect(&mut self) -> anyhow::Result<()> {
@@ -40,20 +45,21 @@ impl RedisKVClient {
            .inspect_err(|e| tracing::error!("failed to connect to redis: {e}"))
    }

-    pub(crate) fn credentials_refreshed(&self) -> bool {
-        self.client.credentials_refreshed()
-    }
-
    pub(crate) async fn query<T: FromRedisValue>(
        &mut self,
        q: &impl Queryable,
    ) -> anyhow::Result<T> {
+        if !self.limiter.check() {
+            tracing::info!("Rate limit exceeded. Skipping query");
+            return Err(anyhow::anyhow!("Rate limit exceeded"));
+        }
+
        let e = match q.query(&mut self.client).await {
            Ok(t) => return Ok(t),
            Err(e) => e,
        };

-        tracing::debug!("failed to run query: {e}");
+        tracing::error!("failed to run query: {e}");
        match e.retry_method() {
            redis::RetryMethod::Reconnect => {
                tracing::info!("Redis client is disconnected. Reconnecting...");
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -141,19 +141,29 @@ where

 struct MessageHandler<C: ProjectInfoCache + Send + Sync + 'static> {
    cache: Arc<C>,
+    region_id: String,
 }

 impl<C: ProjectInfoCache + Send + Sync + 'static> Clone for MessageHandler<C> {
    fn clone(&self) -> Self {
        Self {
            cache: self.cache.clone(),
+            region_id: self.region_id.clone(),
        }
    }
 }

 impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
-    pub(crate) fn new(cache: Arc<C>) -> Self {
-        Self { cache }
+    pub(crate) fn new(cache: Arc<C>, region_id: String) -> Self {
+        Self { cache, region_id }
+    }
+
+    pub(crate) async fn increment_active_listeners(&self) {
+        self.cache.increment_active_listeners().await;
+    }
+
+    pub(crate) async fn decrement_active_listeners(&self) {
+        self.cache.decrement_active_listeners().await;
    }

    #[tracing::instrument(skip(self, msg), fields(session_id = tracing::field::Empty))]
@@ -266,7 +276,7 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
        }
        let mut conn = match try_connect(&redis).await {
            Ok(conn) => {
-                handler.cache.increment_active_listeners().await;
+                handler.increment_active_listeners().await;
                conn
            }
            Err(e) => {
@@ -287,11 +297,11 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
                }
            }
            if cancellation_token.is_cancelled() {
-                handler.cache.decrement_active_listeners().await;
+                handler.decrement_active_listeners().await;
                return Ok(());
            }
        }
-        handler.cache.decrement_active_listeners().await;
+        handler.decrement_active_listeners().await;
    }
 }

@@ -300,11 +310,12 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
 pub async fn task_main<C>(
    redis: ConnectionWithCredentialsProvider,
    cache: Arc<C>,
+    region_id: String,
 ) -> anyhow::Result<Infallible>
 where
    C: ProjectInfoCache + Send + Sync + 'static,
 {
-    let handler = MessageHandler::new(cache);
+    let handler = MessageHandler::new(cache, region_id);
    // 6h - 1m.
    // There will be 1 minute overlap between two tasks. But at least we can be sure that no message is lost.
    let mut interval = tokio::time::interval(std::time::Duration::from_secs(6 * 60 * 60 - 60));
--- a/proxy/src/serverless/mod.rs
+++ b/proxy/src/serverless/mod.rs
@@ -417,7 +417,12 @@ async fn request_handler(
    if config.http_config.accept_websockets
        && framed_websockets::upgrade::is_upgrade_request(&request)
    {
-        let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Ws);
+        let ctx = RequestContext::new(
+            session_id,
+            conn_info,
+            crate::metrics::Protocol::Ws,
+            &config.region,
+        );

        ctx.set_user_agent(
            request
@@ -457,7 +462,12 @@ async fn request_handler(
        // Return the response so the spawned future can continue.
        Ok(response.map(|b| b.map_err(|x| match x {}).boxed()))
    } else if request.uri().path() == "/sql" && *request.method() == Method::POST {
-        let ctx = RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Http);
+        let ctx = RequestContext::new(
+            session_id,
+            conn_info,
+            crate::metrics::Protocol::Http,
+            &config.region,
+        );
        let span = ctx.span();

        let testodrome_id = request
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -17,8 +17,7 @@ use crate::config::ProxyConfig;
 use crate::context::RequestContext;
 use crate::error::ReportableError;
 use crate::metrics::Metrics;
-use crate::pglb::{ClientMode, handle_connection};
-use crate::proxy::ErrorSource;
+use crate::proxy::{ClientMode, ErrorSource, handle_client};
 use crate::rate_limiter::EndpointRateLimiter;

 pin_project! {
@@ -143,7 +142,7 @@ pub(crate) async fn serve_websocket(
        .client_connections
        .guard(crate::metrics::Protocol::Ws);

-    let res = Box::pin(handle_connection(
+    let res = Box::pin(handle_client(
        config,
        auth_backend,
        &ctx,
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "1.88.0"
+channel = "1.87.0"
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -18,7 +18,8 @@ use metrics::set_build_info_metric;
 use remote_storage::RemoteStorageConfig;
 use safekeeper::defaults::{
    DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT,
-    DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY,
+    DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES,
+    DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY,
    DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, DEFAULT_SSL_CERT_FILE,
    DEFAULT_SSL_CERT_RELOAD_PERIOD, DEFAULT_SSL_KEY_FILE,
 };
@@ -138,6 +139,11 @@ struct Args {
    /// Safekeeper won't be elected for WAL offloading if it is lagging for more than this value in bytes
    #[arg(long, default_value_t = DEFAULT_MAX_OFFLOADER_LAG_BYTES)]
    max_offloader_lag: u64,
+    /* BEGIN_HADRON */
+    /// Safekeeper will re-elect a new offloader if the current backup lagging for more than this value in bytes
+    #[arg(long, default_value_t = DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES)]
+    max_reelect_offloader_lag_bytes: u64,
+    /* END_HADRON */
    /// Number of max parallel WAL segments to be offloaded to remote storage.
    #[arg(long, default_value = "5")]
    wal_backup_parallel_jobs: usize,
@@ -391,6 +397,9 @@ async fn main() -> anyhow::Result<()> {
        peer_recovery_enabled: args.peer_recovery,
        remote_storage: args.remote_storage,
        max_offloader_lag_bytes: args.max_offloader_lag,
+        /* BEGIN_HADRON */
+        max_reelect_offloader_lag_bytes: args.max_reelect_offloader_lag_bytes,
+        /* END_HADRON */
        wal_backup_enabled: !args.disable_wal_backup,
        backup_parallel_jobs: args.wal_backup_parallel_jobs,
        pg_auth,
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -220,7 +220,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
                                    stripe_size: ShardStripeSize(stripe_size),
                                };
                                self.shard =
-                                    Some(ShardIdentity::from_params(ShardNumber(number), params));
+                                    Some(ShardIdentity::from_params(ShardNumber(number), &params));
                            }
                            _ => {
                                return Err(QueryError::Other(anyhow::anyhow!(
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -61,6 +61,9 @@ pub mod defaults {

    pub const DEFAULT_HEARTBEAT_TIMEOUT: &str = "5000ms";
    pub const DEFAULT_MAX_OFFLOADER_LAG_BYTES: u64 = 128 * (1 << 20);
+    /* BEGIN_HADRON */
+    pub const DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES: u64 = 128 * (1 << 20);
+    /* END_HADRON */
    pub const DEFAULT_PARTIAL_BACKUP_TIMEOUT: &str = "15m";
    pub const DEFAULT_CONTROL_FILE_SAVE_INTERVAL: &str = "300s";
    pub const DEFAULT_PARTIAL_BACKUP_CONCURRENCY: &str = "5";
@@ -99,6 +102,9 @@ pub struct SafeKeeperConf {
    pub peer_recovery_enabled: bool,
    pub remote_storage: Option<RemoteStorageConfig>,
    pub max_offloader_lag_bytes: u64,
+    /* BEGIN_HADRON */
+    pub max_reelect_offloader_lag_bytes: u64,
+    /* END_HADRON */
    pub backup_parallel_jobs: usize,
    pub wal_backup_enabled: bool,
    pub pg_auth: Option<Arc<JwtAuth>>,
@@ -151,6 +157,9 @@ impl SafeKeeperConf {
            sk_auth_token: None,
            heartbeat_timeout: Duration::new(5, 0),
            max_offloader_lag_bytes: defaults::DEFAULT_MAX_OFFLOADER_LAG_BYTES,
+            /* BEGIN_HADRON */
+            max_reelect_offloader_lag_bytes: defaults::DEFAULT_MAX_REELECT_OFFLOADER_LAG_BYTES,
+            /* END_HADRON */
            current_thread_runtime: false,
            walsenders_keep_horizon: false,
            partial_backup_timeout: Duration::from_secs(0),
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -138,6 +138,15 @@ pub static BACKUP_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
    )
    .expect("Failed to register safekeeper_backup_errors_total counter")
 });
+/* BEGIN_HADRON */
+pub static BACKUP_REELECT_LEADER_COUNT: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "safekeeper_backup_reelect_leader_total",
+        "Number of times the backup leader was reelected"
+    )
+    .expect("Failed to register safekeeper_backup_reelect_leader_total counter")
+});
+/* END_HADRON */
 pub static BROKER_PUSH_ALL_UPDATES_SECONDS: Lazy<Histogram> = Lazy::new(|| {
    register_histogram!(
        "safekeeper_broker_push_update_seconds",
--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -1,6 +1,5 @@
 use std::cmp::min;
 use std::io::{self, ErrorKind};
-use std::ops::RangeInclusive;
 use std::sync::Arc;

 use anyhow::{Context, Result, anyhow, bail};
@@ -35,7 +34,7 @@ use crate::control_file::CONTROL_FILE_NAME;
 use crate::state::{EvictionState, TimelinePersistentState};
 use crate::timeline::{Timeline, TimelineError, WalResidentTimeline};
 use crate::timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline};
-use crate::wal_storage::{open_wal_file, wal_file_paths};
+use crate::wal_storage::open_wal_file;
 use crate::{GlobalTimelines, debug_dump, wal_backup};

 /// Stream tar archive of timeline to tx.
@@ -96,8 +95,8 @@ pub async fn stream_snapshot(

 /// State needed while streaming the snapshot.
 pub struct SnapshotContext {
-    /// The interval of segment numbers. If None, the timeline hasn't had writes yet, so only send the control file
-    pub from_to_segno: Option<RangeInclusive<XLogSegNo>>,
+    pub from_segno: XLogSegNo, // including
+    pub upto_segno: XLogSegNo, // including
    pub term: Term,
    pub last_log_term: Term,
    pub flush_lsn: Lsn,
@@ -175,35 +174,23 @@ pub async fn stream_snapshot_resident_guts(
        .await?;
    pausable_failpoint!("sk-snapshot-after-list-pausable");

-    if let Some(from_to_segno) = &bctx.from_to_segno {
-        let tli_dir = tli.get_timeline_dir();
-        info!(
-            "sending {} segments [{:#X}-{:#X}], term={}, last_log_term={}, flush_lsn={}",
-            from_to_segno.end() - from_to_segno.start() + 1,
-            from_to_segno.start(),
-            from_to_segno.end(),
-            bctx.term,
-            bctx.last_log_term,
-            bctx.flush_lsn,
-        );
-        for segno in from_to_segno.clone() {
-            let Some((mut sf, is_partial)) =
-                open_wal_file(&tli_dir, segno, bctx.wal_seg_size).await?
-            else {
-                // File is not found
-                let (wal_file_path, _wal_file_partial_path) =
-                    wal_file_paths(&tli_dir, segno, bctx.wal_seg_size);
-                tracing::warn!("couldn't find WAL segment file {wal_file_path}");
-                bail!("couldn't find WAL segment file {wal_file_path}")
-            };
-            let mut wal_file_name = XLogFileName(PG_TLI, segno, bctx.wal_seg_size);
-            if is_partial {
-                wal_file_name.push_str(".partial");
-            }
-            ar.append_file(&wal_file_name, &mut sf).await?;
+    let tli_dir = tli.get_timeline_dir();
+    info!(
+        "sending {} segments [{:#X}-{:#X}], term={}, last_log_term={}, flush_lsn={}",
+        bctx.upto_segno - bctx.from_segno + 1,
+        bctx.from_segno,
+        bctx.upto_segno,
+        bctx.term,
+        bctx.last_log_term,
+        bctx.flush_lsn,
+    );
+    for segno in bctx.from_segno..=bctx.upto_segno {
+        let (mut sf, is_partial) = open_wal_file(&tli_dir, segno, bctx.wal_seg_size).await?;
+        let mut wal_file_name = XLogFileName(PG_TLI, segno, bctx.wal_seg_size);
+        if is_partial {
+            wal_file_name.push_str(".partial");
        }
-    } else {
-        info!("Not including any segments into the snapshot");
+        ar.append_file(&wal_file_name, &mut sf).await?;
    }

    // Do the term check before ar.finish to make archive corrupted in case of
@@ -351,26 +338,19 @@ impl WalResidentTimeline {
        // removed further than `backup_lsn`. Since we're holding shared_state
        // lock and setting `wal_removal_on_hold` later, it guarantees that WAL
        // won't be removed until we're done.
-        let timeline_state = shared_state.sk.state();
        let from_lsn = min(
-            timeline_state.remote_consistent_lsn,
-            timeline_state.backup_lsn,
-        );
-        let flush_lsn = shared_state.sk.flush_lsn();
-        let (send_segments, msg) = if from_lsn == Lsn::INVALID {
-            (false, "snapshot is called on uninitialized timeline")
-        } else {
-            (true, "timeline is initialized")
-        };
-        tracing::info!(
-            remote_consistent_lsn=%timeline_state.remote_consistent_lsn,
-            backup_lsn=%timeline_state.backup_lsn,
-            %flush_lsn,
-            "{msg}"
+            shared_state.sk.state().remote_consistent_lsn,
+            shared_state.sk.state().backup_lsn,
        );
+        if from_lsn == Lsn::INVALID {
+            // this is possible if snapshot is called before handling first
+            // elected message
+            bail!("snapshot is called on uninitialized timeline");
+        }
        let from_segno = from_lsn.segment_number(wal_seg_size);
        let term = shared_state.sk.state().acceptor_state.term;
        let last_log_term = shared_state.sk.last_log_term();
+        let flush_lsn = shared_state.sk.flush_lsn();
        let upto_segno = flush_lsn.segment_number(wal_seg_size);
        // have some limit on max number of segments as a sanity check
        const MAX_ALLOWED_SEGS: u64 = 1000;
@@ -396,9 +376,9 @@ impl WalResidentTimeline {
        drop(shared_state);

        let tli_copy = self.wal_residence_guard().await?;
-        let from_to_segno = send_segments.then_some(from_segno..=upto_segno);
        let bctx = SnapshotContext {
-            from_to_segno,
+            from_segno,
+            upto_segno,
            term,
            last_log_term,
            flush_lsn,
--- a/safekeeper/src/state.rs
+++ b/safekeeper/src/state.rs
@@ -9,7 +9,7 @@ use anyhow::{Result, bail};
 use postgres_ffi::WAL_SEGMENT_SIZE;
 use postgres_versioninfo::{PgMajorVersion, PgVersionId};
 use safekeeper_api::membership::Configuration;
-use safekeeper_api::models::TimelineTermBumpResponse;
+use safekeeper_api::models::{TimelineMembershipSwitchResponse, TimelineTermBumpResponse};
 use safekeeper_api::{INITIAL_TERM, ServerInfo, Term};
 use serde::{Deserialize, Serialize};
 use tracing::info;
@@ -83,11 +83,6 @@ pub enum EvictionState {
    Offloaded(Lsn),
 }

-pub struct MembershipSwitchResult {
-    pub previous_conf: Configuration,
-    pub current_conf: Configuration,
-}
-
 impl TimelinePersistentState {
    /// commit_lsn is the same as start_lsn in the normal creaiton; see
    /// `TimelineCreateRequest` comments.`
@@ -266,7 +261,10 @@ where

    /// Switch into membership configuration `to` if it is higher than the
    /// current one.
-    pub async fn membership_switch(&mut self, to: Configuration) -> Result<MembershipSwitchResult> {
+    pub async fn membership_switch(
+        &mut self,
+        to: Configuration,
+    ) -> Result<TimelineMembershipSwitchResponse> {
        let before = self.mconf.clone();
        // Is switch allowed?
        if to.generation <= self.mconf.generation {
@@ -280,7 +278,7 @@ where
            self.finish_change(&state).await?;
            info!("switched membership conf to {} from {}", to, before);
        }
-        Ok(MembershipSwitchResult {
+        Ok(TimelineMembershipSwitchResponse {
            previous_conf: before,
            current_conf: self.mconf.clone(),
        })
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -190,14 +190,7 @@ impl StateSK {
        &mut self,
        to: Configuration,
    ) -> Result<TimelineMembershipSwitchResponse> {
-        let result = self.state_mut().membership_switch(to).await?;
-
-        Ok(TimelineMembershipSwitchResponse {
-            previous_conf: result.previous_conf,
-            current_conf: result.current_conf,
-            term: self.state().acceptor_state.term,
-            flush_lsn: self.flush_lsn(),
-        })
+        self.state_mut().membership_switch(to).await
    }

    /// Close open WAL files to release FDs.
--- a/safekeeper/src/wal_backup.rs
+++ b/safekeeper/src/wal_backup.rs
@@ -26,7 +26,9 @@ use utils::id::{NodeId, TenantTimelineId};
 use utils::lsn::Lsn;
 use utils::{backoff, pausable_failpoint};

-use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS, WAL_BACKUP_TASKS};
+use crate::metrics::{
+    BACKED_UP_SEGMENTS, BACKUP_ERRORS, BACKUP_REELECT_LEADER_COUNT, WAL_BACKUP_TASKS,
+};
 use crate::timeline::WalResidentTimeline;
 use crate::timeline_manager::{Manager, StateSnapshot};
 use crate::{SafeKeeperConf, WAL_BACKUP_RUNTIME};
@@ -70,8 +72,7 @@ pub(crate) async fn update_task(
    need_backup: bool,
    state: &StateSnapshot,
 ) {
-    let (offloader, election_dbg_str) =
-        determine_offloader(&state.peers, state.backup_lsn, mgr.tli.ttid, &mgr.conf);
+    let (offloader, election_dbg_str) = hadron_determine_offloader(mgr, state);
    let elected_me = Some(mgr.conf.my_id) == offloader;

    let should_task_run = need_backup && elected_me;
@@ -127,6 +128,71 @@ async fn shut_down_task(entry: &mut Option<WalBackupTaskHandle>) {
    }
 }

+/* BEGIN_HADRON */
+// On top of the neon determine_offloader, we also check if the current offloader is lagging behind too much.
+// If it is, we re-elect a new offloader. This mitigates the below issue. It also helps distribute the load across SKs.
+//
+// We observe that the offloader fails to upload a segment due to race conditions on XLOG SWITCH and PG start streaming WALs.
+// The wal_backup task continuously fails to upload a full segment while the segment remains partial on the disk.
+// The consequence is that commit_lsn for all SKs moves forward but backup_lsn stays the same. Then, all SKs run out of disk space.
+// See go/sk-ood-xlog-switch for more details.
+//
+// To mitigate this issue, we will re-elect a new offloader if the current offloader is lagging behind too much.
+// Each SK makes the decision locally but they are aware of each other's commit and backup lsns.
+//
+// determine_offloader will pick a SK. say SK-1.
+// Each SK checks
+// -- if commit_lsn - backup_lsn > threshold,
+// -- -- remove SK-1 from the candidate and call determine_offloader again.
+// SK-1 will step down and all SKs will elect the same leader again.
+// After the backup is caught up, the leader will become SK-1 again.
+fn hadron_determine_offloader(mgr: &Manager, state: &StateSnapshot) -> (Option<NodeId>, String) {
+    let (offloader, election_dbg_str, caughtup_peers_count) =
+        determine_offloader(&state.peers, state.backup_lsn, mgr.tli.ttid, &mgr.conf);
+
+    // Nothing to re-elect when no SK was elected or at most one peer is caught up.
+    let Some(offloader_sk_id) = offloader else {
+        return (offloader, election_dbg_str);
+    };
+    if caughtup_peers_count <= 1 {
+        return (offloader, election_dbg_str);
+    }
+
+    // NOTE(review): None presumably means backup_lsn is ahead of commit_lsn — confirm.
+    let Some(backup_lag) = state.commit_lsn.checked_sub(state.backup_lsn) else {
+        info!("Backup lag is None. Skipping re-election.");
+        return (offloader, election_dbg_str);
+    };
+    let backup_lag = backup_lag.0;
+
+    // Re-elect only when the lag strictly exceeds the threshold (see the algorithm above).
+    if backup_lag <= mgr.conf.max_reelect_offloader_lag_bytes {
+        info!(
+            "Backup lag {} does not exceed the threshold {}. Skipping re-election.",
+            backup_lag, mgr.conf.max_reelect_offloader_lag_bytes
+        );
+        return (offloader, election_dbg_str);
+    }
+
+    info!(
+        "Electing a new leader: Backup lag is too high backup lsn lag {} threshold {}: {}",
+        backup_lag, mgr.conf.max_reelect_offloader_lag_bytes, election_dbg_str
+    );
+    BACKUP_REELECT_LEADER_COUNT.inc();
+    // Remove the current offloader if lag is too high.
+    let new_peers: Vec<_> = state
+        .peers
+        .iter()
+        .filter(|p| p.sk_id != offloader_sk_id)
+        .cloned()
+        .collect();
+    // Run the election again over the remaining peers.
+    let (offloader, election_dbg_str, _) =
+        determine_offloader(&new_peers, state.backup_lsn, mgr.tli.ttid, &mgr.conf);
+    (offloader, election_dbg_str)
+}
+/* END_HADRON */
+
 /// The goal is to ensure that normally only one safekeepers offloads. However,
 /// it is fine (and inevitable, as s3 doesn't provide CAS) that for some short
 /// time we have several ones as they PUT the same files. Also,
@@ -141,13 +207,13 @@ fn determine_offloader(
    wal_backup_lsn: Lsn,
    ttid: TenantTimelineId,
    conf: &SafeKeeperConf,
-) -> (Option<NodeId>, String) {
+) -> (Option<NodeId>, String, usize) {
    // TODO: remove this once we fill newly joined safekeepers since backup_lsn.
    let capable_peers = alive_peers
        .iter()
        .filter(|p| p.local_start_lsn <= wal_backup_lsn);
    match capable_peers.clone().map(|p| p.commit_lsn).max() {
-        None => (None, "no connected peers to elect from".to_string()),
+        None => (None, "no connected peers to elect from".to_string(), 0),
        Some(max_commit_lsn) => {
            let threshold = max_commit_lsn
                .checked_sub(conf.max_offloader_lag_bytes)
@@ -175,6 +241,7 @@ fn determine_offloader(
                    capable_peers_dbg,
                    caughtup_peers.len()
                ),
+                caughtup_peers.len(),
            )
        }
    }
--- a/safekeeper/src/wal_storage.rs
+++ b/safekeeper/src/wal_storage.rs
@@ -9,7 +9,7 @@

 use std::cmp::{max, min};
 use std::future::Future;
-use std::io::{ErrorKind, SeekFrom};
+use std::io::{self, SeekFrom};
 use std::pin::Pin;

 use anyhow::{Context, Result, bail};
@@ -154,8 +154,8 @@ pub struct PhysicalStorage {
    ///     record
    ///
    /// Partial segment 002 has no WAL records, and it will be removed by the
-    /// next truncate_wal(). This flag will be set to false after the first
-    /// successful truncate_wal() call.
+    /// next truncate_wal(). This flag will be set to true after the first
+    /// truncate_wal() call.
    ///
    /// [`write_lsn`]: Self::write_lsn
    pending_wal_truncation: bool,
@@ -202,8 +202,6 @@ impl PhysicalStorage {
            ttid.timeline_id, flush_lsn, state.commit_lsn, state.peer_horizon_lsn,
        );
        if flush_lsn < state.commit_lsn {
-            // note: can never happen. find_end_of_wal returns provided start_lsn
-            // (state.commit_lsn in our case) if it doesn't find anything.
            bail!(
                "timeline {} potential data loss: flush_lsn {} by find_end_of_wal is less than commit_lsn  {} from control file",
                ttid.timeline_id,
@@ -796,13 +794,26 @@ impl WalReader {

        // Try to open local file, if we may have WAL locally
        if self.pos >= self.local_start_lsn {
-            let res = open_wal_file(&self.timeline_dir, segno, self.wal_seg_size).await?;
-            if let Some((mut file, _)) = res {
-                file.seek(SeekFrom::Start(xlogoff as u64)).await?;
-                return Ok(Box::pin(file));
-            } else {
-                // NotFound is expected, fall through to remote read
-            }
+            let res = open_wal_file(&self.timeline_dir, segno, self.wal_seg_size).await;
+            match res {
+                Ok((mut file, _)) => {
+                    file.seek(SeekFrom::Start(xlogoff as u64)).await?;
+                    return Ok(Box::pin(file));
+                }
+                Err(e) => {
+                    let is_not_found = e.chain().any(|e| {
+                        if let Some(e) = e.downcast_ref::<io::Error>() {
+                            e.kind() == io::ErrorKind::NotFound
+                        } else {
+                            false
+                        }
+                    });
+                    if !is_not_found {
+                        return Err(e);
+                    }
+                    // NotFound is expected, fall through to remote read
+                }
+            };
        }

        // Try to open remote file, if remote reads are enabled
@@ -821,31 +832,26 @@ pub(crate) async fn open_wal_file(
    timeline_dir: &Utf8Path,
    segno: XLogSegNo,
    wal_seg_size: usize,
-) -> Result<Option<(tokio::fs::File, bool)>> {
+) -> Result<(tokio::fs::File, bool)> {
    let (wal_file_path, wal_file_partial_path) = wal_file_paths(timeline_dir, segno, wal_seg_size);

    // First try to open the .partial file.
    let mut partial_path = wal_file_path.to_owned();
    partial_path.set_extension("partial");
    if let Ok(opened_file) = tokio::fs::File::open(&wal_file_partial_path).await {
-        return Ok(Some((opened_file, true)));
+        return Ok((opened_file, true));
    }

    // If that failed, try it without the .partial extension.
-    let pf_res = tokio::fs::File::open(&wal_file_path).await;
-    if let Err(e) = &pf_res {
-        if e.kind() == ErrorKind::NotFound {
-            return Ok(None);
-        }
-    }
-    let pf = pf_res
+    let pf = tokio::fs::File::open(&wal_file_path)
+        .await
        .with_context(|| format!("failed to open WAL file {wal_file_path:#}"))
        .map_err(|e| {
-            warn!("{e}");
+            warn!("{}", e);
            e
        })?;

-    Ok(Some((pf, false)))
+    Ok((pf, false))
 }

 /// Helper returning full path to WAL segment file and its .partial brother.
--- a/safekeeper/tests/walproposer_sim/safekeeper.rs
+++ b/safekeeper/tests/walproposer_sim/safekeeper.rs
@@ -159,6 +159,9 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
        heartbeat_timeout: Duration::from_secs(0),
        remote_storage: None,
        max_offloader_lag_bytes: 0,
+        /* BEGIN_HADRON */
+        max_reelect_offloader_lag_bytes: 0,
+        /* END_HADRON */
        wal_backup_enabled: false,
        listen_pg_addr_tenant_only: None,
        advertise_pg_addr: None,
--- a/storage_controller/Cargo.toml
+++ b/storage_controller/Cargo.toml
@@ -20,7 +20,6 @@ camino.workspace = true
 chrono.workspace = true
 clap.workspace = true
 clashmap.workspace = true
-compute_api.workspace = true
 cron.workspace = true
 fail.workspace = true
 futures.workspace = true
--- a/storage_controller/src/compute_hook.rs
+++ b/storage_controller/src/compute_hook.rs
@@ -5,8 +5,7 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::Context;
-use compute_api::spec::PageserverProtocol;
-use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
+use control_plane::endpoint::{ComputeControlPlane, EndpointStatus, PageserverProtocol};
 use control_plane::local_env::LocalEnv;
 use futures::StreamExt;
 use hyper::StatusCode;
@@ -14,12 +13,11 @@ use pageserver_api::config::DEFAULT_GRPC_LISTEN_PORT;
 use pageserver_api::controller_api::AvailabilityZone;
 use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId};
 use postgres_connection::parse_host_port;
-use safekeeper_api::membership::SafekeeperGeneration;
 use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
 use tracing::{Instrument, info_span};
 use utils::backoff::{self};
-use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
+use utils::id::{NodeId, TenantId};

 use crate::service::Config;

@@ -37,7 +35,7 @@ struct UnshardedComputeHookTenant {
    preferred_az: Option<AvailabilityZone>,

    // Must hold this lock to send a notification.
-    send_lock: Arc<tokio::sync::Mutex<Option<ComputeRemoteTenantState>>>,
+    send_lock: Arc<tokio::sync::Mutex<Option<ComputeRemoteState>>>,
 }
 struct ShardedComputeHookTenant {
    stripe_size: ShardStripeSize,
@@ -50,7 +48,7 @@ struct ShardedComputeHookTenant {
    // Must hold this lock to send a notification.  The contents represent
    // the last successfully sent notification, and are used to coalesce multiple
    // updates by only sending when there is a chance since our last successful send.
-    send_lock: Arc<tokio::sync::Mutex<Option<ComputeRemoteTenantState>>>,
+    send_lock: Arc<tokio::sync::Mutex<Option<ComputeRemoteState>>>,
 }

 /// Represents our knowledge of the compute's state: we can update this when we get a
@@ -58,9 +56,9 @@ struct ShardedComputeHookTenant {
 ///
 /// Should be wrapped in an Option<>, as we cannot always know the remote state.
 #[derive(PartialEq, Eq, Debug)]
-struct ComputeRemoteState<R> {
+struct ComputeRemoteState {
    // The request body which was acked by the compute
-    request: R,
+    request: ComputeHookNotifyRequest,

    // Whether the cplane indicated that the state was applied to running computes, or just
    // persisted.  In the Neon control plane, this is the difference between a 423 response (meaning
@@ -68,36 +66,6 @@ struct ComputeRemoteState<R> {
    applied: bool,
 }

-type ComputeRemoteTenantState = ComputeRemoteState<NotifyAttachRequest>;
-type ComputeRemoteTimelineState = ComputeRemoteState<NotifySafekeepersRequest>;
-
-/// The trait which define the handler-specific types and methods.
-/// We have two implementations of this trait so far:
-/// - [`ComputeHookTenant`] for tenant attach notifications ("/notify-attach")
-/// - [`ComputeHookTimeline`] for safekeeper change notifications ("/notify-safekeepers")
-trait ApiMethod {
-    /// Type of the key which identifies the resource.
-    /// It's either TenantId for tenant attach notifications,
-    /// or TenantTimelineId for safekeeper change notifications.
-    type Key: std::cmp::Eq + std::hash::Hash + Clone;
-
-    type Request: serde::Serialize + std::fmt::Debug;
-
-    const API_PATH: &'static str;
-
-    fn maybe_send(
-        &self,
-        key: Self::Key,
-        lock: Option<tokio::sync::OwnedMutexGuard<Option<ComputeRemoteState<Self::Request>>>>,
-    ) -> MaybeSendResult<Self::Request, Self::Key>;
-
-    async fn notify_local(
-        env: &LocalEnv,
-        cplane: &ComputeControlPlane,
-        req: &Self::Request,
-    ) -> Result<(), NotifyError>;
-}
-
 enum ComputeHookTenant {
    Unsharded(UnshardedComputeHookTenant),
    Sharded(ShardedComputeHookTenant),
@@ -128,7 +96,7 @@ impl ComputeHookTenant {
        }
    }

-    fn get_send_lock(&self) -> &Arc<tokio::sync::Mutex<Option<ComputeRemoteTenantState>>> {
+    fn get_send_lock(&self) -> &Arc<tokio::sync::Mutex<Option<ComputeRemoteState>>> {
        match self {
            Self::Unsharded(unsharded_tenant) => &unsharded_tenant.send_lock,
            Self::Sharded(sharded_tenant) => &sharded_tenant.send_lock,
@@ -222,136 +190,19 @@ impl ComputeHookTenant {
    }
 }

-/// The state of a timeline we need to notify the compute about.
-struct ComputeHookTimeline {
-    generation: SafekeeperGeneration,
-    safekeepers: Vec<SafekeeperInfo>,
-
-    send_lock: Arc<tokio::sync::Mutex<Option<ComputeRemoteTimelineState>>>,
-}
-
-impl ComputeHookTimeline {
-    /// Construct a new ComputeHookTimeline with the given safekeepers and generation.
-    fn new(generation: SafekeeperGeneration, safekeepers: Vec<SafekeeperInfo>) -> Self {
-        Self {
-            generation,
-            safekeepers,
-            send_lock: Arc::default(),
-        }
-    }
-
-    /// Update the state with a new SafekeepersUpdate.
-    /// Noop if the update generation is not greater than the current generation.
-    fn update(&mut self, sk_update: SafekeepersUpdate) {
-        if sk_update.generation > self.generation {
-            self.generation = sk_update.generation;
-            self.safekeepers = sk_update.safekeepers;
-        }
-    }
-}
-
-impl ApiMethod for ComputeHookTimeline {
-    type Key = TenantTimelineId;
-    type Request = NotifySafekeepersRequest;
-
-    const API_PATH: &'static str = "notify-safekeepers";
-
-    fn maybe_send(
-        &self,
-        ttid: TenantTimelineId,
-        lock: Option<tokio::sync::OwnedMutexGuard<Option<ComputeRemoteTimelineState>>>,
-    ) -> MaybeSendNotifySafekeepersResult {
-        let locked = match lock {
-            Some(already_locked) => already_locked,
-            None => {
-                // Lock order: this _must_ be only a try_lock, because we are called inside of the [`ComputeHook::timelines`] lock.
-                let Ok(locked) = self.send_lock.clone().try_lock_owned() else {
-                    return MaybeSendResult::AwaitLock((ttid, self.send_lock.clone()));
-                };
-                locked
-            }
-        };
-
-        if locked
-            .as_ref()
-            .is_some_and(|s| s.request.generation >= self.generation)
-        {
-            return MaybeSendResult::Noop;
-        }
-
-        MaybeSendResult::Transmit((
-            NotifySafekeepersRequest {
-                tenant_id: ttid.tenant_id,
-                timeline_id: ttid.timeline_id,
-                generation: self.generation,
-                safekeepers: self.safekeepers.clone(),
-            },
-            locked,
-        ))
-    }
-
-    async fn notify_local(
-        _env: &LocalEnv,
-        cplane: &ComputeControlPlane,
-        req: &NotifySafekeepersRequest,
-    ) -> Result<(), NotifyError> {
-        let NotifySafekeepersRequest {
-            tenant_id,
-            timeline_id,
-            generation,
-            safekeepers,
-        } = req;
-
-        for (endpoint_name, endpoint) in &cplane.endpoints {
-            if endpoint.tenant_id == *tenant_id
-                && endpoint.timeline_id == *timeline_id
-                && endpoint.status() == EndpointStatus::Running
-            {
-                tracing::info!("Reconfiguring safekeepers for endpoint {endpoint_name}");
-
-                let safekeepers = safekeepers.iter().map(|sk| sk.id).collect::<Vec<_>>();
-
-                endpoint
-                    .reconfigure_safekeepers(safekeepers, *generation)
-                    .await
-                    .map_err(NotifyError::NeonLocal)?;
-            }
-        }
-
-        Ok(())
-    }
-}
-
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
-struct NotifyAttachRequestShard {
+struct ComputeHookNotifyRequestShard {
    node_id: NodeId,
    shard_number: ShardNumber,
 }

 /// Request body that we send to the control plane to notify it of where a tenant is attached
 #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
-struct NotifyAttachRequest {
+struct ComputeHookNotifyRequest {
    tenant_id: TenantId,
    preferred_az: Option<String>,
    stripe_size: Option<ShardStripeSize>,
-    shards: Vec<NotifyAttachRequestShard>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Eq, PartialEq, Clone)]
-pub(crate) struct SafekeeperInfo {
-    pub id: NodeId,
-    /// Hostname of the safekeeper.
-    /// It exists for better debuggability. Might be missing.
-    /// Should not be used for anything else.
-    pub hostname: Option<String>,
-}
-
-#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
-struct NotifySafekeepersRequest {
-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    generation: SafekeeperGeneration,
-    safekeepers: Vec<SafekeeperInfo>,
+    shards: Vec<ComputeHookNotifyRequestShard>,
 }

 /// Error type for attempts to call into the control plane compute notification hook
@@ -383,50 +234,42 @@ pub(crate) enum NotifyError {
    NeonLocal(anyhow::Error),
 }

-enum MaybeSendResult<R, K> {
+enum MaybeSendResult {
    // Please send this request while holding the lock, and if you succeed then write
    // the request into the lock.
    Transmit(
        (
-            R,
-            tokio::sync::OwnedMutexGuard<Option<ComputeRemoteState<R>>>,
+            ComputeHookNotifyRequest,
+            tokio::sync::OwnedMutexGuard<Option<ComputeRemoteState>>,
        ),
    ),
    // Something requires sending, but you must wait for a current sender then call again
-    AwaitLock((K, Arc<tokio::sync::Mutex<Option<ComputeRemoteState<R>>>>)),
+    AwaitLock(Arc<tokio::sync::Mutex<Option<ComputeRemoteState>>>),
    // Nothing requires sending
    Noop,
 }

-type MaybeSendNotifyAttachResult = MaybeSendResult<NotifyAttachRequest, TenantId>;
-type MaybeSendNotifySafekeepersResult = MaybeSendResult<NotifySafekeepersRequest, TenantTimelineId>;
-
-impl ApiMethod for ComputeHookTenant {
-    type Key = TenantId;
-    type Request = NotifyAttachRequest;
-
-    const API_PATH: &'static str = "notify-attach";
-
+impl ComputeHookTenant {
    fn maybe_send(
        &self,
        tenant_id: TenantId,
-        lock: Option<tokio::sync::OwnedMutexGuard<Option<ComputeRemoteTenantState>>>,
-    ) -> MaybeSendNotifyAttachResult {
+        lock: Option<tokio::sync::OwnedMutexGuard<Option<ComputeRemoteState>>>,
+    ) -> MaybeSendResult {
        let locked = match lock {
            Some(already_locked) => already_locked,
            None => {
-                // Lock order: this _must_ be only a try_lock, because we are called inside of the [`ComputeHook::tenants`] lock.
+                // Lock order: this _must_ be only a try_lock, because we are called inside of the [`ComputeHook::state`] lock.
                let Ok(locked) = self.get_send_lock().clone().try_lock_owned() else {
-                    return MaybeSendResult::AwaitLock((tenant_id, self.get_send_lock().clone()));
+                    return MaybeSendResult::AwaitLock(self.get_send_lock().clone());
                };
                locked
            }
        };

        let request = match self {
-            Self::Unsharded(unsharded_tenant) => Some(NotifyAttachRequest {
+            Self::Unsharded(unsharded_tenant) => Some(ComputeHookNotifyRequest {
                tenant_id,
-                shards: vec![NotifyAttachRequestShard {
+                shards: vec![ComputeHookNotifyRequestShard {
                    shard_number: ShardNumber(0),
                    node_id: unsharded_tenant.node_id,
                }],
@@ -439,12 +282,12 @@ impl ApiMethod for ComputeHookTenant {
            Self::Sharded(sharded_tenant)
                if sharded_tenant.shards.len() == sharded_tenant.shard_count.count() as usize =>
            {
-                Some(NotifyAttachRequest {
+                Some(ComputeHookNotifyRequest {
                    tenant_id,
                    shards: sharded_tenant
                        .shards
                        .iter()
-                        .map(|(shard_number, node_id)| NotifyAttachRequestShard {
+                        .map(|(shard_number, node_id)| ComputeHookNotifyRequestShard {
                            shard_number: *shard_number,
                            node_id: *node_id,
                        })
@@ -489,22 +332,98 @@ impl ApiMethod for ComputeHookTenant {
            }
        }
    }
+}

-    async fn notify_local(
-        env: &LocalEnv,
-        cplane: &ComputeControlPlane,
-        req: &NotifyAttachRequest,
+/// The compute hook is a destination for notifications about changes to tenant:pageserver
+/// mapping.  It aggregates updates for the shards in a tenant, and when appropriate reconfigures
+/// the compute connection string.
+pub(super) struct ComputeHook {
+    config: Config,
+    state: std::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
+    authorization_header: Option<String>,
+
+    // Concurrency limiter, so that we do not overload the cloud control plane when updating
+    // large numbers of tenants (e.g. when failing over after a node failure)
+    api_concurrency: tokio::sync::Semaphore,
+
+    // This lock is only used in testing environments, to serialize calls into neon_local
+    neon_local_lock: tokio::sync::Mutex<()>,
+
+    // We share a client across all notifications to enable connection re-use etc when
+    // sending large numbers of notifications
+    client: reqwest::Client,
+}
+
+/// Callers may give us a list of these when asking us to send a bulk batch
+/// of notifications in the background.  This is a 'notification' in the sense of
+/// other code notifying us of a shard's status, rather than being the final notification
+/// that we send upwards to the control plane for the whole tenant.
+pub(crate) struct ShardUpdate<'a> {
+    pub(crate) tenant_shard_id: TenantShardId,
+    pub(crate) node_id: NodeId,
+    pub(crate) stripe_size: ShardStripeSize,
+    pub(crate) preferred_az: Option<Cow<'a, AvailabilityZone>>,
+}
+
+impl ComputeHook {
+    pub(super) fn new(config: Config) -> anyhow::Result<Self> {
+        let authorization_header = config
+            .control_plane_jwt_token
+            .clone()
+            .map(|jwt| format!("Bearer {jwt}"));
+
+        let mut client = reqwest::ClientBuilder::new().timeout(NOTIFY_REQUEST_TIMEOUT);
+        for cert in &config.ssl_ca_certs {
+            client = client.add_root_certificate(cert.clone());
+        }
+        let client = client
+            .build()
+            .context("Failed to build http client for compute hook")?;
+
+        Ok(Self {
+            state: Default::default(),
+            config,
+            authorization_header,
+            neon_local_lock: Default::default(),
+            api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY),
+            client,
+        })
+    }
+
+    /// For test environments: use neon_local's LocalEnv to update compute
+    async fn do_notify_local(
+        &self,
+        reconfigure_request: &ComputeHookNotifyRequest,
    ) -> Result<(), NotifyError> {
-        let NotifyAttachRequest {
+        // neon_local updates are not safe to call concurrently, use a lock to serialize
+        // all calls to this function
+        let _locked = self.neon_local_lock.lock().await;
+
+        let Some(repo_dir) = self.config.neon_local_repo_dir.as_deref() else {
+            tracing::warn!(
+                "neon_local_repo_dir not set, likely a bug in neon_local; skipping compute update"
+            );
+            return Ok(());
+        };
+        let env = match LocalEnv::load_config(repo_dir) {
+            Ok(e) => e,
+            Err(e) => {
+                tracing::warn!("Couldn't load neon_local config, skipping compute update ({e})");
+                return Ok(());
+            }
+        };
+        let cplane =
+            ComputeControlPlane::load(env.clone()).expect("Error loading compute control plane");
+        let ComputeHookNotifyRequest {
            tenant_id,
            shards,
            stripe_size,
            preferred_az: _preferred_az,
-        } = req;
+        } = reconfigure_request;

        for (endpoint_name, endpoint) in &cplane.endpoints {
            if endpoint.tenant_id == *tenant_id && endpoint.status() == EndpointStatus::Running {
-                tracing::info!("Reconfiguring pageservers for endpoint {endpoint_name}");
+                tracing::info!("Reconfiguring endpoint {endpoint_name}");

                let pageservers = shards
                    .iter()
@@ -526,7 +445,7 @@ impl ApiMethod for ComputeHookTenant {
                    .collect::<Vec<_>>();

                endpoint
-                    .reconfigure_pageservers(pageservers, *stripe_size)
+                    .reconfigure(pageservers, *stripe_size, None)
                    .await
                    .map_err(NotifyError::NeonLocal)?;
            }
@@ -534,102 +453,11 @@ impl ApiMethod for ComputeHookTenant {

        Ok(())
    }
-}

-/// The compute hook is a destination for notifications about changes to tenant:pageserver
-/// mapping.  It aggregates updates for the shards in a tenant, and when appropriate reconfigures
-/// the compute connection string.
-pub(super) struct ComputeHook {
-    config: Config,
-    tenants: std::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
-    timelines: std::sync::Mutex<HashMap<TenantTimelineId, ComputeHookTimeline>>,
-    authorization_header: Option<String>,
-
-    // Concurrency limiter, so that we do not overload the cloud control plane when updating
-    // large numbers of tenants (e.g. when failing over after a node failure)
-    api_concurrency: tokio::sync::Semaphore,
-
-    // This lock is only used in testing enviroments, to serialize calls into neon_local
-    neon_local_lock: tokio::sync::Mutex<()>,
-
-    // We share a client across all notifications to enable connection re-use etc when
-    // sending large numbers of notifications
-    client: reqwest::Client,
-}
-
-/// Callers may give us a list of these when asking us to send a bulk batch
-/// of notifications in the background.  This is a 'notification' in the sense of
-/// other code notifying us of a shard's status, rather than being the final notification
-/// that we send upwards to the control plane for the whole tenant.
-pub(crate) struct ShardUpdate<'a> {
-    pub(crate) tenant_shard_id: TenantShardId,
-    pub(crate) node_id: NodeId,
-    pub(crate) stripe_size: ShardStripeSize,
-    pub(crate) preferred_az: Option<Cow<'a, AvailabilityZone>>,
-}
-
-pub(crate) struct SafekeepersUpdate {
-    pub(crate) tenant_id: TenantId,
-    pub(crate) timeline_id: TimelineId,
-    pub(crate) generation: SafekeeperGeneration,
-    pub(crate) safekeepers: Vec<SafekeeperInfo>,
-}
-
-impl ComputeHook {
-    pub(super) fn new(config: Config) -> anyhow::Result<Self> {
-        let authorization_header = config
-            .control_plane_jwt_token
-            .clone()
-            .map(|jwt| format!("Bearer {jwt}"));
-
-        let mut client = reqwest::ClientBuilder::new().timeout(NOTIFY_REQUEST_TIMEOUT);
-        for cert in &config.ssl_ca_certs {
-            client = client.add_root_certificate(cert.clone());
-        }
-        let client = client
-            .build()
-            .context("Failed to build http client for compute hook")?;
-
-        Ok(Self {
-            tenants: Default::default(),
-            timelines: Default::default(),
-            config,
-            authorization_header,
-            neon_local_lock: Default::default(),
-            api_concurrency: tokio::sync::Semaphore::new(API_CONCURRENCY),
-            client,
-        })
-    }
-
-    /// For test environments: use neon_local's LocalEnv to update compute
-    async fn do_notify_local<M: ApiMethod>(&self, req: &M::Request) -> Result<(), NotifyError> {
-        // neon_local updates are not safe to call concurrently, use a lock to serialize
-        // all calls to this function
-        let _locked = self.neon_local_lock.lock().await;
-
-        let Some(repo_dir) = self.config.neon_local_repo_dir.as_deref() else {
-            tracing::warn!(
-                "neon_local_repo_dir not set, likely a bug in neon_local; skipping compute update"
-            );
-            return Ok(());
-        };
-        let env = match LocalEnv::load_config(repo_dir) {
-            Ok(e) => e,
-            Err(e) => {
-                tracing::warn!("Couldn't load neon_local config, skipping compute update ({e})");
-                return Ok(());
-            }
-        };
-        let cplane =
-            ComputeControlPlane::load(env.clone()).expect("Error loading compute control plane");
-
-        M::notify_local(&env, &cplane, req).await
-    }
-
-    async fn do_notify_iteration<Req: serde::Serialize + std::fmt::Debug>(
+    async fn do_notify_iteration(
        &self,
        url: &String,
-        reconfigure_request: &Req,
+        reconfigure_request: &ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
        let req = self.client.request(reqwest::Method::PUT, url);
@@ -651,7 +479,9 @@ impl ComputeHook {
        };

        // Treat all 2xx responses as success
-        if response.status().is_success() {
+        if response.status() >= reqwest::StatusCode::OK
+            && response.status() < reqwest::StatusCode::MULTIPLE_CHOICES
+        {
            if response.status() != reqwest::StatusCode::OK {
                // Non-200 2xx response: it doesn't make sense to retry, but this is unexpected, so
                // log a warning.
@@ -702,10 +532,10 @@ impl ComputeHook {
        }
    }

-    async fn do_notify<R: serde::Serialize + std::fmt::Debug>(
+    async fn do_notify(
        &self,
        url: &String,
-        reconfigure_request: &R,
+        reconfigure_request: &ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
        // We hold these semaphore units across all retries, rather than only across each
@@ -737,13 +567,13 @@ impl ComputeHook {
    }

    /// Synchronous phase: update the per-tenant state for the next intended notification
-    fn notify_attach_prepare(&self, shard_update: ShardUpdate) -> MaybeSendNotifyAttachResult {
-        let mut tenants_locked = self.tenants.lock().unwrap();
+    fn notify_prepare(&self, shard_update: ShardUpdate) -> MaybeSendResult {
+        let mut state_locked = self.state.lock().unwrap();

        use std::collections::hash_map::Entry;
        let tenant_shard_id = shard_update.tenant_shard_id;

-        let tenant = match tenants_locked.entry(tenant_shard_id.tenant_id) {
+        let tenant = match state_locked.entry(tenant_shard_id.tenant_id) {
            Entry::Vacant(e) => {
                let ShardUpdate {
                    tenant_shard_id,
@@ -767,37 +597,10 @@ impl ComputeHook {
        tenant.maybe_send(tenant_shard_id.tenant_id, None)
    }

-    fn notify_safekeepers_prepare(
+    async fn notify_execute(
        &self,
-        safekeepers_update: SafekeepersUpdate,
-    ) -> MaybeSendNotifySafekeepersResult {
-        let mut timelines_locked = self.timelines.lock().unwrap();
-
-        let ttid = TenantTimelineId {
-            tenant_id: safekeepers_update.tenant_id,
-            timeline_id: safekeepers_update.timeline_id,
-        };
-
-        use std::collections::hash_map::Entry;
-        let timeline = match timelines_locked.entry(ttid) {
-            Entry::Vacant(e) => e.insert(ComputeHookTimeline::new(
-                safekeepers_update.generation,
-                safekeepers_update.safekeepers,
-            )),
-            Entry::Occupied(e) => {
-                let timeline = e.into_mut();
-                timeline.update(safekeepers_update);
-                timeline
-            }
-        };
-
-        timeline.maybe_send(ttid, None)
-    }
-
-    async fn notify_execute<M: ApiMethod>(
-        &self,
-        state: &std::sync::Mutex<HashMap<M::Key, M>>,
-        maybe_send_result: MaybeSendResult<M::Request, M::Key>,
+        maybe_send_result: MaybeSendResult,
+        tenant_shard_id: TenantShardId,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
        // Process result: we may get an update to send, or we may have to wait for a lock
@@ -806,7 +609,7 @@ impl ComputeHook {
            MaybeSendResult::Noop => {
                return Ok(());
            }
-            MaybeSendResult::AwaitLock((key, send_lock)) => {
+            MaybeSendResult::AwaitLock(send_lock) => {
                let send_locked = tokio::select! {
                    guard = send_lock.lock_owned() => {guard},
                    _ = cancel.cancelled() => {
@@ -817,11 +620,11 @@ impl ComputeHook {
                // Lock order: maybe_send is called within the `[Self::state]` lock, and takes the send lock, but here
                // we have acquired the send lock and take `[Self::state]` lock.  This is safe because maybe_send only uses
                // try_lock.
-                let state_locked = state.lock().unwrap();
-                let Some(resource_state) = state_locked.get(&key) else {
+                let state_locked = self.state.lock().unwrap();
+                let Some(tenant) = state_locked.get(&tenant_shard_id.tenant_id) else {
                    return Ok(());
                };
-                match resource_state.maybe_send(key, Some(send_locked)) {
+                match tenant.maybe_send(tenant_shard_id.tenant_id, Some(send_locked)) {
                    MaybeSendResult::AwaitLock(_) => {
                        unreachable!("We supplied lock guard")
                    }
@@ -840,18 +643,14 @@ impl ComputeHook {
                    .control_plane_url
                    .as_ref()
                    .map(|control_plane_url| {
-                        format!(
-                            "{}/{}",
-                            control_plane_url.trim_end_matches('/'),
-                            M::API_PATH
-                        )
+                        format!("{}/notify-attach", control_plane_url.trim_end_matches('/'))
                    });

            // We validate this at startup
            let notify_url = compute_hook_url.as_ref().unwrap();
            self.do_notify(notify_url, &request, cancel).await
        } else {
-            self.do_notify_local::<M>(&request).await.map_err(|e| {
+            self.do_notify_local(&request).await.map_err(|e| {
                // This path is for testing only, so munge the error into our prod-style error type.
                tracing::error!("neon_local notification hook failed: {e}");
                NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR)
@@ -887,7 +686,7 @@ impl ComputeHook {
    /// Infallible synchronous fire-and-forget version of notify(), that sends its results to
    /// a channel.  Something should consume the channel and arrange to try notifying again
    /// if something failed.
-    pub(super) fn notify_attach_background(
+    pub(super) fn notify_background(
        self: &Arc<Self>,
        notifications: Vec<ShardUpdate>,
        result_tx: tokio::sync::mpsc::Sender<Result<(), (TenantShardId, NotifyError)>>,
@@ -896,7 +695,7 @@ impl ComputeHook {
        let mut maybe_sends = Vec::new();
        for shard_update in notifications {
            let tenant_shard_id = shard_update.tenant_shard_id;
-            let maybe_send_result = self.notify_attach_prepare(shard_update);
+            let maybe_send_result = self.notify_prepare(shard_update);
            maybe_sends.push((tenant_shard_id, maybe_send_result))
        }

@@ -915,10 +714,10 @@ impl ComputeHook {

                    async move {
                        this
-                            .notify_execute(&this.tenants, maybe_send_result, &cancel)
+                            .notify_execute(maybe_send_result, tenant_shard_id, &cancel)
                            .await.map_err(|e| (tenant_shard_id, e))
                    }.instrument(info_span!(
-                        "notify_attach_background", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()
+                        "notify_background", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()
                    ))
                })
                .buffered(API_CONCURRENCY);
@@ -961,23 +760,14 @@ impl ComputeHook {
    /// ensuring that they eventually call again to ensure that the compute is eventually notified of
    /// the proper pageserver nodes for a tenant.
    #[tracing::instrument(skip_all, fields(tenant_id=%shard_update.tenant_shard_id.tenant_id, shard_id=%shard_update.tenant_shard_id.shard_slug(), node_id))]
-    pub(super) async fn notify_attach<'a>(
+    pub(super) async fn notify<'a>(
        &self,
        shard_update: ShardUpdate<'a>,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
-        let maybe_send_result = self.notify_attach_prepare(shard_update);
-        self.notify_execute(&self.tenants, maybe_send_result, cancel)
-            .await
-    }
-
-    pub(super) async fn notify_safekeepers(
-        &self,
-        safekeepers_update: SafekeepersUpdate,
-        cancel: &CancellationToken,
-    ) -> Result<(), NotifyError> {
-        let maybe_send_result = self.notify_safekeepers_prepare(safekeepers_update);
-        self.notify_execute(&self.timelines, maybe_send_result, cancel)
+        let tenant_shard_id = shard_update.tenant_shard_id;
+        let maybe_send_result = self.notify_prepare(shard_update);
+        self.notify_execute(maybe_send_result, tenant_shard_id, cancel)
            .await
    }

@@ -993,8 +783,8 @@ impl ComputeHook {
    ) {
        use std::collections::hash_map::Entry;

-        let mut tenants_locked = self.tenants.lock().unwrap();
-        match tenants_locked.entry(tenant_shard_id.tenant_id) {
+        let mut state_locked = self.state.lock().unwrap();
+        match state_locked.entry(tenant_shard_id.tenant_id) {
            Entry::Vacant(_) => {
                // This is a valid but niche case, where the tenant was previously attached
                // as a Secondary location and then detached, so has no previously notified
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -22,7 +22,7 @@ use pageserver_api::controller_api::{
    MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse,
    NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, SafekeeperSchedulingPolicyRequest,
    ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest,
-    TimelineImportRequest, TimelineSafekeeperMigrateRequest,
+    TimelineImportRequest,
 };
 use pageserver_api::models::{
    DetachBehavior, LsnLeaseRequest, TenantConfigPatchRequest, TenantConfigRequest,
@@ -34,7 +34,6 @@ use pageserver_api::upcall_api::{
    PutTimelineImportStatusRequest, ReAttachRequest, TimelineImportStatusRequest, ValidateRequest,
 };
 use pageserver_client::{BlockUnblock, mgmt_api};
-
 use routerify::Middleware;
 use tokio_util::sync::CancellationToken;
 use tracing::warn;
@@ -636,32 +635,6 @@ async fn handle_tenant_timeline_download_heatmap_layers(
    json_response(StatusCode::OK, ())
 }

-async fn handle_tenant_timeline_safekeeper_migrate(
-    service: Arc<Service>,
-    req: Request<Body>,
-) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
-    check_permissions(&req, Scope::PageServerApi)?;
-    maybe_rate_limit(&req, tenant_id).await;
-
-    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
-
-    let mut req = match maybe_forward(req).await {
-        ForwardOutcome::Forwarded(res) => {
-            return res;
-        }
-        ForwardOutcome::NotForwarded(req) => req,
-    };
-
-    let migrate_req = json_request::<TimelineSafekeeperMigrateRequest>(&mut req).await?;
-
-    service
-        .tenant_timeline_safekeeper_migrate(tenant_id, timeline_id, migrate_req)
-        .await?;
-
-    json_response(StatusCode::OK, ())
-}
-
 async fn handle_tenant_timeline_lsn_lease(
    service: Arc<Service>,
    req: Request<Body>,
@@ -2485,16 +2458,6 @@ pub fn make_router(
                )
            },
        )
-        .post(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate",
-            |r| {
-                tenant_service_handler(
-                    r,
-                    handle_tenant_timeline_safekeeper_migrate,
-                    RequestName("v1_tenant_timeline_safekeeper_migrate"),
-                )
-            },
-        )
        // LSN lease passthrough to all shards
        .post(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/lsn_lease",
--- a/storage_controller/src/main.rs
+++ b/storage_controller/src/main.rs
@@ -6,7 +6,9 @@ use std::time::Duration;
 use anyhow::{Context, anyhow};
 use camino::Utf8PathBuf;

-use clap::{ArgAction, Parser};
+#[cfg(feature = "testing")]
+use clap::ArgAction;
+use clap::Parser;
 use futures::future::OptionFuture;
 use http_utils::tls_certs::ReloadingCertificateResolver;
 use hyper0::Uri;
@@ -214,13 +216,14 @@ struct Cli {
    /// Number of safekeepers to choose for a timeline when creating it.
    /// Safekeepers will be choosen from different availability zones.
    /// This option exists primarily for testing purposes.
-    #[arg(long, default_value = "3", value_parser = clap::builder::RangedU64ValueParser::<usize>::new().range(1..))]
-    timeline_safekeeper_count: usize,
+    #[arg(long, default_value = "3", value_parser = clap::value_parser!(i64).range(1..))]
+    timeline_safekeeper_count: i64,

    /// When set, actively checks and initiates heatmap downloads/uploads during reconciliation.
    /// This speed up migrations by avoiding the default wait for the heatmap download interval.
    /// Primarily useful for testing to reduce test execution time.
-    #[arg(long, default_value = "false", action=ArgAction::Set)]
+    #[cfg(feature = "testing")]
+    #[arg(long, default_value = "true", action=ArgAction::Set)]
    kick_secondary_downloads: bool,
 }

@@ -469,6 +472,7 @@ async fn async_main() -> anyhow::Result<()> {
        use_local_compute_notifications: args.use_local_compute_notifications,
        timeline_safekeeper_count: args.timeline_safekeeper_count,
        posthog_config: posthog_config.clone(),
+        #[cfg(feature = "testing")]
        kick_secondary_downloads: args.kick_secondary_downloads,
    };

@@ -556,15 +560,9 @@ async fn async_main() -> anyhow::Result<()> {
        let cancel_bg = cancel.clone();
        let task = tokio::task::spawn(
            async move {
-                match FeatureFlagService::new(service, posthog_config) {
-                    Ok(feature_flag_service) => {
-                        let feature_flag_service = Arc::new(feature_flag_service);
-                        feature_flag_service.run(cancel_bg).await
-                    }
-                    Err(e) => {
-                        tracing::warn!("Failed to create feature flag service: {}", e);
-                    }
-                };
+                let feature_flag_service = FeatureFlagService::new(service, posthog_config);
+                let feature_flag_service = Arc::new(feature_flag_service);
+                feature_flag_service.run(cancel_bg).await
            }
            .instrument(tracing::info_span!("feature_flag_service")),
        );
--- a/Show More
+++ b/Show More