revert pg-14 submodule changes

change subzero dep sha
add line to remove from diff
2026-05-21 15:10:44 +00:00 · 2025-07-04 13:55:35 +03:00 · 2025-07-04 13:39:49 +03:00 · 2025-07-04 13:28:52 +03:00 · 2025-07-04 13:27:59 +03:00 · 2025-07-04 13:03:55 +03:00
86 changed files with 4504 additions and 3719 deletions
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -33,7 +33,6 @@ workspace-members = [
    "compute_api",
    "consumption_metrics",
    "desim",
-    "json",
    "metrics",
    "pageserver_api",
    "postgres_backend",
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -7,7 +7,6 @@ self-hosted-runner:
    - small-metal
    - small-arm64
    - unit-perf
-    - unit-perf-aws-arm
    - us-east-2
 config-variables:
  - AWS_ECR_REGION
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -32,14 +32,162 @@ permissions:
  contents: read

 jobs:
-  make-all:
+  build-pgxn:
+    if: |
+      inputs.pg_versions != '[]' || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
+    timeout-minutes: 30
+    runs-on: macos-15
+    strategy:
+      matrix:
+        postgres-version: ${{ inputs.rebuild_everything && fromJSON('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - name: Checkout main repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set pg ${{ matrix.postgres-version }} for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Cache postgres ${{ matrix.postgres-version }} build
+        id: cache_pg
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: pg_install/${{ matrix.postgres-version }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-${{ matrix.postgres-version }}
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build Postgres ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
+
+      - name: Build Neon Pg Ext ${{ matrix.postgres-version }}
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
+
+      - name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: pg_install--${{ matrix.postgres-version }}
+          path: pg_install/${{ matrix.postgres-version }}
+          # The artifact is supposed to be used by the next job in the same workflow,
+          # so there’s no need to store it for too long.
+          retention-days: 1
+
+  build-walproposer-lib:
+    if: |
+      contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
+      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
+      github.ref_name == 'main'
+    timeout-minutes: 30
+    runs-on: macos-15
+    needs: [build-pgxn]
+    env:
+      # Use release build only, to have less debug info around
+      # Hence keeping target/ (and general cache size) smaller
+      BUILD_TYPE: release
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - name: Checkout main repo
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - name: Set pg v17 for caching
+        id: pg_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
+
+      - name: Download "pg_install/v17" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v17
+          path: pg_install/v17
+
+      # `actions/download-artifact` doesn't preserve permissions:
+      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
+      - name: Make pg_install/v*/bin/* executable
+        run: |
+          chmod +x pg_install/v*/bin/*
+
+      - name: Cache walproposer-lib
+        id: cache_walproposer_lib
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: build/walproposer-lib
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
+      - name: Checkout submodule vendor/postgres-v17
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          git submodule init vendor/postgres-v17
+          git submodule update --depth 1 --recursive
+
+      - name: Install build dependencies
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Build walproposer-lib (only for v17)
+        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
+        run:
+          make walproposer-lib -j$(sysctl -n hw.ncpu) PG_INSTALL_CACHED=1
+
+      - name: Upload "build/walproposer-lib" artifact
+        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        with:
+          name: build--walproposer-lib
+          path: build/walproposer-lib
+          # The artifact is supposed to be used by the next job in the same workflow,
+          # so there’s no need to store it for too long.
+          retention-days: 1
+
+  cargo-build:
    if: |
      inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
-    timeout-minutes: 60
+    timeout-minutes: 30
    runs-on: macos-15
+    needs: [build-pgxn, build-walproposer-lib]
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
@@ -55,53 +203,41 @@ jobs:
        with:
          submodules: true

-      - name: Install build dependencies
-        run: |
-          brew install flex bison openssl protobuf icu4c
-
-      - name: Set extra env for macOS
-        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
-
-      - name: Restore "pg_install/" cache
-        id: cache_pg
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+      - name: Download "pg_install/v14" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
        with:
-          path: pg_install
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-install-v14-${{ hashFiles('Makefile', 'postgres.mk', 'vendor/revisions.json') }}
+          name: pg_install--v14
+          path: pg_install/v14

-      - name: Checkout vendor/postgres submodules
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          git submodule init
-          git submodule update --depth 1 --recursive
+      - name: Download "pg_install/v15" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v15
+          path: pg_install/v15

-      - name: Build Postgres
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          make postgres -j$(sysctl -n hw.ncpu)
+      - name: Download "pg_install/v16" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v16
+          path: pg_install/v16

-      # This isn't strictly necessary, but it makes the cached and non-cached builds more similar,
-      # When pg_install is restored from cache, there is no 'build/' directory. By removing it
-      # in a non-cached build too, we enforce that the rest of the steps don't depend on it,
-      # so that we notice any build caching bugs earlier.
-      - name: Remove build artifacts
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          rm -rf build
+      - name: Download "pg_install/v17" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: pg_install--v17
+          path: pg_install/v17

-      # Explicitly update the rust toolchain before running 'make'. The parallel make build can
-      # invoke 'cargo build' more than once in parallel, for different crates.  That's OK, 'cargo'
-      # does its own locking to prevent concurrent builds from stepping on each other's
-      # toes. However, it will first try to update the toolchain, and that step is not locked the
-      # same way. To avoid two toolchain updates running in parallel and stepping on each other's
-      # toes, ensure that the toolchain is up-to-date beforehand.
-      - name: Update rust toolchain
+      - name: Download "build/walproposer-lib" artifact
+        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
+        with:
+          name: build--walproposer-lib
+          path: build/walproposer-lib
+
+      # `actions/download-artifact` doesn't preserve permissions:
+      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
+      - name: Make pg_install/v*/bin/* executable
        run: |
-          rustup --version &&
-          rustup update &&
-          rustup show
+          chmod +x pg_install/v*/bin/*

      - name: Cache cargo deps
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
@@ -113,12 +249,17 @@ jobs:
            target
          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

-      # Build the neon-specific postgres extensions, and all the Rust bits.
-      #
-      # Pass PG_INSTALL_CACHED=1 because PostgreSQL was already built and cached
-      # separately.
-      - name: Build all
-        run: PG_INSTALL_CACHED=1 BUILD_TYPE=release make -j$(sysctl -n hw.ncpu) all
+      - name: Install build dependencies
+        run: |
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Run cargo build
+        run: cargo build --all --release -j$(sysctl -n hw.ncpu)

      - name: Check that no warnings are produced
        run: ./run_clippy.sh
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -306,14 +306,14 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [ self-hosted, unit-perf-aws-arm ]
+    runs-on: [ self-hosted, unit-perf ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      # for changed limits, see comments on `options:` earlier in this file
-      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --ulimit nofile=65536:65536 --security-opt seccomp=unconfined
+      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    strategy:
      fail-fast: false
      matrix:
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -1,4 +1,4 @@
-name: Periodic pagebench performance test on unit-perf-aws-arm runners
+name: Periodic pagebench performance test on unit-perf hetzner runner

 on:
  schedule:
@@ -40,7 +40,7 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [ self-hosted, unit-perf-aws-arm ]
+    runs-on: [ self-hosted, unit-perf ]
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
--- a/.github/workflows/proxy-benchmark.yml
+++ b/.github/workflows/proxy-benchmark.yml
@@ -1,4 +1,4 @@
-name: Periodic proxy performance test on unit-perf-aws-arm runners
+name: Periodic proxy performance test on unit-perf hetzner runner

 on:
  push: # TODO: remove after testing
@@ -32,7 +32,7 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [self-hosted, unit-perf-aws-arm]
+    runs-on: [self-hosted, unit-perf]
    timeout-minutes: 60  # 1h timeout
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,11 @@ compaction-suite-results.*
 *.o
 *.so
 *.Po
+*.pid

 # pgindent typedef lists
 *.list
+
+# various files for local testing
+/proxy/.subzero
+local_proxy.json
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -42,12 +42,10 @@ members = [
    "libs/walproposer",
    "libs/wal_decoder",
    "libs/postgres_initdb",
-    "libs/proxy/json",
    "libs/proxy/postgres-protocol2",
    "libs/proxy/postgres-types2",
    "libs/proxy/tokio-postgres2",
    "endpoint_storage",
-    "pgxn/neon/communicator",
 ]

 [workspace.package]
@@ -257,7 +255,6 @@ desim = { version = "0.1", path = "./libs/desim" }
 endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
-neon-shmem = { version = "0.1", path = "./libs/neon-shmem/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
@@ -287,7 +284,6 @@ walproposer = { version = "0.1", path = "./libs/walproposer/" }
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
-cbindgen = "0.29.0"
 criterion = "0.5.1"
 rcgen = "0.13"
 rstest = "0.18"
--- a/Dockerfile
+++ b/Dockerfile
@@ -30,18 +30,7 @@ ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}
 ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}
 ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}

-# Naive way:
-#
-# 1. COPY . .
-# 1. make neon-pg-ext
-# 2. cargo build <storage binaries>
-#
-# But to enable docker to cache intermediate layers, we perform a few preparatory steps:
-#
-# - Build all postgres versions, depending on just the contents of vendor/
-# - Use cargo chef to build all rust dependencies
-
-# 1. Build all postgres versions
+# Build Postgres
 FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
 WORKDIR /home/nonroot

@@ -49,15 +38,17 @@ COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
 COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
 COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
 COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
+COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot postgres.mk postgres.mk
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh

 ENV BUILD_TYPE=release
 RUN set -e \
-    && mold -run make -j $(nproc) -s postgres
+    && mold -run make -j $(nproc) -s neon-pg-ext \
+    && tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .

-# 2. Prepare cargo-chef recipe
+# Prepare cargo-chef recipe
 FROM $REPOSITORY/$IMAGE:$TAG AS plan
 WORKDIR /home/nonroot

@@ -65,22 +56,23 @@ COPY --chown=nonroot . .

 RUN cargo chef prepare --recipe-path recipe.json

-# Main build image
+# Build neon binaries
 FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
+
+COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
+COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
+
 ARG ADDITIONAL_RUSTFLAGS=""

-# 3. Build cargo dependencies. Note that this step doesn't depend on anything else than
-# `recipe.json`, so the layer can be reused as long as none of the dependencies change.
-COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
 RUN set -e \
    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo chef cook --locked --release --recipe-path recipe.json

-# Perform the main build. We reuse the Postgres build artifacts from the intermediate 'pg-build'
-# layer, and the cargo dependencies built in the previous step.
-COPY --chown=nonroot --from=pg-build /home/nonroot/pg_install/ pg_install
 COPY --chown=nonroot . .

 RUN set -e \
@@ -95,10 +87,10 @@ RUN set -e \
      --bin endpoint_storage \
      --bin neon_local \
      --bin storage_scrubber \
-      --locked --release \
-    && mold -run make -j $(nproc) -s neon-pg-ext
+      --locked --release

-# Assemble the final image
+# Build final image
+#
 FROM $BASE_IMAGE_SHA
 WORKDIR /data

@@ -138,15 +130,12 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/endpoint_storage    /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber    /usr/local/bin
-COPY --from=build /home/nonroot/pg_install/v14 /usr/local/v14/
-COPY --from=build /home/nonroot/pg_install/v15 /usr/local/v15/
-COPY --from=build /home/nonroot/pg_install/v16 /usr/local/v16/
-COPY --from=build /home/nonroot/pg_install/v17 /usr/local/v17/

-# Deprecated: Old deployment scripts use this tarball which contains all the Postgres binaries.
-# That's obsolete, since all the same files are also present under /usr/local/v*. But to keep the
-# old scripts working for now, create the tarball.
-RUN tar -C /usr/local -cvzf /data/postgres_install.tar.gz v14 v15 v16 v17
+COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
+COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
+COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
+COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
+COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/

 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
 # Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
--- a/Makefile
+++ b/Makefile
@@ -30,18 +30,11 @@ ifeq ($(BUILD_TYPE),release)
 	PG_CFLAGS += -O2 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	CARGO_PROFILE ?= --profile=release
-	# NEON_CARGO_ARTIFACT_TARGET_DIR is the directory where `cargo build` places
-	# the final build artifacts. There is unfortunately no easy way of changing
-	# it to a fully predictable path, nor to extract the path with a simple
-	# command. See https://github.com/rust-lang/cargo/issues/9661 and
-	# https://github.com/rust-lang/cargo/issues/6790.
-	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
 	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	CARGO_PROFILE ?= --profile=dev
-	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif
@@ -109,7 +102,7 @@ all: neon postgres-install neon-pg-ext

 ### Neon Rust bits
 #
-# The 'postgres_ffi' crate depends on the Postgres headers.
+# The 'postgres_ffi' depends on the Postgres headers.
 .PHONY: neon
 neon: postgres-headers-install walproposer-lib cargo-target-dir
 	+@echo "Compiling Neon"
@@ -122,13 +115,10 @@ cargo-target-dir:
 	test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG

 .PHONY: neon-pg-ext-%
-neon-pg-ext-%: postgres-install-% cargo-target-dir
+neon-pg-ext-%: postgres-install-%
 	+@echo "Compiling neon-specific Postgres extensions for $*"
 	mkdir -p $(BUILD_DIR)/pgxn-$*
-	$(MAKE) PG_CONFIG="$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config" COPT='$(COPT)' \
-		NEON_CARGO_ARTIFACT_TARGET_DIR="$(NEON_CARGO_ARTIFACT_TARGET_DIR)" \
-		CARGO_BUILD_FLAGS="$(CARGO_BUILD_FLAGS)" \
-		CARGO_PROFILE="$(CARGO_PROFILE)" \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(BUILD_DIR)/pgxn-$*\
 		-f $(ROOT_PROJECT_DIR)/pgxn/Makefile  install

--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -1636,14 +1636,11 @@ RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
 # compile neon extensions
 #
 #########################################################################################
-FROM pg-build-with-cargo AS neon-ext-build
+FROM pg-build AS neon-ext-build
 ARG PG_VERSION

-USER root
-COPY . .
-
-RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute \
-      BUILD_TYPE=release CARGO_BUILD_FLAGS="--locked --release" NEON_CARGO_ARTIFACT_TARGET_DIR="$(pwd)/target/release"
+COPY pgxn/ pgxn/
+RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute

 #########################################################################################
 #
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -29,8 +29,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::task::JoinHandle;
-use tokio::{spawn, time};
+use tokio::spawn;
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
@@ -108,8 +107,6 @@ pub struct ComputeNodeParams {
    pub installed_extensions_collection_interval: Arc<AtomicU64>,
 }

-type TaskHandle = Mutex<Option<JoinHandle<()>>>;
-
 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
    pub params: ComputeNodeParams,
@@ -132,8 +129,7 @@ pub struct ComputeNode {
    pub compute_ctl_config: ComputeCtlConfig,

    /// Handle to the extension stats collection task
-    extension_stats_task: TaskHandle,
-    lfc_offload_task: TaskHandle,
+    extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
 }

 // store some metrics about download size that might impact startup time
@@ -372,7 +368,7 @@ fn maybe_cgexec(cmd: &str) -> Command {

 struct PostgresHandle {
    postgres: std::process::Child,
-    log_collector: JoinHandle<Result<()>>,
+    log_collector: tokio::task::JoinHandle<Result<()>>,
 }

 impl PostgresHandle {
@@ -386,7 +382,7 @@ struct StartVmMonitorResult {
    #[cfg(target_os = "linux")]
    token: tokio_util::sync::CancellationToken,
    #[cfg(target_os = "linux")]
-    vm_monitor: Option<JoinHandle<Result<()>>>,
+    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
 }

 impl ComputeNode {
@@ -437,7 +433,6 @@ impl ComputeNode {
            ext_download_progress: RwLock::new(HashMap::new()),
            compute_ctl_config: config.compute_ctl_config,
            extension_stats_task: Mutex::new(None),
-            lfc_offload_task: Mutex::new(None),
        })
    }

@@ -525,8 +520,8 @@ impl ComputeNode {
            None
        };

+        // Terminate the extension stats collection task
        this.terminate_extension_stats_task();
-        this.terminate_lfc_offload_task();

        // Terminate the vm_monitor so it releases the file watcher on
        // /sys/fs/cgroup/neon-postgres.
@@ -856,15 +851,12 @@ impl ComputeNode {
        // Log metrics so that we can search for slow operations in logs
        info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

+        // Spawn the extension stats background task
        self.spawn_extension_stats_task();

        if pspec.spec.autoprewarm {
-            info!("autoprewarming on startup as requested");
            self.prewarm_lfc(None);
        }
-        if let Some(seconds) = pspec.spec.offload_lfc_interval_seconds {
-            self.spawn_lfc_offload_task(Duration::from_secs(seconds.into()));
-        };
        Ok(())
    }

@@ -2365,7 +2357,10 @@ LIMIT 100",
    }

    pub fn spawn_extension_stats_task(&self) {
-        self.terminate_extension_stats_task();
+        // Cancel any existing task
+        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
+            handle.abort();
+        }

        let conf = self.tokio_conn_conf.clone();
        let atomic_interval = self.params.installed_extensions_collection_interval.clone();
@@ -2401,30 +2396,8 @@ LIMIT 100",
    }

    fn terminate_extension_stats_task(&self) {
-        if let Some(h) = self.extension_stats_task.lock().unwrap().take() {
-            h.abort()
-        }
-    }
-
-    pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
-        self.terminate_lfc_offload_task();
-        let secs = interval.as_secs();
-        info!("spawning lfc offload worker with {secs}s interval");
-        let this = self.clone();
-        let handle = spawn(async move {
-            let mut interval = time::interval(interval);
-            interval.tick().await; // returns immediately
-            loop {
-                interval.tick().await;
-                this.offload_lfc_async().await;
-            }
-        });
-        *self.lfc_offload_task.lock().unwrap() = Some(handle);
-    }
-
-    fn terminate_lfc_offload_task(&self) {
-        if let Some(h) = self.lfc_offload_task.lock().unwrap().take() {
-            h.abort()
+        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
+            handle.abort();
        }
    }

--- a/compute_tools/src/compute_prewarm.rs
+++ b/compute_tools/src/compute_prewarm.rs
@@ -5,7 +5,6 @@ use compute_api::responses::LfcOffloadState;
 use compute_api::responses::LfcPrewarmState;
 use http::StatusCode;
 use reqwest::Client;
-use std::mem::replace;
 use std::sync::Arc;
 use tokio::{io::AsyncReadExt, spawn};
 use tracing::{error, info};
@@ -89,15 +88,17 @@ impl ComputeNode {
        self.state.lock().unwrap().lfc_offload_state.clone()
    }

-    /// If there is a prewarm request ongoing, return false, true otherwise
+    /// Returns false if there is a prewarm request ongoing, true otherwise
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
+        crate::metrics::LFC_PREWARM_REQUESTS.inc();
        {
            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
-            if let LfcPrewarmState::Prewarming = replace(state, LfcPrewarmState::Prewarming) {
+            if let LfcPrewarmState::Prewarming =
+                std::mem::replace(state, LfcPrewarmState::Prewarming)
+            {
                return false;
            }
        }
-        crate::metrics::LFC_PREWARMS.inc();

        let cloned = self.clone();
        spawn(async move {
@@ -151,39 +152,30 @@ impl ComputeNode {
            .map(|_| ())
    }

-    /// If offload request is ongoing, return false, true otherwise
+    /// Returns false if there is an offload request ongoing, true otherwise
    pub fn offload_lfc(self: &Arc<Self>) -> bool {
+        crate::metrics::LFC_OFFLOAD_REQUESTS.inc();
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+            if let LfcOffloadState::Offloading =
+                std::mem::replace(state, LfcOffloadState::Offloading)
+            {
                return false;
            }
        }
+
        let cloned = self.clone();
-        spawn(async move { cloned.offload_lfc_with_state_update().await });
-        true
-    }
-
-    pub async fn offload_lfc_async(self: &Arc<Self>) {
-        {
-            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+        spawn(async move {
+            let Err(err) = cloned.offload_lfc_impl().await else {
+                cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
                return;
-            }
-        }
-        self.offload_lfc_with_state_update().await
-    }
-
-    async fn offload_lfc_with_state_update(&self) {
-        crate::metrics::LFC_OFFLOADS.inc();
-        let Err(err) = self.offload_lfc_impl().await else {
-            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
-            return;
-        };
-        error!(%err);
-        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
-            error: err.to_string(),
-        };
+            };
+            error!(%err);
+            cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
+                error: err.to_string(),
+            };
+        });
+        true
    }

    async fn offload_lfc_impl(&self) -> Result<()> {
--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -97,18 +97,20 @@ pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::
    .expect("failed to define a metric")
 });

-pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
+/// Needed as neon.file_cache_prewarm_batch == 0 doesn't mean we never tried to prewarm.
+/// On the other hand, LFC_PREWARMED_PAGES is excessive as we can GET /lfc/prewarm
+pub(crate) static LFC_PREWARM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "compute_ctl_lfc_prewarms_total",
-        "Total number of LFC prewarms requested by compute_ctl or autoprewarm option",
+        "compute_ctl_lfc_prewarm_requests_total",
+        "Total number of LFC prewarm requests made by compute_ctl",
    )
    .expect("failed to define a metric")
 });

-pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static LFC_OFFLOAD_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "compute_ctl_lfc_offloads_total",
-        "Total number of LFC offloads requested by compute_ctl or lfc_offload_period_seconds option",
+        "compute_ctl_lfc_offload_requests_total",
+        "Total number of LFC offload requests made by compute_ctl",
    )
    .expect("failed to define a metric")
 });
@@ -122,7 +124,7 @@ pub fn collect() -> Vec<MetricFamily> {
    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
    metrics.extend(PG_CURR_DOWNTIME_MS.collect());
    metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
-    metrics.extend(LFC_PREWARMS.collect());
-    metrics.extend(LFC_OFFLOADS.collect());
+    metrics.extend(LFC_PREWARM_REQUESTS.collect());
+    metrics.extend(LFC_OFFLOAD_REQUESTS.collect());
    metrics
 }
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -31,7 +31,6 @@ mod pg_helpers_tests {
 wal_level = logical
 hot_standby = on
 autoprewarm = off
-offload_lfc_interval_seconds = 20
 neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
 wal_log_hints = on
 log_connections = on
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -675,16 +675,6 @@ struct EndpointStartCmdArgs {
    #[arg(default_value = "90s")]
    start_timeout: Duration,

-    #[clap(
-        long,
-        help = "Download LFC cache from endpoint storage on endpoint startup",
-        default_value = "false"
-    )]
-    autoprewarm: bool,
-
-    #[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
-    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
-
    #[clap(
        long,
        help = "Run in development mode, skipping VM-specific operations like process termination",
@@ -1595,24 +1585,22 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            let endpoint_storage_token = env.generate_auth_token(&claims)?;
            let endpoint_storage_addr = env.endpoint_storage.listen_addr.to_string();

-            let args = control_plane::endpoint::EndpointStartArgs {
-                auth_token,
-                endpoint_storage_token,
-                endpoint_storage_addr,
-                safekeepers_generation,
-                safekeepers,
-                pageservers,
-                remote_ext_base_url: remote_ext_base_url.clone(),
-                shard_stripe_size: stripe_size.0 as usize,
-                create_test_user: args.create_test_user,
-                start_timeout: args.start_timeout,
-                autoprewarm: args.autoprewarm,
-                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
-                dev: args.dev,
-            };
-
            println!("Starting existing endpoint {endpoint_id}...");
-            endpoint.start(args).await?;
+            endpoint
+                .start(
+                    &auth_token,
+                    endpoint_storage_token,
+                    endpoint_storage_addr,
+                    safekeepers_generation,
+                    safekeepers,
+                    pageservers,
+                    remote_ext_base_url.as_ref(),
+                    stripe_size.0 as usize,
+                    args.create_test_user,
+                    args.start_timeout,
+                    args.dev,
+                )
+                .await?;
        }
        EndpointCmd::Reconfigure(args) => {
            let endpoint_id = &args.endpoint_id;
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -373,22 +373,6 @@ impl std::fmt::Display for EndpointTerminateMode {
    }
 }

-pub struct EndpointStartArgs {
-    pub auth_token: Option<String>,
-    pub endpoint_storage_token: String,
-    pub endpoint_storage_addr: String,
-    pub safekeepers_generation: Option<SafekeeperGeneration>,
-    pub safekeepers: Vec<NodeId>,
-    pub pageservers: Vec<(PageserverProtocol, Host, u16)>,
-    pub remote_ext_base_url: Option<String>,
-    pub shard_stripe_size: usize,
-    pub create_test_user: bool,
-    pub start_timeout: Duration,
-    pub autoprewarm: bool,
-    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
-    pub dev: bool,
-}
-
 impl Endpoint {
    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
        if !entry.file_type()?.is_dir() {
@@ -693,7 +677,21 @@ impl Endpoint {
        })
    }

-    pub async fn start(&self, args: EndpointStartArgs) -> Result<()> {
+    #[allow(clippy::too_many_arguments)]
+    pub async fn start(
+        &self,
+        auth_token: &Option<String>,
+        endpoint_storage_token: String,
+        endpoint_storage_addr: String,
+        safekeepers_generation: Option<SafekeeperGeneration>,
+        safekeepers: Vec<NodeId>,
+        pageservers: Vec<(PageserverProtocol, Host, u16)>,
+        remote_ext_base_url: Option<&String>,
+        shard_stripe_size: usize,
+        create_test_user: bool,
+        start_timeout: Duration,
+        dev: bool,
+    ) -> Result<()> {
        if self.status() == EndpointStatus::Running {
            anyhow::bail!("The endpoint is already running");
        }
@@ -706,10 +704,10 @@ impl Endpoint {
            std::fs::remove_dir_all(self.pgdata())?;
        }

-        let pageserver_connstring = Self::build_pageserver_connstr(&args.pageservers);
+        let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
        assert!(!pageserver_connstring.is_empty());

-        let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;
+        let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;

        // check for file remote_extensions_spec.json
        // if it is present, read it and pass to compute_ctl
@@ -737,7 +735,7 @@ impl Endpoint {
                    cluster_id: None, // project ID: not used
                    name: None,       // project name: not used
                    state: None,
-                    roles: if args.create_test_user {
+                    roles: if create_test_user {
                        vec![Role {
                            name: PgIdent::from_str("test").unwrap(),
                            encrypted_password: None,
@@ -746,7 +744,7 @@ impl Endpoint {
                    } else {
                        Vec::new()
                    },
-                    databases: if args.create_test_user {
+                    databases: if create_test_user {
                        vec![Database {
                            name: PgIdent::from_str("neondb").unwrap(),
                            owner: PgIdent::from_str("test").unwrap(),
@@ -768,21 +766,20 @@ impl Endpoint {
                endpoint_id: Some(self.endpoint_id.clone()),
                mode: self.mode,
                pageserver_connstring: Some(pageserver_connstring),
-                safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
+                safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
                safekeeper_connstrings,
-                storage_auth_token: args.auth_token.clone(),
+                storage_auth_token: auth_token.clone(),
                remote_extensions,
                pgbouncer_settings: None,
-                shard_stripe_size: Some(args.shard_stripe_size),
+                shard_stripe_size: Some(shard_stripe_size),
                local_proxy_config: None,
                reconfigure_concurrency: self.reconfigure_concurrency,
                drop_subscriptions_before_start: self.drop_subscriptions_before_start,
                audit_log_level: ComputeAudit::Disabled,
                logs_export_host: None::<String>,
-                endpoint_storage_addr: Some(args.endpoint_storage_addr),
-                endpoint_storage_token: Some(args.endpoint_storage_token),
-                autoprewarm: args.autoprewarm,
-                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
+                endpoint_storage_addr: Some(endpoint_storage_addr),
+                endpoint_storage_token: Some(endpoint_storage_token),
+                autoprewarm: false,
                suspend_timeout_seconds: -1, // Only used in neon_local.
            };

@@ -794,7 +791,7 @@ impl Endpoint {
                debug!("spec.cluster {:?}", spec.cluster);

                // fill missing fields again
-                if args.create_test_user {
+                if create_test_user {
                    spec.cluster.roles.push(Role {
                        name: PgIdent::from_str("test").unwrap(),
                        encrypted_password: None,
@@ -829,7 +826,7 @@ impl Endpoint {
        // Launch compute_ctl
        let conn_str = self.connstr("cloud_admin", "postgres");
        println!("Starting postgres node at '{conn_str}'");
-        if args.create_test_user {
+        if create_test_user {
            let conn_str = self.connstr("test", "neondb");
            println!("Also at '{conn_str}'");
        }
@@ -861,11 +858,11 @@ impl Endpoint {
        .stderr(logfile.try_clone()?)
        .stdout(logfile);

-        if let Some(remote_ext_base_url) = args.remote_ext_base_url {
-            cmd.args(["--remote-ext-base-url", &remote_ext_base_url]);
+        if let Some(remote_ext_base_url) = remote_ext_base_url {
+            cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
        }

-        if args.dev {
+        if dev {
            cmd.arg("--dev");
        }

@@ -897,11 +894,10 @@ impl Endpoint {
                Ok(state) => {
                    match state.status {
                        ComputeStatus::Init => {
-                            let timeout = args.start_timeout;
-                            if Instant::now().duration_since(start_at) > timeout {
+                            if Instant::now().duration_since(start_at) > start_timeout {
                                bail!(
                                    "compute startup timed out {:?}; still in Init state",
-                                    timeout
+                                    start_timeout
                                );
                            }
                            // keep retrying
@@ -929,10 +925,9 @@ impl Endpoint {
                    }
                }
                Err(e) => {
-                    if Instant::now().duration_since(start_at) > args.start_timeout {
+                    if Instant::now().duration_since(start_at) > start_timeout {
                        return Err(e).context(format!(
-                            "timed out {:?} waiting to connect to compute_ctl HTTP",
-                            args.start_timeout
+                            "timed out {start_timeout:?} waiting to connect to compute_ctl HTTP",
                        ));
                    }
                }
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -65,27 +65,12 @@ enum Command {
        #[arg(long)]
        scheduling: Option<NodeSchedulingPolicy>,
    },
-    /// Exists for backup usage and will be removed in future.
-    /// Use [`Command::NodeStartDelete`] instead, if possible.
+    // Set a node status as deleted.
    NodeDelete {
        #[arg(long)]
        node_id: NodeId,
    },
-    /// Start deletion of the specified pageserver.
-    NodeStartDelete {
-        #[arg(long)]
-        node_id: NodeId,
-    },
-    /// Cancel deletion of the specified pageserver and wait for `timeout`
-    /// for the operation to be canceled. May be retried.
-    NodeCancelDelete {
-        #[arg(long)]
-        node_id: NodeId,
-        #[arg(long)]
-        timeout: humantime::Duration,
-    },
    /// Delete a tombstone of node from the storage controller.
-    /// This is used when we want to allow the node to be re-registered.
    NodeDeleteTombstone {
        #[arg(long)]
        node_id: NodeId,
@@ -927,43 +912,10 @@ async fn main() -> anyhow::Result<()> {
                .await?;
        }
        Command::NodeDelete { node_id } => {
-            eprintln!("Warning: This command is obsolete and will be removed in a future version");
-            eprintln!("Use `NodeStartDelete` instead, if possible");
            storcon_client
                .dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
                .await?;
        }
-        Command::NodeStartDelete { node_id } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::PUT,
-                    format!("control/v1/node/{node_id}/delete"),
-                    None,
-                )
-                .await?;
-            println!("Delete started for {node_id}");
-        }
-        Command::NodeCancelDelete { node_id, timeout } => {
-            storcon_client
-                .dispatch::<(), ()>(
-                    Method::DELETE,
-                    format!("control/v1/node/{node_id}/delete"),
-                    None,
-                )
-                .await?;
-
-            println!("Waiting for node {node_id} to quiesce on scheduling policy ...");
-
-            let final_policy =
-                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {
-                    !matches!(sched, NodeSchedulingPolicy::Deleting)
-                })
-                .await?;
-
-            println!(
-                "Delete was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
-            );
-        }
        Command::NodeDeleteTombstone { node_id } => {
            storcon_client
                .dispatch::<(), ()>(
--- a/docs/rfcs/035-safekeeper-dynamic-membership-change.md
+++ b/docs/rfcs/035-safekeeper-dynamic-membership-change.md
@@ -20,7 +20,7 @@ In our case consensus leader is compute (walproposer), and we don't want to wake
 up all computes for the change. Neither we want to fully reimplement the leader
 logic second time outside compute. Because of that the proposed algorithm relies
 for issuing configurations on the external fault tolerant (distributed) strongly
-consistent storage with simple API: CAS (compare-and-swap) on the single key.
+consistent storage with simple API: CAS (compare-and-swap) on the single key.
 Properly configured postgres suits this.

 In the system consensus is implemented at the timeline level, so algorithm below
@@ -34,7 +34,7 @@ A configuration is

 ```
 struct Configuration {
-    generation: SafekeeperGeneration, // a number uniquely identifying configuration
+    generation: Generation, // a number uniquely identifying configuration
    sk_set: Vec<NodeId>, // current safekeeper set
    new_sk_set: Optional<Vec<NodeId>>,
 }
@@ -81,11 +81,11 @@ configuration generation in them is less than its current one. Namely, it
 refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
 response it sends its current configuration generation to let walproposer know.

-Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/membership`
-accepting `Configuration`. Safekeeper switches to the given conf if it is higher than its
+Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
+accepting `Configuration`. Safekeeper switches to the given conf if it is higher than its
 current one and ignores it otherwise. In any case it replies with
 ```
-struct TimelineMembershipSwitchResponse {
+struct ConfigurationSwitchResponse {
    conf: Configuration,
    term: Term,
    last_log_term: Term,
@@ -108,7 +108,7 @@ establishes this configuration as its own and moves to voting.
 It should stop talking to safekeepers not listed in the configuration at this
 point, though it is not unsafe to continue doing so.

-To be elected it must receive votes from both majorities if `new_sk_set` is present.
+To be elected it must receive votes from both majorities if `new_sk_set` is present.
 Similarly, to commit WAL it must receive flush acknowledge from both majorities.

 If walproposer hears from safekeeper configuration higher than his own (i.e.
@@ -130,7 +130,7 @@ storage are reachable.
 1) Fetch current timeline configuration from the configuration storage.
 2) If it is already joint one and `new_set` is different from `desired_set`
   refuse to change. However, assign join conf to (in memory) var
-   `joint_conf` and proceed to step 4 to finish the ongoing change.
+   `joint_conf` and proceed to step 4 to finish the ongoing change.
 3) Else, create joint `joint_conf: Configuration`: increment current conf number
   `n` and put `desired_set` to `new_sk_set`. Persist it in the configuration
   storage by doing CAS on the current generation: change happens only if
@@ -161,11 +161,11 @@ storage are reachable.
   because `pull_timeline` already includes it and plus additionally would be
   broadcast by compute. More importantly, we may proceed to the next step
   only when `<last_log_term, flush_lsn>` on the majority of the new set reached
-   `sync_position`. Similarly, on the happy path no waiting is needed because
+   `sync_position`. Similarly, on the happy path no waiting is needed because
   `pull_timeline` already includes it. However, we should double
    check to be safe. For example, timeline could have been created earlier e.g.
    manually or after try-to-migrate, abort, try-to-migrate-again sequence.
-7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new
+7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new
   safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
   storage under one more CAS.
 8) Call `PUT` `configuration` on safekeepers from the new set,
@@ -178,12 +178,12 @@ spec of it.

 Description above focuses on safety. To make the flow practical and live, here a few more
 considerations.
-1) It makes sense to ping new set to ensure we are migrating to live node(s) before
+1) It makes sense to ping new set to ensure we are migrating to live node(s) before
  step 3.
 2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
   it is safe to rollback to the old conf with one more CAS.
 3) On step 4 timeline might be already created on members of the new set for various reasons;
-   the simplest is the procedure restart. There are more complicated scenarios like mentioned
+   the simplest is the procedure restart. There are more complicated scenarios like mentioned
   in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
   generations, so seems simpler to treat existing timeline as success. However, this also
   has a disadvantage: you might imagine an surpassingly unlikely schedule where condition in
@@ -192,7 +192,7 @@ considerations.
 4) In the end timeline should be locally deleted on the safekeeper(s) which are
   in the old set but not in the new one, unless they are unreachable. To be
   safe this also should be done under generation number (deletion proceeds only if
-   current configuration is <= than one in request and safekeeper is not member of it).
+   current configuration is <= than one in request and safekeeper is not member of it).
 5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
   jump to step 7, using it as `new_conf`.

@@ -261,14 +261,14 @@ Timeline (branch) creation in cplane should call storage_controller POST
 Response should be augmented with `safekeepers_generation` and `safekeepers`
 fields like described in `/notify-safekeepers` above. Initially (currently)
 these fields may be absent; in this case cplane chooses safekeepers on its own
-like it currently does. The call should be retried until it succeeds.
+like it currently does. The call should be retried until it succeeds.

 Timeline deletion and tenant deletion in cplane should call appropriate
 storage_controller endpoints like it currently does for sharded tenants. The
 calls should be retried until they succeed.

-When compute receives safekeeper list from control plane it needs to know the
-generation to check whether it should be updated (note that compute may get
+When compute receives safekeeper list from control plane it needs to know the
+generation to check whether it should be updated (note that compute may get
 safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
 GUC is just a comma separates list of `host:port`. Let's prefix it with
 `g#<generation>:` to this end, so it will look like
@@ -305,8 +305,8 @@ enum MigrationRequest {
 ```

 `FinishPending` requests to run the procedure to ensure state is clean: current
-configuration is not joint and the majority of safekeepers are aware of it, but do
-not attempt to migrate anywhere. If the current configuration fetched on step 1 is
+configuration is not joint and the majority of safekeepers are aware of it, but do
+not attempt to migrate anywhere. If the current configuration fetched on step 1 is
 not joint it jumps to step 7. It should be run at startup for all timelines (but
 similarly, in the first version it is ok to trigger it manually).

@@ -315,7 +315,7 @@ similarly, in the first version it is ok to trigger it manually).
 `safekeepers` table mirroring current `nodes` should be added, except that for
 `scheduling_policy`: it is enough to have at least in the beginning only 3
 fields: 1) `active` 2) `paused` (initially means only not assign new tlis there
-3) `decommissioned` (node is removed).
+3) `decomissioned` (node is removed).

 `timelines` table:
 ```
@@ -326,10 +326,9 @@ table! {
        tenant_id -> Varchar,
        start_lsn -> pg_lsn,
        generation -> Int4,
-        sk_set -> Array<Int8>, // list of safekeeper ids
+        sk_set -> Array<Int4>, // list of safekeeper ids
        new_sk_set -> Nullable<Array<Int8>>, // list of safekeeper ids, null if not joint conf
        cplane_notified_generation -> Int4,
-        sk_set_notified_generation -> Int4, // the generation a quorum of sk_set knows about
        deleted_at -> Nullable<Timestamptz>,
    }
 }
@@ -339,23 +338,13 @@ table! {
 might also want to add ancestor_timeline_id to preserve the hierarchy, but for
 this RFC it is not needed.

-`cplane_notified_generation` and `sk_set_notified_generation` fields are used to
-track the last stage of the algorithm, when we need to notify safekeeper set and cplane
-with the final configuration after it's already committed to DB.
-
-The timeline is up-to-date (no migration in progress) if `new_sk_set` is null and
-`*_notified_generation` fields are up to date with `generation`. 
-
-It's possible to replace `*_notified_generation` with one boolean field `migration_completed`,
-but for better observability it's nice to have them separately.
-
 #### API

 Node management is similar to pageserver:
-1) POST `/control/v1/safekeeper` inserts safekeeper.
-2) GET `/control/v1/safekeeper` lists safekeepers.
-3) GET `/control/v1/safekeeper/:node_id` gets safekeeper.
-4) PUT `/control/v1/safekeper/:node_id/scheduling_policy` changes status to e.g.
+1) POST `/control/v1/safekeepers` inserts safekeeper.
+2) GET `/control/v1/safekeepers` lists safekeepers.
+3) GET `/control/v1/safekeepers/:node_id` gets safekeeper.
+4) PUT `/control/v1/safekeepers/:node_id/status` changes status to e.g.
   `offline` or `decomissioned`. Initially it is simpler not to schedule any
    migrations here.

@@ -379,8 +368,8 @@ Migration API: the first version is the simplest and the most imperative:
 all timelines from one safekeeper to another. It accepts json
 ```
 {
-    "src_sk": NodeId,
-    "dst_sk": NodeId,
+    "src_sk": u32,
+    "dst_sk": u32,
    "limit": Optional<u32>,
 }
 ```
@@ -390,15 +379,12 @@ Returns list of scheduled requests.
 2) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate` schedules `MigrationRequest`
   to move single timeline to given set of safekeepers:
 ```
-struct TimelineSafekeeperMigrateRequest {
-    "new_sk_set": Vec<NodeId>,
+{
+    "desired_set": Vec<u32>,
 }
 ```

-In the first version the handler migrates the timeline to `new_sk_set` synchronously.
-Should be retried until success.
-
-In the future we might change it to asynchronous API and return scheduled request.
+Returns scheduled request.

 Similar call should be added for the tenant.

@@ -448,9 +434,6 @@ table! {
 }
 ```

-We load all pending ops from the table on startup into the memory.
-The table is needed only to preserve the state between restarts.
-
 `op_type` can be `include` (seed from peers and ensure generation is up to
 date), `exclude` (remove locally) and `delete`. Field is actually not strictly
 needed as it can be computed from current configuration, but gives more explicit
@@ -491,7 +474,7 @@ actions must be idempotent. Now, a tricky point here is timeline start LSN. For
 the initial (tenant creation) call cplane doesn't know it. However, setting
 start_lsn on safekeepers during creation is a good thing -- it provides a
 guarantee that walproposer can always find a common point in WAL histories of
-safekeeper and its own, and so absence of it would be a clear sign of
+safekeeper and its own, and so absence of it would be a clear sign of
 corruption. The following sequence works:
 1) Create timeline (or observe that it exists) on pageserver,
   figuring out last_record_lsn in response.
@@ -514,9 +497,11 @@ corruption. The following sequence works:
   retries the call until 200 response.

   There is a small question how request handler (timeline creation in this
-   case) would interact with per sk reconciler. In the current implementation
-   we first persist the request in the DB, and then send an in-memory request
-   to each safekeeper reconciler to process it.
+   case) would interact with per sk reconciler. As always I prefer to do the
+   simplest possible thing and here it seems to be just waking it up so it
+   re-reads the db for work to do. Passing work in memory is faster, but
+   that shouldn't matter, and path to scan db for work will exist anyway, 
+   simpler to reuse it.

 For pg version / wal segment size: while we may persist them in `timelines`
 table, it is not necessary as initial creation at step 3 can take them from
@@ -524,40 +509,30 @@ pageserver or cplane creation call and later pull_timeline will carry them
 around.

 Timeline migration.
-1) CAS to the db to create joint conf. Since this moment the migration is considered to be 
-   "in progress". We can detect all "in-progress" migrations looking into the database.
-2) Do steps 4-6 from the algorithm, including `pull_timeline` onto `new_sk_set`, update membership
-   configuration on all safekeepers, notify cplane, etc. All operations are idempotent,
-   so we don't need to persist anything in the database at this stage. If any errors occur,
-   it's safe to retry or abort the migration.
-3) Once it becomes possible per alg description above, get out of joint conf
-   with another CAS. Also should insert `exclude` entries into `safekeeper_timeline_pending_ops`
-   in the same DB transaction. Adding `exclude` entries atomically is nesessary because after
-   CAS we don't have the list of excluded safekeepers in the `timelines` table anymore, but we
-   need to have them persisted somewhere in case the migration is interrupted right after the CAS.
-4) Finish the migration. The final membership configuration is committed to the DB at this stage.
-   So, the migration can not be aborted anymore. But it can still be retried if the migration fails
-   past stage 3. To finish the migration we need to send the new membership configuration to
-   a new quorum of safekeepers, notify cplane with the new safekeeper list and schedule the `exclude`
-   requests to in-memory queue for safekeeper reconciler. If the algrorithm is retried, it's
-   possible that we have already committed `exclude` requests to DB, but didn't send them to
-   the in-memory queue. In this case we need to read them from `safekeeper_timeline_pending_ops`
-   because it's the only place where they are persistent. The fields `sk_set_notified_generation`
-   and `cplane_notified_generation` are updated after each step. The migration is considered
-   fully completed when they match the `generation` field.
-
-In practice, we can report "success" after stage 3 and do the "finish" step in per-timeline
-reconciler (if we implement it). But it's wise to at least try to finish them synchronously,
-so the timeline is always in a "good state" and doesn't require an old quorum to commit
-WAL after the migration reported "success".
+1) CAS to the db to create joint conf, and in the same transaction create
+   `safekeeper_timeline_pending_ops` `include` entries to initialize new members
+   as well as deliver this conf to current ones; poke per sk reconcilers to work
+   on it. Any conf change should also poke cplane notifier task(s).
+2) Once it becomes possible per alg description above, get out of joint conf
+   with another CAS. Task should get wakeups from per sk reconcilers because 
+   conf switch is required for advancement; however retries should be sleep
+   based as well as LSN advancement might be needed, though in happy path 
+   it isn't. To see whether further transition is possible on wakeup migration
+   executor polls safekeepers per the algorithm. CAS creating new conf with only
+   new members should again insert entries to `safekeeper_timeline_pending_ops`
+   to switch them there, as well as `exclude` rows to remove timeline from 
+   old members.

 Timeline deletion: just set `deleted_at` on the timeline row and insert
 `safekeeper_timeline_pending_ops` entries in the same xact, the rest is done by
 per sk reconcilers.

-When node is removed (set to `decommissioned`), `safekeeper_timeline_pending_ops`
+When node is removed (set to `decomissioned`), `safekeeper_timeline_pending_ops`
 for it must be cleared in the same transaction.

+One more task pool should infinitely retry notifying control plane about changed
+safekeeper sets (trying to make `cplane_notified_generation` equal `generation`).
+
 #### Dealing with multiple instances of storage_controller

 Operations described above executed concurrently might create some errors but do
@@ -566,7 +541,7 @@ of storage_controller it is fine to have it temporarily, e.g. during redeploy.

 To harden against some controller instance creating some work in
 `safekeeper_timeline_pending_ops` and then disappearing without anyone pickup up
-the job per sk reconcilers apart from explicit wakeups should scan for work
+the job per sk reconcilers apart from explicit wakeups should scan for work
 periodically. It is possible to remove that though if all db updates are
 protected with leadership token/term -- then such scans are needed only after
 leadership is acquired.
@@ -588,7 +563,7 @@ There should be following layers of tests:
   safekeeper communication and pull_timeline need to be mocked and main switch
   procedure wrapped to as a node (thread) in simulation tests, using these
   mocks. Test would inject migrations like it currently injects
-   safekeeper/walproposer restarts. Main assert is the same -- committed WAL must
+   safekeeper/walproposer restarts. Main assert is the same -- committed WAL must
   not be lost.

 3) Since simulation testing injects at relatively high level points (not
@@ -638,7 +613,7 @@ Let's have the following implementation bits for gradual rollout:
  `notify-safekeepers`.

 Then the rollout for a region would be:
- Current situation: safekeepers are chosen by control_plane.
+- Current situation: safekeepers are chosen by control_plane.
 - We manually migrate some timelines, test moving them around.
 - Then we enable `--set-safekeepers` so that all new timelines
  are on storage controller.
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -58,7 +58,7 @@ pub enum LfcPrewarmState {
    },
 }

-#[derive(Serialize, Default, Debug, Clone, PartialEq)]
+#[derive(Serialize, Default, Debug, Clone)]
 #[serde(tag = "status", rename_all = "snake_case")]
 pub enum LfcOffloadState {
    #[default]
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -181,14 +181,10 @@ pub struct ComputeSpec {
    /// JWT for authorizing requests to endpoint storage service
    pub endpoint_storage_token: Option<String>,

+    /// Download LFC state from endpoint_storage and pass it to Postgres on startup
    #[serde(default)]
-    /// Download LFC state from endpoint storage and pass it to Postgres on compute startup
    pub autoprewarm: bool,

-    #[serde(default)]
-    /// Upload LFC state to endpoint storage periodically. Default value (None) means "don't upload"
-    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
-
    /// Suspend timeout in seconds.
    ///
    /// We use this value to derive other values, such as the installed extensions metric.
--- a/libs/compute_api/tests/cluster_spec.json
+++ b/libs/compute_api/tests/cluster_spec.json
@@ -90,11 +90,6 @@
                "value": "off",
                "vartype": "bool"
            },
-            {
-                "name": "offload_lfc_interval_seconds",
-                "value": "20",
-                "vartype": "integer"
-            },
            {
                "name": "neon.safekeepers",
                "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -386,7 +386,6 @@ pub enum NodeSchedulingPolicy {
    Pause,
    PauseForRestart,
    Draining,
-    Deleting,
 }

 impl FromStr for NodeSchedulingPolicy {
@@ -399,7 +398,6 @@ impl FromStr for NodeSchedulingPolicy {
            "pause" => Ok(Self::Pause),
            "pause_for_restart" => Ok(Self::PauseForRestart),
            "draining" => Ok(Self::Draining),
-            "deleting" => Ok(Self::Deleting),
            _ => Err(anyhow::anyhow!("Unknown scheduling state '{s}'")),
        }
    }
@@ -414,7 +412,6 @@ impl From<NodeSchedulingPolicy> for String {
            Pause => "pause",
            PauseForRestart => "pause_for_restart",
            Draining => "draining",
-            Deleting => "deleting",
        }
        .to_string()
    }
--- a/libs/proxy/json/Cargo.toml
+++ b/libs/proxy/json/Cargo.toml
@@ -1,12 +0,0 @@
-[package]
-name = "json"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-ryu = "1"
-itoa = "1"
-
-[dev-dependencies]
-futures = "0.3"
--- a/libs/proxy/json/src/lib.rs
+++ b/libs/proxy/json/src/lib.rs
@@ -1,412 +0,0 @@
-//! A JSON serialization lib, designed for more flexibility than `serde_json` offers.
-//!
-//! Features:
-//!
-//! ## Dynamic construction
-//!
-//! Sometimes you have dynamic values you want to serialize, that are not already in a serde-aware model like a struct or a Vec etc.
-//! To achieve this with serde, you need to implement a lot of different traits on a lot of different new-types.
-//! Because of this, it's often easier to give-in and pull all the data into a serde-aware model (`serde_json::Value` or some intermediate struct),
-//! but that is often not very efficient.
-//!
-//! This crate allows full control over the JSON encoding without needing to implement any extra traits. Just call the
-//! relevant functions, and it will guarantee a correctly encoded JSON value.
-//!
-//! ## Async construction
-//!
-//! Similar to the above, sometimes the values arrive asynchronously. Often collecting those values in memory
-//! is more expensive than writing them as JSON, since the overheads of `Vec` and `String` is much higher, however
-//! there are exceptions.
-//!
-//! Serializing to JSON all in one go is also more CPU intensive and can cause lag spikes,
-//! whereas serializing values incrementally spreads out the CPU load and reduces lag.
-//!
-//! ## Examples
-//!
-//! To represent the following JSON as a compact string
-//!
-//! ```json
-//! {
-//!   "results": {
-//!     "rows": [
-//!       {
-//!         "id": 1,
-//!         "value": null
-//!       },
-//!       {
-//!         "id": 2,
-//!         "value": "hello"
-//!       }
-//!     ]
-//!   }
-//! }
-//! ```
-//!
-//! We can use the following code:
-//!
-//! ```
-//! // create the outer object
-//! let s = json::value_to_string!(|v| json::value_as_object!(|v| {
-//!     // create an entry with key "results" and start an object value associated with it.
-//!     let results = v.key("results");
-//!     json::value_as_object!(|results| {
-//!         // create an entry with key "rows" and start an list value associated with it.
-//!         let rows = results.key("rows");
-//!         json::value_as_list!(|rows| {
-//!             // create a list entry and start an object value associated with it.
-//!             let row = rows.entry();
-//!             json::value_as_object!(|row| {
-//!                 // add entry "id": 1
-//!                 row.entry("id", 1);
-//!                 // add entry "value": null
-//!                 row.entry("value", json::Null);
-//!             });
-//!
-//!             // create a list entry and start an object value associated with it.
-//!             let row = rows.entry();
-//!             json::value_as_object!(|row| {
-//!                 // add entry "id": 2
-//!                 row.entry("id", 2);
-//!                 // add entry "value": "hello"
-//!                 row.entry("value", "hello");
-//!             });
-//!         });
-//!     });
-//! }));
-//!
-//! assert_eq!(s, r#"{"results":{"rows":[{"id":1,"value":null},{"id":2,"value":"hello"}]}}"#);
-//! ```
-
-mod macros;
-mod str;
-mod value;
-
-pub use value::{Null, ValueEncoder};
-
-#[must_use]
-/// Serialize a single json value.
-pub struct ValueSer<'buf> {
-    buf: &'buf mut Vec<u8>,
-    start: usize,
-}
-
-impl<'buf> ValueSer<'buf> {
-    /// Create a new json value serializer.
-    pub fn new(buf: &'buf mut Vec<u8>) -> Self {
-        Self { buf, start: 0 }
-    }
-
-    /// Borrow the underlying buffer
-    pub fn as_buffer(&self) -> &[u8] {
-        self.buf
-    }
-
-    #[inline]
-    pub fn value(self, e: impl ValueEncoder) {
-        e.encode(self);
-    }
-
-    /// Write raw bytes to the buf. This must be already JSON encoded.
-    #[inline]
-    pub fn write_raw_json(self, data: &[u8]) {
-        self.buf.extend_from_slice(data);
-        self.finish();
-    }
-
-    /// Start a new object serializer.
-    #[inline]
-    pub fn object(self) -> ObjectSer<'buf> {
-        ObjectSer::new(self)
-    }
-
-    /// Start a new list serializer.
-    #[inline]
-    pub fn list(self) -> ListSer<'buf> {
-        ListSer::new(self)
-    }
-
-    /// Finish the value ser.
-    #[inline]
-    fn finish(self) {
-        // don't trigger the drop handler which triggers a rollback.
-        // this won't cause memory leaks because `ValueSet` owns no allocations.
-        std::mem::forget(self);
-    }
-}
-
-impl Drop for ValueSer<'_> {
-    fn drop(&mut self) {
-        self.buf.truncate(self.start);
-    }
-}
-
-#[must_use]
-/// Serialize a json object.
-pub struct ObjectSer<'buf> {
-    value: ValueSer<'buf>,
-    start: usize,
-}
-
-impl<'buf> ObjectSer<'buf> {
-    /// Start a new object serializer.
-    #[inline]
-    pub fn new(value: ValueSer<'buf>) -> Self {
-        value.buf.push(b'{');
-        let start = value.buf.len();
-        Self { value, start }
-    }
-
-    /// Borrow the underlying buffer
-    pub fn as_buffer(&self) -> &[u8] {
-        self.value.as_buffer()
-    }
-
-    /// Start a new object entry with the given string key, returning a [`ValueSer`] for the associated value.
-    #[inline]
-    pub fn key(&mut self, key: impl KeyEncoder) -> ValueSer<'_> {
-        key.write_key(self)
-    }
-
-    /// Write an entry (key-value pair) to the object.
-    #[inline]
-    pub fn entry(&mut self, key: impl KeyEncoder, val: impl ValueEncoder) {
-        self.key(key).value(val);
-    }
-
-    #[inline]
-    fn entry_inner(&mut self, f: impl FnOnce(&mut Vec<u8>)) -> ValueSer<'_> {
-        // track before the separator so we the value is rolled back it also removes the separator.
-        let start = self.value.buf.len();
-
-        // push separator if necessary
-        if self.value.buf.len() > self.start {
-            self.value.buf.push(b',');
-        }
-        // push key
-        f(self.value.buf);
-        // push value separator
-        self.value.buf.push(b':');
-
-        // return value writer.
-        ValueSer {
-            buf: self.value.buf,
-            start,
-        }
-    }
-
-    /// Reset the buffer back to before this object was started.
-    #[inline]
-    pub fn rollback(self) -> ValueSer<'buf> {
-        // Do not fully reset the value, only reset it to before the `{`.
-        // This ensures any `,` before this value are not clobbered.
-        self.value.buf.truncate(self.start - 1);
-        self.value
-    }
-
-    /// Finish the object ser.
-    #[inline]
-    pub fn finish(self) {
-        self.value.buf.push(b'}');
-        self.value.finish();
-    }
-}
-
-pub trait KeyEncoder {
-    fn write_key<'a>(self, obj: &'a mut ObjectSer) -> ValueSer<'a>;
-}
-
-#[must_use]
-/// Serialize a json object.
-pub struct ListSer<'buf> {
-    value: ValueSer<'buf>,
-    start: usize,
-}
-
-impl<'buf> ListSer<'buf> {
-    /// Start a new list serializer.
-    #[inline]
-    pub fn new(value: ValueSer<'buf>) -> Self {
-        value.buf.push(b'[');
-        let start = value.buf.len();
-        Self { value, start }
-    }
-
-    /// Borrow the underlying buffer
-    pub fn as_buffer(&self) -> &[u8] {
-        self.value.as_buffer()
-    }
-
-    /// Write an value to the list.
-    #[inline]
-    pub fn push(&mut self, val: impl ValueEncoder) {
-        self.entry().value(val);
-    }
-
-    /// Start a new value entry in this list.
-    #[inline]
-    pub fn entry(&mut self) -> ValueSer<'_> {
-        // track before the separator so we the value is rolled back it also removes the separator.
-        let start = self.value.buf.len();
-
-        // push separator if necessary
-        if self.value.buf.len() > self.start {
-            self.value.buf.push(b',');
-        }
-
-        // return value writer.
-        ValueSer {
-            buf: self.value.buf,
-            start,
-        }
-    }
-
-    /// Reset the buffer back to before this object was started.
-    #[inline]
-    pub fn rollback(self) -> ValueSer<'buf> {
-        // Do not fully reset the value, only reset it to before the `[`.
-        // This ensures any `,` before this value are not clobbered.
-        self.value.buf.truncate(self.start - 1);
-        self.value
-    }
-
-    /// Finish the object ser.
-    #[inline]
-    pub fn finish(self) {
-        self.value.buf.push(b']');
-        self.value.finish();
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::{Null, ValueSer};
-
-    #[test]
-    fn object() {
-        let mut buf = vec![];
-        let mut object = ValueSer::new(&mut buf).object();
-        object.entry("foo", "bar");
-        object.entry("baz", Null);
-        object.finish();
-
-        assert_eq!(buf, br#"{"foo":"bar","baz":null}"#);
-    }
-
-    #[test]
-    fn list() {
-        let mut buf = vec![];
-        let mut list = ValueSer::new(&mut buf).list();
-        list.entry().value("bar");
-        list.entry().value(Null);
-        list.finish();
-
-        assert_eq!(buf, br#"["bar",null]"#);
-    }
-
-    #[test]
-    fn object_macro() {
-        let res = crate::value_to_string!(|obj| {
-            crate::value_as_object!(|obj| {
-                obj.entry("foo", "bar");
-                obj.entry("baz", Null);
-            })
-        });
-
-        assert_eq!(res, r#"{"foo":"bar","baz":null}"#);
-    }
-
-    #[test]
-    fn list_macro() {
-        let res = crate::value_to_string!(|list| {
-            crate::value_as_list!(|list| {
-                list.entry().value("bar");
-                list.entry().value(Null);
-            })
-        });
-
-        assert_eq!(res, r#"["bar",null]"#);
-    }
-
-    #[test]
-    fn rollback_on_drop() {
-        let res = crate::value_to_string!(|list| {
-            crate::value_as_list!(|list| {
-                list.entry().value("bar");
-
-                'cancel: {
-                    let nested_list = list.entry();
-                    crate::value_as_list!(|nested_list| {
-                        nested_list.entry().value(1);
-
-                        assert_eq!(nested_list.as_buffer(), br#"["bar",[1"#);
-                        if true {
-                            break 'cancel;
-                        }
-                    })
-                }
-
-                assert_eq!(list.as_buffer(), br#"["bar""#);
-
-                list.entry().value(Null);
-            })
-        });
-
-        assert_eq!(res, r#"["bar",null]"#);
-    }
-
-    #[test]
-    fn rollback_object() {
-        let res = crate::value_to_string!(|obj| {
-            crate::value_as_object!(|obj| {
-                let entry = obj.key("1");
-                entry.value(1_i32);
-
-                let entry = obj.key("2");
-                let entry = {
-                    let mut nested_obj = entry.object();
-                    nested_obj.entry("foo", "bar");
-                    nested_obj.rollback()
-                };
-
-                entry.value(2_i32);
-            })
-        });
-
-        assert_eq!(res, r#"{"1":1,"2":2}"#);
-    }
-
-    #[test]
-    fn rollback_list() {
-        let res = crate::value_to_string!(|list| {
-            crate::value_as_list!(|list| {
-                let entry = list.entry();
-                entry.value(1_i32);
-
-                let entry = list.entry();
-                let entry = {
-                    let mut nested_list = entry.list();
-                    nested_list.push("foo");
-                    nested_list.rollback()
-                };
-
-                entry.value(2_i32);
-            })
-        });
-
-        assert_eq!(res, r#"[1,2]"#);
-    }
-
-    #[test]
-    fn string_escaping() {
-        let mut buf = vec![];
-        let mut object = ValueSer::new(&mut buf).object();
-
-        let key = "hello";
-        let value = "\n world";
-
-        object.entry(format_args!("{key:?}"), value);
-        object.finish();
-
-        assert_eq!(buf, br#"{"\"hello\"":"\n world"}"#);
-    }
-}
--- a/libs/proxy/json/src/macros.rs
+++ b/libs/proxy/json/src/macros.rs
@@ -1,86 +0,0 @@
-//! # Examples
-//!
-//! ```
-//! use futures::{StreamExt, TryStream, TryStreamExt};
-//!
-//! async fn stream_to_json_list<S, T, E>(mut s: S) -> Result<String, E>
-//! where
-//!     S: TryStream<Ok = T, Error = E> + Unpin,
-//!     T: json::ValueEncoder
-//! {
-//!     Ok(json::value_to_string!(|val| json::value_as_list!(|val| {
-//!         // note how we can use `.await` and `?` in here.
-//!         while let Some(value) = s.try_next().await? {
-//!             val.push(value);
-//!         }
-//!     })))
-//! }
-//!
-//! let stream = futures::stream::iter([1, 2, 3]).map(Ok::<i32, ()>);
-//! let json_string = futures::executor::block_on(stream_to_json_list(stream)).unwrap();
-//! assert_eq!(json_string, "[1,2,3]");
-//! ```
-
-/// A helper to create a new JSON vec.
-///
-/// Implemented as a macro to preserve all control flow.
-#[macro_export]
-macro_rules! value_to_vec {
-    (|$val:ident| $body:expr) => {{
-        let mut buf = vec![];
-        let $val = $crate::ValueSer::new(&mut buf);
-        let _: () = $body;
-        buf
-    }};
-}
-
-/// A helper to create a new JSON string.
-///
-/// Implemented as a macro to preserve all control flow.
-#[macro_export]
-macro_rules! value_to_string {
-    (|$val:ident| $body:expr) => {{
-        ::std::string::String::from_utf8($crate::value_to_vec!(|$val| $body))
-            .expect("json should be valid utf8")
-    }};
-}
-
-/// A helper that ensures the [`ObjectSer::finish`](crate::ObjectSer::finish) method is called on completion.
-///
-/// Consumes `$val` and assigns it as an [`ObjectSer`](crate::ObjectSer) serializer.
-/// The serializer is only 'finished' if the body completes.
-/// The serializer is rolled back if `break`/`return` escapes the body.
-///
-/// Implemented as a macro to preserve all control flow.
-#[macro_export]
-macro_rules! value_as_object {
-    (|$val:ident| $body:expr) => {{
-        let mut obj = $crate::ObjectSer::new($val);
-
-        let $val = &mut obj;
-        let res = $body;
-
-        obj.finish();
-        res
-    }};
-}
-
-/// A helper that ensures the [`ListSer::finish`](crate::ListSer::finish) method is called on completion.
-///
-/// Consumes `$val` and assigns it as an [`ListSer`](crate::ListSer) serializer.
-/// The serializer is only 'finished' if the body completes.
-/// The serializer is rolled back if `break`/`return` escapes the body.
-///
-/// Implemented as a macro to preserve all control flow.
-#[macro_export]
-macro_rules! value_as_list {
-    (|$val:ident| $body:expr) => {{
-        let mut list = $crate::ListSer::new($val);
-
-        let $val = &mut list;
-        let res = $body;
-
-        list.finish();
-        res
-    }};
-}
--- a/libs/proxy/json/src/str.rs
+++ b/libs/proxy/json/src/str.rs
@@ -1,166 +0,0 @@
-//! Helpers for serializing escaped strings.
-//!
-//! ## License
-//!
-//! <https://github.com/serde-rs/json/blob/c1826ebcccb1a520389c6b78ad3da15db279220d/src/ser.rs#L1514-L1552>
-//! <https://github.com/serde-rs/json/blob/c1826ebcccb1a520389c6b78ad3da15db279220d/src/ser.rs#L2081-L2157>
-//! Licensed by David Tolnay under MIT or Apache-2.0.
-//!
-//! With modifications by Conrad Ludgate on behalf of Databricks.
-
-use std::fmt::{self, Write};
-
-/// Represents a character escape code in a type-safe manner.
-pub enum CharEscape {
-    /// An escaped quote `"`
-    Quote,
-    /// An escaped reverse solidus `\`
-    ReverseSolidus,
-    // /// An escaped solidus `/`
-    // Solidus,
-    /// An escaped backspace character (usually escaped as `\b`)
-    Backspace,
-    /// An escaped form feed character (usually escaped as `\f`)
-    FormFeed,
-    /// An escaped line feed character (usually escaped as `\n`)
-    LineFeed,
-    /// An escaped carriage return character (usually escaped as `\r`)
-    CarriageReturn,
-    /// An escaped tab character (usually escaped as `\t`)
-    Tab,
-    /// An escaped ASCII plane control character (usually escaped as
-    /// `\u00XX` where `XX` are two hex characters)
-    AsciiControl(u8),
-}
-
-impl CharEscape {
-    #[inline]
-    fn from_escape_table(escape: u8, byte: u8) -> CharEscape {
-        match escape {
-            self::BB => CharEscape::Backspace,
-            self::TT => CharEscape::Tab,
-            self::NN => CharEscape::LineFeed,
-            self::FF => CharEscape::FormFeed,
-            self::RR => CharEscape::CarriageReturn,
-            self::QU => CharEscape::Quote,
-            self::BS => CharEscape::ReverseSolidus,
-            self::UU => CharEscape::AsciiControl(byte),
-            _ => unreachable!(),
-        }
-    }
-}
-
-pub(crate) fn format_escaped_str(writer: &mut Vec<u8>, value: &str) {
-    writer.reserve(2 + value.len());
-
-    writer.push(b'"');
-
-    let rest = format_escaped_str_contents(writer, value);
-    writer.extend_from_slice(rest);
-
-    writer.push(b'"');
-}
-
-pub(crate) fn format_escaped_fmt(writer: &mut Vec<u8>, args: fmt::Arguments) {
-    writer.push(b'"');
-
-    Collect { buf: writer }
-        .write_fmt(args)
-        .expect("formatting should not error");
-
-    writer.push(b'"');
-}
-
-struct Collect<'buf> {
-    buf: &'buf mut Vec<u8>,
-}
-
-impl fmt::Write for Collect<'_> {
-    fn write_str(&mut self, s: &str) -> fmt::Result {
-        let last = format_escaped_str_contents(self.buf, s);
-        self.buf.extend(last);
-        Ok(())
-    }
-}
-
-// writes any escape sequences, and returns the suffix still needed to be written.
-fn format_escaped_str_contents<'a>(writer: &mut Vec<u8>, value: &'a str) -> &'a [u8] {
-    let bytes = value.as_bytes();
-
-    let mut start = 0;
-
-    for (i, &byte) in bytes.iter().enumerate() {
-        let escape = ESCAPE[byte as usize];
-        if escape == 0 {
-            continue;
-        }
-
-        writer.extend_from_slice(&bytes[start..i]);
-
-        let char_escape = CharEscape::from_escape_table(escape, byte);
-        write_char_escape(writer, char_escape);
-
-        start = i + 1;
-    }
-
-    &bytes[start..]
-}
-
-const BB: u8 = b'b'; // \x08
-const TT: u8 = b't'; // \x09
-const NN: u8 = b'n'; // \x0A
-const FF: u8 = b'f'; // \x0C
-const RR: u8 = b'r'; // \x0D
-const QU: u8 = b'"'; // \x22
-const BS: u8 = b'\\'; // \x5C
-const UU: u8 = b'u'; // \x00...\x1F except the ones above
-const __: u8 = 0;
-
-// Lookup table of escape sequences. A value of b'x' at index i means that byte
-// i is escaped as "\x" in JSON. A value of 0 means that byte i is not escaped.
-static ESCAPE: [u8; 256] = [
-    //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
-    UU, UU, UU, UU, UU, UU, UU, UU, BB, TT, NN, UU, FF, RR, UU, UU, // 0
-    UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, UU, // 1
-    __, __, QU, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 3
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 4
-    __, __, __, __, __, __, __, __, __, __, __, __, BS, __, __, __, // 5
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 6
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
-    __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
-];
-
-fn write_char_escape(writer: &mut Vec<u8>, char_escape: CharEscape) {
-    let s = match char_escape {
-        CharEscape::Quote => b"\\\"",
-        CharEscape::ReverseSolidus => b"\\\\",
-        // CharEscape::Solidus => b"\\/",
-        CharEscape::Backspace => b"\\b",
-        CharEscape::FormFeed => b"\\f",
-        CharEscape::LineFeed => b"\\n",
-        CharEscape::CarriageReturn => b"\\r",
-        CharEscape::Tab => b"\\t",
-        CharEscape::AsciiControl(byte) => {
-            static HEX_DIGITS: [u8; 16] = *b"0123456789abcdef";
-            let bytes = &[
-                b'\\',
-                b'u',
-                b'0',
-                b'0',
-                HEX_DIGITS[(byte >> 4) as usize],
-                HEX_DIGITS[(byte & 0xF) as usize],
-            ];
-            return writer.extend_from_slice(bytes);
-        }
-    };
-
-    writer.extend_from_slice(s);
-}
--- a/libs/proxy/json/src/value.rs
+++ b/libs/proxy/json/src/value.rs
@@ -1,168 +0,0 @@
-use core::fmt;
-use std::collections::{BTreeMap, HashMap};
-
-use crate::str::{format_escaped_fmt, format_escaped_str};
-use crate::{KeyEncoder, ObjectSer, ValueSer, value_as_list, value_as_object};
-
-/// Write a value to the underlying json representation.
-pub trait ValueEncoder {
-    fn encode(self, v: ValueSer<'_>);
-}
-
-pub(crate) fn write_int(x: impl itoa::Integer, b: &mut Vec<u8>) {
-    b.extend_from_slice(itoa::Buffer::new().format(x).as_bytes());
-}
-
-pub(crate) fn write_float(x: impl ryu::Float, b: &mut Vec<u8>) {
-    b.extend_from_slice(ryu::Buffer::new().format(x).as_bytes());
-}
-
-impl<T: Copy + ValueEncoder> ValueEncoder for &T {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        T::encode(*self, v);
-    }
-}
-
-impl ValueEncoder for &str {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        format_escaped_str(v.buf, self);
-        v.finish();
-    }
-}
-
-impl ValueEncoder for fmt::Arguments<'_> {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        if let Some(s) = self.as_str() {
-            format_escaped_str(v.buf, s);
-        } else {
-            format_escaped_fmt(v.buf, self);
-        }
-        v.finish();
-    }
-}
-
-macro_rules! int {
-    [$($t:ty),*] => {
-        $(
-            impl ValueEncoder for $t {
-                #[inline]
-                fn encode(self, v: ValueSer<'_>) {
-                    write_int(self, v.buf);
-                    v.finish();
-                }
-            }
-        )*
-    };
-}
-
-int![u8, u16, u32, u64, usize, u128];
-int![i8, i16, i32, i64, isize, i128];
-
-macro_rules! float {
-    [$($t:ty),*] => {
-        $(
-            impl ValueEncoder for $t {
-                #[inline]
-                fn encode(self, v: ValueSer<'_>) {
-                    write_float(self, v.buf);
-                    v.finish();
-                }
-            }
-        )*
-    };
-}
-
-float![f32, f64];
-
-impl ValueEncoder for bool {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        v.write_raw_json(if self { b"true" } else { b"false" });
-    }
-}
-
-impl<T: ValueEncoder> ValueEncoder for Option<T> {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        match self {
-            Some(value) => value.encode(v),
-            None => Null.encode(v),
-        }
-    }
-}
-
-impl KeyEncoder for &str {
-    #[inline]
-    fn write_key<'a>(self, obj: &'a mut ObjectSer) -> ValueSer<'a> {
-        let obj = &mut *obj;
-        obj.entry_inner(|b| format_escaped_str(b, self))
-    }
-}
-
-impl KeyEncoder for fmt::Arguments<'_> {
-    #[inline]
-    fn write_key<'a>(self, obj: &'a mut ObjectSer) -> ValueSer<'a> {
-        if let Some(key) = self.as_str() {
-            obj.entry_inner(|b| format_escaped_str(b, key))
-        } else {
-            obj.entry_inner(|b| format_escaped_fmt(b, self))
-        }
-    }
-}
-
-/// Represents the JSON null value.
-pub struct Null;
-
-impl ValueEncoder for Null {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        v.write_raw_json(b"null");
-    }
-}
-
-impl<T: ValueEncoder> ValueEncoder for Vec<T> {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        value_as_list!(|v| {
-            for t in self {
-                v.entry().value(t);
-            }
-        });
-    }
-}
-
-impl<T: Copy + ValueEncoder> ValueEncoder for &[T] {
-    #[inline]
-    fn encode(self, v: ValueSer<'_>) {
-        value_as_list!(|v| {
-            for t in self {
-                v.entry().value(t);
-            }
-        });
-    }
-}
-
-impl<K: KeyEncoder, V: ValueEncoder, S> ValueEncoder for HashMap<K, V, S> {
-    #[inline]
-    fn encode(self, o: ValueSer<'_>) {
-        value_as_object!(|o| {
-            for (k, v) in self {
-                o.entry(k, v);
-            }
-        });
-    }
-}
-
-impl<K: KeyEncoder, V: ValueEncoder> ValueEncoder for BTreeMap<K, V> {
-    #[inline]
-    fn encode(self, o: ValueSer<'_>) {
-        value_as_object!(|o| {
-            for (k, v) in self {
-                o.entry(k, v);
-            }
-        });
-    }
-}
--- a/libs/safekeeper_api/src/models.rs
+++ b/libs/safekeeper_api/src/models.rs
@@ -221,7 +221,7 @@ pub struct TimelineMembershipSwitchRequest {
 pub struct TimelineMembershipSwitchResponse {
    pub previous_conf: Configuration,
    pub current_conf: Configuration,
-    pub last_log_term: Term,
+    pub term: Term,
    pub flush_lsn: Lsn,
 }

--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -28,7 +28,6 @@ use reqwest::Url;
 use storage_broker::Uri;
 use utils::id::{NodeId, TimelineId};
 use utils::logging::{LogFormat, SecretString};
-use utils::serde_percent::Percent;

 use crate::tenant::storage_layer::inmemory_layer::IndexEntry;
 use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME};
@@ -460,16 +459,7 @@ impl PageServerConf {
            metric_collection_endpoint,
            metric_collection_bucket,
            synthetic_size_calculation_interval,
-            disk_usage_based_eviction: Some(disk_usage_based_eviction.unwrap_or(
-                DiskUsageEvictionTaskConfig {
-                    max_usage_pct: Percent::new(80).unwrap(),
-                    min_avail_bytes: 2_000_000_000,
-                    period: Duration::from_secs(60),
-                    #[cfg(feature = "testing")]
-                    mock_statvfs: None,
-                    eviction_order: Default::default(),
-                },
-            )),
+            disk_usage_based_eviction,
            test_remote_failures,
            ondemand_download_behavior_treat_error_as_warn,
            background_task_maximum_delay,
@@ -707,8 +697,6 @@ impl ConfigurableSemaphore {
 #[cfg(test)]
 mod tests {

-    use std::time::Duration;
-
    use camino::Utf8PathBuf;
    use rstest::rstest;
    use utils::id::NodeId;
@@ -810,20 +798,4 @@ mod tests {
        PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir)
            .expect("parse_and_validate");
    }
-
-    #[test]
-    fn test_config_disk_usage_based_eviction_is_valid() {
-        let input = r#"
-            control_plane_api = "http://localhost:6666"
-        "#;
-        let config_toml = toml_edit::de::from_str::<pageserver_api::config::ConfigToml>(input)
-            .expect("disk_usage_based_eviction is valid");
-        let workdir = Utf8PathBuf::from("/nonexistent");
-        let config = PageServerConf::parse_and_validate(NodeId(0), config_toml, &workdir).unwrap();
-        let disk_usage_based_eviction = config.disk_usage_based_eviction.unwrap();
-        assert_eq!(disk_usage_based_eviction.max_usage_pct.get(), 80);
-        assert_eq!(disk_usage_based_eviction.min_avail_bytes, 2_000_000_000);
-        assert_eq!(disk_usage_based_eviction.period, Duration::from_secs(60));
-        assert_eq!(disk_usage_based_eviction.eviction_order, Default::default());
-    }
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -50,7 +50,6 @@ use tokio::io::{AsyncRead, AsyncReadExt as _, AsyncWrite, AsyncWriteExt as _, Bu
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tonic::service::Interceptor as _;
-use tonic::transport::server::TcpConnectInfo;
 use tracing::*;
 use utils::auth::{Claims, Scope, SwappableJwtAuth};
 use utils::id::{TenantId, TenantTimelineId, TimelineId};
@@ -3686,15 +3685,8 @@ impl proto::PageService for GrpcPageServiceHandler {
                yield match result {
                    Ok(resp) => resp,
                    // Convert per-request errors to GetPageResponses as appropriate, or terminate
-                    // the stream with a tonic::Status. Log the error regardless, since
-                    // ObservabilityLayer can't automatically log stream errors.
-                    Err(status) => {
-                        // TODO: it would be nice if we could propagate the get_page() fields here.
-                        span.in_scope(|| {
-                            warn!("request failed with {:?}: {}", status.code(), status.message());
-                        });
-                        page_api::GetPageResponse::try_from_status(status, req_id)?.into()
-                    }
+                    // the stream with a tonic::Status.
+                    Err(err) => page_api::GetPageResponse::try_from_status(err, req_id)?.into(),
                }
            }
        };
@@ -3832,85 +3824,40 @@ impl<S: tonic::server::NamedService> tonic::server::NamedService for Observabili
    const NAME: &'static str = S::NAME; // propagate inner service name
 }

-impl<S, Req, Resp> tower::Service<http::Request<Req>> for ObservabilityLayerService<S>
+impl<S, B> tower::Service<http::Request<B>> for ObservabilityLayerService<S>
 where
-    S: tower::Service<http::Request<Req>, Response = http::Response<Resp>> + Send,
+    S: tower::Service<http::Request<B>>,
    S::Future: Send + 'static,
 {
    type Response = S::Response;
    type Error = S::Error;
    type Future = BoxFuture<'static, Result<Self::Response, Self::Error>>;

-    fn call(&mut self, mut req: http::Request<Req>) -> Self::Future {
+    fn call(&mut self, mut req: http::Request<B>) -> Self::Future {
        // Record the request start time as a request extension.
        //
        // TODO: we should start a timer here instead, but it currently requires a timeline handle
        // and SmgrQueryType, which we don't have yet. Refactor it to provide it later.
        req.extensions_mut().insert(ReceivedAt(Instant::now()));

-        // Extract the peer address and gRPC method.
-        let peer = req
-            .extensions()
-            .get::<TcpConnectInfo>()
-            .and_then(|info| info.remote_addr())
-            .map(|addr| addr.to_string())
-            .unwrap_or_default();
-
-        let method = req
-            .uri()
-            .path()
-            .split('/')
-            .nth(2)
-            .unwrap_or(req.uri().path())
-            .to_string();
-
-        // Create a basic tracing span.
+        // Create a basic tracing span. Enter the span for the current thread (to use it for inner
+        // sync code like interceptors), and instrument the future (to use it for inner async code
+        // like the page service itself).
        //
-        // Enter the span for the current thread and instrument the future. It is not sufficient to
-        // only instrument the future, since it only takes effect after the future is returned and
-        // polled, not when the inner service is called below (e.g. during interceptor execution).
+        // The instrument() call below is not sufficient. It only affects the returned future, and
+        // only takes effect when the caller polls it. Any sync code executed when we call
+        // self.inner.call() below (such as interceptors) runs outside of the returned future, and
+        // is not affected by it. We therefore have to enter the span on the current thread too.
        let span = info_span!(
            "grpc:pageservice",
-            // These will be populated by TenantMetadataInterceptor.
+            // Set by TenantMetadataInterceptor.
            tenant_id = field::Empty,
            timeline_id = field::Empty,
            shard_id = field::Empty,
-            // NB: empty fields must be listed first above. Otherwise, the field names will be
-            // clobbered when the empty fields are populated. They will be output last regardless.
-            %peer,
-            %method,
        );
        let _guard = span.enter();

-        // Construct a future for calling the inner service, but don't await it. This avoids having
-        // to clone the inner service into the future below.
-        let call = self.inner.call(req);
-
-        async move {
-            // Await the inner service call.
-            let result = call.await;
-
-            // Log gRPC error statuses. This won't include request info from handler spans, but it
-            // will catch all errors (even those emitted before handler spans are constructed). Only
-            // unary request errors are logged here, not streaming response errors.
-            if let Ok(ref resp) = result
-                && let Some(status) = tonic::Status::from_header_map(resp.headers())
-                && status.code() != tonic::Code::Ok
-            {
-                // TODO: it would be nice if we could propagate the handler span's request fields
-                // here. This could e.g. be done by attaching the request fields to
-                // tonic::Status::metadata via a proc macro.
-                warn!(
-                    "request failed with {:?}: {}",
-                    status.code(),
-                    status.message()
-                );
-            }
-
-            result
-        }
-        .instrument(span.clone())
-        .boxed()
+        Box::pin(self.inner.call(req).instrument(span.clone()))
    }

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2144,31 +2144,14 @@ impl Timeline {
        debug_assert_current_span_has_tenant_and_timeline_id();

        // Regardless of whether we're going to try_freeze_and_flush
-        // cancel walreceiver to stop ingesting more data asap.
-        //
-        // Note that we're accepting a race condition here where we may
-        // do the final flush below, before walreceiver observes the
-        // cancellation and exits.
-        // This means we may open a new InMemoryLayer after the final flush below.
-        // Flush loop is also still running for a short while, so, in theory, it
-        // could also make its way into the upload queue.
-        //
-        // If we wait for the shutdown of the walreceiver before moving on to the
-        // flush, then that would be avoided. But we don't do it because the
-        // walreceiver entertains reads internally, which means that it possibly
-        // depends on the download of layers. Layer download is only sensitive to
-        // the cancellation of the entire timeline, so cancelling the walreceiver
-        // will have no effect on the individual get requests.
-        // This would cause problems when there is a lot of ongoing downloads or
-        // there is S3 unavailabilities, i.e. detach, deletion, etc would hang,
-        // and we can't deallocate resources of the timeline, etc.
+        // or not, stop ingesting any more data.
        let walreceiver = self.walreceiver.lock().unwrap().take();
        tracing::debug!(
            is_some = walreceiver.is_some(),
            "Waiting for WalReceiverManager..."
        );
        if let Some(walreceiver) = walreceiver {
-            walreceiver.cancel().await;
+            walreceiver.shutdown().await;
        }
        // ... and inform any waiters for newer LSNs that there won't be any.
        self.last_record_lsn.shutdown();
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -63,6 +63,7 @@ pub struct WalReceiver {
    /// All task spawned by [`WalReceiver::start`] and its children are sensitive to this token.
    /// It's a child token of [`Timeline`] so that timeline shutdown can cancel WalReceiver tasks early for `freeze_and_flush=true`.
    cancel: CancellationToken,
+    task: tokio::task::JoinHandle<()>,
 }

 impl WalReceiver {
@@ -79,7 +80,7 @@ impl WalReceiver {
        let loop_status = Arc::new(std::sync::RwLock::new(None));
        let manager_status = Arc::clone(&loop_status);
        let cancel = timeline.cancel.child_token();
-        let _task = WALRECEIVER_RUNTIME.spawn({
+        let task = WALRECEIVER_RUNTIME.spawn({
            let cancel = cancel.clone();
            async move {
                debug_assert_current_span_has_tenant_and_timeline_id();
@@ -120,14 +121,25 @@ impl WalReceiver {
        Self {
            manager_status,
            cancel,
+            task,
        }
    }

    #[instrument(skip_all, level = tracing::Level::DEBUG)]
-    pub async fn cancel(self) {
+    pub async fn shutdown(self) {
        debug_assert_current_span_has_tenant_and_timeline_id();
        debug!("cancelling walreceiver tasks");
        self.cancel.cancel();
+        match self.task.await {
+            Ok(()) => debug!("Shutdown success"),
+            Err(je) if je.is_cancelled() => unreachable!("not used"),
+            Err(je) if je.is_panic() => {
+                // already logged by panic hook
+            }
+            Err(je) => {
+                error!("shutdown walreceiver task join error: {je}")
+            }
+        }
    }

    pub(crate) fn status(&self) -> Option<ConnectionManagerStatus> {
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -100,7 +100,6 @@ pub(super) async fn connection_manager_loop_step(
    // with other streams on this client (other connection managers). When
    // object goes out of scope, stream finishes in drop() automatically.
    let mut broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
-    let mut broker_reset_interval = tokio::time::interval(tokio::time::Duration::from_secs(30));
    debug!("Subscribed for broker timeline updates");

    loop {
@@ -157,10 +156,7 @@ pub(super) async fn connection_manager_loop_step(
            // Got a new update from the broker
            broker_update = broker_subscription.message() /* TODO: review cancellation-safety */ => {
                match broker_update {
-                    Ok(Some(broker_update)) => {
-                        broker_reset_interval.reset();
-                        connection_manager_state.register_timeline_update(broker_update);
-                    },
+                    Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
                    Err(status) => {
                        match status.code() {
                            Code::Unknown if status.message().contains("stream closed because of a broken pipe") || status.message().contains("connection reset") || status.message().contains("error reading a body from connection") => {
@@ -182,14 +178,6 @@ pub(super) async fn connection_manager_loop_step(
                }
            },

-            _ = broker_reset_interval.tick() => {
-                if wait_lsn_status.borrow().is_some() {
-                    tracing::warn!("No broker updates received for a while, but waiting for WAL. Re-setting stream ...")
-                }
-
-                broker_subscription = subscribe_for_timeline_updates(broker_client, id, cancel).await?;
-            },
-
            new_event = async {
                // Reminder: this match arm needs to be cancellation-safe.
                loop {
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -275,12 +275,20 @@ pub(super) async fn handle_walreceiver_connection(
    let copy_stream = replication_client.copy_both_simple(&query).await?;
    let mut physical_stream = pin!(ReplicationStream::new(copy_stream));

-    let mut walingest = WalIngest::new(timeline.as_ref(), startpoint, &ctx)
-        .await
-        .map_err(|e| match e.kind {
-            crate::walingest::WalIngestErrorKind::Cancelled => WalReceiverError::Cancelled,
-            _ => WalReceiverError::Other(e.into()),
-        })?;
+    let walingest_future = WalIngest::new(timeline.as_ref(), startpoint, &ctx);
+    let walingest_res = select! {
+        walingest_res = walingest_future => walingest_res,
+        _ = cancellation.cancelled() => {
+            // We are doing reads in WalIngest::new, and those can hang as they come from the network.
+            // Timeline cancellation hits the walreceiver cancellation token before it hits the timeline global one.
+            debug!("Connection cancelled");
+            return Err(WalReceiverError::Cancelled);
+        },
+    };
+    let mut walingest = walingest_res.map_err(|e| match e.kind {
+        crate::walingest::WalIngestErrorKind::Cancelled => WalReceiverError::Cancelled,
+        _ => WalReceiverError::Other(e.into()),
+    })?;

    let (format, compression) = match protocol {
        PostgresClientProtocol::Interpreted {
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -22,8 +22,7 @@ OBJS = \
 	walproposer.o \
 	walproposer_pg.o \
 	neon_ddl_handler.o \
-	walsender_hooks.o \
-	$(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a
+	walsender_hooks.o

 PG_CPPFLAGS = -I$(libpq_srcdir)
 SHLIB_LINK_INTERNAL = $(libpq)
@@ -55,17 +54,6 @@ WALPROP_OBJS = \
 	neon_utils.o \
 	walproposer_compat.o

-# libcommunicator.a is built by cargo from the Rust sources under communicator/
-# subdirectory. `cargo build` also generates communicator_bindings.h.
-neon.o: communicator/communicator_bindings.h
-
-$(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a communicator/communicator_bindings.h &:
-	(cd $(srcdir)/communicator && cargo build $(CARGO_BUILD_FLAGS) $(CARGO_PROFILE))
-
-# Force `cargo build` every time. Some of the Rust sources might have
-# changed.
-.PHONY: $(NEON_CARGO_ARTIFACT_TARGET_DIR)/libcommunicator.a communicator/communicator_bindings.h
-
 .PHONY: walproposer-lib
 walproposer-lib: CPPFLAGS += -DWALPROPOSER_LIB
 walproposer-lib: libwalproposer.a;
--- a/pgxn/neon/communicator/.gitignore
+++ b/pgxn/neon/communicator/.gitignore
@@ -1,2 +0,0 @@
-# generated file (with cbindgen, see build.rs)
-communicator_bindings.h
--- a/pgxn/neon/communicator/Cargo.toml
+++ b/pgxn/neon/communicator/Cargo.toml
@@ -1,20 +0,0 @@
-[package]
-name = "communicator"
-version = "0.1.0"
-license.workspace = true
-edition.workspace = true
-
-[lib]
-crate-type = ["staticlib"]
-
-[features]
-# 'testing' feature is currently unused in the communicator, but we accept it for convenience of
-# calling build scripts, so that you can pass the same feature to all packages.
-testing = []
-
-[dependencies]
-neon-shmem.workspace = true
-workspace_hack = { version = "0.1", path = "../../../workspace_hack" }
-
-[build-dependencies]
-cbindgen.workspace = true
--- a/pgxn/neon/communicator/README.md
+++ b/pgxn/neon/communicator/README.md
@@ -1,8 +0,0 @@
-This package will evolve into a "compute-pageserver communicator"
-process and machinery. For now, it's just a dummy that doesn't do
-anything interesting, but it allows us to test the compilation and
-linking of Rust code into the Postgres extensions.
-
-At compilation time, pgxn/neon/communicator/ produces a static
-library, libcommunicator.a. It is linked to the neon.so extension
-library.
--- a/pgxn/neon/communicator/build.rs
+++ b/pgxn/neon/communicator/build.rs
@@ -1,20 +0,0 @@
-use std::env;
-
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
-
-    match cbindgen::generate(crate_dir) {
-        Ok(bindings) => {
-            bindings.write_to_file("communicator_bindings.h");
-        }
-        Err(cbindgen::Error::ParseSyntaxError { .. }) => {
-            // This means there was a syntax error in the Rust sources. Don't panic, because
-            // we want the build to continue and the Rust compiler to hit the error. The
-            // Rust compiler produces a better error message than cbindgen.
-            eprintln!("Generating C bindings failed because of a Rust syntax error");
-        }
-        Err(err) => panic!("Unable to generate C bindings: {err:?}"),
-    };
-
-    Ok(())
-}
--- a/pgxn/neon/communicator/cbindgen.toml
+++ b/pgxn/neon/communicator/cbindgen.toml
@@ -1,4 +0,0 @@
-language = "C"
-
-[enum]
-prefix_with_name = true
--- a/pgxn/neon/communicator/src/lib.rs
+++ b/pgxn/neon/communicator/src/lib.rs
@@ -1,6 +0,0 @@
-/// dummy function, just to test linking Rust functions into the C
-/// extension
-#[unsafe(no_mangle)]
-pub extern "C" fn communicator_dummy(arg: u32) -> u32 {
-    arg + 1
-}
--- a/pgxn/neon/neon.c
+++ b/pgxn/neon/neon.c
@@ -43,9 +43,6 @@
 #include "storage/ipc.h"
 #endif

-/* the rust bindings, generated by cbindgen */
-#include "communicator/communicator_bindings.h"
-
 PG_MODULE_MAGIC;
 void		_PG_init(void);

@@ -455,9 +452,6 @@ _PG_init(void)
 	shmem_startup_hook = neon_shmem_startup_hook;
 #endif

-	/* dummy call to a Rust function in the communicator library, to check that it works */
-	(void) communicator_dummy(123);
-
 	pg_init_libpagestore();
 	lfc_init();
 	pg_init_walproposer();
--- a/pgxn/neon/neon_ddl_handler.c
+++ b/pgxn/neon/neon_ddl_handler.c
@@ -98,14 +98,12 @@ typedef struct
 typedef struct DdlHashTable
 {
 	struct DdlHashTable *prev_table;
-	size_t		subtrans_level;
 	HTAB	   *db_table;
 	HTAB	   *role_table;
 } DdlHashTable;

 static DdlHashTable RootTable;
 static DdlHashTable *CurrentDdlTable = &RootTable;
-static int SubtransLevel; /* current nesting level of subtransactions */

 static void
 PushKeyValue(JsonbParseState **state, char *key, char *value)
@@ -334,25 +332,9 @@ SendDeltasToControlPlane()
 	}
 }

-static void
-InitCurrentDdlTableIfNeeded()
-{
-	/* Lazy construction of DllHashTable chain */
-	if (SubtransLevel > CurrentDdlTable->subtrans_level)
-	{
-		DdlHashTable *new_table = MemoryContextAlloc(CurTransactionContext, sizeof(DdlHashTable));
-		new_table->prev_table = CurrentDdlTable;
-		new_table->subtrans_level = SubtransLevel;
-		new_table->role_table = NULL;
-		new_table->db_table = NULL;
-		CurrentDdlTable = new_table;
-	}
-}
-
 static void
 InitDbTableIfNeeded()
 {
-	InitCurrentDdlTableIfNeeded();
 	if (!CurrentDdlTable->db_table)
 	{
 		HASHCTL		db_ctl = {};
@@ -371,7 +353,6 @@ InitDbTableIfNeeded()
 static void
 InitRoleTableIfNeeded()
 {
-	InitCurrentDdlTableIfNeeded();
 	if (!CurrentDdlTable->role_table)
 	{
 		HASHCTL		role_ctl = {};
@@ -390,21 +371,19 @@ InitRoleTableIfNeeded()
 static void
 PushTable()
 {
-	SubtransLevel += 1;
+	DdlHashTable *new_table = MemoryContextAlloc(CurTransactionContext, sizeof(DdlHashTable));
+
+	new_table->prev_table = CurrentDdlTable;
+	new_table->role_table = NULL;
+	new_table->db_table = NULL;
+	CurrentDdlTable = new_table;
 }

 static void
 MergeTable()
 {
-	DdlHashTable *old_table;
+	DdlHashTable *old_table = CurrentDdlTable;

-	Assert(SubtransLevel >= CurrentDdlTable->subtrans_level);
-	if (--SubtransLevel >= CurrentDdlTable->subtrans_level)
-	{
-		return;
-	}
-
-	old_table = CurrentDdlTable;
 	CurrentDdlTable = old_table->prev_table;

 	if (old_table->db_table)
@@ -497,15 +476,11 @@ MergeTable()
 static void
 PopTable()
 {
-	Assert(SubtransLevel >= CurrentDdlTable->subtrans_level);
-	if (--SubtransLevel < CurrentDdlTable->subtrans_level)
-	{
-		/*
-		 * Current table gets freed because it is allocated in aborted
-		 * subtransaction's memory context.
-		 */
-		CurrentDdlTable = CurrentDdlTable->prev_table;
-	}
+	/*
+	 * Current table gets freed because it is allocated in aborted
+	 * subtransaction's memory context.
+	 */
+	CurrentDdlTable = CurrentDdlTable->prev_table;
 }

 static void
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -5,8 +5,9 @@ edition = "2024"
 license.workspace = true

 [features]
-default = []
+default = ["rest_broker"]
 testing = ["dep:tokio-postgres"]
+rest_broker = ["subzero-core", "jsonpath_lib", "ouroboros"]

 [dependencies]
 ahash.workspace = true
@@ -103,6 +104,9 @@ uuid.workspace = true
 x509-cert.workspace = true
 redis.workspace = true
 zerocopy.workspace = true
+subzero-core = { git = "https://github.com/neondatabase-labs/subzero", rev = "0b3d3278f5f9ac9311a7280cb1676de80e021f06", features = ["postgresql"], optional = true }
+jsonpath_lib = { version = "0.3.0", optional = true }
+ouroboros = { version = "0.18", optional = true }

 # jwt stuff
 jose-jwa = "0.1.2"
--- a/proxy/README.md
+++ b/proxy/README.md
@@ -170,8 +170,8 @@ Create a configuration file called `local_proxy.json` in the root of the repo (u

 Start the local proxy:
 ```sh
-cargo run --bin local_proxy -- \
-  --disable_pg_session_jwt true \
+cargo run --bin local_proxy --features testing -- \
+  --disable-pg-session-jwt \
  --http 0.0.0.0:7432
 ```

@@ -180,6 +180,7 @@ Start the auth broker:
 LOGFMT=text OTEL_SDK_DISABLED=true cargo run --bin proxy --features testing -- \
  -c server.crt -k server.key \
  --is-auth-broker true \
+  --is-rest-broker true \
  --wss 0.0.0.0:8080 \
  --http 0.0.0.0:7002 \
  --auth-backend local
@@ -197,3 +198,9 @@ curl -k "https://foo.local.neon.build:8080/sql" \
  -H "neon-connection-string: postgresql://authenticator@foo.local.neon.build/database" \
  -d '{"query":"select 1","params":[]}'
 ```
+
+Make a REST request against the auth broker (running as a REST broker):
+```sh
+curl -k "https://foo.local.neon.build:8080/database/rest/v1/items?select=id,name&id=eq.1" \
+-H "Authorization: Bearer $NEON_JWT"
+```
--- a/proxy/src/binary/local_proxy.rs
+++ b/proxy/src/binary/local_proxy.rs
@@ -20,6 +20,9 @@ use crate::auth::backend::jwt::JwkCache;
 use crate::auth::backend::local::LocalBackend;
 use crate::auth::{self};
 use crate::cancellation::CancellationHandler;
+
+#[cfg(feature = "rest_broker")]
+use crate::config::RestConfig;
 use crate::config::{
    self, AuthenticationConfig, ComputeConfig, HttpConfig, ProxyConfig, RetryConfig,
    refresh_config_loop,
@@ -276,6 +279,11 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
            accept_jwts: true,
            console_redirect_confirmation_timeout: Duration::ZERO,
        },
+        #[cfg(feature = "rest_broker")]
+        rest_config: RestConfig {
+            is_rest_broker: false,
+            db_schema_cache: None,
+        },
        proxy_protocol_v2: config::ProxyProtocolV2::Rejected,
        handshake_timeout: Duration::from_secs(10),
        wake_compute_retry_config: RetryConfig::parse(RetryConfig::WAKE_COMPUTE_DEFAULT_VALUES)?,
--- a/proxy/src/binary/proxy.rs
+++ b/proxy/src/binary/proxy.rs
@@ -21,7 +21,7 @@ use tokio::net::TcpListener;
 use tokio::sync::Notify;
 use tokio::task::JoinSet;
 use tokio_util::sync::CancellationToken;
-use tracing::{error, info, warn};
+use tracing::{Instrument, error, info, warn};
 use utils::sentry_init::init_sentry;
 use utils::{project_build_tag, project_git_version};

@@ -31,6 +31,8 @@ use crate::auth::backend::local::LocalBackend;
 use crate::auth::backend::{ConsoleRedirectBackend, MaybeOwned};
 use crate::batch::BatchQueue;
 use crate::cancellation::{CancellationHandler, CancellationProcessor};
+#[cfg(feature = "rest_broker")]
+use crate::config::RestConfig;
 #[cfg(any(test, feature = "testing"))]
 use crate::config::refresh_config_loop;
 use crate::config::{
@@ -47,6 +49,8 @@ use crate::redis::{elasticache, notifications};
 use crate::scram::threadpool::ThreadPool;
 use crate::serverless::GlobalConnPoolOptions;
 use crate::serverless::cancel_set::CancelSet;
+#[cfg(feature = "rest_broker")]
+use crate::serverless::rest::DbSchemaCache;
 use crate::tls::client_config::compute_client_config_with_root_certs;
 #[cfg(any(test, feature = "testing"))]
 use crate::url::ApiUrl;
@@ -195,9 +199,7 @@ struct ProxyCliArgs {
    #[clap(long, default_value = config::ProjectInfoCacheOptions::CACHE_DEFAULT_OPTIONS)]
    project_info_cache: String,
    /// cache for all valid endpoints
-    // TODO: remove after a couple of releases.
-    #[clap(long, default_value_t = String::new())]
-    #[deprecated]
+    #[clap(long, default_value = config::EndpointCacheConfig::CACHE_DEFAULT_OPTIONS)]
    endpoint_cache_config: String,
    #[clap(flatten)]
    parquet_upload: ParquetUploadArgs,
@@ -246,10 +248,12 @@ struct ProxyCliArgs {

    /// if this is not local proxy, this toggles whether we accept Postgres REST requests
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
+    #[cfg(feature = "rest_broker")]
    is_rest_broker: bool,

    /// cache for `db_schema_cache` introspection (use `size=0` to disable)
    #[clap(long, default_value = "size=1000,ttl=1h")]
+    #[cfg(feature = "rest_broker")]
    db_schema_cache: String,
 }

@@ -517,6 +521,17 @@ pub async fn run() -> anyhow::Result<()> {
    ));
    maintenance_tasks.spawn(control_plane::mgmt::task_main(mgmt_listener));

+    // add a task to flush the db_schema cache every 10 minutes
+    #[cfg(feature = "rest_broker")]
+    if let Some(db_schema_cache) = &config.rest_config.db_schema_cache {
+        maintenance_tasks.spawn(async move {
+            loop {
+                tokio::time::sleep(Duration::from_secs(600)).await;
+                db_schema_cache.flush();
+            }
+        });
+    }
+
    if let Some(metrics_config) = &config.metric_collection {
        // TODO: Add gc regardles of the metric collection being enabled.
        maintenance_tasks.spawn(usage_metrics::task_main(metrics_config));
@@ -560,6 +575,13 @@ pub async fn run() -> anyhow::Result<()> {
                }
            }
        }
+
+        // listen for notifications of new projects/endpoints/branches
+        let cache = api.caches.endpoints_cache.clone();
+        let span = tracing::info_span!("endpoints_cache");
+        maintenance_tasks.spawn(
+            async move { cache.do_read(client, cancellation_token.clone()).await }.instrument(span),
+        );
    }

    let maintenance = loop {
@@ -677,6 +699,28 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        timeout: Duration::from_secs(2),
    };

+    #[cfg(feature = "rest_broker")]
+    let rest_config = {
+        let db_schema_cache_config: CacheOptions = args.db_schema_cache.parse()?;
+        info!("Using DbSchemaCache with options={db_schema_cache_config:?}");
+
+        let db_schema_cache = if args.is_rest_broker {
+            Some(DbSchemaCache::new(
+                "db_schema_cache",
+                db_schema_cache_config.size,
+                db_schema_cache_config.ttl,
+                true,
+            ))
+        } else {
+            None
+        };
+
+        RestConfig {
+            is_rest_broker: args.is_rest_broker,
+            db_schema_cache,
+        }
+    };
+
    let config = ProxyConfig {
        tls_config,
        metric_collection,
@@ -689,6 +733,8 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        connect_to_compute: compute_config,
        #[cfg(feature = "testing")]
        disable_pg_session_jwt: false,
+        #[cfg(feature = "rest_broker")]
+        rest_config,
    };

    let config = Box::leak(Box::new(config));
@@ -707,15 +753,18 @@ fn build_auth_backend(
            let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
            let project_info_cache_config: ProjectInfoCacheOptions =
                args.project_info_cache.parse()?;
+            let endpoint_cache_config: config::EndpointCacheConfig =
+                args.endpoint_cache_config.parse()?;

            info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
            info!(
                "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
            );
-
+            info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
            let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
                wake_compute_cache_config,
                project_info_cache_config,
+                endpoint_cache_config,
            )));

            let config::ConcurrencyLockOptions {
@@ -785,15 +834,18 @@ fn build_auth_backend(
            let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
            let project_info_cache_config: ProjectInfoCacheOptions =
                args.project_info_cache.parse()?;
+            let endpoint_cache_config: config::EndpointCacheConfig =
+                args.endpoint_cache_config.parse()?;

            info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
            info!(
                "Using AllowedIpsCache (wake_compute) with options={project_info_cache_config:?}"
            );
-
+            info!("Using EndpointCacheConfig with options={endpoint_cache_config:?}");
            let caches = Box::leak(Box::new(control_plane::caches::ApiCaches::new(
                wake_compute_cache_config,
                project_info_cache_config,
+                endpoint_cache_config,
            )));

            let config::ConcurrencyLockOptions {
--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -0,0 +1,283 @@
+use std::convert::Infallible;
+use std::future::pending;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::{Arc, Mutex};
+
+use clashmap::ClashSet;
+use redis::streams::{StreamReadOptions, StreamReadReply};
+use redis::{AsyncCommands, FromRedisValue, Value};
+use serde::Deserialize;
+use tokio_util::sync::CancellationToken;
+use tracing::info;
+
+use crate::config::EndpointCacheConfig;
+use crate::context::RequestContext;
+use crate::ext::LockExt;
+use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
+use crate::metrics::{Metrics, RedisErrors, RedisEventsCount};
+use crate::rate_limiter::GlobalRateLimiter;
+use crate::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
+use crate::types::EndpointId;
+
+// TODO: this could be an enum, but events in Redis need to be fixed first.
+// ProjectCreated was sent with type:branch_created. So we ignore type.
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+struct ControlPlaneEvent {
+    endpoint_created: Option<EndpointCreated>, // payload of an endpoint-created event, if any
+    branch_created: Option<BranchCreated>, // payload of a branch-created event, if any
+    project_created: Option<ProjectCreated>, // payload of a project-created event, if any
+    #[serde(rename = "type")]
+    _type: Option<String>, // JSON `type` tag; deserialized but never consulted (see TODO above)
+}
+
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+struct EndpointCreated {
+    endpoint_id: EndpointIdInt, // id that `insert_event` adds to `EndpointsCache::endpoints`
+}
+
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+struct BranchCreated {
+    branch_id: BranchIdInt, // id that `insert_event` adds to `EndpointsCache::branches`
+}
+
+#[derive(Deserialize, Debug, Clone, PartialEq)]
+struct ProjectCreated {
+    project_id: ProjectIdInt, // id that `insert_event` adds to `EndpointsCache::projects`
+}
+
+impl TryFrom<&Value> for ControlPlaneEvent {
+    type Error = anyhow::Error;
+
+    /// Reads the Redis value as a string and parses that string as a JSON event.
+    ///
+    /// Fails if the value cannot be converted to a string or the JSON does not
+    /// deserialize into a [`ControlPlaneEvent`].
+    fn try_from(value: &Value) -> Result<Self, Self::Error> {
+        let payload: String = FromRedisValue::from_redis_value(value)?;
+        let event = serde_json::from_str(&payload)?;
+        Ok(event)
+    }
+}
+
+pub struct EndpointsCache {
+    config: EndpointCacheConfig,
+    endpoints: ClashSet<EndpointIdInt>, // ids taken from `endpoint_created` events
+    branches: ClashSet<BranchIdInt>, // ids taken from `branch_created` events
+    projects: ClashSet<ProjectIdInt>, // ids taken from `project_created` events
+    ready: AtomicBool, // set after the initial stream read; cleared again on redis errors
+    limiter: Arc<Mutex<GlobalRateLimiter>>, // consulted by `is_valid` before rejecting an unknown id
+}
+
+impl EndpointsCache {
+    /// Creates an empty cache that is not yet ready, with a limiter built from
+    /// `config.limiter_info`.
+    pub(crate) fn new(config: EndpointCacheConfig) -> Self {
+        let limiter = GlobalRateLimiter::new(config.limiter_info.clone());
+
+        Self {
+            config,
+            endpoints: ClashSet::new(),
+            branches: ClashSet::new(),
+            projects: ClashSet::new(),
+            ready: AtomicBool::new(false),
+            limiter: Arc::new(Mutex::new(limiter)),
+        }
+    }
+
+    pub(crate) fn is_valid(&self, ctx: &RequestContext, endpoint: &EndpointId) -> bool {
+        if !self.ready.load(Ordering::Acquire) {
+            // the endpoint cache is not yet fully initialised, so nothing is filtered or recorded.
+            return true;
+        }
+
+        if !self.should_reject(endpoint) {
+            ctx.set_rejected(false);
+            return true;
+        }
+
+        // report that we might want to reject this endpoint (log_connect turns this into a metric)
+        ctx.set_rejected(true);
+
+        // If cache is disabled, just collect the metrics and return.
+        if self.config.disable_cache {
+            return true;
+        }
+
+        // If the limiter allows, we can pretend like it's valid
+        // (in case it is, due to redis channel lag).
+        if self.limiter.lock_propagate_poison().check() {
+            return true;
+        }
+
+        // endpoint not found, and there's too much load: tell the caller to reject it.
+        false
+    }
+
+    /// Returns `true` when `endpoint` is absent from the set that matches its kind.
+    ///
+    /// Ids for which `is_endpoint()` holds are looked up among endpoints, ids for which
+    /// `is_branch()` holds among branches, and everything else among projects.
+    fn should_reject(&self, endpoint: &EndpointId) -> bool {
+        // An id that was never interned cannot be in any set, hence every `None => true`.
+        if endpoint.is_endpoint() {
+            match EndpointIdInt::get(endpoint) {
+                Some(id) => !self.endpoints.contains(&id),
+                None => true,
+            }
+        } else if endpoint.is_branch() {
+            match BranchIdInt::get(endpoint) {
+                Some(id) => !self.branches.contains(&id),
+                None => true,
+            }
+        } else {
+            match ProjectIdInt::get(endpoint) {
+                Some(id) => !self.projects.contains(&id),
+                None => true,
+            }
+        }
+    }
+
+    /// Stores the id carried by `event` in the matching set and bumps the per-kind counter.
+    ///
+    /// Only the first populated payload is used, checked in the order endpoint, branch,
+    /// project; an event without any payload is ignored and nothing is counted.
+    fn insert_event(&self, event: ControlPlaneEvent) {
+        let kind = if let Some(created) = event.endpoint_created {
+            self.endpoints.insert(created.endpoint_id);
+            RedisEventsCount::EndpointCreated
+        } else if let Some(created) = event.branch_created {
+            self.branches.insert(created.branch_id);
+            RedisEventsCount::BranchCreated
+        } else if let Some(created) = event.project_created {
+            self.projects.insert(created.project_id);
+            RedisEventsCount::ProjectCreated
+        } else {
+            return;
+        };
+
+        Metrics::get().proxy.redis_events_count.inc(kind);
+    }
+
+    pub async fn do_read(
+        &self,
+        mut con: ConnectionWithCredentialsProvider,
+        cancellation_token: CancellationToken,
+    ) -> anyhow::Result<Infallible> {
+        let mut last_id = "0-0".to_string(); // stream position, kept across retries
+        loop {
+            if let Err(e) = con.connect().await {
+                tracing::error!("error connecting to redis: {:?}", e);
+                self.ready.store(false, Ordering::Release); // is_valid stops filtering
+            }
+            if let Err(e) = self.read_from_stream(&mut con, &mut last_id).await {
+                tracing::error!("error reading from redis: {:?}", e);
+                self.ready.store(false, Ordering::Release);
+            }
+            if cancellation_token.is_cancelled() { // only checked after a read attempt returns
+                info!("cancellation token is cancelled, exiting");
+                // Maintenance tasks run forever. Sleep forever when canceled.
+                pending::<()>().await;
+            }
+            tokio::time::sleep(self.config.retry_interval).await; // pause before the next attempt
+        }
+    }
+
+    async fn read_from_stream(
+        &self,
+        con: &mut ConnectionWithCredentialsProvider,
+        last_id: &mut String,
+    ) -> anyhow::Result<()> {
+        tracing::info!("reading endpoints/branches/projects from redis");
+        self.batch_read(
+            con,
+            StreamReadOptions::default().count(self.config.initial_batch_size),
+            last_id,
+            true, // return_when_finish: come back once the backlog has been read
+        )
+        .await?;
+        tracing::info!("ready to filter user requests");
+        self.ready.store(true, Ordering::Release); // from here on is_valid consults the sets
+        self.batch_read(
+            con,
+            StreamReadOptions::default()
+                .count(self.config.default_batch_size)
+                .block(self.config.xread_timeout.as_millis() as usize), // blocking read, timeout in ms
+            last_id,
+            false, // return_when_finish: keep following the stream; returns only on error
+        )
+        .await
+    }
+
+    /// Reads entries from the configured stream starting after `last_id` and feeds every
+    /// parsed event into the cache, advancing `last_id` to the id of each processed entry.
+    ///
+    /// With `return_when_finish == true` the call returns once the backlog looks drained:
+    /// either a read came back empty, or a batch held at most `default_batch_size` entries.
+    /// With `return_when_finish == false` it keeps reading and only returns on error.
+    ///
+    /// Errors: any redis failure, a reply that names more than one stream, or an empty
+    /// reply while nothing has ever been read from the stream (`last_id` still at `0-0`).
+    /// Values that fail to parse are counted and logged, but do not abort the read.
+    async fn batch_read(
+        &self,
+        conn: &mut ConnectionWithCredentialsProvider,
+        opts: StreamReadOptions,
+        last_id: &mut String,
+        return_when_finish: bool,
+    ) -> anyhow::Result<()> {
+        let mut total: usize = 0;
+        loop {
+            let mut res: StreamReadReply = conn
+                .xread_options(&[&self.config.stream_name], &[last_id.as_str()], &opts)
+                .await?;
+
+            if res.keys.is_empty() {
+                if return_when_finish {
+                    // An empty reply is only fatal if the stream never yielded anything.
+                    // `last_id` survives reconnects (see `do_read`), so a resumed read that
+                    // finds nothing new is simply caught up, not a sign of an empty stream.
+                    if total != 0 || last_id.as_str() != "0-0" {
+                        break;
+                    }
+                    anyhow::bail!(
+                        "Redis stream {} is empty, cannot be used to filter endpoints",
+                        self.config.stream_name
+                    );
+                }
+                // If we are not returning when finish, we should wait for more data.
+                continue;
+            }
+            if res.keys.len() != 1 {
+                anyhow::bail!("Cannot read from redis stream {}", self.config.stream_name);
+            }
+
+            let key = res.keys.pop().expect("Checked length above");
+            let len = key.ids.len();
+            for stream_id in key.ids {
+                total += 1;
+                for value in stream_id.map.values() {
+                    match value.try_into() {
+                        Ok(event) => self.insert_event(event),
+                        Err(err) => {
+                            Metrics::get().proxy.redis_errors_total.inc(RedisErrors {
+                                channel: &self.config.stream_name,
+                            });
+                            tracing::error!("error parsing value {value:?}: {err:?}");
+                        }
+                    }
+                }
+                // Progress is logged at 1, 2, 4, 8, ... entries to keep the log volume low.
+                if total.is_power_of_two() {
+                    tracing::debug!("endpoints read {}", total);
+                }
+                *last_id = stream_id.id;
+            }
+            // A small batch during the initial read means the backlog has been consumed.
+            if return_when_finish && len <= self.config.default_batch_size {
+                break;
+            }
+        }
+        tracing::info!("read {} endpoints/branches/projects from redis", total);
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests { // unit tests for control-plane event deserialization
+    use super::*;
+
+    #[test]
+    fn test_parse_control_plane_event() {
+        let s = r#"{"branch_created":null,"endpoint_created":{"endpoint_id":"ep-rapid-thunder-w0qqw2q9"},"project_created":null,"type":"endpoint_created"}"#;
+
+        let endpoint_id: EndpointId = "ep-rapid-thunder-w0qqw2q9".into();
+
+        assert_eq!(
+            serde_json::from_str::<ControlPlaneEvent>(s).unwrap(),
+            ControlPlaneEvent {
+                endpoint_created: Some(EndpointCreated {
+                    endpoint_id: endpoint_id.into(),
+                }),
+                branch_created: None, // JSON null maps to None
+                project_created: None, // JSON null maps to None
+                _type: Some("endpoint_created".into()),
+            }
+        );
+    }
+}
--- a/proxy/src/cache/mod.rs
+++ b/proxy/src/cache/mod.rs
@@ -1,4 +1,5 @@
 pub(crate) mod common;
+pub(crate) mod endpoints;
 pub(crate) mod project_info;
 mod timed_lru;

--- a/proxy/src/cache/timed_lru.rs
+++ b/proxy/src/cache/timed_lru.rs
@@ -204,6 +204,10 @@ impl<K: Hash + Eq + Clone, V: Clone> TimedLru<K, V> {
        self.insert_raw_ttl(key, value, ttl, false);
    }

+    pub(crate) fn insert(&self, key: K, value: V) {
+        self.insert_raw_ttl(key, value, self.ttl, self.update_ttl_on_retrieval); // ttl and flag come from the cache itself
+    }
+
    pub(crate) fn insert_unit(&self, key: K, value: V) -> (Option<V>, Cached<&Self, ()>) {
        let (_, old) = self.insert_raw(key.clone(), value);

@@ -214,6 +218,28 @@ impl<K: Hash + Eq + Clone, V: Clone> TimedLru<K, V> {

        (old, cached)
    }
+
+    /// Removes every entry whose `expires_at` is not later than the current instant.
+    pub(crate) fn flush(&self) {
+        let now = Instant::now();
+        let mut cache = self.cache.lock();
+
+        // Keys are cloned out first so the map is not modified while it is being iterated.
+        let stale: Vec<_> = cache
+            .iter()
+            .filter(|(_, entry)| entry.expires_at <= now)
+            .map(|(key, _)| key.clone())
+            .collect();
+
+        for key in &stale {
+            cache.remove(key);
+        }
+    }
 }

 impl<K: Hash + Eq, V: Clone> TimedLru<K, V> {
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -18,10 +18,12 @@ use crate::control_plane::locks::ApiLocks;
 use crate::control_plane::messages::{EndpointJwksResponse, JwksSettings};
 use crate::ext::TaskExt;
 use crate::intern::RoleNameInt;
-use crate::rate_limiter::{RateLimitAlgorithm, RateLimiterConfig};
+use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm, RateLimiterConfig};
 use crate::scram::threadpool::ThreadPool;
 use crate::serverless::GlobalConnPoolOptions;
 use crate::serverless::cancel_set::CancelSet;
+#[cfg(feature = "rest_broker")]
+use crate::serverless::rest::DbSchemaCache;
 pub use crate::tls::server_config::{TlsConfig, configure_tls};
 use crate::types::{Host, RoleName};

@@ -30,6 +32,8 @@ pub struct ProxyConfig {
    pub metric_collection: Option<MetricCollectionConfig>,
    pub http_config: HttpConfig,
    pub authentication_config: AuthenticationConfig,
+    #[cfg(feature = "rest_broker")]
+    pub rest_config: RestConfig,
    pub proxy_protocol_v2: ProxyProtocolV2,
    pub handshake_timeout: Duration,
    pub wake_compute_retry_config: RetryConfig,
@@ -80,6 +84,85 @@ pub struct AuthenticationConfig {
    pub console_redirect_confirmation_timeout: tokio::time::Duration,
 }

+#[cfg(feature = "rest_broker")]
+pub struct RestConfig {
+    pub is_rest_broker: bool, // request_handler only routes to `rest::handle` when this is set
+    pub db_schema_cache: Option<DbSchemaCache>,
+}
+
+#[derive(Debug)]
+pub struct EndpointCacheConfig {
+    /// Batch size to receive all endpoints on the startup.
+    pub initial_batch_size: usize,
+    /// Batch size for later reads; an initial batch of at most this size ends the initial read.
+    pub default_batch_size: usize,
+    /// Block timeout for the stream read operation that follows the initial read.
+    pub xread_timeout: Duration,
+    /// Stream name to read from.
+    pub stream_name: String,
+    /// Rate buckets of the limiter that decides whether an unknown endpoint is still let through.
+    pub limiter_info: Vec<RateBucketInfo>,
+    /// Disable cache.
+    /// If true, no request is rejected, but all statistics are still reported.
+    pub disable_cache: bool,
+    /// Pause before the stream reader reconnects and retries.
+    pub retry_interval: Duration,
+}
+
+impl EndpointCacheConfig {
+    /// Default options for the endpoint cache (`EndpointsCache`).
+    /// Notice that by default `disable_cache=true`: lookups are only recorded, nothing is rejected.
+    pub const CACHE_DEFAULT_OPTIONS: &'static str = "initial_batch_size=1000,default_batch_size=10,xread_timeout=5m,stream_name=controlPlane,disable_cache=true,limiter_info=1000@1s,retry_interval=1s";
+
+    /// Parse cache options passed via cmdline.
+    /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
+    fn parse(options: &str) -> anyhow::Result<Self> {
+        let mut initial_batch_size = None;
+        let mut default_batch_size = None;
+        let mut xread_timeout = None;
+        let mut stream_name = None;
+        let mut limiter_info = vec![]; // optional and repeatable: every occurrence adds a bucket
+        let mut disable_cache = false; // optional: stays false when the key is omitted
+        let mut retry_interval = None;
+
+        for option in options.split(',') {
+            let (key, value) = option
+                .split_once('=')
+                .with_context(|| format!("bad key-value pair: {option}"))?;
+
+            match key {
+                "initial_batch_size" => initial_batch_size = Some(value.parse()?),
+                "default_batch_size" => default_batch_size = Some(value.parse()?),
+                "xread_timeout" => xread_timeout = Some(humantime::parse_duration(value)?),
+                "stream_name" => stream_name = Some(value.to_string()),
+                "limiter_info" => limiter_info.push(RateBucketInfo::from_str(value)?),
+                "disable_cache" => disable_cache = value.parse()?,
+                "retry_interval" => retry_interval = Some(humantime::parse_duration(value)?),
+                unknown => bail!("unknown key: {unknown}"),
+            }
+        }
+        RateBucketInfo::validate(&mut limiter_info)?;
+
+        Ok(Self {
+            initial_batch_size: initial_batch_size.context("missing `initial_batch_size`")?,
+            default_batch_size: default_batch_size.context("missing `default_batch_size`")?,
+            xread_timeout: xread_timeout.context("missing `xread_timeout`")?,
+            stream_name: stream_name.context("missing `stream_name`")?,
+            disable_cache,
+            limiter_info,
+            retry_interval: retry_interval.context("missing `retry_interval`")?,
+        })
+    }
+}
+
+impl FromStr for EndpointCacheConfig {
+    type Err = anyhow::Error;
+
+    /// Parses comma-separated `key=value` options; any failure is wrapped in a
+    /// context message that quotes the full option string.
+    fn from_str(options: &str) -> Result<Self, Self::Err> {
+        Self::parse(options)
+            .with_context(|| format!("failed to parse endpoint cache options '{options}'"))
+    }
+}
 #[derive(Debug)]
 pub struct MetricBackupCollectionConfig {
    pub remote_storage_config: Option<RemoteStorageConfig>,
--- a/proxy/src/context/mod.rs
+++ b/proxy/src/context/mod.rs
@@ -7,7 +7,7 @@ use once_cell::sync::OnceCell;
 use smol_str::SmolStr;
 use tokio::sync::mpsc;
 use tracing::field::display;
-use tracing::{Span, error, info_span};
+use tracing::{Span, debug, error, info_span};
 use try_lock::TryLock;
 use uuid::Uuid;

@@ -15,7 +15,10 @@ use self::parquet::RequestData;
 use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
 use crate::error::ErrorKind;
 use crate::intern::{BranchIdInt, ProjectIdInt};
-use crate::metrics::{LatencyAccumulated, LatencyTimer, Metrics, Protocol, Waiting};
+use crate::metrics::{
+    ConnectOutcome, InvalidEndpointsGroup, LatencyAccumulated, LatencyTimer, Metrics, Protocol,
+    Waiting,
+};
 use crate::pqproto::StartupMessageParams;
 use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra};
 use crate::types::{DbName, EndpointId, RoleName};
@@ -67,6 +70,8 @@ struct RequestContextInner {
    // This sender is only used to log the length of session in case of success.
    disconnect_sender: Option<mpsc::UnboundedSender<RequestData>>,
    pub(crate) latency_timer: LatencyTimer,
+    // Whether proxy decided that it's not a valid endpoint and rejected it before going to cplane.
+    rejected: Option<bool>,
    disconnect_timestamp: Option<chrono::DateTime<Utc>>,
 }

@@ -101,6 +106,7 @@ impl Clone for RequestContext {
            auth_method: inner.auth_method.clone(),
            jwt_issuer: inner.jwt_issuer.clone(),
            success: inner.success,
+            rejected: inner.rejected,
            cold_start_info: inner.cold_start_info,
            pg_options: inner.pg_options.clone(),
            testodrome_query_id: inner.testodrome_query_id.clone(),
@@ -145,6 +151,7 @@ impl RequestContext {
            auth_method: None,
            jwt_issuer: None,
            success: false,
+            rejected: None,
            cold_start_info: ColdStartInfo::Unknown,
            pg_options: None,
            testodrome_query_id: None,
@@ -176,6 +183,11 @@ impl RequestContext {
        )
    }

+    /// Records whether the endpoint cache flagged this request's endpoint as unknown.
+    pub(crate) fn set_rejected(&self, rejected: bool) {
+        self.0
+            .try_lock()
+            .expect("should not deadlock")
+            .rejected = Some(rejected);
+    }
+
    pub(crate) fn set_cold_start_info(&self, info: ColdStartInfo) {
        self.0
            .try_lock()
@@ -449,6 +461,38 @@ impl RequestContextInner {
    }

    fn log_connect(&mut self) {
+        let outcome = if self.success {
+            ConnectOutcome::Success
+        } else {
+            ConnectOutcome::Failed
+        };
+
+        // TODO: get rid of entirely/refactor
+        // check for false positives
+        // AND false negatives
+        if let Some(rejected) = self.rejected {
+            let ep = self
+                .endpoint_id
+                .as_ref()
+                .map(|x| x.as_str())
+                .unwrap_or_default();
+            // This makes sense only if cache is disabled
+            debug!(
+                ?outcome,
+                ?rejected,
+                ?ep,
+                "check endpoint is valid with outcome"
+            );
+            Metrics::get()
+                .proxy
+                .invalid_endpoints_total
+                .inc(InvalidEndpointsGroup {
+                    protocol: self.protocol,
+                    rejected: rejected.into(),
+                    outcome,
+                });
+        }
+
        if let Some(tx) = self.sender.take() {
            // If type changes, this error handling needs to be updated.
            let tx: mpsc::UnboundedSender<RequestData> = tx;
--- a/proxy/src/control_plane/client/cplane_proxy_v1.rs
+++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs
@@ -159,6 +159,13 @@ impl NeonControlPlaneClient {
        ctx: &RequestContext,
        endpoint: &EndpointId,
    ) -> Result<Vec<AuthRule>, GetEndpointJwksError> {
+        if !self
+            .caches
+            .endpoints_cache
+            .is_valid(ctx, &endpoint.normalize())
+        {
+            return Err(GetEndpointJwksError::EndpointNotFound);
+        }
        let request_id = ctx.session_id().to_string();
        async {
            let request = self
@@ -293,6 +300,11 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
            return Ok(secret);
        }

+        if !self.caches.endpoints_cache.is_valid(ctx, normalized_ep) {
+            info!("endpoint is not valid, skipping the request");
+            return Err(GetAuthInfoError::UnknownEndpoint);
+        }
+
        let auth_info = self.do_get_auth_req(ctx, endpoint, role).await?;

        let control = EndpointAccessControl {
@@ -334,6 +346,11 @@ impl super::ControlPlaneApi for NeonControlPlaneClient {
            return Ok(control);
        }

+        if !self.caches.endpoints_cache.is_valid(ctx, normalized_ep) {
+            info!("endpoint is not valid, skipping the request");
+            return Err(GetAuthInfoError::UnknownEndpoint);
+        }
+
        let auth_info = self.do_get_auth_req(ctx, endpoint, role).await?;

        let control = EndpointAccessControl {
--- a/proxy/src/control_plane/client/mod.rs
+++ b/proxy/src/control_plane/client/mod.rs
@@ -8,13 +8,14 @@ use std::time::Duration;

 use clashmap::ClashMap;
 use tokio::time::Instant;
-use tracing::{debug, info};
+use tracing::debug;

 use super::{EndpointAccessControl, RoleAccessControl};
 use crate::auth::backend::ComputeUserInfo;
 use crate::auth::backend::jwt::{AuthRule, FetchAuthRules, FetchAuthRulesError};
+use crate::cache::endpoints::EndpointsCache;
 use crate::cache::project_info::ProjectInfoCacheImpl;
-use crate::config::{CacheOptions, ProjectInfoCacheOptions};
+use crate::config::{CacheOptions, EndpointCacheConfig, ProjectInfoCacheOptions};
 use crate::context::RequestContext;
 use crate::control_plane::{CachedNodeInfo, ControlPlaneApi, NodeInfoCache, errors};
 use crate::error::ReportableError;
@@ -120,12 +121,15 @@ pub struct ApiCaches {
    pub(crate) node_info: NodeInfoCache,
    /// Cache which stores project_id -> endpoint_ids mapping.
    pub project_info: Arc<ProjectInfoCacheImpl>,
+    /// List of all valid endpoints.
+    pub endpoints_cache: Arc<EndpointsCache>,
 }

 impl ApiCaches {
    pub fn new(
        wake_compute_cache_config: CacheOptions,
        project_info_cache_config: ProjectInfoCacheOptions,
+        endpoint_cache_config: EndpointCacheConfig,
    ) -> Self {
        Self {
            node_info: NodeInfoCache::new(
@@ -135,6 +139,7 @@ impl ApiCaches {
                true,
            ),
            project_info: Arc::new(ProjectInfoCacheImpl::new(project_info_cache_config)),
+            endpoints_cache: Arc::new(EndpointsCache::new(endpoint_cache_config)),
        }
    }
 }
@@ -229,7 +234,8 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
                // temporary lock a single shard and then clear any semaphores that aren't currently checked out
                // race conditions: if strong_count == 1, there's no way that it can increase while the shard is locked
                // therefore releasing it is safe from race conditions
-                info!(
+                debug!(
+                    //FIXME: is anything depending on this being info?
                    name = self.name,
                    shard = i,
                    "performing epoch reclamation on api lock"
--- a/proxy/src/control_plane/errors.rs
+++ b/proxy/src/control_plane/errors.rs
@@ -99,6 +99,10 @@ pub(crate) enum GetAuthInfoError {

    #[error(transparent)]
    ApiError(ControlPlaneError),
+
+    /// Proxy does not know about the endpoint in advance
+    #[error("endpoint not found in endpoint cache")]
+    UnknownEndpoint,
 }

 // This allows more useful interactions than `#[from]`.
@@ -115,6 +119,8 @@ impl UserFacingError for GetAuthInfoError {
            Self::BadSecret => REQUEST_FAILED.to_owned(),
            // However, API might return a meaningful error.
            Self::ApiError(e) => e.to_string_client(),
+            // pretend like control plane returned an error.
+            Self::UnknownEndpoint => REQUEST_FAILED.to_owned(),
        }
    }
 }
@@ -124,6 +130,8 @@ impl ReportableError for GetAuthInfoError {
        match self {
            Self::BadSecret => crate::error::ErrorKind::ControlPlane,
            Self::ApiError(_) => crate::error::ErrorKind::ControlPlane,
+            // we only apply endpoint filtering if control plane is under high load.
+            Self::UnknownEndpoint => crate::error::ErrorKind::ServiceRateLimit,
        }
    }
 }
@@ -192,6 +200,9 @@ impl CouldRetry for WakeComputeError {

 #[derive(Debug, Error)]
 pub enum GetEndpointJwksError {
+    #[error("endpoint not found")]
+    EndpointNotFound,
+
    #[error("failed to build control plane request: {0}")]
    RequestBuild(#[source] reqwest::Error),

--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -16,6 +16,44 @@ use super::LeakyBucketConfig;
 use crate::ext::LockExt;
 use crate::intern::EndpointIdInt;

+pub struct GlobalRateLimiter {
+    data: Vec<RateBucket>, // one running bucket per entry of `info`, in the same order
+    info: Vec<RateBucketInfo>, // bucket definitions handed to `new`
+}
+
+impl GlobalRateLimiter {
+    /// Creates a limiter with one zero-count bucket per entry of `info`; all buckets
+    /// share the same start instant, taken when this constructor runs.
+    pub fn new(info: Vec<RateBucketInfo>) -> Self {
+        let fresh = RateBucket {
+            start: Instant::now(),
+            count: 0,
+        };
+
+        Self {
+            data: vec![fresh; info.len()],
+            info,
+        }
+    }
+
+    /// Check that number of connections is below `max_rps` rps.
+    ///
+    /// Returns `true` and counts the request in every bucket only if all buckets allow it;
+    /// otherwise returns `false` and leaves the counts untouched.
+    pub fn check(&mut self) -> bool {
+        let now = Instant::now();
+
+        let every_bucket_allows = self
+            .data
+            .iter_mut()
+            .zip(self.info.iter())
+            .all(|(bucket, info)| bucket.should_allow_request(info, now, 1));
+        if !every_bucket_allows {
+            return false;
+        }
+
+        // only increment the bucket counts if the request will actually be accepted
+        for bucket in &mut self.data {
+            bucket.inc(1);
+        }
+        true
+    }
+}
+
 // Simple per-endpoint rate limiter.
 //
 // Check that number of connections to the endpoint is below `max_rps` rps.
--- a/proxy/src/rate_limiter/mod.rs
+++ b/proxy/src/rate_limiter/mod.rs
@@ -8,4 +8,4 @@ pub(crate) use limit_algorithm::aimd::Aimd;
 pub(crate) use limit_algorithm::{
    DynamicLimiter, Outcome, RateLimitAlgorithm, RateLimiterConfig, Token,
 };
-pub use limiter::{RateBucketInfo, WakeComputeRateLimiter};
+pub use limiter::{GlobalRateLimiter, RateBucketInfo, WakeComputeRateLimiter};
--- a/proxy/src/serverless/mod.rs
+++ b/proxy/src/serverless/mod.rs
@@ -11,6 +11,8 @@ mod http_conn_pool;
 mod http_util;
 mod json;
 mod local_conn_pool;
+#[cfg(feature = "rest_broker")]
+pub mod rest;
 mod sql_over_http;
 mod websocket;

@@ -487,6 +489,37 @@ async fn request_handler(
            .body(Empty::new().map_err(|x| match x {}).boxed())
            .map_err(|e| ApiError::InternalServerError(e.into()))
    } else {
-        json_response(StatusCode::BAD_REQUEST, "query is not supported")
+        #[cfg(feature = "rest_broker")]
+        {
+            if config.rest_config.is_rest_broker && {
+                let path_parts: Vec<&str> = request.uri().path().split('/').collect();
+                path_parts.len() >= 3 && path_parts[2].starts_with("rest")
+            } {
+                let ctx =
+                    RequestContext::new(session_id, conn_info, crate::metrics::Protocol::Http);
+                let span = ctx.span();
+
+                let testodrome_id = request
+                    .headers()
+                    .get("X-Neon-Query-ID")
+                    .and_then(|value| value.to_str().ok())
+                    .map(|s| s.to_string());
+
+                if let Some(query_id) = testodrome_id {
+                    info!(parent: &ctx.span(), "testodrome query ID: {query_id}");
+                    ctx.set_testodrome_id(query_id.into());
+                }
+
+                rest::handle(config, ctx, request, backend, http_cancellation_token)
+                    .instrument(span)
+                    .await
+            } else {
+                json_response(StatusCode::BAD_REQUEST, "query is not supported")
+            }
+        }
+        #[cfg(not(feature = "rest_broker"))]
+        {
+            json_response(StatusCode::BAD_REQUEST, "query is not supported")
+        }
    }
 }
--- a/proxy/src/serverless/rest.rs
+++ b/proxy/src/serverless/rest.rs
--- a/proxy/src/types.rs
+++ b/proxy/src/types.rs
@@ -107,3 +107,13 @@ smol_str_wrapper!(DbName);

 // postgres hostname, will likely be a port:ip addr
 smol_str_wrapper!(Host);
+
+// Endpoints are a bit tricky. In rare cases they might actually be branches or projects.
+impl EndpointId {
+    pub(crate) fn is_endpoint(&self) -> bool {
+        self.0.starts_with("ep-") // `ep-` prefix marks an endpoint id
+    }
+    pub(crate) fn is_branch(&self) -> bool {
+        self.0.starts_with("br-") // `br-` prefix marks a branch id
+    }
+}
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -197,7 +197,7 @@ impl StateSK {
        Ok(TimelineMembershipSwitchResponse {
            previous_conf: result.previous_conf,
            current_conf: result.current_conf,
-            last_log_term: self.state().acceptor_state.term,
+            term: self.state().acceptor_state.term,
            flush_lsn: self.flush_lsn(),
        })
    }
--- a/storage_controller/src/background_node_operations.rs
+++ b/storage_controller/src/background_node_operations.rs
@@ -6,11 +6,6 @@ use utils::id::NodeId;

 pub(crate) const MAX_RECONCILES_PER_OPERATION: usize = 64;

-#[derive(Copy, Clone)]
-pub(crate) struct Delete {
-    pub(crate) node_id: NodeId,
-}
-
 #[derive(Copy, Clone)]
 pub(crate) struct Drain {
    pub(crate) node_id: NodeId,
@@ -23,7 +18,6 @@ pub(crate) struct Fill {

 #[derive(Copy, Clone)]
 pub(crate) enum Operation {
-    Delete(Delete),
    Drain(Drain),
    Fill(Fill),
 }
@@ -36,8 +30,6 @@ pub(crate) enum OperationError {
    FinalizeError(Cow<'static, str>),
    #[error("Operation cancelled")]
    Cancelled,
-    #[error("Impossible constraint error: {0}")]
-    ImpossibleConstraint(Cow<'static, str>),
 }

 pub(crate) struct OperationHandler {
@@ -46,12 +38,6 @@ pub(crate) struct OperationHandler {
    pub(crate) cancel: CancellationToken,
 }

-impl Display for Delete {
-    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        write!(f, "delete {}", self.node_id)
-    }
-}
-
 impl Display for Drain {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "drain {}", self.node_id)
@@ -67,7 +53,6 @@ impl Display for Fill {
 impl Display for Operation {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
-            Operation::Delete(op) => write!(f, "{op}"),
            Operation::Drain(op) => write!(f, "{op}"),
            Operation::Fill(op) => write!(f, "{op}"),
        }
--- a/storage_controller/src/operation_utils.rs
+++ b/storage_controller/src/operation_utils.rs
@@ -10,19 +10,63 @@ use crate::node::Node;
 use crate::scheduler::Scheduler;
 use crate::tenant_shard::TenantShard;

+pub(crate) struct TenantShardIterator<F> {
+    tenants_accessor: F, // callback: given the last yielded id, returns the next one
+    inspected_all_shards: bool, // latched to true once the accessor returns `None`
+    last_inspected_shard: Option<TenantShardId>, // `None` until the first id is yielded
+}
+
+/// A simple cursor-style iterator which can be used in tandem with
+/// [`crate::service::Service`] to iterate over all known tenant shard ids
+/// without holding the lock on the service state at all times.
+impl<F> TenantShardIterator<F>
+where
+    F: Fn(Option<TenantShardId>) -> Option<TenantShardId>,
+{
+    pub(crate) fn new(tenants_accessor: F) -> Self {
+        Self {
+            tenants_accessor,
+            inspected_all_shards: false,
+            last_inspected_shard: None,
+        }
+    }
+
+    /// Returns the next tenant shard id, or `None` once the accessor has reported the end
+    pub(crate) fn next(&mut self) -> Option<TenantShardId> {
+        if self.inspected_all_shards {
+            return None; // already exhausted: do not call the accessor again
+        }
+
+        match (self.tenants_accessor)(self.last_inspected_shard) {
+            Some(tid) => {
+                self.last_inspected_shard = Some(tid); // remember the cursor position
+                Some(tid)
+            }
+            None => {
+                self.inspected_all_shards = true; // makes `finished()` return true
+                None
+            }
+        }
+    }
+
+    /// Returns true once `next` has observed the end of the iteration and false otherwise
+    pub(crate) fn finished(&self) -> bool {
+        self.inspected_all_shards
+    }
+}
+
 /// Check that the state of the node being drained is as expected:
-/// node is present in memory and scheduling policy is set to expected_policy
+/// node is present in memory and scheduling policy is set to [`NodeSchedulingPolicy::Draining`]
 pub(crate) fn validate_node_state(
    node_id: &NodeId,
    nodes: Arc<HashMap<NodeId, Node>>,
-    expected_policy: NodeSchedulingPolicy,
 ) -> Result<(), OperationError> {
    let node = nodes.get(node_id).ok_or(OperationError::NodeStateChanged(
        format!("node {node_id} was removed").into(),
    ))?;

    let current_policy = node.get_scheduling();
-    if current_policy != expected_policy {
+    if !matches!(current_policy, NodeSchedulingPolicy::Draining) {
        // TODO(vlad): maybe cancel pending reconciles before erroring out. need to think
        // about it
        return Err(OperationError::NodeStateChanged(
@@ -138,3 +182,55 @@ impl TenantShardDrain {
        }
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+
+    use utils::id::TenantId;
+    use utils::shard::{ShardCount, ShardNumber, TenantShardId};
+
+    use super::TenantShardIterator;
+
+    #[test]
+    fn test_tenant_shard_iterator() {
+        let tenant_id = TenantId::generate();
+        let shard_count = ShardCount(8); // one tenant, shard numbers 0..8
+
+        let mut tenant_shards = Vec::default();
+        for i in 0..shard_count.0 {
+            tenant_shards.push((
+                TenantShardId {
+                    tenant_id,
+                    shard_number: ShardNumber(i),
+                    shard_count,
+                },
+                (), // placeholder value: the accessor below only reads the id
+            ))
+        }
+
+        let tenant_shards = Arc::new(tenant_shards);
+
+        let mut tid_iter = TenantShardIterator::new({
+            let tenants = tenant_shards.clone();
+            move |last_inspected_shard: Option<TenantShardId>| {
+                let entry = match last_inspected_shard {
+                    Some(skip_past) => {
+                        let mut cursor = tenants.iter().skip_while(|(tid, _)| *tid != skip_past);
+                        cursor.nth(1) // index 0 is `skip_past` itself; take the entry after it
+                    }
+                    None => tenants.first(), // first call: start at the beginning
+                };
+
+                entry.map(|(tid, _)| tid).copied()
+            }
+        });
+
+        let mut iterated_over = Vec::default();
+        while let Some(tid) = tid_iter.next() {
+            iterated_over.push((tid, ()));
+        }
+
+        assert_eq!(iterated_over, *tenant_shards); // same ids, same order, none missing
+    }
+}
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -919,7 +919,7 @@ async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError
    json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
 }

-async fn handle_node_delete_old(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

    let req = match maybe_forward(req).await {
@@ -931,10 +931,7 @@ async fn handle_node_delete_old(req: Request<Body>) -> Result<Response<Body>, Ap

    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;
-    json_response(
-        StatusCode::OK,
-        state.service.node_delete_old(node_id).await?,
-    )
+    json_response(StatusCode::OK, state.service.node_delete(node_id).await?)
 }

 async fn handle_tombstone_list(req: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -1054,42 +1051,6 @@ async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiErro
    json_response(StatusCode::OK, leader)
 }

-async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    check_permissions(&req, Scope::Admin)?;
-
-    let req = match maybe_forward(req).await {
-        ForwardOutcome::Forwarded(res) => {
-            return res;
-        }
-        ForwardOutcome::NotForwarded(req) => req,
-    };
-
-    let state = get_state(&req);
-    let node_id: NodeId = parse_request_param(&req, "node_id")?;
-    json_response(
-        StatusCode::OK,
-        state.service.start_node_delete(node_id).await?,
-    )
-}
-
-async fn handle_cancel_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    check_permissions(&req, Scope::Infra)?;
-
-    let req = match maybe_forward(req).await {
-        ForwardOutcome::Forwarded(res) => {
-            return res;
-        }
-        ForwardOutcome::NotForwarded(req) => req,
-    };
-
-    let state = get_state(&req);
-    let node_id: NodeId = parse_request_param(&req, "node_id")?;
-    json_response(
-        StatusCode::ACCEPTED,
-        state.service.cancel_node_delete(node_id).await?,
-    )
-}
-
 async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Infra)?;

@@ -2260,14 +2221,8 @@ pub fn make_router(
        .post("/control/v1/node", |r| {
            named_request_span(r, handle_node_register, RequestName("control_v1_node"))
        })
-        // This endpoint is deprecated and will be removed in a future version.
-        // Use PUT /control/v1/node/:node_id/delete instead.
        .delete("/control/v1/node/:node_id", |r| {
-            named_request_span(
-                r,
-                handle_node_delete_old,
-                RequestName("control_v1_node_delete"),
-            )
+            named_request_span(r, handle_node_delete, RequestName("control_v1_node_delete"))
        })
        .get("/control/v1/node", |r| {
            named_request_span(r, handle_node_list, RequestName("control_v1_node"))
@@ -2292,20 +2247,6 @@ pub fn make_router(
        .get("/control/v1/leader", |r| {
            named_request_span(r, handle_get_leader, RequestName("control_v1_get_leader"))
        })
-        .put("/control/v1/node/:node_id/delete", |r| {
-            named_request_span(
-                r,
-                handle_node_delete,
-                RequestName("control_v1_start_node_delete"),
-            )
-        })
-        .delete("/control/v1/node/:node_id/delete", |r| {
-            named_request_span(
-                r,
-                handle_cancel_node_delete,
-                RequestName("control_v1_cancel_node_delete"),
-            )
-        })
        .put("/control/v1/node/:node_id/drain", |r| {
            named_request_span(r, handle_node_drain, RequestName("control_v1_node_drain"))
        })
@@ -2371,7 +2312,7 @@ pub fn make_router(
            named_request_span(
                r,
                handle_safekeeper_scheduling_policy,
-                RequestName("v1_safekeeper_scheduling_policy"),
+                RequestName("v1_safekeeper_status"),
            )
        })
        // Tenant Shard operations
--- a/storage_controller/src/lib.rs
+++ b/storage_controller/src/lib.rs
@@ -6,13 +6,13 @@ extern crate hyper0 as hyper;
 mod auth;
 mod background_node_operations;
 mod compute_hook;
+mod drain_utils;
 mod heartbeater;
 pub mod http;
 mod id_lock_map;
 mod leadership;
 pub mod metrics;
 mod node;
-mod operation_utils;
 mod pageserver_client;
 mod peer_client;
 pub mod persistence;
--- a/storage_controller/src/node.rs
+++ b/storage_controller/src/node.rs
@@ -201,7 +201,6 @@ impl Node {

        match self.scheduling {
            NodeSchedulingPolicy::Active => MaySchedule::Yes(utilization),
-            NodeSchedulingPolicy::Deleting => MaySchedule::No,
            NodeSchedulingPolicy::Draining => MaySchedule::No,
            NodeSchedulingPolicy::Filling => MaySchedule::Yes(utilization),
            NodeSchedulingPolicy::Pause => MaySchedule::No,
--- a/storage_controller/src/persistence.rs
+++ b/storage_controller/src/persistence.rs
@@ -635,23 +635,18 @@ impl Persistence {
        let updated = self
            .with_measured_conn(DatabaseOperation::ReAttach, move |conn| {
                Box::pin(async move {
-                    let node: Option<NodePersistence> = nodes
-                        .filter(node_id.eq(input_node_id.0 as i64))
-                        .first::<NodePersistence>(conn)
-                        .await
-                        .optional()?;
-
                    // Check if the node is not marked as deleted
-                    match node {
-                        Some(node) if matches!(NodeLifecycle::from_str(&node.lifecycle), Ok(NodeLifecycle::Deleted)) => {
-                            return Err(DatabaseError::Logical(format!(
-                                "Node {input_node_id} is marked as deleted, re-attach is not allowed"
-                            )));
-                        }
-                        _ => {
-                            // go through
-                        }
-                    };
+                    let deleted_node: i64 = nodes
+                        .filter(node_id.eq(input_node_id.0 as i64))
+                        .filter(lifecycle.eq(String::from(NodeLifecycle::Deleted)))
+                        .count()
+                        .get_result(conn)
+                        .await?;
+                    if deleted_node > 0 {
+                        return Err(DatabaseError::Logical(format!(
+                            "Node {input_node_id} is marked as deleted, re-attach is not allowed"
+                        )));
+                    }

                    let rows_updated = diesel::update(tenant_shards)
                        .filter(generation_pageserver.eq(input_node_id.0 as i64))
@@ -669,23 +664,21 @@ impl Persistence {
                        .load(conn)
                        .await?;

-                    if let Some(node) = node {
-                        let old_scheduling_policy =
-                            NodeSchedulingPolicy::from_str(&node.scheduling_policy).unwrap();
-                        let new_scheduling_policy = match old_scheduling_policy {
-                            NodeSchedulingPolicy::Active => NodeSchedulingPolicy::Active,
-                            NodeSchedulingPolicy::PauseForRestart => NodeSchedulingPolicy::Active,
-                            NodeSchedulingPolicy::Draining => NodeSchedulingPolicy::Active,
-                            NodeSchedulingPolicy::Filling => NodeSchedulingPolicy::Active,
-                            NodeSchedulingPolicy::Pause => NodeSchedulingPolicy::Pause,
-                            NodeSchedulingPolicy::Deleting => NodeSchedulingPolicy::Pause,
-                        };
-                        diesel::update(nodes)
-                            .filter(node_id.eq(input_node_id.0 as i64))
-                            .set(scheduling_policy.eq(String::from(new_scheduling_policy)))
-                            .execute(conn)
-                            .await?;
-                    }
+                    // If the node was draining, filling, or paused for restart before
+                    // re-attaching, then reset its node scheduling policy to active.
+                    diesel::update(nodes)
+                        .filter(node_id.eq(input_node_id.0 as i64))
+                        .filter(
+                            scheduling_policy
+                                .eq(String::from(NodeSchedulingPolicy::PauseForRestart))
+                                .or(scheduling_policy
+                                    .eq(String::from(NodeSchedulingPolicy::Draining)))
+                                .or(scheduling_policy
+                                    .eq(String::from(NodeSchedulingPolicy::Filling))),
+                        )
+                        .set(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Active)))
+                        .execute(conn)
+                        .await?;

                    Ok(updated)
                })
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -1,8 +1,8 @@
 pub mod chaos_injector;
+mod context_iterator;
 pub mod feature_flag;
 pub(crate) mod safekeeper_reconciler;
 mod safekeeper_service;
-mod tenant_shard_iterator;

 use std::borrow::Cow;
 use std::cmp::Ordering;
@@ -16,6 +16,7 @@ use std::sync::{Arc, OnceLock};
 use std::time::{Duration, Instant, SystemTime};

 use anyhow::Context;
+use context_iterator::TenantShardContextIterator;
 use control_plane::storage_controller::{
    AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse,
 };
@@ -54,7 +55,6 @@ use pageserver_client::{BlockUnblock, mgmt_api};
 use reqwest::{Certificate, StatusCode};
 use safekeeper_api::models::SafekeeperUtilization;
 use safekeeper_reconciler::SafekeeperReconcilers;
-use tenant_shard_iterator::{TenantShardExclusiveIterator, create_shared_shard_iterator};
 use tokio::sync::TryAcquireError;
 use tokio::sync::mpsc::error::TrySendError;
 use tokio_util::sync::CancellationToken;
@@ -68,9 +68,10 @@ use utils::sync::gate::{Gate, GateGuard};
 use utils::{failpoint_support, pausable_failpoint};

 use crate::background_node_operations::{
-    Delete, Drain, Fill, MAX_RECONCILES_PER_OPERATION, Operation, OperationError, OperationHandler,
+    Drain, Fill, MAX_RECONCILES_PER_OPERATION, Operation, OperationError, OperationHandler,
 };
 use crate::compute_hook::{self, ComputeHook, NotifyError};
+use crate::drain_utils::{self, TenantShardDrain, TenantShardIterator};
 use crate::heartbeater::{Heartbeater, PageserverState, SafekeeperState};
 use crate::id_lock_map::{
    IdLockMap, TracingExclusiveGuard, trace_exclusive_lock, trace_shared_lock,
@@ -78,7 +79,6 @@ use crate::id_lock_map::{
 use crate::leadership::Leadership;
 use crate::metrics;
 use crate::node::{AvailabilityTransition, Node};
-use crate::operation_utils::{self, TenantShardDrain};
 use crate::pageserver_client::PageserverClient;
 use crate::peer_client::GlobalObservedState;
 use crate::persistence::split_state::SplitState;
@@ -105,7 +105,7 @@ use crate::timeline_import::{
    TimelineImportFinalizeError, TimelineImportState, UpcallClient,
 };

-const WAITER_OPERATION_POLL_TIMEOUT: Duration = Duration::from_millis(500);
+const WAITER_FILL_DRAIN_POLL_TIMEOUT: Duration = Duration::from_millis(500);

 // For operations that should be quick, like attaching a new tenant
 const SHORT_RECONCILE_TIMEOUT: Duration = Duration::from_secs(5);
@@ -581,9 +581,7 @@ impl From<ReconcileWaitError> for ApiError {
 impl From<OperationError> for ApiError {
    fn from(value: OperationError) -> Self {
        match value {
-            OperationError::NodeStateChanged(err)
-            | OperationError::FinalizeError(err)
-            | OperationError::ImpossibleConstraint(err) => {
+            OperationError::NodeStateChanged(err) | OperationError::FinalizeError(err) => {
                ApiError::InternalServerError(anyhow::anyhow!(err))
            }
            OperationError::Cancelled => ApiError::Conflict("Operation was cancelled".into()),
@@ -2416,7 +2414,6 @@ impl Service {
                NodeSchedulingPolicy::PauseForRestart
                    | NodeSchedulingPolicy::Draining
                    | NodeSchedulingPolicy::Filling
-                    | NodeSchedulingPolicy::Deleting
            );

            let mut new_nodes = (**nodes).clone();
@@ -7058,7 +7055,7 @@ impl Service {
    /// If a node has any work on it, it will be rescheduled: this is "clean" in the sense
    /// that we don't leave any bad state behind in the storage controller, but unclean
    /// in the sense that we are not carefully draining the node.
-    pub(crate) async fn node_delete_old(&self, node_id: NodeId) -> Result<(), ApiError> {
+    pub(crate) async fn node_delete(&self, node_id: NodeId) -> Result<(), ApiError> {
        let _node_lock =
            trace_exclusive_lock(&self.node_op_locks, node_id, NodeOperations::Delete).await;

@@ -7092,7 +7089,7 @@ impl Service {
            }

            for (_tenant_id, mut schedule_context, shards) in
-                TenantShardExclusiveIterator::new(tenants, ScheduleMode::Normal)
+                TenantShardContextIterator::new(tenants, ScheduleMode::Normal)
            {
                for shard in shards {
                    if shard.deref_node(node_id) {
@@ -7161,171 +7158,6 @@ impl Service {
        Ok(())
    }

-    pub(crate) async fn delete_node(
-        self: &Arc<Self>,
-        node_id: NodeId,
-        policy_on_start: NodeSchedulingPolicy,
-        cancel: CancellationToken,
-    ) -> Result<(), OperationError> {
-        let reconciler_config = ReconcilerConfigBuilder::new(ReconcilerPriority::Normal).build();
-
-        let mut waiters: Vec<ReconcilerWaiter> = Vec::new();
-        let mut tid_iter = create_shared_shard_iterator(self.clone());
-
-        while !tid_iter.finished() {
-            if cancel.is_cancelled() {
-                match self
-                    .node_configure(node_id, None, Some(policy_on_start))
-                    .await
-                {
-                    Ok(()) => return Err(OperationError::Cancelled),
-                    Err(err) => {
-                        return Err(OperationError::FinalizeError(
-                            format!(
-                                "Failed to finalise delete cancel of {} by setting scheduling policy to {}: {}",
-                                node_id, String::from(policy_on_start), err
-                            )
-                            .into(),
-                        ));
-                    }
-                }
-            }
-
-            operation_utils::validate_node_state(
-                &node_id,
-                self.inner.read().unwrap().nodes.clone(),
-                NodeSchedulingPolicy::Deleting,
-            )?;
-
-            while waiters.len() < MAX_RECONCILES_PER_OPERATION {
-                let tid = match tid_iter.next() {
-                    Some(tid) => tid,
-                    None => {
-                        break;
-                    }
-                };
-
-                let mut locked = self.inner.write().unwrap();
-                let (nodes, tenants, scheduler) = locked.parts_mut();
-
-                let tenant_shard = match tenants.get_mut(&tid) {
-                    Some(tenant_shard) => tenant_shard,
-                    None => {
-                        // Tenant shard was deleted by another operation. Skip it.
-                        continue;
-                    }
-                };
-
-                match tenant_shard.get_scheduling_policy() {
-                    ShardSchedulingPolicy::Active | ShardSchedulingPolicy::Essential => {
-                        // A migration during delete is classed as 'essential' because it is required to
-                        // uphold our availability goals for the tenant: this shard is elegible for migration.
-                    }
-                    ShardSchedulingPolicy::Pause | ShardSchedulingPolicy::Stop => {
-                        // If we have been asked to avoid rescheduling this shard, then do not migrate it during a deletion
-                        tracing::warn!(
-                            tenant_id=%tid.tenant_id, shard_id=%tid.shard_slug(),
-                            "Skip migration during deletion because shard scheduling policy {:?} disallows it",
-                            tenant_shard.get_scheduling_policy(),
-                        );
-                        continue;
-                    }
-                }
-
-                if tenant_shard.deref_node(node_id) {
-                    // TODO(ephemeralsad): we should process all shards in a tenant at once, so
-                    // we can avoid settling the tenant unevenly.
-                    let mut schedule_context = ScheduleContext::new(ScheduleMode::Normal);
-                    if let Err(e) = tenant_shard.schedule(scheduler, &mut schedule_context) {
-                        tracing::error!(
-                            "Refusing to delete node, shard {} can't be rescheduled: {e}",
-                            tenant_shard.tenant_shard_id
-                        );
-                        return Err(OperationError::ImpossibleConstraint(e.to_string().into()));
-                    } else {
-                        tracing::info!(
-                            "Rescheduled shard {} away from node during deletion",
-                            tenant_shard.tenant_shard_id
-                        )
-                    }
-
-                    let waiter = self.maybe_configured_reconcile_shard(
-                        tenant_shard,
-                        nodes,
-                        reconciler_config,
-                    );
-                    if let Some(some) = waiter {
-                        waiters.push(some);
-                    }
-                }
-            }
-
-            waiters = self
-                .await_waiters_remainder(waiters, WAITER_OPERATION_POLL_TIMEOUT)
-                .await;
-
-            failpoint_support::sleep_millis_async!("sleepy-delete-loop", &cancel);
-        }
-
-        while !waiters.is_empty() {
-            if cancel.is_cancelled() {
-                match self
-                    .node_configure(node_id, None, Some(policy_on_start))
-                    .await
-                {
-                    Ok(()) => return Err(OperationError::Cancelled),
-                    Err(err) => {
-                        return Err(OperationError::FinalizeError(
-                            format!(
-                                "Failed to finalise drain cancel of {} by setting scheduling policy to {}: {}",
-                                node_id, String::from(policy_on_start), err
-                            )
-                            .into(),
-                        ));
-                    }
-                }
-            }
-
-            tracing::info!("Awaiting {} pending delete reconciliations", waiters.len());
-
-            waiters = self
-                .await_waiters_remainder(waiters, SHORT_RECONCILE_TIMEOUT)
-                .await;
-        }
-
-        self.persistence
-            .set_tombstone(node_id)
-            .await
-            .map_err(|e| OperationError::FinalizeError(e.to_string().into()))?;
-
-        {
-            let mut locked = self.inner.write().unwrap();
-            let (nodes, _, scheduler) = locked.parts_mut();
-
-            scheduler.node_remove(node_id);
-
-            let mut nodes_mut = (**nodes).clone();
-            if let Some(mut removed_node) = nodes_mut.remove(&node_id) {
-                // Ensure that any reconciler holding an Arc<> to this node will
-                // drop out when trying to RPC to it (setting Offline state sets the
-                // cancellation token on the Node object).
-                removed_node.set_availability(NodeAvailability::Offline);
-            }
-            *nodes = Arc::new(nodes_mut);
-
-            metrics::METRICS_REGISTRY
-                .metrics_group
-                .storage_controller_pageserver_nodes
-                .set(nodes.len() as i64);
-            metrics::METRICS_REGISTRY
-                .metrics_group
-                .storage_controller_https_pageserver_nodes
-                .set(nodes.values().filter(|n| n.has_https_port()).count() as i64);
-        }
-
-        Ok(())
-    }
-
    pub(crate) async fn node_list(&self) -> Result<Vec<Node>, ApiError> {
        let nodes = {
            self.inner
@@ -7714,7 +7546,7 @@ impl Service {
                let mut tenants_affected: usize = 0;

                for (_tenant_id, mut schedule_context, shards) in
-                    TenantShardExclusiveIterator::new(tenants, ScheduleMode::Normal)
+                    TenantShardContextIterator::new(tenants, ScheduleMode::Normal)
                {
                    for tenant_shard in shards {
                        let tenant_shard_id = tenant_shard.tenant_shard_id;
@@ -7885,142 +7717,6 @@ impl Service {
        self.node_configure(node_id, availability, scheduling).await
    }

-    pub(crate) async fn start_node_delete(
-        self: &Arc<Self>,
-        node_id: NodeId,
-    ) -> Result<(), ApiError> {
-        let (ongoing_op, node_policy, schedulable_nodes_count) = {
-            let locked = self.inner.read().unwrap();
-            let nodes = &locked.nodes;
-            let node = nodes.get(&node_id).ok_or(ApiError::NotFound(
-                anyhow::anyhow!("Node {} not registered", node_id).into(),
-            ))?;
-            let schedulable_nodes_count = nodes
-                .iter()
-                .filter(|(_, n)| matches!(n.may_schedule(), MaySchedule::Yes(_)))
-                .count();
-
-            (
-                locked
-                    .ongoing_operation
-                    .as_ref()
-                    .map(|ongoing| ongoing.operation),
-                node.get_scheduling(),
-                schedulable_nodes_count,
-            )
-        };
-
-        if let Some(ongoing) = ongoing_op {
-            return Err(ApiError::PreconditionFailed(
-                format!("Background operation already ongoing for node: {ongoing}").into(),
-            ));
-        }
-
-        if schedulable_nodes_count == 0 {
-            return Err(ApiError::PreconditionFailed(
-                "No other schedulable nodes to move shards".into(),
-            ));
-        }
-
-        match node_policy {
-            NodeSchedulingPolicy::Active | NodeSchedulingPolicy::Pause => {
-                self.node_configure(node_id, None, Some(NodeSchedulingPolicy::Deleting))
-                    .await?;
-
-                let cancel = self.cancel.child_token();
-                let gate_guard = self.gate.enter().map_err(|_| ApiError::ShuttingDown)?;
-                let policy_on_start = node_policy;
-
-                self.inner.write().unwrap().ongoing_operation = Some(OperationHandler {
-                    operation: Operation::Delete(Delete { node_id }),
-                    cancel: cancel.clone(),
-                });
-
-                let span = tracing::info_span!(parent: None, "delete_node", %node_id);
-
-                tokio::task::spawn(
-                    {
-                        let service = self.clone();
-                        let cancel = cancel.clone();
-                        async move {
-                            let _gate_guard = gate_guard;
-
-                            scopeguard::defer! {
-                                let prev = service.inner.write().unwrap().ongoing_operation.take();
-
-                                if let Some(Operation::Delete(removed_delete)) = prev.map(|h| h.operation) {
-                                    assert_eq!(removed_delete.node_id, node_id, "We always take the same operation");
-                                } else {
-                                    panic!("We always remove the same operation")
-                                }
-                            }
-
-                            tracing::info!("Delete background operation starting");
-                            let res = service
-                                .delete_node(node_id, policy_on_start, cancel)
-                                .await;
-                            match res {
-                                Ok(()) => {
-                                    tracing::info!(
-                                        "Delete background operation completed successfully"
-                                    );
-                                }
-                                Err(OperationError::Cancelled) => {
-                                    tracing::info!("Delete background operation was cancelled");
-                                }
-                                Err(err) => {
-                                    tracing::error!(
-                                        "Delete background operation encountered: {err}"
-                                    )
-                                }
-                            }
-                        }
-                    }
-                    .instrument(span),
-                );
-            }
-            NodeSchedulingPolicy::Deleting => {
-                return Err(ApiError::Conflict(format!(
-                    "Node {node_id} has delete in progress"
-                )));
-            }
-            policy => {
-                return Err(ApiError::PreconditionFailed(
-                    format!("Node {node_id} cannot be deleted due to {policy:?} policy").into(),
-                ));
-            }
-        }
-
-        Ok(())
-    }
-
-    pub(crate) async fn cancel_node_delete(
-        self: &Arc<Self>,
-        node_id: NodeId,
-    ) -> Result<(), ApiError> {
-        {
-            let locked = self.inner.read().unwrap();
-            let nodes = &locked.nodes;
-            nodes.get(&node_id).ok_or(ApiError::NotFound(
-                anyhow::anyhow!("Node {} not registered", node_id).into(),
-            ))?;
-        }
-
-        if let Some(op_handler) = self.inner.read().unwrap().ongoing_operation.as_ref() {
-            if let Operation::Delete(delete) = op_handler.operation {
-                if delete.node_id == node_id {
-                    tracing::info!("Cancelling background delete operation for node {node_id}");
-                    op_handler.cancel.cancel();
-                    return Ok(());
-                }
-            }
-        }
-
-        Err(ApiError::PreconditionFailed(
-            format!("Node {node_id} has no delete in progress").into(),
-        ))
-    }
-
    pub(crate) async fn start_node_drain(
        self: &Arc<Self>,
        node_id: NodeId,
@@ -8597,7 +8293,7 @@ impl Service {
        // to ignore the utilisation component of the score.

        for (_tenant_id, schedule_context, shards) in
-            TenantShardExclusiveIterator::new(tenants, ScheduleMode::Speculative)
+            TenantShardContextIterator::new(tenants, ScheduleMode::Speculative)
        {
            for shard in shards {
                if work.len() >= MAX_OPTIMIZATIONS_PLAN_PER_PASS {
@@ -9324,7 +9020,25 @@ impl Service {

        let mut waiters = Vec::new();

-        let mut tid_iter = create_shared_shard_iterator(self.clone());
+        let mut tid_iter = TenantShardIterator::new({
+            let service = self.clone();
+            move |last_inspected_shard: Option<TenantShardId>| {
+                let locked = &service.inner.read().unwrap();
+                let tenants = &locked.tenants;
+                let entry = match last_inspected_shard {
+                    Some(skip_past) => {
+                        // Skip to the last seen tenant shard id
+                        let mut cursor = tenants.iter().skip_while(|(tid, _)| **tid != skip_past);
+
+                        // Skip past the last seen
+                        cursor.nth(1)
+                    }
+                    None => tenants.first_key_value(),
+                };
+
+                entry.map(|(tid, _)| tid).copied()
+            }
+        });

        while !tid_iter.finished() {
            if cancel.is_cancelled() {
@@ -9344,11 +9058,7 @@ impl Service {
                }
            }

-            operation_utils::validate_node_state(
-                &node_id,
-                self.inner.read().unwrap().nodes.clone(),
-                NodeSchedulingPolicy::Draining,
-            )?;
+            drain_utils::validate_node_state(&node_id, self.inner.read().unwrap().nodes.clone())?;

            while waiters.len() < MAX_RECONCILES_PER_OPERATION {
                let tid = match tid_iter.next() {
@@ -9428,7 +9138,7 @@ impl Service {
            }

            waiters = self
-                .await_waiters_remainder(waiters, WAITER_OPERATION_POLL_TIMEOUT)
+                .await_waiters_remainder(waiters, WAITER_FILL_DRAIN_POLL_TIMEOUT)
                .await;

            failpoint_support::sleep_millis_async!("sleepy-drain-loop", &cancel);
@@ -9722,7 +9432,7 @@ impl Service {
            }

            waiters = self
-                .await_waiters_remainder(waiters, WAITER_OPERATION_POLL_TIMEOUT)
+                .await_waiters_remainder(waiters, WAITER_FILL_DRAIN_POLL_TIMEOUT)
                .await;
        }

--- a/storage_controller/src/service/tenant_shard_iterator.rs
+++ b/storage_controller/src/service/tenant_shard_iterator.rs
@@ -1,5 +1,4 @@
 use std::collections::BTreeMap;
-use std::sync::Arc;

 use utils::id::TenantId;
 use utils::shard::TenantShardId;
@@ -7,21 +6,16 @@ use utils::shard::TenantShardId;
 use crate::scheduler::{ScheduleContext, ScheduleMode};
 use crate::tenant_shard::TenantShard;

-use super::Service;
-
-/// Exclusive iterator over all tenant shards.
-/// It is used to iterate over consistent tenants state at specific point in time.
-///
 /// When making scheduling decisions, it is useful to have the ScheduleContext for a whole
 /// tenant while considering the individual shards within it.  This iterator is a helper
 /// that gathers all the shards in a tenant and then yields them together with a ScheduleContext
 /// for the tenant.
-pub(super) struct TenantShardExclusiveIterator<'a> {
+pub(super) struct TenantShardContextIterator<'a> {
    schedule_mode: ScheduleMode,
    inner: std::collections::btree_map::IterMut<'a, TenantShardId, TenantShard>,
 }

-impl<'a> TenantShardExclusiveIterator<'a> {
+impl<'a> TenantShardContextIterator<'a> {
    pub(super) fn new(
        tenants: &'a mut BTreeMap<TenantShardId, TenantShard>,
        schedule_mode: ScheduleMode,
@@ -33,7 +27,7 @@ impl<'a> TenantShardExclusiveIterator<'a> {
    }
 }

-impl<'a> Iterator for TenantShardExclusiveIterator<'a> {
+impl<'a> Iterator for TenantShardContextIterator<'a> {
    type Item = (TenantId, ScheduleContext, Vec<&'a mut TenantShard>);

    fn next(&mut self) -> Option<Self::Item> {
@@ -58,93 +52,13 @@ impl<'a> Iterator for TenantShardExclusiveIterator<'a> {
    }
 }

-/// Shared iterator over all tenant shards.
-/// It is used to iterate over all tenants without blocking another code, working with tenants
-///
-/// A simple iterator which can be used in tandem with [`crate::service::Service`]
-/// to iterate over all known tenant shard ids without holding the lock on the
-/// service state at all times.
-pub(crate) struct TenantShardSharedIterator<F> {
-    tenants_accessor: F,
-    inspected_all_shards: bool,
-    last_inspected_shard: Option<TenantShardId>,
-}
-
-impl<F> TenantShardSharedIterator<F>
-where
-    F: Fn(Option<TenantShardId>) -> Option<TenantShardId>,
-{
-    pub(crate) fn new(tenants_accessor: F) -> Self {
-        Self {
-            tenants_accessor,
-            inspected_all_shards: false,
-            last_inspected_shard: None,
-        }
-    }
-
-    pub(crate) fn finished(&self) -> bool {
-        self.inspected_all_shards
-    }
-}
-
-impl<F> Iterator for TenantShardSharedIterator<F>
-where
-    F: Fn(Option<TenantShardId>) -> Option<TenantShardId>,
-{
-    // TODO(ephemeralsad): consider adding schedule context to the iterator
-    type Item = TenantShardId;
-
-    /// Returns the next tenant shard id if one exists
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.inspected_all_shards {
-            return None;
-        }
-
-        match (self.tenants_accessor)(self.last_inspected_shard) {
-            Some(tid) => {
-                self.last_inspected_shard = Some(tid);
-                Some(tid)
-            }
-            None => {
-                self.inspected_all_shards = true;
-                None
-            }
-        }
-    }
-}
-
-pub(crate) fn create_shared_shard_iterator(
-    service: Arc<Service>,
-) -> TenantShardSharedIterator<impl Fn(Option<TenantShardId>) -> Option<TenantShardId>> {
-    let tenants_accessor = move |last_inspected_shard: Option<TenantShardId>| {
-        let locked = &service.inner.read().unwrap();
-        let tenants = &locked.tenants;
-        let entry = match last_inspected_shard {
-            Some(skip_past) => {
-                // Skip to the last seen tenant shard id
-                let mut cursor = tenants.iter().skip_while(|(tid, _)| **tid != skip_past);
-
-                // Skip past the last seen
-                cursor.nth(1)
-            }
-            None => tenants.first_key_value(),
-        };
-
-        entry.map(|(tid, _)| tid).copied()
-    };
-
-    TenantShardSharedIterator::new(tenants_accessor)
-}
-
 #[cfg(test)]
 mod tests {
    use std::collections::BTreeMap;
    use std::str::FromStr;
-    use std::sync::Arc;

    use pageserver_api::controller_api::PlacementPolicy;
-    use utils::id::TenantId;
-    use utils::shard::{ShardCount, ShardNumber, TenantShardId};
+    use utils::shard::{ShardCount, ShardNumber};

    use super::*;
    use crate::scheduler::test_utils::make_test_nodes;
@@ -152,7 +66,7 @@ mod tests {
    use crate::tenant_shard::tests::make_test_tenant_with_id;

    #[test]
-    fn test_exclusive_shard_iterator() {
+    fn test_context_iterator() {
        // Hand-crafted tenant IDs to ensure they appear in the expected order when put into
        // a btreemap & iterated
        let mut t_1_shards = make_test_tenant_with_id(
@@ -192,7 +106,7 @@ mod tests {
            shard.schedule(&mut scheduler, &mut context).unwrap();
        }

-        let mut iter = TenantShardExclusiveIterator::new(&mut tenants, ScheduleMode::Speculative);
+        let mut iter = TenantShardContextIterator::new(&mut tenants, ScheduleMode::Speculative);
        let (tenant_id, context, shards) = iter.next().unwrap();
        assert_eq!(tenant_id, t1_id);
        assert_eq!(shards[0].tenant_shard_id.shard_number, ShardNumber(0));
@@ -218,46 +132,4 @@ mod tests {
            shard.intent.clear(&mut scheduler);
        }
    }
-
-    #[test]
-    fn test_shared_shard_iterator() {
-        let tenant_id = TenantId::generate();
-        let shard_count = ShardCount(8);
-
-        let mut tenant_shards = Vec::default();
-        for i in 0..shard_count.0 {
-            tenant_shards.push((
-                TenantShardId {
-                    tenant_id,
-                    shard_number: ShardNumber(i),
-                    shard_count,
-                },
-                (),
-            ))
-        }
-
-        let tenant_shards = Arc::new(tenant_shards);
-
-        let tid_iter = TenantShardSharedIterator::new({
-            let tenants = tenant_shards.clone();
-            move |last_inspected_shard: Option<TenantShardId>| {
-                let entry = match last_inspected_shard {
-                    Some(skip_past) => {
-                        let mut cursor = tenants.iter().skip_while(|(tid, _)| *tid != skip_past);
-                        cursor.nth(1)
-                    }
-                    None => tenants.first(),
-                };
-
-                entry.map(|(tid, _)| tid).copied()
-            }
-        });
-
-        let mut iterated_over = Vec::default();
-        for tid in tid_iter {
-            iterated_over.push((tid, ()));
-        }
-
-        assert_eq!(iterated_over, *tenant_shards);
-    }
 }
--- a/storage_controller/src/service/safekeeper_service.rs
+++ b/storage_controller/src/service/safekeeper_service.rs
@@ -914,13 +914,13 @@ impl Service {
                        // so it isn't counted toward the quorum.
                        if let Some(min_position) = min_position {
                            if let Ok(ok_res) = &res {
-                                if (ok_res.last_log_term, ok_res.flush_lsn) < min_position {
+                                if (ok_res.term, ok_res.flush_lsn) < min_position {
                                    // Use Error::Timeout to make this error retriable.
                                    res = Err(mgmt_api::Error::Timeout(
                                        format!(
                                        "safekeeper {} returned position {:?} which is less than minimum required position {:?}",
                                        client.node_id_label(),
-                                        (ok_res.last_log_term, ok_res.flush_lsn),
+                                        (ok_res.term, ok_res.flush_lsn),
                                        min_position
                                        )
                                    ));
@@ -1216,7 +1216,7 @@ impl Service {

        let mut sync_position = (INITIAL_TERM, Lsn::INVALID);
        for res in results.into_iter().flatten() {
-            let sk_position = (res.last_log_term, res.flush_lsn);
+            let sk_position = (res.term, res.flush_lsn);
            if sync_position < sk_position {
                sync_position = sk_position;
            }
--- a/test_runner/fixtures/endpoint/http.py
+++ b/test_runner/fixtures/endpoint/http.py
@@ -57,8 +57,6 @@ class EndpointHttpClient(requests.Session):
        self.auth = BearerAuth(jwt)

        self.mount("http://", HTTPAdapter())
-        self.prewarm_url = f"http://localhost:{external_port}/lfc/prewarm"
-        self.offload_url = f"http://localhost:{external_port}/lfc/offload"

    def dbs_and_roles(self):
        res = self.get(f"http://localhost:{self.external_port}/dbs_and_roles", auth=self.auth)
@@ -66,39 +64,33 @@ class EndpointHttpClient(requests.Session):
        return res.json()

    def prewarm_lfc_status(self) -> dict[str, str]:
-        res = self.get(self.prewarm_url)
+        res = self.get(f"http://localhost:{self.external_port}/lfc/prewarm")
        res.raise_for_status()
        json: dict[str, str] = res.json()
        return json

    def prewarm_lfc(self, from_endpoint_id: str | None = None):
+        url: str = f"http://localhost:{self.external_port}/lfc/prewarm"
        params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
-        self.post(self.prewarm_url, params=params).raise_for_status()
-        self.prewarm_lfc_wait()
+        self.post(url, params=params).raise_for_status()

-    def prewarm_lfc_wait(self):
        def prewarmed():
            json = self.prewarm_lfc_status()
            status, err = json["status"], json.get("error")
-            assert status == "completed", f"{status}, {err=}"
+            assert status == "completed", f"{status}, error {err}"

        wait_until(prewarmed, timeout=60)

-    def offload_lfc_status(self) -> dict[str, str]:
-        res = self.get(self.offload_url)
-        res.raise_for_status()
-        json: dict[str, str] = res.json()
-        return json
-
    def offload_lfc(self):
-        self.post(self.offload_url).raise_for_status()
-        self.offload_lfc_wait()
+        url = f"http://localhost:{self.external_port}/lfc/offload"
+        self.post(url).raise_for_status()

-    def offload_lfc_wait(self):
        def offloaded():
-            json = self.offload_lfc_status()
+            res = self.get(url)
+            res.raise_for_status()
+            json = res.json()
            status, err = json["status"], json.get("error")
-            assert status == "completed", f"{status}, {err=}"
+            assert status == "completed", f"{status}, error {err}"

        wait_until(offloaded)

--- a/test_runner/fixtures/neon_cli.py
+++ b/test_runner/fixtures/neon_cli.py
@@ -568,8 +568,6 @@ class NeonLocalCli(AbstractNeonCli):
        timeout: str | None = None,
        env: dict[str, str] | None = None,
        dev: bool = False,
-        autoprewarm: bool = False,
-        offload_lfc_interval_seconds: int | None = None,
    ) -> subprocess.CompletedProcess[str]:
        args = [
            "endpoint",
@@ -595,10 +593,6 @@ class NeonLocalCli(AbstractNeonCli):
            args.extend(["--create-test-user"])
        if timeout is not None:
            args.extend(["--start-timeout", str(timeout)])
-        if autoprewarm:
-            args.extend(["--autoprewarm"])
-        if offload_lfc_interval_seconds is not None:
-            args.extend(["--offload-lfc-interval-seconds", str(offload_lfc_interval_seconds)])
        if dev:
            args.extend(["--dev"])

--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1867,7 +1867,6 @@ class PageserverSchedulingPolicy(StrEnum):
    FILLING = "Filling"
    PAUSE = "Pause"
    PAUSE_FOR_RESTART = "PauseForRestart"
-    DELETING = "Deleting"


 class StorageControllerLeadershipStatus(StrEnum):
@@ -2076,27 +2075,11 @@ class NeonStorageController(MetricsGetter, LogUtils):
            headers=self.headers(TokenScope.ADMIN),
        )

-    def node_delete_old(self, node_id):
-        log.info(f"node_delete_old({node_id})")
-        self.request(
-            "DELETE",
-            f"{self.api}/control/v1/node/{node_id}",
-            headers=self.headers(TokenScope.ADMIN),
-        )
-
    def node_delete(self, node_id):
        log.info(f"node_delete({node_id})")
-        self.request(
-            "PUT",
-            f"{self.api}/control/v1/node/{node_id}/delete",
-            headers=self.headers(TokenScope.ADMIN),
-        )
-
-    def cancel_node_delete(self, node_id):
-        log.info(f"cancel_node_delete({node_id})")
        self.request(
            "DELETE",
-            f"{self.api}/control/v1/node/{node_id}/delete",
+            f"{self.api}/control/v1/node/{node_id}",
            headers=self.headers(TokenScope.ADMIN),
        )

@@ -4362,8 +4345,6 @@ class Endpoint(PgProtocol, LogUtils):
        basebackup_request_tries: int | None = None,
        timeout: str | None = None,
        env: dict[str, str] | None = None,
-        autoprewarm: bool = False,
-        offload_lfc_interval_seconds: int | None = None,
    ) -> Self:
        """
        Start the Postgres instance.
@@ -4388,8 +4369,6 @@ class Endpoint(PgProtocol, LogUtils):
            basebackup_request_tries=basebackup_request_tries,
            timeout=timeout,
            env=env,
-            autoprewarm=autoprewarm,
-            offload_lfc_interval_seconds=offload_lfc_interval_seconds,
        )
        self._running.release(1)
        self.log_config_value("shared_buffers")
@@ -4605,8 +4584,6 @@ class Endpoint(PgProtocol, LogUtils):
        pageserver_id: int | None = None,
        allow_multiple: bool = False,
        basebackup_request_tries: int | None = None,
-        autoprewarm: bool = False,
-        offload_lfc_interval_seconds: int | None = None,
    ) -> Self:
        """
        Create an endpoint, apply config, and start Postgres.
@@ -4627,8 +4604,6 @@ class Endpoint(PgProtocol, LogUtils):
            pageserver_id=pageserver_id,
            allow_multiple=allow_multiple,
            basebackup_request_tries=basebackup_request_tries,
-            autoprewarm=autoprewarm,
-            offload_lfc_interval_seconds=offload_lfc_interval_seconds,
        )

        return self
@@ -4713,8 +4688,6 @@ class EndpointFactory:
        remote_ext_base_url: str | None = None,
        pageserver_id: int | None = None,
        basebackup_request_tries: int | None = None,
-        autoprewarm: bool = False,
-        offload_lfc_interval_seconds: int | None = None,
    ) -> Endpoint:
        ep = Endpoint(
            self.env,
@@ -4736,8 +4709,6 @@ class EndpointFactory:
            remote_ext_base_url=remote_ext_base_url,
            pageserver_id=pageserver_id,
            basebackup_request_tries=basebackup_request_tries,
-            autoprewarm=autoprewarm,
-            offload_lfc_interval_seconds=offload_lfc_interval_seconds,
        )

    def create(
--- a/test_runner/fixtures/pageserver/allowed_errors.py
+++ b/test_runner/fixtures/pageserver/allowed_errors.py
@@ -111,7 +111,6 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
    ".*stalling layer flushes for compaction backpressure.*",
    ".*layer roll waiting for flush due to compaction backpressure.*",
    ".*BatchSpanProcessor.*",
-    ".*No broker updates received for a while.*",
    *(
        [
            r".*your platform is not a supported production platform, ignoing request for O_DIRECT; this could hide alignment bugs.*"
--- a/test_runner/fixtures/safekeeper/http.py
+++ b/test_runner/fixtures/safekeeper/http.py
@@ -112,18 +112,12 @@ class TimelineCreateRequest:
 class TimelineMembershipSwitchResponse:
    previous_conf: MembershipConfiguration
    current_conf: MembershipConfiguration
-    last_log_term: int
-    flush_lsn: Lsn

    @classmethod
    def from_json(cls, d: dict[str, Any]) -> TimelineMembershipSwitchResponse:
        previous_conf = MembershipConfiguration.from_json(d["previous_conf"])
        current_conf = MembershipConfiguration.from_json(d["current_conf"])
-        last_log_term = d["last_log_term"]
-        flush_lsn = Lsn(d["flush_lsn"])
-        return TimelineMembershipSwitchResponse(
-            previous_conf, current_conf, last_log_term, flush_lsn
-        )
+        return TimelineMembershipSwitchResponse(previous_conf, current_conf)


 class SafekeeperHttpClient(requests.Session, MetricsGetter):
--- a/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
+++ b/test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py
@@ -55,10 +55,9 @@ def test_pageserver_characterize_throughput_with_n_tenants(
@pytest.mark.parametrize("duration", [20 * 60])
@pytest.mark.parametrize("pgbench_scale", [get_scale_for_db(2048)])
 # we use 1 client to characterize latencies, and 64 clients to characterize throughput/scalability
-# we use 8 clients because we see a latency knee around 6-8 clients on im4gn.2xlarge instance type,
-# which we use for this periodic test - at a cpu utilization of around 70 % - which is considered
-# a good utilization for pageserver.
-@pytest.mark.parametrize("n_clients", [1, 8])
+# we use 64 clients because typically for a high number of connections we recommend the connection pooler
+# which by default uses 64 connections
+@pytest.mark.parametrize("n_clients", [1, 64])
@pytest.mark.parametrize("n_tenants", [1])
@pytest.mark.timeout(2400)
 def test_pageserver_characterize_latencies_with_1_client_and_throughput_with_many_clients_one_tenant(
--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -416,8 +416,6 @@ def test_duplicate_creation(neon_env_builder: NeonEnvBuilder):
        # timeline creation (uploads). mask it out here to avoid flakyness.
        del success_result["remote_consistent_lsn_visible"]
        del repeat_result["remote_consistent_lsn_visible"]
-        del success_result["walreceiver_status"]
-        del repeat_result["walreceiver_status"]
        assert repeat_result == success_result
    finally:
        env.pageserver.stop(immediate=True)
--- a/test_runner/regress/test_lfc_prewarm.py
+++ b/test_runner/regress/test_lfc_prewarm.py
@@ -1,38 +1,34 @@
 import random
 import threading
-from enum import StrEnum
-from time import sleep
-from typing import Any
+import time
+from enum import Enum

 import pytest
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.log_helper import log
 from fixtures.neon_fixtures import NeonEnv
-from fixtures.utils import USE_LFC, wait_until
+from fixtures.utils import USE_LFC
 from prometheus_client.parser import text_string_to_metric_families as prom_parse_impl
-from psycopg2.extensions import cursor as Cursor


-class PrewarmMethod(StrEnum):
-    POSTGRES = "postgres"
-    COMPUTE_CTL = "compute-ctl"
-    AUTOPREWARM = "autoprewarm"
+class LfcQueryMethod(Enum):
+    COMPUTE_CTL = False
+    POSTGRES = True


-PREWARM_LABEL = "compute_ctl_lfc_prewarms_total"
-OFFLOAD_LABEL = "compute_ctl_lfc_offloads_total"
-METHOD_VALUES = [e for e in PrewarmMethod]
-METHOD_IDS = [e.value for e in PrewarmMethod]
+PREWARM_LABEL = "compute_ctl_lfc_prewarm_requests_total"
+OFFLOAD_LABEL = "compute_ctl_lfc_offload_requests_total"
+QUERY_OPTIONS = LfcQueryMethod.POSTGRES, LfcQueryMethod.COMPUTE_CTL


-def check_pinned_entries(cur: Cursor):
+def check_pinned_entries(cur):
    # some LFC buffer can be temporary locked by autovacuum or background writer
    for _ in range(10):
        cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_chunks_pinned'")
        n_pinned = cur.fetchall()[0][0]
        if n_pinned == 0:
            break
-        sleep(1)
+        time.sleep(1)
    assert n_pinned == 0


@@ -45,68 +41,21 @@ def prom_parse(client: EndpointHttpClient) -> dict[str, float]:
    }


-def offload_lfc(method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor) -> Any:
-    if method == PrewarmMethod.AUTOPREWARM:
-        client.offload_lfc_wait()
-    elif method == PrewarmMethod.COMPUTE_CTL:
-        status = client.prewarm_lfc_status()
-        assert status["status"] == "not_prewarmed"
-        assert "error" not in status
-        client.offload_lfc()
-        assert client.prewarm_lfc_status()["status"] == "not_prewarmed"
-        assert prom_parse(client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0}
-    elif method == PrewarmMethod.POSTGRES:
-        cur.execute("select get_local_cache_state()")
-        return cur.fetchall()[0][0]
-    else:
-        raise AssertionError(f"{method} not in PrewarmMethod")
-
-
-def prewarm_endpoint(
-    method: PrewarmMethod, client: EndpointHttpClient, cur: Cursor, lfc_state: str | None
-):
-    if method == PrewarmMethod.AUTOPREWARM:
-        client.prewarm_lfc_wait()
-    elif method == PrewarmMethod.COMPUTE_CTL:
-        client.prewarm_lfc()
-    elif method == PrewarmMethod.POSTGRES:
-        cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
-
-
-def check_prewarmed(
-    method: PrewarmMethod, client: EndpointHttpClient, desired_status: dict[str, str | int]
-):
-    if method == PrewarmMethod.AUTOPREWARM:
-        assert client.prewarm_lfc_status() == desired_status
-        assert prom_parse(client)[PREWARM_LABEL] == 1
-    elif method == PrewarmMethod.COMPUTE_CTL:
-        assert client.prewarm_lfc_status() == desired_status
-        assert prom_parse(client) == {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1}
-
-
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
-@pytest.mark.parametrize("method", METHOD_VALUES, ids=METHOD_IDS)
-def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
+@pytest.mark.parametrize("query", QUERY_OPTIONS, ids=["postgres", "compute-ctl"])
+def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
    env = neon_simple_env
    n_records = 1000000
-    cfg = [
-        "autovacuum = off",
-        "shared_buffers=1MB",
-        "neon.max_file_cache_size=1GB",
-        "neon.file_cache_size_limit=1GB",
-        "neon.file_cache_prewarm_limit=1000",
-    ]
-    offload_secs = 2
-
-    if method == PrewarmMethod.AUTOPREWARM:
-        endpoint = env.endpoints.create_start(
-            branch_name="main",
-            config_lines=cfg,
-            autoprewarm=True,
-            offload_lfc_interval_seconds=offload_secs,
-        )
-    else:
-        endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
+    endpoint = env.endpoints.create_start(
+        branch_name="main",
+        config_lines=[
+            "autovacuum = off",
+            "shared_buffers=1MB",
+            "neon.max_file_cache_size=1GB",
+            "neon.file_cache_size_limit=1GB",
+            "neon.file_cache_prewarm_limit=1000",
+        ],
+    )

    pg_conn = endpoint.connect()
    pg_cur = pg_conn.cursor()
@@ -120,21 +69,31 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
    lfc_cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
    log.info(f"Inserted {n_records} rows")

-    client = endpoint.http_client()
-    lfc_state = offload_lfc(method, client, pg_cur)
+    http_client = endpoint.http_client()
+    if query is LfcQueryMethod.COMPUTE_CTL:
+        status = http_client.prewarm_lfc_status()
+        assert status["status"] == "not_prewarmed"
+        assert "error" not in status
+        http_client.offload_lfc()
+        assert http_client.prewarm_lfc_status()["status"] == "not_prewarmed"
+        assert prom_parse(http_client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0}
+    else:
+        pg_cur.execute("select get_local_cache_state()")
+        lfc_state = pg_cur.fetchall()[0][0]

    endpoint.stop()
-    if method == PrewarmMethod.AUTOPREWARM:
-        endpoint.start(autoprewarm=True, offload_lfc_interval_seconds=offload_secs)
-    else:
-        endpoint.start()
+    endpoint.start()

    pg_conn = endpoint.connect()
    pg_cur = pg_conn.cursor()

    lfc_conn = endpoint.connect(dbname="lfc")
    lfc_cur = lfc_conn.cursor()
-    prewarm_endpoint(method, client, pg_cur, lfc_state)
+
+    if query is LfcQueryMethod.COMPUTE_CTL:
+        http_client.prewarm_lfc()
+    else:
+        pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))

    pg_cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_used_pages'")
    lfc_used_pages = pg_cur.fetchall()[0][0]
@@ -152,32 +111,33 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv, method: PrewarmMethod):
        and prewarm_info[1] > 0
        and prewarm_info[0] == prewarm_info[1] + prewarm_info[2]
    )
+
    lfc_cur.execute("select sum(pk) from t")
    assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2

    check_pinned_entries(pg_cur)
+
    desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
-    check_prewarmed(method, client, desired)
-
-
-# autoprewarm isn't needed as we prewarm manually
-WORKLOAD_VALUES = METHOD_VALUES[:-1]
-WORKLOAD_IDS = METHOD_IDS[:-1]
+    if query is LfcQueryMethod.COMPUTE_CTL:
+        assert http_client.prewarm_lfc_status() == desired
+        assert prom_parse(http_client) == {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1}


@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
-@pytest.mark.parametrize("method", WORKLOAD_VALUES, ids=WORKLOAD_IDS)
-def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMethod):
+@pytest.mark.parametrize("query", QUERY_OPTIONS, ids=["postgres", "compute-ctl"])
+def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMethod):
    env = neon_simple_env
    n_records = 10000
    n_threads = 4
-    cfg = [
-        "shared_buffers=1MB",
-        "neon.max_file_cache_size=1GB",
-        "neon.file_cache_size_limit=1GB",
-        "neon.file_cache_prewarm_limit=1000000",
-    ]
-    endpoint = env.endpoints.create_start(branch_name="main", config_lines=cfg)
+    endpoint = env.endpoints.create_start(
+        branch_name="main",
+        config_lines=[
+            "shared_buffers=1MB",
+            "neon.max_file_cache_size=1GB",
+            "neon.file_cache_size_limit=1GB",
+            "neon.file_cache_prewarm_limit=1000000",
+        ],
+    )

    pg_conn = endpoint.connect()
    pg_cur = pg_conn.cursor()
@@ -194,7 +154,12 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
    log.info(f"Inserted {n_records} rows")

    http_client = endpoint.http_client()
-    lfc_state = offload_lfc(method, http_client, pg_cur)
+    if query is LfcQueryMethod.COMPUTE_CTL:
+        http_client.offload_lfc()
+    else:
+        pg_cur.execute("select get_local_cache_state()")
+        lfc_state = pg_cur.fetchall()[0][0]
+
    running = True
    n_prewarms = 0

@@ -205,8 +170,8 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
        while running:
            src = random.randint(1, n_records)
            dst = random.randint(1, n_records)
-            lfc_cur.execute(f"update accounts set balance=balance-100 where id={src}")
-            lfc_cur.execute(f"update accounts set balance=balance+100 where id={dst}")
+            lfc_cur.execute("update accounts set balance=balance-100 where id=%s", (src,))
+            lfc_cur.execute("update accounts set balance=balance+100 where id=%s", (dst,))
            n_transfers += 1
        log.info(f"Number of transfers: {n_transfers}")

@@ -218,7 +183,13 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
            pg_cur.execute("select pg_reload_conf()")
            pg_cur.execute("alter system set neon.file_cache_size_limit='1GB'")
            pg_cur.execute("select pg_reload_conf()")
-            prewarm_endpoint(method, http_client, pg_cur, lfc_state)
+
+            if query is LfcQueryMethod.COMPUTE_CTL:
+                # Same thing as prewarm_lfc(), testing other method
+                http_client.prewarm_lfc(endpoint.endpoint_id)
+            else:
+                pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
+
            nonlocal n_prewarms
            n_prewarms += 1
        log.info(f"Number of prewarms: {n_prewarms}")
@@ -232,10 +203,7 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
    prewarm_thread = threading.Thread(target=prewarm)
    prewarm_thread.start()

-    def prewarmed():
-        assert n_prewarms > 5
-
-    wait_until(prewarmed)
+    time.sleep(20)

    running = False
    for t in workload_threads:
@@ -247,5 +215,5 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, method: PrewarmMet
    assert total_balance == 0

    check_pinned_entries(pg_cur)
-    if method != PrewarmMethod.POSTGRES:
+    if query is LfcQueryMethod.COMPUTE_CTL:
        assert prom_parse(http_client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: n_prewarms}
--- a/test_runner/regress/test_storage_controller.py
+++ b/test_runner/regress/test_storage_controller.py
@@ -2618,7 +2618,7 @@ def test_storage_controller_node_deletion(
        wait_until(assert_shards_migrated)

    log.info(f"Deleting pageserver {victim.id}")
-    env.storage_controller.node_delete_old(victim.id)
+    env.storage_controller.node_delete(victim.id)

    if not while_offline:

@@ -2653,60 +2653,6 @@ def test_storage_controller_node_deletion(
    env.storage_controller.consistency_check()


-def test_storage_controller_node_delete_cancellation(neon_env_builder: NeonEnvBuilder):
-    neon_env_builder.num_pageservers = 3
-    neon_env_builder.num_azs = 3
-    env = neon_env_builder.init_configs()
-    env.start()
-
-    tenant_count = 12
-    shard_count_per_tenant = 16
-    tenant_ids = []
-
-    for _ in range(0, tenant_count):
-        tid = TenantId.generate()
-        tenant_ids.append(tid)
-        env.create_tenant(
-            tid, placement_policy='{"Attached":1}', shard_count=shard_count_per_tenant
-        )
-
-    # Sanity check: initial creations should not leave the system in an unstable scheduling state
-    assert env.storage_controller.reconcile_all() == 0
-
-    nodes = env.storage_controller.node_list()
-    assert len(nodes) == 3
-
-    env.storage_controller.configure_failpoints(("sleepy-delete-loop", "return(10000)"))
-
-    ps_id_to_delete = env.pageservers[0].id
-
-    env.storage_controller.warm_up_all_secondaries()
-    env.storage_controller.retryable_node_operation(
-        lambda ps_id: env.storage_controller.node_delete(ps_id),
-        ps_id_to_delete,
-        max_attempts=3,
-        backoff=2,
-    )
-
-    env.storage_controller.poll_node_status(
-        ps_id_to_delete,
-        PageserverAvailability.ACTIVE,
-        PageserverSchedulingPolicy.DELETING,
-        max_attempts=6,
-        backoff=2,
-    )
-
-    env.storage_controller.cancel_node_delete(ps_id_to_delete)
-
-    env.storage_controller.poll_node_status(
-        ps_id_to_delete,
-        PageserverAvailability.ACTIVE,
-        PageserverSchedulingPolicy.ACTIVE,
-        max_attempts=6,
-        backoff=2,
-    )
-
-
@pytest.mark.parametrize("shard_count", [None, 2])
 def test_storage_controller_metadata_health(
    neon_env_builder: NeonEnvBuilder,
@@ -3262,7 +3208,7 @@ def test_ps_unavailable_after_delete(neon_env_builder: NeonEnvBuilder):
    assert_nodes_count(3)

    ps = env.pageservers[0]
-    env.storage_controller.node_delete_old(ps.id)
+    env.storage_controller.node_delete(ps.id)

    # After deletion, the node count must be reduced
    assert_nodes_count(2)
--- a/test_runner/regress/test_wal_receiver.py
+++ b/test_runner/regress/test_wal_receiver.py
@@ -13,6 +13,50 @@ if TYPE_CHECKING:
    from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder


+# Checks that pageserver's walreceiver state is printed in the logs during WAL wait timeout.
+# Ensures that walreceiver does not run without any data inserted and only starts after the insertion.
+def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
+    # we assert below that the walreceiver is not active before data writes.
+    # with manually created timelines, it is active.
+    # FIXME: remove this test once we remove timelines_onto_safekeepers
+    neon_env_builder.storage_controller_config = {
+        "timelines_onto_safekeepers": False,
+    }
+
+    # Trigger WAL wait timeout faster
+    neon_env_builder.pageserver_config_override = "wait_lsn_timeout = '1s'"
+    env = neon_env_builder.init_start()
+    env.pageserver.http_client()
+
+    # In this test we force 'Timed out while waiting for WAL record error' while
+    # fetching basebackup and don't want any retries.
+    os.environ["NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES"] = "1"
+
+    tenant_id, timeline_id = env.create_tenant()
+    expected_timeout_error = f"Timed out while waiting for WAL record at LSN {future_lsn} to arrive"
+    env.pageserver.allowed_errors.append(f".*{expected_timeout_error}.*")
+
+    try:
+        trigger_wait_lsn_timeout(env, tenant_id)
+    except Exception as e:
+        exception_string = str(e)
+        assert expected_timeout_error in exception_string, "Should time out during waiting for WAL"
+        assert "WalReceiver status: Not active" in exception_string, (
+            "Walreceiver should not be active before any data writes"
+        )
+
+    insert_test_elements(env, tenant_id, start=0, count=1_000)
+    try:
+        trigger_wait_lsn_timeout(env, tenant_id)
+    except Exception as e:
+        exception_string = str(e)
+        assert expected_timeout_error in exception_string, "Should time out during waiting for WAL"
+        assert "WalReceiver status: Not active" not in exception_string, (
+            "Should not be inactive anymore after INSERTs are made"
+        )
+        assert "WalReceiver status" in exception_string, "But still should have some other status"
+
+
 # Checks that all active safekeepers are shown in pageserver's walreceiver state printed on WAL wait timeout.
 # Kills one of the safekeepers and ensures that only the active ones are printed in the state.
 def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuilder):
--- a/test_runner/sql_regress/expected/neon-subxacts.out
+++ b/test_runner/sql_regress/expected/neon-subxacts.out
@@ -1,21 +0,0 @@
-DO $$
-DECLARE
-i numeric;
-BEGIN
-  create role somebody;
-  FOR i IN 1..1000000 LOOP
-    BEGIN
-	  IF i % 1000 = 0 THEN
-	    alter role somebody password 'welcome';
-	  ELSE
-        PERFORM 1;
-	  END IF;
-    EXCEPTION WHEN OTHERS THEN
-      RAISE WARNING 'error';
-    END;
-    IF I = 1000000 THEN
-      PERFORM pg_log_backend_memory_contexts(pg_backend_pid());
-    END IF;
-  END LOOP;
-END;
-$$;
--- a/test_runner/sql_regress/parallel_schedule
+++ b/test_runner/sql_regress/parallel_schedule
@@ -10,4 +10,3 @@ test: neon-clog
 test: neon-test-utils
 test: neon-vacuum-full
 test: neon-event-triggers
-test: neon-subxacts
--- a/test_runner/sql_regress/sql/neon-subxacts.sql
+++ b/test_runner/sql_regress/sql/neon-subxacts.sql
@@ -1,21 +0,0 @@
-DO $$
-DECLARE
-i numeric;
-BEGIN
-  create role somebody;
-  FOR i IN 1..1000000 LOOP
-    BEGIN
-	  IF i % 1000 = 0 THEN
-	    alter role somebody password 'welcome';
-	  ELSE
-        PERFORM 1;
-	  END IF;
-    EXCEPTION WHEN OTHERS THEN
-      RAISE WARNING 'error';
-    END;
-    IF I = 1000000 THEN
-      PERFORM pg_log_backend_memory_contexts(pg_backend_pid());
-    END IF;
-  END LOOP;
-END;
-$$;
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -40,10 +40,8 @@ env_logger = { version = "0.11" }
 fail = { version = "0.5", default-features = false, features = ["failpoints"] }
 form_urlencoded = { version = "1" }
 futures-channel = { version = "0.3", features = ["sink"] }
-futures-core = { version = "0.3" }
 futures-executor = { version = "0.3" }
 futures-io = { version = "0.3" }
-futures-sink = { version = "0.3" }
 futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
 generic-array = { version = "0.14", default-features = false, features = ["more_lengths", "zeroize"] }
 getrandom = { version = "0.2", default-features = false, features = ["std"] }
@@ -70,7 +68,6 @@ num-integer = { version = "0.1", features = ["i128"] }
 num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
 num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] }
 num-traits = { version = "0.2", features = ["i128", "libm"] }
-once_cell = { version = "1" }
 p256 = { version = "0.13", features = ["jwk"] }
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] }
@@ -115,13 +112,10 @@ zstd-sys = { version = "2", default-features = false, features = ["legacy", "std

 [build-dependencies]
 ahash = { version = "0.8" }
-anstream = { version = "0.6" }
 anyhow = { version = "1", features = ["backtrace"] }
 bytes = { version = "1", features = ["serde"] }
 cc = { version = "1", default-features = false, features = ["parallel"] }
 chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"] }
-clap = { version = "4", features = ["derive", "env", "string"] }
-clap_builder = { version = "4", default-features = false, features = ["color", "env", "help", "std", "string", "suggestions", "usage"] }
 either = { version = "1" }
 getrandom = { version = "0.2", default-features = false, features = ["std"] }
 half = { version = "2", default-features = false, features = ["num-traits"] }
@@ -139,7 +133,6 @@ num-integer = { version = "0.1", features = ["i128"] }
 num-iter = { version = "0.1", default-features = false, features = ["i128", "std"] }
 num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] }
 num-traits = { version = "0.2", features = ["i128", "libm"] }
-once_cell = { version = "1" }
 parquet = { version = "53", default-features = false, features = ["zstd"] }
 prettyplease = { version = "0.2", default-features = false, features = ["verbatim"] }
 proc-macro2 = { version = "1" }
@@ -149,7 +142,6 @@ regex = { version = "1" }
 regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
 regex-syntax = { version = "0.8" }
 serde = { version = "1", features = ["alloc", "derive"] }
-serde_json = { version = "1", features = ["alloc", "raw_value"] }
 syn = { version = "2", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] }
 time-macros = { version = "0.2", default-features = false, features = ["formatting", "parsing", "serde"] }
 toml_edit = { version = "0.22", features = ["serde"] }
Author	SHA1	Message	Date
Ruslan Talpa	ee7d8e4512	revert pg-14 submodule changes	2025-07-04 13:55:35 +03:00
Ruslan Talpa	6549708b44	change subzero dep sha	2025-07-04 13:39:49 +03:00
Ruslan Talpa	45631bf2e5	add line to remove from diff	2025-07-04 13:28:52 +03:00
Ruslan Talpa	5dbca8c756	revert changes from original hack branch	2025-07-04 13:27:59 +03:00
Ruslan Talpa	9f46ca5eb1	Merge branch 'main' into ruslan/subzero-integration	2025-07-04 13:03:55 +03:00
Ruslan Talpa	54da030a2d	place the entire rest_broker code under a feature flag	2025-07-04 12:46:48 +03:00
Ruslan Talpa	afa4e48071	put subzero dependency under a feature flag	2025-07-04 11:27:36 +03:00
Ruslan Talpa	b54872a4dc	fix error after merging latest master	2025-07-03 17:24:13 +03:00
Ruslan Talpa	486829f875	Merge branch 'main' into ruslan/subzero-integration	2025-07-03 17:10:43 +03:00
Ruslan Talpa	4775aa3e01	Merge branch 'main' into ruslan/subzero-integration	2025-07-01 13:46:57 +03:00
Ruslan Talpa	1785f856b6	Move the local auth backend under the "testing" feature	2025-06-30 16:52:45 +03:00
Ruslan Talpa	69b22b05da	add in readme the way to run auth/rest broker locally	2025-06-30 16:17:35 +03:00
Ruslan Talpa	bf0007fa96	add note about local confir read code	2025-06-30 16:12:04 +03:00
Ruslan Talpa	a9bbe7b00b	remove unused imports	2025-06-30 16:02:30 +03:00
Ruslan Talpa	7e3f64b309	implement local auth backend for proxy and remove control plane hacks	2025-06-30 16:00:43 +03:00
Ruslan Talpa	9480d17de7	fix bug in pickcurrent_chema	2025-06-30 12:53:47 +03:00
Ruslan Talpa	424004ec95	apply cargo fmt	2025-06-30 12:32:47 +03:00
Ruslan Talpa	88d1a78260	cleanup the rest path code	2025-06-30 12:30:33 +03:00
Ruslan Talpa	8e544c7f99	import introspection queries instead of loading from files	2025-06-27 14:40:02 +03:00
Ruslan Talpa	4f49fc5b79	move common error types and http realted functions to error.rs and http_util.rs	2025-06-27 13:37:29 +03:00
Ruslan Talpa	5461039c3f	implement remote config fetch from the db and cache introspected schema	2025-06-27 10:20:26 +03:00
Ruslan Talpa	d6c36d103e	subzero integration WIP6 beginning work on introspection of config and schema shape from the database	2025-06-26 14:42:31 +03:00
Ruslan Talpa	fbb2416685	pg 14 vendor commit changed	2025-06-26 10:25:54 +03:00
Ruslan Talpa	8072fae2fe	Merge branch 'main' into ruslan/subzero-integration	2025-06-26 10:19:16 +03:00
Ruslan Talpa	3869d680f9	use a global parsed/cached schema	2025-06-26 10:14:28 +03:00
Ruslan Talpa	d3fa228d92	move subzero local test files to a "dot" folder	2025-06-25 16:43:50 +03:00
Ruslan Talpa	be6a259b85	subzero integration WIP5 cleanup and postprocess the response and set the correct headers/status and handle errors	2025-06-25 14:33:45 +03:00
Ruslan Talpa	af3ca24a5e	remove unused enum values	2025-06-25 14:32:46 +03:00
Ruslan Talpa	8b44f5b479	subzero integration WIP5 extract the response body from the local proxy response	2025-06-24 17:03:42 +03:00
Ruslan Talpa	d1445cf3eb	subzero integration WIP4 queries generated by subzero reach database and execute succesfully	2025-06-24 15:33:51 +03:00
Ruslan Talpa	67d3026fc4	subzero integration WIP3 * query makes it to the database	2025-06-23 11:48:55 +03:00
Ruslan Talpa	09e62e9b98	subzero integration WIP2	2025-06-23 10:11:06 +03:00
Ruslan Talpa	e121da4bfc	subzero integration WIP1	2025-06-20 15:10:45 +03:00
Ruslan Talpa	4a948c9781	add note about ICU lib missing on macs and the fix	2025-06-20 10:58:38 +03:00
Ruslan Talpa	b39f04ab99	add missing parts to make disable_pg_session_jwt flag work	2025-06-20 10:23:42 +03:00
Ruslan Talpa	6bd15908fb	make pg_session_jwt instalation optional with a cli flag	2025-06-20 10:17:32 +03:00
Ruslan Talpa	3e36d516c2	vanilla pg dokcer image setup	2025-06-20 09:37:39 +03:00
Conrad Ludgate	cc3af6f7dd	code for local setup of auth-broker	2025-06-20 09:37:39 +03:00
Conrad Ludgate	5badc7a3fb	code for local setup of auth-broker	2025-06-19 10:34:09 +01:00
Conrad Ludgate	3a73644308	use cargo-chef for compute-tools	2025-06-19 09:24:53 +01:00