Compare commits


1 Commit

Author: Arseny Sher
SHA1: ee46acee26
Message: Add safekeeper option to patch control file.
https://github.com/neondatabase/neon/issues/6397
Date: 2024-01-21 00:22:30 +03:00
161 changed files with 2218 additions and 6859 deletions

View File

@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "20s", terminate-after = 3 }
slow-timeout = "1m"

View File

@@ -69,15 +69,7 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build
run: |
/kaniko/executor \
--reproducible \
--snapshotMode=redo \
--skip-unused-stages \
--dockerfile ${{ inputs.dockerfile-path }} \
--cache=true \
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-amd64
run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --dockerfile ${{ inputs.dockerfile-path }} --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-amd64
kaniko-arm:
if: needs.check-if-build-tools-dockerfile-changed.outputs.docker_file_changed == 'true'
@@ -93,15 +85,7 @@ jobs:
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Kaniko build
run: |
/kaniko/executor \
--reproducible \
--snapshotMode=redo \
--skip-unused-stages \
--dockerfile ${{ inputs.dockerfile-path }} \
--cache=true \
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-arm64
run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --dockerfile ${{ inputs.dockerfile-path }} --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-arm64
manifest:
if: needs.check-if-build-tools-dockerfile-changed.outputs.docker_file_changed == 'true'
@@ -115,10 +99,7 @@ jobs:
steps:
- name: Create manifest
run: |
docker manifest create 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }} \
--amend 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-amd64 \
--amend 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-arm64
run: docker manifest create 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }} --amend 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-amd64 --amend 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}-arm64
- name: Push manifest
run: docker manifest push 369495373322.dkr.ecr.eu-central-1.amazonaws.com/${{ inputs.image-name }}:${{ needs.tag.outputs.build-tools-tag }}

View File

@@ -21,8 +21,6 @@ env:
COPT: '-Werror'
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
# A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
E2E_CONCURRENCY_GROUP: ${{ github.repository }}-${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
jobs:
check-permissions:
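Context for the removed E2E_CONCURRENCY_GROUP setting: the expression builds a group name from the repository, workflow, and ref name, and appends the commit SHA only on main, so main-branch e2e runs each get a unique group (never cancelled by later pushes), while PR runs share one group per branch and can be cancelled by the "Cancel previous e2e-tests runs" step shown below. A minimal Rust sketch of how the expression resolves; the repository, workflow, and branch values are illustrative only, not taken from the workflow:

// Hypothetical helper mirroring the GitHub Actions expression used for E2E_CONCURRENCY_GROUP.
fn concurrency_group(repository: &str, workflow: &str, ref_name: &str, sha: &str) -> String {
    // `github.ref_name == 'main' && github.sha || 'anysha'` behaves like a ternary expression.
    let suffix = if ref_name == "main" { sha } else { "anysha" };
    format!("{repository}-{workflow}-{ref_name}-{suffix}")
}

fn main() {
    // On main, each commit gets its own group, so later pushes never cancel earlier runs.
    assert_eq!(
        concurrency_group("neondatabase/neon", "Test", "main", "ee46acee26"),
        "neondatabase/neon-Test-main-ee46acee26"
    );
    // On a PR branch, the group is stable, so a newer run can cancel the previous one.
    assert_eq!(
        concurrency_group("neondatabase/neon", "Test", "feature-branch", "deadbeef"),
        "neondatabase/neon-Test-feature-branch-anysha"
    );
}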
@@ -46,20 +44,6 @@ jobs:
exit 1
cancel-previous-e2e-tests:
needs: [ check-permissions ]
if: github.event_name == 'pull_request'
runs-on: ubuntu-latest
steps:
- name: Cancel previous e2e-tests runs for this PR
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
gh workflow --repo neondatabase/cloud \
run cancel-previous-in-concurrency-group.yml \
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
tag:
needs: [ check-permissions ]
runs-on: [ self-hosted, gen3, small ]
@@ -202,11 +186,7 @@ jobs:
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
# Raise locked memory limit for tokio-epoll-uring.
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
# io_uring will account the memory of the CQ and SQ as locked.
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
options: --init
strategy:
fail-fast: false
matrix:
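For reference, the memlock ulimit being dropped here is 64 MiB written out in bytes, and --shm-size=512mb raised shared memory above the 64 MB Docker default mentioned in comments elsewhere in this file. A trivial, purely illustrative Rust check of that arithmetic:

fn main() {
    // --ulimit memlock=67108864:67108864 sets the soft and hard locked-memory limits to 64 MiB,
    // covering tokio-epoll-uring's SQ/CQ rings on kernels that account them as locked memory.
    let memlock_bytes: u64 = 64 * 1024 * 1024;
    assert_eq!(memlock_bytes, 67_108_864);
    println!("memlock limit: {memlock_bytes} bytes (64 MiB)");
}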
@@ -360,12 +340,8 @@ jobs:
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
- name: Run rust tests
env:
NEXTEST_RETRIES: 3
run: |
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
done
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
@@ -443,8 +419,8 @@ jobs:
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
# Default shared memory is 64mb
options: --init --shm-size=512mb
strategy:
fail-fast: false
matrix:
@@ -472,7 +448,6 @@ jobs:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ needs.tag.outputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: std-fs
- name: Merge and upload coverage data
if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
@@ -483,13 +458,12 @@ jobs:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${{ needs.build-buildtools-image.outputs.build-tools-tag }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
# Default shared memory is 64mb
options: --init --shm-size=512mb
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
fail-fast: false
matrix:
# the amount of groups (N) should be reflected in `extra_params: --splits N ...`
pytest_split_group: [ 1, 2, 3, 4 ]
build_type: [ release ]
steps:
@@ -503,12 +477,11 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ github.ref_name == 'main' }}
extra_params: --splits 4 --group ${{ matrix.pytest_split_group }}
extra_params: --splits ${{ strategy.job-total }} --group ${{ matrix.pytest_split_group }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}"
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
# XXX: no coverage data handling here, since benchmarks are run on release builds,
# while coverage is currently collected for the debug ones
@@ -722,8 +695,7 @@ jobs:
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\",
\"storage_image_tag\": \"${{ needs.tag.outputs.build-tag }}\",
\"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\",
\"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
\"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\"
}
}"

View File

@@ -124,12 +124,12 @@ jobs:
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
CARGO_FEATURES: --features testing
CARGO_FLAGS: --release
CARGO_FLAGS: --locked --release
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
steps:
@@ -210,20 +210,18 @@ jobs:
- name: Run cargo build
run: |
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
- name: Run cargo test
env:
NEXTEST_RETRIES: 3
run: |
cargo nextest run $CARGO_FEATURES
cargo test $CARGO_FLAGS $CARGO_FEATURES
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_s3
cargo test $CARGO_FLAGS --package remote_storage --test test_real_s3
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
@@ -233,7 +231,7 @@ jobs:
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_azure
cargo test $CARGO_FLAGS --package remote_storage --test test_real_azure
check-codestyle-rust-arm:
timeout-minutes: 90

View File

@@ -20,51 +20,111 @@ defaults:
run:
shell: bash -euo pipefail {0}
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
permissions: {}
jobs:
tag-image:
runs-on: [ self-hosted, gen3, small ]
container: golang:1.19-bullseye
env:
ECR_IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools
DOCKER_HUB_IMAGE: docker.io/neondatabase/build-tools
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: ${{ inputs.to-tag }}
outputs:
next-digest-buildtools: ${{ steps.next-digest.outputs.next-digest-buildtools }}
prev-digest-buildtools: ${{ steps.prev-digest.outputs.prev-digest-buildtools }}
steps:
- name: Install Crane & ECR helper
run: |
go install github.com/google/go-containerregistry/cmd/crane@a54d64203cffcbf94146e04069aae4a97f228ee2 # v0.16.1
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@adf1bafd791ae7d4ff098108b1e91f36a4da5404 # v0.7.1
- name: Configure ECR login
run: |
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
- name: Get source image digest
id: next-digest
run: |
NEXT_DIGEST=$(crane digest ${IMAGE}:${FROM_TAG} || true)
if [ -z "${NEXT_DIGEST}" ]; then
echo >&2 "Image ${IMAGE}:${FROM_TAG} does not exist"
exit 1
fi
echo "Current ${IMAGE}@${FROM_TAG} image is ${IMAGE}@${NEXT_DIGEST}"
echo "next-digest-buildtools=$NEXT_DIGEST" >> $GITHUB_OUTPUT
- name: Get destination image digest (if already exists)
id: prev-digest
run: |
PREV_DIGEST=$(crane digest ${IMAGE}:${TO_TAG} || true)
if [ -z "${PREV_DIGEST}" ]; then
echo >&2 "Image ${IMAGE}:${TO_TAG} does not exist (it's ok)"
else
echo >&2 "Current ${IMAGE}@${TO_TAG} image is ${IMAGE}@${PREV_DIGEST}"
echo "prev-digest-buildtools=$PREV_DIGEST" >> $GITHUB_OUTPUT
fi
- name: Tag image
run: |
crane tag "${IMAGE}:${FROM_TAG}" "${TO_TAG}"
rollback-tag-image:
needs: tag-image
if: ${{ !success() }}
runs-on: [ self-hosted, gen3, small ]
container: golang:1.19-bullseye
env:
IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools
FROM_TAG: ${{ inputs.from-tag }}
TO_TAG: ${{ inputs.to-tag }}
steps:
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
# The default value is ~/.docker
- name: Set custom docker config directory
- name: Install Crane & ECR helper
run: |
mkdir -p .docker-custom
echo DOCKER_CONFIG=$(pwd)/.docker-custom >> $GITHUB_ENV
go install github.com/google/go-containerregistry/cmd/crane@a54d64203cffcbf94146e04069aae4a97f228ee2 # v0.16.1
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@adf1bafd791ae7d4ff098108b1e91f36a4da5404 # v0.7.1
- uses: docker/login-action@v2
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- uses: docker/login-action@v2
with:
registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
password: ${{ secrets.AWS_SECRET_KEY_DEV }}
- uses: actions/setup-go@v5
with:
go-version: '1.21'
- name: Install crane
- name: Configure ECR login
run: |
go install github.com/google/go-containerregistry/cmd/crane@a0658aa1d0cc7a7f1bcc4a3af9155335b6943f40 # v0.18.0
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
- name: Copy images
- name: Restore previous tag if needed
run: |
crane copy "${ECR_IMAGE}:${FROM_TAG}" "${ECR_IMAGE}:${TO_TAG}"
crane copy "${ECR_IMAGE}:${FROM_TAG}" "${DOCKER_HUB_IMAGE}:${TO_TAG}"
NEXT_DIGEST="${{ needs.tag-image.outputs.next-digest-buildtools }}"
PREV_DIGEST="${{ needs.tag-image.outputs.prev-digest-buildtools }}"
- name: Remove custom docker config directory
if: always()
run: |
rm -rf .docker-custom
if [ -z "${NEXT_DIGEST}" ]; then
echo >&2 "Image ${IMAGE}:${FROM_TAG} does not exist, nothing to rollback"
exit 0
fi
if [ -z "${PREV_DIGEST}" ]; then
# I guess we should delete the tag here/untag the image, but crane does not support it
# - https://github.com/google/go-containerregistry/issues/999
echo >&2 "Image ${IMAGE}:${TO_TAG} did not exist, but it was created by the job, no need to rollback"
exit 0
fi
CURRENT_DIGEST=$(crane digest "${IMAGE}:${TO_TAG}")
if [ "${CURRENT_DIGEST}" == "${NEXT_DIGEST}" ]; then
crane tag "${IMAGE}@${PREV_DIGEST}" "${TO_TAG}"
echo >&2 "Successfully restored ${TO_TAG} tag from ${IMAGE}@${CURRENT_DIGEST} to ${IMAGE}@${PREV_DIGEST}"
else
echo >&2 "Image ${IMAGE}:${TO_TAG}@${CURRENT_DIGEST} is not required to be restored"
fi

319
Cargo.lock generated
View File

@@ -10,9 +10,9 @@ checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5"
[[package]]
name = "addr2line"
version = "0.21.0"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
checksum = "a76fd60b23679b7d19bd066031410fb7e458ccc5e958eb5c325888ce4baedc97"
dependencies = [
"gimli",
]
@@ -278,7 +278,6 @@ dependencies = [
"camino",
"clap",
"control_plane",
"diesel",
"futures",
"git-version",
"hyper",
@@ -841,15 +840,15 @@ dependencies = [
[[package]]
name = "backtrace"
version = "0.3.69"
version = "0.3.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
checksum = "233d376d6d185f2a3093e58f283f60f880315b6c60075b01f36b3b85154564ca"
dependencies = [
"addr2line",
"cc",
"cfg-if",
"libc",
"miniz_oxide",
"miniz_oxide 0.6.2",
"object",
"rustc-demangle",
]
@@ -1216,7 +1215,7 @@ dependencies = [
"flate2",
"futures",
"hyper",
"nix 0.27.1",
"nix 0.26.2",
"notify",
"num_cpus",
"opentelemetry",
@@ -1328,13 +1327,11 @@ dependencies = [
"clap",
"comfy-table",
"compute_api",
"diesel",
"diesel_migrations",
"futures",
"git-version",
"hex",
"hyper",
"nix 0.27.1",
"nix 0.26.2",
"once_cell",
"pageserver_api",
"pageserver_client",
@@ -1344,7 +1341,6 @@ dependencies = [
"regex",
"reqwest",
"safekeeper_api",
"scopeguard",
"serde",
"serde_json",
"serde_with",
@@ -1640,52 +1636,6 @@ dependencies = [
"rusticata-macros",
]
[[package]]
name = "diesel"
version = "2.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62c6fcf842f17f8c78ecf7c81d75c5ce84436b41ee07e03f490fbb5f5a8731d8"
dependencies = [
"bitflags 2.4.1",
"byteorder",
"diesel_derives",
"itoa",
"pq-sys",
"serde_json",
]
[[package]]
name = "diesel_derives"
version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef8337737574f55a468005a83499da720f20c65586241ffea339db9ecdfd2b44"
dependencies = [
"diesel_table_macro_syntax",
"proc-macro2",
"quote",
"syn 2.0.32",
]
[[package]]
name = "diesel_migrations"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6036b3f0120c5961381b570ee20a02432d7e2d27ea60de9578799cf9156914ac"
dependencies = [
"diesel",
"migrations_internals",
"migrations_macros",
]
[[package]]
name = "diesel_table_macro_syntax"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5"
dependencies = [
"syn 2.0.32",
]
[[package]]
name = "digest"
version = "0.10.7"
@@ -1922,13 +1872,13 @@ dependencies = [
[[package]]
name = "filetime"
version = "0.2.22"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4029edd3e734da6fe05b6cd7bd2960760a616bd2ddd0d59a0124746d6272af0"
checksum = "5cbc844cecaee9d4443931972e1289c8ff485cb4cc2767cb03ca139ed6885153"
dependencies = [
"cfg-if",
"libc",
"redox_syscall 0.3.5",
"redox_syscall 0.2.16",
"windows-sys 0.48.0",
]
@@ -1945,7 +1895,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
dependencies = [
"crc32fast",
"miniz_oxide",
"miniz_oxide 0.7.1",
]
[[package]]
@@ -2143,9 +2093,9 @@ dependencies = [
[[package]]
name = "gimli"
version = "0.28.1"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
checksum = "ad0a93d233ebf96623465aad4046a8d3aa4da22d4f4beba5388838c8a434bbb4"
[[package]]
name = "git-version"
@@ -2612,16 +2562,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "io-uring"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "460648e47a07a43110fbfa2e0b14afb2be920093c31e5dccc50e49568e099762"
dependencies = [
"bitflags 1.3.2",
"libc",
]
[[package]]
name = "ipnet"
version = "2.9.0"
@@ -2808,18 +2748,18 @@ checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "memoffset"
version = "0.8.0"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
checksum = "5de893c32cde5f383baa4c04c5d6dbdd735cfd4a794b0debdb2bb1b421da5ff4"
dependencies = [
"autocfg",
]
[[package]]
name = "memoffset"
version = "0.9.0"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c"
checksum = "d61c719bcfbcf5d62b3a09efa6088de8c54bc0bfcd3ea7ae39fcc186108b8de1"
dependencies = [
"autocfg",
]
@@ -2835,27 +2775,6 @@ dependencies = [
"workspace_hack",
]
[[package]]
name = "migrations_internals"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f23f71580015254b020e856feac3df5878c2c7a8812297edd6c0a485ac9dada"
dependencies = [
"serde",
"toml",
]
[[package]]
name = "migrations_macros"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cce3325ac70e67bbab5bd837a31cae01f1a6db64e0e744a33cb03a543469ef08"
dependencies = [
"migrations_internals",
"proc-macro2",
"quote",
]
[[package]]
name = "mime"
version = "0.3.17"
@@ -2878,6 +2797,15 @@ version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
dependencies = [
"adler",
]
[[package]]
name = "miniz_oxide"
version = "0.7.1"
@@ -2937,14 +2865,16 @@ dependencies = [
[[package]]
name = "nix"
version = "0.27.1"
version = "0.26.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053"
checksum = "bfdda3d196821d6af13126e40375cdf7da646a96114af134d5f417a9a1dc8e1a"
dependencies = [
"bitflags 2.4.1",
"bitflags 1.3.2",
"cfg-if",
"libc",
"memoffset 0.9.0",
"memoffset 0.7.1",
"pin-utils",
"static_assertions",
]
[[package]]
@@ -2959,21 +2889,20 @@ dependencies = [
[[package]]
name = "notify"
version = "6.1.1"
version = "5.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
checksum = "729f63e1ca555a43fe3efa4f3efdf4801c479da85b432242a7b726f353c88486"
dependencies = [
"bitflags 2.4.1",
"bitflags 1.3.2",
"crossbeam-channel",
"filetime",
"fsevent-sys",
"inotify 0.9.6",
"kqueue",
"libc",
"log",
"mio",
"walkdir",
"windows-sys 0.48.0",
"windows-sys 0.45.0",
]
[[package]]
@@ -3099,9 +3028,9 @@ dependencies = [
[[package]]
name = "object"
version = "0.32.2"
version = "0.30.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
checksum = "ea86265d3d3dcb6a27fc51bd29a4bf387fae9d2986b823079d4986af253eb439"
dependencies = [
"memchr",
]
@@ -3173,9 +3102,9 @@ dependencies = [
[[package]]
name = "opentelemetry"
version = "0.20.0"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9591d937bc0e6d2feb6f71a559540ab300ea49955229c347a517a28d27784c54"
checksum = "5f4b8347cc26099d3aeee044065ecc3ae11469796b4d65d065a23a584ed92a6f"
dependencies = [
"opentelemetry_api",
"opentelemetry_sdk",
@@ -3183,9 +3112,9 @@ dependencies = [
[[package]]
name = "opentelemetry-http"
version = "0.9.0"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7594ec0e11d8e33faf03530a4c49af7064ebba81c1480e01be67d90b356508b"
checksum = "a819b71d6530c4297b49b3cae2939ab3a8cc1b9f382826a1bc29dd0ca3864906"
dependencies = [
"async-trait",
"bytes",
@@ -3196,56 +3125,54 @@ dependencies = [
[[package]]
name = "opentelemetry-otlp"
version = "0.13.0"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e5e5a5c4135864099f3faafbe939eb4d7f9b80ebf68a8448da961b32a7c1275"
checksum = "8af72d59a4484654ea8eb183fea5ae4eb6a41d7ac3e3bae5f4d2a282a3a7d3ca"
dependencies = [
"async-trait",
"futures-core",
"futures",
"futures-util",
"http",
"opentelemetry",
"opentelemetry-http",
"opentelemetry-proto",
"opentelemetry-semantic-conventions",
"opentelemetry_api",
"opentelemetry_sdk",
"prost",
"reqwest",
"thiserror",
"tokio",
"tonic",
]
[[package]]
name = "opentelemetry-proto"
version = "0.3.0"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1e3f814aa9f8c905d0ee4bde026afd3b2577a97c10e1699912e3e44f0c4cbeb"
checksum = "045f8eea8c0fa19f7d48e7bc3128a39c2e5c533d5c61298c548dfefc1064474c"
dependencies = [
"opentelemetry_api",
"opentelemetry_sdk",
"futures",
"futures-util",
"opentelemetry",
"prost",
"tonic",
"tonic 0.8.3",
]
[[package]]
name = "opentelemetry-semantic-conventions"
version = "0.12.0"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73c9f9340ad135068800e7f1b24e9e09ed9e7143f5bf8518ded3d3ec69789269"
checksum = "24e33428e6bf08c6f7fcea4ddb8e358fab0fe48ab877a87c70c6ebe20f673ce5"
dependencies = [
"opentelemetry",
]
[[package]]
name = "opentelemetry_api"
version = "0.20.0"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b"
checksum = "ed41783a5bf567688eb38372f2b7a8530f5a607a4b49d38dd7573236c23ca7e2"
dependencies = [
"fnv",
"futures-channel",
"futures-util",
"indexmap 1.9.3",
"js-sys",
"once_cell",
"pin-project-lite",
"thiserror",
@@ -3254,22 +3181,21 @@ dependencies = [
[[package]]
name = "opentelemetry_sdk"
version = "0.20.0"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa8e705a0612d48139799fcbaba0d4a90f06277153e43dd2bdc16c6f0edd8026"
checksum = "8b3a2a91fdbfdd4d212c0dcc2ab540de2c2bcbbd90be17de7a7daf8822d010c1"
dependencies = [
"async-trait",
"crossbeam-channel",
"dashmap",
"fnv",
"futures-channel",
"futures-executor",
"futures-util",
"once_cell",
"opentelemetry_api",
"ordered-float 3.9.2",
"percent-encoding",
"rand 0.8.5",
"regex",
"serde_json",
"thiserror",
"tokio",
"tokio-stream",
@@ -3284,15 +3210,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "ordered-float"
version = "3.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc"
dependencies = [
"num-traits",
]
[[package]]
name = "ordered-multimap"
version = "0.7.1"
@@ -3408,7 +3325,7 @@ dependencies = [
"itertools",
"md5",
"metrics",
"nix 0.27.1",
"nix 0.26.2",
"num-traits",
"num_cpus",
"once_cell",
@@ -3441,7 +3358,6 @@ dependencies = [
"tenant_size_model",
"thiserror",
"tokio",
"tokio-epoll-uring",
"tokio-io-timeout",
"tokio-postgres",
"tokio-stream",
@@ -3864,15 +3780,6 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "pq-sys"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd"
dependencies = [
"vcpkg",
]
[[package]]
name = "pq_proto"
version = "0.1.0"
@@ -4432,9 +4339,9 @@ dependencies = [
[[package]]
name = "reqwest-tracing"
version = "0.4.7"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a0152176687dd5cfe7f507ac1cb1a491c679cfe483afd133a7db7aaea818bb3"
checksum = "1b97ad83c2fc18113346b7158d79732242002427c30f620fa817c1f32901e0a8"
dependencies = [
"anyhow",
"async-trait",
@@ -5124,9 +5031,9 @@ dependencies = [
[[package]]
name = "shlex"
version = "1.3.0"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
[[package]]
name = "signal-hook"
@@ -5203,9 +5110,9 @@ checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9"
[[package]]
name = "smol_str"
version = "0.2.1"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49"
checksum = "74212e6bbe9a4352329b2f68ba3130c15a3f26fe88ff22dbdc6cdd58fa85e99c"
dependencies = [
"serde",
]
@@ -5288,7 +5195,7 @@ dependencies = [
"prost",
"tokio",
"tokio-stream",
"tonic",
"tonic 0.9.2",
"tonic-build",
"tracing",
"utils",
@@ -5472,18 +5379,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.47"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f"
checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.47"
version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b"
checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f"
dependencies = [
"proc-macro2",
"quote",
@@ -5508,7 +5415,7 @@ checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
dependencies = [
"byteorder",
"integer-encoding",
"ordered-float 2.10.1",
"ordered-float",
]
[[package]]
@@ -5607,21 +5514,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "tokio-epoll-uring"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#0dd3a2f8bf3239d34a19719ef1a74146c093126f"
dependencies = [
"futures",
"once_cell",
"scopeguard",
"thiserror",
"tokio",
"tokio-util",
"tracing",
"uring-common",
]
[[package]]
name = "tokio-io-timeout"
version = "1.2.0"
@@ -5789,6 +5681,38 @@ dependencies = [
"winnow",
]
[[package]]
name = "tonic"
version = "0.8.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb"
dependencies = [
"async-stream",
"async-trait",
"axum",
"base64 0.13.1",
"bytes",
"futures-core",
"futures-util",
"h2",
"http",
"http-body",
"hyper",
"hyper-timeout",
"percent-encoding",
"pin-project",
"prost",
"prost-derive",
"tokio",
"tokio-stream",
"tokio-util",
"tower",
"tower-layer",
"tower-service",
"tracing",
"tracing-futures",
]
[[package]]
name = "tonic"
version = "0.9.2"
@@ -5932,6 +5856,16 @@ dependencies = [
"tracing-subscriber",
]
[[package]]
name = "tracing-futures"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
dependencies = [
"pin-project",
"tracing",
]
[[package]]
name = "tracing-log"
version = "0.1.3"
@@ -5945,9 +5879,9 @@ dependencies = [
[[package]]
name = "tracing-opentelemetry"
version = "0.20.0"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc09e402904a5261e42cf27aea09ccb7d5318c6717a9eec3d8e2e65c56b18f19"
checksum = "00a39dcf9bfc1742fa4d6215253b33a6e474be78275884c216fc2a06267b3600"
dependencies = [
"once_cell",
"opentelemetry",
@@ -6131,15 +6065,6 @@ dependencies = [
"webpki-roots 0.23.1",
]
[[package]]
name = "uring-common"
version = "0.1.0"
source = "git+https://github.com/neondatabase/tokio-epoll-uring.git?branch=main#0dd3a2f8bf3239d34a19719ef1a74146c093126f"
dependencies = [
"io-uring",
"libc",
]
[[package]]
name = "url"
version = "2.3.1"
@@ -6193,7 +6118,7 @@ dependencies = [
"hyper",
"jsonwebtoken",
"metrics",
"nix 0.27.1",
"nix 0.26.2",
"once_cell",
"pin-project-lite",
"postgres_connection",
@@ -6701,9 +6626,10 @@ dependencies = [
"clap",
"clap_builder",
"crossbeam-utils",
"diesel",
"dashmap",
"either",
"fail",
"futures",
"futures-channel",
"futures-core",
"futures-executor",
@@ -6748,7 +6674,6 @@ dependencies = [
"tokio-util",
"toml_datetime",
"toml_edit",
"tonic",
"tower",
"tracing",
"tracing-core",

View File

@@ -99,14 +99,14 @@ libc = "0.2"
md5 = "0.7.0"
memoffset = "0.8"
native-tls = "0.2"
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
notify = "6.0.0"
nix = "0.26"
notify = "5.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0"
opentelemetry = "0.19.0"
opentelemetry-otlp = { version = "0.12.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.11.0"
parking_lot = "0.12"
parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "49.0.0"
@@ -118,7 +118,7 @@ rand = "0.8"
redis = { version = "0.24.0", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_19"] }
reqwest-middleware = "0.2.0"
reqwest-retry = "0.2.2"
routerify = "3"
@@ -151,7 +151,6 @@ test-context = "0.1"
thiserror = "1.0"
tls-listener = { version = "0.7", features = ["rustls", "hyper-h1"] }
tokio = { version = "1.17", features = ["macros"] }
tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.git" , branch = "main" }
tokio-io-timeout = "1.2.0"
tokio-postgres-rustls = "0.10.0"
tokio-rustls = "0.24"
@@ -163,7 +162,7 @@ toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.20.0"
tracing-opentelemetry = "0.19.0"
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
url = "2.2"
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }

View File

@@ -52,7 +52,7 @@ RUN cd postgres && \
# We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
# In vanilla postgres this function is limited to Postgres role superuser.
# In neon we have neon_superuser role that is not a superuser but replaces superuser in some cases.
# We could add the additional grant statements to the postgres repository but it would be hard to maintain,
# We could add the additional grant statements to the postgres repository but it would be hard to maintain,
# whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
# so we do it here.
old_list="pg_stat_statements--1.0--1.1.sql pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.4.sql pg_stat_statements--1.5--1.6.sql"; \
@@ -63,14 +63,14 @@ RUN cd postgres && \
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
fi; \
done; \
# the second loop is for pg_stat_statement extension versions >= 1.7,
# the second loop is for pg_stat_statement extension versions >= 1.7,
# where pg_stat_statement_reset() got 3 additional arguments
for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
filename=$(basename "$file"); \
if ! echo "$old_list" | grep -q -F "$filename"; then \
echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
fi; \
done
done
#########################################################################################
#
@@ -143,24 +143,29 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
#########################################################################################
FROM build-deps AS plv8-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
export PLV8_VERSION=3.1.5 \
export PLV8_CHECKSUM=1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 \
;; \
"v16") \
export PLV8_VERSION=3.1.8 \
export PLV8_CHECKSUM=92b10c7db39afdae97ff748c9ec54713826af222c459084ad002571b79eb3f49 \
;; \
*) \
echo "Export the valid PG_VERSION variable" && exit 1 \
;; \
esac && \
wget https://github.com/plv8/plv8/archive/refs/tags/v${PLV8_VERSION}.tar.gz -O plv8.tar.gz && \
echo "${PLV8_CHECKSUM} plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
# generate and copy upgrade scripts
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
cp upgrade/* /usr/local/pgsql/share/extension/ && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
rm -rf /plv8-* && \
find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
# don't break computes with installed old version of plv8
cd /usr/local/pgsql/lib/ && \
ln -s plv8-3.1.10.so plv8-3.1.5.so && \
ln -s plv8-3.1.10.so plv8-3.1.8.so && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
@@ -546,7 +551,6 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
-D PostgreSQL_TYPE_INCLUDE_DIR=`pg_config --includedir-server` \
-D PostgreSQL_LIBRARY_DIR=`pg_config --libdir` \
-D RDK_INSTALL_INTREE=OFF \
-D RDK_INSTALL_COMIC_FONTS=OFF \
-D CMAKE_BUILD_TYPE=Release \
. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -613,7 +617,6 @@ RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O
FROM build-deps AS pg-embedding-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN case "${PG_VERSION}" in \
"v14" | "v15") \
@@ -776,8 +779,6 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
#
#########################################################################################
FROM build-deps AS neon-pg-ext-build
ARG PG_VERSION
# Public extensions
COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=postgis-build /sfcgal/* /

View File

@@ -700,14 +700,13 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let connstr = self.connstr.clone();
let mut client = match Client::connect(connstr.as_str(), NoTls) {
let mut client = match Client::connect(self.connstr.as_str(), NoTls) {
Err(e) => {
info!(
"cannot connect to postgres: {}, retrying with `zenith_admin` username",
e
);
let mut zenith_admin_connstr = connstr.clone();
let mut zenith_admin_connstr = self.connstr.clone();
zenith_admin_connstr
.set_username("zenith_admin")
@@ -720,8 +719,8 @@ impl ComputeNode {
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connstring with expected name
Client::connect(connstr.as_str(), NoTls)?
// reconnect with connsting with expected name
Client::connect(self.connstr.as_str(), NoTls)?
}
Ok(client) => client,
};
@@ -735,8 +734,8 @@ impl ComputeNode {
cleanup_instance(&mut client)?;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, connstr.as_str())?;
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
handle_grants(spec, &mut client, self.connstr.as_str())?;
handle_extensions(spec, &mut client)?;
handle_extension_neon(&mut client)?;
create_availability_check_data(&mut client)?;
@@ -744,12 +743,6 @@ impl ComputeNode {
// 'Close' connection
drop(client);
if self.has_feature(ComputeFeature::Migrations) {
thread::spawn(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client)
});
}
Ok(())
}
@@ -814,10 +807,6 @@ impl ComputeNode {
handle_grants(&spec, &mut client, self.connstr.as_str())?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// 'Close' connection

View File

@@ -727,79 +727,3 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
Ok(())
}
#[instrument(skip_all)]
pub fn handle_migrations(client: &mut Client) -> Result<()> {
info!("handle migrations");
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
let migrations = [
"ALTER ROLE neon_superuser BYPASSRLS",
r#"
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;
"#,
];
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
client.simple_query(query)?;
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
client.simple_query(query)?;
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
client.simple_query(query)?;
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
client.simple_query(query)?;
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
client.simple_query(query)?;
query = "SELECT id FROM neon_migration.migration_id";
let row = client.query_one(query, &[])?;
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
let starting_migration_id = current_migration;
query = "BEGIN";
client.simple_query(query)?;
while current_migration < migrations.len() {
info!("Running migration:\n{}\n", migrations[current_migration]);
client.simple_query(migrations[current_migration])?;
current_migration += 1;
}
let setval = format!(
"UPDATE neon_migration.migration_id SET id={}",
migrations.len()
);
client.simple_query(&setval)?;
query = "COMMIT";
client.simple_query(query)?;
info!(
"Ran {} migrations",
(migrations.len() - starting_migration_id)
);
Ok(())
}

View File

@@ -10,8 +10,6 @@ async-trait.workspace = true
camino.workspace = true
clap.workspace = true
comfy-table.workspace = true
diesel = { version = "2.1.4", features = ["postgres"]}
diesel_migrations = { version = "2.1.0", features = ["postgres"]}
futures.workspace = true
git-version.workspace = true
nix.workspace = true
@@ -21,7 +19,6 @@ hex.workspace = true
hyper.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["blocking", "json"] }
scopeguard.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true

View File

@@ -25,8 +25,6 @@ tracing.workspace = true
# a parsing function when loading pageservers from neon_local LocalEnv
postgres_backend.workspace = true
diesel = { version = "2.1.4", features = ["serde_json", "postgres"] }
utils = { path = "../../libs/utils/" }
metrics = { path = "../../libs/metrics/" }
control_plane = { path = ".." }

View File

@@ -1,6 +0,0 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.
DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass);
DROP FUNCTION IF EXISTS diesel_set_updated_at();

View File

@@ -1,36 +0,0 @@
-- This file was automatically created by Diesel to setup helper functions
-- and other internal bookkeeping. This file is safe to edit, any future
-- changes will be added to existing projects as new migrations.
-- Sets up a trigger for the given table to automatically set a column called
-- `updated_at` whenever the row is modified (unless `updated_at` was included
-- in the modified columns)
--
-- # Example
--
-- ```sql
-- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW());
--
-- SELECT diesel_manage_updated_at('users');
-- ```
CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$
BEGIN
EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s
FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl);
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$
BEGIN
IF (
NEW IS DISTINCT FROM OLD AND
NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at
) THEN
NEW.updated_at := current_timestamp;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;

View File

@@ -1,12 +0,0 @@
CREATE TABLE tenant_shards (
tenant_id VARCHAR NOT NULL,
shard_number INTEGER NOT NULL,
shard_count INTEGER NOT NULL,
PRIMARY KEY(tenant_id, shard_number, shard_count),
shard_stripe_size INTEGER NOT NULL,
generation INTEGER NOT NULL,
generation_pageserver BIGINT NOT NULL,
placement_policy VARCHAR NOT NULL,
-- config is JSON encoded, opaque to the database.
config TEXT NOT NULL
);

View File

@@ -1,10 +0,0 @@
CREATE TABLE nodes (
node_id BIGINT PRIMARY KEY NOT NULL,
scheduling_policy VARCHAR NOT NULL,
listen_http_addr VARCHAR NOT NULL,
listen_http_port INTEGER NOT NULL,
listen_pg_addr VARCHAR NOT NULL,
listen_pg_port INTEGER NOT NULL
);

View File

@@ -1,5 +1,5 @@
use crate::reconciler::ReconcileError;
use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
use crate::service::Service;
use hyper::{Body, Request, Response};
use hyper::{StatusCode, Uri};
use pageserver_api::models::{TenantCreateRequest, TimelineCreateRequest};
@@ -104,34 +104,34 @@ async fn handle_inspect(mut req: Request<Body>) -> Result<Response<Body>, ApiErr
json_response(StatusCode::OK, state.service.inspect(inspect_req))
}
async fn handle_tenant_create(
service: Arc<Service>,
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
async fn handle_tenant_create(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let create_req = json_request::<TenantCreateRequest>(&mut req).await?;
json_response(StatusCode::OK, service.tenant_create(create_req).await?)
}
async fn handle_tenant_timeline_create(
service: Arc<Service>,
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let create_req = json_request::<TimelineCreateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
service
state.service.tenant_create(create_req).await?,
)
}
async fn handle_tenant_timeline_create(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let create_req = json_request::<TimelineCreateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.tenant_timeline_create(tenant_id, create_req)
.await?,
)
}
async fn handle_tenant_locate(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
async fn handle_tenant_locate(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
json_response(StatusCode::OK, service.tenant_locate(tenant_id)?)
let state = get_state(&req);
json_response(StatusCode::OK, state.service.tenant_locate(tenant_id)?)
}
async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -154,15 +154,14 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
json_response(StatusCode::OK, state.service.node_configure(config_req)?)
}
async fn handle_tenant_shard_migrate(
service: Arc<Service>,
mut req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
async fn handle_tenant_shard_migrate(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
let migrate_req = json_request::<TenantShardMigrateRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
service
state
.service
.tenant_shard_migrate(tenant_shard_id, migrate_req)
.await?,
)
@@ -179,35 +178,6 @@ impl From<ReconcileError> for ApiError {
}
}
/// Common wrapper for request handlers that call into Service and will operate on tenants: they must only
/// be allowed to run if Service has finished its initial reconciliation.
async fn tenant_service_handler<R, H>(request: Request<Body>, handler: H) -> R::Output
where
R: std::future::Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
H: FnOnce(Arc<Service>, Request<Body>) -> R + Send + Sync + 'static,
{
let state = get_state(&request);
let service = state.service.clone();
let startup_complete = service.startup_complete.clone();
if tokio::time::timeout(STARTUP_RECONCILE_TIMEOUT, startup_complete.wait())
.await
.is_err()
{
// This shouldn't happen: it is the responsibility of [`Service::startup_reconcile`] to use appropriate
// timeouts around its remote calls, to bound its runtime.
return Err(ApiError::Timeout(
"Timed out waiting for service readiness".into(),
));
}
request_span(
request,
|request| async move { handler(service, request).await },
)
.await
}
pub fn make_router(
service: Arc<Service>,
auth: Option<Arc<SwappableJwtAuth>>,
@@ -235,20 +205,14 @@ pub fn make_router(
.put("/node/:node_id/config", |r| {
request_span(r, handle_node_configure)
})
.post("/v1/tenant", |r| {
tenant_service_handler(r, handle_tenant_create)
})
.post("/v1/tenant/:tenant_id/timeline", |r| {
tenant_service_handler(r, handle_tenant_timeline_create)
.post("/tenant", |r| request_span(r, handle_tenant_create))
.post("/tenant/:tenant_id/timeline", |r| {
request_span(r, handle_tenant_timeline_create)
})
.get("/tenant/:tenant_id/locate", |r| {
tenant_service_handler(r, handle_tenant_locate)
request_span(r, handle_tenant_locate)
})
.put("/tenant/:tenant_shard_id/migrate", |r| {
tenant_service_handler(r, handle_tenant_shard_migrate)
request_span(r, handle_tenant_shard_migrate)
})
// Path aliases for tests_forward_compatibility
// TODO: remove these in future PR
.post("/re-attach", |r| request_span(r, handle_re_attach))
.post("/validate", |r| request_span(r, handle_validate))
}

View File

@@ -7,7 +7,6 @@ mod node;
pub mod persistence;
mod reconciler;
mod scheduler;
mod schema;
pub mod service;
mod tenant_state;
@@ -18,8 +17,6 @@ enum PlacementPolicy {
/// Production-ready way to attach a tenant: one attached pageserver and
/// some number of secondaries.
Double(usize),
/// Do not attach to any pageservers
Detached,
}
#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone)]

View File

@@ -12,9 +12,9 @@ use camino::Utf8PathBuf;
use clap::Parser;
use metrics::launch_timestamp::LaunchTimestamp;
use std::sync::Arc;
use tokio::signal::unix::SignalKind;
use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::logging::{self, LogFormat};
use utils::signals::{ShutdownSignals, Signal};
use utils::{project_build_tag, project_git_version, tcp_listener};
@@ -40,10 +40,6 @@ struct Cli {
/// Path to the .json file to store state (will be created if it doesn't exist)
#[arg(short, long)]
path: Utf8PathBuf,
/// URL to connect to postgres, like postgresql://localhost:1234/attachment_service
#[arg(long)]
database_url: String,
}
#[tokio::main]
@@ -70,14 +66,9 @@ async fn main() -> anyhow::Result<()> {
jwt_token: args.jwt_token,
};
let json_path = if args.path.as_os_str().is_empty() {
None
} else {
Some(args.path)
};
let persistence = Arc::new(Persistence::new(args.database_url, json_path.clone()));
let persistence = Arc::new(Persistence::new(&args.path).await);
let service = Service::spawn(config, persistence.clone()).await?;
let service = Service::spawn(config, persistence).await?;
let http_listener = tcp_listener::bind(args.listen)?;
@@ -90,31 +81,20 @@ async fn main() -> anyhow::Result<()> {
let router = make_router(service, auth)
.build()
.map_err(|err| anyhow!(err))?;
let router_service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?.serve(router_service);
let service = utils::http::RouterService::new(router).unwrap();
let server = hyper::Server::from_tcp(http_listener)?.serve(service);
tracing::info!("Serving on {0}", args.listen);
tokio::task::spawn(server);
// Wait until we receive a signal
let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?;
let mut sigquit = tokio::signal::unix::signal(SignalKind::quit())?;
let mut sigterm = tokio::signal::unix::signal(SignalKind::terminate())?;
tokio::select! {
_ = sigint.recv() => {},
_ = sigterm.recv() => {},
_ = sigquit.recv() => {},
}
tracing::info!("Terminating on signal");
if json_path.is_some() {
// Write out a JSON dump on shutdown: this is used in compat tests to avoid passing
// full postgres dumps around.
if let Err(e) = persistence.write_tenants_json().await {
tracing::error!("Failed to write JSON on shutdown: {e}")
ShutdownSignals::handle(|signal| match signal {
Signal::Interrupt | Signal::Terminate | Signal::Quit => {
tracing::info!("Got {}. Terminating", signal.name());
// We're just a test helper: no graceful shutdown.
std::process::exit(0);
}
}
})?;
std::process::exit(0);
Ok(())
}

View File

@@ -1,8 +1,6 @@
use control_plane::attachment_service::{NodeAvailability, NodeSchedulingPolicy};
use utils::id::NodeId;
use crate::persistence::NodePersistence;
#[derive(Clone)]
pub(crate) struct Node {
pub(crate) id: NodeId,
@@ -36,15 +34,4 @@ impl Node {
NodeSchedulingPolicy::Pause => false,
}
}
pub(crate) fn to_persistent(&self) -> NodePersistence {
NodePersistence {
node_id: self.id.0 as i64,
scheduling_policy: self.scheduling.into(),
listen_http_addr: self.listen_http_addr.clone(),
listen_http_port: self.listen_http_port as i32,
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port as i32,
}
}
}

View File

@@ -1,161 +1,139 @@
use std::collections::HashMap;
use std::str::FromStr;
use std::{collections::HashMap, str::FromStr};
use camino::Utf8Path;
use camino::Utf8PathBuf;
use control_plane::attachment_service::{NodeAvailability, NodeSchedulingPolicy};
use diesel::pg::PgConnection;
use diesel::prelude::*;
use diesel::Connection;
use pageserver_api::models::TenantConfig;
use pageserver_api::shard::{ShardCount, ShardNumber, TenantShardId};
use camino::{Utf8Path, Utf8PathBuf};
use control_plane::{
attachment_service::{NodeAvailability, NodeSchedulingPolicy},
local_env::LocalEnv,
};
use pageserver_api::{
models::TenantConfig,
shard::{ShardCount, ShardNumber, TenantShardId},
};
use postgres_connection::parse_host_port;
use serde::{Deserialize, Serialize};
use utils::generation::Generation;
use utils::id::{NodeId, TenantId};
use utils::{
generation::Generation,
id::{NodeId, TenantId},
};
use crate::node::Node;
use crate::PlacementPolicy;
use crate::{node::Node, PlacementPolicy};
/// ## What do we store?
///
/// The attachment service does not store most of its state durably.
///
/// The essential things to store durably are:
/// - generation numbers, as these must always advance monotonically to ensure data safety.
/// - Tenant's PlacementPolicy and TenantConfig, as the source of truth for these is something external.
/// - Node's scheduling policies, as the source of truth for these is something external.
///
/// Other things we store durably as an implementation detail:
/// - Node's host/port: this could be avoided if we made nodes emit a self-registering heartbeat,
/// but it is operationally simpler to make this service the authority for which nodes
/// it talks to.
///
/// ## Performance/efficiency
///
/// The attachment service does not go via the database for most things: there are
/// a couple of places where we must, and where efficiency matters:
/// - Incrementing generation numbers: the Reconciler has to wait for this to complete
/// before it can attach a tenant, so this acts as a bound on how fast things like
/// failover can happen.
/// - Pageserver re-attach: we will increment many shards' generations when this happens,
/// so it is important to avoid e.g. issuing O(N) queries.
///
/// Database calls relating to nodes have low performance requirements, as they are very rarely
/// updated, and reads of nodes are always from memory, not the database. We only require that
/// we can UPDATE a node's scheduling mode reasonably quickly to mark a bad node offline.
/// Placeholder for storage. This will be replaced with a database client.
pub struct Persistence {
database_url: String,
// In test environments, we support loading+saving a JSON file. This is temporary, for the benefit of
// test_compatibility.py, so that we don't have to commit to making the database contents fully backward/forward
// compatible just yet.
json_path: Option<Utf8PathBuf>,
state: std::sync::Mutex<PersistentState>,
}
/// Legacy format, for use in JSON compat objects in test environment
// Top level state available to all HTTP handlers
#[derive(Serialize, Deserialize)]
struct JsonPersistence {
struct PersistentState {
tenants: HashMap<TenantShardId, TenantShardPersistence>,
#[serde(skip)]
path: Utf8PathBuf,
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum DatabaseError {
#[error(transparent)]
Query(#[from] diesel::result::Error),
#[error(transparent)]
Connection(#[from] diesel::result::ConnectionError),
#[error("Logical error: {0}")]
Logical(String),
/// A convenience for serializing the state inside a sync lock, and then
/// writing it to disk outside of the lock. This will go away when switching
/// to a database backend.
struct PendingWrite {
bytes: Vec<u8>,
path: Utf8PathBuf,
}
pub(crate) type DatabaseResult<T> = Result<T, DatabaseError>;
impl PendingWrite {
async fn commit(&self) -> anyhow::Result<()> {
tokio::fs::write(&self.path, &self.bytes).await?;
Ok(())
}
}
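// A minimal usage sketch of the pattern (mirroring insert_tenant_shards and
// increment_generation below): mutate and serialize while holding the sync lock, then
// perform the async file write only after the guard is dropped, so no .await happens
// under the std::sync::Mutex.
//
//     let write = {
//         let mut locked = self.state.lock().unwrap();
//         // ... mutate locked.tenants ...
//         locked.save()
//     };
//     write.commit().await?;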
impl PersistentState {
fn save(&self) -> PendingWrite {
PendingWrite {
bytes: serde_json::to_vec(self).expect("Serialization error"),
path: self.path.clone(),
}
}
async fn load(path: &Utf8Path) -> anyhow::Result<Self> {
let bytes = tokio::fs::read(path).await?;
let mut decoded = serde_json::from_slice::<Self>(&bytes)?;
decoded.path = path.to_owned();
for (tenant_id, tenant) in &mut decoded.tenants {
// Backward compat: for an old attachments.json from before PR #6251, replace
// empty strings with proper defaults.
if tenant.tenant_id.is_empty() {
tenant.tenant_id = format!("{}", tenant_id);
tenant.config = serde_json::to_string(&TenantConfig::default())?;
tenant.placement_policy = serde_json::to_string(&PlacementPolicy::default())?;
}
}
Ok(decoded)
}
async fn load_or_new(path: &Utf8Path) -> Self {
match Self::load(path).await {
Ok(s) => {
tracing::info!("Loaded state file at {}", path);
s
}
Err(e)
if e.downcast_ref::<std::io::Error>()
.map(|e| e.kind() == std::io::ErrorKind::NotFound)
.unwrap_or(false) =>
{
tracing::info!("Will create state file at {}", path);
Self {
tenants: HashMap::new(),
path: path.to_owned(),
}
}
Err(e) => {
panic!("Failed to load state from '{}': {e:#} (maybe your .neon/ dir was written by an older version?)", path)
}
}
}
}
impl Persistence {
pub fn new(database_url: String, json_path: Option<Utf8PathBuf>) -> Self {
pub async fn new(path: &Utf8Path) -> Self {
let state = PersistentState::load_or_new(path).await;
Self {
database_url,
json_path,
state: std::sync::Mutex::new(state),
}
}
/// Call the provided function in a tokio blocking thread, with a Diesel database connection.
async fn with_conn<F, R>(&self, func: F) -> DatabaseResult<R>
where
F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
R: Send + 'static,
{
let database_url = self.database_url.clone();
tokio::task::spawn_blocking(move || -> DatabaseResult<R> {
// TODO: connection pooling, such as via diesel::r2d2
let mut conn = PgConnection::establish(&database_url)?;
func(&mut conn)
})
.await
.expect("Task panic")
/// When registering a node, persist it so that on next start we will be able to
/// iterate over known nodes to synchronize their tenant shard states with our observed state.
pub(crate) async fn insert_node(&self, _node: &Node) -> anyhow::Result<()> {
// TODO: node persistence will come with the database backend
Ok(())
}
/// When a node is first registered, persist it before using it for anything
pub(crate) async fn insert_node(&self, node: &Node) -> DatabaseResult<()> {
let np = node.to_persistent();
self.with_conn(move |conn| -> DatabaseResult<()> {
diesel::insert_into(crate::schema::nodes::table)
.values(&np)
.execute(conn)?;
Ok(())
})
.await
}
/// At startup, we populate the service's list of nodes, and use this list to call into
/// each node to do an initial reconciliation of the state of the world with our in-memory
/// observed state.
pub(crate) async fn list_nodes(&self) -> anyhow::Result<Vec<Node>> {
let env = LocalEnv::load_config()?;
// TODO: node persistence will come with the database backend
/// At startup, populate the list of nodes which our shards may be placed on
pub(crate) async fn list_nodes(&self) -> DatabaseResult<Vec<Node>> {
let nodes: Vec<Node> = self
.with_conn(move |conn| -> DatabaseResult<_> {
Ok(crate::schema::nodes::table
.load::<NodePersistence>(conn)?
.into_iter()
.map(|n| Node {
id: NodeId(n.node_id as u64),
// At startup we consider a node offline until proven otherwise.
availability: NodeAvailability::Offline,
scheduling: NodeSchedulingPolicy::from_str(&n.scheduling_policy)
.expect("Bad scheduling policy in DB"),
listen_http_addr: n.listen_http_addr,
listen_http_port: n.listen_http_port as u16,
listen_pg_addr: n.listen_pg_addr,
listen_pg_port: n.listen_pg_port as u16,
})
.collect::<Vec<Node>>())
})
.await?;
if nodes.is_empty() {
return self.list_nodes_local_env().await;
}
tracing::info!("list_nodes: loaded {} nodes", nodes.len());
Ok(nodes)
}
/// Shim for automated compatibility tests: load nodes from LocalEnv instead of database
pub(crate) async fn list_nodes_local_env(&self) -> DatabaseResult<Vec<Node>> {
// Enable test_backward_compatibility to work by populating our list of
// XXX hack: enable test_backward_compatibility to work by populating our list of
// nodes from LocalEnv when it is not present in persistent storage. Otherwise at
// first startup in the compat test, we may have shards but no nodes.
use control_plane::local_env::LocalEnv;
let env = LocalEnv::load_config().map_err(|e| DatabaseError::Logical(format!("{e}")))?;
let mut result = Vec::new();
tracing::info!(
"Loading {} pageserver nodes from LocalEnv",
"Loaded {} pageserver nodes from LocalEnv",
env.pageservers.len()
);
let mut nodes = Vec::new();
for ps_conf in env.pageservers {
let (pg_host, pg_port) =
parse_host_port(&ps_conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&ps_conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
let node = Node {
result.push(Node {
id: ps_conf.id,
listen_pg_addr: pg_host.to_string(),
listen_pg_port: pg_port.unwrap_or(5432),
@@ -163,96 +141,16 @@ impl Persistence {
listen_http_port: http_port.unwrap_or(80),
availability: NodeAvailability::Active,
scheduling: NodeSchedulingPolicy::Active,
};
// Synchronize database with what we learn from LocalEnv
self.insert_node(&node).await?;
nodes.push(node);
});
}
Ok(nodes)
Ok(result)
}
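// Note: the exact signature of parse_host_port is assumed from its use above; an
// address like "127.0.0.1:64000" splits into a host plus Some(64000), while a bare
// host yields a None port, hence the unwrap_or(5432) / unwrap_or(80) defaults.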
/// At startup, load the high level state for shards, such as their config + policy. This will
/// be enriched at runtime with state discovered on pageservers.
pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> {
let loaded = self
.with_conn(move |conn| -> DatabaseResult<_> {
Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
})
.await?;
if loaded.is_empty() {
if let Some(path) = &self.json_path {
if tokio::fs::try_exists(path)
.await
.map_err(|e| DatabaseError::Logical(format!("Error stat'ing JSON file: {e}")))?
{
tracing::info!("Importing from legacy JSON format at {path}");
return self.list_tenant_shards_json(path).await;
}
}
}
Ok(loaded)
}
/// Shim for automated compatibility tests: load tenants from a JSON file instead of database
pub(crate) async fn list_tenant_shards_json(
&self,
path: &Utf8Path,
) -> DatabaseResult<Vec<TenantShardPersistence>> {
let bytes = tokio::fs::read(path)
.await
.map_err(|e| DatabaseError::Logical(format!("Failed to load JSON: {e}")))?;
let mut decoded = serde_json::from_slice::<JsonPersistence>(&bytes)
.map_err(|e| DatabaseError::Logical(format!("Deserialization error: {e}")))?;
for (tenant_id, tenant) in &mut decoded.tenants {
// Backward compat: for an old attachments.json from before PR #6251, replace
// empty strings with proper defaults.
if tenant.tenant_id.is_empty() {
tenant.tenant_id = tenant_id.to_string();
tenant.config = serde_json::to_string(&TenantConfig::default())
.map_err(|e| DatabaseError::Logical(format!("Serialization error: {e}")))?;
tenant.placement_policy = serde_json::to_string(&PlacementPolicy::default())
.map_err(|e| DatabaseError::Logical(format!("Serialization error: {e}")))?;
}
}
let tenants: Vec<TenantShardPersistence> = decoded.tenants.into_values().collect();
// Synchronize database with what is in the JSON file
self.insert_tenant_shards(tenants.clone()).await?;
Ok(tenants)
}
/// For use in testing environments, where we dump out JSON on shutdown.
pub async fn write_tenants_json(&self) -> anyhow::Result<()> {
let Some(path) = &self.json_path else {
anyhow::bail!("Cannot write JSON if path isn't set (test environment bug)");
};
tracing::info!("Writing state to {path}...");
let tenants = self.list_tenant_shards().await?;
let mut tenants_map = HashMap::new();
for tsp in tenants {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
tenants_map.insert(tenant_shard_id, tsp);
}
let json = serde_json::to_string(&JsonPersistence {
tenants: tenants_map,
})?;
tokio::fs::write(path, &json).await?;
tracing::info!("Wrote {} bytes to {path}...", json.len());
Ok(())
/// At startup, we populate our map of tenant shards from persistent storage.
pub(crate) async fn list_tenant_shards(&self) -> anyhow::Result<Vec<TenantShardPersistence>> {
let locked = self.state.lock().unwrap();
Ok(locked.tenants.values().cloned().collect())
}
/// Tenants must be persisted before we schedule them for the first time. This enables us
@@ -260,77 +158,24 @@ impl Persistence {
pub(crate) async fn insert_tenant_shards(
&self,
shards: Vec<TenantShardPersistence>,
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_conn(move |conn| -> DatabaseResult<()> {
conn.transaction(|conn| -> QueryResult<()> {
for tenant in &shards {
diesel::insert_into(tenant_shards)
.values(tenant)
.execute(conn)?;
}
Ok(())
})?;
Ok(())
})
.await
}
) -> anyhow::Result<()> {
let write = {
let mut locked = self.state.lock().unwrap();
for shard in shards {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(shard.tenant_id.as_str())?,
shard_number: ShardNumber(shard.shard_number as u8),
shard_count: ShardCount(shard.shard_count as u8),
};
/// Ordering: call this _after_ deleting the tenant on pageservers, but _before_ dropping state for
/// the tenant from memory on this server.
#[allow(unused)]
pub(crate) async fn delete_tenant(&self, del_tenant_id: TenantId) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_conn(move |conn| -> DatabaseResult<()> {
diesel::delete(tenant_shards)
.filter(tenant_id.eq(del_tenant_id.to_string()))
.execute(conn)?;
locked.tenants.insert(tenant_shard_id, shard);
}
locked.save()
};
Ok(())
})
.await
}
write.commit().await?;
/// When a tenant invokes the /re-attach API, this function is responsible for doing an efficient
/// batched increment of the generations of all tenants whose generation_pageserver is equal to
/// the node that called /re-attach.
#[tracing::instrument(skip_all, fields(node_id))]
pub(crate) async fn re_attach(
&self,
node_id: NodeId,
) -> DatabaseResult<HashMap<TenantShardId, Generation>> {
use crate::schema::tenant_shards::dsl::*;
let updated = self
.with_conn(move |conn| {
let rows_updated = diesel::update(tenant_shards)
.filter(generation_pageserver.eq(node_id.0 as i64))
.set(generation.eq(generation + 1))
.execute(conn)?;
tracing::info!("Incremented {} tenants' generations", rows_updated);
// TODO: UPDATE+SELECT in one query
let updated = tenant_shards
.filter(generation_pageserver.eq(node_id.0 as i64))
.select(TenantShardPersistence::as_select())
.load(conn)?;
Ok(updated)
})
.await?;
let mut result = HashMap::new();
for tsp in updated {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())
.map_err(|e| DatabaseError::Logical(format!("Malformed tenant id: {e}")))?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
result.insert(tenant_shard_id, Generation::new(tsp.generation as u32));
}
Ok(result)
Ok(())
}
/// Reconciler calls this immediately before attaching to a new pageserver, to acquire a unique, monotonically
@@ -339,48 +184,49 @@ impl Persistence {
pub(crate) async fn increment_generation(
&self,
tenant_shard_id: TenantShardId,
node_id: NodeId,
node_id: Option<NodeId>,
) -> anyhow::Result<Generation> {
use crate::schema::tenant_shards::dsl::*;
let updated = self
.with_conn(move |conn| {
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.0 as i32))
.set((
generation.eq(generation + 1),
generation_pageserver.eq(node_id.0 as i64),
))
// TODO: only returning() the generation column
.returning(TenantShardPersistence::as_returning())
.get_result(conn)?;
let (write, gen) = {
let mut locked = self.state.lock().unwrap();
let Some(shard) = locked.tenants.get_mut(&tenant_shard_id) else {
anyhow::bail!("Tried to increment generation of unknown shard");
};
Ok(updated)
})
.await?;
// If we're called with a None pageserver, we need only update the generation
// record to disassociate it from this pageserver, not actually increment the number, as
// the increment is guaranteed to happen the next time this tenant is attached.
if node_id.is_some() {
shard.generation += 1;
}
Ok(Generation::new(updated.generation as u32))
shard.generation_pageserver = node_id;
let gen = Generation::new(shard.generation);
(locked.save(), gen)
};
write.commit().await?;
Ok(gen)
}
pub(crate) async fn detach(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_conn(move |conn| {
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.0 as i32))
.set((
generation_pageserver.eq(i64::MAX),
placement_policy.eq(serde_json::to_string(&PlacementPolicy::Detached).unwrap()),
))
.execute(conn)?;
pub(crate) async fn re_attach(
&self,
node_id: NodeId,
) -> anyhow::Result<HashMap<TenantShardId, Generation>> {
let (write, result) = {
let mut result = HashMap::new();
let mut locked = self.state.lock().unwrap();
for (tenant_shard_id, shard) in locked.tenants.iter_mut() {
if shard.generation_pageserver == Some(node_id) {
shard.generation += 1;
result.insert(*tenant_shard_id, Generation::new(shard.generation));
}
}
Ok(updated)
})
.await?;
(locked.save(), result)
};
Ok(())
write.commit().await?;
Ok(result)
}
// TODO: when we start shard splitting, we must durably mark the tenant so that
@@ -400,8 +246,7 @@ impl Persistence {
}
/// Parts of [`crate::tenant_state::TenantState`] that are stored durably
#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone)]
#[diesel(table_name = crate::schema::tenant_shards)]
#[derive(Serialize, Deserialize, Clone)]
pub(crate) struct TenantShardPersistence {
#[serde(default)]
pub(crate) tenant_id: String,
@@ -412,28 +257,16 @@ pub(crate) struct TenantShardPersistence {
#[serde(default)]
pub(crate) shard_stripe_size: i32,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching
pub(crate) generation: i32,
// Currently attached pageserver
#[serde(rename = "pageserver")]
pub(crate) generation_pageserver: i64,
pub(crate) generation_pageserver: Option<NodeId>,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching
pub(crate) generation: u32,
#[serde(default)]
pub(crate) placement_policy: String,
#[serde(default)]
pub(crate) config: String,
}
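// Purely illustrative (all values invented): with the serde attributes above, a shard
// record in the legacy attachments.json roughly looks like the following, keyed by its
// TenantShardId in the top-level `tenants` map; note that generation_pageserver is
// renamed to "pageserver" and that placement_policy/config are JSON stored as strings:
//
//     {
//       "tenant_id": "1f359dd625e519a1a4e8d7509690f6fc",
//       "shard_number": 0,
//       "shard_count": 0,
//       "shard_stripe_size": 0,
//       "pageserver": 1,
//       "generation": 3,
//       "placement_policy": "<JSON-encoded PlacementPolicy>",
//       "config": "<JSON-encoded TenantConfig>"
//     }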
/// Parts of [`crate::node::Node`] that are stored durably
#[derive(Serialize, Deserialize, Queryable, Selectable, Insertable)]
#[diesel(table_name = crate::schema::nodes)]
pub(crate) struct NodePersistence {
pub(crate) node_id: i64,
pub(crate) scheduling_policy: String,
pub(crate) listen_http_addr: String,
pub(crate) listen_http_port: i32,
pub(crate) listen_pg_addr: String,
pub(crate) listen_pg_port: i32,
}
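// The to_persistent() helper used by insert_node() is not part of this diff; a
// plausible sketch (an assumption, field names taken from Node and NodePersistence
// shown here) is just the inverse of the narrowing casts done in list_nodes():
//
//     impl Node {
//         pub(crate) fn to_persistent(&self) -> NodePersistence {
//             NodePersistence {
//                 node_id: self.id.0 as i64,
//                 // assumes NodeSchedulingPolicy has a String conversion mirroring from_str
//                 scheduling_policy: String::from(self.scheduling),
//                 listen_http_addr: self.listen_http_addr.clone(),
//                 listen_http_port: self.listen_http_port as i32,
//                 listen_pg_addr: self.listen_pg_addr.clone(),
//                 listen_pg_port: self.listen_pg_port as i32,
//             }
//         }
//     }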

View File

@@ -296,7 +296,7 @@ impl Reconciler {
// Increment generation before attaching to new pageserver
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, dest_ps_id)
.increment_generation(self.tenant_shard_id, Some(dest_ps_id))
.await?;
let dest_conf = build_location_config(
@@ -395,7 +395,7 @@ impl Reconciler {
// as locations with unknown (None) observed state.
self.generation = self
.persistence
.increment_generation(self.tenant_shard_id, node_id)
.increment_generation(self.tenant_shard_id, Some(node_id))
.await?;
wanted_conf.generation = self.generation.into();
tracing::info!("Observed configuration requires update.");

View File

@@ -1,27 +0,0 @@
// @generated automatically by Diesel CLI.
diesel::table! {
nodes (node_id) {
node_id -> Int8,
scheduling_policy -> Varchar,
listen_http_addr -> Varchar,
listen_http_port -> Int4,
listen_pg_addr -> Varchar,
listen_pg_port -> Int4,
}
}
diesel::table! {
tenant_shards (tenant_id, shard_number, shard_count) {
tenant_id -> Varchar,
shard_number -> Int4,
shard_count -> Int4,
shard_stripe_size -> Int4,
generation -> Int4,
generation_pageserver -> Int8,
placement_policy -> Varchar,
config -> Text,
}
}
diesel::allow_tables_to_appear_in_same_query!(nodes, tenant_shards,);
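// For orientation (an assumption; the migration files themselves are not shown in this
// diff): an up.sql matching these generated table! definitions would look roughly like:
//
//     CREATE TABLE nodes (
//         node_id BIGINT PRIMARY KEY,
//         scheduling_policy VARCHAR NOT NULL,
//         listen_http_addr VARCHAR NOT NULL,
//         listen_http_port INTEGER NOT NULL,
//         listen_pg_addr VARCHAR NOT NULL,
//         listen_pg_port INTEGER NOT NULL
//     );
//
//     CREATE TABLE tenant_shards (
//         tenant_id VARCHAR NOT NULL,
//         shard_number INTEGER NOT NULL,
//         shard_count INTEGER NOT NULL,
//         shard_stripe_size INTEGER NOT NULL,
//         generation INTEGER NOT NULL,
//         generation_pageserver BIGINT NOT NULL,
//         placement_policy VARCHAR NOT NULL,
//         config TEXT NOT NULL,
//         PRIMARY KEY (tenant_id, shard_number, shard_count)
//     );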

View File

@@ -11,7 +11,6 @@ use control_plane::attachment_service::{
TenantCreateResponseShard, TenantLocateResponse, TenantLocateResponseShard,
TenantShardMigrateRequest, TenantShardMigrateResponse,
};
use diesel::result::DatabaseErrorKind;
use hyper::StatusCode;
use pageserver_api::{
control_api::{
@@ -27,7 +26,6 @@ use pageserver_api::{
};
use pageserver_client::mgmt_api;
use utils::{
completion::Barrier,
generation::Generation,
http::error::ApiError,
id::{NodeId, TenantId},
@@ -37,7 +35,7 @@ use utils::{
use crate::{
compute_hook::ComputeHook,
node::Node,
persistence::{DatabaseError, Persistence, TenantShardPersistence},
persistence::{Persistence, TenantShardPersistence},
scheduler::Scheduler,
tenant_state::{
IntentState, ObservedState, ObservedStateLocation, ReconcileResult, ReconcileWaitError,
@@ -48,10 +46,6 @@ use crate::{
const RECONCILE_TIMEOUT: Duration = Duration::from_secs(30);
/// How long [`Service::startup_reconcile`] is allowed to take before it should give
/// up on unresponsive pageservers and proceed.
pub(crate) const STARTUP_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30);
// Top level state available to all HTTP handlers
struct ServiceState {
tenants: BTreeMap<TenantShardId, TenantState>,
@@ -85,27 +79,10 @@ pub struct Config {
pub jwt_token: Option<String>,
}
impl From<DatabaseError> for ApiError {
fn from(err: DatabaseError) -> ApiError {
match err {
DatabaseError::Query(e) => ApiError::InternalServerError(e.into()),
// FIXME: ApiError doesn't have an Unavailable variant, but ShuttingDown maps to 503.
DatabaseError::Connection(_e) => ApiError::ShuttingDown,
DatabaseError::Logical(reason) => {
ApiError::InternalServerError(anyhow::anyhow!(reason))
}
}
}
}
pub struct Service {
inner: Arc<std::sync::RwLock<ServiceState>>,
config: Config,
persistence: Arc<Persistence>,
/// This waits for initial reconciliation with pageservers to complete. Until this barrier
/// passes, it isn't safe to do any actions that mutate tenants.
pub(crate) startup_complete: Barrier,
}
impl From<ReconcileWaitError> for ApiError {
@@ -119,32 +96,77 @@ impl From<ReconcileWaitError> for ApiError {
}
impl Service {
pub fn get_config(&self) -> &Config {
&self.config
}
pub async fn spawn(config: Config, persistence: Arc<Persistence>) -> anyhow::Result<Arc<Self>> {
let (result_tx, mut result_rx) = tokio::sync::mpsc::unbounded_channel();
tracing::info!("Loading nodes from database...");
let mut nodes = persistence.list_nodes().await?;
tracing::info!("Loaded {} nodes from database.", nodes.len());
tracing::info!("Loading shards from database...");
let tenant_shard_persistence = persistence.list_tenant_shards().await?;
tracing::info!(
"Loaded {} shards from database.",
tenant_shard_persistence.len()
);
let mut tenants = BTreeMap::new();
for tsp in tenant_shard_persistence {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
let shard_identity = if tsp.shard_count == 0 {
ShardIdentity::unsharded()
} else {
ShardIdentity::new(
ShardNumber(tsp.shard_number as u8),
ShardCount(tsp.shard_count as u8),
ShardStripeSize(tsp.shard_stripe_size as u32),
)?
};
let new_tenant = TenantState {
tenant_shard_id,
shard: shard_identity,
sequence: Sequence::initial(),
// Note that we load generation, but don't care about generation_pageserver. We will either end up finding
// our existing attached location and it will match generation_pageserver, or we will attach somewhere new
// and update generation_pageserver in the process.
generation: Generation::new(tsp.generation),
policy: serde_json::from_str(&tsp.placement_policy).unwrap(),
intent: IntentState::new(),
observed: ObservedState::new(),
config: serde_json::from_str(&tsp.config).unwrap(),
reconciler: None,
waiter: Arc::new(SeqWait::new(Sequence::initial())),
error_waiter: Arc::new(SeqWait::new(Sequence::initial())),
last_error: Arc::default(),
};
tenants.insert(tenant_shard_id, new_tenant);
}
/// TODO: don't allow other API calls until this is done, don't start doing any background housekeeping
/// until this is done.
async fn startup_reconcile(&self) {
// For all tenant shards, a vector of observed states on nodes (where None means
// indeterminate, same as in [`ObservedStateLocation`])
let mut observed = HashMap::new();
let nodes = {
let locked = self.inner.read().unwrap();
locked.nodes.clone()
};
// TODO: issue these requests concurrently
for node in nodes.values() {
let client = mgmt_api::Client::new(node.base_url(), self.config.jwt_token.as_deref());
for node in &mut nodes {
let client = mgmt_api::Client::new(node.base_url(), config.jwt_token.as_deref());
tracing::info!("Scanning shards on node {}...", node.id);
match client.list_location_config().await {
Err(e) => {
tracing::warn!("Could not contact pageserver {} ({e})", node.id);
// TODO: be more tolerant, apply a generous 5-10 second timeout with retries, in case
// pageserver is being restarted at the same time as we are
// TODO: be more tolerant, apply a generous 5-10 second timeout
// TODO: setting a node to Offline is a dramatic thing to do, and can
// prevent neon_local from starting up (it starts this service before
// any pageservers are running). It may make sense to give nodes
// a Pending state to accommodate this situation, and allow (but deprioritize)
// scheduling on Pending nodes.
//node.availability = NodeAvailability::Offline;
}
Ok(listing) => {
tracing::info!(
@@ -152,6 +174,7 @@ impl Service {
listing.tenant_shards.len(),
node.id
);
node.availability = NodeAvailability::Active;
for (tenant_shard_id, conf_opt) in listing.tenant_shards {
observed.insert(tenant_shard_id, (node.id, conf_opt));
@@ -163,46 +186,41 @@ impl Service {
let mut cleanup = Vec::new();
// Populate intent and observed states for all tenants, based on reported state on pageservers
let shard_count = {
let mut locked = self.inner.write().unwrap();
for (tenant_shard_id, (node_id, observed_loc)) in observed {
let Some(tenant_state) = locked.tenants.get_mut(&tenant_shard_id) else {
cleanup.push((tenant_shard_id, node_id));
continue;
};
for (tenant_shard_id, (node_id, observed_loc)) in observed {
let Some(tenant_state) = tenants.get_mut(&tenant_shard_id) else {
cleanup.push((tenant_shard_id, node_id));
continue;
};
tenant_state
.observed
.locations
.insert(node_id, ObservedStateLocation { conf: observed_loc });
tenant_state
.observed
.locations
.insert(node_id, ObservedStateLocation { conf: observed_loc });
}
// State of nodes is now frozen, transform to a HashMap.
let mut nodes: HashMap<NodeId, Node> = nodes.into_iter().map(|n| (n.id, n)).collect();
// Populate each tenant's intent state
let mut scheduler = Scheduler::new(&tenants, &nodes);
for (tenant_shard_id, tenant_state) in tenants.iter_mut() {
tenant_state.intent_from_observed();
if let Err(e) = tenant_state.schedule(&mut scheduler) {
// Non-fatal error: we are unable to properly schedule the tenant, perhaps because
// not enough pageservers are available. The tenant may well still be available
// to clients.
tracing::error!("Failed to schedule tenant {tenant_shard_id} at startup: {e}");
}
// Populate each tenant's intent state
let mut scheduler = Scheduler::new(&locked.tenants, &nodes);
for (tenant_shard_id, tenant_state) in locked.tenants.iter_mut() {
tenant_state.intent_from_observed();
if let Err(e) = tenant_state.schedule(&mut scheduler) {
// Non-fatal error: we are unable to properly schedule the tenant, perhaps because
// not enough pageservers are available. The tenant may well still be available
// to clients.
tracing::error!("Failed to schedule tenant {tenant_shard_id} at startup: {e}");
}
}
locked.tenants.len()
};
// TODO: if any tenant's intent now differs from its loaded generation_pageserver, we should clear that
// generation_pageserver in the database.
}
// Clean up any tenants that were found on pageservers but are not known to us.
for (tenant_shard_id, node_id) in cleanup {
// A node reported a tenant_shard_id which is unknown to us: detach it.
let node = nodes
.get(&node_id)
.get_mut(&node_id)
.expect("Always exists: only known nodes are scanned");
let client = mgmt_api::Client::new(node.base_url(), self.config.jwt_token.as_deref());
let client = mgmt_api::Client::new(node.base_url(), config.jwt_token.as_deref());
match client
.location_config(
tenant_shard_id,
@@ -234,80 +252,13 @@ impl Service {
}
}
// Finally, now that the service is up and running, launch reconcile operations for any tenants
// which require it: under normal circumstances this should only include tenants that were in some
// transient state before we restarted.
let reconcile_tasks = self.reconcile_all();
tracing::info!("Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)");
}
pub async fn spawn(config: Config, persistence: Arc<Persistence>) -> anyhow::Result<Arc<Self>> {
let (result_tx, mut result_rx) = tokio::sync::mpsc::unbounded_channel();
tracing::info!("Loading nodes from database...");
let nodes = persistence.list_nodes().await?;
let nodes: HashMap<NodeId, Node> = nodes.into_iter().map(|n| (n.id, n)).collect();
tracing::info!("Loaded {} nodes from database.", nodes.len());
tracing::info!("Loading shards from database...");
let tenant_shard_persistence = persistence.list_tenant_shards().await?;
tracing::info!(
"Loaded {} shards from database.",
tenant_shard_persistence.len()
);
let mut tenants = BTreeMap::new();
for tsp in tenant_shard_persistence {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount(tsp.shard_count as u8),
};
let shard_identity = if tsp.shard_count == 0 {
ShardIdentity::unsharded()
} else {
ShardIdentity::new(
ShardNumber(tsp.shard_number as u8),
ShardCount(tsp.shard_count as u8),
ShardStripeSize(tsp.shard_stripe_size as u32),
)?
};
// We will populate intent properly later in [`Self::startup_reconcile`], initially populate
// it with what we can infer: the node for which a generation was most recently issued.
let mut intent = IntentState::new();
if tsp.generation_pageserver != i64::MAX {
intent.attached = Some(NodeId(tsp.generation_pageserver as u64))
}
let new_tenant = TenantState {
tenant_shard_id,
shard: shard_identity,
sequence: Sequence::initial(),
generation: Generation::new(tsp.generation as u32),
policy: serde_json::from_str(&tsp.placement_policy).unwrap(),
intent,
observed: ObservedState::new(),
config: serde_json::from_str(&tsp.config).unwrap(),
reconciler: None,
waiter: Arc::new(SeqWait::new(Sequence::initial())),
error_waiter: Arc::new(SeqWait::new(Sequence::initial())),
last_error: Arc::default(),
};
tenants.insert(tenant_shard_id, new_tenant);
}
let (startup_completion, startup_complete) = utils::completion::channel();
let shard_count = tenants.len();
let this = Arc::new(Self {
inner: Arc::new(std::sync::RwLock::new(ServiceState::new(
result_tx, nodes, tenants,
))),
config,
persistence,
startup_complete,
});
let result_task_this = this.clone();
@@ -365,13 +316,11 @@ impl Service {
}
});
let startup_reconcile_this = this.clone();
tokio::task::spawn(async move {
// Block the [`Service::startup_complete`] barrier until we're done
let _completion = startup_completion;
startup_reconcile_this.startup_reconcile().await
});
// Finally, now that the service is up and running, launch reconcile operations for any tenants
// which require it: under normal circumstances this should only include tenants that were in some
// transient state before we restarted.
let reconcile_tasks = this.reconcile_all();
tracing::info!("Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)");
Ok(this)
}
@@ -387,6 +336,7 @@ impl Service {
let locked = self.inner.write().unwrap();
!locked.tenants.contains_key(&attach_req.tenant_shard_id)
};
if insert {
let tsp = TenantShardPersistence {
tenant_id: attach_req.tenant_shard_id.tenant_id.to_string(),
@@ -394,49 +344,31 @@ impl Service {
shard_count: attach_req.tenant_shard_id.shard_count.0 as i32,
shard_stripe_size: 0,
generation: 0,
generation_pageserver: i64::MAX,
generation_pageserver: None,
placement_policy: serde_json::to_string(&PlacementPolicy::default()).unwrap(),
config: serde_json::to_string(&TenantConfig::default()).unwrap(),
};
match self.persistence.insert_tenant_shards(vec![tsp]).await {
Err(e) => match e {
DatabaseError::Query(diesel::result::Error::DatabaseError(
DatabaseErrorKind::UniqueViolation,
_,
)) => {
tracing::info!(
"Raced with another request to insert tenant {}",
attach_req.tenant_shard_id
)
}
_ => return Err(e.into()),
},
Ok(()) => {
tracing::info!("Inserted shard {} in database", attach_req.tenant_shard_id);
self.persistence.insert_tenant_shards(vec![tsp]).await?;
let mut locked = self.inner.write().unwrap();
locked.tenants.insert(
attach_req.tenant_shard_id,
TenantState::new(
attach_req.tenant_shard_id,
ShardIdentity::unsharded(),
PlacementPolicy::Single,
),
);
tracing::info!("Inserted shard {} in memory", attach_req.tenant_shard_id);
}
}
let mut locked = self.inner.write().unwrap();
locked.tenants.insert(
attach_req.tenant_shard_id,
TenantState::new(
attach_req.tenant_shard_id,
ShardIdentity::unsharded(),
PlacementPolicy::Single,
),
);
}
let new_generation = if let Some(req_node_id) = attach_req.node_id {
let new_generation = if attach_req.node_id.is_some() {
Some(
self.persistence
.increment_generation(attach_req.tenant_shard_id, req_node_id)
.increment_generation(attach_req.tenant_shard_id, attach_req.node_id)
.await?,
)
} else {
self.persistence.detach(attach_req.tenant_shard_id).await?;
None
};
@@ -448,11 +380,6 @@ impl Service {
if let Some(new_generation) = new_generation {
tenant_state.generation = new_generation;
} else {
// This is a detach notification. We must update placement policy to avoid re-attaching
// during background scheduling/reconciliation, or during attachment service restart.
assert!(attach_req.node_id.is_none());
tenant_state.policy = PlacementPolicy::Detached;
}
if let Some(attaching_pageserver) = attach_req.node_id.as_ref() {
@@ -480,7 +407,6 @@ impl Service {
"attach_hook: tenant {} set generation {:?}, pageserver {}",
attach_req.tenant_shard_id,
tenant_state.generation,
// TODO: this is an odd number of 0xf's
attach_req.node_id.unwrap_or(utils::id::NodeId(0xfffffff))
);
@@ -573,14 +499,6 @@ impl Service {
id: req_tenant.id,
valid,
});
} else {
// After tenant deletion, we may approve any validation. This avoids
// spurious warnings on the pageserver if it has pending LSN updates
// at the point a deletion happens.
response.tenants.push(ValidateResponseTenant {
id: req_tenant.id,
valid: true,
});
}
}
response
@@ -636,7 +554,7 @@ impl Service {
shard_count: tenant_shard_id.shard_count.0 as i32,
shard_stripe_size: create_req.shard_parameters.stripe_size.0 as i32,
generation: 0,
generation_pageserver: i64::MAX,
generation_pageserver: None,
placement_policy: serde_json::to_string(&placement_policy).unwrap(),
config: serde_json::to_string(&create_req.config).unwrap(),
})
@@ -950,6 +868,7 @@ impl Service {
} else {
let old_attached = shard.intent.attached;
shard.intent.attached = Some(migrate_req.node_id);
match shard.policy {
PlacementPolicy::Single => {
shard.intent.secondary.clear();
@@ -963,13 +882,7 @@ impl Service {
shard.intent.secondary.push(old_attached);
}
}
PlacementPolicy::Detached => {
return Err(ApiError::BadRequest(anyhow::anyhow!(
"Cannot migrate a tenant that is PlacementPolicy::Detached: configure it to an attached policy first"
)))
}
}
shard.intent.attached = Some(migrate_req.node_id);
tracing::info!("Migrating: new intent {:?}", shard.intent);
shard.sequence = shard.sequence.next();
@@ -1042,7 +955,10 @@ impl Service {
availability: NodeAvailability::Active,
};
// TODO: idempotency if the node already exists in the database
self.persistence.insert_node(&new_node).await?;
self.persistence
.insert_node(&new_node)
.await
.map_err(ApiError::InternalServerError)?;
let mut locked = self.inner.write().unwrap();
let mut new_nodes = (*locked.nodes).clone();

View File

@@ -312,18 +312,6 @@ impl TenantState {
modified = true;
}
}
Detached => {
// Should have no attached or secondary pageservers
if self.intent.attached.is_some() {
self.intent.attached = None;
modified = true;
}
if !self.intent.secondary.is_empty() {
self.intent.secondary.clear();
modified = true;
}
}
}
if modified {

View File

@@ -1,11 +1,5 @@
use crate::{background_process, local_env::LocalEnv};
use camino::{Utf8Path, Utf8PathBuf};
use diesel::{
backend::Backend,
query_builder::{AstPass, QueryFragment, QueryId},
Connection, PgConnection, QueryResult, RunQueryDsl,
};
use diesel_migrations::{HarnessWithOutput, MigrationHarness};
use camino::Utf8PathBuf;
use hyper::Method;
use pageserver_api::{
models::{ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo},
@@ -13,9 +7,9 @@ use pageserver_api::{
};
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{env, str::FromStr};
use tokio::process::Command;
use std::{path::PathBuf, process::Child, str::FromStr};
use tracing::instrument;
use utils::{
auth::{Claims, Scope},
@@ -25,17 +19,14 @@ use utils::{
pub struct AttachmentService {
env: LocalEnv,
listen: String,
path: Utf8PathBuf,
path: PathBuf,
jwt_token: Option<String>,
public_key_path: Option<Utf8PathBuf>,
postgres_port: u16,
client: reqwest::Client,
}
const COMMAND: &str = "attachment_service";
const ATTACHMENT_SERVICE_POSTGRES_VERSION: u32 = 16;
#[derive(Serialize, Deserialize)]
pub struct AttachHookRequest {
pub tenant_shard_id: TenantShardId,
@@ -178,9 +169,7 @@ pub struct TenantShardMigrateResponse {}
impl AttachmentService {
pub fn from_env(env: &LocalEnv) -> Self {
let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
.unwrap()
.join("attachments.json");
let path = env.base_data_dir.join("attachments.json");
// Makes no sense to construct this if pageservers aren't going to use it: assume
// pageservers have control plane API set
@@ -192,13 +181,6 @@ impl AttachmentService {
listen_url.port().unwrap()
);
// Convention: NeonEnv in python tests reserves the next port after the control_plane_api
// port, for use by our captive postgres.
let postgres_port = listen_url
.port()
.expect("Control plane API setting should always have a port")
+ 1;
// Assume all pageservers have symmetric auth configuration: this service
// expects to use one JWT token to talk to all of them.
let ps_conf = env
@@ -227,7 +209,6 @@ impl AttachmentService {
listen,
jwt_token,
public_key_path,
postgres_port,
client: reqwest::ClientBuilder::new()
.build()
.expect("Failed to construct http client"),
@@ -239,214 +220,13 @@ impl AttachmentService {
.expect("non-Unicode path")
}
/// PIDFile for the postgres instance used to store attachment service state
fn postgres_pid_file(&self) -> Utf8PathBuf {
Utf8PathBuf::from_path_buf(
self.env
.base_data_dir
.join("attachment_service_postgres.pid"),
)
.expect("non-Unicode path")
}
pub async fn start(&self) -> anyhow::Result<Child> {
let path_str = self.path.to_string_lossy();
/// In order to access database migrations, we need to find the Neon source tree
async fn find_source_root(&self) -> anyhow::Result<Utf8PathBuf> {
// We assume that either the cwd or our binary is in the source tree. The former is usually
// true for automated test runners, the latter is usually true for developer workstations. Often
// both are true, which is fine.
let candidate_start_points = [
// Current working directory
Utf8PathBuf::from_path_buf(std::env::current_dir()?).unwrap(),
// Directory containing the binary we're running inside
Utf8PathBuf::from_path_buf(env::current_exe()?.parent().unwrap().to_owned()).unwrap(),
];
// For each candidate start point, search through ancestors looking for a neon.git source tree root
for start_point in &candidate_start_points {
// Start from the build dir: assumes we are running out of a built neon source tree
for path in start_point.ancestors() {
// A crude approximation: the root of the source tree is whatever contains a "control_plane"
// subdirectory.
let control_plane = path.join("control_plane");
if tokio::fs::try_exists(&control_plane).await? {
return Ok(path.to_owned());
}
}
}
// Fall-through
Err(anyhow::anyhow!(
"Could not find control_plane src dir, after searching ancestors of {candidate_start_points:?}"
))
}
/// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
///
/// This usually uses ATTACHMENT_SERVICE_POSTGRES_VERSION of postgres, but will fall back
/// to other versions if that one isn't found. Some automated tests create circumstances
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
let prefer_versions = [ATTACHMENT_SERVICE_POSTGRES_VERSION, 15, 14];
for v in prefer_versions {
let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
if tokio::fs::try_exists(&path).await? {
return Ok(path);
}
}
// Fall through
anyhow::bail!(
"Postgres binaries not found in {}",
self.env.pg_distrib_dir.display()
);
}
/// Readiness check for our postgres process
async fn pg_isready(&self, pg_bin_dir: &Utf8Path) -> anyhow::Result<bool> {
let bin_path = pg_bin_dir.join("pg_isready");
let args = ["-h", "localhost", "-p", &format!("{}", self.postgres_port)];
let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;
Ok(exitcode.success())
}
/// Create our database if it doesn't exist, and run migrations.
///
/// This function is equivalent to the `diesel setup` command in the diesel CLI. We implement
/// the same steps by hand to avoid imposing a dependency on installing diesel-cli for developers
/// who just want to run `cargo neon_local` without knowing about diesel.
///
/// Returns the database url
pub async fn setup_database(&self) -> anyhow::Result<String> {
let database_url = format!(
"postgresql://localhost:{}/attachment_service",
self.postgres_port
);
println!("Running attachment service database setup...");
fn change_database_of_url(database_url: &str, default_database: &str) -> (String, String) {
let base = ::url::Url::parse(database_url).unwrap();
let database = base.path_segments().unwrap().last().unwrap().to_owned();
let mut new_url = base.join(default_database).unwrap();
new_url.set_query(base.query());
(database, new_url.into())
}
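// Illustrative trace through the helper above (port number invented):
//     change_database_of_url("postgresql://localhost:1235/attachment_service", "postgres")
// returns ("attachment_service", "postgresql://localhost:1235/postgres"): the database
// to create, plus a maintenance URL to connect to while creating it.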
#[derive(Debug, Clone)]
pub struct CreateDatabaseStatement {
db_name: String,
}
impl CreateDatabaseStatement {
pub fn new(db_name: &str) -> Self {
CreateDatabaseStatement {
db_name: db_name.to_owned(),
}
}
}
impl<DB: Backend> QueryFragment<DB> for CreateDatabaseStatement {
fn walk_ast<'b>(&'b self, mut out: AstPass<'_, 'b, DB>) -> QueryResult<()> {
out.push_sql("CREATE DATABASE ");
out.push_identifier(&self.db_name)?;
Ok(())
}
}
impl<Conn> RunQueryDsl<Conn> for CreateDatabaseStatement {}
impl QueryId for CreateDatabaseStatement {
type QueryId = ();
const HAS_STATIC_QUERY_ID: bool = false;
}
if PgConnection::establish(&database_url).is_err() {
let (database, postgres_url) = change_database_of_url(&database_url, "postgres");
println!("Creating database: {database}");
let mut conn = PgConnection::establish(&postgres_url)?;
CreateDatabaseStatement::new(&database).execute(&mut conn)?;
}
let mut conn = PgConnection::establish(&database_url)?;
let migrations_dir = self
.find_source_root()
.await?
.join("control_plane/attachment_service/migrations");
let migrations = diesel_migrations::FileBasedMigrations::from_path(migrations_dir)?;
println!("Running migrations in {}", migrations.path().display());
HarnessWithOutput::write_to_stdout(&mut conn)
.run_pending_migrations(migrations)
.map(|_| ())
.map_err(|e| anyhow::anyhow!(e))?;
println!("Migrations complete");
Ok(database_url)
}
pub async fn start(&self) -> anyhow::Result<()> {
// Start a vanilla Postgres process used by the attachment service for persistence.
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
.unwrap()
.join("attachment_service_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
let pg_log_path = pg_data_path.join("postgres.log");
if !tokio::fs::try_exists(&pg_data_path).await? {
// Initialize empty database
let initdb_path = pg_bin_dir.join("initdb");
let mut child = Command::new(&initdb_path)
.args(["-D", pg_data_path.as_ref()])
.spawn()
.expect("Failed to spawn initdb");
let status = child.wait().await?;
if !status.success() {
anyhow::bail!("initdb failed with status {status}");
}
tokio::fs::write(
&pg_data_path.join("postgresql.conf"),
format!("port = {}", self.postgres_port),
)
.await?;
};
println!("Starting attachment service database...");
let db_start_args = [
"-w",
"-D",
pg_data_path.as_ref(),
"-l",
pg_log_path.as_ref(),
"start",
];
background_process::start_process(
"attachment_service_db",
&self.env.base_data_dir,
pg_bin_dir.join("pg_ctl").as_std_path(),
db_start_args,
[],
background_process::InitialPidFile::Create(self.postgres_pid_file()),
|| self.pg_isready(&pg_bin_dir),
)
.await?;
// Run migrations on every startup, in case something changed.
let database_url = self.setup_database().await?;
let mut args = vec![
"-l",
&self.listen,
"-p",
self.path.as_ref(),
"--database-url",
&database_url,
]
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>();
let mut args = vec!["-l", &self.listen, "-p", &path_str]
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<_>>();
if let Some(jwt_token) = &self.jwt_token {
args.push(format!("--jwt-token={jwt_token}"));
}
@@ -455,7 +235,7 @@ impl AttachmentService {
args.push(format!("--public-key={public_key_path}"));
}
background_process::start_process(
let result = background_process::start_process(
COMMAND,
&self.env.base_data_dir,
&self.env.attachment_service_bin(),
@@ -472,46 +252,29 @@ impl AttachmentService {
}
},
)
.await?;
.await;
Ok(())
}
pub async fn stop(&self, immediate: bool) -> anyhow::Result<()> {
background_process::stop_process(immediate, COMMAND, &self.pid_file())?;
let pg_data_path = self.env.base_data_dir.join("attachment_service_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
println!("Stopping attachment service database...");
let pg_stop_args = ["-D", &pg_data_path.to_string_lossy(), "stop"];
let stop_status = Command::new(pg_bin_dir.join("pg_ctl"))
.args(pg_stop_args)
.spawn()?
.wait()
for ps_conf in &self.env.pageservers {
let (pg_host, pg_port) =
parse_host_port(&ps_conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&ps_conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
self.node_register(NodeRegisterRequest {
node_id: ps_conf.id,
listen_pg_addr: pg_host.to_string(),
listen_pg_port: pg_port.unwrap_or(5432),
listen_http_addr: http_host.to_string(),
listen_http_port: http_port.unwrap_or(80),
})
.await?;
if !stop_status.success() {
let pg_status_args = ["-D", &pg_data_path.to_string_lossy(), "status"];
let status_exitcode = Command::new(pg_bin_dir.join("pg_ctl"))
.args(pg_status_args)
.spawn()?
.wait()
.await?;
// pg_ctl status returns this exit code if postgres is not running: in this case it is
// fine that stop failed. Otherwise it is an error that stop failed.
const PG_STATUS_NOT_RUNNING: i32 = 3;
if Some(PG_STATUS_NOT_RUNNING) == status_exitcode.code() {
println!("Attachment service data base is already stopped");
return Ok(());
} else {
anyhow::bail!("Failed to stop attachment service database: {stop_status}")
}
}
Ok(())
result
}
pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
background_process::stop_process(immediate, COMMAND, &self.pid_file())
}
/// Simple HTTP request wrapper for calling into attachment service
async fn dispatch<RQ, RS>(
&self,
@@ -593,7 +356,7 @@ impl AttachmentService {
&self,
req: TenantCreateRequest,
) -> anyhow::Result<TenantCreateResponse> {
self.dispatch(Method::POST, "v1/tenant".to_string(), Some(req))
self.dispatch(Method::POST, "tenant".to_string(), Some(req))
.await
}
@@ -650,7 +413,7 @@ impl AttachmentService {
) -> anyhow::Result<TimelineInfo> {
self.dispatch(
Method::POST,
format!("v1/tenant/{tenant_id}/timeline"),
format!("tenant/{tenant_id}/timeline"),
Some(req),
)
.await

View File

@@ -17,7 +17,7 @@ use std::io::Write;
use std::os::unix::prelude::AsRawFd;
use std::os::unix::process::CommandExt;
use std::path::Path;
use std::process::Command;
use std::process::{Child, Command};
use std::time::Duration;
use std::{fs, io, thread};
@@ -60,7 +60,7 @@ pub async fn start_process<F, Fut, AI, A, EI>(
envs: EI,
initial_pid_file: InitialPidFile,
process_status_check: F,
) -> anyhow::Result<()>
) -> anyhow::Result<Child>
where
F: Fn() -> Fut,
Fut: std::future::Future<Output = anyhow::Result<bool>>,
@@ -98,7 +98,7 @@ where
InitialPidFile::Expect(path) => path,
};
let spawned_process = filled_cmd.spawn().with_context(|| {
let mut spawned_process = filled_cmd.spawn().with_context(|| {
format!("Could not spawn {process_name}, see console output and log files for details.")
})?;
let pid = spawned_process.id();
@@ -106,26 +106,12 @@ where
i32::try_from(pid)
.with_context(|| format!("Subprocess {process_name} has invalid pid {pid}"))?,
);
// set up a scopeguard to kill & wait for the child in case we panic or bail below
let spawned_process = scopeguard::guard(spawned_process, |mut spawned_process| {
println!("SIGKILL & wait the started process");
(|| {
// TODO: use another signal that can be caught by the child so it can clean up any children it spawned (e.g., walredo).
spawned_process.kill().context("SIGKILL child")?;
spawned_process.wait().context("wait() for child process")?;
anyhow::Ok(())
})()
.with_context(|| format!("scopeguard kill&wait child {process_name:?}"))
.unwrap();
});
for retries in 0..RETRIES {
match process_started(pid, pid_file_to_check, &process_status_check).await {
Ok(true) => {
println!("\n{process_name} started and passed status check, pid: {pid}");
// leak the child process, it'll outlive this neon_local invocation
drop(scopeguard::ScopeGuard::into_inner(spawned_process));
return Ok(());
println!("\n{process_name} started, pid: {pid}");
return Ok(spawned_process);
}
Ok(false) => {
if retries == NOTICE_AFTER_RETRIES {
@@ -140,15 +126,16 @@ where
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
}
Err(e) => {
println!("error starting process {process_name:?}: {e:#}");
println!("{process_name} failed to start: {e:#}");
if let Err(e) = spawned_process.kill() {
println!("Could not stop {process_name} subprocess: {e:#}")
};
return Err(e);
}
}
}
println!();
anyhow::bail!(
"{process_name} did not start+pass status checks within {RETRY_UNTIL_SECS} seconds"
);
anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
}
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.

View File

@@ -135,7 +135,7 @@ fn main() -> Result<()> {
"tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
"timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
"start" => rt.block_on(handle_start_all(sub_args, &env)),
"stop" => rt.block_on(handle_stop_all(sub_args, &env)),
"stop" => handle_stop_all(sub_args, &env),
"pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
"attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)),
"safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
@@ -1056,9 +1056,8 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
match sub_match.subcommand() {
Some(("start", subcommand_args)) => {
let register = subcommand_args.get_one::<bool>("register").unwrap_or(&true);
if let Err(e) = get_pageserver(env, subcommand_args)?
.start(&pageserver_config_overrides(subcommand_args), *register)
.start(&pageserver_config_overrides(subcommand_args))
.await
{
eprintln!("pageserver start failed: {e}");
@@ -1087,7 +1086,24 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
}
if let Err(e) = pageserver
.start(&pageserver_config_overrides(subcommand_args), false)
.start(&pageserver_config_overrides(subcommand_args))
.await
{
eprintln!("pageserver start failed: {e}");
exit(1);
}
}
Some(("migrate", subcommand_args)) => {
let pageserver = get_pageserver(env, subcommand_args)?;
// TODO: what shutdown strategy should we use here?
if let Err(e) = pageserver.stop(false) {
eprintln!("pageserver stop failed: {}", e);
exit(1);
}
if let Err(e) = pageserver
.start(&pageserver_config_overrides(subcommand_args))
.await
{
eprintln!("pageserver start failed: {e}");
@@ -1145,7 +1161,7 @@ async fn handle_attachment_service(
.map(|s| s.as_str())
== Some("immediate");
if let Err(e) = svc.stop(immediate).await {
if let Err(e) = svc.stop(immediate) {
eprintln!("stop failed: {}", e);
exit(1);
}
@@ -1241,7 +1257,7 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
let attachment_service = AttachmentService::from_env(env);
if let Err(e) = attachment_service.start().await {
eprintln!("attachment_service start failed: {:#}", e);
try_stop_all(env, true).await;
try_stop_all(env, true);
exit(1);
}
}
@@ -1249,11 +1265,11 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
for ps_conf in &env.pageservers {
let pageserver = PageServerNode::from_env(env, ps_conf);
if let Err(e) = pageserver
.start(&pageserver_config_overrides(sub_match), true)
.start(&pageserver_config_overrides(sub_match))
.await
{
eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
try_stop_all(env, true).await;
try_stop_all(env, true);
exit(1);
}
}
@@ -1262,23 +1278,23 @@ async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.start(vec![]).await {
eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
try_stop_all(env, false).await;
try_stop_all(env, false);
exit(1);
}
}
Ok(())
}
async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
let immediate =
sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
try_stop_all(env, immediate).await;
try_stop_all(env, immediate);
Ok(())
}
async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
// Stop all endpoints
match ComputeControlPlane::load(env.clone()) {
Ok(cplane) => {
@@ -1313,7 +1329,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
if env.control_plane_api.is_some() {
let attachment_service = AttachmentService::from_env(env);
if let Err(e) = attachment_service.stop(immediate).await {
if let Err(e) = attachment_service.stop(immediate) {
eprintln!("attachment service stop failed: {e:#}");
}
}
@@ -1533,11 +1549,7 @@ fn cli() -> Command {
.subcommand(Command::new("status"))
.subcommand(Command::new("start")
.about("Start local pageserver")
.arg(pageserver_config_args.clone()).arg(Arg::new("register")
.long("register")
.default_value("true").required(false)
.value_parser(value_parser!(bool))
.value_name("register"))
.arg(pageserver_config_args.clone())
)
.subcommand(Command::new("stop")
.about("Stop local pageserver")

View File

@@ -57,7 +57,7 @@ use crate::local_env::LocalEnv;
use crate::postgresql_conf::PostgresConf;
use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
use compute_api::spec::{Cluster, ComputeMode, ComputeSpec};
// contents of a endpoint.json file
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -70,7 +70,6 @@ pub struct EndpointConf {
http_port: u16,
pg_version: u32,
skip_pg_catalog_updates: bool,
features: Vec<ComputeFeature>,
}
//
@@ -141,7 +140,6 @@ impl ComputeControlPlane {
// with this we basically test a case of waking up an idle compute, where
// we also skip catalog updates in the cloud.
skip_pg_catalog_updates: true,
features: vec![],
});
ep.create_endpoint_dir()?;
@@ -156,7 +154,6 @@ impl ComputeControlPlane {
pg_port,
pg_version,
skip_pg_catalog_updates: true,
features: vec![],
})?,
)?;
std::fs::write(
@@ -218,9 +215,6 @@ pub struct Endpoint {
// Optimizations
skip_pg_catalog_updates: bool,
// Feature flags
features: Vec<ComputeFeature>,
}
impl Endpoint {
@@ -250,7 +244,6 @@ impl Endpoint {
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
features: conf.features,
})
}
@@ -438,7 +431,7 @@ impl Endpoint {
}
fn wait_for_compute_ctl_to_exit(&self, send_sigterm: bool) -> Result<()> {
// TODO use background_process::stop_process instead: https://github.com/neondatabase/neon/pull/6482
// TODO use background_process::stop_process instead
let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
let pid = nix::unistd::Pid::from_raw(pid as i32);
@@ -526,7 +519,7 @@ impl Endpoint {
skip_pg_catalog_updates: self.skip_pg_catalog_updates,
format_version: 1.0,
operation_uuid: None,
features: self.features.clone(),
features: vec![],
cluster: Cluster {
cluster_id: None, // project ID: not used
name: None, // project name: not used
@@ -583,21 +576,9 @@ impl Endpoint {
}
let child = cmd.spawn()?;
// set up a scopeguard to kill & wait for the child in case we panic or bail below
let child = scopeguard::guard(child, |mut child| {
println!("SIGKILL & wait the started process");
(|| {
// TODO: use another signal that can be caught by the child so it can clean up any children it spawned
child.kill().context("SIGKILL child")?;
child.wait().context("wait() for child process")?;
anyhow::Ok(())
})()
.with_context(|| format!("scopeguard kill&wait child {child:?}"))
.unwrap();
});
// Write down the pid so we can wait for it when we want to stop
// TODO use background_process::start_process instead: https://github.com/neondatabase/neon/pull/6482
// TODO use background_process::start_process instead
let pid = child.id();
let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
std::fs::write(pidfile_path, pid.to_string())?;
@@ -646,9 +627,6 @@ impl Endpoint {
std::thread::sleep(ATTEMPT_INTERVAL);
}
// disarm the scopeguard, let the child outlive this function (and neon_local invocation)
drop(scopeguard::ScopeGuard::into_inner(child));
Ok(())
}

View File

@@ -223,11 +223,7 @@ impl LocalEnv {
}
pub fn attachment_service_bin(&self) -> PathBuf {
// Irrespective of configuration, attachment service binary is always
// run from the same location as neon_local. This means that for compatibility
// tests that run an old pageserver/safekeeper, they still run the latest attachment service.
let neon_local_bin_dir = env::current_exe().unwrap().parent().unwrap().to_owned();
neon_local_bin_dir.join("attachment_service")
self.neon_distrib_dir.join("attachment_service")
}
pub fn safekeeper_bin(&self) -> PathBuf {

View File

@@ -11,7 +11,7 @@ use std::io;
use std::io::Write;
use std::num::NonZeroU64;
use std::path::PathBuf;
use std::process::Command;
use std::process::{Child, Command};
use std::time::Duration;
use anyhow::{bail, Context};
@@ -30,7 +30,6 @@ use utils::{
lsn::Lsn,
};
use crate::attachment_service::{AttachmentService, NodeRegisterRequest};
use crate::local_env::PageServerConf;
use crate::{background_process, local_env::LocalEnv};
@@ -162,8 +161,8 @@ impl PageServerNode {
.expect("non-Unicode path")
}
pub async fn start(&self, config_overrides: &[&str], register: bool) -> anyhow::Result<()> {
self.start_node(config_overrides, false, register).await
pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
self.start_node(config_overrides, false).await
}
fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
@@ -208,8 +207,7 @@ impl PageServerNode {
&self,
config_overrides: &[&str],
update_config: bool,
register: bool,
) -> anyhow::Result<()> {
) -> anyhow::Result<Child> {
// TODO: using a thread here because start_process() is not async but we need to call check_status()
let datadir = self.repo_path();
print!(
@@ -246,26 +244,7 @@ impl PageServerNode {
}
},
)
.await?;
if register {
let attachment_service = AttachmentService::from_env(&self.env);
let (pg_host, pg_port) =
parse_host_port(&self.conf.listen_pg_addr).expect("Unable to parse listen_pg_addr");
let (http_host, http_port) = parse_host_port(&self.conf.listen_http_addr)
.expect("Unable to parse listen_http_addr");
attachment_service
.node_register(NodeRegisterRequest {
node_id: self.conf.id,
listen_pg_addr: pg_host.to_string(),
listen_pg_port: pg_port.unwrap_or(5432),
listen_http_addr: http_host.to_string(),
listen_http_port: http_port.unwrap_or(80),
})
.await?;
}
Ok(())
.await
}
fn pageserver_basic_args<'a>(

View File

@@ -7,6 +7,7 @@
//! ```
use std::io::Write;
use std::path::PathBuf;
use std::process::Child;
use std::{io, result};
use anyhow::Context;
@@ -103,7 +104,7 @@ impl SafekeeperNode {
.expect("non-Unicode path")
}
pub async fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<()> {
pub async fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<Child> {
print!(
"Starting safekeeper at '{}' in '{}'",
self.pg_connection_config.raw_address(),

View File

@@ -1,9 +0,0 @@
# For documentation on how to configure this file,
# see https://diesel.rs/guides/configuring-diesel-cli
[print_schema]
file = "control_plane/attachment_service/src/schema.rs"
custom_type_derives = ["diesel::query_builder::QueryId"]
[migrations_directory]
dir = "control_plane/attachment_service/migrations"

View File

@@ -90,9 +90,6 @@ pub enum ComputeFeature {
/// track short-lived connections as user activity.
ActivityMonitorExperimental,
/// Enable running migrations
Migrations,
/// This is a special feature flag that is used to represent unknown feature flags.
/// Basically all unknown to enum flags are represented as this one. See unit test
/// `parse_unknown_features()` for more details.

View File

@@ -1,11 +1,9 @@
use anyhow::{bail, Result};
use byteorder::{ByteOrder, BE};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::{Oid, TransactionId};
use serde::{Deserialize, Serialize};
use std::{fmt, ops::Range};
use std::fmt;
use crate::reltag::{BlockNumber, RelTag, SlruKind};
use crate::reltag::{BlockNumber, RelTag};
/// Key used in the Repository kv-store.
///
@@ -145,390 +143,12 @@ impl Key {
}
}
// Layout of the Key address space
//
// The Key struct, used to address the underlying key-value store, consists of
// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
// all the data and metadata keys into those 18 bytes.
//
// Principles for the mapping:
//
// - Things that are often accessed or modified together should be close to
// each other in the key space. For example, if a relation is extended by one
// block, we create a new key-value pair for the block data, and update the
// relation size entry. Because of that, the RelSize key comes after all the
// RelBlocks of a relation: the RelSize and the last RelBlock are always next
// to each other.
//
// The key space is divided into five major sections, identified by the first
// byte, and they form a hierarchy:
//
// 00 Relation data and metadata
//
// DbDir () -> (dbnode, spcnode)
// Filenodemap
// RelDir -> relnode forknum
// RelBlocks
// RelSize
//
// 01 SLRUs
//
// SlruDir kind
// SlruSegBlocks segno
// SlruSegSize
//
// 02 pg_twophase
//
// 03 misc
// Controlfile
// checkpoint
// pg_version
//
// 04 aux files
//
// Below is a full list of the keyspace allocation:
//
// DbDir:
// 00 00000000 00000000 00000000 00 00000000
//
// Filenodemap:
// 00 SPCNODE DBNODE 00000000 00 00000000
//
// RelDir:
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
//
// RelBlock:
// 00 SPCNODE DBNODE RELNODE FORK BLKNUM
//
// RelSize:
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
//
// SlruDir:
// 01 kind 00000000 00000000 00 00000000
//
// SlruSegBlock:
// 01 kind 00000001 SEGNO 00 BLKNUM
//
// SlruSegSize:
// 01 kind 00000001 SEGNO 00 FFFFFFFF
//
// TwoPhaseDir:
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
//
// Checkpoint:
// 03 00000000 00000000 00000000 00 00000001
//
// AuxFiles:
// 03 00000000 00000000 00000000 00 00000002
//
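As a worked illustration of the RelBlock row in the table above, here is a minimal sketch (made-up OIDs; assumes `RelTag`'s fields are public) showing how a relation block address maps onto the six Key fields:

#[cfg(test)]
mod layout_example {
    use super::*;

    #[test]
    fn rel_block_follows_documented_layout() {
        // 00 SPCNODE DBNODE RELNODE FORK BLKNUM, with illustrative values.
        let rel = RelTag {
            spcnode: 1663,
            dbnode: 16384,
            relnode: 24576,
            forknum: 0,
        };
        let key = rel_block_to_key(rel, 42);
        assert_eq!(key.field1, 0x00);
        assert_eq!(key.field2, 1663);
        assert_eq!(key.field3, 16384);
        assert_eq!(key.field4, 24576);
        assert_eq!(key.field5, 0);
        assert_eq!(key.field6, 42);
    }
}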
//-- Section 01: relation data and metadata
pub const DBDIR_KEY: Key = Key {
field1: 0x00,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
#[inline(always)]
pub fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}..Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0xffffffff,
field5: 0xff,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}
}
#[inline(always)]
pub fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 1,
}
}
#[inline(always)]
pub fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: blknum,
}
}
#[inline(always)]
pub fn rel_size_to_key(rel: RelTag) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0,
}..Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum + 1,
field6: 0,
}
}
//-- Section 02: SLRUs
#[inline(always)]
pub fn slru_dir_to_key(kind: SlruKind) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 0,
field4: 0,
field5: 0,
field6: 0,
}
}
#[inline(always)]
pub fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: blknum,
}
}
#[inline(always)]
pub fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
}
}
#[inline(always)]
pub fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
};
Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 0,
field6: 0,
}..Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 1,
field6: 0,
}
}
//-- Section 03: pg_twophase
pub const TWOPHASEDIR_KEY: Key = Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
#[inline(always)]
pub fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}
}
#[inline(always)]
pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}
//-- Section 04: Control file
pub const CONTROLFILE_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
pub const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 1,
};
pub const AUX_FILES_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 2,
};
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
#[inline(always)]
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
#[inline(always)]
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
#[inline(always)]
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
#[inline(always)]
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
let kind = match key.field2 {
0x00 => SlruKind::Clog,
0x01 => SlruKind::MultiXactMembers,
0x02 => SlruKind::MultiXactOffsets,
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
};
let segno = key.field4;
let blknum = key.field6;
(kind, segno, blknum)
}
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
#[inline(always)]
pub fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
#[inline(always)]
pub fn is_rel_block_key(key: &Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field6 != 0xffffffff
}
/// Guaranteed to return `Ok()` if [`is_rel_block_key`] returns `true` for `key`.
#[inline(always)]
pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
Ok(match key.field1 {
0x00 => (

View File

@@ -104,7 +104,6 @@ pub struct KeySpaceAccum {
accum: Option<Range<Key>>,
ranges: Vec<Range<Key>>,
size: u64,
}
impl KeySpaceAccum {
@@ -112,7 +111,6 @@ impl KeySpaceAccum {
Self {
accum: None,
ranges: Vec::new(),
size: 0,
}
}
@@ -123,8 +121,6 @@ impl KeySpaceAccum {
#[inline(always)]
pub fn add_range(&mut self, range: Range<Key>) {
self.size += key_range_size(&range) as u64;
match self.accum.as_mut() {
Some(accum) => {
if range.start == accum.end {
@@ -150,23 +146,6 @@ impl KeySpaceAccum {
ranges: self.ranges,
}
}
pub fn consume_keyspace(&mut self) -> KeySpace {
if let Some(accum) = self.accum.take() {
self.ranges.push(accum);
}
let mut prev_accum = KeySpaceAccum::new();
std::mem::swap(self, &mut prev_accum);
KeySpace {
ranges: prev_accum.ranges,
}
}
pub fn size(&self) -> u64 {
self.size
}
}
///
@@ -275,30 +254,6 @@ mod tests {
}
}
#[test]
fn keyspace_consume() {
let ranges = vec![kr(0..10), kr(20..35), kr(40..45)];
let mut accum = KeySpaceAccum::new();
for range in &ranges {
accum.add_range(range.clone());
}
let expected_size: u64 = ranges.iter().map(|r| key_range_size(r) as u64).sum();
assert_eq!(accum.size(), expected_size);
assert_ks_eq(&accum.consume_keyspace(), ranges.clone());
assert_eq!(accum.size(), 0);
assert_ks_eq(&accum.consume_keyspace(), vec![]);
assert_eq!(accum.size(), 0);
for range in &ranges {
accum.add_range(range.clone());
}
assert_ks_eq(&accum.to_keyspace(), ranges);
}
#[test]
fn keyspace_add_range() {
// two separate ranges

View File

@@ -111,19 +111,7 @@ impl RelTag {
/// These files are divided into segments, which are divided into
/// pages of the same BLCKSZ as used for relation files.
///
#[derive(
Debug,
Clone,
Copy,
Hash,
Serialize,
Deserialize,
PartialEq,
Eq,
PartialOrd,
Ord,
strum_macros::EnumIter,
)]
#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
pub enum SlruKind {
Clog,
MultiXactMembers,

View File

@@ -329,8 +329,8 @@ impl CheckPoint {
///
/// Returns 'true' if the XID was updated.
pub fn update_next_xid(&mut self, xid: u32) -> bool {
// nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparound.
let mut new_xid = std::cmp::max(xid.wrapping_add(1), pg_constants::FIRST_NORMAL_TRANSACTION_ID);
// nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparound.
let mut new_xid = std::cmp::max(xid + 1, pg_constants::FIRST_NORMAL_TRANSACTION_ID);
// To reduce the number of metadata checkpoints, we forward-align the XID on XID_CHECKPOINT_INTERVAL.
// XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
new_xid =

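For reference, a standalone sketch of the wraparound-safe increment discussed in the hunk above (the constant is Postgres's first normal XID, hard-coded here only for illustration):

const FIRST_NORMAL_TRANSACTION_ID: u32 = 3;

fn next_xid_after(xid: u32) -> u32 {
    // wrapping_add avoids overflow when xid == u32::MAX; XIDs below the
    // first normal value are reserved, so clamp up to it.
    std::cmp::max(xid.wrapping_add(1), FIRST_NORMAL_TRANSACTION_ID)
}

fn main() {
    assert_eq!(next_xid_after(1000), 1001);
    assert_eq!(next_xid_after(u32::MAX), FIRST_NORMAL_TRANSACTION_ID);
}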
View File

@@ -8,7 +8,6 @@ use std::pin::Pin;
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use std::time::SystemTime;
use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
use anyhow::Result;
@@ -24,7 +23,6 @@ use futures::stream::Stream;
use futures_util::StreamExt;
use http_types::{StatusCode, Url};
use tokio::time::Instant;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use crate::s3_bucket::RequestKind;
@@ -185,6 +183,7 @@ fn to_download_error(error: azure_core::Error) -> DownloadError {
}
}
#[async_trait::async_trait]
impl RemoteStorage for AzureBlobStorage {
async fn list(
&self,
@@ -372,20 +371,6 @@ impl RemoteStorage for AzureBlobStorage {
copy_status = status;
}
}
async fn time_travel_recover(
&self,
_prefix: Option<&RemotePath>,
_timestamp: SystemTime,
_done_if_after: SystemTime,
_cancel: CancellationToken,
) -> anyhow::Result<()> {
// TODO use Azure point in time recovery feature for this
// https://learn.microsoft.com/en-us/azure/storage/blobs/point-in-time-restore-overview
Err(anyhow::anyhow!(
"time travel recovery for azure blob storage is not implemented"
))
}
}
pin_project_lite::pin_project! {

View File

@@ -25,7 +25,6 @@ use bytes::Bytes;
use futures::stream::Stream;
use serde::{Deserialize, Serialize};
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use toml_edit::Item;
use tracing::info;
@@ -143,7 +142,7 @@ pub struct Listing {
/// Storage (potentially remote) API to manage its state.
/// This storage tries to be unaware of any layered repository context,
/// providing basic CRUD operations for storage files.
#[allow(async_fn_in_trait)]
#[async_trait::async_trait]
pub trait RemoteStorage: Send + Sync + 'static {
/// Lists all top level subdirectories for a given prefix
/// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
@@ -211,15 +210,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
/// Copy a remote object inside a bucket from one path to another.
async fn copy(&self, from: &RemotePath, to: &RemotePath) -> anyhow::Result<()>;
/// Resets the content of everything with the given prefix to the given state
async fn time_travel_recover(
&self,
prefix: Option<&RemotePath>,
timestamp: SystemTime,
done_if_after: SystemTime,
cancel: CancellationToken,
) -> anyhow::Result<()>;
}
pub type DownloadStream = Pin<Box<dyn Stream<Item = std::io::Result<Bytes>> + Unpin + Send + Sync>>;
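A hypothetical call site for the `time_travel_recover` API above (the function name, prefix, and durations are illustrative; the method signature follows the trait as shown in this hunk):

use std::time::{Duration, SystemTime};

use remote_storage::{GenericRemoteStorage, RemotePath};
use tokio_util::sync::CancellationToken;

/// Roll everything under `prefix` back to its state one hour ago. Keys that
/// already have a version newer than `done_if_after` are treated as handled
/// by an earlier recovery run and left alone.
async fn roll_back_prefix(
    storage: &GenericRemoteStorage,
    prefix: &RemotePath,
) -> anyhow::Result<()> {
    let restore_to = SystemTime::now() - Duration::from_secs(3600);
    let done_if_after = SystemTime::now();
    storage
        .time_travel_recover(
            Some(prefix),
            restore_to,
            done_if_after,
            CancellationToken::new(),
        )
        .await
}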
@@ -272,15 +262,14 @@ impl std::error::Error for DownloadError {}
/// Every storage, currently supported.
/// Serves as a simple way to pass around the [`RemoteStorage`] without dealing with generics.
#[derive(Clone)]
// Require Clone for `Other` due to https://github.com/rust-lang/rust/issues/26925
pub enum GenericRemoteStorage<Other: Clone = Arc<UnreliableWrapper>> {
pub enum GenericRemoteStorage {
LocalFs(LocalFs),
AwsS3(Arc<S3Bucket>),
AzureBlob(Arc<AzureBlobStorage>),
Unreliable(Other),
Unreliable(Arc<UnreliableWrapper>),
}
impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
impl GenericRemoteStorage {
pub async fn list(
&self,
prefix: Option<&RemotePath>,
@@ -397,33 +386,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
Self::Unreliable(s) => s.copy(from, to).await,
}
}
pub async fn time_travel_recover(
&self,
prefix: Option<&RemotePath>,
timestamp: SystemTime,
done_if_after: SystemTime,
cancel: CancellationToken,
) -> anyhow::Result<()> {
match self {
Self::LocalFs(s) => {
s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
.await
}
Self::AwsS3(s) => {
s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
.await
}
Self::AzureBlob(s) => {
s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
.await
}
Self::Unreliable(s) => {
s.time_travel_recover(prefix, timestamp, done_if_after, cancel)
.await
}
}
}
}
impl GenericRemoteStorage {
@@ -711,7 +673,6 @@ impl ConcurrencyLimiter {
RequestKind::List => &self.read,
RequestKind::Delete => &self.write,
RequestKind::Copy => &self.write,
RequestKind::TimeTravel => &self.write,
}
}

View File

@@ -4,7 +4,7 @@
//! This storage is used in tests, but can also be used in cases when a certain persistent
//! volume is mounted to the local FS.
use std::{borrow::Cow, future::Future, io::ErrorKind, pin::Pin, time::SystemTime};
use std::{borrow::Cow, future::Future, io::ErrorKind, pin::Pin};
use anyhow::{bail, ensure, Context};
use bytes::Bytes;
@@ -14,7 +14,7 @@ use tokio::{
fs,
io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
};
use tokio_util::{io::ReaderStream, sync::CancellationToken};
use tokio_util::io::ReaderStream;
use tracing::*;
use utils::{crashsafe::path_with_suffix_extension, fs_ext::is_directory_empty};
@@ -157,6 +157,7 @@ impl LocalFs {
}
}
#[async_trait::async_trait]
impl RemoteStorage for LocalFs {
async fn list(
&self,
@@ -422,17 +423,6 @@ impl RemoteStorage for LocalFs {
})?;
Ok(())
}
#[allow(clippy::diverging_sub_expression)]
async fn time_travel_recover(
&self,
_prefix: Option<&RemotePath>,
_timestamp: SystemTime,
_done_if_after: SystemTime,
_cancel: CancellationToken,
) -> anyhow::Result<()> {
unimplemented!()
}
}
fn storage_metadata_path(original_path: &Utf8Path) -> Utf8PathBuf {

View File

@@ -6,14 +6,12 @@
use std::{
borrow::Cow,
collections::HashMap,
pin::Pin,
sync::Arc,
task::{Context, Poll},
time::SystemTime,
};
use anyhow::{anyhow, Context as _};
use anyhow::Context as _;
use aws_config::{
environment::credentials::EnvironmentVariableCredentialsProvider,
imds::credentials::ImdsCredentialsProvider,
@@ -29,19 +27,17 @@ use aws_sdk_s3::{
config::{AsyncSleep, Builder, IdentityCache, Region, SharedAsyncSleep},
error::SdkError,
operation::get_object::GetObjectError,
types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion},
types::{Delete, ObjectIdentifier},
Client,
};
use aws_smithy_async::rt::sleep::TokioSleep;
use aws_smithy_types::body::SdkBody;
use aws_smithy_types::byte_stream::ByteStream;
use aws_smithy_types::{body::SdkBody, DateTime};
use bytes::Bytes;
use futures::stream::Stream;
use hyper::Body;
use scopeguard::ScopeGuard;
use tokio_util::sync::CancellationToken;
use utils::backoff;
use super::StorageMetadata;
use crate::{
@@ -274,59 +270,6 @@ impl S3Bucket {
}
}
}
async fn delete_oids(
&self,
kind: RequestKind,
delete_objects: &[ObjectIdentifier],
) -> anyhow::Result<()> {
for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE) {
let started_at = start_measuring_requests(kind);
let resp = self
.client
.delete_objects()
.bucket(self.bucket_name.clone())
.delete(
Delete::builder()
.set_objects(Some(chunk.to_vec()))
.build()?,
)
.send()
.await;
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.req_seconds
.observe_elapsed(kind, &resp, started_at);
let resp = resp?;
metrics::BUCKET_METRICS
.deleted_objects_total
.inc_by(chunk.len() as u64);
if let Some(errors) = resp.errors {
// Log a bounded number of the errors within the response:
// these requests can carry 1000 keys so logging each one
// would be too verbose, especially as errors may lead us
// to retry repeatedly.
const LOG_UP_TO_N_ERRORS: usize = 10;
for e in errors.iter().take(LOG_UP_TO_N_ERRORS) {
tracing::warn!(
"DeleteObjects key {} failed: {}: {}",
e.key.as_ref().map(Cow::from).unwrap_or("".into()),
e.code.as_ref().map(Cow::from).unwrap_or("".into()),
e.message.as_ref().map(Cow::from).unwrap_or("".into())
);
}
return Err(anyhow::format_err!(
"Failed to delete {} objects",
errors.len()
));
}
}
Ok(())
}
}
pin_project_lite::pin_project! {
@@ -430,6 +373,7 @@ impl<S: Stream<Item = std::io::Result<Bytes>>> Stream for TimedDownload<S> {
}
}
#[async_trait::async_trait]
impl RemoteStorage for S3Bucket {
async fn list(
&self,
@@ -625,168 +569,64 @@ impl RemoteStorage for S3Bucket {
delete_objects.push(obj_id);
}
self.delete_oids(kind, &delete_objects).await
for chunk in delete_objects.chunks(MAX_KEYS_PER_DELETE) {
let started_at = start_measuring_requests(kind);
let resp = self
.client
.delete_objects()
.bucket(self.bucket_name.clone())
.delete(
Delete::builder()
.set_objects(Some(chunk.to_vec()))
.build()?,
)
.send()
.await;
let started_at = ScopeGuard::into_inner(started_at);
metrics::BUCKET_METRICS
.req_seconds
.observe_elapsed(kind, &resp, started_at);
match resp {
Ok(resp) => {
metrics::BUCKET_METRICS
.deleted_objects_total
.inc_by(chunk.len() as u64);
if let Some(errors) = resp.errors {
// Log a bounded number of the errors within the response:
// these requests can carry 1000 keys so logging each one
// would be too verbose, especially as errors may lead us
// to retry repeatedly.
const LOG_UP_TO_N_ERRORS: usize = 10;
for e in errors.iter().take(LOG_UP_TO_N_ERRORS) {
tracing::warn!(
"DeleteObjects key {} failed: {}: {}",
e.key.as_ref().map(Cow::from).unwrap_or("".into()),
e.code.as_ref().map(Cow::from).unwrap_or("".into()),
e.message.as_ref().map(Cow::from).unwrap_or("".into())
);
}
return Err(anyhow::format_err!(
"Failed to delete {} objects",
errors.len()
));
}
}
Err(e) => {
return Err(e.into());
}
}
}
Ok(())
}
async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
let paths = std::array::from_ref(path);
self.delete_objects(paths).await
}
async fn time_travel_recover(
&self,
prefix: Option<&RemotePath>,
timestamp: SystemTime,
done_if_after: SystemTime,
cancel: CancellationToken,
) -> anyhow::Result<()> {
let kind = RequestKind::TimeTravel;
let _guard = self.permit(kind).await;
let timestamp = DateTime::from(timestamp);
let done_if_after = DateTime::from(done_if_after);
tracing::trace!("Target time: {timestamp:?}, done_if_after {done_if_after:?}");
// Use the passed prefix, or fall back to the prefix_in_bucket value if it is not set.
let prefix = prefix
.map(|p| self.relative_path_to_s3_object(p))
.or_else(|| self.prefix_in_bucket.clone());
let warn_threshold = 3;
let max_retries = 10;
let is_permanent = |_e: &_| false;
let list = backoff::retry(
|| async {
Ok(self
.client
.list_object_versions()
.bucket(self.bucket_name.clone())
.set_prefix(prefix.clone())
.send()
.await?)
},
is_permanent,
warn_threshold,
max_retries,
"listing object versions for time_travel_recover",
backoff::Cancel::new(cancel.clone(), || anyhow!("Cancelled")),
)
.await?;
if list.is_truncated().unwrap_or_default() {
anyhow::bail!("Received truncated ListObjectVersions response for prefix={prefix:?}");
}
let mut versions_deletes = list
.versions()
.iter()
.map(VerOrDelete::Version)
.chain(list.delete_markers().iter().map(VerOrDelete::DeleteMarker))
.collect::<Vec<_>>();
versions_deletes.sort_by_key(|vd| (vd.key(), vd.last_modified()));
let mut vds_for_key = HashMap::<_, Vec<_>>::new();
for vd in versions_deletes {
let last_modified = vd.last_modified();
let version_id = vd.version_id();
let key = vd.key();
let (Some(last_modified), Some(version_id), Some(key)) =
(last_modified, version_id, key)
else {
anyhow::bail!(
"One (or more) of last_modified, key, and id is None. \
Is versioning enabled in the bucket? last_modified={:?} key={:?} version_id={:?}",
last_modified, key, version_id,
);
};
if version_id == "null" {
anyhow::bail!("Received ListVersions response for key={key} with version_id='null', \
indicating either disabled versioning, or legacy objects with null version id values");
}
tracing::trace!(
"Parsing version key={key} version_id={version_id} is_delete={}",
matches!(vd, VerOrDelete::DeleteMarker(_))
);
vds_for_key
.entry(key)
.or_default()
.push((vd, last_modified, version_id));
}
for (key, versions) in vds_for_key {
let (last_vd, last_last_modified, _version_id) = versions.last().unwrap();
if last_last_modified > &&done_if_after {
tracing::trace!("Key {key} has version later than done_if_after, skipping");
continue;
}
// the version we want to restore to.
let version_to_restore_to =
match versions.binary_search_by_key(&timestamp, |tpl| *tpl.1) {
Ok(v) => v,
Err(e) => e,
};
if version_to_restore_to == versions.len() {
tracing::trace!("Key {key} has no changes since timestamp, skipping");
continue;
}
let mut do_delete = false;
if version_to_restore_to == 0 {
// All versions more recent, so the key didn't exist at the specified time point.
tracing::trace!(
"All {} versions more recent for {key}, deleting",
versions.len()
);
do_delete = true;
} else {
match &versions[version_to_restore_to - 1] {
(VerOrDelete::Version(_), _last_modified, version_id) => {
tracing::trace!("Copying old version {version_id} for {key}...");
// Restore the state to the last version by copying
let source_id =
format!("{}/{key}?versionId={version_id}", self.bucket_name);
backoff::retry(
|| async {
Ok(self
.client
.copy_object()
.bucket(self.bucket_name.clone())
.key(key)
.copy_source(&source_id)
.send()
.await?)
},
is_permanent,
warn_threshold,
max_retries,
"copying object version for time_travel_recover",
backoff::Cancel::new(cancel.clone(), || anyhow!("Cancelled")),
)
.await?;
}
(VerOrDelete::DeleteMarker(_), _last_modified, _version_id) => {
do_delete = true;
}
}
};
if do_delete {
if matches!(last_vd, VerOrDelete::DeleteMarker(_)) {
// Key has since been deleted (but there was some history), no need to do anything
tracing::trace!("Key {key} already deleted, skipping.");
} else {
tracing::trace!("Deleting {key}...");
let oid = ObjectIdentifier::builder().key(key.to_owned()).build()?;
self.delete_oids(kind, &[oid]).await?;
}
}
}
Ok(())
}
}
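The restore-point selection in the hunk above boils down to a binary search over one key's versions sorted by modification time. A standalone sketch of just that step (types simplified to `SystemTime`; this is not the crate's actual code):

use std::time::SystemTime;

/// What to do with one key, given its versions' mtimes sorted ascending.
enum Restore {
    /// Every version is newer than the target: the key did not exist yet, delete it.
    DidNotExistYet,
    /// The newest version is already at or before the target: leave it as is.
    NothingToDo,
    /// Copy this (0-based) version back on top of the key.
    CopyFrom(usize),
}

fn pick_version(sorted_mtimes: &[SystemTime], target: SystemTime) -> Restore {
    // Index of the first version newer than `target` (or an exact match,
    // mirroring the hunk above).
    let idx = match sorted_mtimes.binary_search(&target) {
        Ok(i) => i,
        Err(i) => i,
    };
    if idx == sorted_mtimes.len() {
        Restore::NothingToDo
    } else if idx == 0 {
        Restore::DidNotExistYet
    } else {
        Restore::CopyFrom(idx - 1)
    }
}

fn main() {
    use std::time::Duration;
    let t0 = SystemTime::UNIX_EPOCH;
    let mtimes = [t0 + Duration::from_secs(10), t0 + Duration::from_secs(20)];
    assert!(matches!(pick_version(&mtimes, t0), Restore::DidNotExistYet));
    assert!(matches!(pick_version(&mtimes, t0 + Duration::from_secs(15)), Restore::CopyFrom(0)));
    assert!(matches!(pick_version(&mtimes, t0 + Duration::from_secs(30)), Restore::NothingToDo));
}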
/// On drop (cancellation) count towards [`metrics::BucketMetrics::cancelled_waits`].
@@ -811,32 +651,6 @@ fn start_measuring_requests(
})
}
enum VerOrDelete<'a> {
Version(&'a ObjectVersion),
DeleteMarker(&'a DeleteMarkerEntry),
}
impl<'a> VerOrDelete<'a> {
fn last_modified(&self) -> Option<&'a DateTime> {
match self {
VerOrDelete::Version(v) => v.last_modified(),
VerOrDelete::DeleteMarker(v) => v.last_modified(),
}
}
fn version_id(&self) -> Option<&'a str> {
match self {
VerOrDelete::Version(v) => v.version_id(),
VerOrDelete::DeleteMarker(v) => v.version_id(),
}
}
fn key(&self) -> Option<&'a str> {
match self {
VerOrDelete::Version(v) => v.key(),
VerOrDelete::DeleteMarker(v) => v.key(),
}
}
}
#[cfg(test)]
mod tests {
use camino::Utf8Path;

View File

@@ -12,7 +12,6 @@ pub(crate) enum RequestKind {
Delete = 2,
List = 3,
Copy = 4,
TimeTravel = 5,
}
use RequestKind::*;
@@ -25,7 +24,6 @@ impl RequestKind {
Delete => "delete_object",
List => "list_objects",
Copy => "copy_object",
TimeTravel => "time_travel_recover",
}
}
const fn as_index(&self) -> usize {
@@ -33,7 +31,7 @@ impl RequestKind {
}
}
pub(super) struct RequestTyped<C>([C; 6]);
pub(super) struct RequestTyped<C>([C; 5]);
impl<C> RequestTyped<C> {
pub(super) fn get(&self, kind: RequestKind) -> &C {
@@ -42,8 +40,8 @@ impl<C> RequestTyped<C> {
fn build_with(mut f: impl FnMut(RequestKind) -> C) -> Self {
use RequestKind::*;
let mut it = [Get, Put, Delete, List, Copy, TimeTravel].into_iter();
let arr = std::array::from_fn::<C, 6, _>(|index| {
let mut it = [Get, Put, Delete, List, Copy].into_iter();
let arr = std::array::from_fn::<C, 5, _>(|index| {
let next = it.next().unwrap();
assert_eq!(index, next.as_index());
f(next)

View File

@@ -3,19 +3,16 @@
//! testing purposes.
use bytes::Bytes;
use futures::stream::Stream;
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{collections::hash_map::Entry, sync::Arc};
use tokio_util::sync::CancellationToken;
use crate::{
Download, DownloadError, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorage,
StorageMetadata,
Download, DownloadError, Listing, ListingMode, RemotePath, RemoteStorage, StorageMetadata,
};
pub struct UnreliableWrapper {
inner: GenericRemoteStorage<Arc<VoidStorage>>,
inner: crate::GenericRemoteStorage,
// This many attempts of each operation will fail, then we let it succeed.
attempts_to_fail: u64,
@@ -32,21 +29,11 @@ enum RemoteOp {
Download(RemotePath),
Delete(RemotePath),
DeleteObjects(Vec<RemotePath>),
TimeTravelRecover(Option<RemotePath>),
}
impl UnreliableWrapper {
pub fn new(inner: crate::GenericRemoteStorage, attempts_to_fail: u64) -> Self {
assert!(attempts_to_fail > 0);
let inner = match inner {
GenericRemoteStorage::AwsS3(s) => GenericRemoteStorage::AwsS3(s),
GenericRemoteStorage::AzureBlob(s) => GenericRemoteStorage::AzureBlob(s),
GenericRemoteStorage::LocalFs(s) => GenericRemoteStorage::LocalFs(s),
// We could also make this a no-op, as in, extract the inner of the passed generic remote storage
GenericRemoteStorage::Unreliable(_s) => {
panic!("Can't wrap unreliable wrapper unreliably")
}
};
UnreliableWrapper {
inner,
attempts_to_fail,
@@ -97,9 +84,7 @@ impl UnreliableWrapper {
}
}
// We never construct this, so the concrete type doesn't matter; it just must not be UnreliableWrapper and it must implement RemoteStorage.
type VoidStorage = crate::LocalFs;
#[async_trait::async_trait]
impl RemoteStorage for UnreliableWrapper {
async fn list_prefixes(
&self,
@@ -184,17 +169,4 @@ impl RemoteStorage for UnreliableWrapper {
self.attempt(RemoteOp::Upload(to.clone()))?;
self.inner.copy_object(from, to).await
}
async fn time_travel_recover(
&self,
prefix: Option<&RemotePath>,
timestamp: SystemTime,
done_if_after: SystemTime,
cancel: CancellationToken,
) -> anyhow::Result<()> {
self.attempt(RemoteOp::TimeTravelRecover(prefix.map(|p| p.to_owned())))?;
self.inner
.time_travel_recover(prefix, timestamp, done_if_after, cancel)
.await
}
}
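A hypothetical test-setup sketch for the failure-injection wrapper above, assuming `UnreliableWrapper` and `GenericRemoteStorage` are both reachable from the crate root as the hunks suggest:

use std::sync::Arc;

use remote_storage::{GenericRemoteStorage, UnreliableWrapper};

/// Wrap a storage backend so that every distinct operation fails twice before
/// succeeding, which lets retry paths be exercised deterministically in tests.
fn flaky(inner: GenericRemoteStorage) -> GenericRemoteStorage {
    GenericRemoteStorage::Unreliable(Arc::new(UnreliableWrapper::new(inner, 2)))
}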

View File

@@ -1,21 +1,15 @@
use std::collections::HashSet;
use std::env;
use std::fmt::{Debug, Display};
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::sync::Arc;
use std::time::{Duration, UNIX_EPOCH};
use std::{collections::HashSet, time::SystemTime};
use std::time::UNIX_EPOCH;
use crate::common::{download_to_vec, upload_stream};
use anyhow::Context;
use camino::Utf8Path;
use futures_util::Future;
use remote_storage::{
GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
};
use test_context::test_context;
use test_context::AsyncTestContext;
use tokio_util::sync::CancellationToken;
use tracing::info;
mod common;
@@ -24,160 +18,11 @@ mod common;
mod tests_s3;
use common::{cleanup, ensure_logging_ready, upload_remote_data, upload_simple_remote_data};
use utils::backoff;
const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
const BASE_PREFIX: &str = "test";
#[test_context(MaybeEnabledStorage)]
#[tokio::test]
async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<()> {
let ctx = match ctx {
MaybeEnabledStorage::Enabled(ctx) => ctx,
MaybeEnabledStorage::Disabled => return Ok(()),
};
// Our test is sensitive to discrepancies between the S3 clock and the clock of the environment
// the tests run in. Therefore, wait a little bit before and after. The alternative would be
// to take the time from S3 response headers.
const WAIT_TIME: Duration = Duration::from_millis(3_000);
async fn retry<T, O, F, E>(op: O) -> Result<T, E>
where
E: Display + Debug + 'static,
O: FnMut() -> F,
F: Future<Output = Result<T, E>>,
{
let warn_threshold = 3;
let max_retries = 10;
backoff::retry(
op,
|_e| false,
warn_threshold,
max_retries,
"test retry",
backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
)
.await
}
async fn time_point() -> SystemTime {
tokio::time::sleep(WAIT_TIME).await;
let ret = SystemTime::now();
tokio::time::sleep(WAIT_TIME).await;
ret
}
async fn list_files(client: &Arc<GenericRemoteStorage>) -> anyhow::Result<HashSet<RemotePath>> {
Ok(retry(|| client.list_files(None))
.await
.context("list root files failure")?
.into_iter()
.collect::<HashSet<_>>())
}
let path1 = RemotePath::new(Utf8Path::new(format!("{}/path1", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path2 = RemotePath::new(Utf8Path::new(format!("{}/path2", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
let path3 = RemotePath::new(Utf8Path::new(format!("{}/path3", ctx.base_prefix).as_str()))
.with_context(|| "RemotePath conversion")?;
retry(|| {
let (data, len) = upload_stream("remote blob data1".as_bytes().into());
ctx.client.upload(data, len, &path1, None)
})
.await?;
let t0_files = list_files(&ctx.client).await?;
let t0 = time_point().await;
println!("at t0: {t0_files:?}");
let old_data = "remote blob data2";
retry(|| {
let (data, len) = upload_stream(old_data.as_bytes().into());
ctx.client.upload(data, len, &path2, None)
})
.await?;
let t1_files = list_files(&ctx.client).await?;
let t1 = time_point().await;
println!("at t1: {t1_files:?}");
// A little check to ensure that our clock is not too far off from the S3 clock
{
let dl = retry(|| ctx.client.download(&path2)).await?;
let last_modified = dl.last_modified.unwrap();
let half_wt = WAIT_TIME.mul_f32(0.5);
let t0_hwt = t0 + half_wt;
let t1_hwt = t1 - half_wt;
if !(t0_hwt..=t1_hwt).contains(&last_modified) {
panic!("last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \
This likely means a large clock discrepancy between S3 and the local clock.");
}
}
retry(|| {
let (data, len) = upload_stream("remote blob data3".as_bytes().into());
ctx.client.upload(data, len, &path3, None)
})
.await?;
let new_data = "new remote blob data2";
retry(|| {
let (data, len) = upload_stream(new_data.as_bytes().into());
ctx.client.upload(data, len, &path2, None)
})
.await?;
retry(|| ctx.client.delete(&path1)).await?;
let t2_files = list_files(&ctx.client).await?;
let t2 = time_point().await;
println!("at t2: {t2_files:?}");
// No changes after recovery to t2 (no-op)
let t_final = time_point().await;
ctx.client
.time_travel_recover(None, t2, t_final, CancellationToken::new())
.await?;
let t2_files_recovered = list_files(&ctx.client).await?;
println!("after recovery to t2: {t2_files_recovered:?}");
assert_eq!(t2_files, t2_files_recovered);
let path2_recovered_t2 = download_to_vec(ctx.client.download(&path2).await?).await?;
assert_eq!(path2_recovered_t2, new_data.as_bytes());
// after recovery to t1: path1 is back, path2 has the old content
let t_final = time_point().await;
ctx.client
.time_travel_recover(None, t1, t_final, CancellationToken::new())
.await?;
let t1_files_recovered = list_files(&ctx.client).await?;
println!("after recovery to t1: {t1_files_recovered:?}");
assert_eq!(t1_files, t1_files_recovered);
let path2_recovered_t1 = download_to_vec(ctx.client.download(&path2).await?).await?;
assert_eq!(path2_recovered_t1, old_data.as_bytes());
// after recovery to t0: everything is gone except for path1
let t_final = time_point().await;
ctx.client
.time_travel_recover(None, t0, t_final, CancellationToken::new())
.await?;
let t0_files_recovered = list_files(&ctx.client).await?;
println!("after recovery to t0: {t0_files_recovered:?}");
assert_eq!(t0_files, t0_files_recovered);
// cleanup
let paths = &[path1, path2, path3];
retry(|| ctx.client.delete_objects(paths)).await?;
Ok(())
}
struct EnabledS3 {
client: Arc<GenericRemoteStorage>,
base_prefix: &'static str,

View File

@@ -131,9 +131,7 @@ pub fn api_error_handler(api_error: ApiError) -> Response<Body> {
ApiError::ResourceUnavailable(_) => info!("Error processing HTTP request: {api_error:#}"),
ApiError::NotFound(_) => info!("Error processing HTTP request: {api_error:#}"),
ApiError::InternalServerError(_) => error!("Error processing HTTP request: {api_error:?}"),
ApiError::ShuttingDown => info!("Shut down while processing HTTP request"),
ApiError::Timeout(_) => info!("Timeout while processing HTTP request: {api_error:#}"),
_ => info!("Error processing HTTP request: {api_error:#}"),
_ => error!("Error processing HTTP request: {api_error:#}"),
}
api_error.into_response()

View File

@@ -5,10 +5,10 @@ use std::os::unix::io::RawFd;
pub fn set_nonblock(fd: RawFd) -> Result<(), std::io::Error> {
let bits = fcntl(fd, F_GETFL)?;
// If F_GETFL returns some unknown bits, they should be valid
// Safety: If F_GETFL returns some unknown bits, they should be valid
// for passing back to F_SETFL, too. If we left them out, the F_SETFL
// would effectively clear them, which is not what we want.
let mut flags = OFlag::from_bits_retain(bits);
let mut flags = unsafe { OFlag::from_bits_unchecked(bits) };
flags |= OFlag::O_NONBLOCK;
fcntl(fd, F_SETFL(flags))?;

View File

@@ -1,6 +1,7 @@
use std::{
io,
net::{TcpListener, ToSocketAddrs},
os::unix::prelude::AsRawFd,
};
use nix::sys::socket::{setsockopt, sockopt::ReuseAddr};
@@ -9,7 +10,7 @@ use nix::sys::socket::{setsockopt, sockopt::ReuseAddr};
pub fn bind<A: ToSocketAddrs>(addr: A) -> io::Result<TcpListener> {
let listener = TcpListener::bind(addr)?;
setsockopt(&listener, ReuseAddr, &true)?;
setsockopt(listener.as_raw_fd(), ReuseAddr, &true)?;
Ok(listener)
}

View File

@@ -61,7 +61,6 @@ sync_wrapper.workspace = true
tokio-tar.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
tokio-epoll-uring.workspace = true
tokio-io-timeout.workspace = true
tokio-postgres.workspace = true
tokio-stream.workspace = true

View File

@@ -18,7 +18,7 @@ use pageserver::tenant::block_io::FileBlockReader;
use pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection};
use pageserver::tenant::storage_layer::delta_layer::{Summary, DELTA_KEY_SIZE};
use pageserver::tenant::storage_layer::range_overlaps;
use pageserver::virtual_file::{self, VirtualFile};
use pageserver::virtual_file::VirtualFile;
use utils::{bin_ser::BeSer, lsn::Lsn};
@@ -142,7 +142,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
// Initialize virtual_file (file descriptor cache) and page cache, which are needed to access the layers' persistent B-Tree.
pageserver::virtual_file::init(10, virtual_file::IoEngineKind::StdFs);
pageserver::virtual_file::init(10);
pageserver::page_cache::init(100);
let mut total_delta_layers = 0usize;

View File

@@ -59,7 +59,7 @@ pub(crate) enum LayerCmd {
async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
virtual_file::init(10, virtual_file::IoEngineKind::StdFs);
virtual_file::init(10);
page_cache::init(100);
let file = FileBlockReader::new(VirtualFile::open(path).await?);
let summary_blk = file.read_blk(0, ctx).await?;
@@ -187,7 +187,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
new_tenant_id,
new_timeline_id,
} => {
pageserver::virtual_file::init(10, virtual_file::IoEngineKind::StdFs);
pageserver::virtual_file::init(10);
pageserver::page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

View File

@@ -123,7 +123,7 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {
async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
// Basic initialization of things that don't change after startup
virtual_file::init(10, virtual_file::IoEngineKind::StdFs);
virtual_file::init(10);
page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
dump_layerfile_from_path(path, true, &ctx).await

View File

@@ -423,8 +423,8 @@ async fn client(
tokio::select! {
res = do_requests => { res },
_ = cancel.cancelled() => {
// fallthrough to shutdown
client.shutdown().await;
return;
}
}
client.shutdown().await;
}

View File

@@ -11,9 +11,8 @@
//! from data stored in object storage.
//!
use anyhow::{anyhow, bail, ensure, Context};
use bytes::{BufMut, Bytes, BytesMut};
use bytes::{BufMut, BytesMut};
use fail::fail_point;
use pageserver_api::key::{key_to_slru_block, Key};
use postgres_ffi::pg_constants;
use std::fmt::Write as FmtWrite;
use std::time::SystemTime;
@@ -134,87 +133,6 @@ where
ctx: &'a RequestContext,
}
/// A sink that accepts SLRU blocks ordered by key and forwards
/// full segments to the archive.
struct SlruSegmentsBuilder<'a, 'b, W>
where
W: AsyncWrite + Send + Sync + Unpin,
{
ar: &'a mut Builder<&'b mut W>,
buf: Vec<u8>,
current_segment: Option<(SlruKind, u32)>,
}
impl<'a, 'b, W> SlruSegmentsBuilder<'a, 'b, W>
where
W: AsyncWrite + Send + Sync + Unpin,
{
fn new(ar: &'a mut Builder<&'b mut W>) -> Self {
Self {
ar,
buf: Vec::new(),
current_segment: None,
}
}
async fn add_block(&mut self, key: &Key, block: Bytes) -> anyhow::Result<()> {
let (kind, segno, _) = key_to_slru_block(*key)?;
match kind {
SlruKind::Clog => {
ensure!(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8);
}
SlruKind::MultiXactMembers | SlruKind::MultiXactOffsets => {
ensure!(block.len() == BLCKSZ as usize);
}
}
let segment = (kind, segno);
match self.current_segment {
None => {
self.current_segment = Some(segment);
self.buf
.extend_from_slice(block.slice(..BLCKSZ as usize).as_ref());
}
Some(current_seg) if current_seg == segment => {
self.buf
.extend_from_slice(block.slice(..BLCKSZ as usize).as_ref());
}
Some(_) => {
self.flush().await?;
self.current_segment = Some(segment);
self.buf
.extend_from_slice(block.slice(..BLCKSZ as usize).as_ref());
}
}
Ok(())
}
async fn flush(&mut self) -> anyhow::Result<()> {
let nblocks = self.buf.len() / BLCKSZ as usize;
let (kind, segno) = self.current_segment.take().unwrap();
let segname = format!("{}/{:>04X}", kind.to_str(), segno);
let header = new_tar_header(&segname, self.buf.len() as u64)?;
self.ar.append(&header, self.buf.as_slice()).await?;
trace!("Added to basebackup slru {} relsize {}", segname, nblocks);
self.buf.clear();
Ok(())
}
async fn finish(mut self) -> anyhow::Result<()> {
if self.current_segment.is_none() || self.buf.is_empty() {
return Ok(());
}
self.flush().await
}
}
impl<'a, W> Basebackup<'a, W>
where
W: AsyncWrite + Send + Sync + Unpin,
@@ -250,27 +168,20 @@ where
}
// Gather non-relational files from object storage pages.
let slru_partitions = self
.timeline
.get_slru_keyspace(Version::Lsn(self.lsn), self.ctx)
.await?
.partition(Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64);
let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);
for part in slru_partitions.parts {
let blocks = self
for kind in [
SlruKind::Clog,
SlruKind::MultiXactOffsets,
SlruKind::MultiXactMembers,
] {
for segno in self
.timeline
.get_vectored(&part.ranges, self.lsn, self.ctx)
.await?;
for (key, block) in blocks {
slru_builder.add_block(&key, block?).await?;
.list_slru_segments(kind, Version::Lsn(self.lsn), self.ctx)
.await?
{
self.add_slru_segment(kind, segno).await?;
}
}
slru_builder.finish().await?;
let mut min_restart_lsn: Lsn = Lsn::MAX;
// Create tablespace directories
for ((spcnode, dbnode), has_relmap_file) in
@@ -394,6 +305,39 @@ where
Ok(())
}
//
// Generate SLRU segment files from repository.
//
async fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
let nblocks = self
.timeline
.get_slru_segment_size(slru, segno, Version::Lsn(self.lsn), self.ctx)
.await?;
let mut slru_buf: Vec<u8> = Vec::with_capacity(nblocks as usize * BLCKSZ as usize);
for blknum in 0..nblocks {
let img = self
.timeline
.get_slru_page_at_lsn(slru, segno, blknum, self.lsn, self.ctx)
.await?;
if slru == SlruKind::Clog {
ensure!(img.len() == BLCKSZ as usize || img.len() == BLCKSZ as usize + 8);
} else {
ensure!(img.len() == BLCKSZ as usize);
}
slru_buf.extend_from_slice(&img[..BLCKSZ as usize]);
}
let segname = format!("{}/{:>04X}", slru.to_str(), segno);
let header = new_tar_header(&segname, slru_buf.len() as u64)?;
self.ar.append(&header, slru_buf.as_slice()).await?;
trace!("Added to basebackup slru {} relsize {}", segname, nblocks);
Ok(())
}
//
// Include database/tablespace directories.
//

View File

@@ -130,7 +130,7 @@ fn main() -> anyhow::Result<()> {
let scenario = failpoint_support::init();
// Basic initialization of things that don't change after startup
virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
virtual_file::init(conf.max_file_descriptors);
page_cache::init(conf.page_cache_size);
start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;

View File

@@ -36,7 +36,6 @@ use crate::tenant::config::TenantConfOpt;
use crate::tenant::{
TENANTS_SEGMENT_NAME, TENANT_DELETED_MARKER_FILE_NAME, TIMELINES_SEGMENT_NAME,
};
use crate::virtual_file;
use crate::{
IGNORED_TENANT_FILE_NAME, METADATA_FILE_NAME, TENANT_CONFIG_NAME, TENANT_HEATMAP_BASENAME,
TENANT_LOCATION_CONFIG_NAME, TIMELINE_DELETE_MARK_SUFFIX, TIMELINE_UNINIT_MARK_SUFFIX,
@@ -44,8 +43,6 @@ use crate::{
use self::defaults::DEFAULT_CONCURRENT_TENANT_WARMUP;
use self::defaults::DEFAULT_VIRTUAL_FILE_IO_ENGINE;
pub mod defaults {
use crate::tenant::config::defaults::*;
use const_format::formatcp;
@@ -82,8 +79,6 @@ pub mod defaults {
pub const DEFAULT_INGEST_BATCH_SIZE: u64 = 100;
pub const DEFAULT_VIRTUAL_FILE_IO_ENGINE: &str = "std-fs";
///
/// Default built-in configuration file.
///
@@ -119,8 +114,6 @@ pub mod defaults {
#ingest_batch_size = {DEFAULT_INGEST_BATCH_SIZE}
#virtual_file_io_engine = '{DEFAULT_VIRTUAL_FILE_IO_ENGINE}'
[tenant_config]
#checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
#checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
@@ -254,8 +247,6 @@ pub struct PageServerConf {
/// Maximum number of WAL records to be ingested and committed at the same time
pub ingest_batch_size: u64,
pub virtual_file_io_engine: virtual_file::IoEngineKind,
}
/// We do not want to store this in a PageServerConf because the latter may be logged
@@ -340,8 +331,6 @@ struct PageServerConfigBuilder {
secondary_download_concurrency: BuilderValue<usize>,
ingest_batch_size: BuilderValue<u64>,
virtual_file_io_engine: BuilderValue<virtual_file::IoEngineKind>,
}
impl Default for PageServerConfigBuilder {
@@ -417,8 +406,6 @@ impl Default for PageServerConfigBuilder {
secondary_download_concurrency: Set(DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY),
ingest_batch_size: Set(DEFAULT_INGEST_BATCH_SIZE),
virtual_file_io_engine: Set(DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap()),
}
}
}
@@ -575,10 +562,6 @@ impl PageServerConfigBuilder {
self.ingest_batch_size = BuilderValue::Set(ingest_batch_size)
}
pub fn virtual_file_io_engine(&mut self, value: virtual_file::IoEngineKind) {
self.virtual_file_io_engine = BuilderValue::Set(value);
}
pub fn build(self) -> anyhow::Result<PageServerConf> {
let concurrent_tenant_warmup = self
.concurrent_tenant_warmup
@@ -686,9 +669,6 @@ impl PageServerConfigBuilder {
ingest_batch_size: self
.ingest_batch_size
.ok_or(anyhow!("missing ingest_batch_size"))?,
virtual_file_io_engine: self
.virtual_file_io_engine
.ok_or(anyhow!("missing virtual_file_io_engine"))?,
})
}
}
@@ -940,9 +920,6 @@ impl PageServerConf {
builder.secondary_download_concurrency(parse_toml_u64(key, item)? as usize)
},
"ingest_batch_size" => builder.ingest_batch_size(parse_toml_u64(key, item)?),
"virtual_file_io_engine" => {
builder.virtual_file_io_engine(parse_toml_from_str("virtual_file_io_engine", item)?)
}
_ => bail!("unrecognized pageserver option '{key}'"),
}
}
@@ -1016,7 +993,6 @@ impl PageServerConf {
heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
secondary_download_concurrency: defaults::DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY,
ingest_batch_size: defaults::DEFAULT_INGEST_BATCH_SIZE,
virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
}
}
}
@@ -1249,7 +1225,6 @@ background_task_maximum_delay = '334 s'
heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
secondary_download_concurrency: defaults::DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY,
ingest_batch_size: defaults::DEFAULT_INGEST_BATCH_SIZE,
virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
},
"Correct defaults should be used when no config values are provided"
);
@@ -1313,7 +1288,6 @@ background_task_maximum_delay = '334 s'
heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
secondary_download_concurrency: defaults::DEFAULT_SECONDARY_DOWNLOAD_CONCURRENCY,
ingest_batch_size: 100,
virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
},
"Should be able to parse all basic config values correctly"
);

View File

@@ -877,56 +877,6 @@ paths:
schema:
$ref: "#/components/schemas/ServiceUnavailableError"
/v1/tenant/{tenant_id}/{timeline_id}/preserve_initdb_archive:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
- name: timeline_id
in: path
required: true
schema:
type: string
post:
description: |
Marks the initdb archive for preservation upon deletion of the timeline or tenant.
This is meant to be part of the disaster recovery process.
responses:
"202":
description: Initdb archive successfully marked for preservation
"404":
description: No tenant or timeline found for the specified ids
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
"503":
description: Temporarily unavailable, please retry.
content:
application/json:
schema:
$ref: "#/components/schemas/ServiceUnavailableError"
/v1/tenant/{tenant_id}/synthetic_size:
parameters:

View File

@@ -187,7 +187,6 @@ impl From<TenantSlotUpsertError> for ApiError {
match e {
InternalError(e) => ApiError::InternalServerError(anyhow::anyhow!("{e}")),
MapState(e) => e.into(),
ShuttingDown(_) => ApiError::ShuttingDown,
}
}
}
@@ -496,10 +495,6 @@ async fn timeline_create_handler(
.map_err(ApiError::InternalServerError)?;
json_response(StatusCode::CREATED, timeline_info)
}
Err(_) if tenant.cancel.is_cancelled() => {
// In case we get some ugly error type during shutdown, cast it into a clean 503.
json_response(StatusCode::SERVICE_UNAVAILABLE, HttpErrorBody::from_msg("Tenant shutting down".to_string()))
}
Err(tenant::CreateTimelineError::Conflict | tenant::CreateTimelineError::AlreadyCreating) => {
json_response(StatusCode::CONFLICT, ())
}
@@ -566,43 +561,6 @@ async fn timeline_list_handler(
json_response(StatusCode::OK, response_data)
}
async fn timeline_preserve_initdb_handler(
request: Request<Body>,
_cancel: CancellationToken,
) -> Result<Response<Body>, ApiError> {
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
// Part of the process for disaster recovery from safekeeper-stored WAL:
// If we don't recover into a new timeline but want to keep the timeline ID,
// then the initdb archive is deleted. This endpoint copies it to a different
// location where timeline recreation can find it.
async {
let tenant = mgr::get_tenant(tenant_shard_id, true)?;
let timeline = tenant
.get_timeline(timeline_id, false)
.map_err(|e| ApiError::NotFound(e.into()))?;
timeline
.preserve_initdb_archive()
.await
.context("preserving initdb archive")
.map_err(ApiError::InternalServerError)?;
Ok::<_, ApiError>(())
}
.instrument(info_span!("timeline_preserve_initdb_archive",
tenant_id = %tenant_shard_id.tenant_id,
shard_id = %tenant_shard_id.shard_slug(),
%timeline_id))
.await?;
json_response(StatusCode::OK, ())
}
async fn timeline_detail_handler(
request: Request<Body>,
_cancel: CancellationToken,
@@ -1262,9 +1220,19 @@ async fn tenant_create_handler(
};
// We created the tenant. Existing API semantics are that the tenant
// is Active when this function returns.
new_tenant
if let res @ Err(_) = new_tenant
.wait_to_become_active(ACTIVE_TENANT_TIMEOUT)
.await?;
.await
{
// This shouldn't happen because we just created the tenant directory
// in upsert_location, and there aren't any remote timelines
// to load, so nothing can really fail during load.
// Don't do cleanup because we don't know how we got here.
// The tenant will likely be in `Broken` state and subsequent
// calls will fail.
res.context("created tenant failed to become active")
.map_err(ApiError::InternalServerError)?;
}
json_response(
StatusCode::CREATED,
@@ -1975,10 +1943,6 @@ pub fn make_router(
.post("/v1/tenant/:tenant_id/ignore", |r| {
api_handler(r, tenant_ignore_handler)
})
.post(
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/preserve_initdb_archive",
|r| api_handler(r, timeline_preserve_initdb_handler),
)
.get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
api_handler(r, timeline_detail_handler)
})

View File

@@ -1,4 +1,3 @@
#![recursion_limit = "300"]
#![deny(clippy::undocumented_unsafe_blocks)]
mod auth;

View File

@@ -150,43 +150,6 @@ pub(crate) static MATERIALIZED_PAGE_CACHE_HIT: Lazy<IntCounter> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub(crate) struct GetVectoredLatency {
map: EnumMap<TaskKind, Option<Histogram>>,
}
impl GetVectoredLatency {
// Only these task types perform vectored gets. Filter all other tasks out to reduce total
// cardinality of the metric.
const TRACKED_TASK_KINDS: [TaskKind; 2] = [TaskKind::Compaction, TaskKind::PageRequestHandler];
pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
self.map[task_kind].as_ref()
}
}
pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
let inner = register_histogram_vec!(
"pageserver_get_vectored_seconds",
"Time spent in get_vectored",
&["task_kind"],
CRITICAL_OP_BUCKETS.into(),
)
.expect("failed to define a metric");
GetVectoredLatency {
map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
if GetVectoredLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
let task_kind = task_kind.into();
Some(inner.with_label_values(&[task_kind]))
} else {
None
}
})),
}
});
pub(crate) struct PageCacheMetricsForTaskKind {
pub read_accesses_materialized_page: IntCounter,
pub read_accesses_immutable: IntCounter,
@@ -969,7 +932,6 @@ pub(crate) static STORAGE_IO_SIZE: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});
#[cfg(not(test))]
pub(crate) mod virtual_file_descriptor_cache {
use super::*;
@@ -989,20 +951,6 @@ pub(crate) mod virtual_file_descriptor_cache {
// ```
}
#[cfg(not(test))]
pub(crate) mod virtual_file_io_engine {
use super::*;
pub(crate) static KIND: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_virtual_file_io_engine_kind",
"The configured io engine for VirtualFile",
&["kind"],
)
.unwrap()
});
}
#[derive(Debug)]
struct GlobalAndPerTimelineHistogram {
global: Histogram,

View File

@@ -61,7 +61,7 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::import_datadir::import_wal_from_tar;
use crate::metrics;
use crate::metrics::LIVE_CONNECTIONS_COUNT;
use crate::pgdatadir_mapping::Version;
use crate::pgdatadir_mapping::{rel_block_to_key, Version};
use crate::task_mgr;
use crate::task_mgr::TaskKind;
use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -75,7 +75,6 @@ use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::trace::Tracer;
use pageserver_api::key::rel_block_to_key;
use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
use postgres_ffi::BLCKSZ;
@@ -322,8 +321,8 @@ enum PageStreamError {
Shutdown,
/// Something went wrong reading a page: this likely indicates a pageserver bug
#[error("Read error")]
Read(#[source] PageReconstructError),
#[error("Read error: {0}")]
Read(PageReconstructError),
/// Ran out of time waiting for an LSN
#[error("LSN timeout: {0}")]
@@ -332,11 +331,11 @@ enum PageStreamError {
/// The entity required to serve the request (tenant or timeline) is not found,
/// or is not found in a suitable state to serve a request.
#[error("Not found: {0}")]
NotFound(Cow<'static, str>),
NotFound(std::borrow::Cow<'static, str>),
/// Request asked for something that doesn't make sense, like an invalid LSN
#[error("Bad request: {0}")]
BadRequest(Cow<'static, str>),
BadRequest(std::borrow::Cow<'static, str>),
}
impl From<PageReconstructError> for PageStreamError {
@@ -387,18 +386,12 @@ impl PageServerHandler {
/// Future that completes when we need to shut down the connection.
///
/// We currently need to shut down when any of the following happens:
/// 1. any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// 2. task_mgr requests shutdown of the connection
/// Reasons we need to shut down are:
/// - any of the timelines we hold GateGuards for in `shard_timelines` is cancelled
/// - task_mgr requests shutdown of the connection
///
/// NB on (1): the connection's lifecycle is not actually tied to any of the
/// `shard_timelines`s' lifecycles. But it's _necessary_ in the current
/// implementation to be responsive to timeline cancellation because
/// the connection holds their `GateGuards` open (stored in `shard_timelines`).
/// We currently do the easy thing and terminate the connection if any of the
/// shard_timelines gets cancelled. But really, we could spend more effort
/// and simply remove the cancelled timeline from the `shard_timelines`, thereby
/// dropping the guard.
/// The need to check for `task_mgr` cancellation arises mainly from `handle_pagerequests`
/// where, at first, `shard_timelines` is empty, see <https://github.com/neondatabase/neon/pull/6388>
///
/// NB: keep in sync with [`Self::is_connection_cancelled`]
async fn await_connection_cancelled(&self) {
@@ -411,17 +404,16 @@ impl PageServerHandler {
// immutable &self). So it's fine to evaluate shard_timelines after the sleep, we don't risk
// missing any inserts to the map.
let mut cancellation_sources = Vec::with_capacity(1 + self.shard_timelines.len());
use futures::future::Either;
cancellation_sources.push(Either::Left(task_mgr::shutdown_watcher()));
cancellation_sources.extend(
self.shard_timelines
.values()
.map(|ht| Either::Right(ht.timeline.cancel.cancelled())),
);
FuturesUnordered::from_iter(cancellation_sources)
.next()
.await;
let mut futs = self
.shard_timelines
.values()
.map(|ht| ht.timeline.cancel.cancelled())
.collect::<FuturesUnordered<_>>();
tokio::select! {
_ = task_mgr::shutdown_watcher() => { }
_ = futs.next() => {}
}
}
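// Illustrative sketch, not part of the diff: the general shape of racing a
// global shutdown signal against any number of per-timeline cancellation
// tokens. The function and its arguments are hypothetical; only tokio,
// tokio_util and futures are assumed as dependencies.
#[allow(dead_code)]
async fn await_any_cancelled_example(
    global: tokio_util::sync::CancellationToken,
    timelines: Vec<tokio_util::sync::CancellationToken>,
) {
    use futures::{stream::FuturesUnordered, StreamExt};
    // Each `cancelled()` future resolves once its token is cancelled.
    let mut futs = timelines
        .iter()
        .map(|t| t.cancelled())
        .collect::<FuturesUnordered<_>>();
    tokio::select! {
        _ = global.cancelled() => {}
        // `Some(_)` keeps an empty set from finishing the select immediately.
        Some(_) = futs.next() => {}
    }
}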
/// Checking variant of [`Self::await_connection_cancelled`].
@@ -667,10 +659,7 @@ impl PageServerHandler {
// print all the details to the log with {:#}, but for the client the
// error message is enough. Do not log if shutting down, as the anyhow::Error
// here includes cancellation which is not an error.
let full = utils::error::report_compact_sources(&e);
span.in_scope(|| {
error!("error reading relation or page version: {full:#}")
});
span.in_scope(|| error!("error reading relation or page version: {:#}", e));
PagestreamBeMessage::Error(PagestreamErrorResponse {
message: e.to_string(),
})

View File

@@ -13,12 +13,7 @@ use crate::repository::*;
use crate::walrecord::NeonWalRecord;
use anyhow::{ensure, Context};
use bytes::{Buf, Bytes};
use pageserver_api::key::{
dbdir_key_range, is_rel_block_key, is_slru_block_key, rel_block_to_key, rel_dir_to_key,
rel_key_range, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key,
slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
};
use pageserver_api::key::is_rel_block_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
use postgres_ffi::BLCKSZ;
@@ -27,7 +22,6 @@ use serde::{Deserialize, Serialize};
use std::collections::{hash_map, HashMap, HashSet};
use std::ops::ControlFlow;
use std::ops::Range;
use strum::IntoEnumIterator;
use tokio_util::sync::CancellationToken;
use tracing::{debug, trace, warn};
use utils::bin_ser::DeserializeError;
@@ -534,33 +528,6 @@ impl Timeline {
Ok(Default::default())
}
pub(crate) async fn get_slru_keyspace(
&self,
version: Version<'_>,
ctx: &RequestContext,
) -> Result<KeySpace, PageReconstructError> {
let mut accum = KeySpaceAccum::new();
for kind in SlruKind::iter() {
let mut segments: Vec<u32> = self
.list_slru_segments(kind, version, ctx)
.await?
.into_iter()
.collect();
segments.sort_unstable();
for seg in segments {
let block_count = self.get_slru_segment_size(kind, seg, version, ctx).await?;
accum.add_range(
slru_block_to_key(kind, seg, 0)..slru_block_to_key(kind, seg, block_count),
);
}
}
Ok(accum.to_keyspace())
}
/// Get a list of SLRU segments
pub(crate) async fn list_slru_segments(
&self,
@@ -1568,6 +1535,366 @@ struct SlruSegmentDirectory {
static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]);
// Layout of the Key address space
//
// The Key struct, used to address the underlying key-value store, consists of
// 18 bytes, split into six fields. See 'Key' in repository.rs. We need to map
// all the data and metadata keys into those 18 bytes.
//
// Principles for the mapping:
//
// - Things that are often accessed or modified together, should be close to
// each other in the key space. For example, if a relation is extended by one
// block, we create a new key-value pair for the block data, and update the
// relation size entry. Because of that, the RelSize key comes after all the
// RelBlocks of a relation: the RelSize and the last RelBlock are always next
// to each other.
//
// The key space is divided into four major sections, identified by the first
// byte, and they form a hierarchy:
//
// 00 Relation data and metadata
//
// DbDir () -> (dbnode, spcnode)
// Filenodemap
// RelDir -> relnode forknum
// RelBlocks
// RelSize
//
// 01 SLRUs
//
// SlruDir kind
// SlruSegBlocks segno
// SlruSegSize
//
// 02 pg_twophase
//
// 03 misc
// Controlfile
// checkpoint
// pg_version
//
// 04 aux files
//
// Below is a full list of the keyspace allocation:
//
// DbDir:
// 00 00000000 00000000 00000000 00 00000000
//
// Filenodemap:
// 00 SPCNODE DBNODE 00000000 00 00000000
//
// RelDir:
// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0)
//
// RelBlock:
// 00 SPCNODE DBNODE RELNODE FORK BLKNUM
//
// RelSize:
// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF
//
// SlruDir:
// 01 kind 00000000 00000000 00 00000000
//
// SlruSegBlock:
// 01 kind 00000001 SEGNO 00 BLKNUM
//
// SlruSegSize:
// 01 kind 00000001 SEGNO 00 FFFFFFFF
//
// TwoPhaseDir:
// 02 00000000 00000000 00000000 00 00000000
//
// TwoPhaseFile:
// 02 00000000 00000000 00000000 00 XID
//
// ControlFile:
// 03 00000000 00000000 00000000 00 00000000
//
// Checkpoint:
// 03 00000000 00000000 00000000 00 00000001
//
// AuxFiles:
// 03 00000000 00000000 00000000 00 00000002
//
//-- Section 01: relation data and metadata
const DBDIR_KEY: Key = Key {
field1: 0x00,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
fn dbdir_key_range(spcnode: Oid, dbnode: Oid) -> Range<Key> {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}..Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0xffffffff,
field5: 0xff,
field6: 0xffffffff,
}
}
fn relmap_file_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 0,
}
}
fn rel_dir_to_key(spcnode: Oid, dbnode: Oid) -> Key {
Key {
field1: 0x00,
field2: spcnode,
field3: dbnode,
field4: 0,
field5: 0,
field6: 1,
}
}
pub(crate) fn rel_block_to_key(rel: RelTag, blknum: BlockNumber) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: blknum,
}
}
fn rel_size_to_key(rel: RelTag) -> Key {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0xffffffff,
}
}
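// Illustrative sketch, not part of the diff: a hypothetical relation (the OIDs
// are made up) and how its block and size keys land next to each other, per the
// layout comment above.
#[cfg(test)]
#[test]
fn rel_block_and_size_key_layout_example() {
    let rel = RelTag {
        spcnode: 1663,
        dbnode: 16384,
        relnode: 16385,
        forknum: 0,
    };
    let blk = rel_block_to_key(rel, 7);
    let size = rel_size_to_key(rel);
    // Same relation prefix; only field6 differs: the block number for the
    // block key, 0xffffffff for the size marker.
    assert_eq!((blk.field2, blk.field3, blk.field4, blk.field5), (1663, 16384, 16385, 0));
    assert_eq!(blk.field6, 7);
    assert_eq!(size.field6, 0xffffffff);
    // The size marker therefore sorts after every block of the relation.
    assert!(blk.to_i128() < size.to_i128());
}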
fn rel_key_range(rel: RelTag) -> Range<Key> {
Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum,
field6: 0,
}..Key {
field1: 0x00,
field2: rel.spcnode,
field3: rel.dbnode,
field4: rel.relnode,
field5: rel.forknum + 1,
field6: 0,
}
}
//-- Section 02: SLRUs
fn slru_dir_to_key(kind: SlruKind) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 0,
field4: 0,
field5: 0,
field6: 0,
}
}
fn slru_block_to_key(kind: SlruKind, segno: u32, blknum: BlockNumber) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: blknum,
}
}
fn slru_segment_size_to_key(kind: SlruKind, segno: u32) -> Key {
Key {
field1: 0x01,
field2: match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
},
field3: 1,
field4: segno,
field5: 0,
field6: 0xffffffff,
}
}
fn slru_segment_key_range(kind: SlruKind, segno: u32) -> Range<Key> {
let field2 = match kind {
SlruKind::Clog => 0x00,
SlruKind::MultiXactMembers => 0x01,
SlruKind::MultiXactOffsets => 0x02,
};
Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 0,
field6: 0,
}..Key {
field1: 0x01,
field2,
field3: 1,
field4: segno,
field5: 1,
field6: 0,
}
}
//-- Section 03: pg_twophase
const TWOPHASEDIR_KEY: Key = Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
fn twophase_file_key(xid: TransactionId) -> Key {
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}
}
fn twophase_key_range(xid: TransactionId) -> Range<Key> {
let (next_xid, overflowed) = xid.overflowing_add(1);
Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: xid,
}..Key {
field1: 0x02,
field2: 0,
field3: 0,
field4: 0,
field5: u8::from(overflowed),
field6: next_xid,
}
}
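// Illustrative sketch, not part of the diff: why the overflowing_add above is
// needed. For the largest xid, "xid + 1" wraps to 0, so field5 is bumped to 1
// to keep the end of the range strictly above the start.
#[cfg(test)]
#[test]
fn twophase_key_range_wraparound_example() {
    // Ordinary xid: the range end is simply xid + 1 in field6.
    let r = twophase_key_range(10);
    assert_eq!((r.start.field6, r.end.field5, r.end.field6), (10, 0, 11));
    // Largest possible xid: the +1 overflows, so the carry goes into field5.
    let r = twophase_key_range(u32::MAX);
    assert_eq!((r.start.field6, r.end.field5, r.end.field6), (u32::MAX, 1, 0));
}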
//-- Section 04: Control file
const CONTROLFILE_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 0,
};
const CHECKPOINT_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 1,
};
const AUX_FILES_KEY: Key = Key {
field1: 0x03,
field2: 0,
field3: 0,
field4: 0,
field5: 0,
field6: 2,
};
// Reverse mappings for a few Keys.
// These are needed by WAL redo manager.
// AUX_FILES currently stores only data for logical replication (slots etc), and
// we don't preserve these on a branch because safekeepers can't follow timeline
// switch (and generally it likely should be optional), so ignore these.
pub fn is_inherited_key(key: Key) -> bool {
key != AUX_FILES_KEY
}
pub fn is_rel_fsm_block_key(key: Key) -> bool {
key.field1 == 0x00 && key.field4 != 0 && key.field5 == FSM_FORKNUM && key.field6 != 0xffffffff
}
pub fn is_rel_vm_block_key(key: Key) -> bool {
key.field1 == 0x00
&& key.field4 != 0
&& key.field5 == VISIBILITYMAP_FORKNUM
&& key.field6 != 0xffffffff
}
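// Illustrative sketch, not part of the diff: these predicates match only the
// data blocks of a relation's FSM/VM fork, never its size marker and never the
// wrong fork. The OIDs are made up.
#[cfg(test)]
#[test]
fn fsm_vm_block_predicates_example() {
    let fsm_rel = RelTag {
        spcnode: 1663,
        dbnode: 16384,
        relnode: 16385,
        forknum: FSM_FORKNUM,
    };
    assert!(is_rel_fsm_block_key(rel_block_to_key(fsm_rel, 0)));
    // The size marker (field6 == 0xffffffff) is excluded on purpose.
    assert!(!is_rel_fsm_block_key(rel_size_to_key(fsm_rel)));
    // A block of the FSM fork is not a VM block.
    assert!(!is_rel_vm_block_key(rel_block_to_key(fsm_rel, 0)));
}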
pub fn key_to_slru_block(key: Key) -> anyhow::Result<(SlruKind, u32, BlockNumber)> {
Ok(match key.field1 {
0x01 => {
let kind = match key.field2 {
0x00 => SlruKind::Clog,
0x01 => SlruKind::MultiXactMembers,
0x02 => SlruKind::MultiXactOffsets,
_ => anyhow::bail!("unrecognized slru kind 0x{:02x}", key.field2),
};
let segno = key.field4;
let blknum = key.field6;
(kind, segno, blknum)
}
_ => anyhow::bail!("unexpected value kind 0x{:02x}", key.field1),
})
}
fn is_slru_block_key(key: Key) -> bool {
key.field1 == 0x01 // SLRU-related
&& key.field3 == 0x00000001 // but not SlruDir
&& key.field6 != 0xffffffff // and not SlruSegSize
}
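// Illustrative sketch, not part of the diff: the forward and reverse SLRU
// mappings agree, and the block-key predicate excludes SlruDir and SlruSegSize
// keys (assuming SlruKind derives PartialEq and Debug).
#[cfg(test)]
#[test]
fn slru_key_roundtrip_example() {
    let key = slru_block_to_key(SlruKind::MultiXactMembers, 3, 42);
    assert!(is_slru_block_key(key));
    assert_eq!(
        key_to_slru_block(key).unwrap(),
        (SlruKind::MultiXactMembers, 3, 42)
    );
    // The directory and segment-size keys are deliberately not block keys.
    assert!(!is_slru_block_key(slru_dir_to_key(SlruKind::Clog)));
    assert!(!is_slru_block_key(slru_segment_size_to_key(SlruKind::Clog, 3)));
}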
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {

View File

@@ -91,6 +91,7 @@ use std::fs;
use std::fs::File;
use std::io;
use std::ops::Bound::Included;
use std::process::Stdio;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
@@ -627,15 +628,9 @@ impl Tenant {
deletion_queue_client,
));
// The attach task will carry a GateGuard, so that shutdown() reliably waits for it to drop out if
// we shut down while attaching.
let Ok(attach_gate_guard) = tenant.gate.enter() else {
// We just created the Tenant: nothing else can have shut it down yet
unreachable!();
};
// Do all the hard work in the background
let tenant_clone = Arc::clone(&tenant);
let ctx = ctx.detached_child(TaskKind::Attach, DownloadBehavior::Warn);
task_mgr::spawn(
&tokio::runtime::Handle::current(),
@@ -645,8 +640,6 @@ impl Tenant {
"attach tenant",
false,
async move {
let _gate_guard = attach_gate_guard;
// Is this tenant being spawned as part of process startup?
let starting_up = init_order.is_some();
scopeguard::defer! {
@@ -723,10 +716,6 @@ impl Tenant {
// stayed in Activating for such a long time that shutdown found it in
// that state.
tracing::info!(state=%tenant_clone.current_state(), "Tenant shut down before activation");
// Make the tenant broken so that set_stopping will not hang waiting for it to leave
// the Attaching state. This is an over-reaction (nothing really broke, the tenant is
// just shutting down), but ensures progress.
make_broken(&tenant_clone, anyhow::anyhow!("Shut down while Attaching"));
return Ok(());
},
)
@@ -821,7 +810,7 @@ impl Tenant {
SpawnMode::Create => None,
SpawnMode::Normal => {Some(TENANT.attach.start_timer())}
};
match tenant_clone.attach(preload, mode, &ctx).await {
match tenant_clone.attach(preload, &ctx).await {
Ok(()) => {
info!("attach finished, activating");
if let Some(t)= attach_timer {t.observe_duration();}
@@ -908,20 +897,15 @@ impl Tenant {
async fn attach(
self: &Arc<Tenant>,
preload: Option<TenantPreload>,
mode: SpawnMode,
ctx: &RequestContext,
) -> anyhow::Result<()> {
span::debug_assert_current_span_has_tenant_id();
failpoint_support::sleep_millis_async!("before-attaching-tenant");
let preload = match (preload, mode) {
(Some(p), _) => p,
(None, SpawnMode::Create) => TenantPreload {
deleting: false,
timelines: HashMap::new(),
},
(None, SpawnMode::Normal) => {
let preload = match preload {
Some(p) => p,
None => {
// Deprecated dev mode: load from local disk state instead of remote storage
// https://github.com/neondatabase/neon/issues/5624
return self.load_local(ctx).await;
@@ -1029,10 +1013,7 @@ impl Tenant {
// IndexPart is the source of truth.
self.clean_up_timelines(&existent_timelines)?;
fail::fail_point!("attach-before-activate", |_| {
anyhow::bail!("attach-before-activate");
});
failpoint_support::sleep_millis_async!("attach-before-activate-sleep", &self.cancel);
failpoint_support::sleep_millis_async!("attach-before-activate", &self.cancel);
info!("Done");
@@ -1696,13 +1677,9 @@ impl Tenant {
ctx: &RequestContext,
) -> Result<Arc<Timeline>, CreateTimelineError> {
if !self.is_active() {
if matches!(self.current_state(), TenantState::Stopping { .. }) {
return Err(CreateTimelineError::ShuttingDown);
} else {
return Err(CreateTimelineError::Other(anyhow::anyhow!(
"Cannot create timelines on inactive tenant"
)));
}
return Err(CreateTimelineError::Other(anyhow::anyhow!(
"Cannot create timelines on inactive tenant"
)));
}
let _gate = self
@@ -3778,25 +3755,27 @@ async fn run_initdb(
.env_clear()
.env("LD_LIBRARY_PATH", &initdb_lib_dir)
.env("DYLD_LIBRARY_PATH", &initdb_lib_dir)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
// If the `select!` below doesn't finish the `wait_with_output`,
// let the task get `wait()`ed for asynchronously by tokio.
// This means there is a slim chance we can go over the INIT_DB_SEMAPHORE.
// TODO: fix for this is non-trivial, see
// https://github.com/neondatabase/neon/pull/5921#pullrequestreview-1750858021
//
.kill_on_drop(true)
.spawn()?;
// Ideally we'd select here with the cancellation token, but the problem is that
// we can't safely terminate initdb: it launches processes of its own, and killing
// initdb doesn't kill them. After we return from this function, we want the target
// directory to be able to be cleaned up.
// See https://github.com/neondatabase/neon/issues/6385
let initdb_output = initdb_command.wait_with_output().await?;
if !initdb_output.status.success() {
return Err(InitdbError::Failed(
initdb_output.status,
initdb_output.stderr,
));
}
// This isn't true cancellation support, see above. Still return an error to
// exercise the cancellation code path.
if cancel.is_cancelled() {
return Err(InitdbError::Cancelled);
tokio::select! {
initdb_output = initdb_command.wait_with_output() => {
let initdb_output = initdb_output?;
if !initdb_output.status.success() {
return Err(InitdbError::Failed(initdb_output.status, initdb_output.stderr));
}
}
_ = cancel.cancelled() => {
return Err(InitdbError::Cancelled);
}
}
Ok(())
@@ -4052,7 +4031,7 @@ pub(crate) mod harness {
.instrument(info_span!("try_load_preload", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
tenant
.attach(Some(preload), SpawnMode::Normal, ctx)
.attach(Some(preload), ctx)
.instrument(info_span!("try_load", tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))
.await?;
}

View File

@@ -5,10 +5,10 @@
use super::ephemeral_file::EphemeralFile;
use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
use crate::context::RequestContext;
use crate::page_cache::{self, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ};
use crate::page_cache::{self, PageReadGuard, ReadBufResult, PAGE_SZ};
use crate::virtual_file::VirtualFile;
use bytes::Bytes;
use std::ops::Deref;
use std::ops::{Deref, DerefMut};
/// This is implemented by anything that can read 8 kB (PAGE_SZ)
/// blocks, using the page cache
@@ -39,8 +39,6 @@ pub enum BlockLease<'a> {
EphemeralFileMutableTail(&'a [u8; PAGE_SZ]),
#[cfg(test)]
Arc(std::sync::Arc<[u8; PAGE_SZ]>),
#[cfg(test)]
Vec(Vec<u8>),
}
impl From<PageReadGuard<'static>> for BlockLease<'static> {
@@ -65,10 +63,6 @@ impl<'a> Deref for BlockLease<'a> {
BlockLease::EphemeralFileMutableTail(v) => v,
#[cfg(test)]
BlockLease::Arc(v) => v.deref(),
#[cfg(test)]
BlockLease::Vec(v) => {
TryFrom::try_from(&v[..]).expect("caller must ensure that v has PAGE_SZ")
}
}
}
}
@@ -175,14 +169,10 @@ impl FileBlockReader {
}
/// Read a page from the underlying file into given buffer.
async fn fill_buffer(
&self,
buf: PageWriteGuard<'static>,
blkno: u32,
) -> Result<PageWriteGuard<'static>, std::io::Error> {
async fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), std::io::Error> {
assert!(buf.len() == PAGE_SZ);
self.file
.read_exact_at_page(buf, blkno as u64 * PAGE_SZ as u64)
.read_exact_at(buf, blkno as u64 * PAGE_SZ as u64)
.await
}
/// Read a block.
@@ -206,9 +196,9 @@ impl FileBlockReader {
)
})? {
ReadBufResult::Found(guard) => Ok(guard.into()),
ReadBufResult::NotFound(write_guard) => {
ReadBufResult::NotFound(mut write_guard) => {
// Read the page from disk into the buffer
let write_guard = self.fill_buffer(write_guard, blknum).await?;
self.fill_buffer(write_guard.deref_mut(), blknum).await?;
Ok(write_guard.mark_valid().into())
}
}

View File

@@ -409,10 +409,7 @@ impl DeleteTenantFlow {
.await
.expect("cant be stopping or broken");
tenant
.attach(preload, super::SpawnMode::Normal, ctx)
.await
.context("attach")?;
tenant.attach(preload, ctx).await.context("attach")?;
Self::background(
guard,

View File

@@ -5,11 +5,11 @@ use crate::config::PageServerConf;
use crate::context::RequestContext;
use crate::page_cache::{self, PAGE_SZ};
use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReader};
use crate::virtual_file::{self, VirtualFile};
use crate::virtual_file::VirtualFile;
use camino::Utf8PathBuf;
use pageserver_api::shard::TenantShardId;
use std::cmp::min;
use std::fs::OpenOptions;
use std::io::{self, ErrorKind};
use std::ops::DerefMut;
use std::sync::atomic::AtomicU64;
@@ -47,10 +47,7 @@ impl EphemeralFile {
let file = VirtualFile::open_with_options(
&filename,
virtual_file::OpenOptions::new()
.read(true)
.write(true)
.create(true),
OpenOptions::new().read(true).write(true).create(true),
)
.await?;
@@ -92,10 +89,11 @@ impl EphemeralFile {
page_cache::ReadBufResult::Found(guard) => {
return Ok(BlockLease::PageReadGuard(guard))
}
page_cache::ReadBufResult::NotFound(write_guard) => {
let write_guard = self
.file
.read_exact_at_page(write_guard, blknum as u64 * PAGE_SZ as u64)
page_cache::ReadBufResult::NotFound(mut write_guard) => {
let buf: &mut [u8] = write_guard.deref_mut();
debug_assert_eq!(buf.len(), PAGE_SZ);
self.file
.read_exact_at(&mut buf[..], blknum as u64 * PAGE_SZ as u64)
.await?;
let read_guard = write_guard.mark_valid();
return Ok(BlockLease::PageReadGuard(read_guard));

View File

@@ -283,15 +283,15 @@ impl LayerMap {
///
/// This is used for garbage collection, to determine if an old layer can
/// be deleted.
pub fn image_layer_exists(&self, key: &Range<Key>, lsn: &Range<Lsn>) -> bool {
pub fn image_layer_exists(&self, key: &Range<Key>, lsn: &Range<Lsn>) -> Result<bool> {
if key.is_empty() {
// Vacuously true. There's a newer image for all 0 of the keys in the range.
return true;
return Ok(true);
}
let version = match self.historic.get().unwrap().get_version(lsn.end.0 - 1) {
Some(v) => v,
None => return false,
None => return Ok(false),
};
let start = key.start.to_i128();
@@ -304,17 +304,17 @@ impl LayerMap {
// Check the start is covered
if !layer_covers(version.image_coverage.query(start)) {
return false;
return Ok(false);
}
// Check after all changes of coverage
for (_, change_val) in version.image_coverage.range(start..end) {
if !layer_covers(change_val) {
return false;
return Ok(false);
}
}
true
Ok(true)
}
pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<PersistentLayerDesc>> {
@@ -325,14 +325,18 @@ impl LayerMap {
/// Divide the whole given range of keys into sub-ranges based on the latest
/// image layer that covers each range at the specified lsn (inclusive).
/// This is used when creating new image layers.
///
// FIXME: clippy complains that the result type is very complex. She's probably
// right...
#[allow(clippy::type_complexity)]
pub fn image_coverage(
&self,
key_range: &Range<Key>,
lsn: Lsn,
) -> Vec<(Range<Key>, Option<Arc<PersistentLayerDesc>>)> {
) -> Result<Vec<(Range<Key>, Option<Arc<PersistentLayerDesc>>)>> {
let version = match self.historic.get().unwrap().get_version(lsn.0) {
Some(v) => v,
None => return vec![],
None => return Ok(vec![]),
};
let start = key_range.start.to_i128();
@@ -355,7 +359,7 @@ impl LayerMap {
let kr = Key::from_i128(current_key)..Key::from_i128(end);
coverage.push((kr, current_val.take()));
coverage
Ok(coverage)
}
pub fn is_l0(layer: &PersistentLayerDesc) -> bool {
@@ -406,19 +410,24 @@ impl LayerMap {
/// This number is used to compute the largest number of deltas that
/// we'll need to visit for any page reconstruction in this region.
/// We use this heuristic to decide whether to create an image layer.
pub fn count_deltas(&self, key: &Range<Key>, lsn: &Range<Lsn>, limit: Option<usize>) -> usize {
pub fn count_deltas(
&self,
key: &Range<Key>,
lsn: &Range<Lsn>,
limit: Option<usize>,
) -> Result<usize> {
// We get the delta coverage of the region, and for each part of the coverage
// we recurse right underneath the delta. The recursion depth is limited by
// the largest result this function could return, which is in practice between
// 3 and 10 (since we usually try to create an image when the number gets larger).
if lsn.is_empty() || key.is_empty() || limit == Some(0) {
return 0;
return Ok(0);
}
let version = match self.historic.get().unwrap().get_version(lsn.end.0 - 1) {
Some(v) => v,
None => return 0,
None => return Ok(0),
};
let start = key.start.to_i128();
@@ -439,7 +448,8 @@ impl LayerMap {
if !kr.is_empty() {
let base_count = Self::is_reimage_worthy(&val, key) as usize;
let new_limit = limit.map(|l| l - base_count);
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);
let max_stacked_deltas_underneath =
self.count_deltas(&kr, &lr, new_limit)?;
max_stacked_deltas = std::cmp::max(
max_stacked_deltas,
base_count + max_stacked_deltas_underneath,
@@ -461,7 +471,7 @@ impl LayerMap {
if !kr.is_empty() {
let base_count = Self::is_reimage_worthy(&val, key) as usize;
let new_limit = limit.map(|l| l - base_count);
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);
let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit)?;
max_stacked_deltas = std::cmp::max(
max_stacked_deltas,
base_count + max_stacked_deltas_underneath,
@@ -470,7 +480,7 @@ impl LayerMap {
}
}
max_stacked_deltas
Ok(max_stacked_deltas)
}
/// Count how many reimage-worthy layers we need to visit for given key-lsn pair.
@@ -582,7 +592,10 @@ impl LayerMap {
if limit == Some(difficulty) {
break;
}
for (img_range, last_img) in self.image_coverage(range, lsn) {
for (img_range, last_img) in self
.image_coverage(range, lsn)
.expect("why would this err?")
{
if limit == Some(difficulty) {
break;
}
@@ -593,7 +606,9 @@ impl LayerMap {
};
if img_lsn < lsn {
let num_deltas = self.count_deltas(&img_range, &(img_lsn..lsn), limit);
let num_deltas = self
.count_deltas(&img_range, &(img_lsn..lsn), limit)
.expect("why would this err lol?");
difficulty = std::cmp::max(difficulty, num_deltas);
}
}

View File

@@ -7,7 +7,6 @@ use pageserver_api::models::ShardParameters;
use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, TenantShardId};
use rand::{distributions::Alphanumeric, Rng};
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap};
use std::ops::Deref;
use std::sync::Arc;
@@ -33,8 +32,7 @@ use crate::deletion_queue::DeletionQueueClient;
use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
use crate::task_mgr::{self, TaskKind};
use crate::tenant::config::{
AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig,
TenantConfOpt,
AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, TenantConfOpt,
};
use crate::tenant::delete::DeleteTenantFlow;
use crate::tenant::span::debug_assert_current_span_has_tenant_id;
@@ -468,26 +466,6 @@ pub async fn init_tenant_mgr(
// We have a generation map: treat it as the authority for whether
// this tenant is really attached.
if let Some(gen) = generations.get(&tenant_shard_id) {
if let LocationMode::Attached(attached) = &location_conf.mode {
if attached.generation > *gen {
tracing::error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
"Control plane gave decreasing generation ({gen:?}) in re-attach response for tenant that was attached in generation {:?}, demoting to secondary",
attached.generation
);
// We cannot safely attach this tenant given a bogus generation number, but let's avoid throwing away
// local disk content: demote to secondary rather than detaching.
tenants.insert(
tenant_shard_id,
TenantSlot::Secondary(SecondaryTenant::new(
tenant_shard_id,
location_conf.shard,
location_conf.tenant_conf,
&SecondaryLocationConfig { warm: false },
)),
);
}
}
*gen
} else {
match &location_conf.mode {
@@ -743,7 +721,7 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
tokio::select! {
Some(joined) = join_set.join_next() => {
match joined {
Ok(()) => {},
Ok(()) => {}
Err(join_error) if join_error.is_cancelled() => {
unreachable!("we are not cancelling any of the tasks");
}
@@ -904,7 +882,7 @@ impl TenantManager {
tenant_shard_id: TenantShardId,
new_location_config: LocationConf,
flush: Option<Duration>,
mut spawn_mode: SpawnMode,
spawn_mode: SpawnMode,
ctx: &RequestContext,
) -> Result<Option<Arc<Tenant>>, UpsertLocationError> {
debug_assert_current_span_has_tenant_id();
@@ -924,29 +902,19 @@ impl TenantManager {
tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Write)?;
match (&new_location_config.mode, peek_slot) {
(LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => {
match attach_conf.generation.cmp(&tenant.generation) {
Ordering::Equal => {
// A transition from Attached to Attached in the same generation, we may
// take our fast path and just provide the updated configuration
// to the tenant.
tenant.set_new_location_config(
AttachedTenantConf::try_from(new_location_config.clone())
.map_err(UpsertLocationError::BadRequest)?,
);
if attach_conf.generation == tenant.generation {
// A transition from Attached to Attached in the same generation, we may
// take our fast path and just provide the updated configuration
// to the tenant.
tenant.set_new_location_config(
AttachedTenantConf::try_from(new_location_config.clone())
.map_err(UpsertLocationError::BadRequest)?,
);
Some(FastPathModified::Attached(tenant.clone()))
}
Ordering::Less => {
return Err(UpsertLocationError::BadRequest(anyhow::anyhow!(
"Generation {:?} is less than existing {:?}",
attach_conf.generation,
tenant.generation
)));
}
Ordering::Greater => {
// Generation advanced, fall through to general case of replacing `Tenant` object
None
}
Some(FastPathModified::Attached(tenant.clone()))
} else {
// Different generations, fall through to general case
None
}
}
(
@@ -1051,12 +1019,6 @@ impl TenantManager {
}
}
slot_guard.drop_old_value().expect("We just shut it down");
// Edge case: if we were called with SpawnMode::Create, but a Tenant already existed, then
// the caller thinks they're creating but the tenant already existed. We must switch to
// Normal mode so that when starting this Tenant we properly probe remote storage for timelines,
// rather than assuming it to be empty.
spawn_mode = SpawnMode::Normal;
}
Some(TenantSlot::Secondary(state)) => {
info!("Shutting down secondary tenant");
@@ -1140,46 +1102,14 @@ impl TenantManager {
None
};
match slot_guard.upsert(new_slot) {
Err(TenantSlotUpsertError::InternalError(e)) => {
Err(UpsertLocationError::Other(anyhow::anyhow!(e)))
slot_guard.upsert(new_slot).map_err(|e| match e {
TenantSlotUpsertError::InternalError(e) => {
UpsertLocationError::Other(anyhow::anyhow!(e))
}
Err(TenantSlotUpsertError::MapState(e)) => Err(UpsertLocationError::Unavailable(e)),
Err(TenantSlotUpsertError::ShuttingDown((new_slot, _completion))) => {
// If we just called tenant_spawn() on a new tenant, and can't insert it into our map, then
// we must not leak it: this would violate the invariant that after shutdown_all_tenants, all tenants
// are shut down.
//
// We must shut it down inline here.
match new_slot {
TenantSlot::InProgress(_) => {
// Unreachable because we never insert an InProgress
unreachable!()
}
TenantSlot::Attached(tenant) => {
let (_guard, progress) = utils::completion::channel();
info!("Shutting down just-spawned tenant, because tenant manager is shut down");
match tenant.shutdown(progress, false).await {
Ok(()) => {
info!("Finished shutting down just-spawned tenant");
}
Err(barrier) => {
info!("Shutdown already in progress, waiting for it to complete");
barrier.wait().await;
}
}
}
TenantSlot::Secondary(secondary_tenant) => {
secondary_tenant.shutdown().await;
}
}
TenantSlotUpsertError::MapState(e) => UpsertLocationError::Unavailable(e),
})?;
Err(UpsertLocationError::Unavailable(
TenantMapError::ShuttingDown,
))
}
Ok(()) => Ok(attached_tenant),
}
Ok(attached_tenant)
}
/// Resetting a tenant is equivalent to detaching it, then attaching it again with the same
@@ -1798,31 +1728,14 @@ pub(crate) enum TenantSlotError {
/// Superset of TenantMapError: issues that can occur when using a SlotGuard
/// to insert a new value.
#[derive(thiserror::Error)]
pub(crate) enum TenantSlotUpsertError {
#[derive(Debug, thiserror::Error)]
pub enum TenantSlotUpsertError {
/// An error where the slot is in an unexpected state, indicating a code bug
#[error("Internal error updating Tenant")]
InternalError(Cow<'static, str>),
#[error(transparent)]
MapState(TenantMapError),
// If we encounter TenantManager shutdown during upsert, we must carry the Completion
// from the SlotGuard, so that the caller can hold it while they clean up: otherwise
// TenantManager shutdown might race ahead before we're done cleaning up any Tenant that
// was protected by the SlotGuard.
#[error("Shutting down")]
ShuttingDown((TenantSlot, utils::completion::Completion)),
}
impl std::fmt::Debug for TenantSlotUpsertError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::InternalError(reason) => write!(f, "Internal Error {reason}"),
Self::MapState(map_error) => write!(f, "Tenant map state: {map_error:?}"),
Self::ShuttingDown(_completion) => write!(f, "Tenant map shutting down"),
}
}
MapState(#[from] TenantMapError),
}
#[derive(Debug, thiserror::Error)]
@@ -1871,7 +1784,7 @@ pub struct SlotGuard {
/// [`TenantSlot::InProgress`] carries the corresponding Barrier: it will
/// release any waiters as soon as this SlotGuard is dropped.
completion: utils::completion::Completion,
_completion: utils::completion::Completion,
}
impl SlotGuard {
@@ -1884,7 +1797,7 @@ impl SlotGuard {
tenant_shard_id,
old_value,
upserted: false,
completion,
_completion: completion,
}
}
@@ -1917,16 +1830,9 @@ impl SlotGuard {
}
let m = match &mut *locked {
TenantsMap::Initializing => {
return Err(TenantSlotUpsertError::MapState(
TenantMapError::StillInitializing,
))
}
TenantsMap::Initializing => return Err(TenantMapError::StillInitializing.into()),
TenantsMap::ShuttingDown(_) => {
return Err(TenantSlotUpsertError::ShuttingDown((
new_value,
self.completion.clone(),
)));
return Err(TenantMapError::ShuttingDown.into());
}
TenantsMap::Open(m) => m,
};
@@ -1974,9 +1880,7 @@ impl SlotGuard {
Err(TenantSlotUpsertError::InternalError(_)) => {
// We already logged the error, nothing else we can do.
}
Err(
TenantSlotUpsertError::MapState(_) | TenantSlotUpsertError::ShuttingDown(_),
) => {
Err(TenantSlotUpsertError::MapState(_)) => {
// If the map is shutting down, we need not replace anything
}
Ok(()) => {}
@@ -2074,22 +1978,18 @@ fn tenant_map_peek_slot<'a>(
tenant_shard_id: &TenantShardId,
mode: TenantSlotPeekMode,
) -> Result<Option<&'a TenantSlot>, TenantMapError> {
match tenants.deref() {
TenantsMap::Initializing => Err(TenantMapError::StillInitializing),
let m = match tenants.deref() {
TenantsMap::Initializing => return Err(TenantMapError::StillInitializing),
TenantsMap::ShuttingDown(m) => match mode {
TenantSlotPeekMode::Read => Ok(Some(
// When reading in ShuttingDown state, we must translate None results
// into a ShuttingDown error, because absence of a tenant shard ID in the map
// isn't a reliable indicator of the tenant being gone: it might have been
// InProgress when shutdown started, and cleaned up from that state such
// that it's now no longer in the map. Callers will have to wait until
// we next start up to get a proper answer. This avoids incorrect 404 API responses.
m.get(tenant_shard_id).ok_or(TenantMapError::ShuttingDown)?,
)),
TenantSlotPeekMode::Write => Err(TenantMapError::ShuttingDown),
TenantSlotPeekMode::Read => m,
TenantSlotPeekMode::Write => {
return Err(TenantMapError::ShuttingDown);
}
},
TenantsMap::Open(m) => Ok(m.get(tenant_shard_id)),
}
TenantsMap::Open(m) => m,
};
Ok(m.get(tenant_shard_id))
}
enum TenantSlotAcquireMode {

View File

@@ -257,8 +257,6 @@ pub(crate) const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;
pub(crate) const INITDB_PATH: &str = "initdb.tar.zst";
pub(crate) const INITDB_PRESERVED_PATH: &str = "initdb-preserved.tar.zst";
/// Default buffer size when interfacing with [`tokio::fs::File`].
pub(crate) const BUFFER_SIZE: usize = 32 * 1024;
@@ -1068,28 +1066,6 @@ impl RemoteTimelineClient {
Ok(())
}
pub(crate) async fn preserve_initdb_archive(
self: &Arc<Self>,
tenant_id: &TenantId,
timeline_id: &TimelineId,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
backoff::retry(
|| async {
upload::preserve_initdb_archive(&self.storage_impl, tenant_id, timeline_id, cancel)
.await
},
|_e| false,
FAILED_DOWNLOAD_WARN_THRESHOLD,
FAILED_REMOTE_OP_RETRIES,
"preserve_initdb_tar_zst",
backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled!")),
)
.await
.context("backing up initdb archive")?;
Ok(())
}
/// Prerequisites: UploadQueue should be in stopped state and deleted_at should be successfully set.
/// The function deletes layer files one by one, then lists the prefix to see if we leaked anything,
/// deletes leaked files if any, and proceeds with deletion of the index file at the end.
@@ -1125,14 +1101,6 @@ impl RemoteTimelineClient {
let layer_deletion_count = layers.len();
self.deletion_queue_client.push_immediate(layers).await?;
// Delete the initdb.tar.zst, which is not always present, but deletion attempts of
// nonexistent objects are not considered errors.
let initdb_path =
remote_initdb_archive_path(&self.tenant_shard_id.tenant_id, &self.timeline_id);
self.deletion_queue_client
.push_immediate(vec![initdb_path])
.await?;
// Do not delete the index part yet, it is needed for a possible retry. If we remove it first
// and the retry arrives at a different pageserver, there won't be any traces of it on remote storage
let timeline_storage_path = remote_timeline_path(&self.tenant_shard_id, &self.timeline_id);
@@ -1180,8 +1148,10 @@ impl RemoteTimelineClient {
if p == &latest_index {
return false;
}
if p.object_name() == Some(INITDB_PRESERVED_PATH) {
return false;
if let Some(name) = p.object_name() {
if name == INITDB_PATH {
return false;
}
}
true
})
@@ -1754,16 +1724,6 @@ pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId
.expect("Failed to construct path")
}
pub fn remote_initdb_preserved_archive_path(
tenant_id: &TenantId,
timeline_id: &TimelineId,
) -> RemotePath {
RemotePath::from_string(&format!(
"tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PRESERVED_PATH}"
))
.expect("Failed to construct path")
}
pub fn remote_index_path(
tenant_shard_id: &TenantShardId,
timeline_id: &TimelineId,

View File

@@ -32,8 +32,7 @@ use utils::id::TimelineId;
use super::index::{IndexPart, LayerFileMetadata};
use super::{
parse_remote_index_path, remote_index_path, remote_initdb_archive_path,
remote_initdb_preserved_archive_path, FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES,
INITDB_PATH,
FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
};
///
@@ -431,9 +430,6 @@ pub(crate) async fn download_initdb_tar_zst(
let remote_path = remote_initdb_archive_path(&tenant_shard_id.tenant_id, timeline_id);
let remote_preserved_path =
remote_initdb_preserved_archive_path(&tenant_shard_id.tenant_id, timeline_id);
let timeline_path = conf.timelines_path(tenant_shard_id);
if !timeline_path.exists() {
@@ -460,16 +456,8 @@ pub(crate) async fn download_initdb_tar_zst(
.with_context(|| format!("tempfile creation {temp_path}"))
.map_err(DownloadError::Other)?;
let download = match download_cancellable(&cancel_inner, storage.download(&remote_path))
.await
{
Ok(dl) => dl,
Err(DownloadError::NotFound) => {
download_cancellable(&cancel_inner, storage.download(&remote_preserved_path))
.await?
}
Err(other) => Err(other)?,
};
let download =
download_cancellable(&cancel_inner, storage.download(&remote_path)).await?;
let mut download = tokio_util::io::StreamReader::new(download.download_stream);
let mut writer = tokio::io::BufWriter::with_capacity(8 * 1024, file);

View File

@@ -13,8 +13,8 @@ use super::Generation;
use crate::{
config::PageServerConf,
tenant::remote_timeline_client::{
index::IndexPart, remote_index_path, remote_initdb_archive_path,
remote_initdb_preserved_archive_path, remote_path, upload_cancellable,
index::IndexPart, remote_index_path, remote_initdb_archive_path, remote_path,
upload_cancellable,
},
};
use remote_storage::GenericRemoteStorage;
@@ -144,16 +144,3 @@ pub(crate) async fn upload_initdb_dir(
.await
.with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
}
pub(crate) async fn preserve_initdb_archive(
storage: &GenericRemoteStorage,
tenant_id: &TenantId,
timeline_id: &TimelineId,
cancel: &CancellationToken,
) -> anyhow::Result<()> {
let source_path = remote_initdb_archive_path(tenant_id, timeline_id);
let dest_path = remote_initdb_preserved_archive_path(tenant_id, timeline_id);
upload_cancellable(cancel, storage.copy_object(&source_path, &dest_path))
.await
.with_context(|| format!("backing up initdb archive for '{tenant_id} / {timeline_id}'"))
}

View File

@@ -36,7 +36,7 @@ use crate::tenant::block_io::{BlockBuf, BlockCursor, BlockLease, BlockReader, Fi
use crate::tenant::disk_btree::{DiskBtreeBuilder, DiskBtreeReader, VisitDirection};
use crate::tenant::storage_layer::{Layer, ValueReconstructResult, ValueReconstructState};
use crate::tenant::Timeline;
use crate::virtual_file::{self, VirtualFile};
use crate::virtual_file::VirtualFile;
use crate::{walrecord, TEMP_FILE_SUFFIX};
use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
use anyhow::{bail, ensure, Context, Result};
@@ -649,7 +649,7 @@ impl DeltaLayer {
{
let file = VirtualFile::open_with_options(
path,
virtual_file::OpenOptions::new().read(true).write(true),
&*std::fs::OpenOptions::new().read(true).write(true),
)
.await
.with_context(|| format!("Failed to open file '{}'", path))?;

View File

@@ -34,7 +34,7 @@ use crate::tenant::storage_layer::{
LayerAccessStats, ValueReconstructResult, ValueReconstructState,
};
use crate::tenant::Timeline;
use crate::virtual_file::{self, VirtualFile};
use crate::virtual_file::VirtualFile;
use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
use anyhow::{bail, ensure, Context, Result};
use bytes::Bytes;
@@ -327,7 +327,7 @@ impl ImageLayer {
{
let file = VirtualFile::open_with_options(
path,
virtual_file::OpenOptions::new().read(true).write(true),
&*std::fs::OpenOptions::new().read(true).write(true),
)
.await
.with_context(|| format!("Failed to open file '{}'", path))?;
@@ -492,15 +492,11 @@ impl ImageLayerWriterInner {
},
);
info!("new image layer {path}");
let mut file = {
VirtualFile::open_with_options(
&path,
virtual_file::OpenOptions::new()
.write(true)
.create_new(true),
)
.await?
};
let mut file = VirtualFile::open_with_options(
&path,
std::fs::OpenOptions::new().write(true).create_new(true),
)
.await?;
// make room for the header block
file.seek(SeekFrom::Start(PAGE_SZ as u64)).await?;
let blob_writer = BlobWriter::new(file, PAGE_SZ as u64);

View File

@@ -9,7 +9,6 @@ use crate::context::{DownloadBehavior, RequestContext};
use crate::metrics::TENANT_TASK_EVENTS;
use crate::task_mgr;
use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
use crate::tenant::timeline::CompactionError;
use crate::tenant::{Tenant, TenantState};
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -182,11 +181,8 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
);
error_run_count += 1;
let wait_duration = Duration::from_secs_f64(wait_duration);
log_compaction_error(
&e,
error_run_count,
&wait_duration,
cancel.is_cancelled(),
error!(
"Compaction failed {error_run_count} times, retrying in {wait_duration:?}: {e:?}",
);
wait_duration
} else {
@@ -214,58 +210,6 @@ async fn compaction_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
}
fn log_compaction_error(
e: &CompactionError,
error_run_count: u32,
sleep_duration: &std::time::Duration,
task_cancelled: bool,
) {
use crate::tenant::upload_queue::NotInitialized;
use crate::tenant::PageReconstructError;
use CompactionError::*;
enum LooksLike {
Info,
Error,
}
let decision = match e {
ShuttingDown => None,
_ if task_cancelled => Some(LooksLike::Info),
Other(e) => {
let root_cause = e.root_cause();
let is_stopping = {
let upload_queue = root_cause
.downcast_ref::<NotInitialized>()
.is_some_and(|e| e.is_stopping());
let timeline = root_cause
.downcast_ref::<PageReconstructError>()
.is_some_and(|e| e.is_stopping());
upload_queue || timeline
};
if is_stopping {
Some(LooksLike::Info)
} else {
Some(LooksLike::Error)
}
}
};
match decision {
Some(LooksLike::Info) => info!(
"Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:#}",
),
Some(LooksLike::Error) => error!(
"Compaction failed {error_run_count} times, retrying in {sleep_duration:?}: {e:?}",
),
None => {}
}
}
///
/// GC task's main loop
///

View File

@@ -14,7 +14,6 @@ use enumset::EnumSet;
use fail::fail_point;
use itertools::Itertools;
use pageserver_api::{
keyspace::{key_range_size, KeySpaceAccum},
models::{
DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
LayerMapInfo, TimelineState,
@@ -33,7 +32,7 @@ use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::sync::gate::Gate;
use std::collections::{BTreeMap, BinaryHeap, HashMap, HashSet};
use std::collections::{BinaryHeap, HashMap, HashSet};
use std::ops::{Deref, Range};
use std::pin::pin;
use std::sync::atomic::Ordering as AtomicOrdering;
@@ -74,8 +73,8 @@ use crate::metrics::{
TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
};
use crate::pgdatadir_mapping::CalculateLogicalSizeError;
use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::tenant::config::TenantConfOpt;
use pageserver_api::key::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
use pageserver_api::reltag::RelTag;
use pageserver_api::shard::ShardIndex;
@@ -392,7 +391,8 @@ pub(crate) enum PageReconstructError {
#[error("Ancestor LSN wait error: {0}")]
AncestorLsnTimeout(#[from] WaitLsnError),
#[error("timeline shutting down")]
/// The operation was cancelled
#[error("Cancelled")]
Cancelled,
/// The ancestor of this is being stopped
@@ -404,34 +404,6 @@ pub(crate) enum PageReconstructError {
WalRedo(anyhow::Error),
}
impl PageReconstructError {
/// Returns true if this error indicates a tenant/timeline shutdown alike situation
pub(crate) fn is_stopping(&self) -> bool {
use PageReconstructError::*;
match self {
Other(_) => false,
AncestorLsnTimeout(_) => false,
Cancelled | AncestorStopping(_) => true,
WalRedo(_) => false,
}
}
}
#[derive(thiserror::Error, Debug)]
enum CreateImageLayersError {
#[error("timeline shutting down")]
Cancelled,
#[error(transparent)]
GetVectoredError(GetVectoredError),
#[error(transparent)]
PageReconstructError(PageReconstructError),
#[error(transparent)]
Other(#[from] anyhow::Error),
}
#[derive(thiserror::Error, Debug)]
enum FlushLayerError {
/// Timeline cancellation token was cancelled
@@ -439,24 +411,12 @@ enum FlushLayerError {
Cancelled,
#[error(transparent)]
CreateImageLayersError(CreateImageLayersError),
PageReconstructError(#[from] PageReconstructError),
#[error(transparent)]
Other(#[from] anyhow::Error),
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum GetVectoredError {
#[error("timeline shutting down")]
Cancelled,
#[error("Requested too many keys: {0} > {}", Timeline::MAX_GET_VECTORED_KEYS)]
Oversized(u64),
#[error("Requested at invalid LSN: {0}")]
InvalidLsn(Lsn),
}
#[derive(Clone, Copy)]
pub enum LogicalSizeCalculationCause {
Initial,
@@ -496,45 +456,6 @@ pub(crate) enum WaitLsnError {
Timeout(String),
}
// The impls below achieve cancellation mapping for errors.
// Perhaps there's a way of achieving this with less cruft.
impl From<CreateImageLayersError> for CompactionError {
fn from(e: CreateImageLayersError) -> Self {
match e {
CreateImageLayersError::Cancelled => CompactionError::ShuttingDown,
_ => CompactionError::Other(e.into()),
}
}
}
impl From<CreateImageLayersError> for FlushLayerError {
fn from(e: CreateImageLayersError) -> Self {
match e {
CreateImageLayersError::Cancelled => FlushLayerError::Cancelled,
any => FlushLayerError::CreateImageLayersError(any),
}
}
}
impl From<PageReconstructError> for CreateImageLayersError {
fn from(e: PageReconstructError) -> Self {
match e {
PageReconstructError::Cancelled => CreateImageLayersError::Cancelled,
_ => CreateImageLayersError::PageReconstructError(e),
}
}
}
impl From<GetVectoredError> for CreateImageLayersError {
fn from(e: GetVectoredError) -> Self {
match e {
GetVectoredError::Cancelled => CreateImageLayersError::Cancelled,
_ => CreateImageLayersError::GetVectoredError(e),
}
}
}
/// Public interface functions
impl Timeline {
/// Get the LSN where this branch was created
@@ -654,57 +575,6 @@ impl Timeline {
res
}
pub(crate) const MAX_GET_VECTORED_KEYS: u64 = 32;
/// Look up multiple page versions at a given LSN
///
/// This naive implementation will be replaced with a more efficient one
/// which actually vectorizes the read path.
pub(crate) async fn get_vectored(
&self,
key_ranges: &[Range<Key>],
lsn: Lsn,
ctx: &RequestContext,
) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
if !lsn.is_valid() {
return Err(GetVectoredError::InvalidLsn(lsn));
}
let key_count = key_ranges
.iter()
.map(|range| key_range_size(range) as u64)
.sum();
if key_count > Timeline::MAX_GET_VECTORED_KEYS {
return Err(GetVectoredError::Oversized(key_count));
}
let _timer = crate::metrics::GET_VECTORED_LATENCY
.for_task_kind(ctx.task_kind())
.map(|t| t.start_timer());
let mut values = BTreeMap::new();
for range in key_ranges {
let mut key = range.start;
while key != range.end {
assert!(!self.shard_identity.is_key_disposable(&key));
let block = self.get(key, lsn, ctx).await;
if matches!(
block,
Err(PageReconstructError::Cancelled | PageReconstructError::AncestorStopping(_))
) {
return Err(GetVectoredError::Cancelled);
}
values.insert(key, block);
key = key.next();
}
}
Ok(values)
}
/// Get last or prev record separately. Same as get_last_record_rlsn().last/prev.
pub fn get_last_record_lsn(&self) -> Lsn {
self.last_record_lsn.load().last
@@ -2712,7 +2582,7 @@ impl Timeline {
return;
}
err @ Err(
FlushLayerError::Other(_) | FlushLayerError::CreateImageLayersError(_),
FlushLayerError::Other(_) | FlushLayerError::PageReconstructError(_),
) => {
error!("could not flush frozen layer: {err:?}");
break err;
@@ -2989,21 +2859,6 @@ impl Timeline {
Ok(())
}
pub(crate) async fn preserve_initdb_archive(&self) -> anyhow::Result<()> {
if let Some(remote_client) = &self.remote_client {
remote_client
.preserve_initdb_archive(
&self.tenant_shard_id.tenant_id,
&self.timeline_id,
&self.cancel,
)
.await?;
} else {
bail!("No remote storage configured, but was asked to backup the initdb archive for {} / {}", self.tenant_shard_id.tenant_id, self.timeline_id);
}
Ok(())
}
// Write out the given frozen in-memory layer as a new L0 delta file. This L0 file will not be tracked
// in the layer map immediately. The caller is responsible for putting it into the layer map.
async fn create_delta_layer(
@@ -3095,7 +2950,11 @@ impl Timeline {
}
// Is it time to create a new image layer for the given partition?
async fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> bool {
async fn time_for_new_image_layer(
&self,
partition: &KeySpace,
lsn: Lsn,
) -> anyhow::Result<bool> {
let threshold = self.get_image_creation_threshold();
let guard = self.layers.read().await;
@@ -3115,20 +2974,20 @@ impl Timeline {
// but the range is already covered by image layers at more recent LSNs. Before we
// create a new image layer, check if the range is already covered at more recent LSNs.
if !layers
.image_layer_exists(&img_range, &(Lsn::min(lsn, *cutoff_lsn)..lsn + 1))
.image_layer_exists(&img_range, &(Lsn::min(lsn, *cutoff_lsn)..lsn + 1))?
{
debug!(
"Force generation of layer {}-{} wanted by GC, cutoff={}, lsn={})",
img_range.start, img_range.end, cutoff_lsn, lsn
);
return true;
return Ok(true);
}
}
}
}
for part_range in &partition.ranges {
let image_coverage = layers.image_coverage(part_range, lsn);
let image_coverage = layers.image_coverage(part_range, lsn)?;
for (img_range, last_img) in image_coverage {
let img_lsn = if let Some(last_img) = last_img {
last_img.get_lsn_range().end
@@ -3149,7 +3008,7 @@ impl Timeline {
// after we read last_record_lsn, which is passed here in the 'lsn' argument.
if img_lsn < lsn {
let num_deltas =
layers.count_deltas(&img_range, &(img_lsn..lsn), Some(threshold));
layers.count_deltas(&img_range, &(img_lsn..lsn), Some(threshold))?;
max_deltas = max_deltas.max(num_deltas);
if num_deltas >= threshold {
@@ -3157,7 +3016,7 @@ impl Timeline {
"key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
img_range.start, img_range.end, num_deltas, img_lsn, lsn
);
return true;
return Ok(true);
}
}
}
@@ -3167,7 +3026,7 @@ impl Timeline {
max_deltas,
"none of the partitioned ranges had >= {threshold} deltas"
);
false
Ok(false)
}
#[tracing::instrument(skip_all, fields(%lsn, %force))]
@@ -3177,7 +3036,7 @@ impl Timeline {
lsn: Lsn,
force: bool,
ctx: &RequestContext,
) -> Result<Vec<ResidentLayer>, CreateImageLayersError> {
) -> Result<Vec<ResidentLayer>, PageReconstructError> {
let timer = self.metrics.create_images_time_histo.start_timer();
let mut image_layers = Vec::new();
@@ -3195,7 +3054,7 @@ impl Timeline {
for partition in partitioning.parts.iter() {
let img_range = start..partition.ranges.last().unwrap().end;
start = img_range.end;
if force || self.time_for_new_image_layer(partition, lsn).await {
if force || self.time_for_new_image_layer(partition, lsn).await? {
let mut image_layer_writer = ImageLayerWriter::new(
self.conf,
self.timeline_id,
@@ -3206,12 +3065,10 @@ impl Timeline {
.await?;
fail_point!("image-layer-writer-fail-before-finish", |_| {
Err(CreateImageLayersError::Other(anyhow::anyhow!(
Err(PageReconstructError::Other(anyhow::anyhow!(
"failpoint image-layer-writer-fail-before-finish"
)))
});
let mut key_request_accum = KeySpaceAccum::new();
for range in &partition.ranges {
let mut key = range.start;
while key < range.end {
@@ -3224,55 +3081,34 @@ impl Timeline {
key = key.next();
continue;
}
key_request_accum.add_key(key);
if key_request_accum.size() >= Timeline::MAX_GET_VECTORED_KEYS
|| key.next() == range.end
{
let results = self
.get_vectored(
&key_request_accum.consume_keyspace().ranges,
lsn,
ctx,
)
.await?;
for (img_key, img) in results {
let img = match img {
Ok(img) => img,
Err(err) => {
// If we fail to reconstruct a VM or FSM page, we can zero the
// page without losing any actual user data. That seems better
// than failing repeatedly and getting stuck.
//
// We had a bug at one point, where we truncated the FSM and VM
// in the pageserver, but the Postgres didn't know about that
// and continued to generate incremental WAL records for pages
// that didn't exist in the pageserver. Trying to replay those
// WAL records failed to find the previous image of the page.
// This special case allows us to recover from that situation.
// See https://github.com/neondatabase/neon/issues/2601.
//
// Unfortunately we cannot do this for the main fork, or for
// any metadata keys, as that would lead to actual data
// loss.
if is_rel_fsm_block_key(img_key)
|| is_rel_vm_block_key(img_key)
{
warn!("could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}");
ZERO_PAGE.clone()
} else {
return Err(
CreateImageLayersError::PageReconstructError(err),
);
}
}
};
image_layer_writer.put_image(img_key, &img).await?;
let img = match self.get(key, lsn, ctx).await {
Ok(img) => img,
Err(err) => {
// If we fail to reconstruct a VM or FSM page, we can zero the
// page without losing any actual user data. That seems better
// than failing repeatedly and getting stuck.
//
// We had a bug at one point, where we truncated the FSM and VM
// in the pageserver, but the Postgres didn't know about that
// and continued to generate incremental WAL records for pages
// that didn't exist in the pageserver. Trying to replay those
// WAL records failed to find the previous image of the page.
// This special case allows us to recover from that situation.
// See https://github.com/neondatabase/neon/issues/2601.
//
// Unfortunately we cannot do this for the main fork, or for
// any metadata keys, as that would lead to actual data
// loss.
if is_rel_fsm_block_key(key) || is_rel_vm_block_key(key) {
warn!("could not reconstruct FSM or VM key {key}, filling with zeros: {err:?}");
ZERO_PAGE.clone()
} else {
return Err(err);
}
}
}
};
image_layer_writer.put_image(key, &img).await?;
key = key.next();
}
}
@@ -3648,7 +3484,7 @@ impl Timeline {
// does not make much sense, because the largest holes will correspond to field1/field2 changes.
// But we are mostly interested in eliminating holes which cause generation of excessive image layers.
// That is why it is better to measure the size of a hole as the number of covering image layers.
let coverage_size = layers.image_coverage(&key_range, last_record_lsn).len();
let coverage_size = layers.image_coverage(&key_range, last_record_lsn)?.len();
if coverage_size >= min_hole_coverage_size {
heap.push(Hole {
key_range,
@@ -4274,7 +4110,7 @@ impl Timeline {
// we cannot remove C, even though it's older than 2500, because
// the delta layer 2000-3000 depends on it.
if !layers
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))
.image_layer_exists(&l.get_key_range(), &(l.get_lsn_range().end..new_gc_cutoff))?
{
debug!("keeping {} because it is the latest layer", l.filename());
// Collect delta key ranges that need image layers to allow garbage
@@ -4404,7 +4240,7 @@ impl Timeline {
.walredo_mgr
.request_redo(key, request_lsn, data.img, data.records, self.pg_version)
.await
.context("reconstruct a page image")
.context("Failed to reconstruct a page image:")
{
Ok(img) => img,
Err(e) => return Err(PageReconstructError::WalRedo(e)),

View File

@@ -126,27 +126,6 @@ pub(super) struct UploadQueueStopped {
pub(super) deleted_at: SetDeletedFlagProgress,
}
#[derive(thiserror::Error, Debug)]
pub(crate) enum NotInitialized {
#[error("queue is in state Uninitialized")]
Uninitialized,
#[error("queue is in state Stopping")]
Stopped,
#[error("queue is shutting down")]
ShuttingDown,
}
impl NotInitialized {
pub(crate) fn is_stopping(&self) -> bool {
use NotInitialized::*;
match self {
Uninitialized => false,
Stopped => true,
ShuttingDown => true,
}
}
}
impl UploadQueue {
pub(crate) fn initialize_empty_remote(
&mut self,
@@ -235,17 +214,17 @@ impl UploadQueue {
}
pub(crate) fn initialized_mut(&mut self) -> anyhow::Result<&mut UploadQueueInitialized> {
use UploadQueue::*;
match self {
Uninitialized => Err(NotInitialized::Uninitialized.into()),
Initialized(x) => {
if x.shutting_down {
Err(NotInitialized::ShuttingDown.into())
} else {
UploadQueue::Uninitialized | UploadQueue::Stopped(_) => {
anyhow::bail!("queue is in state {}", self.as_str())
}
UploadQueue::Initialized(x) => {
if !x.shutting_down {
Ok(x)
} else {
anyhow::bail!("queue is shutting down")
}
}
Stopped(_) => Err(NotInitialized::Stopped.into()),
}
}

View File

@@ -11,28 +11,18 @@
//! src/backend/storage/file/fd.c
//!
use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC};
use crate::page_cache::PageWriteGuard;
use crate::tenant::TENANTS_SEGMENT_NAME;
use camino::{Utf8Path, Utf8PathBuf};
use once_cell::sync::OnceCell;
use pageserver_api::shard::TenantShardId;
use std::fs::{self, File};
use std::fs::{self, File, OpenOptions};
use std::io::{Error, ErrorKind, Seek, SeekFrom};
use tokio_epoll_uring::IoBufMut;
use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
use std::os::unix::fs::FileExt;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use tokio::time::Instant;
use utils::fs_ext;
mod io_engine;
mod open_options;
pub use io_engine::IoEngineKind;
pub(crate) use open_options::*;
///
/// A virtual file descriptor. You can use this just like std::fs::File, but internally
/// the underlying file is closed if the system is low on file descriptors,
@@ -116,38 +106,7 @@ struct SlotInner {
tag: u64,
/// the underlying file
file: Option<OwnedFd>,
}
/// Impl of [`tokio_epoll_uring::IoBuf`] and [`tokio_epoll_uring::IoBufMut`] for [`PageWriteGuard`].
struct PageWriteGuardBuf {
page: PageWriteGuard<'static>,
init_up_to: usize,
}
// Safety: the [`PageWriteGuard`] gives us exclusive ownership of the page cache slot,
// and the location remains stable even if [`Self`] or the [`PageWriteGuard`] is moved.
unsafe impl tokio_epoll_uring::IoBuf for PageWriteGuardBuf {
fn stable_ptr(&self) -> *const u8 {
self.page.as_ptr()
}
fn bytes_init(&self) -> usize {
self.init_up_to
}
fn bytes_total(&self) -> usize {
self.page.len()
}
}
// Safety: see above, plus: the ownership of [`PageWriteGuard`] means exclusive access,
// hence it's safe to hand out the `stable_mut_ptr()`.
unsafe impl tokio_epoll_uring::IoBufMut for PageWriteGuardBuf {
fn stable_mut_ptr(&mut self) -> *mut u8 {
self.page.as_mut_ptr()
}
unsafe fn set_init(&mut self, pos: usize) {
assert!(pos <= self.page.len());
self.init_up_to = pos;
}
file: Option<File>,
}
impl OpenFiles {
@@ -315,10 +274,6 @@ macro_rules! with_file {
let $ident = $this.lock_file().await?;
observe_duration!($op, $($body)*)
}};
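// Same as above, but binds the file guard mutably, for operations (such as seek)
// that need `&mut` access to the underlying file.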
($this:expr, $op:expr, | mut $ident:ident | $($body:tt)*) => {{
let mut $ident = $this.lock_file().await?;
observe_duration!($op, $($body)*)
}};
}
impl VirtualFile {
@@ -371,9 +326,7 @@ impl VirtualFile {
// NB: there is also StorageIoOperation::OpenAfterReplace which is for the case
// where our caller doesn't get to use the returned VirtualFile before its
// slot gets re-used by someone else.
let file = observe_duration!(StorageIoOperation::Open, {
open_options.open(path.as_std_path()).await?
});
let file = observe_duration!(StorageIoOperation::Open, open_options.open(path))?;
// Strip all options other than read and write.
//
@@ -447,13 +400,15 @@ impl VirtualFile {
/// Call File::sync_all() on the underlying File.
pub async fn sync_all(&self) -> Result<(), Error> {
with_file!(self, StorageIoOperation::Fsync, |file_guard| file_guard
.with_std_file(|std_file| std_file.sync_all()))
with_file!(self, StorageIoOperation::Fsync, |file| file
.as_ref()
.sync_all())
}
pub async fn metadata(&self) -> Result<fs::Metadata, Error> {
with_file!(self, StorageIoOperation::Metadata, |file_guard| file_guard
.with_std_file(|std_file| std_file.metadata()))
with_file!(self, StorageIoOperation::Metadata, |file| file
.as_ref()
.metadata())
}
/// Helper function internal to `VirtualFile` that looks up the underlying File,
@@ -462,7 +417,7 @@ impl VirtualFile {
///
/// We are doing it via a macro as Rust doesn't support async closures that
/// take on parameters with lifetimes.
async fn lock_file(&self) -> Result<FileGuard, Error> {
async fn lock_file(&self) -> Result<FileGuard<'_>, Error> {
let open_files = get_open_files();
let mut handle_guard = {
@@ -508,9 +463,10 @@ impl VirtualFile {
// NB: we use StorageIoOperation::OpenAfterReplace for this to distinguish this
// case from StorageIoOperation::Open. This helps with identifying thrashing
// of the virtual file descriptor cache.
let file = observe_duration!(StorageIoOperation::OpenAfterReplace, {
self.open_options.open(self.path.as_std_path()).await?
});
let file = observe_duration!(
StorageIoOperation::OpenAfterReplace,
self.open_options.open(&self.path)
)?;
// Store the File in the slot and update the handle in the VirtualFile
// to point to it.
@@ -535,8 +491,9 @@ impl VirtualFile {
self.pos = offset;
}
SeekFrom::End(offset) => {
self.pos = with_file!(self, StorageIoOperation::Seek, |mut file_guard| file_guard
.with_std_file_mut(|std_file| std_file.seek(SeekFrom::End(offset))))?
self.pos = with_file!(self, StorageIoOperation::Seek, |file| file
.as_ref()
.seek(SeekFrom::End(offset)))?
}
SeekFrom::Current(offset) => {
let pos = self.pos as i128 + offset as i128;
@@ -555,28 +512,25 @@ impl VirtualFile {
Ok(self.pos)
}
pub async fn read_exact_at<B>(&self, buf: B, offset: u64) -> Result<B, Error>
where
B: IoBufMut + Send,
{
let (buf, res) =
read_exact_at_impl(buf, offset, |buf, offset| self.read_at(buf, offset)).await;
res.map(|()| buf)
}
/// Like [`Self::read_exact_at`] but for [`PageWriteGuard`].
pub async fn read_exact_at_page(
&self,
page: PageWriteGuard<'static>,
offset: u64,
) -> Result<PageWriteGuard<'static>, Error> {
let buf = PageWriteGuardBuf {
page,
init_up_to: 0,
};
let res = self.read_exact_at(buf, offset).await;
res.map(|PageWriteGuardBuf { page, .. }| page)
.map_err(|e| Error::new(ErrorKind::Other, e))
// Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135
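// Read exactly `buf.len()` bytes starting at `offset`, looping over short reads and
// retrying on EINTR; fails with UnexpectedEof if the file ends before the buffer is full.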
pub async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> Result<(), Error> {
while !buf.is_empty() {
match self.read_at(buf, offset).await {
Ok(0) => {
return Err(Error::new(
std::io::ErrorKind::UnexpectedEof,
"failed to fill whole buffer",
))
}
Ok(n) => {
buf = &mut buf[n..];
offset += n as u64;
}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
Err(e) => return Err(e),
}
}
Ok(())
}
// Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#219-235
@@ -626,35 +580,22 @@ impl VirtualFile {
Ok(n)
}
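/// Positional read into `buf` at `offset`. On success, the number of bytes read is also
/// added to the per-tenant/shard/timeline `STORAGE_IO_SIZE` "read" metric.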
pub(crate) async fn read_at<B>(&self, buf: B, offset: u64) -> (B, Result<usize, Error>)
where
B: tokio_epoll_uring::BoundedBufMut + Send,
{
let file_guard = match self.lock_file().await {
Ok(file_guard) => file_guard,
Err(e) => return (buf, Err(e)),
};
observe_duration!(StorageIoOperation::Read, {
let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await;
if let Ok(size) = res {
STORAGE_IO_SIZE
.with_label_values(&[
"read",
&self.tenant_id,
&self.shard_id,
&self.timeline_id,
])
.add(size as i64);
}
(buf, res)
})
pub async fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
let result = with_file!(self, StorageIoOperation::Read, |file| file
.as_ref()
.read_at(buf, offset));
if let Ok(size) = result {
STORAGE_IO_SIZE
.with_label_values(&["read", &self.tenant_id, &self.shard_id, &self.timeline_id])
.add(size as i64);
}
result
}
async fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
let result = with_file!(self, StorageIoOperation::Write, |file_guard| {
file_guard.with_std_file(|std_file| std_file.write_at(buf, offset))
});
let result = with_file!(self, StorageIoOperation::Write, |file| file
.as_ref()
.write_at(buf, offset));
if let Ok(size) = result {
STORAGE_IO_SIZE
.with_label_values(&["write", &self.tenant_id, &self.shard_id, &self.timeline_id])
@@ -664,241 +605,18 @@ impl VirtualFile {
}
}
// Adapted from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135
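// Exact-read loop over an owned buffer: keep calling `read_at` on the not-yet-filled tail
// of the slice until the buffer is full, EOF is hit, or a non-EINTR error occurs.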
pub async fn read_exact_at_impl<B, F, Fut>(
buf: B,
mut offset: u64,
mut read_at: F,
) -> (B, std::io::Result<()>)
where
B: IoBufMut + Send,
F: FnMut(tokio_epoll_uring::Slice<B>, u64) -> Fut,
Fut: std::future::Future<Output = (tokio_epoll_uring::Slice<B>, std::io::Result<usize>)>,
{
use tokio_epoll_uring::BoundedBuf;
let mut buf: tokio_epoll_uring::Slice<B> = buf.slice_full(); // includes all the uninitialized memory
while buf.bytes_total() != 0 {
let res;
(buf, res) = read_at(buf, offset).await;
match res {
Ok(0) => break,
Ok(n) => {
buf = buf.slice(n..);
offset += n as u64;
}
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
Err(e) => return (buf.into_inner(), Err(e)),
}
}
// NB: don't use `buf.is_empty()` here; it is from the
// `impl Deref for Slice { Target = [u8] }`; the &[u8]
// returned by it only covers the initialized portion of `buf`.
// Whereas we're interested in ensuring that we filled the entire
// buffer that the user passed in.
if buf.bytes_total() != 0 {
(
buf.into_inner(),
Err(std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
"failed to fill whole buffer",
)),
)
} else {
assert_eq!(buf.len(), buf.bytes_total());
(buf.into_inner(), Ok(()))
}
struct FileGuard<'a> {
slot_guard: RwLockReadGuard<'a, SlotInner>,
}
#[cfg(test)]
mod test_read_exact_at_impl {
use std::{collections::VecDeque, sync::Arc};
use tokio_epoll_uring::{BoundedBuf, BoundedBufMut};
use super::read_exact_at_impl;
struct Expectation {
offset: u64,
bytes_total: usize,
result: std::io::Result<Vec<u8>>,
}
struct MockReadAt {
expectations: VecDeque<Expectation>,
}
impl MockReadAt {
async fn read_at(
&mut self,
mut buf: tokio_epoll_uring::Slice<Vec<u8>>,
offset: u64,
) -> (tokio_epoll_uring::Slice<Vec<u8>>, std::io::Result<usize>) {
let exp = self
.expectations
.pop_front()
.expect("read_at called but we have no expectations left");
assert_eq!(exp.offset, offset);
assert_eq!(exp.bytes_total, buf.bytes_total());
match exp.result {
Ok(bytes) => {
assert!(bytes.len() <= buf.bytes_total());
buf.put_slice(&bytes);
(buf, Ok(bytes.len()))
}
Err(e) => (buf, Err(e)),
}
}
}
impl Drop for MockReadAt {
fn drop(&mut self) {
assert_eq!(self.expectations.len(), 0);
}
}
#[tokio::test]
async fn test_basic() {
let buf = Vec::with_capacity(5);
let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {
expectations: VecDeque::from(vec![Expectation {
offset: 0,
bytes_total: 5,
result: Ok(vec![b'a', b'b', b'c', b'd', b'e']),
}]),
}));
let (buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
let mock_read_at = Arc::clone(&mock_read_at);
async move { mock_read_at.lock().await.read_at(buf, offset).await }
})
.await;
assert!(res.is_ok());
assert_eq!(buf, vec![b'a', b'b', b'c', b'd', b'e']);
}
#[tokio::test]
async fn test_empty_buf_issues_no_syscall() {
let buf = Vec::new();
let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {
expectations: VecDeque::new(),
}));
let (_buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
let mock_read_at = Arc::clone(&mock_read_at);
async move { mock_read_at.lock().await.read_at(buf, offset).await }
})
.await;
assert!(res.is_ok());
}
#[tokio::test]
async fn test_two_read_at_calls_needed_until_buf_filled() {
let buf = Vec::with_capacity(4);
let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {
expectations: VecDeque::from(vec![
Expectation {
offset: 0,
bytes_total: 4,
result: Ok(vec![b'a', b'b']),
},
Expectation {
offset: 2,
bytes_total: 2,
result: Ok(vec![b'c', b'd']),
},
]),
}));
let (buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
let mock_read_at = Arc::clone(&mock_read_at);
async move { mock_read_at.lock().await.read_at(buf, offset).await }
})
.await;
assert!(res.is_ok());
assert_eq!(buf, vec![b'a', b'b', b'c', b'd']);
}
#[tokio::test]
async fn test_eof_before_buffer_full() {
let buf = Vec::with_capacity(3);
let mock_read_at = Arc::new(tokio::sync::Mutex::new(MockReadAt {
expectations: VecDeque::from(vec![
Expectation {
offset: 0,
bytes_total: 3,
result: Ok(vec![b'a']),
},
Expectation {
offset: 1,
bytes_total: 2,
result: Ok(vec![b'b']),
},
Expectation {
offset: 2,
bytes_total: 1,
result: Ok(vec![]),
},
]),
}));
let (_buf, res) = read_exact_at_impl(buf, 0, |buf, offset| {
let mock_read_at = Arc::clone(&mock_read_at);
async move { mock_read_at.lock().await.read_at(buf, offset).await }
})
.await;
let Err(err) = res else {
panic!("should return an error");
};
assert_eq!(err.kind(), std::io::ErrorKind::UnexpectedEof);
assert_eq!(format!("{err}"), "failed to fill whole buffer");
// buffer contents on error are unspecified
}
}
struct FileGuard {
slot_guard: RwLockReadGuard<'static, SlotInner>,
}
impl AsRef<OwnedFd> for FileGuard {
fn as_ref(&self) -> &OwnedFd {
impl<'a> AsRef<File> for FileGuard<'a> {
fn as_ref(&self) -> &File {
// This unwrap is safe because we only create `FileGuard`s
// if we know that the file is Some.
self.slot_guard.file.as_ref().unwrap()
}
}
impl FileGuard {
/// Soft deprecation: we'll move VirtualFile to async APIs and remove this function eventually.
fn with_std_file<F, R>(&self, with: F) -> R
where
F: FnOnce(&File) -> R,
{
// SAFETY:
// - lifetime of the fd: `file` doesn't outlive the OwnedFd stored in `self`.
// - `&` usage below: `self` is `&`, hence the Rust type system guarantees there is no `&mut`
let file = unsafe { File::from_raw_fd(self.as_ref().as_raw_fd()) };
let res = with(&file);
let _ = file.into_raw_fd();
res
}
/// Soft deprecation: we'll move VirtualFile to async APIs and remove this function eventually.
fn with_std_file_mut<F, R>(&mut self, with: F) -> R
where
F: FnOnce(&mut File) -> R,
{
// SAFETY:
// - lifetime of the fd: `file` doesn't outlive the OwnedFd stored in `self`.
// - &mut usage below: `self` is `&mut`, hence this call is the only task/thread that has control over the underlying fd
let mut file = unsafe { File::from_raw_fd(self.as_ref().as_raw_fd()) };
let res = with(&mut file);
let _ = file.into_raw_fd();
res
}
}
impl tokio_epoll_uring::IoFd for FileGuard {
unsafe fn as_fd(&self) -> RawFd {
let owned_fd: &OwnedFd = self.as_ref();
owned_fd.as_raw_fd()
}
}
#[cfg(test)]
impl VirtualFile {
pub(crate) async fn read_blk(
@@ -906,19 +624,16 @@ impl VirtualFile {
blknum: u32,
) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
use crate::page_cache::PAGE_SZ;
let buf = vec![0; PAGE_SZ];
let buf = self
.read_exact_at(buf, blknum as u64 * (PAGE_SZ as u64))
let mut buf = [0; PAGE_SZ];
self.read_exact_at(&mut buf, blknum as u64 * (PAGE_SZ as u64))
.await?;
Ok(crate::tenant::block_io::BlockLease::Vec(buf))
Ok(std::sync::Arc::new(buf).into())
}
async fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<(), Error> {
let mut tmp = vec![0; 128];
loop {
let res;
(tmp, res) = self.read_at(tmp, self.pos).await;
match res {
let mut tmp = [0; 128];
match self.read_at(&mut tmp, self.pos).await {
Ok(0) => return Ok(()),
Ok(n) => {
self.pos += n as u64;
@@ -994,12 +709,10 @@ impl OpenFiles {
/// Initialize the virtual file module. This must be called once at page
/// server startup.
///
#[cfg(not(test))]
pub fn init(num_slots: usize, engine: IoEngineKind) {
pub fn init(num_slots: usize) {
if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
panic!("virtual_file::init called twice");
}
io_engine::init(engine);
crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
}
@@ -1044,10 +757,10 @@ mod tests {
}
impl MaybeVirtualFile {
async fn read_exact_at(&self, mut buf: Vec<u8>, offset: u64) -> Result<Vec<u8>, Error> {
async fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
match self {
MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(buf, offset).await,
MaybeVirtualFile::File(file) => file.read_exact_at(&mut buf, offset).map(|()| buf),
MaybeVirtualFile::File(file) => file.read_exact_at(buf, offset),
}
}
async fn write_all_at(&self, buf: &[u8], offset: u64) -> Result<(), Error> {
@@ -1089,14 +802,14 @@ mod tests {
// Helper function to slurp a portion of a file into a string
async fn read_string_at(&mut self, pos: u64, len: usize) -> Result<String, Error> {
let buf = vec![0; len];
let buf = self.read_exact_at(buf, pos).await?;
let mut buf = vec![0; len];
self.read_exact_at(&mut buf, pos).await?;
Ok(String::from_utf8(buf).unwrap())
}
}
#[tokio::test]
async fn test_virtual_files() -> anyhow::Result<()> {
async fn test_virtual_files() -> Result<(), Error> {
// The real work is done in the test_files() helper function. This
// allows us to run the same set of tests against a native File, and
// VirtualFile. We trust the native Files and wouldn't need to test them,
@@ -1112,17 +825,14 @@ mod tests {
}
#[tokio::test]
async fn test_physical_files() -> anyhow::Result<()> {
async fn test_physical_files() -> Result<(), Error> {
test_files("physical_files", |path, open_options| async move {
Ok(MaybeVirtualFile::File({
let owned_fd = open_options.open(path.as_std_path()).await?;
File::from(owned_fd)
}))
Ok(MaybeVirtualFile::File(open_options.open(path)?))
})
.await
}
async fn test_files<OF, FT>(testname: &str, openfunc: OF) -> anyhow::Result<()>
async fn test_files<OF, FT>(testname: &str, openfunc: OF) -> Result<(), Error>
where
OF: Fn(Utf8PathBuf, OpenOptions) -> FT,
FT: Future<Output = Result<MaybeVirtualFile, std::io::Error>>,
@@ -1266,11 +976,11 @@ mod tests {
for _threadno in 0..THREADS {
let files = files.clone();
let hdl = rt.spawn(async move {
let mut buf = vec![0u8; SIZE];
let mut buf = [0u8; SIZE];
let mut rng = rand::rngs::OsRng;
for _ in 1..1000 {
let f = &files[rng.gen_range(0..files.len())];
buf = f.read_exact_at(buf, 0).await.unwrap();
f.read_exact_at(&mut buf, 0).await.unwrap();
assert!(buf == SAMPLE);
}
});

View File

@@ -1,114 +0,0 @@
//! [`super::VirtualFile`] supports different IO engines.
//!
//! The [`IoEngineKind`] enum identifies them.
//!
//! The choice of IO engine is global.
//! Initialize using [`init`].
//!
//! Then use [`get`] and [`super::OpenOptions`].
#[derive(
Copy,
Clone,
PartialEq,
Eq,
Hash,
strum_macros::EnumString,
strum_macros::Display,
serde_with::DeserializeFromStr,
serde_with::SerializeDisplay,
Debug,
)]
#[strum(serialize_all = "kebab-case")]
pub enum IoEngineKind {
StdFs,
#[cfg(target_os = "linux")]
TokioEpollUring,
}
static IO_ENGINE: once_cell::sync::OnceCell<IoEngineKind> = once_cell::sync::OnceCell::new();
#[cfg(not(test))]
pub(super) fn init(engine: IoEngineKind) {
if IO_ENGINE.set(engine).is_err() {
panic!("called twice");
}
crate::metrics::virtual_file_io_engine::KIND
.with_label_values(&[&format!("{engine}")])
.set(1);
}
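/// Return the selected IO engine. In unit tests the engine is chosen lazily from the
/// `NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE` env var, falling back to the default.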
pub(super) fn get() -> &'static IoEngineKind {
#[cfg(test)]
{
let env_var_name = "NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE";
IO_ENGINE.get_or_init(|| match std::env::var(env_var_name) {
Ok(v) => match v.parse::<IoEngineKind>() {
Ok(engine_kind) => engine_kind,
Err(e) => {
panic!("invalid VirtualFile io engine for env var {env_var_name}: {e:#}: {v:?}")
}
},
Err(std::env::VarError::NotPresent) => {
crate::config::defaults::DEFAULT_VIRTUAL_FILE_IO_ENGINE
.parse()
.unwrap()
}
Err(std::env::VarError::NotUnicode(_)) => {
panic!("env var {env_var_name} is not unicode");
}
})
}
#[cfg(not(test))]
IO_ENGINE.get().unwrap()
}
use std::os::unix::prelude::FileExt;
use super::FileGuard;
impl IoEngineKind {
pub(super) async fn read_at<B>(
&self,
file_guard: FileGuard,
offset: u64,
mut buf: B,
) -> ((FileGuard, B), std::io::Result<usize>)
where
B: tokio_epoll_uring::BoundedBufMut + Send,
{
match self {
IoEngineKind::StdFs => {
// SAFETY: `dst` only lives at most as long as this match arm, during which buf remains valid memory.
let dst = unsafe {
std::slice::from_raw_parts_mut(buf.stable_mut_ptr(), buf.bytes_total())
};
let res = file_guard.with_std_file(|std_file| std_file.read_at(dst, offset));
if let Ok(nbytes) = &res {
assert!(*nbytes <= buf.bytes_total());
// SAFETY: see above assertion
unsafe {
buf.set_init(*nbytes);
}
}
#[allow(dropping_references)]
drop(dst);
((file_guard, buf), res)
}
#[cfg(target_os = "linux")]
IoEngineKind::TokioEpollUring => {
let system = tokio_epoll_uring::thread_local_system().await;
let (resources, res) = system.read(file_guard, offset, buf).await;
(
resources,
res.map_err(|e| match e {
tokio_epoll_uring::Error::Op(e) => e,
tokio_epoll_uring::Error::System(system) => {
std::io::Error::new(std::io::ErrorKind::Other, system)
}
}),
)
}
}
}
}

View File

@@ -1,138 +0,0 @@
//! Enum-dispatch to the `OpenOptions` type of the respective [`super::IoEngineKind`];
use super::IoEngineKind;
use std::{os::fd::OwnedFd, path::Path};
#[derive(Debug, Clone)]
pub enum OpenOptions {
StdFs(std::fs::OpenOptions),
#[cfg(target_os = "linux")]
TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions),
}
impl Default for OpenOptions {
fn default() -> Self {
match super::io_engine::get() {
IoEngineKind::StdFs => Self::StdFs(std::fs::OpenOptions::new()),
#[cfg(target_os = "linux")]
IoEngineKind::TokioEpollUring => {
Self::TokioEpollUring(tokio_epoll_uring::ops::open_at::OpenOptions::new())
}
}
}
}
impl OpenOptions {
pub fn new() -> OpenOptions {
Self::default()
}
pub fn read(&mut self, read: bool) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.read(read);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.read(read);
}
}
self
}
pub fn write(&mut self, write: bool) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.write(write);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.write(write);
}
}
self
}
pub fn create(&mut self, create: bool) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.create(create);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.create(create);
}
}
self
}
pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.create_new(create_new);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.create_new(create_new);
}
}
self
}
pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.truncate(truncate);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.truncate(truncate);
}
}
self
}
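/// Open `path` with the accumulated options and return an `OwnedFd`; the
/// tokio-epoll-uring variant performs the open on the thread-local uring system.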
pub(in crate::virtual_file) async fn open(&self, path: &Path) -> std::io::Result<OwnedFd> {
match self {
OpenOptions::StdFs(x) => x.open(path).map(|file| file.into()),
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let system = tokio_epoll_uring::thread_local_system().await;
system.open(path, x).await.map_err(|e| match e {
tokio_epoll_uring::Error::Op(e) => e,
tokio_epoll_uring::Error::System(system) => {
std::io::Error::new(std::io::ErrorKind::Other, system)
}
})
}
}
}
}
impl std::os::unix::prelude::OpenOptionsExt for OpenOptions {
fn mode(&mut self, mode: u32) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.mode(mode);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.mode(mode);
}
}
self
}
fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions {
match self {
OpenOptions::StdFs(x) => {
let _ = x.custom_flags(flags);
}
#[cfg(target_os = "linux")]
OpenOptions::TokioEpollUring(x) => {
let _ = x.custom_flags(flags);
}
}
self
}
}

View File

@@ -33,12 +33,11 @@ use utils::failpoint_support;
use crate::context::RequestContext;
use crate::metrics::WAL_INGEST;
use crate::pgdatadir_mapping::{DatadirModification, Version};
use crate::pgdatadir_mapping::*;
use crate::tenant::PageReconstructError;
use crate::tenant::Timeline;
use crate::walrecord::*;
use crate::ZERO_PAGE;
use pageserver_api::key::rel_block_to_key;
use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM};
@@ -103,9 +102,7 @@ impl WalIngest {
buf.advance(decoded.main_data_offset);
assert!(!self.checkpoint_modified);
if decoded.xl_xid != pg_constants::INVALID_TRANSACTION_ID
&& self.checkpoint.update_next_xid(decoded.xl_xid)
{
if self.checkpoint.update_next_xid(decoded.xl_xid) {
self.checkpoint_modified = true;
}
@@ -333,13 +330,8 @@ impl WalIngest {
< 0
{
self.checkpoint.oldestXid = xlog_checkpoint.oldestXid;
self.checkpoint_modified = true;
}
// Write a new checkpoint key-value pair on every checkpoint record, even
// if nothing really changed. Not strictly required, but it seems nice to
// have some trace of the checkpoint records in the layer files at the same
// LSNs.
self.checkpoint_modified = true;
}
}
pg_constants::RM_LOGICALMSG_ID => {
@@ -1033,23 +1025,7 @@ impl WalIngest {
// Copy content
debug!("copying rel {} to {}, {} blocks", src_rel, dst_rel, nblocks);
for blknum in 0..nblocks {
// Sharding:
// - src and dst are always on the same shard, because they differ only by dbNode, and
// dbNode is not included in the hash inputs for sharding.
// - This WAL command is replayed on all shards, but each shard only copies the blocks
// that belong to it.
let src_key = rel_block_to_key(src_rel, blknum);
if !self.shard.is_key_local(&src_key) {
debug!(
"Skipping non-local key {} during XLOG_DBASE_CREATE",
src_key
);
continue;
}
debug!(
"copying block {} from {} ({}) to {}",
blknum, src_rel, src_key, dst_rel
);
debug!("copying block {} from {} to {}", blknum, src_rel, dst_rel);
let content = modification
.tline

View File

@@ -47,10 +47,11 @@ use crate::metrics::{
WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, WAL_REDO_RECORDS_HISTOGRAM,
WAL_REDO_RECORD_COUNTER, WAL_REDO_TIME,
};
use crate::pgdatadir_mapping::key_to_slru_block;
use crate::repository::Key;
use crate::walrecord::NeonWalRecord;
use pageserver_api::key::{key_to_rel_block, key_to_slru_block};
use pageserver_api::key::key_to_rel_block;
use pageserver_api::reltag::{RelTag, SlruKind};
use postgres_ffi::pg_constants;
use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM;
@@ -836,8 +837,9 @@ impl WalRedoProcess {
let mut proc = { input }; // TODO: remove this legacy rename, but this keeps the patch small.
let mut nwrite = 0usize;
let mut stdin_pollfds = [PollFd::new(proc.stdin.as_raw_fd(), PollFlags::POLLOUT)];
while nwrite < writebuf.len() {
let mut stdin_pollfds = [PollFd::new(&proc.stdin, PollFlags::POLLOUT)];
let n = loop {
match nix::poll::poll(&mut stdin_pollfds[..], wal_redo_timeout.as_millis() as i32) {
Err(nix::errno::Errno::EINTR) => continue,
@@ -876,6 +878,7 @@ impl WalRedoProcess {
// advancing processed responses number.
let mut output = self.stdout.lock().unwrap();
let mut stdout_pollfds = [PollFd::new(output.stdout.as_raw_fd(), PollFlags::POLLIN)];
let n_processed_responses = output.n_processed_responses;
while n_processed_responses + output.pending_responses.len() <= request_no {
// We expect the WAL redo process to respond with an 8k page image. We read it
@@ -883,7 +886,6 @@ impl WalRedoProcess {
let mut resultbuf = vec![0; BLCKSZ.into()];
let mut nresult: usize = 0; // # of bytes read into 'resultbuf' so far
while nresult < BLCKSZ.into() {
let mut stdout_pollfds = [PollFd::new(&output.stdout, PollFlags::POLLIN)];
// We do two things simultaneously: reading response from stdout
// and forward any logging information that the child writes to its stderr to the page server's log.
let n = loop {

View File

@@ -637,7 +637,7 @@ HandleAlterRole(AlterRoleStmt *stmt)
ListCell *option;
const char *role_name = stmt->role->rolename;
if (RoleIsNeonSuperuser(role_name) && !superuser())
if (RoleIsNeonSuperuser(role_name))
elog(ERROR, "can't ALTER neon_superuser");
foreach(option, stmt->options)

View File

@@ -15,7 +15,6 @@
#include "postgres.h"
#include "access/xlog.h"
#include "common/hashfn.h"
#include "fmgr.h"
#include "libpq-fe.h"
#include "libpq/libpq.h"
@@ -39,6 +38,17 @@
#define MIN_RECONNECT_INTERVAL_USEC 1000
#define MAX_RECONNECT_INTERVAL_USEC 1000000
bool connected = false;
PGconn *pageserver_conn = NULL;
/*
* WaitEventSet containing:
* - WL_SOCKET_READABLE on pageserver_conn,
* - WL_LATCH_SET on MyLatch, and
* - WL_EXIT_ON_PM_DEATH.
*/
WaitEventSet *pageserver_conn_wes = NULL;
/* GUCs */
char *neon_timeline;
char *neon_tenant;
@@ -49,40 +59,16 @@ char *neon_auth_token;
int readahead_buffer_size = 128;
int flush_every_n_requests = 8;
static int n_reconnect_attempts = 0;
static int max_reconnect_attempts = 60;
static int stripe_size;
static int n_reconnect_attempts = 0;
static int max_reconnect_attempts = 60;
#define MAX_PAGESERVER_CONNSTRING_SIZE 256
typedef struct
{
char connstring[MAX_SHARDS][MAX_PAGESERVER_CONNSTRING_SIZE];
size_t num_shards;
} ShardMap;
/*
* PagestoreShmemState is kept in shared memory. It contains the connection
* strings for each shard.
*
* The "neon.pageserver_connstring" GUC is marked with the PGC_SIGHUP option,
* allowing it to be changed using pg_reload_conf(). The control plane can
* update the connection string if the pageserver crashes, is relocated, or
* new shards are added. A parsed copy of the current value of the GUC is kept
* in shared memory, updated by the postmaster, because regular backends don't
* reload the config during query execution, but we might need to re-establish
* the pageserver connection with the new connection string even in the middle
* of a query.
*
* The shared memory copy is protected by a lockless algorithm using two
* atomic counters. The counters allow a backend to quickly check if the value
* has changed since last access, and to detect and retry copying the value if
* the postmaster changes the value concurrently. (Postmaster doesn't have a
* PGPROC entry and therefore cannot use LWLocks.)
*/
typedef struct
{
pg_atomic_uint64 begin_update_counter;
pg_atomic_uint64 end_update_counter;
ShardMap shard_map;
LWLockId lock;
pg_atomic_uint64 update_counter;
char pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
} PagestoreShmemState;
#if PG_VERSION_NUM >= 150000
@@ -92,242 +78,76 @@ static void walproposer_shmem_request(void);
static shmem_startup_hook_type prev_shmem_startup_hook;
static PagestoreShmemState *pagestore_shared;
static uint64 pagestore_local_counter = 0;
static char local_pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
/* This backend's per-shard connections */
typedef struct
{
PGconn *conn;
/*---
* WaitEventSet containing:
* - WL_SOCKET_READABLE on 'conn'
* - WL_LATCH_SET on MyLatch, and
* - WL_EXIT_ON_PM_DEATH.
*/
WaitEventSet *wes;
} PageServer;
static PageServer page_servers[MAX_SHARDS];
static bool pageserver_flush(shardno_t shard_no);
static void pageserver_disconnect(shardno_t shard_no);
static bool pageserver_flush(void);
static void pageserver_disconnect(void);
static bool
PagestoreShmemIsValid(void)
PagestoreShmemIsValid()
{
return pagestore_shared && UsedShmemSegAddr;
}
/*
* Parse a comma-separated list of connection strings into a ShardMap.
*
* If 'result' is NULL, just checks that the input is valid. If the input is
* not valid, returns false. The contents of *result are undefined in
* that case, and must not be relied on.
*/
static bool
ParseShardMap(const char *connstr, ShardMap *result)
{
const char *p;
int nshards = 0;
if (result)
memset(result, 0, sizeof(ShardMap));
p = connstr;
nshards = 0;
for (;;)
{
const char *sep;
size_t connstr_len;
sep = strchr(p, ',');
connstr_len = sep != NULL ? sep - p : strlen(p);
if (connstr_len == 0 && sep == NULL)
break; /* ignore trailing comma */
if (nshards >= MAX_SHARDS)
{
neon_log(LOG, "Too many shards");
return false;
}
if (connstr_len >= MAX_PAGESERVER_CONNSTRING_SIZE)
{
neon_log(LOG, "Connection string too long");
return false;
}
if (result)
{
memcpy(result->connstring[nshards], p, connstr_len);
result->connstring[nshards][connstr_len] = '\0';
}
nshards++;
if (sep == NULL)
break;
p = sep + 1;
}
if (result)
result->num_shards = nshards;
return true;
}
static bool
CheckPageserverConnstring(char **newval, void **extra, GucSource source)
{
char *p = *newval;
return ParseShardMap(p, NULL);
return strlen(*newval) < MAX_PAGESERVER_CONNSTRING_SIZE;
}
static void
AssignPageserverConnstring(const char *newval, void *extra)
{
ShardMap shard_map;
/*
* Only postmaster updates the copy in shared memory.
*/
if (!PagestoreShmemIsValid() || IsUnderPostmaster)
if (!PagestoreShmemIsValid())
return;
if (!ParseShardMap(newval, &shard_map))
{
/*
* shouldn't happen, because we already checked the value in
* CheckPageserverConnstring
*/
elog(ERROR, "could not parse shard map");
}
if (memcmp(&pagestore_shared->shard_map, &shard_map, sizeof(ShardMap)) != 0)
{
pg_atomic_add_fetch_u64(&pagestore_shared->begin_update_counter, 1);
pg_write_barrier();
memcpy(&pagestore_shared->shard_map, &shard_map, sizeof(ShardMap));
pg_write_barrier();
pg_atomic_add_fetch_u64(&pagestore_shared->end_update_counter, 1);
}
else
{
/* no change */
}
}
/*
* Get the current number of shards, and/or the connection string for a
* particular shard from the shard map in shared memory.
*
* If num_shards_p is not NULL, it is set to the current number of shards.
*
* If connstr_p is not NULL, the connection string for 'shard_no' is copied to
* it. It must point to a buffer at least MAX_PAGESERVER_CONNSTRING_SIZE bytes
* long.
*
* As a side-effect, if the shard map in shared memory had changed since the
* last call, terminates all existing connections to all pageservers.
*/
static void
load_shard_map(shardno_t shard_no, char *connstr_p, shardno_t *num_shards_p)
{
uint64 begin_update_counter;
uint64 end_update_counter;
ShardMap *shard_map = &pagestore_shared->shard_map;
shardno_t num_shards;
/*
* Postmaster can update the shared memory values concurrently, in which
* case we would copy a garbled mix of the old and new values. We will
* detect it because the counters won't match, and retry. But it's
* important that we don't do anything within the retry-loop that would
* depend on the string having valid contents.
*/
do
{
begin_update_counter = pg_atomic_read_u64(&pagestore_shared->begin_update_counter);
end_update_counter = pg_atomic_read_u64(&pagestore_shared->end_update_counter);
num_shards = shard_map->num_shards;
if (connstr_p && shard_no < MAX_SHARDS)
strlcpy(connstr_p, shard_map->connstring[shard_no], MAX_PAGESERVER_CONNSTRING_SIZE);
pg_memory_barrier();
}
while (begin_update_counter != end_update_counter
|| begin_update_counter != pg_atomic_read_u64(&pagestore_shared->begin_update_counter)
|| end_update_counter != pg_atomic_read_u64(&pagestore_shared->end_update_counter));
if (connstr_p && shard_no >= num_shards)
neon_log(ERROR, "Shard %d is greater or equal than number of shards %d",
shard_no, num_shards);
/*
* If any of the connection strings changed, reset all connections.
*/
if (pagestore_local_counter != end_update_counter)
{
for (shardno_t i = 0; i < MAX_SHARDS; i++)
{
if (page_servers[i].conn)
pageserver_disconnect(i);
}
pagestore_local_counter = end_update_counter;
}
if (num_shards_p)
*num_shards_p = num_shards;
}
#define MB (1024*1024)
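/*
 * Map a buffer tag to a shard number: hash the relation number together with
 * blockNum / stripe_size and take the result modulo the number of shards from
 * the current shard map.
 */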
shardno_t
get_shard_number(BufferTag *tag)
{
shardno_t n_shards;
uint32 hash;
load_shard_map(0, NULL, &n_shards);
#if PG_MAJORVERSION_NUM < 16
hash = murmurhash32(tag->rnode.relNode);
hash = hash_combine(hash, murmurhash32(tag->blockNum / stripe_size));
#else
hash = murmurhash32(tag->relNumber);
hash = hash_combine(hash, murmurhash32(tag->blockNum / stripe_size));
#endif
return hash % n_shards;
LWLockAcquire(pagestore_shared->lock, LW_EXCLUSIVE);
strlcpy(pagestore_shared->pageserver_connstring, newval, MAX_PAGESERVER_CONNSTRING_SIZE);
pg_atomic_fetch_add_u64(&pagestore_shared->update_counter, 1);
LWLockRelease(pagestore_shared->lock);
}
static bool
pageserver_connect(shardno_t shard_no, int elevel)
CheckConnstringUpdated()
{
if (!PagestoreShmemIsValid())
return false;
return pagestore_local_counter < pg_atomic_read_u64(&pagestore_shared->update_counter);
}
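/*
 * Copy the latest connection string from shared memory into this backend's
 * local buffer and remember the update counter value we saw.
 */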
static void
ReloadConnstring()
{
if (!PagestoreShmemIsValid())
return;
LWLockAcquire(pagestore_shared->lock, LW_SHARED);
strlcpy(local_pageserver_connstring, pagestore_shared->pageserver_connstring, sizeof(local_pageserver_connstring));
pagestore_local_counter = pg_atomic_read_u64(&pagestore_shared->update_counter);
LWLockRelease(pagestore_shared->lock);
}
static bool
pageserver_connect(int elevel)
{
char *query;
int ret;
const char *keywords[3];
const char *values[3];
int n;
PGconn *conn;
WaitEventSet *wes;
char connstr[MAX_PAGESERVER_CONNSTRING_SIZE];
static TimestampTz last_connect_time = 0;
static uint64_t delay_us = MIN_RECONNECT_INTERVAL_USEC;
TimestampTz now;
uint64_t us_since_last_connect;
uint64_t us_since_last_connect;
Assert(page_servers[shard_no].conn == NULL);
Assert(!connected);
/*
* Get the connection string for this shard. If the shard map has been
* updated since we last looked, this will also disconnect any existing
* pageserver connections as a side effect.
*/
load_shard_map(shard_no, connstr, NULL);
if (CheckConnstringUpdated())
{
ReloadConnstring();
}
now = GetCurrentTimestamp();
us_since_last_connect = now - last_connect_time;
us_since_last_connect = now - last_connect_time;
if (us_since_last_connect < delay_us)
{
pg_usleep(delay_us - us_since_last_connect);
@@ -360,105 +180,50 @@ pageserver_connect(shardno_t shard_no, int elevel)
n++;
}
keywords[n] = "dbname";
values[n] = connstr;
values[n] = local_pageserver_connstring;
n++;
keywords[n] = NULL;
values[n] = NULL;
n++;
conn = PQconnectdbParams(keywords, values, 1);
pageserver_conn = PQconnectdbParams(keywords, values, 1);
if (PQstatus(conn) == CONNECTION_BAD)
if (PQstatus(pageserver_conn) == CONNECTION_BAD)
{
char *msg = pchomp(PQerrorMessage(conn));
char *msg = pchomp(PQerrorMessage(pageserver_conn));
PQfinish(conn);
PQfinish(pageserver_conn);
pageserver_conn = NULL;
ereport(elevel,
(errcode(ERRCODE_SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION),
errmsg(NEON_TAG "[shard %d] could not establish connection to pageserver", shard_no),
errmsg(NEON_TAG "could not establish connection to pageserver"),
errdetail_internal("%s", msg)));
pfree(msg);
return false;
}
query = psprintf("pagestream %s %s", neon_tenant, neon_timeline);
ret = PQsendQuery(conn, query);
pfree(query);
ret = PQsendQuery(pageserver_conn, query);
if (ret != 1)
{
PQfinish(conn);
neon_shard_log(shard_no, elevel, "could not send pagestream command to pageserver");
PQfinish(pageserver_conn);
pageserver_conn = NULL;
neon_log(elevel, "could not send pagestream command to pageserver");
return false;
}
wes = CreateWaitEventSet(TopMemoryContext, 3);
AddWaitEventToSet(wes, WL_LATCH_SET, PGINVALID_SOCKET,
pageserver_conn_wes = CreateWaitEventSet(TopMemoryContext, 3);
AddWaitEventToSet(pageserver_conn_wes, WL_LATCH_SET, PGINVALID_SOCKET,
MyLatch, NULL);
AddWaitEventToSet(wes, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
AddWaitEventToSet(pageserver_conn_wes, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
NULL, NULL);
AddWaitEventToSet(wes, WL_SOCKET_READABLE, PQsocket(conn), NULL, NULL);
AddWaitEventToSet(pageserver_conn_wes, WL_SOCKET_READABLE, PQsocket(pageserver_conn), NULL, NULL);
PG_TRY();
{
while (PQisBusy(conn))
{
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
/* Data available in socket? */
if (event.events & WL_SOCKET_READABLE)
{
if (!PQconsumeInput(conn))
{
char *msg = pchomp(PQerrorMessage(conn));
PQfinish(conn);
FreeWaitEventSet(wes);
neon_shard_log(shard_no, elevel, "could not complete handshake with pageserver: %s",
msg);
return false;
}
}
}
}
PG_CATCH();
{
PQfinish(conn);
FreeWaitEventSet(wes);
PG_RE_THROW();
}
PG_END_TRY();
neon_shard_log(shard_no, LOG, "libpagestore: connected to '%s'", connstr);
page_servers[shard_no].conn = conn;
page_servers[shard_no].wes = wes;
return true;
}
/*
* A wrapper around PQgetCopyData that checks for interrupts while sleeping.
*/
static int
call_PQgetCopyData(shardno_t shard_no, char **buffer)
{
int ret;
PGconn *pageserver_conn = page_servers[shard_no].conn;
retry:
ret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ );
if (ret == 0)
while (PQisBusy(pageserver_conn))
{
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(page_servers[shard_no].wes, -1L, &event, 1, PG_WAIT_EXTENSION);
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
@@ -470,7 +235,53 @@ retry:
{
char *msg = pchomp(PQerrorMessage(pageserver_conn));
neon_shard_log(shard_no, LOG, "could not get response from pageserver: %s", msg);
PQfinish(pageserver_conn);
pageserver_conn = NULL;
FreeWaitEventSet(pageserver_conn_wes);
pageserver_conn_wes = NULL;
neon_log(elevel, "could not complete handshake with pageserver: %s",
msg);
return false;
}
}
}
neon_log(LOG, "libpagestore: connected to '%s'", page_server_connstring);
connected = true;
return true;
}
/*
* A wrapper around PQgetCopyData that checks for interrupts while sleeping.
*/
static int
call_PQgetCopyData(char **buffer)
{
int ret;
retry:
ret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ );
if (ret == 0)
{
WaitEvent event;
/* Sleep until there's something to do */
(void) WaitEventSetWait(pageserver_conn_wes, -1L, &event, 1, PG_WAIT_EXTENSION);
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
/* Data available in socket? */
if (event.events & WL_SOCKET_READABLE)
{
if (!PQconsumeInput(pageserver_conn))
{
char *msg = pchomp(PQerrorMessage(pageserver_conn));
neon_log(LOG, "could not get response from pageserver: %s", msg);
pfree(msg);
return -1;
}
@@ -484,7 +295,7 @@ retry:
static void
pageserver_disconnect(shardno_t shard_no)
pageserver_disconnect(void)
{
/*
* If anything goes wrong while we were sending a request, it's not clear
@@ -493,38 +304,38 @@ pageserver_disconnect(shardno_t shard_no)
* time later after we have already sent a new unrelated request. Close
* the connection to avoid getting confused.
*/
if (page_servers[shard_no].conn)
if (connected)
{
neon_shard_log(shard_no, LOG, "dropping connection to page server due to error");
PQfinish(page_servers[shard_no].conn);
page_servers[shard_no].conn = NULL;
neon_log(LOG, "dropping connection to page server due to error");
PQfinish(pageserver_conn);
pageserver_conn = NULL;
connected = false;
/*
* If the connection to any pageserver is lost, we throw away the
* whole prefetch queue, even for other pageservers. It should not
* cause big problems, because connection loss is supposed to be a
* rare event.
*/
prefetch_on_ps_disconnect();
}
if (page_servers[shard_no].wes != NULL)
if (pageserver_conn_wes != NULL)
{
FreeWaitEventSet(page_servers[shard_no].wes);
page_servers[shard_no].wes = NULL;
FreeWaitEventSet(pageserver_conn_wes);
pageserver_conn_wes = NULL;
}
}
static bool
pageserver_send(shardno_t shard_no, NeonRequest *request)
pageserver_send(NeonRequest *request)
{
StringInfoData req_buff;
PGconn *pageserver_conn = page_servers[shard_no].conn;
if (CheckConnstringUpdated())
{
pageserver_disconnect();
ReloadConnstring();
}
/* If the connection was lost for some reason, reconnect */
if (pageserver_conn && PQstatus(pageserver_conn) == CONNECTION_BAD)
if (connected && PQstatus(pageserver_conn) == CONNECTION_BAD)
{
neon_shard_log(shard_no, LOG, "pageserver_send disconnect bad connection");
pageserver_disconnect(shard_no);
neon_log(LOG, "pageserver_send disconnect bad connection");
pageserver_disconnect();
}
req_buff = nm_pack_request(request);
@@ -538,9 +349,9 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
* https://github.com/neondatabase/neon/issues/1138 So try to reestablish
* connection in case of failure.
*/
if (!page_servers[shard_no].conn)
if (!connected)
{
while (!pageserver_connect(shard_no, n_reconnect_attempts < max_reconnect_attempts ? LOG : ERROR))
while (!pageserver_connect(n_reconnect_attempts < max_reconnect_attempts ? LOG : ERROR))
{
HandleMainLoopInterrupts();
n_reconnect_attempts += 1;
@@ -548,8 +359,6 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
n_reconnect_attempts = 0;
}
pageserver_conn = page_servers[shard_no].conn;
/*
* Send request.
*
@@ -562,8 +371,8 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
{
char *msg = pchomp(PQerrorMessage(pageserver_conn));
pageserver_disconnect(shard_no);
neon_shard_log(shard_no, LOG, "pageserver_send disconnect because failed to send page request (try to reconnect): %s", msg);
pageserver_disconnect();
neon_log(LOG, "pageserver_send disconnect because failed to send page request (try to reconnect): %s", msg);
pfree(msg);
pfree(req_buff.data);
return false;
@@ -575,20 +384,19 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
{
char *msg = nm_to_string((NeonMessage *) request);
neon_shard_log(shard_no, PageStoreTrace, "sent request: %s", msg);
neon_log(PageStoreTrace, "sent request: %s", msg);
pfree(msg);
}
return true;
}
static NeonResponse *
pageserver_receive(shardno_t shard_no)
pageserver_receive(void)
{
StringInfoData resp_buff;
NeonResponse *resp;
PGconn *pageserver_conn = page_servers[shard_no].conn;
if (!pageserver_conn)
if (!connected)
return NULL;
PG_TRY();
@@ -596,7 +404,7 @@ pageserver_receive(shardno_t shard_no)
/* read response */
int rc;
rc = call_PQgetCopyData(shard_no, &resp_buff.data);
rc = call_PQgetCopyData(&resp_buff.data);
if (rc >= 0)
{
resp_buff.len = rc;
@@ -608,33 +416,33 @@ pageserver_receive(shardno_t shard_no)
{
char *msg = nm_to_string((NeonMessage *) resp);
neon_shard_log(shard_no, PageStoreTrace, "got response: %s", msg);
neon_log(PageStoreTrace, "got response: %s", msg);
pfree(msg);
}
}
else if (rc == -1)
{
neon_shard_log(shard_no, LOG, "pageserver_receive disconnect because call_PQgetCopyData returns -1: %s", pchomp(PQerrorMessage(pageserver_conn)));
pageserver_disconnect(shard_no);
neon_log(LOG, "pageserver_receive disconnect because call_PQgetCopyData returns -1: %s", pchomp(PQerrorMessage(pageserver_conn)));
pageserver_disconnect();
resp = NULL;
}
else if (rc == -2)
{
char *msg = pchomp(PQerrorMessage(pageserver_conn));
pageserver_disconnect(shard_no);
neon_shard_log(shard_no, ERROR, "pageserver_receive disconnect because could not read COPY data: %s", msg);
pageserver_disconnect();
neon_log(ERROR, "pageserver_receive disconnect because could not read COPY data: %s", msg);
}
else
{
pageserver_disconnect(shard_no);
neon_shard_log(shard_no, ERROR, "pageserver_receive disconnect because unexpected PQgetCopyData return value: %d", rc);
pageserver_disconnect();
neon_log(ERROR, "pageserver_receive disconnect because unexpected PQgetCopyData return value: %d", rc);
}
}
PG_CATCH();
{
neon_shard_log(shard_no, LOG, "pageserver_receive disconnect due to caught exception");
pageserver_disconnect(shard_no);
neon_log(LOG, "pageserver_receive disconnect due to caught exception");
pageserver_disconnect();
PG_RE_THROW();
}
PG_END_TRY();
@@ -644,13 +452,11 @@ pageserver_receive(shardno_t shard_no)
static bool
pageserver_flush(shardno_t shard_no)
pageserver_flush(void)
{
PGconn *pageserver_conn = page_servers[shard_no].conn;
if (!pageserver_conn)
if (!connected)
{
neon_shard_log(shard_no, WARNING, "Tried to flush while disconnected");
neon_log(WARNING, "Tried to flush while disconnected");
}
else
{
@@ -658,8 +464,8 @@ pageserver_flush(shardno_t shard_no)
{
char *msg = pchomp(PQerrorMessage(pageserver_conn));
pageserver_disconnect(shard_no);
neon_shard_log(shard_no, LOG, "pageserver_flush disconnect because failed to flush page requests: %s", msg);
pageserver_disconnect();
neon_log(LOG, "pageserver_flush disconnect because failed to flush page requests: %s", msg);
pfree(msg);
return false;
}
@@ -699,9 +505,8 @@ PagestoreShmemInit(void)
&found);
if (!found)
{
pg_atomic_init_u64(&pagestore_shared->begin_update_counter, 0);
pg_atomic_init_u64(&pagestore_shared->end_update_counter, 0);
memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
pagestore_shared->lock = &(GetNamedLWLockTranche("neon_libpagestore")->lock);
pg_atomic_init_u64(&pagestore_shared->update_counter, 0);
AssignPageserverConnstring(page_server_connstring, NULL);
}
LWLockRelease(AddinShmemInitLock);
@@ -726,6 +531,7 @@ pagestore_shmem_request(void)
#endif
RequestAddinShmemSpace(PagestoreShmemSize());
RequestNamedLWLockTranche("neon_libpagestore", 1);
}
static void
@@ -776,15 +582,6 @@ pg_init_libpagestore(void)
0, /* no flags required */
check_neon_id, NULL, NULL);
DefineCustomIntVariable("neon.stripe_size",
"sharding stripe size",
NULL,
&stripe_size,
32768, 1, INT_MAX,
PGC_SIGHUP,
GUC_UNIT_BLOCKS,
NULL, NULL, NULL);
DefineCustomIntVariable("neon.max_cluster_size",
"cluster size limit",
NULL,

View File

@@ -20,13 +20,9 @@
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "storage/block.h"
#include "storage/buf_internals.h"
#include "storage/smgr.h"
#include "utils/memutils.h"
#define MAX_SHARDS 128
#define MAX_PAGESERVER_CONNSTRING_SIZE 256
typedef enum
{
/* pagestore_client -> pagestore */
@@ -55,9 +51,6 @@ typedef struct
#define neon_log(tag, fmt, ...) ereport(tag, \
(errmsg(NEON_TAG fmt, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))
#define neon_shard_log(shard_no, tag, fmt, ...) ereport(tag, \
(errmsg(NEON_TAG "[shard %d] " fmt, shard_no, ##__VA_ARGS__), \
errhidestmt(true), errhidecontext(true), errposition(0), internalerrposition(0)))
/*
* supertype of all the Neon*Request structs below
@@ -148,13 +141,11 @@ extern char *nm_to_string(NeonMessage *msg);
* API
*/
typedef unsigned shardno_t;
typedef struct
{
bool (*send) (shardno_t shard_no, NeonRequest * request);
NeonResponse *(*receive) (shardno_t shard_no);
bool (*flush) (shardno_t shard_no);
bool (*send) (NeonRequest *request);
NeonResponse *(*receive) (void);
bool (*flush) (void);
} page_server_api;
extern void prefetch_on_ps_disconnect(void);
@@ -168,8 +159,6 @@ extern char *neon_timeline;
extern char *neon_tenant;
extern int32 max_cluster_size;
extern shardno_t get_shard_number(BufferTag* tag);
extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
extern void smgr_init_neon(void);
extern void readahead_buffer_resize(int newsize, void *extra);

View File

@@ -172,7 +172,6 @@ typedef struct PrefetchRequest
XLogRecPtr actual_request_lsn;
NeonResponse *response; /* may be null */
PrefetchStatus status;
shardno_t shard_no;
uint64 my_ring_index;
} PrefetchRequest;
@@ -240,17 +239,10 @@ typedef struct PrefetchState
* also unused */
/* the buffers */
prfh_hash *prf_hash;
int max_shard_no;
/* Mark shards involved in prefetch */
uint8 shard_bitmap[(MAX_SHARDS + 7)/8];
prfh_hash *prf_hash;
PrefetchRequest prf_buffer[]; /* prefetch buffers */
} PrefetchState;
#define BITMAP_ISSET(bm, bit) ((bm)[(bit) >> 3] & (1 << ((bit) & 7)))
#define BITMAP_SET(bm, bit) (bm)[(bit) >> 3] |= (1 << ((bit) & 7))
#define BITMAP_CLR(bm, bit) (bm)[(bit) >> 3] &= ~(1 << ((bit) & 7))
static PrefetchState *MyPState;
#define GetPrfSlot(ring_index) ( \
@@ -335,7 +327,6 @@ compact_prefetch_buffers(void)
Assert(target_slot->status == PRFS_UNUSED);
target_slot->buftag = source_slot->buftag;
target_slot->shard_no = source_slot->shard_no;
target_slot->status = source_slot->status;
target_slot->response = source_slot->response;
target_slot->effective_request_lsn = source_slot->effective_request_lsn;
@@ -503,23 +494,6 @@ prefetch_cleanup_trailing_unused(void)
}
}
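/*
 * Flush the request stream of every shard marked in the prefetch shard bitmap,
 * clearing the bitmap (and max_shard_no) as we go.
 */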
static bool
prefetch_flush_requests(void)
{
for (shardno_t shard_no = 0; shard_no < MyPState->max_shard_no; shard_no++)
{
if (BITMAP_ISSET(MyPState->shard_bitmap, shard_no))
{
if (!page_server->flush(shard_no))
return false;
BITMAP_CLR(MyPState->shard_bitmap, shard_no);
}
}
MyPState->max_shard_no = 0;
return true;
}
/*
* Wait for slot of ring_index to have received its response.
* The caller is responsible for making sure the request buffer is flushed.
@@ -535,7 +509,7 @@ prefetch_wait_for(uint64 ring_index)
if (MyPState->ring_flush <= ring_index &&
MyPState->ring_unused > MyPState->ring_flush)
{
if (!prefetch_flush_requests())
if (!page_server->flush())
return false;
MyPState->ring_flush = MyPState->ring_unused;
}
@@ -573,7 +547,7 @@ prefetch_read(PrefetchRequest *slot)
Assert(slot->my_ring_index == MyPState->ring_receive);
old = MemoryContextSwitchTo(MyPState->errctx);
response = (NeonResponse *) page_server->receive(slot->shard_no);
response = (NeonResponse *) page_server->receive();
MemoryContextSwitchTo(old);
if (response)
{
@@ -730,14 +704,12 @@ prefetch_do_request(PrefetchRequest *slot, bool *force_latest, XLogRecPtr *force
Assert(slot->response == NULL);
Assert(slot->my_ring_index == MyPState->ring_unused);
while (!page_server->send(slot->shard_no, (NeonRequest *) &request));
while (!page_server->send((NeonRequest *) &request));
/* update prefetch state */
MyPState->n_requests_inflight += 1;
MyPState->n_unused -= 1;
MyPState->ring_unused += 1;
BITMAP_SET(MyPState->shard_bitmap, slot->shard_no);
MyPState->max_shard_no = Max(slot->shard_no+1, MyPState->max_shard_no);
/* update slot state */
slot->status = PRFS_REQUESTED;
@@ -908,7 +880,6 @@ Retry:
* function reads the buffer tag from the slot.
*/
slot->buftag = tag;
slot->shard_no = get_shard_number(&tag);
slot->my_ring_index = ring_index;
prefetch_do_request(slot, force_latest, force_lsn);
@@ -919,7 +890,7 @@ Retry:
if (flush_every_n_requests > 0 &&
MyPState->ring_unused - MyPState->ring_flush >= flush_every_n_requests)
{
if (!prefetch_flush_requests())
if (!page_server->flush())
{
/*
* Prefetch set is reset in case of error, so we should try to
@@ -937,44 +908,13 @@ static NeonResponse *
page_server_request(void const *req)
{
NeonResponse *resp;
BufferTag tag = {0};
shardno_t shard_no;
switch (((NeonRequest *) req)->tag)
{
case T_NeonExistsRequest:
CopyNRelFileInfoToBufTag(tag, ((NeonExistsRequest *) req)->rinfo);
break;
case T_NeonNblocksRequest:
CopyNRelFileInfoToBufTag(tag, ((NeonNblocksRequest *) req)->rinfo);
break;
case T_NeonDbSizeRequest:
NInfoGetDbOid(BufTagGetNRelFileInfo(tag)) = ((NeonDbSizeRequest *) req)->dbNode;
break;
case T_NeonGetPageRequest:
CopyNRelFileInfoToBufTag(tag, ((NeonGetPageRequest *) req)->rinfo);
tag.blockNum = ((NeonGetPageRequest *) req)->blkno;
break;
default:
neon_log(ERROR, "Unexpected request tag: %d", ((NeonRequest *) req)->tag);
}
shard_no = get_shard_number(&tag);
/*
* The current sharding model assumes that all metadata is present only on shard 0.
* We still need to call get_shard_number() to check that the shard map is up-to-date.
*/
if (((NeonRequest *) req)->tag != T_NeonGetPageRequest || ((NeonGetPageRequest *) req)->forknum != MAIN_FORKNUM)
{
shard_no = 0;
}
do
{
while (!page_server->send(shard_no, (NeonRequest *) req) || !page_server->flush(shard_no));
while (!page_server->send((NeonRequest *) req) || !page_server->flush());
MyPState->ring_flush = MyPState->ring_unused;
consume_prefetch_responses();
resp = page_server->receive(shard_no);
resp = page_server->receive();
} while (resp == NULL);
return resp;
@@ -2158,8 +2098,8 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
case T_NeonErrorResponse:
ereport(ERROR,
(errcode(ERRCODE_IO_ERROR),
errmsg(NEON_TAG "[shard %d] could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
slot->shard_no, blkno,
errmsg(NEON_TAG "could not read block %u in rel %u/%u/%u.%u from page server at lsn %X/%08X",
blkno,
RelFileInfoFmt(rinfo),
forkNum,
(uint32) (request_lsn >> 32), (uint32) request_lsn),

View File

@@ -5,7 +5,7 @@ edition.workspace = true
license.workspace = true
[features]
default = []
default = ["testing"]
testing = []
[dependencies]

View File

@@ -4,9 +4,7 @@ pub mod backend;
pub use backend::BackendType;
mod credentials;
pub use credentials::{
check_peer_addr_is_in_list, endpoint_sni, ComputeUserInfoMaybeEndpoint, IpPattern,
};
pub use credentials::{check_peer_addr_is_in_list, endpoint_sni, ComputeUserInfoMaybeEndpoint};
mod password_hack;
pub use password_hack::parse_endpoint_param;

View File

@@ -3,18 +3,19 @@ mod hacks;
mod link;
pub use link::LinkAuthError;
use smol_str::SmolStr;
use tokio_postgres::config::AuthKeys;
use crate::auth::credentials::check_peer_addr_is_in_list;
use crate::auth::validate_password_and_exchange;
use crate::cache::Cached;
use crate::console::errors::GetAuthInfoError;
use crate::console::provider::ConsoleBackend;
use crate::console::AuthSecret;
use crate::context::RequestMonitoring;
use crate::proxy::connect_compute::handle_try_wake;
use crate::proxy::retry::retry_after;
use crate::proxy::NeonOptions;
use crate::scram;
use crate::stream::Stream;
use crate::{
auth::{self, ComputeUserInfoMaybeEndpoint},
@@ -26,7 +27,6 @@ use crate::{
},
stream, url,
};
use crate::{scram, EndpointCacheKey, EndpointId, RoleName};
use futures::TryFutureExt;
use std::borrow::Cow;
use std::ops::ControlFlow;
@@ -34,8 +34,6 @@ use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite};
use tracing::{error, info, warn};
use super::IpPattern;
/// This type serves two purposes:
///
/// * When `T` is `()`, it's just a regular auth backend selector
@@ -45,8 +43,11 @@ use super::IpPattern;
/// this helps us provide the credentials only to those auth
/// backends which require them for the authentication process.
pub enum BackendType<'a, T> {
/// Cloud API (V2).
Console(Cow<'a, ConsoleBackend>, T),
/// Current Cloud API (V2).
Console(Cow<'a, console::provider::neon::Api>, T),
/// Local mock of Cloud API (V2).
#[cfg(feature = "testing")]
Postgres(Cow<'a, console::provider::mock::Api>, T),
/// Authentication via a web browser.
Link(Cow<'a, url::ApiUrl>),
#[cfg(test)]
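
The doc comment above ("This type serves two purposes...") describes a reusable pattern: the same enum acts as a plain backend selector when T is (), and as a carrier of per-connection credentials otherwise, so credentials only reach the backends that actually need them. A minimal, self-contained sketch of that pattern follows; the names (Backend, map) are illustrative and are not the proxy's actual types.

// Sketch only: a selector enum that optionally carries a payload `T`.
#[derive(Debug)]
enum Backend<T> {
    Console(T),
    Link,
}

impl<T> Backend<T> {
    // Transform the payload while keeping the selected variant unchanged.
    fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<R> {
        match self {
            Backend::Console(t) => Backend::Console(f(t)),
            Backend::Link => Backend::Link,
        }
    }
}

fn main() {
    // With `T = ()` the enum is just a selector of the auth backend...
    let selector: Backend<()> = Backend::Console(());
    // ...and credentials are attached later, only where they are needed.
    let with_creds: Backend<&str> = selector.map(|()| "john_doe");
    println!("{with_creds:?}");
}
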
@@ -56,22 +57,16 @@ pub enum BackendType<'a, T> {
pub trait TestBackend: Send + Sync + 'static {
fn wake_compute(&self) -> Result<CachedNodeInfo, console::errors::WakeComputeError>;
fn get_allowed_ips(&self) -> Result<Vec<IpPattern>, console::errors::GetAuthInfoError>;
fn get_allowed_ips(&self) -> Result<Vec<SmolStr>, console::errors::GetAuthInfoError>;
}
impl std::fmt::Display for BackendType<'_, ()> {
fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use BackendType::*;
match self {
Console(api, _) => match &**api {
ConsoleBackend::Console(endpoint) => {
fmt.debug_tuple("Console").field(&endpoint.url()).finish()
}
#[cfg(feature = "testing")]
ConsoleBackend::Postgres(endpoint) => {
fmt.debug_tuple("Postgres").field(&endpoint.url()).finish()
}
},
Console(endpoint, _) => fmt.debug_tuple("Console").field(&endpoint.url()).finish(),
#[cfg(feature = "testing")]
Postgres(endpoint, _) => fmt.debug_tuple("Postgres").field(&endpoint.url()).finish(),
Link(url) => fmt.debug_tuple("Link").field(&url.as_str()).finish(),
#[cfg(test)]
Test(_) => fmt.debug_tuple("Test").finish(),
@@ -86,6 +81,8 @@ impl<T> BackendType<'_, T> {
use BackendType::*;
match self {
Console(c, x) => Console(Cow::Borrowed(c), x),
#[cfg(feature = "testing")]
Postgres(c, x) => Postgres(Cow::Borrowed(c), x),
Link(c) => Link(Cow::Borrowed(c)),
#[cfg(test)]
Test(x) => Test(*x),
@@ -101,6 +98,8 @@ impl<'a, T> BackendType<'a, T> {
use BackendType::*;
match self {
Console(c, x) => Console(c, f(x)),
#[cfg(feature = "testing")]
Postgres(c, x) => Postgres(c, f(x)),
Link(c) => Link(c),
#[cfg(test)]
Test(x) => Test(x),
@@ -115,6 +114,8 @@ impl<'a, T, E> BackendType<'a, Result<T, E>> {
use BackendType::*;
match self {
Console(c, x) => x.map(|x| Console(c, x)),
#[cfg(feature = "testing")]
Postgres(c, x) => x.map(|x| Postgres(c, x)),
Link(c) => Ok(Link(c)),
#[cfg(test)]
Test(x) => Ok(Test(x)),
@@ -129,19 +130,19 @@ pub struct ComputeCredentials<T> {
#[derive(Debug, Clone)]
pub struct ComputeUserInfoNoEndpoint {
pub user: RoleName,
pub user: SmolStr,
pub options: NeonOptions,
}
#[derive(Debug, Clone)]
pub struct ComputeUserInfo {
pub endpoint: EndpointId,
pub user: RoleName,
pub endpoint: SmolStr,
pub user: SmolStr,
pub options: NeonOptions,
}
impl ComputeUserInfo {
pub fn endpoint_cache_key(&self) -> EndpointCacheKey {
pub fn endpoint_cache_key(&self) -> SmolStr {
self.options.get_cache_key(&self.endpoint)
}
}
@@ -157,7 +158,7 @@ impl TryFrom<ComputeUserInfoMaybeEndpoint> for ComputeUserInfo {
type Error = ComputeUserInfoNoEndpoint;
fn try_from(user_info: ComputeUserInfoMaybeEndpoint) -> Result<Self, Self::Error> {
match user_info.endpoint_id {
match user_info.project {
None => Err(ComputeUserInfoNoEndpoint {
user: user_info.user,
options: user_info.options,
@@ -203,18 +204,21 @@ async fn auth_quirks(
if !check_peer_addr_is_in_list(&ctx.peer_addr, &allowed_ips) {
return Err(auth::AuthError::ip_address_not_allowed());
}
let cached_secret = api.get_role_secret(ctx, &info).await?;
let maybe_secret = api.get_role_secret(ctx, &info).await?;
let secret = cached_secret.value.clone().unwrap_or_else(|| {
let cached_secret = maybe_secret.unwrap_or_else(|| {
// If we don't have an authentication secret, we mock one to
// prevent malicious probing (possible due to missing protocol steps).
// This mocked secret will never lead to successful authentication.
info!("authentication info not found, mocking it");
AuthSecret::Scram(scram::ServerSecret::mock(&info.user, rand::random()))
Cached::new_uncached(AuthSecret::Scram(scram::ServerSecret::mock(
&info.user,
rand::random(),
)))
});
match authenticate_with_secret(
ctx,
secret,
cached_secret.value.clone(),
info,
client,
unauthenticated_password,
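
The "mock one to prevent malicious probing" comment above is a standard anti-enumeration measure: when the role has no stored secret, the proxy still runs the full authentication exchange against a freshly generated, unusable secret, so a client cannot distinguish "unknown user" from "wrong password" by the shape of the failure. A minimal sketch of the idea, with a simplified stand-in for the SCRAM secret type (the real scram::ServerSecret API is more involved):

// Sketch only: always produce *some* secret so the protocol flow looks the
// same for existing and non-existing roles; the mocked one can never verify.
struct Secret {
    verifier: [u8; 32], // stand-in for real SCRAM verifier material
}

fn lookup_secret(role: &str) -> Option<Secret> {
    // Pretend only "admin" exists in this sketch.
    (role == "admin").then(|| Secret { verifier: [1; 32] })
}

fn mock_secret() -> Secret {
    // In real code this would be filled from a CSPRNG.
    Secret { verifier: [42; 32] }
}

fn secret_for(role: &str) -> Secret {
    lookup_secret(role).unwrap_or_else(mock_secret)
}

fn main() {
    // Both calls yield a secret; the caller proceeds identically either way.
    let _real = secret_for("admin");
    let _mocked = secret_for("does_not_exist");
}
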
@@ -316,11 +320,13 @@ async fn auth_and_wake_compute(
impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
/// Get compute endpoint name from the credentials.
pub fn get_endpoint(&self) -> Option<EndpointId> {
pub fn get_endpoint(&self) -> Option<SmolStr> {
use BackendType::*;
match self {
Console(_, user_info) => user_info.endpoint_id.clone(),
Console(_, user_info) => user_info.project.clone(),
#[cfg(feature = "testing")]
Postgres(_, user_info) => user_info.project.clone(),
Link(_) => Some("link".into()),
#[cfg(test)]
Test(_) => Some("test".into()),
@@ -333,6 +339,8 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
match self {
Console(_, user_info) => &user_info.user,
#[cfg(feature = "testing")]
Postgres(_, user_info) => &user_info.user,
Link(_) => "link",
#[cfg(test)]
Test(_) => "test",
@@ -354,7 +362,7 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
Console(api, user_info) => {
info!(
user = &*user_info.user,
project = user_info.endpoint(),
project = user_info.project(),
"performing authentication using the console"
);
@@ -363,6 +371,19 @@ impl<'a> BackendType<'a, ComputeUserInfoMaybeEndpoint> {
.await?;
(cache_info, BackendType::Console(api, user_info))
}
#[cfg(feature = "testing")]
Postgres(api, user_info) => {
info!(
user = &*user_info.user,
project = user_info.project(),
"performing authentication using a local postgres instance"
);
let (cache_info, user_info) =
auth_and_wake_compute(ctx, &*api, user_info, client, allow_cleartext, config)
.await?;
(cache_info, BackendType::Postgres(api, user_info))
}
// NOTE: this auth backend doesn't use client credentials.
Link(url) => {
info!("performing link authentication");
@@ -393,6 +414,8 @@ impl BackendType<'_, ComputeUserInfo> {
use BackendType::*;
match self {
Console(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api, user_info) => api.get_allowed_ips(ctx, user_info).await,
Link(_) => Ok(Cached::new_uncached(Arc::new(vec![]))),
#[cfg(test)]
Test(x) => Ok(Cached::new_uncached(Arc::new(x.get_allowed_ips()?))),
@@ -409,6 +432,8 @@ impl BackendType<'_, ComputeUserInfo> {
match self {
Console(api, user_info) => api.wake_compute(ctx, user_info).map_ok(Some).await,
#[cfg(feature = "testing")]
Postgres(api, user_info) => api.wake_compute(ctx, user_info).map_ok(Some).await,
Link(_) => Ok(None),
#[cfg(test)]
Test(x) => x.wake_compute().map(Some),


@@ -57,31 +57,24 @@ pub(super) async fn authenticate(
link_uri: &reqwest::Url,
client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
) -> auth::Result<NodeInfo> {
// registering waiter can fail if we get unlucky with rng.
// just try again.
let (psql_session_id, waiter) = loop {
let psql_session_id = new_psql_session_id();
match console::mgmt::get_waiter(&psql_session_id) {
Ok(waiter) => break (psql_session_id, waiter),
Err(_e) => continue,
}
};
let psql_session_id = new_psql_session_id();
let span = info_span!("link", psql_session_id = &psql_session_id);
let greeting = hello_message(link_uri, &psql_session_id);
// Give user a URL to spawn a new database.
info!(parent: &span, "sending the auth URL to the user");
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&Be::CLIENT_ENCODING)?
.write_message(&Be::NoticeResponse(&greeting))
.await?;
let db_info = console::mgmt::with_waiter(psql_session_id, |waiter| async {
// Give user a URL to spawn a new database.
info!(parent: &span, "sending the auth URL to the user");
client
.write_message_noflush(&Be::AuthenticationOk)?
.write_message_noflush(&Be::CLIENT_ENCODING)?
.write_message(&Be::NoticeResponse(&greeting))
.await?;
// Wait for web console response (see `mgmt`).
info!(parent: &span, "waiting for console's reply...");
let db_info = waiter.await.map_err(LinkAuthError::from)?;
// Wait for web console response (see `mgmt`).
info!(parent: &span, "waiting for console's reply...");
waiter.await?.map_err(LinkAuthError::AuthFailed)
})
.await?;
client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;


@@ -2,12 +2,12 @@
use crate::{
auth::password_hack::parse_endpoint_param, context::RequestMonitoring, error::UserFacingError,
metrics::NUM_CONNECTION_ACCEPTED_BY_SNI, proxy::NeonOptions, EndpointId, RoleName,
metrics::NUM_CONNECTION_ACCEPTED_BY_SNI, proxy::NeonOptions,
};
use itertools::Itertools;
use pq_proto::StartupMessageParams;
use smol_str::SmolStr;
use std::{collections::HashSet, net::IpAddr, str::FromStr};
use std::{collections::HashSet, net::IpAddr};
use thiserror::Error;
use tracing::{info, warn};
@@ -21,10 +21,7 @@ pub enum ComputeUserInfoParseError {
SNI ('{}') and project option ('{}').",
.domain, .option,
)]
InconsistentProjectNames {
domain: EndpointId,
option: EndpointId,
},
InconsistentProjectNames { domain: SmolStr, option: SmolStr },
#[error(
"Common name inferred from SNI ('{}') is not known",
@@ -33,7 +30,7 @@ pub enum ComputeUserInfoParseError {
UnknownCommonName { cn: String },
#[error("Project name ('{0}') must contain only alphanumeric characters and hyphen.")]
MalformedProjectName(EndpointId),
MalformedProjectName(SmolStr),
}
impl UserFacingError for ComputeUserInfoParseError {}
@@ -42,15 +39,17 @@ impl UserFacingError for ComputeUserInfoParseError {}
/// Note that we don't store any kind of client key or password here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ComputeUserInfoMaybeEndpoint {
pub user: RoleName,
pub endpoint_id: Option<EndpointId>,
pub user: SmolStr,
// TODO: this is a severe misnomer! We should think of a new name ASAP.
pub project: Option<SmolStr>,
pub options: NeonOptions,
}
impl ComputeUserInfoMaybeEndpoint {
#[inline]
pub fn endpoint(&self) -> Option<&str> {
self.endpoint_id.as_deref()
pub fn project(&self) -> Option<&str> {
self.project.as_deref()
}
}
@@ -80,15 +79,15 @@ impl ComputeUserInfoMaybeEndpoint {
// Some parameters are stored in the startup message.
let get_param = |key| params.get(key).ok_or(MissingKey(key));
let user: RoleName = get_param("user")?.into();
let user: SmolStr = get_param("user")?.into();
// record the values if we have them
ctx.set_application(params.get("application_name").map(SmolStr::from));
ctx.set_user(user.clone());
ctx.set_endpoint_id(sni.map(EndpointId::from));
ctx.set_endpoint_id(sni.map(SmolStr::from));
// Project name might be passed via PG's command-line options.
let endpoint_option = params
let project_option = params
.options_raw()
.and_then(|options| {
// We support both `project` (deprecated) and `endpoint` options for backward compatibility.
@@ -101,9 +100,9 @@ impl ComputeUserInfoMaybeEndpoint {
})
.map(|name| name.into());
let endpoint_from_domain = if let Some(sni_str) = sni {
let project_from_domain = if let Some(sni_str) = sni {
if let Some(cn) = common_names {
Some(EndpointId::from(endpoint_sni(sni_str, cn)?))
Some(SmolStr::from(endpoint_sni(sni_str, cn)?))
} else {
None
}
@@ -111,7 +110,7 @@ impl ComputeUserInfoMaybeEndpoint {
None
};
let endpoint = match (endpoint_option, endpoint_from_domain) {
let project = match (project_option, project_from_domain) {
// Invariant: if we have both project name variants, they should match.
(Some(option), Some(domain)) if option != domain => {
Some(Err(InconsistentProjectNames { domain, option }))
@@ -124,13 +123,13 @@ impl ComputeUserInfoMaybeEndpoint {
}
.transpose()?;
info!(%user, project = endpoint.as_deref(), "credentials");
info!(%user, project = project.as_deref(), "credentials");
if sni.is_some() {
info!("Connection with sni");
NUM_CONNECTION_ACCEPTED_BY_SNI
.with_label_values(&["sni"])
.inc();
} else if endpoint.is_some() {
} else if project.is_some() {
NUM_CONNECTION_ACCEPTED_BY_SNI
.with_label_values(&["no_sni"])
.inc();
@@ -146,57 +145,36 @@ impl ComputeUserInfoMaybeEndpoint {
Ok(Self {
user,
endpoint_id: endpoint.map(EndpointId::from),
project,
options,
})
}
}
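
The parsing above resolves the endpoint name from two independent sources, the startup options parameter (endpoint, or the deprecated project) and the TLS SNI hostname, and it refuses the connection when both are present but disagree. A compact sketch of that consistency rule, written against plain strings rather than the proxy's types:

// Sketch only: combine an endpoint name from connection options with one
// inferred from SNI; both are optional, but if both exist they must match.
fn resolve_endpoint(
    from_option: Option<&str>,
    from_sni: Option<&str>,
) -> Result<Option<String>, String> {
    match (from_option, from_sni) {
        (Some(o), Some(d)) if o != d => {
            Err(format!("inconsistent endpoint names: option={o}, sni={d}"))
        }
        // When both agree, either value can be used; take the SNI-derived one.
        (_, Some(d)) => Ok(Some(d.to_string())),
        (Some(o), None) => Ok(Some(o.to_string())),
        (None, None) => Ok(None),
    }
}

fn main() {
    assert_eq!(
        resolve_endpoint(Some("ep-1"), Some("ep-1")).unwrap().as_deref(),
        Some("ep-1")
    );
    assert_eq!(
        resolve_endpoint(None, Some("ep-2")).unwrap().as_deref(),
        Some("ep-2")
    );
    assert!(resolve_endpoint(Some("a"), Some("b")).is_err());
}
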
pub fn check_peer_addr_is_in_list(peer_addr: &IpAddr, ip_list: &[IpPattern]) -> bool {
ip_list.is_empty() || ip_list.iter().any(|pattern| check_ip(peer_addr, pattern))
pub fn check_peer_addr_is_in_list(peer_addr: &IpAddr, ip_list: &Vec<SmolStr>) -> bool {
if ip_list.is_empty() {
return true;
}
for ip in ip_list {
// We expect that all ip addresses from control plane are correct.
// However, if some of them are broken, we still can check the others.
match parse_ip_pattern(ip) {
Ok(pattern) => {
if check_ip(peer_addr, &pattern) {
return true;
}
}
Err(err) => warn!("Cannot parse ip: {}; err: {}", ip, err),
}
}
false
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum IpPattern {
enum IpPattern {
Subnet(ipnet::IpNet),
Range(IpAddr, IpAddr),
Single(IpAddr),
None,
}
impl<'de> serde::de::Deserialize<'de> for IpPattern {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct StrVisitor;
impl<'de> serde::de::Visitor<'de> for StrVisitor {
type Value = IpPattern;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "comma separated list with ip address, ip address range, or ip address subnet mask")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(parse_ip_pattern(v).unwrap_or_else(|e| {
warn!("Cannot parse ip pattern {v}: {e}");
IpPattern::None
}))
}
}
deserializer.deserialize_str(StrVisitor)
}
}
impl FromStr for IpPattern {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
parse_ip_pattern(s)
}
}
fn parse_ip_pattern(pattern: &str) -> anyhow::Result<IpPattern> {
@@ -218,7 +196,6 @@ fn check_ip(ip: &IpAddr, pattern: &IpPattern) -> bool {
IpPattern::Subnet(subnet) => subnet.contains(ip),
IpPattern::Range(start, end) => start <= ip && ip <= end,
IpPattern::Single(addr) => addr == ip,
IpPattern::None => false,
}
}
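
The IpPattern handling above supports single addresses, inclusive address ranges, and CIDR subnets, and it deliberately tolerates malformed entries (skipping them or mapping them to a never-matching variant) so one broken entry coming from the control plane cannot lock out an otherwise valid allow-list. A small sketch of the same checking logic, using the ipnet crate that the code above also relies on; the parsing details are simplified relative to parse_ip_pattern:

use std::net::IpAddr;

// Sketch only: mirrors the shape of IpPattern above, not its exact parser.
enum Pattern {
    Subnet(ipnet::IpNet),
    Range(IpAddr, IpAddr),
    Single(IpAddr),
    None, // produced for entries that failed to parse; never matches
}

fn parse_pattern(s: &str) -> Pattern {
    if let Some((lo, hi)) = s.split_once('-') {
        if let (Ok(lo), Ok(hi)) = (lo.parse(), hi.parse()) {
            return Pattern::Range(lo, hi);
        }
    } else if let Ok(net) = s.parse::<ipnet::IpNet>() {
        return Pattern::Subnet(net);
    } else if let Ok(addr) = s.parse::<IpAddr>() {
        return Pattern::Single(addr);
    }
    // Broken entries degrade to a never-matching pattern instead of an error.
    Pattern::None
}

fn matches(ip: &IpAddr, pattern: &Pattern) -> bool {
    match pattern {
        Pattern::Subnet(net) => net.contains(ip),
        Pattern::Range(lo, hi) => lo <= ip && ip <= hi,
        Pattern::Single(addr) => addr == ip,
        Pattern::None => false,
    }
}

fn allowed(peer: &IpAddr, list: &[Pattern]) -> bool {
    // An empty list means "no restriction", matching check_peer_addr_is_in_list.
    list.is_empty() || list.iter().any(|p| matches(peer, p))
}

fn main() {
    let peer: IpAddr = "127.0.0.1".parse().unwrap();
    let list = vec![parse_pattern("10.0.0.0/8"), parse_pattern("127.0.0.1")];
    assert!(allowed(&peer, &list));
}
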
@@ -229,7 +206,6 @@ fn project_name_valid(name: &str) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
use ComputeUserInfoParseError::*;
#[test]
@@ -239,7 +215,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id, None);
assert_eq!(user_info.project, None);
Ok(())
}
@@ -254,7 +230,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id, None);
assert_eq!(user_info.project, None);
Ok(())
}
@@ -270,7 +246,7 @@ mod tests {
let user_info =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("foo"));
assert_eq!(user_info.project.as_deref(), Some("foo"));
assert_eq!(user_info.options.get_cache_key("foo"), "foo");
Ok(())
@@ -286,7 +262,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("bar"));
assert_eq!(user_info.project.as_deref(), Some("bar"));
Ok(())
}
@@ -301,7 +277,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("bar"));
assert_eq!(user_info.project.as_deref(), Some("bar"));
Ok(())
}
@@ -319,7 +295,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe");
assert!(user_info.endpoint_id.is_none());
assert!(user_info.project.is_none());
Ok(())
}
@@ -334,7 +310,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info = ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, None, None)?;
assert_eq!(user_info.user, "john_doe");
assert!(user_info.endpoint_id.is_none());
assert!(user_info.project.is_none());
Ok(())
}
@@ -350,7 +326,7 @@ mod tests {
let user_info =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.user, "john_doe");
assert_eq!(user_info.endpoint_id.as_deref(), Some("baz"));
assert_eq!(user_info.project.as_deref(), Some("baz"));
Ok(())
}
@@ -364,14 +340,14 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.endpoint_id.as_deref(), Some("p1"));
assert_eq!(user_info.project.as_deref(), Some("p1"));
let common_names = Some(["a.com".into(), "b.com".into()].into());
let sni = Some("p1.b.com");
let mut ctx = RequestMonitoring::test();
let user_info =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.endpoint_id.as_deref(), Some("p1"));
assert_eq!(user_info.project.as_deref(), Some("p1"));
Ok(())
}
@@ -428,7 +404,7 @@ mod tests {
let mut ctx = RequestMonitoring::test();
let user_info =
ComputeUserInfoMaybeEndpoint::parse(&mut ctx, &options, sni, common_names.as_ref())?;
assert_eq!(user_info.endpoint_id.as_deref(), Some("project"));
assert_eq!(user_info.project.as_deref(), Some("project"));
assert_eq!(
user_info.options.get_cache_key("project"),
"project endpoint_type:read_write lsn:0/2"
@@ -439,17 +415,21 @@ mod tests {
#[test]
fn test_check_peer_addr_is_in_list() {
fn check(v: serde_json::Value) -> bool {
let peer_addr = IpAddr::from([127, 0, 0, 1]);
let ip_list: Vec<IpPattern> = serde_json::from_value(v).unwrap();
check_peer_addr_is_in_list(&peer_addr, &ip_list)
}
assert!(check(json!([])));
assert!(check(json!(["127.0.0.1"])));
assert!(!check(json!(["8.8.8.8"])));
let peer_addr = IpAddr::from([127, 0, 0, 1]);
assert!(check_peer_addr_is_in_list(&peer_addr, &vec![]));
assert!(check_peer_addr_is_in_list(
&peer_addr,
&vec!["127.0.0.1".into()]
));
assert!(!check_peer_addr_is_in_list(
&peer_addr,
&vec!["8.8.8.8".into()]
));
// If there is an incorrect address, it will be skipped.
assert!(check(json!(["88.8.8", "127.0.0.1"])));
assert!(check_peer_addr_is_in_list(
&peer_addr,
&vec!["88.8.8".into(), "127.0.0.1".into()]
));
}
#[test]
fn test_parse_ip_v4() -> anyhow::Result<()> {


@@ -4,11 +4,10 @@
//! UPDATE (Mon Aug 8 13:20:34 UTC 2022): the payload format has been simplified.
use bstr::ByteSlice;
use crate::EndpointId;
use smol_str::SmolStr;
pub struct PasswordHackPayload {
pub endpoint: EndpointId,
pub endpoint: SmolStr,
pub password: Vec<u8>,
}


@@ -249,19 +249,12 @@ async fn main() -> anyhow::Result<()> {
}
if let auth::BackendType::Console(api, _) = &config.auth_backend {
match &**api {
proxy::console::provider::ConsoleBackend::Console(api) => {
let cache = api.caches.project_info.clone();
if let Some(url) = args.redis_notifications {
info!("Starting redis notifications listener ({url})");
maintenance_tasks
.spawn(notifications::task_main(url.to_owned(), cache.clone()));
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
#[cfg(feature = "testing")]
proxy::console::provider::ConsoleBackend::Postgres(_) => {}
let cache = api.caches.project_info.clone();
if let Some(url) = args.redis_notifications {
info!("Starting redis notifications listener ({url})");
maintenance_tasks.spawn(notifications::task_main(url.to_owned(), cache.clone()));
}
maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
}
let maintenance = loop {
@@ -358,15 +351,13 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
let endpoint = http::Endpoint::new(url, http::new_client(rate_limiter_config));
let api = console::provider::neon::Api::new(endpoint, caches, locks);
let api = console::provider::ConsoleBackend::Console(api);
auth::BackendType::Console(Cow::Owned(api), ())
}
#[cfg(feature = "testing")]
AuthBackend::Postgres => {
let url = args.auth_endpoint.parse()?;
let api = console::provider::mock::Api::new(url);
let api = console::provider::ConsoleBackend::Postgres(api);
auth::BackendType::Console(Cow::Owned(api), ())
auth::BackendType::Postgres(Cow::Owned(api), ())
}
AuthBackend::Link => {
let url = args.uri.parse()?;


@@ -11,16 +11,13 @@ use smol_str::SmolStr;
use tokio::time::Instant;
use tracing::{debug, info};
use crate::{
auth::IpPattern, config::ProjectInfoCacheOptions, console::AuthSecret, EndpointId, ProjectId,
RoleName,
};
use crate::{config::ProjectInfoCacheOptions, console::AuthSecret};
use super::{Cache, Cached};
pub trait ProjectInfoCache {
fn invalidate_allowed_ips_for_project(&self, project_id: &ProjectId);
fn invalidate_role_secret_for_project(&self, project_id: &ProjectId, role_name: &RoleName);
fn invalidate_allowed_ips_for_project(&self, project_id: &SmolStr);
fn invalidate_role_secret_for_project(&self, project_id: &SmolStr, role_name: &SmolStr);
fn enable_ttl(&self);
fn disable_ttl(&self);
}
@@ -47,8 +44,8 @@ impl<T> From<T> for Entry<T> {
#[derive(Default)]
struct EndpointInfo {
secret: std::collections::HashMap<RoleName, Entry<Option<AuthSecret>>>,
allowed_ips: Option<Entry<Arc<Vec<IpPattern>>>>,
secret: std::collections::HashMap<SmolStr, Entry<AuthSecret>>,
allowed_ips: Option<Entry<Arc<Vec<SmolStr>>>>,
}
impl EndpointInfo {
@@ -60,10 +57,10 @@ impl EndpointInfo {
}
pub fn get_role_secret(
&self,
role_name: &RoleName,
role_name: &SmolStr,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(Option<AuthSecret>, bool)> {
) -> Option<(AuthSecret, bool)> {
if let Some(secret) = self.secret.get(role_name) {
if valid_since < secret.created_at {
return Some((
@@ -79,7 +76,7 @@ impl EndpointInfo {
&self,
valid_since: Instant,
ignore_cache_since: Option<Instant>,
) -> Option<(Arc<Vec<IpPattern>>, bool)> {
) -> Option<(Arc<Vec<SmolStr>>, bool)> {
if let Some(allowed_ips) = &self.allowed_ips {
if valid_since < allowed_ips.created_at {
return Some((
@@ -93,7 +90,7 @@ impl EndpointInfo {
pub fn invalidate_allowed_ips(&mut self) {
self.allowed_ips = None;
}
pub fn invalidate_role_secret(&mut self, role_name: &RoleName) {
pub fn invalidate_role_secret(&mut self, role_name: &SmolStr) {
self.secret.remove(role_name);
}
}
@@ -106,9 +103,9 @@ impl EndpointInfo {
/// One may ask, why the data is stored per project, when on the user request there is only data about the endpoint available?
/// On the cplane side updates are done per project (or per branch), so it's easier to invalidate the whole project cache.
pub struct ProjectInfoCacheImpl {
cache: DashMap<EndpointId, EndpointInfo>,
cache: DashMap<SmolStr, EndpointInfo>,
project2ep: DashMap<ProjectId, HashSet<EndpointId>>,
project2ep: DashMap<SmolStr, HashSet<SmolStr>>,
config: ProjectInfoCacheOptions,
start_time: Instant,
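
The struct above pairs a per-endpoint cache with a project2ep reverse map because the two sides use different keys: user requests only carry an endpoint, while control-plane invalidations arrive per project (or branch). A minimal sketch of that two-map layout with plain HashMaps; the real implementation uses DashMap, TTLs, and size limits:

use std::collections::{HashMap, HashSet};

// Sketch only: lookups are keyed by endpoint, invalidation by project.
#[derive(Default)]
struct Cache {
    by_endpoint: HashMap<String, String>,         // endpoint -> cached value
    project2ep: HashMap<String, HashSet<String>>, // project  -> its endpoints
}

impl Cache {
    fn insert(&mut self, project: &str, endpoint: &str, value: String) {
        self.by_endpoint.insert(endpoint.to_string(), value);
        self.project2ep
            .entry(project.to_string())
            .or_default()
            .insert(endpoint.to_string());
    }

    fn get(&self, endpoint: &str) -> Option<&String> {
        self.by_endpoint.get(endpoint)
    }

    // A project-level invalidation drops every endpoint entry of that project.
    fn invalidate_project(&mut self, project: &str) {
        if let Some(endpoints) = self.project2ep.remove(project) {
            for ep in endpoints {
                self.by_endpoint.remove(&ep);
            }
        }
    }
}

fn main() {
    let mut cache = Cache::default();
    cache.insert("project-a", "endpoint-1", "secret-1".into());
    assert!(cache.get("endpoint-1").is_some());
    cache.invalidate_project("project-a");
    assert!(cache.get("endpoint-1").is_none());
}
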
@@ -116,7 +113,7 @@ pub struct ProjectInfoCacheImpl {
}
impl ProjectInfoCache for ProjectInfoCacheImpl {
fn invalidate_allowed_ips_for_project(&self, project_id: &ProjectId) {
fn invalidate_allowed_ips_for_project(&self, project_id: &SmolStr) {
info!("invalidating allowed ips for project `{}`", project_id);
let endpoints = self
.project2ep
@@ -129,7 +126,7 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
}
}
}
fn invalidate_role_secret_for_project(&self, project_id: &ProjectId, role_name: &RoleName) {
fn invalidate_role_secret_for_project(&self, project_id: &SmolStr, role_name: &SmolStr) {
info!(
"invalidating role secret for project_id `{}` and role_name `{}`",
project_id, role_name
@@ -170,9 +167,9 @@ impl ProjectInfoCacheImpl {
pub fn get_role_secret(
&self,
endpoint_id: &EndpointId,
role_name: &RoleName,
) -> Option<Cached<&Self, Option<AuthSecret>>> {
endpoint_id: &SmolStr,
role_name: &SmolStr,
) -> Option<Cached<&Self, AuthSecret>> {
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(endpoint_id)?;
let (value, ignore_cache) =
@@ -191,8 +188,8 @@ impl ProjectInfoCacheImpl {
}
pub fn get_allowed_ips(
&self,
endpoint_id: &EndpointId,
) -> Option<Cached<&Self, Arc<Vec<IpPattern>>>> {
endpoint_id: &SmolStr,
) -> Option<Cached<&Self, Arc<Vec<SmolStr>>>> {
let (valid_since, ignore_cache_since) = self.get_cache_times();
let endpoint_info = self.cache.get(endpoint_id)?;
let value = endpoint_info.get_allowed_ips(valid_since, ignore_cache_since);
@@ -208,10 +205,10 @@ impl ProjectInfoCacheImpl {
}
pub fn insert_role_secret(
&self,
project_id: &ProjectId,
endpoint_id: &EndpointId,
role_name: &RoleName,
secret: Option<AuthSecret>,
project_id: &SmolStr,
endpoint_id: &SmolStr,
role_name: &SmolStr,
secret: AuthSecret,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
@@ -225,9 +222,9 @@ impl ProjectInfoCacheImpl {
}
pub fn insert_allowed_ips(
&self,
project_id: &ProjectId,
endpoint_id: &EndpointId,
allowed_ips: Arc<Vec<IpPattern>>,
project_id: &SmolStr,
endpoint_id: &SmolStr,
allowed_ips: Arc<Vec<SmolStr>>,
) {
if self.cache.len() >= self.config.size {
// If there are too many entries, wait until the next gc cycle.
@@ -239,7 +236,7 @@ impl ProjectInfoCacheImpl {
.or_default()
.allowed_ips = Some(allowed_ips.into());
}
fn inser_project2endpoint(&self, project_id: &ProjectId, endpoint_id: &EndpointId) {
fn inser_project2endpoint(&self, project_id: &SmolStr, endpoint_id: &SmolStr) {
if let Some(mut endpoints) = self.project2ep.get_mut(project_id) {
endpoints.insert(endpoint_id.clone());
} else {
@@ -269,7 +266,7 @@ impl ProjectInfoCacheImpl {
tokio::time::interval(self.config.gc_interval / (self.cache.shards().len()) as u32);
loop {
interval.tick().await;
if self.cache.len() < self.config.size {
if self.cache.len() <= self.config.size {
// If there are not too many entries, wait until the next gc cycle.
continue;
}
@@ -300,18 +297,18 @@ impl ProjectInfoCacheImpl {
/// This is used to invalidate cache entries.
pub struct CachedLookupInfo {
/// Search by this key.
endpoint_id: EndpointId,
endpoint_id: SmolStr,
lookup_type: LookupType,
}
impl CachedLookupInfo {
pub(self) fn new_role_secret(endpoint_id: EndpointId, role_name: RoleName) -> Self {
pub(self) fn new_role_secret(endpoint_id: SmolStr, role_name: SmolStr) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::RoleSecret(role_name),
}
}
pub(self) fn new_allowed_ips(endpoint_id: EndpointId) -> Self {
pub(self) fn new_allowed_ips(endpoint_id: SmolStr) -> Self {
Self {
endpoint_id,
lookup_type: LookupType::AllowedIps,
@@ -320,7 +317,7 @@ impl CachedLookupInfo {
}
enum LookupType {
RoleSecret(RoleName),
RoleSecret(SmolStr),
AllowedIps,
}
@@ -351,6 +348,7 @@ impl Cache for ProjectInfoCacheImpl {
mod tests {
use super::*;
use crate::{console::AuthSecret, scram::ServerSecret};
use smol_str::SmolStr;
use std::{sync::Arc, time::Duration};
#[tokio::test]
@@ -364,17 +362,11 @@ mod tests {
});
let project_id = "project".into();
let endpoint_id = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock(
user1.as_str(),
[1; 32],
)));
let secret2 = None;
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
]);
let user1: SmolStr = "user1".into();
let user2: SmolStr = "user2".into();
let secret1 = AuthSecret::Scram(ServerSecret::mock(user1.as_str(), [1; 32]));
let secret2 = AuthSecret::Scram(ServerSecret::mock(user2.as_str(), [2; 32]));
let allowed_ips = Arc::new(vec!["allowed_ip1".into(), "allowed_ip2".into()]);
cache.insert_role_secret(&project_id, &endpoint_id, &user1, secret1.clone());
cache.insert_role_secret(&project_id, &endpoint_id, &user2, secret2.clone());
cache.insert_allowed_ips(&project_id, &endpoint_id, allowed_ips.clone());
@@ -387,11 +379,8 @@ mod tests {
assert_eq!(cached.value, secret2);
// Shouldn't add more than 2 roles.
let user3: RoleName = "user3".into();
let secret3 = Some(AuthSecret::Scram(ServerSecret::mock(
user3.as_str(),
[3; 32],
)));
let user3: SmolStr = "user3".into();
let secret3 = AuthSecret::Scram(ServerSecret::mock(user3.as_str(), [3; 32]));
cache.insert_role_secret(&project_id, &endpoint_id, &user3, secret3.clone());
assert!(cache.get_role_secret(&endpoint_id, &user3).is_none());
@@ -422,20 +411,11 @@ mod tests {
let project_id = "project".into();
let endpoint_id = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock(
user1.as_str(),
[1; 32],
)));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock(
user2.as_str(),
[2; 32],
)));
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
]);
let user1: SmolStr = "user1".into();
let user2: SmolStr = "user2".into();
let secret1 = AuthSecret::Scram(ServerSecret::mock(user1.as_str(), [1; 32]));
let secret2 = AuthSecret::Scram(ServerSecret::mock(user2.as_str(), [2; 32]));
let allowed_ips = Arc::new(vec!["allowed_ip1".into(), "allowed_ip2".into()]);
cache.insert_role_secret(&project_id, &endpoint_id, &user1, secret1.clone());
cache.insert_role_secret(&project_id, &endpoint_id, &user2, secret2.clone());
cache.insert_allowed_ips(&project_id, &endpoint_id, allowed_ips.clone());
@@ -477,20 +457,11 @@ mod tests {
let project_id = "project".into();
let endpoint_id = "endpoint".into();
let user1: RoleName = "user1".into();
let user2: RoleName = "user2".into();
let secret1 = Some(AuthSecret::Scram(ServerSecret::mock(
user1.as_str(),
[1; 32],
)));
let secret2 = Some(AuthSecret::Scram(ServerSecret::mock(
user2.as_str(),
[2; 32],
)));
let allowed_ips = Arc::new(vec![
"127.0.0.1".parse().unwrap(),
"127.0.0.2".parse().unwrap(),
]);
let user1: SmolStr = "user1".into();
let user2: SmolStr = "user2".into();
let secret1 = AuthSecret::Scram(ServerSecret::mock(user1.as_str(), [1; 32]));
let secret2 = AuthSecret::Scram(ServerSecret::mock(user2.as_str(), [2; 32]));
let allowed_ips = Arc::new(vec!["allowed_ip1".into(), "allowed_ip2".into()]);
cache.insert_role_secret(&project_id, &endpoint_id, &user1, secret1.clone());
cache.clone().disable_ttl();
tokio::time::advance(Duration::from_millis(100)).await;


@@ -1,10 +1,7 @@
use serde::Deserialize;
use smol_str::SmolStr;
use std::fmt;
use crate::auth::IpPattern;
use crate::{BranchId, EndpointId, ProjectId};
/// Generic error response with human-readable description.
/// Note that we can't always present it to user as is.
#[derive(Debug, Deserialize)]
@@ -17,8 +14,8 @@ pub struct ConsoleError {
#[derive(Deserialize)]
pub struct GetRoleSecret {
pub role_secret: Box<str>,
pub allowed_ips: Option<Vec<IpPattern>>,
pub project_id: Option<ProjectId>,
pub allowed_ips: Option<Vec<Box<str>>>,
pub project_id: Option<Box<str>>,
}
// Manually implement debug to omit sensitive info.
@@ -95,9 +92,9 @@ impl fmt::Debug for DatabaseInfo {
/// Also known as `ProxyMetricsAuxInfo` in the console.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct MetricsAuxInfo {
pub endpoint_id: EndpointId,
pub project_id: ProjectId,
pub branch_id: BranchId,
pub endpoint_id: SmolStr,
pub project_id: SmolStr,
pub branch_id: SmolStr,
}
impl MetricsAuxInfo {


@@ -13,10 +13,16 @@ use tracing::{error, info, info_span, Instrument};
static CPLANE_WAITERS: Lazy<Waiters<ComputeReady>> = Lazy::new(Default::default);
/// Give caller an opportunity to wait for the cloud's reply.
pub fn get_waiter(
pub async fn with_waiter<R, T, E>(
psql_session_id: impl Into<String>,
) -> Result<Waiter<'static, ComputeReady>, waiters::RegisterError> {
CPLANE_WAITERS.register(psql_session_id.into())
action: impl FnOnce(Waiter<'static, ComputeReady>) -> R,
) -> Result<T, E>
where
R: std::future::Future<Output = Result<T, E>>,
E: From<waiters::RegisterError>,
{
let waiter = CPLANE_WAITERS.register(psql_session_id.into())?;
action(waiter).await
}
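
The with_waiter/notify pair above is a rendezvous between two connections: the proxy registers a waiter under a generated psql_session_id, hands the user a link containing that id, and the management endpoint later calls notify with the compute's connection info, waking the blocked handshake. A minimal sketch of such a registry built on oneshot channels; the names and the String payload are illustrative, not the proxy's waiters module:

use std::collections::HashMap;
use std::sync::Mutex;
use tokio::sync::oneshot;

// Sketch only: a registry of sessions still waiting for the console's reply.
#[derive(Default)]
struct Waiters {
    pending: Mutex<HashMap<String, oneshot::Sender<String>>>,
}

impl Waiters {
    fn register(&self, session_id: String) -> oneshot::Receiver<String> {
        let (tx, rx) = oneshot::channel();
        self.pending.lock().unwrap().insert(session_id, tx);
        rx
    }

    fn notify(&self, session_id: &str, msg: String) -> Result<(), String> {
        let tx = self
            .pending
            .lock()
            .unwrap()
            .remove(session_id)
            .ok_or_else(|| format!("unknown session {session_id}"))?;
        tx.send(msg).map_err(|_| "waiter dropped".to_string())
    }
}

#[tokio::main]
async fn main() {
    let waiters = Waiters::default();
    let rx = waiters.register("session-42".into());
    // In the real flow this arrives over the separate management connection.
    waiters.notify("session-42", "db info".into()).unwrap();
    assert_eq!(rx.await.unwrap(), "db info");
}
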
pub fn notify(psql_session_id: &str, msg: ComputeReady) -> Result<(), waiters::NotifyError> {
@@ -71,7 +77,7 @@ async fn handle_connection(socket: TcpStream) -> Result<(), QueryError> {
}
/// A message received by `mgmt` when a compute node is ready.
pub type ComputeReady = DatabaseInfo;
pub type ComputeReady = Result<DatabaseInfo, String>;
// TODO: replace with an http-based protocol.
struct MgmtHandler;
@@ -96,7 +102,7 @@ fn try_process_query(pgb: &mut PostgresBackendTCP, query: &str) -> Result<(), Qu
let _enter = span.enter();
info!("got response: {:?}", resp.result);
match notify(resp.session_id, resp.result) {
match notify(resp.session_id, Ok(resp.result)) {
Ok(()) => {
pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
.write_message_noflush(&BeMessage::DataRow(&[Some(b"ok")]))?


@@ -4,15 +4,16 @@ pub mod neon;
use super::messages::MetricsAuxInfo;
use crate::{
auth::{backend::ComputeUserInfo, IpPattern},
auth::backend::ComputeUserInfo,
cache::{project_info::ProjectInfoCacheImpl, Cached, TimedLru},
compute,
config::{CacheOptions, ProjectInfoCacheOptions},
context::RequestMonitoring,
scram, EndpointCacheKey, ProjectId,
scram,
};
use async_trait::async_trait;
use dashmap::DashMap;
use smol_str::SmolStr;
use std::{sync::Arc, time::Duration};
use tokio::sync::{OwnedSemaphorePermit, Semaphore};
use tokio::time::Instant;
@@ -211,9 +212,9 @@ pub enum AuthSecret {
pub struct AuthInfo {
pub secret: Option<AuthSecret>,
/// List of IP addresses allowed for authorization.
pub allowed_ips: Vec<IpPattern>,
pub allowed_ips: Vec<SmolStr>,
/// Project ID. This is used for cache invalidation.
pub project_id: Option<ProjectId>,
pub project_id: Option<SmolStr>,
}
/// Info for establishing a connection to a compute node.
@@ -232,10 +233,10 @@ pub struct NodeInfo {
pub allow_self_signed_compute: bool,
}
pub type NodeInfoCache = TimedLru<EndpointCacheKey, NodeInfo>;
pub type NodeInfoCache = TimedLru<SmolStr, NodeInfo>;
pub type CachedNodeInfo = Cached<&'static NodeInfoCache>;
pub type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, Option<AuthSecret>>;
pub type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<IpPattern>>>;
pub type CachedRoleSecret = Cached<&'static ProjectInfoCacheImpl, AuthSecret>;
pub type CachedAllowedIps = Cached<&'static ProjectInfoCacheImpl, Arc<Vec<SmolStr>>>;
/// This will allocate on each call, but the http requests alone
/// already require a few allocations, so it should be fine.
@@ -247,75 +248,23 @@ pub trait Api {
async fn get_role_secret(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
creds: &ComputeUserInfo,
) -> Result<Option<CachedRoleSecret>, errors::GetAuthInfoError>;
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
creds: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError>;
/// Wake up the compute node and return the corresponding connection info.
async fn wake_compute(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
creds: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError>;
}
#[derive(Clone)]
pub enum ConsoleBackend {
/// Current Cloud API (V2).
Console(neon::Api),
/// Local mock of Cloud API (V2).
#[cfg(feature = "testing")]
Postgres(mock::Api),
}
#[async_trait]
impl Api for ConsoleBackend {
async fn get_role_secret(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, errors::GetAuthInfoError> {
use ConsoleBackend::*;
match self {
Console(api) => api.get_role_secret(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.get_role_secret(ctx, user_info).await,
}
}
async fn get_allowed_ips(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedAllowedIps, errors::GetAuthInfoError> {
use ConsoleBackend::*;
match self {
Console(api) => api.get_allowed_ips(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.get_allowed_ips(ctx, user_info).await,
}
}
async fn wake_compute(
&self,
ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedNodeInfo, errors::WakeComputeError> {
use ConsoleBackend::*;
match self {
Console(api) => api.wake_compute(ctx, user_info).await,
#[cfg(feature = "testing")]
Postgres(api) => api.wake_compute(ctx, user_info).await,
}
}
}
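
The ConsoleBackend enum and its Api impl shown above are plain enum dispatch: one concrete type implements the trait by forwarding every method to whichever variant is active, which keeps callers monomorphic and lets #[cfg(feature = "testing")] compile the mock variant out of production builds. A tiny sketch of the shape, with async and the real method set omitted:

// Sketch only: forwarding a trait over an enum of concrete implementations.
trait Api {
    fn wake_compute(&self) -> String;
}

struct NeonApi;
impl Api for NeonApi {
    fn wake_compute(&self) -> String {
        "real compute".to_string()
    }
}

struct MockApi;
impl Api for MockApi {
    fn wake_compute(&self) -> String {
        "mock compute".to_string()
    }
}

enum ConsoleBackendSketch {
    Console(NeonApi),
    // In the real code this variant sits behind #[cfg(feature = "testing")].
    Postgres(MockApi),
}

impl Api for ConsoleBackendSketch {
    fn wake_compute(&self) -> String {
        match self {
            ConsoleBackendSketch::Console(api) => api.wake_compute(),
            ConsoleBackendSketch::Postgres(api) => api.wake_compute(),
        }
    }
}

fn main() {
    let backend = ConsoleBackendSketch::Console(NeonApi);
    println!("{}", backend.wake_compute());
}
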
/// Various caches for [`console`](super).
pub struct ApiCaches {
/// Cache for the `wake_compute` API method.
@@ -344,7 +293,7 @@ impl ApiCaches {
/// Various caches for [`console`](super).
pub struct ApiLocks {
name: &'static str,
node_locks: DashMap<EndpointCacheKey, Arc<Semaphore>>,
node_locks: DashMap<SmolStr, Arc<Semaphore>>,
permits: usize,
timeout: Duration,
registered: prometheus::IntCounter,
@@ -412,7 +361,7 @@ impl ApiLocks {
pub async fn get_wake_compute_permit(
&self,
key: &EndpointCacheKey,
key: &SmolStr,
) -> Result<WakeComputePermit, errors::WakeComputeError> {
if self.permits == 0 {
return Ok(WakeComputePermit { permit: None });


@@ -4,13 +4,14 @@ use super::{
errors::{ApiError, GetAuthInfoError, WakeComputeError},
AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
};
use crate::cache::Cached;
use crate::console::provider::{CachedAllowedIps, CachedRoleSecret};
use crate::context::RequestMonitoring;
use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
use crate::{auth::IpPattern, cache::Cached};
use async_trait::async_trait;
use futures::TryFutureExt;
use std::{str::FromStr, sync::Arc};
use smol_str::SmolStr;
use std::sync::Arc;
use thiserror::Error;
use tokio_postgres::{config::SslMode, Client};
use tracing::{error, info, info_span, warn, Instrument};
@@ -87,9 +88,7 @@ impl Api {
{
Some(s) => {
info!("got allowed_ips: {s}");
s.split(',')
.map(|s| IpPattern::from_str(s).unwrap())
.collect()
s.split(',').map(String::from).collect()
}
None => vec![],
};
@@ -101,7 +100,7 @@ impl Api {
.await?;
Ok(AuthInfo {
secret,
allowed_ips,
allowed_ips: allowed_ips.iter().map(SmolStr::from).collect(),
project_id: None,
})
}
@@ -151,10 +150,12 @@ impl super::Api for Api {
&self,
_ctx: &mut RequestMonitoring,
user_info: &ComputeUserInfo,
) -> Result<CachedRoleSecret, GetAuthInfoError> {
Ok(CachedRoleSecret::new_uncached(
self.do_get_auth_info(user_info).await?.secret,
))
) -> Result<Option<CachedRoleSecret>, GetAuthInfoError> {
Ok(self
.do_get_auth_info(user_info)
.await?
.secret
.map(CachedRoleSecret::new_uncached))
}
async fn get_allowed_ips(

Some files were not shown because too many files have changed in this diff.