Add test for prewarm under workload

Make ruffhappy
Eliminate stale reads from LFC in case of prewarm conflict
2026-02-13 15:40:37 +00:00 · 2024-12-23 11:36:47 +02:00 · 2024-12-15 08:02:35 +02:00 · 2024-12-14 21:58:26 +02:00 · 2024-12-14 21:11:20 +02:00 · 2024-12-14 21:09:58 +02:00
215 changed files with 2416 additions and 3799 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -3,16 +3,6 @@
 # by the RUSTDOCFLAGS env var in CI.
 rustdocflags = ["-Arustdoc::private_intra_doc_links"]

-# Enable frame pointers. This may have a minor performance overhead, but makes it easier and more
-# efficient to obtain stack traces (and thus CPU/heap profiles). It may also avoid seg faults that
-# we've seen with libunwind-based profiling. See also:
-#
-# * <https://www.brendangregg.com/blog/2024-03-17/the-return-of-the-frame-pointers.html>
-# * <https://github.com/rust-lang/rust/pull/122646>
-#
-# NB: the RUSTFLAGS envvar will replace this. Make sure to update e.g. Dockerfile as well.
-rustflags = ["-Cforce-frame-pointers=yes"]
-
 [alias]
 build_testing = ["build", "--features", "testing"]
 neon = ["run", "--bin", "neon_local"]
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -33,7 +33,7 @@ jobs:
          # SC2086 - Double quote to prevent globbing and word splitting. - https://www.shellcheck.net/wiki/SC2086
          SHELLCHECK_OPTS: --exclude=SC2046,SC2086
        with:
-          fail_level: error
+          fail_on_error: true
          filter_mode: nofilter
          level: error

--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -308,7 +308,6 @@ jobs:
          "image": [ "'"$image_default"'" ],
          "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier",       "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
-                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new-many-tables","db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned-bookworm" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-new",      "db_size": "10gb","runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned-bookworm" },
@@ -411,7 +410,7 @@ jobs:
        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

    - name: Create Neon Project
-      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-new-many-tables", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
+      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
@@ -430,7 +429,7 @@ jobs:
          neonvm-captest-sharding-reuse)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
            ;;
-          neonvm-captest-new | neonvm-captest-new-many-tables | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
+          neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
            CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
            ;;
          rds-aurora)
@@ -447,26 +446,6 @@ jobs:

        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT

-    # we want to compare Neon project OLTP throughput and latency at scale factor 10 GB 
-    # without (neonvm-captest-new)
-    # and with (neonvm-captest-new-many-tables) many relations in the database
-    - name: Create many relations before the run
-      if: contains(fromJson('["neonvm-captest-new-many-tables"]'), matrix.platform)
-      uses: ./.github/actions/run-python-test-set
-      with:
-        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance
-        run_in_parallel: false
-        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
-        extra_params: -m remote_cluster --timeout 21600 -k test_perf_many_relations
-        pg_version: ${{ env.DEFAULT_PG_VERSION }}
-        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
-      env:
-        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
-        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
-        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-        TEST_NUM_RELATIONS: 10000
-
    - name: Benchmark init
      uses: ./.github/actions/run-python-test-set
      with:
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -212,7 +212,7 @@ jobs:
          fi
          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
      - name: Run cargo clippy (debug)
-        run: cargo hack --features default --ignore-unknown-features --feature-powerset clippy $CLIPPY_COMMON_ARGS
+        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS

      - name: Check documentation generation
        run: cargo doc --workspace --no-deps --document-private-items
@@ -538,7 +538,7 @@ jobs:

  trigger-e2e-tests:
    if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
-    needs: [ check-permissions, promote-images-dev, tag ]
+    needs: [ check-permissions, promote-images, tag ]
    uses: ./.github/workflows/trigger-e2e-tests.yml
    secrets: inherit

@@ -930,8 +930,8 @@ jobs:
          docker compose -f ./docker-compose/docker-compose.yml logs || 0
          docker compose -f ./docker-compose/docker-compose.yml down

-  promote-images-dev:
-    needs: [ check-permissions, tag, vm-compute-node-image ]
+  promote-images:
+    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
    runs-on: ubuntu-22.04

    permissions:
@@ -965,25 +965,6 @@ jobs:
                                               neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }}
          done

-  promote-images-prod:
-    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
-    runs-on: ubuntu-22.04
-    if: github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-
-    permissions:
-      id-token: write # aws-actions/configure-aws-credentials
-      statuses: write
-      contents: read
-
-    env:
-      VERSIONS: v14 v15 v16 v17
-
-    steps:
-      - uses: docker/login-action@v3
-        with:
-          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
-
      - name: Add latest tag to images
        if: github.ref_name == 'main'
        run: |
@@ -1029,7 +1010,7 @@ jobs:

  push-to-acr-dev:
    if: github.ref_name == 'main'
-    needs: [ tag, promote-images-dev ]
+    needs: [ tag, promote-images ]
    uses: ./.github/workflows/_push-to-acr.yml
    with:
      client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
@@ -1041,7 +1022,7 @@ jobs:

  push-to-acr-prod:
    if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ tag, promote-images-prod ]
+    needs: [ tag, promote-images ]
    uses: ./.github/workflows/_push-to-acr.yml
    with:
      client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
@@ -1131,7 +1112,7 @@ jobs:
          exit 1

  deploy:
-    needs: [ check-permissions, promote-images-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
+    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
    permissions:
@@ -1352,7 +1333,7 @@ jobs:
          done

  pin-build-tools-image:
-    needs: [ build-build-tools-image, promote-images-prod, build-and-test-locally ]
+    needs: [ build-build-tools-image, promote-images, build-and-test-locally ]
    if: github.ref_name == 'main'
    uses: ./.github/workflows/pin-build-tools-image.yml
    with:
@@ -1375,7 +1356,7 @@ jobs:
      - build-and-test-locally
      - check-codestyle-python
      - check-codestyle-rust
-      - promote-images-dev
+      - promote-images
      - test-images
      - trigger-custom-extensions-build-and-wait
    runs-on: ubuntu-22.04
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -21,8 +21,6 @@ concurrency:

 permissions:
  id-token: write # aws-actions/configure-aws-credentials
-  statuses: write
-  contents: write

 jobs:
  regress:
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -68,7 +68,7 @@ jobs:
      GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
      TAG: ${{ needs.tag.outputs.build-tag }}
    steps:
-      - name: Wait for `promote-images-dev` job to finish
+      - name: Wait for `promote-images` job to finish
        # It's important to have a timeout here, the script in the step can run infinitely
        timeout-minutes: 60
        run: |
@@ -79,17 +79,17 @@ jobs:
          # For PRs we use the run id as the tag
          BUILD_AND_TEST_RUN_ID=${TAG}
          while true; do
-            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images-dev") | .conclusion')
+            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
            case "$conclusion" in
              success)
                break
                ;;
              failure | cancelled | skipped)
-                echo "The 'promote-images-dev' job didn't succeed: '${conclusion}'. Exiting..."
+                echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
                exit 1
                ;;
              *)
-                echo "The 'promote-images-dev' hasn't succeed yet. Waiting..."
+                echo "The 'promote-images' hasn't succeed yet. Waiting..."
                sleep 60
                ;;
            esac
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10,9 +10,9 @@ checksum = "8b5ace29ee3216de37c0546865ad08edef58b0f9e76838ed8959a84a990e58c5"

 [[package]]
 name = "addr2line"
-version = "0.24.2"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1"
+checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
 dependencies = [
 "gimli",
 ]
@@ -23,12 +23,6 @@ version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"

-[[package]]
-name = "adler2"
-version = "2.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
-
 [[package]]
 name = "ahash"
 version = "0.8.11"
@@ -877,17 +871,17 @@ dependencies = [

 [[package]]
 name = "backtrace"
-version = "0.3.74"
+version = "0.3.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
+checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837"
 dependencies = [
 "addr2line",
+ "cc",
 "cfg-if",
 "libc",
- "miniz_oxide 0.8.0",
+ "miniz_oxide",
 "object",
 "rustc-demangle",
- "windows-targets 0.52.6",
 ]

 [[package]]
@@ -1133,7 +1127,7 @@ dependencies = [
 "num-traits",
 "serde",
 "wasm-bindgen",
- "windows-targets 0.52.6",
+ "windows-targets 0.52.4",
 ]

 [[package]]
@@ -1274,7 +1268,6 @@ dependencies = [
 "chrono",
 "clap",
 "compute_api",
- "fail",
 "flate2",
 "futures",
 "hyper 0.14.30",
@@ -1733,9 +1726,9 @@ checksum = "ab03c107fafeb3ee9f5925686dbb7a73bc76e3932abb0d2b365cb64b169cf04c"

 [[package]]
 name = "diesel"
-version = "2.2.6"
+version = "2.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12"
+checksum = "65e13bab2796f412722112327f3e575601a3e9cdcbe426f0d30dbf43f3f5dc71"
 dependencies = [
 "bitflags 2.4.1",
 "byteorder",
@@ -2114,7 +2107,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
 dependencies = [
 "crc32fast",
- "miniz_oxide 0.7.1",
+ "miniz_oxide",
 ]

 [[package]]
@@ -2315,9 +2308,9 @@ dependencies = [

 [[package]]
 name = "gimli"
-version = "0.31.1"
+version = "0.28.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
+checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"

 [[package]]
 name = "git-version"
@@ -3411,15 +3404,6 @@ dependencies = [
 "adler",
 ]

-[[package]]
-name = "miniz_oxide"
-version = "0.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1"
-dependencies = [
- "adler2",
-]
-
 [[package]]
 name = "mio"
 version = "0.8.11"
@@ -3654,9 +3638,9 @@ dependencies = [

 [[package]]
 name = "object"
-version = "0.36.5"
+version = "0.32.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e"
+checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
 dependencies = [
 "memchr",
 ]
@@ -4417,13 +4401,11 @@ dependencies = [
 "bindgen",
 "bytes",
 "crc32c",
- "criterion",
 "env_logger",
 "log",
 "memoffset 0.9.0",
 "once_cell",
 "postgres",
- "pprof",
 "regex",
 "serde",
 "thiserror",
@@ -4494,9 +4476,9 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"

 [[package]]
 name = "pq-sys"
-version = "0.6.3"
+version = "0.4.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6cc05d7ea95200187117196eee9edd0644424911821aeb28a18ce60ea0b8793"
+checksum = "31c0052426df997c0cbd30789eb44ca097e3541717a7b8fa36b1c464ee7edebd"
 dependencies = [
 "vcpkg",
 ]
@@ -5080,7 +5062,6 @@ dependencies = [
 "once_cell",
 "pin-project-lite",
 "rand 0.8.5",
- "reqwest",
 "scopeguard",
 "serde",
 "serde_json",
@@ -5339,9 +5320,9 @@ dependencies = [

 [[package]]
 name = "rustc-demangle"
-version = "0.1.24"
+version = "0.1.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"

 [[package]]
 name = "rustc-hash"
@@ -5554,7 +5535,6 @@ dependencies = [
 "remote_storage",
 "reqwest",
 "safekeeper_api",
- "safekeeper_client",
 "scopeguard",
 "sd-notify",
 "serde",
@@ -5592,18 +5572,6 @@ dependencies = [
 "utils",
 ]

-[[package]]
-name = "safekeeper_client"
-version = "0.1.0"
-dependencies = [
- "reqwest",
- "safekeeper_api",
- "serde",
- "thiserror",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -7235,7 +7203,6 @@ dependencies = [
 "anyhow",
 "arc-swap",
 "async-compression",
- "backtrace",
 "bincode",
 "byteorder",
 "bytes",
@@ -7246,14 +7213,12 @@ dependencies = [
 "criterion",
 "diatomic-waker",
 "fail",
- "flate2",
 "futures",
 "git-version",
 "hex",
 "hex-literal",
 "humantime",
 "hyper 0.14.30",
- "itertools 0.10.5",
 "jemalloc_pprof",
 "jsonwebtoken",
 "metrics",
@@ -7610,7 +7575,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
 dependencies = [
 "windows-core",
- "windows-targets 0.52.6",
+ "windows-targets 0.52.4",
 ]

 [[package]]
@@ -7619,7 +7584,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
 dependencies = [
- "windows-targets 0.52.6",
+ "windows-targets 0.52.4",
 ]

 [[package]]
@@ -7637,7 +7602,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
 dependencies = [
- "windows-targets 0.52.6",
+ "windows-targets 0.52.4",
 ]

 [[package]]
@@ -7657,18 +7622,17 @@ dependencies = [

 [[package]]
 name = "windows-targets"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
 dependencies = [
- "windows_aarch64_gnullvm 0.52.6",
- "windows_aarch64_msvc 0.52.6",
- "windows_i686_gnu 0.52.6",
- "windows_i686_gnullvm",
- "windows_i686_msvc 0.52.6",
- "windows_x86_64_gnu 0.52.6",
- "windows_x86_64_gnullvm 0.52.6",
- "windows_x86_64_msvc 0.52.6",
+ "windows_aarch64_gnullvm 0.52.4",
+ "windows_aarch64_msvc 0.52.4",
+ "windows_i686_gnu 0.52.4",
+ "windows_i686_msvc 0.52.4",
+ "windows_x86_64_gnu 0.52.4",
+ "windows_x86_64_gnullvm 0.52.4",
+ "windows_x86_64_msvc 0.52.4",
 ]

 [[package]]
@@ -7679,9 +7643,9 @@ checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc"

 [[package]]
 name = "windows_aarch64_gnullvm"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"

 [[package]]
 name = "windows_aarch64_msvc"
@@ -7691,9 +7655,9 @@ checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3"

 [[package]]
 name = "windows_aarch64_msvc"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"

 [[package]]
 name = "windows_i686_gnu"
@@ -7703,15 +7667,9 @@ checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241"

 [[package]]
 name = "windows_i686_gnu"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.52.6"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"

 [[package]]
 name = "windows_i686_msvc"
@@ -7721,9 +7679,9 @@ checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00"

 [[package]]
 name = "windows_i686_msvc"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"

 [[package]]
 name = "windows_x86_64_gnu"
@@ -7733,9 +7691,9 @@ checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1"

 [[package]]
 name = "windows_x86_64_gnu"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"

 [[package]]
 name = "windows_x86_64_gnullvm"
@@ -7745,9 +7703,9 @@ checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953"

 [[package]]
 name = "windows_x86_64_gnullvm"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"

 [[package]]
 name = "windows_x86_64_msvc"
@@ -7757,9 +7715,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"

 [[package]]
 name = "windows_x86_64_msvc"
-version = "0.52.6"
+version = "0.52.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"

 [[package]]
 name = "winnow"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,7 +11,6 @@ members = [
    "pageserver/pagebench",
    "proxy",
    "safekeeper",
-    "safekeeper/client",
    "storage_broker",
    "storage_controller",
    "storage_controller/client",
@@ -52,7 +51,6 @@ anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
 async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
 atomic-take = "1.1.0"
-backtrace = "0.3.74"
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
@@ -135,7 +133,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] }
 parquet_derive = "53"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
-pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] }
+pprof = { version = "0.14", features = ["criterion", "flamegraph", "protobuf", "protobuf-codec"] }
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.13"
@@ -235,7 +233,6 @@ postgres_initdb = { path = "./libs/postgres_initdb" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
 remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
-safekeeper_client = { path = "./safekeeper/client" }
 desim = { version = "0.1", path = "./libs/desim" }
 storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
 storage_controller_client = { path = "./storage_controller/client" }
@@ -266,8 +263,6 @@ tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", br
 [profile.release]
 # This is useful for profiling and, to some extent, debug.
 # Besides, debug info should not affect the performance.
-#
-# NB: we also enable frame pointers for improved profiling, see .cargo/config.toml.
 debug = true

 # disable debug symbols for all packages except this one to decrease binaries size
--- a/4
+++ b/4
@@ -45,7 +45,7 @@ COPY --chown=nonroot . .

 ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
-    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo build \
+    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
@@ -69,8 +69,6 @@ RUN set -e \
        libreadline-dev \
        libseccomp-dev \
        ca-certificates \
-	# System postgres for use with client libraries (e.g. in storage controller)
-        postgresql-15 \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
    && useradd -d /data neon \
    && chown -R neon:neon /data
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -35,12 +35,10 @@ RUN case $DEBIAN_VERSION in \
      ;; \
    esac && \
    apt update &&  \
-    apt install --no-install-recommends --no-install-suggests -y \
-    ninja-build git autoconf automake libtool build-essential bison flex libreadline-dev \
+    apt install --no-install-recommends -y git autoconf automake libtool build-essential bison flex libreadline-dev \
    zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget ca-certificates pkg-config libssl-dev \
    libicu-dev libxslt1-dev liblz4-dev libzstd-dev zstd \
-    $VERSION_INSTALLS \
-    && apt clean && rm -rf /var/lib/apt/lists/*
+    $VERSION_INSTALLS

 #########################################################################################
 #
@@ -115,12 +113,10 @@ ARG DEBIAN_VERSION
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install --no-install-recommends --no-install-suggests -y \
-    gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
+    apt install --no-install-recommends -y gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
    libboost-system-dev libboost-iostreams-dev libboost-program-options-dev libboost-timer-dev \
    libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
-    protobuf-c-compiler xsltproc \
-    && apt clean && rm -rf /var/lib/apt/lists/*
+    protobuf-c-compiler xsltproc


 # Postgis 3.5.0 requires SFCGAL 1.4+
@@ -147,9 +143,9 @@ RUN case "${DEBIAN_VERSION}" in \
    wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \
    echo "${SFCGAL_CHECKSUM} SFCGAL.tar.gz" | sha256sum --check && \
    mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
-    cmake -DCMAKE_BUILD_TYPE=Release -GNinja . && ninja -j $(getconf _NPROCESSORS_ONLN) && \
-    DESTDIR=/sfcgal ninja install -j $(getconf _NPROCESSORS_ONLN) && \
-    ninja clean && cp -R /sfcgal/* /
+    cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
+    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
+    make clean && cp -R /sfcgal/* /

 ENV PATH="/usr/local/pgsql/bin:$PATH"

@@ -217,9 +213,9 @@ RUN case "${PG_VERSION}" in \
    echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \
    mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
-    cmake -GNinja -DCMAKE_BUILD_TYPE=Release .. && \
-    ninja -j $(getconf _NPROCESSORS_ONLN) && \
-    ninja -j $(getconf _NPROCESSORS_ONLN) install && \
+    cmake -DCMAKE_BUILD_TYPE=Release .. && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
    cp /usr/local/pgsql/share/extension/pgrouting.control /extensions/postgis && \
@@ -239,9 +235,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY compute/patches/plv8-3.1.10.patch /plv8-3.1.10.patch

 RUN apt update && \
-    apt install --no-install-recommends --no-install-suggests -y \
-    ninja-build python3-dev libncurses5 binutils clang \
-    && apt clean && rm -rf /var/lib/apt/lists/*
+    apt install --no-install-recommends -y ninja-build python3-dev libncurses5 binutils clang

 # plv8 3.2.3 supports v17
 # last release v3.2.3 - Sep 7, 2024
@@ -307,10 +301,9 @@ RUN mkdir -p /h3/usr/ && \
    echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
    mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
-    cmake .. -GNinja -DBUILD_BENCHMARKS=0 -DCMAKE_BUILD_TYPE=Release \
-        -DBUILD_FUZZERS=0 -DBUILD_FILTERS=0 -DBUILD_GENERATORS=0 -DBUILD_TESTING=0 \
-    && ninja -j $(getconf _NPROCESSORS_ONLN) && \
-    DESTDIR=/h3 ninja install && \
+    cmake .. -DCMAKE_BUILD_TYPE=Release && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    DESTDIR=/h3 make install && \
    cp -R /h3/usr / && \
    rm -rf build

@@ -657,15 +650,14 @@ FROM build-deps AS rdkit-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN apt update && \
-    apt install --no-install-recommends --no-install-suggests -y \
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y \
        libboost-iostreams1.74-dev \
        libboost-regex1.74-dev \
        libboost-serialization1.74-dev \
        libboost-system1.74-dev \
        libeigen3-dev \
-        libboost-all-dev \
-    && apt clean && rm -rf /var/lib/apt/lists/*
+        libboost-all-dev

 # rdkit Release_2024_09_1 supports v17
 # last release Release_2024_09_1 - Sep 27, 2024
@@ -701,8 +693,6 @@ RUN case "${PG_VERSION}" in \
        -D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \
        -D RDK_BUILD_YAEHMOP_SUPPORT=OFF \
        -D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \
-        -D RDK_TEST_MULTITHREADED=OFF \
-        -D RDK_BUILD_CPP_TESTS=OFF \
        -D RDK_USE_URF=OFF \
        -D RDK_BUILD_PGSQL=ON \
        -D RDK_PGSQL_STATIC=ON \
@@ -714,10 +704,9 @@ RUN case "${PG_VERSION}" in \
        -D RDK_INSTALL_COMIC_FONTS=OFF \
        -D RDK_BUILD_FREETYPE_SUPPORT=OFF \
        -D CMAKE_BUILD_TYPE=Release \
-        -GNinja \
        . && \
-    ninja -j $(getconf _NPROCESSORS_ONLN) && \
-    ninja -j $(getconf _NPROCESSORS_ONLN) install && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control

 #########################################################################################
@@ -860,9 +849,8 @@ FROM build-deps AS rust-extensions-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN apt update && \
-    apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
-    apt clean && rm -rf /var/lib/apt/lists/* && \
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y curl libclang-dev && \
    useradd -ms /bin/bash nonroot -b /home

 ENV HOME=/home/nonroot
@@ -897,9 +885,8 @@ FROM build-deps AS rust-extensions-build-pgrx12
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN apt update && \
-    apt install --no-install-recommends --no-install-suggests -y curl libclang-dev && \
-    apt clean && rm -rf /var/lib/apt/lists/* && \
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y curl libclang-dev && \
    useradd -ms /bin/bash nonroot -b /home

 ENV HOME=/home/nonroot
@@ -927,22 +914,18 @@ FROM rust-extensions-build-pgrx12 AS pg-onnx-build

 # cmake 3.26 or higher is required, so installing it using pip (bullseye-backports has cmake 3.25).
 # Install it using virtual environment, because Python 3.11 (the default version on Debian 12 (Bookworm)) complains otherwise
-RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
-    python3 python3-pip python3-venv && \
-    apt clean && rm -rf /var/lib/apt/lists/* && \
+RUN apt-get update && apt-get install -y python3 python3-pip python3-venv && \
    python3 -m venv venv && \
    . venv/bin/activate && \
    python3 -m pip install cmake==3.30.5 && \
    wget https://github.com/microsoft/onnxruntime/archive/refs/tags/v1.18.1.tar.gz -O onnxruntime.tar.gz && \
    mkdir onnxruntime-src && cd onnxruntime-src && tar xzf ../onnxruntime.tar.gz --strip-components=1 -C . && \
-    ./build.sh --config Release --parallel --cmake_generator Ninja \
-    --skip_submodule_sync --skip_tests --allow_running_as_root
+    ./build.sh --config Release --parallel --skip_submodule_sync --skip_tests --allow_running_as_root


 FROM pg-onnx-build AS pgrag-pg-build

-RUN apt update && apt install --no-install-recommends --no-install-suggests -y protobuf-compiler \
-    && apt clean && rm -rf /var/lib/apt/lists/* && \
+RUN apt-get install -y protobuf-compiler && \
    wget https://github.com/neondatabase-labs/pgrag/archive/refs/tags/v0.0.0.tar.gz -O pgrag.tar.gz &&  \
    echo "2cbe394c1e74fc8bcad9b52d5fbbfb783aef834ca3ce44626cfd770573700bb4 pgrag.tar.gz" | sha256sum --check && \
    mkdir pgrag-src && cd pgrag-src && tar xzf ../pgrag.tar.gz --strip-components=1 -C . && \
@@ -1185,25 +1168,6 @@ RUN case "${PG_VERSION}" in \
    make BUILD_TYPE=release -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_mooncake.control

-#########################################################################################
-#
-# Layer "pg_repack"
-# compile pg_repack extension
-#
-#########################################################################################
-
-FROM build-deps AS pg-repack-build
-ARG PG_VERSION
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
-
-RUN wget https://github.com/reorg/pg_repack/archive/refs/tags/ver_1.5.2.tar.gz -O pg_repack.tar.gz && \
-    echo '4516cad42251ed3ad53ff619733004db47d5755acac83f75924cd94d1c4fb681 pg_repack.tar.gz' | sha256sum --check && \
-    mkdir pg_repack-src && cd pg_repack-src && tar xzf ../pg_repack.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install
-
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -1249,7 +1213,6 @@ COPY --from=pg-anon-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-partman-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-repack-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -1285,7 +1248,7 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \

 #########################################################################################
 #
-# Compile the Neon-specific `compute_ctl`, `fast_import`, and `local_proxy` binaries
+# Compile and run the Neon-specific `compute_ctl` and `fast_import` binaries
 #
 #########################################################################################
 FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
@@ -1295,7 +1258,7 @@ ENV BUILD_TAG=$BUILD_TAG
 USER nonroot
 # Copy entire project to get Cargo.* files with proper dependencies for the whole project
 COPY --chown=nonroot . .
-RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin compute_ctl --bin fast_import --bin local_proxy
+RUN cd compute_tools && mold -run cargo build --locked --profile release-line-debug-size-lto

 #########################################################################################
 #
@@ -1316,8 +1279,8 @@ COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/fast_

 FROM debian:$DEBIAN_FLAVOR AS pgbouncer
 RUN set -e \
-    && apt update \
-    && apt install --no-install-suggests --no-install-recommends -y \
+    && apt-get update \
+    && apt-get install --no-install-recommends -y \
        build-essential \
        git \
        ca-certificates \
@@ -1325,8 +1288,7 @@ RUN set -e \
        automake \
        libevent-dev \
        libtool \
-        pkg-config \
-    && apt clean && rm -rf /var/lib/apt/lists/*
+        pkg-config

 # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
 ENV PGBOUNCER_TAG=pgbouncer_1_22_1
@@ -1338,6 +1300,20 @@ RUN set -e \
    && make -j $(nproc) dist_man_MANS= \
    && make install dist_man_MANS=

+#########################################################################################
+#
+# Compile the Neon-specific `local_proxy` binary
+#
+#########################################################################################
+FROM $REPOSITORY/$IMAGE:$TAG AS local_proxy
+ARG BUILD_TAG
+ENV BUILD_TAG=$BUILD_TAG
+
+USER nonroot
+# Copy entire project to get Cargo.* files with proper dependencies for the whole project
+COPY --chown=nonroot . .
+RUN mold -run cargo build --locked --profile release-line-debug-size-lto --bin local_proxy
+
 #########################################################################################
 #
 # Layers "postgres-exporter" and "sql-exporter"
@@ -1477,7 +1453,7 @@ COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/
 COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini

 # local_proxy and its config
-COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
+COPY --from=local_proxy --chown=postgres /home/nonroot/target/release-line-debug-size-lto/local_proxy /usr/local/bin/local_proxy
 RUN mkdir -p /etc/local_proxy && chown postgres:postgres /etc/local_proxy

 # Metrics exporter binaries and  configuration files
@@ -1542,30 +1518,28 @@ RUN apt update && \
        locales \
        procps \
        ca-certificates \
-        curl \
-        unzip \
        $VERSION_INSTALLS && \
-    apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

-# aws cli is used by fast_import (curl and unzip above are at this time only used for this installation step)
+# s5cmd 2.2.2 from https://github.com/peak/s5cmd/releases/tag/v2.2.2
+# used by fast_import
 ARG TARGETARCH
+ADD https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_linux_$TARGETARCH.deb /tmp/s5cmd.deb
 RUN set -ex; \
+    \
+    # Determine the expected checksum based on TARGETARCH
    if [ "${TARGETARCH}" = "amd64" ]; then \
-        TARGETARCH_ALT="x86_64"; \
-        CHECKSUM="c9a9df3770a3ff9259cb469b6179e02829687a464e0824d5c32d378820b53a00"; \
+        CHECKSUM="392c385320cd5ffa435759a95af77c215553d967e4b1c0fffe52e4f14c29cf85"; \
    elif [ "${TARGETARCH}" = "arm64" ]; then \
-        TARGETARCH_ALT="aarch64"; \
-        CHECKSUM="8181730be7891582b38b028112e81b4899ca817e8c616aad807c9e9d1289223a"; \
+        CHECKSUM="939bee3cf4b5604ddb00e67f8c157b91d7c7a5b553d1fbb6890fad32894b7b46"; \
    else \
        echo "Unsupported architecture: ${TARGETARCH}"; exit 1; \
    fi; \
-    curl -L "https://awscli.amazonaws.com/awscli-exe-linux-${TARGETARCH_ALT}-2.17.5.zip" -o /tmp/awscliv2.zip; \
-    echo "${CHECKSUM}  /tmp/awscliv2.zip" | sha256sum -c -; \
-    unzip /tmp/awscliv2.zip -d /tmp/awscliv2; \
-    /tmp/awscliv2/aws/install; \
-    rm -rf /tmp/awscliv2.zip /tmp/awscliv2; \
-    true
+    \
+    # Compute and validate the checksum
+    echo "${CHECKSUM}  /tmp/s5cmd.deb" | sha256sum -c -
+RUN dpkg -i /tmp/s5cmd.deb && rm /tmp/s5cmd.deb

 ENV LANG=en_US.utf8
 USER postgres
--- a/compute/etc/neon_collector.jsonnet
+++ b/compute/etc/neon_collector.jsonnet
@@ -3,7 +3,7 @@
  metrics: [
    import 'sql_exporter/checkpoints_req.libsonnet',
    import 'sql_exporter/checkpoints_timed.libsonnet',
-    import 'sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet',
+    import 'sql_exporter/compute_backpressure_throttling_seconds.libsonnet',
    import 'sql_exporter/compute_current_lsn.libsonnet',
    import 'sql_exporter/compute_logical_snapshot_files.libsonnet',
    import 'sql_exporter/compute_logical_snapshots_bytes.libsonnet',
--- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet
+++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.libsonnet
@@ -1,10 +1,10 @@
 {
-  metric_name: 'compute_backpressure_throttling_seconds_total',
-  type: 'counter',
+  metric_name: 'compute_backpressure_throttling_seconds',
+  type: 'gauge',
  help: 'Time compute has spent throttled',
  key_labels: null,
  values: [
    'throttled',
  ],
-  query: importstr 'sql_exporter/compute_backpressure_throttling_seconds_total.sql',
+  query: importstr 'sql_exporter/compute_backpressure_throttling_seconds.sql',
 }
--- a/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql
+++ b/compute/etc/sql_exporter/compute_backpressure_throttling_seconds_total.sql
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -7,7 +7,7 @@ license.workspace = true
 [features]
 default = []
 # Enables test specific features.
-testing = ["fail/failpoints"]
+testing = []

 [dependencies]
 base64.workspace = true
@@ -19,7 +19,6 @@ camino.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true
 clap.workspace = true
-fail.workspace = true
 flate2.workspace = true
 futures.workspace = true
 hyper0 = { workspace = true, features = ["full"] }
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -67,15 +67,12 @@ use compute_tools::params::*;
 use compute_tools::spec::*;
 use compute_tools::swap::resize_swap;
 use rlimit::{setrlimit, Resource};
-use utils::failpoint_support;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
 const BUILD_TAG_DEFAULT: &str = "latest";

 fn main() -> Result<()> {
-    let scenario = failpoint_support::init();
-
    let (build_tag, clap_args) = init()?;

    // enable core dumping for all child processes
@@ -103,8 +100,6 @@ fn main() -> Result<()> {

    maybe_delay_exit(delay_exit);

-    scenario.teardown();
-
    deinit_and_exit(wait_pg_result);
 }

@@ -424,14 +419,9 @@ fn start_postgres(
        "running compute with features: {:?}",
        state.pspec.as_ref().unwrap().spec.features
    );
-    // before we release the mutex, fetch some parameters for later.
-    let &ComputeSpec {
-        swap_size_bytes,
-        disk_quota_bytes,
-        #[cfg(target_os = "linux")]
-        disable_lfc_resizing,
-        ..
-    } = &state.pspec.as_ref().unwrap().spec;
+    // before we release the mutex, fetch the swap size (if any) for later.
+    let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
+    let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
    drop(state);

    // Launch remaining service threads
@@ -536,18 +526,11 @@ fn start_postgres(
            // This token is used internally by the monitor to clean up all threads
            let token = CancellationToken::new();

-            // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
-            let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
-                None
-            } else {
-                file_cache_connstr.cloned()
-            };
-
            let vm_monitor = rt.as_ref().map(|rt| {
                rt.spawn(vm_monitor::start(
                    Box::leak(Box::new(vm_monitor::Args {
                        cgroup: cgroup.cloned(),
-                        pgconnstr,
+                        pgconnstr: file_cache_connstr.cloned(),
                        addr: vm_monitor_addr.clone(),
                    })),
                    token.clone(),
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -34,12 +34,12 @@ use nix::unistd::Pid;
 use tracing::{info, info_span, warn, Instrument};
 use utils::fs_ext::is_directory_empty;

-#[path = "fast_import/aws_s3_sync.rs"]
-mod aws_s3_sync;
 #[path = "fast_import/child_stdio_to_log.rs"]
 mod child_stdio_to_log;
 #[path = "fast_import/s3_uri.rs"]
 mod s3_uri;
+#[path = "fast_import/s5cmd.rs"]
+mod s5cmd;

 #[derive(clap::Parser)]
 struct Args {
@@ -326,7 +326,7 @@ pub(crate) async fn main() -> anyhow::Result<()> {
    }

    info!("upload pgdata");
-    aws_s3_sync::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/pgdata/"))
+    s5cmd::sync(Utf8Path::new(&pgdata_dir), &s3_prefix.append("/"))
        .await
        .context("sync dump directory to destination")?;

@@ -334,10 +334,10 @@ pub(crate) async fn main() -> anyhow::Result<()> {
    {
        let status_dir = working_directory.join("status");
        std::fs::create_dir(&status_dir).context("create status directory")?;
-        let status_file = status_dir.join("pgdata");
+        let status_file = status_dir.join("status");
        std::fs::write(&status_file, serde_json::json!({"done": true}).to_string())
            .context("write status file")?;
-        aws_s3_sync::sync(&status_dir, &s3_prefix.append("/status/"))
+        s5cmd::sync(&status_file, &s3_prefix.append("/status/pgdata"))
            .await
            .context("sync status directory to destination")?;
    }
--- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs
+++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
@@ -4,21 +4,24 @@ use camino::Utf8Path;
 use super::s3_uri::S3Uri;

 pub(crate) async fn sync(local: &Utf8Path, remote: &S3Uri) -> anyhow::Result<()> {
-    let mut builder = tokio::process::Command::new("aws");
+    let mut builder = tokio::process::Command::new("s5cmd");
+    // s5cmd uses aws-sdk-go v1, hence doesn't support AWS_ENDPOINT_URL
+    if let Some(val) = std::env::var_os("AWS_ENDPOINT_URL") {
+        builder.arg("--endpoint-url").arg(val);
+    }
    builder
-        .arg("s3")
        .arg("sync")
        .arg(local.as_str())
        .arg(remote.to_string());
    let st = builder
        .spawn()
-        .context("spawn aws s3 sync")?
+        .context("spawn s5cmd")?
        .wait()
        .await
-        .context("wait for aws s3 sync")?;
+        .context("wait for s5cmd")?;
    if st.success() {
        Ok(())
    } else {
-        Err(anyhow::anyhow!("aws s3 sync failed"))
+        Err(anyhow::anyhow!("s5cmd failed"))
    }
 }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1181,19 +1181,8 @@ impl ComputeNode {
            let mut conf = postgres::config::Config::from(conf);
            conf.application_name("compute_ctl:migrations");

-            match conf.connect(NoTls) {
-                Ok(mut client) => {
-                    if let Err(e) = handle_migrations(&mut client) {
-                        error!("Failed to run migrations: {}", e);
-                    }
-                }
-                Err(e) => {
-                    error!(
-                        "Failed to connect to the compute for running migrations: {}",
-                        e
-                    );
-                }
-            };
+            let mut client = conf.connect(NoTls)?;
+            handle_migrations(&mut client).context("apply_config handle_migrations")
        });

        Ok::<(), anyhow::Error>(())
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -24,11 +24,8 @@ use metrics::proto::MetricFamily;
 use metrics::Encoder;
 use metrics::TextEncoder;
 use tokio::task;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, warn};
 use tracing_utils::http::OtelName;
-use utils::failpoint_support::failpoints_handler;
-use utils::http::error::ApiError;
 use utils::http::request::must_get_query_param;

 fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
@@ -313,18 +310,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

-        (&Method::POST, "/failpoints") if cfg!(feature = "testing") => {
-            match failpoints_handler(req, CancellationToken::new()).await {
-                Ok(r) => r,
-                Err(ApiError::BadRequest(e)) => {
-                    render_json_error(&e.to_string(), StatusCode::BAD_REQUEST)
-                }
-                Err(_) => {
-                    render_json_error("Internal server error", StatusCode::INTERNAL_SERVER_ERROR)
-                }
-            }
-        }
-
        // download extension files from remote extension storage on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
--- a/compute_tools/src/migration.rs
+++ b/compute_tools/src/migration.rs
@@ -1,16 +1,13 @@
 use anyhow::{Context, Result};
-use fail::fail_point;
 use postgres::Client;
 use tracing::info;

-/// Runs a series of migrations on a target database
 pub(crate) struct MigrationRunner<'m> {
    client: &'m mut Client,
    migrations: &'m [&'m str],
 }

 impl<'m> MigrationRunner<'m> {
-    /// Create a new migration runner
    pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
        // The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
        assert!(migrations.len() + 1 < i64::MAX as usize);
@@ -18,7 +15,6 @@ impl<'m> MigrationRunner<'m> {
        Self { client, migrations }
    }

-    /// Get the current value neon_migration.migration_id
    fn get_migration_id(&mut self) -> Result<i64> {
        let query = "SELECT id FROM neon_migration.migration_id";
        let row = self
@@ -29,61 +25,37 @@ impl<'m> MigrationRunner<'m> {
        Ok(row.get::<&str, i64>("id"))
    }

-    /// Update the neon_migration.migration_id value
-    ///
-    /// This function has a fail point called compute-migration, which can be
-    /// used if you would like to fail the application of a series of migrations
-    /// at some point.
    fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
-        // We use this fail point in order to check that failing in the
-        // middle of applying a series of migrations fails in an expected
-        // manner
-        if cfg!(feature = "testing") {
-            let fail = (|| {
-                fail_point!("compute-migration", |fail_migration_id| {
-                    migration_id == fail_migration_id.unwrap().parse::<i64>().unwrap()
-                });
-
-                false
-            })();
-
-            if fail {
-                return Err(anyhow::anyhow!(format!(
-                    "migration {} was configured to fail because of a failpoint",
-                    migration_id
-                )));
-            }
-        }
+        let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);

        self.client
-            .query(
-                "UPDATE neon_migration.migration_id SET id = $1",
-                &[&migration_id],
-            )
+            .simple_query(&setval)
            .context("run_migrations update id")?;

        Ok(())
    }

-    /// Prepare the migrations the target database for handling migrations
-    fn prepare_database(&mut self) -> Result<()> {
-        self.client
-            .simple_query("CREATE SCHEMA IF NOT EXISTS neon_migration")?;
-        self.client.simple_query("CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)")?;
-        self.client.simple_query(
-            "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING",
-        )?;
-        self.client
-            .simple_query("ALTER SCHEMA neon_migration OWNER TO cloud_admin")?;
-        self.client
-            .simple_query("REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC")?;
+    fn prepare_migrations(&mut self) -> Result<()> {
+        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
+        self.client.simple_query(query)?;
+
+        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
+        self.client.simple_query(query)?;
+
+        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
+        self.client.simple_query(query)?;
+
+        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
+        self.client.simple_query(query)?;
+
+        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
+        self.client.simple_query(query)?;

        Ok(())
    }

-    /// Run the configrured set of migrations
    pub fn run_migrations(mut self) -> Result<()> {
-        self.prepare_database()?;
+        self.prepare_migrations()?;

        let mut current_migration = self.get_migration_id()? as usize;
        while current_migration < self.migrations.len() {
@@ -97,11 +69,6 @@ impl<'m> MigrationRunner<'m> {

            if migration.starts_with("-- SKIP") {
                info!("Skipping migration id={}", migration_id!(current_migration));
-
-                // Even though we are skipping the migration, updating the
-                // migration ID should help keep logic easy to understand when
-                // trying to understand the state of a cluster.
-                self.update_migration_id(migration_id!(current_migration))?;
            } else {
                info!(
                    "Running migration id={}:\n{}\n",
@@ -120,6 +87,7 @@ impl<'m> MigrationRunner<'m> {
                    )
                })?;

+                // Migration IDs start at 1
                self.update_migration_id(migration_id!(current_migration))?;

                self.client
--- a/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
+++ b/compute_tools/src/migrations/tests/0001-neon_superuser_bypass_rls.sql
@@ -1,9 +0,0 @@
-DO $$
-DECLARE
-    bypassrls boolean;
-BEGIN
-    SELECT rolbypassrls INTO bypassrls FROM pg_roles WHERE rolname = 'neon_superuser';
-    IF NOT bypassrls THEN
-        RAISE EXCEPTION 'neon_superuser cannot bypass RLS';
-    END IF;
-END $$;
--- a/compute_tools/src/migrations/tests/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/tests/0002-alter_roles.sql
@@ -1,25 +0,0 @@
-DO $$
-DECLARE
-    role record;
-BEGIN
-    FOR role IN
-        SELECT rolname AS name, rolinherit AS inherit
-        FROM pg_roles
-        WHERE pg_has_role(rolname, 'neon_superuser', 'member')
-    LOOP
-        IF NOT role.inherit THEN
-            RAISE EXCEPTION '% cannot inherit', quote_ident(role.name);
-        END IF;
-    END LOOP;
-
-    FOR role IN
-        SELECT rolname AS name, rolbypassrls AS bypassrls
-        FROM pg_roles
-        WHERE NOT pg_has_role(rolname, 'neon_superuser', 'member')
-            AND NOT starts_with(rolname, 'pg_')
-    LOOP
-        IF role.bypassrls THEN
-            RAISE EXCEPTION  '% can bypass RLS', quote_ident(role.name);
-        END IF;
-    END LOOP;
-END $$;
--- a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_neon_superuser.sql
@@ -1,10 +0,0 @@
-DO $$
-BEGIN
-    IF (SELECT current_setting('server_version_num')::numeric < 160000) THEN
-        RETURN;
-    END IF;
-
-    IF NOT (SELECT pg_has_role('neon_superuser', 'pg_create_subscription', 'member')) THEN
-        RAISE EXCEPTION 'neon_superuser cannot execute pg_create_subscription';
-    END IF;
-END $$;
--- a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_neon_superuser.sql
@@ -1,19 +0,0 @@
-DO $$
-DECLARE
-    monitor record;
-BEGIN
-    SELECT pg_has_role('neon_superuser', 'pg_monitor', 'member') AS member,
-            admin_option AS admin
-        INTO monitor
-        FROM pg_auth_members
-        WHERE roleid = 'pg_monitor'::regrole
-            AND member = 'pg_monitor'::regrole;
-
-    IF NOT monitor.member THEN
-        RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
-    END IF;
-
-    IF NOT monitor.admin THEN
-        RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
-    END IF;
-END $$;
--- a/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_neon_superuser.sql
@@ -1,2 +0,0 @@
-- This test was never written becuase at the time migration tests were added
-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_neon_superuser.sql
@@ -1,2 +0,0 @@
-- This test was never written becuase at the time migration tests were added
-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
+++ b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
@@ -1,2 +0,0 @@
-- This test was never written becuase at the time migration tests were added
-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
+++ b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
@@ -1,2 +0,0 @@
-- This test was never written becuase at the time migration tests were added
-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
+++ b/compute_tools/src/migrations/tests/0009-revoke_replication_for_previously_allowed_roles.sql
@@ -1,2 +0,0 @@
-- This test was never written becuase at the time migration tests were added
-- the accompanying migration was already skipped.
--- a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
@@ -1,13 +0,0 @@
-DO $$
-DECLARE
-    can_execute boolean;
-BEGIN
-    SELECT bool_and(has_function_privilege('neon_superuser', oid, 'execute'))
-       INTO can_execute
-       FROM pg_proc
-       WHERE proname IN ('pg_export_snapshot', 'pg_log_standby_snapshot')
-           AND pronamespace = 'pg_catalog'::regnamespace;
-    IF NOT can_execute THEN
-        RAISE EXCEPTION 'neon_superuser cannot execute both pg_export_snapshot and pg_log_standby_snapshot';
-    END IF;
-END $$;
--- a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -1,13 +0,0 @@
-DO $$
-DECLARE
-    can_execute boolean;
-BEGIN
-    SELECT has_function_privilege('neon_superuser', oid, 'execute')
-       INTO can_execute
-       FROM pg_proc
-       WHERE proname = 'pg_show_replication_origin_status'
-           AND pronamespace = 'pg_catalog'::regnamespace;
-    IF NOT can_execute THEN
-        RAISE EXCEPTION 'neon_superuser cannot execute pg_show_replication_origin_status';
-    END IF;
-END $$;
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -19,7 +19,6 @@ use control_plane::storage_controller::{
    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
 use control_plane::{broker, local_env};
-use nix::fcntl::{flock, FlockArg};
 use pageserver_api::config::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
@@ -37,8 +36,6 @@ use safekeeper_api::{
 };
 use std::borrow::Cow;
 use std::collections::{BTreeSet, HashMap};
-use std::fs::File;
-use std::os::fd::AsRawFd;
 use std::path::PathBuf;
 use std::process::exit;
 use std::str::FromStr;
@@ -692,21 +689,6 @@ struct TimelineTreeEl {
    pub children: BTreeSet<TimelineId>,
 }

-/// A flock-based guard over the neon_local repository directory
-struct RepoLock {
-    _file: File,
-}
-
-impl RepoLock {
-    fn new() -> Result<Self> {
-        let repo_dir = File::open(local_env::base_path())?;
-        let repo_dir_fd = repo_dir.as_raw_fd();
-        flock(repo_dir_fd, FlockArg::LockExclusive)?;
-
-        Ok(Self { _file: repo_dir })
-    }
-}
-
 // Main entry point for the 'neon_local' CLI utility
 //
 // This utility helps to manage neon installation. That includes following:
@@ -718,14 +700,9 @@ fn main() -> Result<()> {
    let cli = Cli::parse();

    // Check for 'neon init' command first.
-    let (subcommand_result, _lock) = if let NeonLocalCmd::Init(args) = cli.command {
-        (handle_init(&args).map(|env| Some(Cow::Owned(env))), None)
+    let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command {
+        handle_init(&args).map(|env| Some(Cow::Owned(env)))
    } else {
-        // This tool uses a collection of simple files to store its state, and consequently
-        // it is not generally safe to run multiple commands concurrently.  Rather than expect
-        // all callers to know this, use a lock file to protect against concurrent execution.
-        let _repo_lock = RepoLock::new().unwrap();
-
        // all other commands need an existing config
        let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
        let original_env = env.clone();
@@ -751,12 +728,11 @@ fn main() -> Result<()> {
            NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
        };

-        let subcommand_result = if &original_env != env {
+        if &original_env != env {
            subcommand_result.map(|()| Some(Cow::Borrowed(env)))
        } else {
            subcommand_result.map(|()| None)
-        };
-        (subcommand_result, Some(_repo_lock))
+        }
    };

    match subcommand_result {
@@ -946,7 +922,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
    } else {
        // User (likely interactive) did not provide a description of the environment, give them the default
        NeonLocalInitConf {
-            control_plane_api: Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap()),
+            control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
            broker: NeonBroker {
                listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
            },
@@ -1742,15 +1718,18 @@ async fn handle_start_all_impl(
            broker::start_broker_process(env, &retry_timeout).await
        });

-        js.spawn(async move {
-            let storage_controller = StorageController::from_env(env);
-            storage_controller
-                .start(NeonStorageControllerStartArgs::with_default_instance_id(
-                    retry_timeout,
-                ))
-                .await
-                .map_err(|e| e.context("start storage_controller"))
-        });
+        // Only start the storage controller if the pageserver is configured to need it
+        if env.control_plane_api.is_some() {
+            js.spawn(async move {
+                let storage_controller = StorageController::from_env(env);
+                storage_controller
+                    .start(NeonStorageControllerStartArgs::with_default_instance_id(
+                        retry_timeout,
+                    ))
+                    .await
+                    .map_err(|e| e.context("start storage_controller"))
+            });
+        }

        for ps_conf in &env.pageservers {
            js.spawn(async move {
@@ -1795,6 +1774,10 @@ async fn neon_start_status_check(
    const RETRY_INTERVAL: Duration = Duration::from_millis(100);
    const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);

+    if env.control_plane_api.is_none() {
+        return Ok(());
+    }
+
    let storcon = StorageController::from_env(env);

    let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -316,10 +316,6 @@ impl Endpoint {
        // and can cause errors like 'no unpinned buffers available', see
        // <https://github.com/neondatabase/neon/issues/9956>
        conf.append("shared_buffers", "1MB");
-        // Postgres defaults to effective_io_concurrency=1, which does not exercise the pageserver's
-        // batching logic.  Set this to 2 so that we exercise the code a bit without letting
-        // individual tests do a lot of concurrent work on underpowered test machines
-        conf.append("effective_io_concurrency", "2");
        conf.append("fsync", "off");
        conf.append("max_connections", "100");
        conf.append("wal_level", "logical");
@@ -585,7 +581,6 @@ impl Endpoint {
            features: self.features.clone(),
            swap_size_bytes: None,
            disk_quota_bytes: None,
-            disable_lfc_resizing: None,
            cluster: Cluster {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -76,7 +76,7 @@ pub struct LocalEnv {

    // Control plane upcall API for pageserver: if None, we will not run storage_controller  If set, this will
    // be propagated into each pageserver's configuration.
-    pub control_plane_api: Url,
+    pub control_plane_api: Option<Url>,

    // Control plane upcall API for storage controller.  If set, this will be propagated into the
    // storage controller's configuration.
@@ -133,7 +133,7 @@ pub struct NeonLocalInitConf {
    pub storage_controller: Option<NeonStorageControllerConf>,
    pub pageservers: Vec<NeonLocalInitPageserverConf>,
    pub safekeepers: Vec<SafekeeperConf>,
-    pub control_plane_api: Option<Url>,
+    pub control_plane_api: Option<Option<Url>>,
    pub control_plane_compute_hook_api: Option<Option<Url>>,
 }

@@ -180,7 +180,7 @@ impl NeonStorageControllerConf {
    const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);

    // Very tight heartbeat interval to speed up tests
-    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(1000);
+    const DEFAULT_HEARTBEAT_INTERVAL: std::time::Duration = std::time::Duration::from_millis(100);
 }

 impl Default for NeonStorageControllerConf {
@@ -535,7 +535,7 @@ impl LocalEnv {
                storage_controller,
                pageservers,
                safekeepers,
-                control_plane_api: control_plane_api.unwrap(),
+                control_plane_api,
                control_plane_compute_hook_api,
                branch_name_mappings,
            }
@@ -638,7 +638,7 @@ impl LocalEnv {
                storage_controller: self.storage_controller.clone(),
                pageservers: vec![], // it's skip_serializing anyway
                safekeepers: self.safekeepers.clone(),
-                control_plane_api: Some(self.control_plane_api.clone()),
+                control_plane_api: self.control_plane_api.clone(),
                control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
                branch_name_mappings: self.branch_name_mappings.clone(),
            },
@@ -768,7 +768,7 @@ impl LocalEnv {
            storage_controller: storage_controller.unwrap_or_default(),
            pageservers: pageservers.iter().map(Into::into).collect(),
            safekeepers,
-            control_plane_api: control_plane_api.unwrap(),
+            control_plane_api: control_plane_api.unwrap_or_default(),
            control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
            branch_name_mappings: Default::default(),
        };
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -95,19 +95,21 @@ impl PageServerNode {

        let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];

-        overrides.push(format!(
-            "control_plane_api='{}'",
-            self.env.control_plane_api.as_str()
-        ));
+        if let Some(control_plane_api) = &self.env.control_plane_api {
+            overrides.push(format!(
+                "control_plane_api='{}'",
+                control_plane_api.as_str()
+            ));

-        // Storage controller uses the same auth as pageserver: if JWT is enabled
-        // for us, we will also need it to talk to them.
-        if matches!(conf.http_auth_type, AuthType::NeonJWT) {
-            let jwt_token = self
-                .env
-                .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
-                .unwrap();
-            overrides.push(format!("control_plane_api_token='{}'", jwt_token));
+            // Storage controller uses the same auth as pageserver: if JWT is enabled
+            // for us, we will also need it to talk to them.
+            if matches!(conf.http_auth_type, AuthType::NeonJWT) {
+                let jwt_token = self
+                    .env
+                    .generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
+                    .unwrap();
+                overrides.push(format!("control_plane_api_token='{}'", jwt_token));
+            }
        }

        if !conf.other.contains_key("remote_storage") {
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -338,7 +338,7 @@ impl StorageController {
                        .port(),
                )
            } else {
-                let listen_url = self.env.control_plane_api.clone();
+                let listen_url = self.env.control_plane_api.clone().unwrap();

                let listen = format!(
                    "{}:{}",
@@ -708,7 +708,7 @@ impl StorageController {
        } else {
            // The configured URL has the /upcall path prefix for pageservers to use: we will strip that out
            // for general purpose API access.
-            let listen_url = self.env.control_plane_api.clone();
+            let listen_url = self.env.control_plane_api.clone().unwrap();
            Url::from_str(&format!(
                "http://{}:{}/{path}",
                listen_url.host_str().unwrap(),
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -5,8 +5,7 @@ use clap::{Parser, Subcommand};
 use pageserver_api::{
    controller_api::{
        AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
-        SafekeeperDescribeResponse, ShardSchedulingPolicy, TenantCreateRequest,
-        TenantDescribeResponse, TenantPolicyRequest,
+        ShardSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
    },
    models::{
        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
@@ -212,8 +211,6 @@ enum Command {
        #[arg(long)]
        timeout: humantime::Duration,
    },
-    /// List safekeepers known to the storage controller
-    Safekeepers {},
 }

 #[derive(Parser)]
@@ -1023,31 +1020,6 @@ async fn main() -> anyhow::Result<()> {
                "Fill was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
            );
        }
-        Command::Safekeepers {} => {
-            let mut resp = storcon_client
-                .dispatch::<(), Vec<SafekeeperDescribeResponse>>(
-                    Method::GET,
-                    "control/v1/safekeeper".to_string(),
-                    None,
-                )
-                .await?;
-
-            resp.sort_by(|a, b| a.id.cmp(&b.id));
-
-            let mut table = comfy_table::Table::new();
-            table.set_header(["Id", "Version", "Host", "Port", "Http Port", "AZ Id"]);
-            for sk in resp {
-                table.add_row([
-                    format!("{}", sk.id),
-                    format!("{}", sk.version),
-                    sk.host,
-                    format!("{}", sk.port),
-                    format!("{}", sk.http_port),
-                    sk.availability_zone_id.to_string(),
-                ]);
-            }
-            println!("{table}");
-        }
    }

    Ok(())
--- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
+++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
@@ -132,6 +132,11 @@
                "name": "cron.database",
                "value": "postgres",
                "vartype": "string"
+            },
+            {
+                "name": "session_preload_libraries",
+                "value": "anon",
+                "vartype": "string"
            }
        ]
    },
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -35,11 +35,11 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
    echo "clean up containers if exists"
    cleanup
    PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
-    # The support of pg_anon not yet added to PG17, so we have to add the corresponding option for other PG versions
-    if [ "${pg_version}" -ne 17 ]; then
+    # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
+    if [ $pg_version -eq 17 ]; then
      SPEC_PATH="compute_wrapper/var/db/postgres/specs"
      mv $SPEC_PATH/spec.json $SPEC_PATH/spec.bak
-      jq '.cluster.settings += [{"name": "session_preload_libraries","value": "anon","vartype": "string"}]' "${SPEC_PATH}/spec.bak" > "${SPEC_PATH}/spec.json"
+      jq 'del(.cluster.settings[] | select (.name == "session_preload_libraries"))' $SPEC_PATH/spec.bak > $SPEC_PATH/spec.json
    fi
    PG_VERSION=$pg_version PG_TEST_VERSION=$PG_TEST_VERSION docker compose --profile test-extensions -f $COMPOSE_FILE up --build -d

@@ -106,8 +106,8 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
        fi
    fi
    cleanup
-    # Restore the original spec.json
-    if [ "$pg_version" -ne 17 ]; then
-      mv "$SPEC_PATH/spec.bak" "$SPEC_PATH/spec.json"
+    # The support of pg_anon not yet added to PG17, so we have to remove the corresponding option
+    if [ $pg_version -eq 17 ]; then
+      mv $SPEC_PATH/spec.bak $SPEC_PATH/spec.json
    fi
 done
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -67,15 +67,6 @@ pub struct ComputeSpec {
    #[serde(default)]
    pub disk_quota_bytes: Option<u64>,

-    /// Disables the vm-monitor behavior that resizes LFC on upscale/downscale, instead relying on
-    /// the initial size of LFC.
-    ///
-    /// This is intended for use when the LFC size is being overridden from the default but
-    /// autoscaling is still enabled, and we don't want the vm-monitor to interfere with the custom
-    /// LFC sizing.
-    #[serde(default)]
-    pub disable_lfc_resizing: Option<bool>,
-
    /// Expected cluster state at the end of transition process.
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,
--- a/libs/desim/src/time.rs
+++ b/libs/desim/src/time.rs
@@ -91,7 +91,7 @@ impl Timing {

    /// Return true if there is a ready event.
    fn is_event_ready(&self, queue: &mut BinaryHeap<Pending>) -> bool {
-        queue.peek().is_some_and(|x| x.time <= self.now())
+        queue.peek().map_or(false, |x| x.time <= self.now())
    }

    /// Clear all pending events.
--- a/libs/pageserver_api/src/controller_api.rs
+++ b/libs/pageserver_api/src/controller_api.rs
@@ -372,23 +372,6 @@ pub struct MetadataHealthListOutdatedResponse {
    pub health_records: Vec<MetadataHealthRecord>,
 }

-/// Publicly exposed safekeeper description
-///
-/// The `active` flag which we have in the DB is not included on purpose: it is deprecated.
-#[derive(Serialize, Deserialize, Clone)]
-pub struct SafekeeperDescribeResponse {
-    pub id: NodeId,
-    pub region_id: String,
-    /// 1 is special, it means just created (not currently posted to storcon).
-    /// Zero or negative is not really expected.
-    /// Otherwise the number from `release-$(number_of_commits_on_branch)` tag.
-    pub version: i64,
-    pub host: String,
-    pub port: i32,
-    pub http_port: i32,
-    pub availability_zone_id: String,
-}
-
 #[cfg(test)]
 mod test {
    use super::*;
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -565,10 +565,6 @@ impl Key {
            && self.field5 == 0
            && self.field6 == u32::MAX
    }
-
-    pub fn is_slru_dir_key(&self) -> bool {
-        slru_dir_kind(self).is_some()
-    }
 }

 #[inline(always)]
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -6,7 +6,6 @@ pub mod utilization;
 use camino::Utf8PathBuf;
 pub use utilization::PageserverUtilization;

-use core::ops::Range;
 use std::{
    collections::HashMap,
    fmt::Display,
@@ -29,7 +28,6 @@ use utils::{
 };

 use crate::{
-    key::Key,
    reltag::RelTag,
    shard::{ShardCount, ShardStripeSize, TenantShardId},
 };
@@ -212,68 +210,6 @@ pub enum TimelineState {
    Broken { reason: String, backtrace: String },
 }

-#[serde_with::serde_as]
-#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
-pub struct CompactLsnRange {
-    pub start: Lsn,
-    pub end: Lsn,
-}
-
-#[serde_with::serde_as]
-#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
-pub struct CompactKeyRange {
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub start: Key,
-    #[serde_as(as = "serde_with::DisplayFromStr")]
-    pub end: Key,
-}
-
-impl From<Range<Lsn>> for CompactLsnRange {
-    fn from(range: Range<Lsn>) -> Self {
-        Self {
-            start: range.start,
-            end: range.end,
-        }
-    }
-}
-
-impl From<Range<Key>> for CompactKeyRange {
-    fn from(range: Range<Key>) -> Self {
-        Self {
-            start: range.start,
-            end: range.end,
-        }
-    }
-}
-
-impl From<CompactLsnRange> for Range<Lsn> {
-    fn from(range: CompactLsnRange) -> Self {
-        range.start..range.end
-    }
-}
-
-impl From<CompactKeyRange> for Range<Key> {
-    fn from(range: CompactKeyRange) -> Self {
-        range.start..range.end
-    }
-}
-
-impl CompactLsnRange {
-    pub fn above(lsn: Lsn) -> Self {
-        Self {
-            start: lsn,
-            end: Lsn::MAX,
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize)]
-pub struct CompactInfoResponse {
-    pub compact_key_range: Option<CompactKeyRange>,
-    pub compact_lsn_range: Option<CompactLsnRange>,
-    pub sub_compaction: bool,
-}
-
 #[derive(Serialize, Deserialize, Clone)]
 pub struct TimelineCreateRequest {
    pub new_timeline_id: TimelineId,
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -173,11 +173,7 @@ impl ShardIdentity {

    /// Return true if the key should be stored on all shards, not just one.
    pub fn is_key_global(&self, key: &Key) -> bool {
-        if key.is_slru_block_key()
-            || key.is_slru_segment_size_key()
-            || key.is_aux_file_key()
-            || key.is_slru_dir_key()
-        {
+        if key.is_slru_block_key() || key.is_slru_segment_size_key() || key.is_aux_file_key() {
            // Special keys that are only stored on shard 0
            false
        } else if key.is_rel_block_key() {
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -9,11 +9,9 @@ regex.workspace = true
 bytes.workspace = true
 anyhow.workspace = true
 crc32c.workspace = true
-criterion.workspace = true
 once_cell.workspace = true
 log.workspace = true
 memoffset.workspace = true
-pprof.workspace = true
 thiserror.workspace = true
 serde.workspace = true
 utils.workspace = true
@@ -26,7 +24,3 @@ postgres.workspace = true
 [build-dependencies]
 anyhow.workspace = true
 bindgen.workspace = true
-
-[[bench]]
-name = "waldecoder"
-harness = false
--- a/libs/postgres_ffi/benches/README.md
+++ b/libs/postgres_ffi/benches/README.md
@@ -1,26 +0,0 @@
-## Benchmarks
-
-To run benchmarks:
-
-```sh
-# All benchmarks.
-cargo bench --package postgres_ffi
-
-# Specific file.
-cargo bench --package postgres_ffi --bench waldecoder
-
-# Specific benchmark.
-cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024
-
-# List available benchmarks.
-cargo bench --package postgres_ffi --benches -- --list
-
-# Generate flamegraph profiles using pprof-rs, profiling for 10 seconds.
-# Output in target/criterion/*/profile/flamegraph.svg.
-cargo bench --package postgres_ffi --bench waldecoder complete_record/size=1024 -- --profile-time 10
-```
-
-Additional charts and statistics are available in `target/criterion/report/index.html`.
-
-Benchmarks are automatically compared against the previous run. To compare against other runs, see
-`--baseline` and `--save-baseline`.
--- a/libs/postgres_ffi/benches/waldecoder.rs
+++ b/libs/postgres_ffi/benches/waldecoder.rs
@@ -1,49 +0,0 @@
-use std::ffi::CStr;
-
-use criterion::{criterion_group, criterion_main, Bencher, Criterion};
-use postgres_ffi::v17::wal_generator::LogicalMessageGenerator;
-use postgres_ffi::v17::waldecoder_handler::WalStreamDecoderHandler;
-use postgres_ffi::waldecoder::WalStreamDecoder;
-use pprof::criterion::{Output, PProfProfiler};
-use utils::lsn::Lsn;
-
-const KB: usize = 1024;
-
-// Register benchmarks with Criterion.
-criterion_group!(
-    name = benches;
-    config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
-    targets = bench_complete_record,
-);
-criterion_main!(benches);
-
-/// Benchmarks WalStreamDecoder::complete_record() for a logical message of varying size.
-fn bench_complete_record(c: &mut Criterion) {
-    let mut g = c.benchmark_group("complete_record");
-    for size in [64, KB, 8 * KB, 128 * KB] {
-        // Kind of weird to change the group throughput per benchmark, but it's the only way
-        // to vary it per benchmark. It works.
-        g.throughput(criterion::Throughput::Bytes(size as u64));
-        g.bench_function(format!("size={size}"), |b| run_bench(b, size).unwrap());
-    }
-
-    fn run_bench(b: &mut Bencher, size: usize) -> anyhow::Result<()> {
-        const PREFIX: &CStr = c"";
-        let value_size = LogicalMessageGenerator::make_value_size(size, PREFIX);
-        let value = vec![1; value_size];
-
-        let mut decoder = WalStreamDecoder::new(Lsn(0), 170000);
-        let msg = LogicalMessageGenerator::new(PREFIX, &value)
-            .next()
-            .unwrap()
-            .encode(Lsn(0));
-        assert_eq!(msg.len(), size);
-
-        b.iter(|| {
-            let msg = msg.clone(); // Bytes::clone() is cheap
-            decoder.complete_record(msg).unwrap();
-        });
-
-        Ok(())
-    }
-}
--- a/libs/postgres_ffi/src/wal_generator.rs
+++ b/libs/postgres_ffi/src/wal_generator.rs
@@ -106,11 +106,11 @@ impl<R: RecordGenerator> WalGenerator<R> {
    const TIMELINE_ID: u32 = 1;

    /// Creates a new WAL generator with the given record generator.
-    pub fn new(record_generator: R, start_lsn: Lsn) -> WalGenerator<R> {
+    pub fn new(record_generator: R) -> WalGenerator<R> {
        Self {
            record_generator,
-            lsn: start_lsn,
-            prev_lsn: start_lsn,
+            lsn: Lsn(0),
+            prev_lsn: Lsn(0),
        }
    }

@@ -231,22 +231,6 @@ impl LogicalMessageGenerator {
        };
        [&header.encode(), prefix, message].concat().into()
    }
-
-    /// Computes how large a value must be to get a record of the given size. Convenience method to
-    /// construct records of pre-determined size. Panics if the record size is too small.
-    pub fn make_value_size(record_size: usize, prefix: &CStr) -> usize {
-        let xlog_header_size = XLOG_SIZE_OF_XLOG_RECORD;
-        let lm_header_size = size_of::<XlLogicalMessage>();
-        let prefix_size = prefix.to_bytes_with_nul().len();
-        let data_header_size = match record_size - xlog_header_size - 2 {
-            0..=255 => 2,
-            256..=258 => panic!("impossible record_size {record_size}"),
-            259.. => 5,
-        };
-        record_size
-            .checked_sub(xlog_header_size + lm_header_size + prefix_size + data_header_size)
-            .expect("record_size too small")
-    }
 }

 impl Iterator for LogicalMessageGenerator {
--- a/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs
+++ b/libs/postgres_ffi/wal_craft/src/xlog_utils_test.rs
@@ -81,7 +81,7 @@ fn test_end_of_wal<C: crate::Crafter>(test_name: &str) {
                continue;
            }
            let mut f = File::options().write(true).open(file.path()).unwrap();
-            static ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];
+            const ZEROS: [u8; WAL_SEGMENT_SIZE] = [0u8; WAL_SEGMENT_SIZE];
            f.write_all(
                &ZEROS[0..min(
                    WAL_SEGMENT_SIZE,
--- a/libs/proxy/postgres-protocol2/Cargo.toml
+++ b/libs/proxy/postgres-protocol2/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "postgres-protocol2"
 version = "0.1.0"
-edition = "2021"
+edition = "2018"
 license = "MIT/Apache-2.0"

 [dependencies]
--- a/libs/proxy/postgres-protocol2/src/lib.rs
+++ b/libs/proxy/postgres-protocol2/src/lib.rs
@@ -9,7 +9,8 @@
 //!
 //! This library assumes that the `client_encoding` backend parameter has been
 //! set to `UTF8`. It will most likely not behave properly if that is not the case.
-#![warn(missing_docs, clippy::all)]
+#![doc(html_root_url = "https://docs.rs/postgres-protocol/0.6")]
+#![warn(missing_docs, rust_2018_idioms, clippy::all)]

 use byteorder::{BigEndian, ByteOrder};
 use bytes::{BufMut, BytesMut};
--- a/libs/proxy/postgres-protocol2/src/message/frontend.rs
+++ b/libs/proxy/postgres-protocol2/src/message/frontend.rs
@@ -3,6 +3,7 @@

 use byteorder::{BigEndian, ByteOrder};
 use bytes::{Buf, BufMut, BytesMut};
+use std::convert::TryFrom;
 use std::error::Error;
 use std::io;
 use std::marker;
--- a/libs/proxy/postgres-types2/Cargo.toml
+++ b/libs/proxy/postgres-types2/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "postgres-types2"
 version = "0.1.0"
-edition = "2021"
+edition = "2018"
 license = "MIT/Apache-2.0"

 [dependencies]
--- a/libs/proxy/postgres-types2/src/lib.rs
+++ b/libs/proxy/postgres-types2/src/lib.rs
@@ -2,7 +2,8 @@
 //!
 //! This crate is used by the `tokio-postgres` and `postgres` crates. You normally don't need to depend directly on it
 //! unless you want to define your own `ToSql` or `FromSql` definitions.
-#![warn(clippy::all, missing_docs)]
+#![doc(html_root_url = "https://docs.rs/postgres-types/0.2")]
+#![warn(clippy::all, rust_2018_idioms, missing_docs)]

 use fallible_iterator::FallibleIterator;
 use postgres_protocol2::types;
--- a/libs/proxy/tokio-postgres2/Cargo.toml
+++ b/libs/proxy/tokio-postgres2/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "tokio-postgres2"
 version = "0.1.0"
-edition = "2021"
+edition = "2018"
 license = "MIT/Apache-2.0"

 [dependencies]
--- a/libs/proxy/tokio-postgres2/src/connection.rs
+++ b/libs/proxy/tokio-postgres2/src/connection.rs
@@ -33,12 +33,8 @@ pub struct Response {
 #[derive(PartialEq, Debug)]
 enum State {
    Active,
-    Closing,
-}
-
-enum WriteReady {
    Terminating,
-    WaitingOnRead,
+    Closing,
 }

 /// A connection to a PostgreSQL database.
@@ -55,6 +51,7 @@ pub struct Connection<S, T> {
    /// HACK: we need this in the Neon Proxy to forward params.
    pub parameters: HashMap<String, String>,
    receiver: mpsc::UnboundedReceiver<Request>,
+    pending_request: Option<RequestMessages>,
    pending_responses: VecDeque<BackendMessage>,
    responses: VecDeque<Response>,
    state: State,
@@ -75,6 +72,7 @@ where
            stream,
            parameters,
            receiver,
+            pending_request: None,
            pending_responses,
            responses: VecDeque::new(),
            state: State::Active,
@@ -95,23 +93,26 @@ where
            .map(|o| o.map(|r| r.map_err(Error::io)))
    }

-    /// Read and process messages from the connection to postgres.
-    /// client <- postgres
-    fn poll_read(&mut self, cx: &mut Context<'_>) -> Poll<Result<AsyncMessage, Error>> {
+    fn poll_read(&mut self, cx: &mut Context<'_>) -> Result<Option<AsyncMessage>, Error> {
+        if self.state != State::Active {
+            trace!("poll_read: done");
+            return Ok(None);
+        }
+
        loop {
            let message = match self.poll_response(cx)? {
                Poll::Ready(Some(message)) => message,
-                Poll::Ready(None) => return Poll::Ready(Err(Error::closed())),
+                Poll::Ready(None) => return Err(Error::closed()),
                Poll::Pending => {
                    trace!("poll_read: waiting on response");
-                    return Poll::Pending;
+                    return Ok(None);
                }
            };

            let (mut messages, request_complete) = match message {
                BackendMessage::Async(Message::NoticeResponse(body)) => {
                    let error = DbError::parse(&mut body.fields()).map_err(Error::parse)?;
-                    return Poll::Ready(Ok(AsyncMessage::Notice(error)));
+                    return Ok(Some(AsyncMessage::Notice(error)));
                }
                BackendMessage::Async(Message::NotificationResponse(body)) => {
                    let notification = Notification {
@@ -119,7 +120,7 @@ where
                        channel: body.channel().map_err(Error::parse)?.to_string(),
                        payload: body.message().map_err(Error::parse)?.to_string(),
                    };
-                    return Poll::Ready(Ok(AsyncMessage::Notification(notification)));
+                    return Ok(Some(AsyncMessage::Notification(notification)));
                }
                BackendMessage::Async(Message::ParameterStatus(body)) => {
                    self.parameters.insert(
@@ -138,10 +139,8 @@ where
            let mut response = match self.responses.pop_front() {
                Some(response) => response,
                None => match messages.next().map_err(Error::parse)? {
-                    Some(Message::ErrorResponse(error)) => {
-                        return Poll::Ready(Err(Error::db(error)))
-                    }
-                    _ => return Poll::Ready(Err(Error::unexpected_message())),
+                    Some(Message::ErrorResponse(error)) => return Err(Error::db(error)),
+                    _ => return Err(Error::unexpected_message()),
                },
            };

@@ -165,14 +164,18 @@ where
                        request_complete,
                    });
                    trace!("poll_read: waiting on sender");
-                    return Poll::Pending;
+                    return Ok(None);
                }
            }
        }
    }

-    /// Fetch the next client request and enqueue the response sender.
    fn poll_request(&mut self, cx: &mut Context<'_>) -> Poll<Option<RequestMessages>> {
+        if let Some(messages) = self.pending_request.take() {
+            trace!("retrying pending request");
+            return Poll::Ready(Some(messages));
+        }
+
        if self.receiver.is_closed() {
            return Poll::Ready(None);
        }
@@ -190,80 +193,74 @@ where
        }
    }

-    /// Process client requests and write them to the postgres connection, flushing if necessary.
-    /// client -> postgres
-    fn poll_write(&mut self, cx: &mut Context<'_>) -> Poll<Result<WriteReady, Error>> {
+    fn poll_write(&mut self, cx: &mut Context<'_>) -> Result<bool, Error> {
        loop {
+            if self.state == State::Closing {
+                trace!("poll_write: done");
+                return Ok(false);
+            }
+
            if Pin::new(&mut self.stream)
                .poll_ready(cx)
                .map_err(Error::io)?
                .is_pending()
            {
                trace!("poll_write: waiting on socket");
-
-                // poll_ready is self-flushing.
-                return Poll::Pending;
+                return Ok(false);
            }

-            match self.poll_request(cx) {
-                // send the message to postgres
-                Poll::Ready(Some(RequestMessages::Single(request))) => {
-                    Pin::new(&mut self.stream)
-                        .start_send(request)
-                        .map_err(Error::io)?;
-                }
-                // No more messages from the client, and no more responses to wait for.
-                // Send a terminate message to postgres
-                Poll::Ready(None) if self.responses.is_empty() => {
+            let request = match self.poll_request(cx) {
+                Poll::Ready(Some(request)) => request,
+                Poll::Ready(None) if self.responses.is_empty() && self.state == State::Active => {
                    trace!("poll_write: at eof, terminating");
+                    self.state = State::Terminating;
                    let mut request = BytesMut::new();
                    frontend::terminate(&mut request);
-                    let request = FrontendMessage::Raw(request.freeze());
-
-                    Pin::new(&mut self.stream)
-                        .start_send(request)
-                        .map_err(Error::io)?;
-
-                    trace!("poll_write: sent eof, closing");
-                    trace!("poll_write: done");
-                    return Poll::Ready(Ok(WriteReady::Terminating));
+                    RequestMessages::Single(FrontendMessage::Raw(request.freeze()))
                }
-                // No more messages from the client, but there are still some responses to wait for.
                Poll::Ready(None) => {
                    trace!(
                        "poll_write: at eof, pending responses {}",
                        self.responses.len()
                    );
-                    ready!(self.poll_flush(cx))?;
-                    return Poll::Ready(Ok(WriteReady::WaitingOnRead));
+                    return Ok(true);
                }
-                // Still waiting for a message from the client.
                Poll::Pending => {
                    trace!("poll_write: waiting on request");
-                    ready!(self.poll_flush(cx))?;
-                    return Poll::Pending;
+                    return Ok(true);
+                }
+            };
+
+            match request {
+                RequestMessages::Single(request) => {
+                    Pin::new(&mut self.stream)
+                        .start_send(request)
+                        .map_err(Error::io)?;
+                    if self.state == State::Terminating {
+                        trace!("poll_write: sent eof, closing");
+                        self.state = State::Closing;
+                    }
                }
            }
        }
    }

-    fn poll_flush(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+    fn poll_flush(&mut self, cx: &mut Context<'_>) -> Result<(), Error> {
        match Pin::new(&mut self.stream)
            .poll_flush(cx)
            .map_err(Error::io)?
        {
-            Poll::Ready(()) => {
-                trace!("poll_flush: flushed");
-                Poll::Ready(Ok(()))
-            }
-            Poll::Pending => {
-                trace!("poll_flush: waiting on socket");
-                Poll::Pending
-            }
+            Poll::Ready(()) => trace!("poll_flush: flushed"),
+            Poll::Pending => trace!("poll_flush: waiting on socket"),
        }
+        Ok(())
    }

    fn poll_shutdown(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        if self.state != State::Closing {
+            return Poll::Pending;
+        }
+
        match Pin::new(&mut self.stream)
            .poll_close(cx)
            .map_err(Error::io)?
@@ -292,30 +289,18 @@ where
        &mut self,
        cx: &mut Context<'_>,
    ) -> Poll<Option<Result<AsyncMessage, Error>>> {
-        if self.state != State::Closing {
-            // if the state is still active, try read from and write to postgres.
-            let message = self.poll_read(cx)?;
-            let closing = self.poll_write(cx)?;
-            if let Poll::Ready(WriteReady::Terminating) = closing {
-                self.state = State::Closing;
-            }
-
-            if let Poll::Ready(message) = message {
-                return Poll::Ready(Some(Ok(message)));
-            }
-
-            // poll_read returned Pending.
-            // poll_write returned Pending or Ready(WriteReady::WaitingOnRead).
-            // if poll_write returned Ready(WriteReady::WaitingOnRead), then we are waiting to read more data from postgres.
-            if self.state != State::Closing {
-                return Poll::Pending;
-            }
+        let message = self.poll_read(cx)?;
+        let want_flush = self.poll_write(cx)?;
+        if want_flush {
+            self.poll_flush(cx)?;
        }
-
-        match self.poll_shutdown(cx) {
-            Poll::Ready(Ok(())) => Poll::Ready(None),
-            Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
-            Poll::Pending => Poll::Pending,
+        match message {
+            Some(message) => Poll::Ready(Some(Ok(message))),
+            None => match self.poll_shutdown(cx) {
+                Poll::Ready(Ok(())) => Poll::Ready(None),
+                Poll::Ready(Err(e)) => Poll::Ready(Some(Err(e))),
+                Poll::Pending => Poll::Pending,
+            },
        }
    }
 }
--- a/libs/proxy/tokio-postgres2/src/lib.rs
+++ b/libs/proxy/tokio-postgres2/src/lib.rs
@@ -1,5 +1,5 @@
 //! An asynchronous, pipelined, PostgreSQL client.
-#![warn(clippy::all)]
+#![warn(rust_2018_idioms, clippy::all)]

 pub use crate::cancel_token::CancelToken;
 pub use crate::client::{Client, SocketConfig};
--- a/libs/proxy/tokio-postgres2/src/to_statement.rs
+++ b/libs/proxy/tokio-postgres2/src/to_statement.rs
@@ -11,7 +11,7 @@ mod private {
        Query(&'a str),
    }

-    impl ToStatementType<'_> {
+    impl<'a> ToStatementType<'a> {
        pub async fn into_statement(self, client: &Client) -> Result<Statement, Error> {
            match self {
                ToStatementType::Statement(s) => Ok(s.clone()),
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -18,7 +18,6 @@ camino = { workspace = true, features = ["serde1"] }
 humantime-serde.workspace = true
 hyper = { workspace = true, features = ["client"] }
 futures.workspace = true
-reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -8,7 +8,6 @@ use std::io;
 use std::num::NonZeroU32;
 use std::pin::Pin;
 use std::str::FromStr;
-use std::sync::Arc;
 use std::time::Duration;
 use std::time::SystemTime;

@@ -16,8 +15,6 @@ use super::REMOTE_STORAGE_PREFIX_SEPARATOR;
 use anyhow::Context;
 use anyhow::Result;
 use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range};
-use azure_core::HttpClient;
-use azure_core::TransportOptions;
 use azure_core::{Continuable, RetryOptions};
 use azure_storage::StorageCredentials;
 use azure_storage_blobs::blob::CopyStatus;
@@ -83,13 +80,8 @@ impl AzureBlobStorage {
            StorageCredentials::token_credential(token_credential)
        };

-        let builder = ClientBuilder::new(account, credentials)
-            // we have an outer retry
-            .retry(RetryOptions::none())
-            // Customize transport to configure conneciton pooling
-            .transport(TransportOptions::new(Self::reqwest_client(
-                azure_config.conn_pool_size,
-            )));
+        // we have an outer retry
+        let builder = ClientBuilder::new(account, credentials).retry(RetryOptions::none());

        let client = builder.container_client(azure_config.container_name.to_owned());

@@ -114,14 +106,6 @@ impl AzureBlobStorage {
        })
    }

-    fn reqwest_client(conn_pool_size: usize) -> Arc<dyn HttpClient> {
-        let client = reqwest::ClientBuilder::new()
-            .pool_max_idle_per_host(conn_pool_size)
-            .build()
-            .expect("failed to build `reqwest` client");
-        Arc::new(client)
-    }
-
    pub fn relative_path_to_name(&self, path: &RemotePath) -> String {
        assert_eq!(std::path::MAIN_SEPARATOR, REMOTE_STORAGE_PREFIX_SEPARATOR);
        let path_string = path.get_path().as_str();
@@ -560,9 +544,9 @@ impl RemoteStorage for AzureBlobStorage {
            .await
    }

-    async fn delete_objects(
+    async fn delete_objects<'a>(
        &self,
-        paths: &[RemotePath],
+        paths: &'a [RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        let kind = RequestKind::Delete;
--- a/libs/remote_storage/src/config.rs
+++ b/libs/remote_storage/src/config.rs
@@ -114,16 +114,6 @@ fn default_max_keys_per_list_response() -> Option<i32> {
    DEFAULT_MAX_KEYS_PER_LIST_RESPONSE
 }

-fn default_azure_conn_pool_size() -> usize {
-    // Conservative default: no connection pooling.  At time of writing this is the Azure
-    // SDK's default as well, due to historic reports of hard-to-reproduce issues
-    // (https://github.com/hyperium/hyper/issues/2312)
-    //
-    // However, using connection pooling is important to avoid exhausting client ports when
-    // doing huge numbers of requests (https://github.com/neondatabase/cloud/issues/20971)
-    0
-}
-
 impl Debug for S3Config {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("S3Config")
@@ -156,8 +146,6 @@ pub struct AzureConfig {
    pub concurrency_limit: NonZeroUsize,
    #[serde(default = "default_max_keys_per_list_response")]
    pub max_keys_per_list_response: Option<i32>,
-    #[serde(default = "default_azure_conn_pool_size")]
-    pub conn_pool_size: usize,
 }

 fn default_remote_storage_azure_concurrency_limit() -> NonZeroUsize {
@@ -314,7 +302,6 @@ timeout = '5s'";
    container_region = 'westeurope'
    upload_storage_class = 'INTELLIGENT_TIERING'
    timeout = '7s'
-    conn_pool_size = 8
    ";

        let config = parse(toml).unwrap();
@@ -329,7 +316,6 @@ timeout = '5s'";
                    prefix_in_container: None,
                    concurrency_limit: default_remote_storage_azure_concurrency_limit(),
                    max_keys_per_list_response: DEFAULT_MAX_KEYS_PER_LIST_RESPONSE,
-                    conn_pool_size: 8,
                }),
                timeout: Duration::from_secs(7),
                small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -341,9 +341,9 @@ pub trait RemoteStorage: Send + Sync + 'static {
    /// If the operation fails because of timeout or cancellation, the root cause of the error will be
    /// set to `TimeoutOrCancel`. In such situation it is unknown which deletions, if any, went
    /// through.
-    async fn delete_objects(
+    async fn delete_objects<'a>(
        &self,
-        paths: &[RemotePath],
+        paths: &'a [RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()>;

--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -562,9 +562,9 @@ impl RemoteStorage for LocalFs {
        }
    }

-    async fn delete_objects(
+    async fn delete_objects<'a>(
        &self,
-        paths: &[RemotePath],
+        paths: &'a [RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        for path in paths {
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -813,9 +813,9 @@ impl RemoteStorage for S3Bucket {
        .await
    }

-    async fn delete_objects(
+    async fn delete_objects<'a>(
        &self,
-        paths: &[RemotePath],
+        paths: &'a [RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        let kind = RequestKind::Delete;
--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -181,9 +181,9 @@ impl RemoteStorage for UnreliableWrapper {
        self.delete_inner(path, true, cancel).await
    }

-    async fn delete_objects(
+    async fn delete_objects<'a>(
        &self,
-        paths: &[RemotePath],
+        paths: &'a [RemotePath],
        cancel: &CancellationToken,
    ) -> anyhow::Result<()> {
        self.attempt(RemoteOp::DeleteObjects(paths.to_vec()))?;
--- a/libs/remote_storage/tests/test_real_azure.rs
+++ b/libs/remote_storage/tests/test_real_azure.rs
@@ -218,7 +218,6 @@ async fn create_azure_client(
            prefix_in_container: Some(format!("test_{millis}_{random:08x}/")),
            concurrency_limit: NonZeroUsize::new(100).unwrap(),
            max_keys_per_list_response,
-            conn_pool_size: 8,
        }),
        timeout: RemoteStorageConfig::DEFAULT_TIMEOUT,
        small_timeout: RemoteStorageConfig::DEFAULT_SMALL_TIMEOUT,
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -15,20 +15,17 @@ arc-swap.workspace = true
 sentry.workspace = true
 async-compression.workspace = true
 anyhow.workspace = true
-backtrace.workspace = true
 bincode.workspace = true
 bytes.workspace = true
 camino.workspace = true
 chrono.workspace = true
 diatomic-waker.workspace = true
-flate2.workspace = true
 git-version.workspace = true
 hex = { workspace = true, features = ["serde"] }
 humantime.workspace = true
 hyper0 = { workspace = true, features = ["full"] }
-itertools.workspace = true
 fail.workspace = true
-futures = { workspace = true }
+futures = { workspace = true}
 jemalloc_pprof.workspace = true
 jsonwebtoken.workspace = true
 nix.workspace = true
--- a/libs/utils/src/failpoint_support.rs
+++ b/libs/utils/src/failpoint_support.rs
@@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
 use tracing::*;

-/// Declare a failpoint that can use to `pause` failpoint action.
+/// Declare a failpoint that can use the `pause` failpoint action.
 /// We don't want to block the executor thread, hence, spawn_blocking + await.
 #[macro_export]
 macro_rules! pausable_failpoint {
@@ -181,7 +181,7 @@ pub async fn failpoints_handler(
 ) -> Result<Response<Body>, ApiError> {
    if !fail::has_failpoints() {
        return Err(ApiError::BadRequest(anyhow::anyhow!(
-            "Cannot manage failpoints because neon was compiled without failpoints support"
+            "Cannot manage failpoints because storage was compiled without failpoints support"
        )));
    }

--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -1,22 +1,15 @@
 use crate::auth::{AuthError, Claims, SwappableJwtAuth};
 use crate::http::error::{api_error_handler, route_error_handler, ApiError};
 use crate::http::request::{get_query_param, parse_query_param};
-use crate::pprof;
-use ::pprof::protos::Message as _;
-use ::pprof::ProfilerGuardBuilder;
 use anyhow::{anyhow, Context};
-use bytes::{Bytes, BytesMut};
 use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION};
 use hyper::http::HeaderValue;
 use hyper::Method;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response};
 use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
 use once_cell::sync::Lazy;
-use regex::Regex;
 use routerify::ext::RequestExt;
 use routerify::{Middleware, RequestInfo, Router, RouterBuilder};
-use tokio::sync::{mpsc, Mutex};
-use tokio_stream::wrappers::ReceiverStream;
 use tokio_util::io::ReaderStream;
 use tracing::{debug, info, info_span, warn, Instrument};

@@ -25,6 +18,11 @@ use std::io::Write as _;
 use std::str::FromStr;
 use std::time::Duration;

+use bytes::{Bytes, BytesMut};
+use pprof::protos::Message as _;
+use tokio::sync::{mpsc, Mutex};
+use tokio_stream::wrappers::ReceiverStream;
+
 static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
        "libmetrics_metric_handler_requests_total",
@@ -367,7 +365,7 @@ pub async fn profile_cpu_handler(req: Request<Body>) -> Result<Response<Body>, A

    // Take the profile.
    let report = tokio::task::spawn_blocking(move || {
-        let guard = ProfilerGuardBuilder::default()
+        let guard = pprof::ProfilerGuardBuilder::default()
            .frequency(frequency_hz)
            .blocklist(&["libc", "libgcc", "pthread", "vdso"])
            .build()?;
@@ -459,34 +457,10 @@ pub async fn profile_heap_handler(req: Request<Body>) -> Result<Response<Body>,
        }

        Format::Pprof => {
-            let data = tokio::task::spawn_blocking(move || {
-                let bytes = prof_ctl.dump_pprof()?;
-                // Symbolize the profile.
-                // TODO: consider moving this upstream to jemalloc_pprof and avoiding the
-                // serialization roundtrip.
-                static STRIP_FUNCTIONS: Lazy<Vec<(Regex, bool)>> = Lazy::new(|| {
-                    // Functions to strip from profiles. If true, also remove child frames.
-                    vec![
-                        (Regex::new("^__rust").unwrap(), false),
-                        (Regex::new("^_start$").unwrap(), false),
-                        (Regex::new("^irallocx_prof").unwrap(), true),
-                        (Regex::new("^prof_alloc_prep").unwrap(), true),
-                        (Regex::new("^std::rt::lang_start").unwrap(), false),
-                        (Regex::new("^std::sys::backtrace::__rust").unwrap(), false),
-                    ]
-                });
-                let profile = pprof::decode(&bytes)?;
-                let profile = pprof::symbolize(profile)?;
-                let profile = pprof::strip_locations(
-                    profile,
-                    &["libc", "libgcc", "pthread", "vdso"],
-                    &STRIP_FUNCTIONS,
-                );
-                pprof::encode(&profile)
-            })
-            .await
-            .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
-            .map_err(ApiError::InternalServerError)?;
+            let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof())
+                .await
+                .map_err(|join_err| ApiError::InternalServerError(join_err.into()))?
+                .map_err(ApiError::InternalServerError)?;
            Response::builder()
                .status(200)
                .header(CONTENT_TYPE, "application/octet-stream")
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -96,8 +96,6 @@ pub mod circuit_breaker;

 pub mod try_rcu;

-pub mod pprof;
-
 // Re-export used in macro. Avoids adding git-version as dep in target crates.
 #[doc(hidden)]
 pub use git_version;
--- a/libs/utils/src/pprof.rs
+++ b/libs/utils/src/pprof.rs
@@ -1,190 +0,0 @@
-use flate2::write::{GzDecoder, GzEncoder};
-use flate2::Compression;
-use itertools::Itertools as _;
-use once_cell::sync::Lazy;
-use pprof::protos::{Function, Line, Message as _, Profile};
-use regex::Regex;
-
-use std::borrow::Cow;
-use std::collections::{HashMap, HashSet};
-use std::ffi::c_void;
-use std::io::Write as _;
-
-/// Decodes a gzip-compressed Protobuf-encoded pprof profile.
-pub fn decode(bytes: &[u8]) -> anyhow::Result<Profile> {
-    let mut gz = GzDecoder::new(Vec::new());
-    gz.write_all(bytes)?;
-    Ok(Profile::parse_from_bytes(&gz.finish()?)?)
-}
-
-/// Encodes a pprof profile as gzip-compressed Protobuf.
-pub fn encode(profile: &Profile) -> anyhow::Result<Vec<u8>> {
-    let mut gz = GzEncoder::new(Vec::new(), Compression::default());
-    profile.write_to_writer(&mut gz)?;
-    Ok(gz.finish()?)
-}
-
-/// Symbolizes a pprof profile using the current binary.
-pub fn symbolize(mut profile: Profile) -> anyhow::Result<Profile> {
-    if !profile.function.is_empty() {
-        return Ok(profile); // already symbolized
-    }
-
-    // Collect function names.
-    let mut functions: HashMap<String, Function> = HashMap::new();
-    let mut strings: HashMap<String, i64> = profile
-        .string_table
-        .into_iter()
-        .enumerate()
-        .map(|(i, s)| (s, i as i64))
-        .collect();
-
-    // Helper to look up or register a string.
-    let mut string_id = |s: &str| -> i64 {
-        // Don't use .entry() to avoid unnecessary allocations.
-        if let Some(id) = strings.get(s) {
-            return *id;
-        }
-        let id = strings.len() as i64;
-        strings.insert(s.to_string(), id);
-        id
-    };
-
-    for loc in &mut profile.location {
-        if !loc.line.is_empty() {
-            continue;
-        }
-
-        // Resolve the line and function for each location.
-        backtrace::resolve(loc.address as *mut c_void, |symbol| {
-            let Some(symname) = symbol.name() else {
-                return;
-            };
-            let mut name = symname.to_string();
-
-            // Strip the Rust monomorphization suffix from the symbol name.
-            static SUFFIX_REGEX: Lazy<Regex> =
-                Lazy::new(|| Regex::new("::h[0-9a-f]{16}$").expect("invalid regex"));
-            if let Some(m) = SUFFIX_REGEX.find(&name) {
-                name.truncate(m.start());
-            }
-
-            let function_id = match functions.get(&name) {
-                Some(function) => function.id,
-                None => {
-                    let id = functions.len() as u64 + 1;
-                    let system_name = String::from_utf8_lossy(symname.as_bytes());
-                    let filename = symbol
-                        .filename()
-                        .map(|path| path.to_string_lossy())
-                        .unwrap_or(Cow::Borrowed(""));
-                    let function = Function {
-                        id,
-                        name: string_id(&name),
-                        system_name: string_id(&system_name),
-                        filename: string_id(&filename),
-                        ..Default::default()
-                    };
-                    functions.insert(name, function);
-                    id
-                }
-            };
-            loc.line.push(Line {
-                function_id,
-                line: symbol.lineno().unwrap_or(0) as i64,
-                ..Default::default()
-            });
-        });
-    }
-
-    // Store the resolved functions, and mark the mapping as resolved.
-    profile.function = functions.into_values().sorted_by_key(|f| f.id).collect();
-    profile.string_table = strings
-        .into_iter()
-        .sorted_by_key(|(_, i)| *i)
-        .map(|(s, _)| s)
-        .collect();
-
-    for mapping in &mut profile.mapping {
-        mapping.has_functions = true;
-        mapping.has_filenames = true;
-    }
-
-    Ok(profile)
-}
-
-/// Strips locations (stack frames) matching the given mappings (substring) or function names
-/// (regex). The function bool specifies whether child frames should be stripped as well.
-///
-/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all
-/// string references.
-pub fn strip_locations(
-    mut profile: Profile,
-    mappings: &[&str],
-    functions: &[(Regex, bool)],
-) -> Profile {
-    // Strip mappings.
-    let mut strip_mappings: HashSet<u64> = HashSet::new();
-
-    profile.mapping.retain(|mapping| {
-        let Some(name) = profile.string_table.get(mapping.filename as usize) else {
-            return true;
-        };
-        if mappings.iter().any(|substr| name.contains(substr)) {
-            strip_mappings.insert(mapping.id);
-            return false;
-        }
-        true
-    });
-
-    // Strip functions.
-    let mut strip_functions: HashMap<u64, bool> = HashMap::new();
-
-    profile.function.retain(|function| {
-        let Some(name) = profile.string_table.get(function.name as usize) else {
-            return true;
-        };
-        for (regex, strip_children) in functions {
-            if regex.is_match(name) {
-                strip_functions.insert(function.id, *strip_children);
-                return false;
-            }
-        }
-        true
-    });
-
-    // Strip locations. The bool specifies whether child frames should be stripped too.
-    let mut strip_locations: HashMap<u64, bool> = HashMap::new();
-
-    profile.location.retain(|location| {
-        for line in &location.line {
-            if let Some(strip_children) = strip_functions.get(&line.function_id) {
-                strip_locations.insert(location.id, *strip_children);
-                return false;
-            }
-        }
-        if strip_mappings.contains(&location.mapping_id) {
-            strip_locations.insert(location.id, false);
-            return false;
-        }
-        true
-    });
-
-    // Strip sample locations.
-    for sample in &mut profile.sample {
-        // First, find the uppermost function with child removal and truncate the stack.
-        if let Some(truncate) = sample
-            .location_id
-            .iter()
-            .rposition(|id| strip_locations.get(id) == Some(&true))
-        {
-            sample.location_id.drain(..=truncate);
-        }
-        // Next, strip any individual frames without child removal.
-        sample
-            .location_id
-            .retain(|id| !strip_locations.contains_key(id));
-    }
-
-    profile
-}
--- a/pageserver/compaction/src/compact_tiered.rs
+++ b/pageserver/compaction/src/compact_tiered.rs
@@ -272,7 +272,7 @@ struct CompactionJob<E: CompactionJobExecutor> {
    completed: bool,
 }

-impl<E> LevelCompactionState<'_, E>
+impl<'a, E> LevelCompactionState<'a, E>
 where
    E: CompactionJobExecutor,
 {
--- a/pageserver/compaction/src/identify_levels.rs
+++ b/pageserver/compaction/src/identify_levels.rs
@@ -224,8 +224,9 @@ impl<L> Level<L> {
            }

            // recalculate depth if this was the last event at this point
-            let more_events_at_this_key =
-                events_iter.peek().is_some_and(|next_e| next_e.key == e.key);
+            let more_events_at_this_key = events_iter
+                .peek()
+                .map_or(false, |next_e| next_e.key == e.key);
            if !more_events_at_this_key {
                let mut active_depth = 0;
                for (_end_lsn, is_image, _idx) in active_set.iter().rev() {
--- a/pageserver/compaction/src/interface.rs
+++ b/pageserver/compaction/src/interface.rs
@@ -148,7 +148,7 @@ pub trait CompactionDeltaLayer<E: CompactionJobExecutor + ?Sized>: CompactionLay
        Self: 'a;

    /// Return all keys in this delta layer.
-    fn load_keys(
+    fn load_keys<'a>(
        &self,
        ctx: &E::RequestContext,
    ) -> impl Future<Output = anyhow::Result<Vec<Self::DeltaEntry<'_>>>> + Send;
--- a/pageserver/compaction/src/simulator.rs
+++ b/pageserver/compaction/src/simulator.rs
@@ -143,7 +143,7 @@ impl interface::CompactionLayer<Key> for Arc<MockDeltaLayer> {
 impl interface::CompactionDeltaLayer<MockTimeline> for Arc<MockDeltaLayer> {
    type DeltaEntry<'a> = MockRecord;

-    async fn load_keys(&self, _ctx: &MockRequestContext) -> anyhow::Result<Vec<MockRecord>> {
+    async fn load_keys<'a>(&self, _ctx: &MockRequestContext) -> anyhow::Result<Vec<MockRecord>> {
        Ok(self.records.clone())
    }
 }
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -248,7 +248,7 @@ where
    }
 }

-impl<W> Basebackup<'_, W>
+impl<'a, W> Basebackup<'a, W>
 where
    W: AsyncWrite + Send + Sync + Unpin,
 {
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -53,12 +53,10 @@ project_build_tag!(BUILD_TAG);
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;

-// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
-// TODO: disabled because concurrent CPU profiles cause seg faults. See:
-// https://github.com/neondatabase/neon/issues/10225.
-//#[allow(non_upper_case_globals)]
-//#[export_name = "malloc_conf"]
-//pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";
+/// Configure jemalloc to sample allocations for profiles every 1 MB (1 << 20).
+#[allow(non_upper_case_globals)]
+#[export_name = "malloc_conf"]
+pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0";

 const PID_FILE_NAME: &str = "pageserver.pid";

--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -97,8 +97,8 @@ use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError};
 use crate::DEFAULT_PG_VERSION;
 use crate::{disk_usage_eviction_task, tenant};
 use pageserver_api::models::{
-    CompactInfoResponse, StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest,
-    TimelineGcRequest, TimelineInfo,
+    StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest,
+    TimelineInfo,
 };
 use utils::{
    auth::SwappableJwtAuth,
@@ -2039,34 +2039,6 @@ async fn timeline_cancel_compact_handler(
    .await
 }

-// Get compact info of a timeline
-async fn timeline_compact_info_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-    let state = get_state(&request);
-    async {
-        let tenant = state
-            .tenant_manager
-            .get_attached_tenant_shard(tenant_shard_id)?;
-        let res = tenant.get_scheduled_compaction_tasks(timeline_id);
-        let mut resp = Vec::new();
-        for item in res {
-            resp.push(CompactInfoResponse {
-                compact_key_range: item.compact_key_range,
-                compact_lsn_range: item.compact_lsn_range,
-                sub_compaction: item.sub_compaction,
-            });
-        }
-        json_response(StatusCode::OK, resp)
-    }
-    .instrument(info_span!("timeline_compact_info", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
-    .await
-}
-
 // Run compaction immediately on given timeline.
 async fn timeline_compact_handler(
    mut request: Request<Body>,
@@ -3428,10 +3400,6 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
            |r| api_handler(r, timeline_gc_handler),
        )
-        .get(
-            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
-            |r| api_handler(r, timeline_compact_info_handler),
-        )
        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
            |r| api_handler(r, timeline_compact_handler),
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -1242,7 +1242,7 @@ pub struct DatadirModification<'a> {
    pending_metadata_bytes: usize,
 }

-impl DatadirModification<'_> {
+impl<'a> DatadirModification<'a> {
    // When a DatadirModification is committed, we do a monolithic serialization of all its contents.  WAL records can
    // contain multiple pages, so the pageserver's record-based batch size isn't sufficient to bound this allocation: we
    // additionally specify a limit on how much payload a DatadirModification may contain before it should be committed.
@@ -1263,7 +1263,7 @@ impl DatadirModification<'_> {
    pub(crate) fn has_dirty_data(&self) -> bool {
        self.pending_data_batch
            .as_ref()
-            .is_some_and(|b| b.has_data())
+            .map_or(false, |b| b.has_data())
    }

    /// Set the current lsn
@@ -1319,23 +1319,18 @@ impl DatadirModification<'_> {

        let buf: Bytes = SlruSegmentDirectory::ser(&SlruSegmentDirectory::default())?.into();
        let empty_dir = Value::Image(buf);
-
-        // Initialize SLRUs on shard 0 only: creating these on other shards would be
-        // harmless but they'd just be dropped on later compaction.
-        if self.tline.tenant_shard_id.is_shard_zero() {
-            self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
-            self.pending_directory_entries
-                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
-            self.put(
-                slru_dir_to_key(SlruKind::MultiXactMembers),
-                empty_dir.clone(),
-            );
-            self.pending_directory_entries
-                .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
-            self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
-            self.pending_directory_entries
-                .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));
-        }
+        self.put(slru_dir_to_key(SlruKind::Clog), empty_dir.clone());
+        self.pending_directory_entries
+            .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+        self.put(
+            slru_dir_to_key(SlruKind::MultiXactMembers),
+            empty_dir.clone(),
+        );
+        self.pending_directory_entries
+            .push((DirectoryKind::SlruSegment(SlruKind::Clog), 0));
+        self.put(slru_dir_to_key(SlruKind::MultiXactOffsets), empty_dir);
+        self.pending_directory_entries
+            .push((DirectoryKind::SlruSegment(SlruKind::MultiXactOffsets), 0));

        Ok(())
    }
@@ -2230,7 +2225,7 @@ impl DatadirModification<'_> {
                assert!(!self
                    .pending_data_batch
                    .as_ref()
-                    .is_some_and(|b| b.updates_key(&key)));
+                    .map_or(false, |b| b.updates_key(&key)));
            }
        }

@@ -2299,7 +2294,7 @@ pub enum Version<'a> {
    Modified(&'a DatadirModification<'a>),
 }

-impl Version<'_> {
+impl<'a> Version<'a> {
    async fn get(
        &self,
        timeline: &Timeline,
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -2604,15 +2604,9 @@ impl Tenant {
                WaitCompletionError::NotInitialized(
                    e, // If the queue is already stopped, it's a shutdown error.
                ) if e.is_stopping() => CreateTimelineError::ShuttingDown,
-                WaitCompletionError::NotInitialized(_) => {
-                    // This is a bug: we should never try to wait for uploads before initializing the timeline
-                    debug_assert!(false);
-                    CreateTimelineError::Other(anyhow::anyhow!("timeline not initialized"))
-                }
-                WaitCompletionError::UploadQueueShutDownOrStopped => {
-                    CreateTimelineError::ShuttingDown
-                }
-            })?;
+                e => CreateTimelineError::Other(e.into()),
+            })
+            .context("wait for timeline initial uploads to complete")?;

        // The creating task is responsible for activating the timeline.
        // We do this after `wait_completion()` so that we don't spin up tasks that start
@@ -3128,23 +3122,6 @@ impl Tenant {
        }
    }

-    pub(crate) fn get_scheduled_compaction_tasks(
-        &self,
-        timeline_id: TimelineId,
-    ) -> Vec<CompactOptions> {
-        use itertools::Itertools;
-        let guard = self.scheduled_compaction_tasks.lock().unwrap();
-        guard
-            .get(&timeline_id)
-            .map(|tline_pending_tasks| {
-                tline_pending_tasks
-                    .iter()
-                    .map(|x| x.options.clone())
-                    .collect_vec()
-            })
-            .unwrap_or_default()
-    }
-
    /// Schedule a compaction task for a timeline.
    pub(crate) async fn schedule_compaction(
        &self,
@@ -4488,17 +4465,13 @@ impl Tenant {
        let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =
            HashMap::with_capacity(timelines.len());

-        // Ensures all timelines use the same start time when computing the time cutoff.
-        let now_ts_for_pitr_calc = SystemTime::now();
        for timeline in timelines.iter() {
            let cutoff = timeline
                .get_last_record_lsn()
                .checked_sub(horizon)
                .unwrap_or(Lsn(0));

-            let cutoffs = timeline
-                .find_gc_cutoffs(now_ts_for_pitr_calc, cutoff, pitr, cancel, ctx)
-                .await?;
+            let cutoffs = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await?;
            let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);
            assert!(old.is_none());
        }
@@ -5786,13 +5759,13 @@ mod tests {
    use timeline::{CompactOptions, DeltaLayerTestDesc};
    use utils::id::TenantId;

-    #[cfg(feature = "testing")]
-    use models::CompactLsnRange;
    #[cfg(feature = "testing")]
    use pageserver_api::record::NeonWalRecord;
    #[cfg(feature = "testing")]
    use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn};
    #[cfg(feature = "testing")]
+    use timeline::CompactLsnRange;
+    #[cfg(feature = "testing")]
    use timeline::GcInfo;

    static TEST_KEY: Lazy<Key> =
@@ -9661,7 +9634,7 @@ mod tests {
    #[cfg(feature = "testing")]
    #[tokio::test]
    async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> {
-        use models::CompactLsnRange;
+        use timeline::CompactLsnRange;

        let harness = TenantHarness::create("test_simple_bottom_most_compaction_on_branch").await?;
        let (tenant, ctx) = harness.load().await;
--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -35,7 +35,7 @@ pub struct CompressionInfo {
    pub compressed_size: Option<usize>,
 }

-impl BlockCursor<'_> {
+impl<'a> BlockCursor<'a> {
    /// Read a blob into a new buffer.
    pub async fn read_blob(
        &self,
--- a/pageserver/src/tenant/block_io.rs
+++ b/pageserver/src/tenant/block_io.rs
@@ -89,7 +89,7 @@ pub(crate) enum BlockReaderRef<'a> {
    VirtualFile(&'a VirtualFile),
 }

-impl BlockReaderRef<'_> {
+impl<'a> BlockReaderRef<'a> {
    #[inline(always)]
    async fn read_blk(
        &self,
--- a/pageserver/src/tenant/checks.rs
+++ b/pageserver/src/tenant/checks.rs
@@ -1,15 +1,12 @@
 use std::collections::BTreeSet;

 use itertools::Itertools;
-use pageserver_compaction::helpers::overlaps_with;

 use super::storage_layer::LayerName;

 /// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
 ///
-/// The function implements a fast path check and a slow path check.
-///
-/// The fast path checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
+/// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
 ///
 /// ```plain
 /// |       |                 |       |
@@ -28,47 +25,31 @@ use super::storage_layer::LayerName;
 /// |       |    |   4   |    |       |
 ///
 /// If layer 2 and 4 contain the same single key, this is also a valid layer map.
-///
-/// However, if a partial compaction is still going on, it is possible that we get a layer map not satisfying the above condition.
-/// Therefore, we fallback to simply check if any of the two delta layers overlap. (See "A slow path...")
 pub fn check_valid_layermap(metadata: &[LayerName]) -> Option<String> {
    let mut lsn_split_point = BTreeSet::new(); // TODO: use a better data structure (range tree / range set?)
    let mut all_delta_layers = Vec::new();
    for name in metadata {
        if let LayerName::Delta(layer) = name {
-            all_delta_layers.push(layer.clone());
+            if layer.key_range.start.next() != layer.key_range.end {
+                all_delta_layers.push(layer.clone());
+            }
        }
    }
    for layer in &all_delta_layers {
-        if layer.key_range.start.next() != layer.key_range.end {
-            let lsn_range = &layer.lsn_range;
-            lsn_split_point.insert(lsn_range.start);
-            lsn_split_point.insert(lsn_range.end);
-        }
+        let lsn_range = &layer.lsn_range;
+        lsn_split_point.insert(lsn_range.start);
+        lsn_split_point.insert(lsn_range.end);
    }
-    for (idx, layer) in all_delta_layers.iter().enumerate() {
-        if layer.key_range.start.next() == layer.key_range.end {
-            continue;
-        }
+    for layer in &all_delta_layers {
        let lsn_range = layer.lsn_range.clone();
        let intersects = lsn_split_point.range(lsn_range).collect_vec();
        if intersects.len() > 1 {
-            // A slow path to check if the layer intersects with any other delta layer.
-            for (other_idx, other_layer) in all_delta_layers.iter().enumerate() {
-                if other_idx == idx {
-                    // do not check self intersects with self
-                    continue;
-                }
-                if overlaps_with(&layer.lsn_range, &other_layer.lsn_range)
-                    && overlaps_with(&layer.key_range, &other_layer.key_range)
-                {
-                    let err = format!(
-                            "layer violates the layer map LSN split assumption: layer {} intersects with layer {}",
-                            layer, other_layer
-                        );
-                    return Some(err);
-                }
-            }
+            let err = format!(
+                "layer violates the layer map LSN split assumption: layer {} intersects with LSN [{}]",
+                layer,
+                intersects.into_iter().map(|lsn| lsn.to_string()).join(", ")
+            );
+            return Some(err);
        }
    }
    None
--- a/pageserver/src/tenant/disk_btree.rs
+++ b/pageserver/src/tenant/disk_btree.rs
@@ -532,7 +532,7 @@ pub struct DiskBtreeIterator<'a> {
    >,
 }

-impl DiskBtreeIterator<'_> {
+impl<'a> DiskBtreeIterator<'a> {
    pub async fn next(&mut self) -> Option<std::result::Result<(Vec<u8>, u64), DiskBtreeError>> {
        self.stream.next().await
    }
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -174,11 +174,11 @@ impl EphemeralFile {
 }

 impl super::storage_layer::inmemory_layer::vectored_dio_read::File for EphemeralFile {
-    async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(
-        &self,
+    async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
+        &'b self,
        start: u64,
        dst: tokio_epoll_uring::Slice<B>,
-        ctx: &RequestContext,
+        ctx: &'a RequestContext,
    ) -> std::io::Result<(tokio_epoll_uring::Slice<B>, usize)> {
        let submitted_offset = self.buffered_writer.bytes_submitted();

--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -392,8 +392,8 @@ impl LayerMap {
        image_layer: Option<Arc<PersistentLayerDesc>>,
        end_lsn: Lsn,
    ) -> Option<SearchResult> {
-        assert!(delta_layer.as_ref().is_none_or(|l| l.is_delta()));
-        assert!(image_layer.as_ref().is_none_or(|l| !l.is_delta()));
+        assert!(delta_layer.as_ref().map_or(true, |l| l.is_delta()));
+        assert!(image_layer.as_ref().map_or(true, |l| !l.is_delta()));

        match (delta_layer, image_layer) {
            (None, None) => None,
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -749,7 +749,7 @@ impl RemoteTimelineClient {
        // ahead of what's _actually_ on the remote during index upload.
        upload_queue.dirty.metadata = metadata.clone();

-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue)?;

        Ok(())
    }
@@ -770,7 +770,7 @@ impl RemoteTimelineClient {

        upload_queue.dirty.metadata.apply(update);

-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue)?;

        Ok(())
    }
@@ -809,7 +809,7 @@ impl RemoteTimelineClient {
        if let Some(archived_at_set) = need_upload_scheduled {
            let intended_archived_at = archived_at_set.then(|| Utc::now().naive_utc());
            upload_queue.dirty.archived_at = intended_archived_at;
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue)?;
        }

        let need_wait = need_change(&upload_queue.clean.0.archived_at, state).is_some();
@@ -824,7 +824,7 @@ impl RemoteTimelineClient {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;
        upload_queue.dirty.import_pgdata = state;
-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue)?;
        Ok(())
    }

@@ -843,14 +843,17 @@ impl RemoteTimelineClient {
        let upload_queue = guard.initialized_mut()?;

        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue)?;
        }

        Ok(())
    }

    /// Launch an index-file upload operation in the background (internal function)
-    fn schedule_index_upload(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {
+    fn schedule_index_upload(
+        self: &Arc<Self>,
+        upload_queue: &mut UploadQueueInitialized,
+    ) -> Result<(), NotInitialized> {
        let disk_consistent_lsn = upload_queue.dirty.metadata.disk_consistent_lsn();
        // fix up the duplicated field
        upload_queue.dirty.disk_consistent_lsn = disk_consistent_lsn;
@@ -877,6 +880,7 @@ impl RemoteTimelineClient {

        // Launch the task immediately, if possible
        self.launch_queued_tasks(upload_queue);
+        Ok(())
    }

    /// Reparent this timeline to a new parent.
@@ -905,7 +909,7 @@ impl RemoteTimelineClient {
                upload_queue.dirty.metadata.reparent(new_parent);
                upload_queue.dirty.lineage.record_previous_ancestor(&prev);

-                self.schedule_index_upload(upload_queue);
+                self.schedule_index_upload(upload_queue)?;

                Some(self.schedule_barrier0(upload_queue))
            }
@@ -944,7 +948,7 @@ impl RemoteTimelineClient {
                    assert!(prev.is_none(), "copied layer existed already {layer}");
                }

-                self.schedule_index_upload(upload_queue);
+                self.schedule_index_upload(upload_queue)?;

                Some(self.schedule_barrier0(upload_queue))
            }
@@ -1000,7 +1004,7 @@ impl RemoteTimelineClient {
                    upload_queue.dirty.gc_blocking = current
                        .map(|x| x.with_reason(reason))
                        .or_else(|| Some(index::GcBlocking::started_now_for(reason)));
-                    self.schedule_index_upload(upload_queue);
+                    self.schedule_index_upload(upload_queue)?;
                    Some(self.schedule_barrier0(upload_queue))
                }
            }
@@ -1053,7 +1057,8 @@ impl RemoteTimelineClient {
                    upload_queue.dirty.gc_blocking =
                        current.as_ref().and_then(|x| x.without_reason(reason));
                    assert!(wanted(upload_queue.dirty.gc_blocking.as_ref()));
-                    self.schedule_index_upload(upload_queue);
+                    // FIXME: bogus ?
+                    self.schedule_index_upload(upload_queue)?;
                    Some(self.schedule_barrier0(upload_queue))
                }
            }
@@ -1120,8 +1125,8 @@ impl RemoteTimelineClient {
        let mut guard = self.upload_queue.lock().unwrap();
        let upload_queue = guard.initialized_mut()?;

-        let with_metadata =
-            self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned());
+        let with_metadata = self
+            .schedule_unlinking_of_layers_from_index_part0(upload_queue, names.iter().cloned())?;

        self.schedule_deletion_of_unlinked0(upload_queue, with_metadata);

@@ -1148,7 +1153,7 @@ impl RemoteTimelineClient {

        let names = gc_layers.iter().map(|x| x.layer_desc().layer_name());

-        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
+        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?;

        self.launch_queued_tasks(upload_queue);

@@ -1161,7 +1166,7 @@ impl RemoteTimelineClient {
        self: &Arc<Self>,
        upload_queue: &mut UploadQueueInitialized,
        names: I,
-    ) -> Vec<(LayerName, LayerFileMetadata)>
+    ) -> Result<Vec<(LayerName, LayerFileMetadata)>, NotInitialized>
    where
        I: IntoIterator<Item = LayerName>,
    {
@@ -1203,10 +1208,10 @@ impl RemoteTimelineClient {
        // index_part update, because that needs to be uploaded before we can actually delete the
        // files.
        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue)?;
        }

-        with_metadata
+        Ok(with_metadata)
    }

    /// Schedules deletion for layer files which have previously been unlinked from the
@@ -1297,7 +1302,7 @@ impl RemoteTimelineClient {

        let names = compacted_from.iter().map(|x| x.layer_desc().layer_name());

-        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names);
+        self.schedule_unlinking_of_layers_from_index_part0(upload_queue, names)?;
        self.launch_queued_tasks(upload_queue);

        Ok(())
@@ -1943,30 +1948,6 @@ impl RemoteTimelineClient {
                return;
            }

-            // Assert that we don't modify a layer that's referenced by the current index.
-            if cfg!(debug_assertions) {
-                let modified = match &task.op {
-                    UploadOp::UploadLayer(layer, layer_metadata, _) => {
-                        vec![(layer.layer_desc().layer_name(), layer_metadata)]
-                    }
-                    UploadOp::Delete(delete) => {
-                        delete.layers.iter().map(|(n, m)| (n.clone(), m)).collect()
-                    }
-                    // These don't modify layers.
-                    UploadOp::UploadMetadata { .. } => Vec::new(),
-                    UploadOp::Barrier(_) => Vec::new(),
-                    UploadOp::Shutdown => Vec::new(),
-                };
-                if let Ok(queue) = self.upload_queue.lock().unwrap().initialized_mut() {
-                    for (ref name, metadata) in modified {
-                        debug_assert!(
-                            !queue.clean.0.references(name, metadata),
-                            "layer {name} modified while referenced by index",
-                        );
-                    }
-                }
-            }
-
            let upload_result: anyhow::Result<()> = match &task.op {
                UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => {
                    if let Some(OpType::FlushDeletion) = mode {
@@ -2533,21 +2514,6 @@ pub fn remote_layer_path(
    RemotePath::from_string(&path).expect("Failed to construct path")
 }

-/// Returns true if a and b have the same layer path within a tenant/timeline. This is essentially
-/// remote_layer_path(a) == remote_layer_path(b) without the string allocations.
-///
-/// TODO: there should be a variant of LayerName for the physical path that contains information
-/// about the shard and generation, such that this could be replaced by a simple comparison.
-pub fn is_same_remote_layer_path(
-    aname: &LayerName,
-    ameta: &LayerFileMetadata,
-    bname: &LayerName,
-    bmeta: &LayerFileMetadata,
-) -> bool {
-    // NB: don't assert remote_layer_path(a) == remote_layer_path(b); too expensive even for debug.
-    aname == bname && ameta.shard == bmeta.shard && ameta.generation == bmeta.generation
-}
-
 pub fn remote_initdb_archive_path(tenant_id: &TenantId, timeline_id: &TimelineId) -> RemotePath {
    RemotePath::from_string(&format!(
        "tenants/{tenant_id}/{TIMELINES_SEGMENT_NAME}/{timeline_id}/{INITDB_PATH}"
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -145,8 +145,8 @@ pub async fn download_layer_file<'a>(
 ///
 /// If Err() is returned, there was some error. The file at `dst_path` has been unlinked.
 /// The unlinking has _not_ been made durable.
-async fn download_object(
-    storage: &GenericRemoteStorage,
+async fn download_object<'a>(
+    storage: &'a GenericRemoteStorage,
    src_path: &RemotePath,
    dst_path: &Utf8PathBuf,
    #[cfg_attr(target_os = "macos", allow(unused_variables))] gate: &utils::sync::gate::Gate,
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -8,14 +8,14 @@ use std::collections::HashMap;
 use chrono::NaiveDateTime;
 use pageserver_api::models::AuxFilePolicy;
 use serde::{Deserialize, Serialize};
+use utils::id::TimelineId;

-use super::is_same_remote_layer_path;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::storage_layer::LayerName;
 use crate::tenant::timeline::import_pgdata;
 use crate::tenant::Generation;
 use pageserver_api::shard::ShardIndex;
-use utils::id::TimelineId;
+
 use utils::lsn::Lsn;

 /// In-memory representation of an `index_part.json` file
@@ -45,8 +45,10 @@ pub struct IndexPart {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub import_pgdata: Option<import_pgdata::index_part_format::Root>,

-    /// Layer filenames and metadata. For an index persisted in remote storage, all layers must
-    /// exist in remote storage.
+    /// Per layer file name metadata, which can be present for a present or missing layer file.
+    ///
+    /// Older versions of `IndexPart` will not have this property or have only a part of metadata
+    /// that latest version stores.
    pub layer_metadata: HashMap<LayerName, LayerFileMetadata>,

    /// Because of the trouble of eyeballing the legacy "metadata" field, we copied the
@@ -141,17 +143,6 @@ impl IndexPart {
    pub(crate) fn example() -> Self {
        Self::empty(TimelineMetadata::example())
    }
-
-    /// Returns true if the index contains a reference to the given layer (i.e. file path).
-    ///
-    /// TODO: there should be a variant of LayerName for the physical remote path that contains
-    /// information about the shard and generation, to avoid passing in metadata.
-    pub fn references(&self, name: &LayerName, metadata: &LayerFileMetadata) -> bool {
-        let Some(index_metadata) = self.layer_metadata.get(name) else {
-            return false;
-        };
-        is_same_remote_layer_path(name, metadata, name, index_metadata)
-    }
 }

 /// Metadata gathered for each of the layer files.
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -25,8 +25,8 @@ use utils::id::{TenantId, TimelineId};
 use tracing::info;

 /// Serializes and uploads the given index part data to the remote storage.
-pub(crate) async fn upload_index_part(
-    storage: &GenericRemoteStorage,
+pub(crate) async fn upload_index_part<'a>(
+    storage: &'a GenericRemoteStorage,
    tenant_shard_id: &TenantShardId,
    timeline_id: &TimelineId,
    generation: Generation,
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -345,7 +345,10 @@ impl LayerFringe {
    }

    pub(crate) fn next_layer(&mut self) -> Option<(ReadableLayer, KeySpace, Range<Lsn>)> {
-        let read_desc = self.planned_visits_by_lsn.pop()?;
+        let read_desc = match self.planned_visits_by_lsn.pop() {
+            Some(desc) => desc,
+            None => return None,
+        };

        let removed = self.visit_reads.remove_entry(&read_desc.layer_to_visit_id);

--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -1486,7 +1486,7 @@ pub struct ValueRef<'a> {
    layer: &'a DeltaLayerInner,
 }

-impl ValueRef<'_> {
+impl<'a> ValueRef<'a> {
    /// Loads the value from disk
    pub async fn load(&self, ctx: &RequestContext) -> Result<Value> {
        let buf = self.load_raw(ctx).await?;
@@ -1543,7 +1543,7 @@ pub struct DeltaLayerIterator<'a> {
    is_end: bool,
 }

-impl DeltaLayerIterator<'_> {
+impl<'a> DeltaLayerIterator<'a> {
    pub(crate) fn layer_dbg_info(&self) -> String {
        self.delta_layer.layer_dbg_info()
    }
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -1052,7 +1052,7 @@ pub struct ImageLayerIterator<'a> {
    is_end: bool,
 }

-impl ImageLayerIterator<'_> {
+impl<'a> ImageLayerIterator<'a> {
    pub(crate) fn layer_dbg_info(&self) -> String {
        self.image_layer.layer_dbg_info()
    }
--- a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs
@@ -25,11 +25,11 @@ pub trait File: Send {
    /// [`std::io::ErrorKind::UnexpectedEof`] error if the file is shorter than `start+dst.len()`.
    ///
    /// No guarantees are made about the remaining bytes in `dst` in case of a short read.
-    async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(
-        &self,
+    async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
+        &'b self,
        start: u64,
        dst: Slice<B>,
-        ctx: &RequestContext,
+        ctx: &'a RequestContext,
    ) -> std::io::Result<(Slice<B>, usize)>;
 }

@@ -479,11 +479,11 @@ mod tests {
    }

    impl File for InMemoryFile {
-        async fn read_exact_at_eof_ok<B: IoBufMut + Send>(
-            &self,
+        async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>(
+            &'b self,
            start: u64,
            mut dst: Slice<B>,
-            _ctx: &RequestContext,
+            _ctx: &'a RequestContext,
        ) -> std::io::Result<(Slice<B>, usize)> {
            let dst_slice: &mut [u8] = dst.as_mut_rust_slice_full_zeroed();
            let nread = {
@@ -609,12 +609,12 @@ mod tests {
        }
    }

-    impl File for RecorderFile<'_> {
-        async fn read_exact_at_eof_ok<B: IoBufAlignedMut + Send>(
-            &self,
+    impl<'x> File for RecorderFile<'x> {
+        async fn read_exact_at_eof_ok<'a, 'b, B: IoBufAlignedMut + Send>(
+            &'b self,
            start: u64,
            dst: Slice<B>,
-            ctx: &RequestContext,
+            ctx: &'a RequestContext,
        ) -> std::io::Result<(Slice<B>, usize)> {
            let (dst, nread) = self.file.read_exact_at_eof_ok(start, dst, ctx).await?;
            self.recorded.borrow_mut().push(RecordedRead {
@@ -740,11 +740,11 @@ mod tests {
    }

    impl File for MockFile {
-        async fn read_exact_at_eof_ok<B: IoBufMut + Send>(
-            &self,
+        async fn read_exact_at_eof_ok<'a, 'b, B: IoBufMut + Send>(
+            &'b self,
            start: u64,
            mut dst: Slice<B>,
-            _ctx: &RequestContext,
+            _ctx: &'a RequestContext,
        ) -> std::io::Result<(Slice<B>, usize)> {
            let ExpectedRead {
                expect_pos,
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -31,9 +31,9 @@ use pageserver_api::{
    },
    keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning},
    models::{
-        CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings,
-        DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy,
-        InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState,
+        CompactionAlgorithm, CompactionAlgorithmSettings, DownloadRemoteLayersTaskInfo,
+        DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, InMemoryLayerInfo, LayerMapInfo,
+        LsnLease, TimelineState,
    },
    reltag::BlockNumber,
    shard::{ShardIdentity, ShardNumber, TenantShardId},
@@ -792,6 +792,63 @@ pub(crate) struct CompactRequest {
    pub sub_compaction_max_job_size_mb: Option<u64>,
 }

+#[serde_with::serde_as]
+#[derive(Debug, Clone, serde::Deserialize)]
+pub(crate) struct CompactLsnRange {
+    pub start: Lsn,
+    pub end: Lsn,
+}
+
+#[serde_with::serde_as]
+#[derive(Debug, Clone, serde::Deserialize)]
+pub(crate) struct CompactKeyRange {
+    #[serde_as(as = "serde_with::DisplayFromStr")]
+    pub start: Key,
+    #[serde_as(as = "serde_with::DisplayFromStr")]
+    pub end: Key,
+}
+
+impl From<Range<Lsn>> for CompactLsnRange {
+    fn from(range: Range<Lsn>) -> Self {
+        Self {
+            start: range.start,
+            end: range.end,
+        }
+    }
+}
+
+impl From<Range<Key>> for CompactKeyRange {
+    fn from(range: Range<Key>) -> Self {
+        Self {
+            start: range.start,
+            end: range.end,
+        }
+    }
+}
+
+impl From<CompactLsnRange> for Range<Lsn> {
+    fn from(range: CompactLsnRange) -> Self {
+        range.start..range.end
+    }
+}
+
+impl From<CompactKeyRange> for Range<Key> {
+    fn from(range: CompactKeyRange) -> Self {
+        range.start..range.end
+    }
+}
+
+impl CompactLsnRange {
+    #[cfg(test)]
+    #[cfg(feature = "testing")]
+    pub fn above(lsn: Lsn) -> Self {
+        Self {
+            start: lsn,
+            end: Lsn::MAX,
+        }
+    }
+}
+
 #[derive(Debug, Clone, Default)]
 pub(crate) struct CompactOptions {
    pub flags: EnumSet<CompactFlags>,
@@ -4859,7 +4916,6 @@ impl Timeline {

    async fn find_gc_time_cutoff(
        &self,
-        now: SystemTime,
        pitr: Duration,
        cancel: &CancellationToken,
        ctx: &RequestContext,
@@ -4867,6 +4923,7 @@ impl Timeline {
        debug_assert_current_span_has_tenant_and_timeline_id();
        if self.shard_identity.is_shard_zero() {
            // Shard Zero has SLRU data and can calculate the PITR time -> LSN mapping itself
+            let now = SystemTime::now();
            let time_range = if pitr == Duration::ZERO {
                humantime::parse_duration(DEFAULT_PITR_INTERVAL).expect("constant is invalid")
            } else {
@@ -4952,7 +5009,6 @@ impl Timeline {
    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
    pub(super) async fn find_gc_cutoffs(
        &self,
-        now: SystemTime,
        space_cutoff: Lsn,
        pitr: Duration,
        cancel: &CancellationToken,
@@ -4980,7 +5036,7 @@ impl Timeline {
        // - if PITR interval is set, then this is our cutoff.
        // - if PITR interval is not set, then we do a lookup
        //   based on DEFAULT_PITR_INTERVAL, so that size-based retention does not result in keeping history around permanently on idle databases.
-        let time_cutoff = self.find_gc_time_cutoff(now, pitr, cancel, ctx).await?;
+        let time_cutoff = self.find_gc_time_cutoff(pitr, cancel, ctx).await?;

        Ok(match (pitr, time_cutoff) {
            (Duration::ZERO, Some(time_cutoff)) => {
@@ -5808,7 +5864,7 @@ enum OpenLayerAction {
    None,
 }

-impl TimelineWriter<'_> {
+impl<'a> TimelineWriter<'a> {
    async fn handle_open_layer_action(
        &mut self,
        at: Lsn,
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -29,7 +29,6 @@ use utils::id::TimelineId;
 use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder};
 use crate::page_cache;
 use crate::statvfs::Statvfs;
-use crate::tenant::checks::check_valid_layermap;
 use crate::tenant::remote_timeline_client::WaitCompletionError;
 use crate::tenant::storage_layer::batch_split_writer::{
    BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter,
@@ -1111,7 +1110,7 @@ impl Timeline {
                return Err(CompactionError::ShuttingDown);
            }

-            let same_key = prev_key == Some(key);
+            let same_key = prev_key.map_or(false, |prev_key| prev_key == key);
            // We need to check key boundaries once we reach next key or end of layer with the same key
            if !same_key || lsn == dup_end_lsn {
                let mut next_key_size = 0u64;
@@ -1799,24 +1798,6 @@ impl Timeline {
        Ok(())
    }

-    /// Get a watermark for gc-compaction, that is the lowest LSN that we can use as the `gc_horizon` for
-    /// the compaction algorithm. It is min(space_cutoff, time_cutoff, latest_gc_cutoff, standby_horizon).
-    /// Leases and retain_lsns are considered in the gc-compaction job itself so we don't need to account for them
-    /// here.
-    pub(crate) fn get_gc_compaction_watermark(self: &Arc<Self>) -> Lsn {
-        let gc_cutoff_lsn = {
-            let gc_info = self.gc_info.read().unwrap();
-            gc_info.min_cutoff()
-        };
-
-        // TODO: standby horizon should use leases so we don't really need to consider it here.
-        // let watermark = watermark.min(self.standby_horizon.load());
-
-        // TODO: ensure the child branches will not use anything below the watermark, or consider
-        // them when computing the watermark.
-        gc_cutoff_lsn.min(*self.get_latest_gc_cutoff_lsn())
-    }
-
    /// Split a gc-compaction job into multiple compaction jobs. The split is based on the key range and the estimated size of the compaction job.
    /// The function returns a list of compaction jobs that can be executed separately. If the upper bound of the compact LSN
    /// range is not specified, we will use the latest gc_cutoff as the upper bound, so that all jobs in the jobset acts
@@ -1829,7 +1810,7 @@ impl Timeline {
        let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX {
            job.compact_lsn_range.end
        } else {
-            self.get_gc_compaction_watermark()
+            *self.get_latest_gc_cutoff_lsn() // use the real gc cutoff
        };

        // Split compaction job to about 4GB each
@@ -1842,7 +1823,7 @@ impl Timeline {
        // by estimating the amount of files read for a compaction job. We should also partition on LSN.
        let ((dense_ks, sparse_ks), _) = {
            let Ok(partition) = self.partitioning.try_lock() else {
-                bail!("failed to acquire partition lock during gc-compaction");
+                bail!("failed to acquire partition lock");
            };
            partition.clone()
        };
@@ -2024,7 +2005,7 @@ impl Timeline {
                // Therefore, it can only clean up data that cannot be cleaned up with legacy gc, instead of
                // cleaning everything that theoritically it could. In the future, it should use `self.gc_info`
                // to get the truth data.
-                let real_gc_cutoff = self.get_gc_compaction_watermark();
+                let real_gc_cutoff = *self.get_latest_gc_cutoff_lsn();
                // The compaction algorithm will keep all keys above the gc_cutoff while keeping only necessary keys below the gc_cutoff for
                // each of the retain_lsn. Therefore, if the user-provided `compact_lsn_range.end` is larger than the real gc cutoff, we will use
                // the real cutoff.
@@ -2175,14 +2156,15 @@ impl Timeline {

        // Step 1: construct a k-merge iterator over all layers.
        // Also, verify if the layer map can be split by drawing a horizontal line at every LSN start/end split point.
-        let layer_names = job_desc
-            .selected_layers
-            .iter()
-            .map(|layer| layer.layer_desc().layer_name())
-            .collect_vec();
-        if let Some(err) = check_valid_layermap(&layer_names) {
-            bail!("gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err);
-        }
+        // disable the check for now because we need to adjust the check for partial compactions, will enable later.
+        // let layer_names = job_desc
+        //     .selected_layers
+        //     .iter()
+        //     .map(|layer| layer.layer_desc().layer_name())
+        //     .collect_vec();
+        // if let Some(err) = check_valid_layermap(&layer_names) {
+        //     warn!("gc-compaction layer map check failed because {}, this is normal if partial compaction is not finished yet", err);
+        // }
        // The maximum LSN we are processing in this compaction loop
        let end_lsn = job_desc
            .selected_layers
@@ -2564,48 +2546,13 @@ impl Timeline {
        );

        // Step 3: Place back to the layer map.
-
-        // First, do a sanity check to ensure the newly-created layer map does not contain overlaps.
-        let all_layers = {
-            let guard = self.layers.read().await;
-            let layer_map = guard.layer_map()?;
-            layer_map.iter_historic_layers().collect_vec()
-        };
-
-        let mut final_layers = all_layers
-            .iter()
-            .map(|layer| layer.layer_name())
-            .collect::<HashSet<_>>();
-        for layer in &layer_selection {
-            final_layers.remove(&layer.layer_desc().layer_name());
-        }
-        for layer in &compact_to {
-            final_layers.insert(layer.layer_desc().layer_name());
-        }
-        let final_layers = final_layers.into_iter().collect_vec();
-
-        // TODO: move this check before we call `finish` on image layer writers. However, this will require us to get the layer name before we finish
-        // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are
-        // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails.
-        if let Some(err) = check_valid_layermap(&final_layers) {
-            bail!("gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err);
-        }
-
-        // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only
-        // operate on L1 layers.
        {
+            // TODO: sanity check if the layer map is valid (i.e., should not have overlaps)
            let mut guard = self.layers.write().await;
            guard
                .open_mut()?
                .finish_gc_compaction(&layer_selection, &compact_to, &self.metrics)
        };
-
-        // Schedule an index-only upload to update the `latest_gc_cutoff` in the index_part.json.
-        // Otherwise, after restart, the index_part only contains the old `latest_gc_cutoff` and
-        // find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should
-        // be batched into `schedule_compaction_update`.
-        let disk_consistent_lsn = self.disk_consistent_lsn.load();
-        self.schedule_uploads(disk_consistent_lsn, None)?;
        self.remote_client
            .schedule_compaction_update(&layer_selection, &compact_to)?;

@@ -2957,7 +2904,7 @@ impl CompactionLayer<Key> for ResidentDeltaLayer {
 impl CompactionDeltaLayer<TimelineAdaptor> for ResidentDeltaLayer {
    type DeltaEntry<'a> = DeltaEntry<'a>;

-    async fn load_keys(&self, ctx: &RequestContext) -> anyhow::Result<Vec<DeltaEntry<'_>>> {
+    async fn load_keys<'a>(&self, ctx: &RequestContext) -> anyhow::Result<Vec<DeltaEntry<'_>>> {
        self.0.get_as_delta(ctx).await?.index_entries(ctx).await
    }
 }
--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -1,7 +1,5 @@
 use std::sync::Arc;

-use pageserver_api::models::TenantState;
-
 use super::delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard};
 use super::Timeline;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
@@ -72,15 +70,6 @@ pub(crate) async fn offload_timeline(

    {
        let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
-        if matches!(
-            tenant.current_state(),
-            TenantState::Stopping { .. } | TenantState::Broken { .. }
-        ) {
-            // Cancel the operation if the tenant is shutting down. Do this while the
-            // timelines_offloaded lock is held to prevent a race with Tenant::shutdown
-            // for defusing the lock
-            return Err(OffloadError::Cancelled);
-        }
        offloaded_timelines.insert(
            timeline.timeline_id,
            Arc::new(
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -34,6 +34,8 @@ DATA = \
 	neon--1.2--1.3.sql \
 	neon--1.3--1.4.sql \
 	neon--1.4--1.5.sql \
+	neon--1.5--1.6.sql \
+	neon--1.6--1.5.sql \
 	neon--1.5--1.4.sql \
 	neon--1.4--1.3.sql \
 	neon--1.3--1.2.sql \
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -22,6 +22,7 @@
 #include "neon_pgversioncompat.h"

 #include "access/parallel.h"
+#include "access/xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "pagestore_client.h"
@@ -40,12 +41,16 @@
 #include "utils/dynahash.h"
 #include "utils/guc.h"

+#if PG_VERSION_NUM >= 150000
+#include "access/xlogrecovery.h"
+#endif
+
 #include "hll.h"
 #include "bitmap.h"
 #include "neon.h"
 #include "neon_perf_counters.h"

-#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)
+#define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "LFC: assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)

 /*
 * Local file cache is used to temporary store relations pages in local file system.
@@ -100,7 +105,9 @@ typedef struct FileCacheEntry
 	BufferTag	key;
 	uint32		hash;
 	uint32		offset;
-	uint32		access_count;
+	uint32		access_count : 30;
+	uint32      prewarm_requested : 1; /* entry should be filled by prewarm */
+	uint32      prewarm_started : 1;   /* chunk is written by lfc_prewarm */
 	uint32		bitmap[CHUNK_BITMAP_SIZE];
 	dlist_node	list_node;		/* LRU/holes list node */
 } FileCacheEntry;
@@ -118,17 +125,29 @@ typedef struct FileCacheControl
 	uint64		writes;			/* number of writes issued */
 	uint64		time_read;		/* time spent reading (us) */
 	uint64		time_write;		/* time spent writing (us) */
+	uint32		prewarm_total_chunks;
+	uint32		prewarm_curr_chunk;
+	uint32		prewarmed_pages;
+	uint32		skipped_pages;
 	dlist_head	lru;			/* double linked list for LRU replacement
 								 * algorithm */
 	dlist_head  holes;          /* double linked list of punched holes */
 	HyperLogLogState wss_estimation; /* estimation of working set size */
 } FileCacheControl;

+typedef struct FileCacheStateEntry
+{
+	BufferTag	key;
+	uint32		bitmap[CHUNK_BITMAP_SIZE];
+} FileCacheStateEntry;
+
 static HTAB *lfc_hash;
 static int	lfc_desc = 0;
 static LWLockId lfc_lock;
 static int	lfc_max_size;
 static int	lfc_size_limit;
+static int	lfc_prewarm_limit;
+static int	lfc_prewarm_batch;
 static char *lfc_path;
 static FileCacheControl *lfc_ctl;
 static shmem_startup_hook_type prev_shmem_startup_hook;
@@ -149,7 +168,7 @@ lfc_disable(char const *op)
 {
 	int			fd;

-	elog(WARNING, "Failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path);
+	elog(WARNING, "LFC: failed to %s local file cache at %s: %m, disabling local file cache", op, lfc_path);

 	/* Invalidate hash */
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
@@ -184,7 +203,7 @@ lfc_disable(char const *op)
 			pgstat_report_wait_end();

 			if (rc < 0)
-				elog(WARNING, "Failed to truncate local file cache %s: %m", lfc_path);
+				elog(WARNING, "LFC: failed to truncate local file cache %s: %m", lfc_path);
 		}
 	}

@@ -196,7 +215,7 @@ lfc_disable(char const *op)

 	fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC);
 	if (fd < 0)
-		elog(WARNING, "Failed to recreate local file cache %s: %m", lfc_path);
+		elog(WARNING, "LFC: failed to recreate local file cache %s: %m", lfc_path);
 	else
 		close(fd);

@@ -267,14 +286,7 @@ lfc_shmem_startup(void)
 								 n_chunks + 1, n_chunks + 1,
 								 &info,
 								 HASH_ELEM | HASH_BLOBS);
-		lfc_ctl->generation = 0;
-		lfc_ctl->size = 0;
-		lfc_ctl->used = 0;
-		lfc_ctl->hits = 0;
-		lfc_ctl->misses = 0;
-		lfc_ctl->writes = 0;
-		lfc_ctl->time_read = 0;
-		lfc_ctl->time_write = 0;
+		memset(lfc_ctl, 0, sizeof *lfc_ctl);
 		dlist_init(&lfc_ctl->lru);
 		dlist_init(&lfc_ctl->holes);

@@ -285,7 +297,7 @@ lfc_shmem_startup(void)
 		fd = BasicOpenFile(lfc_path, O_RDWR | O_CREAT | O_TRUNC);
 		if (fd < 0)
 		{
-			elog(WARNING, "Failed to create local file cache %s: %m", lfc_path);
+			elog(WARNING, "LFC: failed to create local file cache %s: %m", lfc_path);
 			lfc_ctl->limit = 0;
 		}
 		else
@@ -327,7 +339,7 @@ lfc_check_limit_hook(int *newval, void **extra, GucSource source)
 {
 	if (*newval > lfc_max_size)
 	{
-		elog(ERROR, "neon.file_cache_size_limit can not be larger than neon.max_file_cache_size");
+		elog(ERROR, "LFC: neon.file_cache_size_limit can not be larger than neon.max_file_cache_size");
 		return false;
 	}
 	return true;
@@ -440,6 +452,32 @@ lfc_init(void)
 							   NULL,
 							   NULL);

+	DefineCustomIntVariable("neon.file_cache_prewarm_limit",
+							"Maximal number of prewarmed pages",
+							NULL,
+							&lfc_prewarm_limit,
+							0,	/* disabled by default */
+							0,
+							INT_MAX,
+							PGC_SIGHUP,
+							0,
+							NULL,
+							NULL,
+							NULL);
+
+	DefineCustomIntVariable("neon.file_cache_prewarm_batch",
+							"Number of pages retrivied by prewarm from page server",
+							NULL,
+							&lfc_prewarm_batch,
+							64,
+							1,
+							INT_MAX,
+							PGC_SIGHUP,
+							0,
+							NULL,
+							NULL,
+							NULL);
+
 	if (lfc_max_size == 0)
 		return;

@@ -453,6 +491,264 @@ lfc_init(void)
 #endif
 }

+static FileCacheStateEntry*
+lfc_get_state(size_t* n_entries)
+{
+	size_t max_entries = *n_entries;
+	size_t i = 0;
+	FileCacheStateEntry* fs;
+
+	if (lfc_maybe_disabled() || max_entries == 0)	/* fast exit if file cache is disabled */
+		return NULL;
+
+	fs = (FileCacheStateEntry*)palloc(sizeof(FileCacheStateEntry) * max_entries);
+
+	LWLockAcquire(lfc_lock, LW_SHARED);
+
+	if (LFC_ENABLED())
+	{
+		dlist_iter	iter;
+		dlist_reverse_foreach(iter, &lfc_ctl->lru)
+		{
+			FileCacheEntry *entry = dlist_container(FileCacheEntry, list_node, iter.cur);
+			memcpy(&fs[i].key, &entry->key, sizeof entry->key);
+			memcpy(fs[i].bitmap, entry->bitmap, sizeof entry->bitmap);
+			if (++i == max_entries)
+				break;
+		}
+		elog(LOG, "LFC: save state of %ld chunks", (long)i);
+	}
+
+	LWLockRelease(lfc_lock);
+
+	*n_entries = i;
+	return fs;
+}
+
+/*
+ * Prewarm LFC cache to the specified state.
+ *
+ * Prewarming can interfere with accesses to the pages by other backends. Usually access to LFC is protected by shared buffers: when Postgres
+ * is reading page, it pins shared buffer and enforces that only one backend is reading it, while other are waiting for read completion.
+ *
+ * But it is not true for prewarming: backend can fetch page itself, modify and then write it to LFC. At the
+ * same time `lfc_prewarm` tries to write deteriorated image of this page in LFC. To increase concurrency, access to LFC files (both read and write)
+ * is performed without holding locks. So it can happen that two or more processes write different content to the same location in the LFC file.
+ * Certainly we can not rely on disk content in this case.
+ *
+ * To solve this problem we use two flags in LFC entry: `prewarm_requested` and `prewarm_started`. First is set before prewarm is actually started.
+ * `lfc_prewarm` writes to LFC file only if this flag is set. This flag is cleared if any other backend performs write to this LFC chunk.
+ * In this case data loaded by `lfc_prewarm` is considered to be deteriorated and should be just ignored.
+ *
+ * But as far as write to LFC is performed without holding lock, there is no guarantee that no such write is in progress.
+ * This is why second flag is used: `prewarm_started`. It is set by `lfc_prewarm` when is starts writing page and cleared when write is completed.
+ * Any other backend writing to LFC should abandon it's write to LFC file (just not mark page as loaded in bitmap) if this flag is set.
+ * So neither `lfc_prewarm`, neither backend are saving page in LFC in this case - it is just skipped.
+ */
+
+static void
+lfc_prewarm(FileCacheStateEntry* fs, size_t n_entries)
+{
+	ssize_t rc;
+	size_t snd_idx = 0, rcv_idx = 0;
+	size_t n_sent = 0, n_received = 0;
+	FileCacheEntry *entry;
+	uint64 generation;
+	uint32 entry_offset;
+	uint32 hash;
+	size_t i;
+	bool   found;
+	int    shard_no;
+
+	if (!lfc_ensure_opened())
+		return;
+
+	if (n_entries == 0 || fs == NULL)
+	{
+		elog(LOG, "LFC: prewarm is disabled");
+		return;
+	}
+
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	/* Do not prewarm more entries than LFC limit */
+	if (lfc_ctl->limit <= lfc_ctl->size)
+	{
+		LWLockRelease(lfc_lock);
+		return;
+	}
+	if (n_entries > lfc_ctl->limit - lfc_ctl->size)
+	{
+		n_entries = lfc_ctl->limit - lfc_ctl->size;
+	}
+
+	/* Initialize fields used to track prewarming progress */
+	lfc_ctl->prewarm_total_chunks = n_entries;
+	lfc_ctl->prewarm_curr_chunk = 0;
+
+    /*
+	 * Load LFC state and add entries in hash table.
+	 * It is needed to track modification of prewarmed pages.
+	 * All such entries have `prewarm_requested` flag set. When entry is updated (some backed reads or writes
+	 * some pages from this chunk), then `prewarm_requested` flag is cleared, prohibiting prewarm of this chunk.
+	 * It prevents overwritting page updated or loaded by backend with older one, loaded by prewarm.
+	 */
+	for (i = 0; i < n_entries; i++)
+	{
+		hash = get_hash_value(lfc_hash, &fs[i].key);
+		entry = hash_search_with_hash_value(lfc_hash, &fs[i].key, hash, HASH_ENTER, &found);
+		/* Do not prewarm chunks which are already present in LFC */
+		if (!found)
+		{
+			entry->offset = lfc_ctl->size++;
+			entry->hash = hash;
+			entry->access_count = 0;
+			entry->prewarm_requested = true;
+			entry->prewarm_started = false;
+			memset(entry->bitmap, 0, sizeof entry->bitmap);
+			/* Most recently visted pages are stored first */
+			dlist_push_head(&lfc_ctl->lru, &entry->list_node);
+			lfc_ctl->used += 1;
+		}
+	}
+	LWLockRelease(lfc_lock);
+
+	elog(LOG, "LFC: start loading %ld chunks", (long)n_entries);
+
+	while (true)
+	{
+		size_t chunk_no = snd_idx / BLOCKS_PER_CHUNK;
+		size_t offs_in_chunk = snd_idx % BLOCKS_PER_CHUNK;
+		if (chunk_no < n_entries)
+		{
+			if (fs[chunk_no].bitmap[offs_in_chunk >> 5] & (1 << (offs_in_chunk & 31)))
+			{
+				/*
+				 * In case of prewarming replica we should be careful not to load too new version
+				 * of the page - with LSN larger than current replay LSN.
+				 * At primary we are always loading latest version.
+				 */
+				XLogRecPtr req_lsn = RecoveryInProgress() ? GetXLogReplayRecPtr(NULL) : UINT64_MAX;
+
+				NeonGetPageRequest request = {
+					.req.tag = T_NeonGetPageRequest,
+					/* lsn and not_modified_since are filled in below */
+					.rinfo = BufTagGetNRelFileInfo(fs[chunk_no].key),
+					.forknum = fs[chunk_no].key.forkNum,
+					.blkno = fs[chunk_no].key.blockNum + offs_in_chunk,
+					.req.lsn = req_lsn,
+					.req.not_modified_since = 0
+				};
+				shard_no = get_shard_number(&fs[chunk_no].key);
+				while (!page_server->send(shard_no, (NeonRequest *) &request)
+					|| !page_server->flush(shard_no))
+				{
+					/* page server disconnected: all previusly sent prefetch requests are lost */
+					n_sent = 0;
+				}
+				n_sent += 1;
+			}
+			snd_idx += 1;
+		}
+		if (n_sent >= n_received + lfc_prewarm_batch || chunk_no == n_entries)
+		{
+			NeonResponse * resp;
+			do
+			{
+				chunk_no = rcv_idx / BLOCKS_PER_CHUNK;
+				offs_in_chunk = rcv_idx % BLOCKS_PER_CHUNK;
+				rcv_idx += 1;
+			} while (!(fs[chunk_no].bitmap[offs_in_chunk >> 5] & (1 << (offs_in_chunk & 31))));
+
+			shard_no = get_shard_number(&fs[chunk_no].key);
+			resp = page_server->receive(shard_no);
+			lfc_ctl->prewarm_curr_chunk = chunk_no;
+
+			switch (resp->tag)
+			{
+				case T_NeonGetPageResponse:
+					break;
+				case T_NeonErrorResponse:
+				{
+					/* Prefech can request page which is already dropped so PS can respond with error: just ignore it */
+					NeonErrorResponse *err_resp = (NeonErrorResponse *) resp;
+					elog(LOG, "LFC: page server failed to load page %u of relation %u/%u/%u.%u: %s",
+						 fs[chunk_no].key.blockNum + (BlockNumber)offs_in_chunk, RelFileInfoFmt(BufTagGetNRelFileInfo(fs[chunk_no].key)), fs[chunk_no].key.forkNum, err_resp->message);
+					continue;
+				}
+				default:
+					elog(LOG, "LFC: unexpected response type: %d", resp->tag);
+					return;
+			}
+
+			hash = get_hash_value(lfc_hash, &fs[chunk_no].key);
+
+			LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+			entry = hash_search_with_hash_value(lfc_hash, &fs[chunk_no].key, hash, HASH_FIND, NULL);
+			if (entry != NULL && entry->prewarm_requested)
+			{
+				/* Unlink entry from LRU list to pin it for the duration of IO operation */
+				if (entry->access_count++ == 0)
+					dlist_delete(&entry->list_node);
+
+				generation = lfc_ctl->generation;
+				entry_offset = entry->offset;
+				Assert(!entry->prewarm_started);
+				entry->prewarm_started = true;
+
+				LWLockRelease(lfc_lock);
+
+				rc = pwrite(lfc_desc, ((NeonGetPageResponse*)resp)->page, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + offs_in_chunk) * BLCKSZ);
+				if (rc != BLCKSZ)
+				{
+					lfc_disable("write");
+					break;
+				}
+				else
+				{
+					LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+					if (lfc_ctl->generation == generation)
+					{
+						CriticalAssert(LFC_ENABLED());
+						if (--entry->access_count == 0)
+							dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
+						if (entry->prewarm_requested)
+						{
+							lfc_ctl->used_pages += 1 - ((entry->bitmap[offs_in_chunk >> 5] >> (offs_in_chunk & 31)) & 1);
+							entry->bitmap[offs_in_chunk >> 5] |= 1 << (offs_in_chunk & 31);
+							lfc_ctl->prewarmed_pages += 1;
+						}
+						else
+						{
+							lfc_ctl->skipped_pages += 1;
+						}
+						Assert(entry->prewarm_started);
+						entry->prewarm_started = false;
+					}
+
+					LWLockRelease(lfc_lock);
+				}
+			}
+			else
+			{
+				Assert(!entry || !entry->prewarm_started);
+				lfc_ctl->skipped_pages += 1;
+				LWLockRelease(lfc_lock);
+			}
+
+			if (++n_received == n_sent && snd_idx >= n_entries * BLOCKS_PER_CHUNK)
+			{
+				break;
+			}
+		}
+	}
+	Assert(n_sent == n_received);
+	lfc_ctl->prewarm_curr_chunk = n_entries;
+	elog(LOG, "LFC: complete prewarming: loaded %ld pages", (long)n_received);
+}
+
+
 /*
 * Check if page is present in the cache.
 * Returns true if page is found in local cache.
@@ -541,7 +837,6 @@ lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		}
 		else
 		{
-			LWLockRelease(lfc_lock);
 			return found;
 		}

@@ -621,6 +916,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)

 	/* remove the page from the cache */
 	entry->bitmap[chunk_offs >> 5] &= ~(1 << (chunk_offs & (32 - 1)));
+	entry->prewarm_requested = false; /* prohibit prewarm of this LFC entry */

 	if (entry->access_count == 0)
 	{
@@ -866,7 +1162,15 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);

-	/* 
+	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+	if (!LFC_ENABLED())
+	{
+		LWLockRelease(lfc_lock);
+		return;
+	}
+
+	/*
 	 * For every chunk that has blocks we're interested in, we
 	 * 1. get the chunk header
 	 * 2. Check if the chunk actually has the blocks we're interested in
@@ -892,18 +1196,26 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
 		hash = get_hash_value(lfc_hash, &tag);

-		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-		if (!LFC_ENABLED())
-		{
-			LWLockRelease(lfc_lock);
-			return;
-		}
-
 		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);

 		if (found)
 		{
+			if (entry->prewarm_started)
+			{
+				/*
+				 * Some page of this chunk is currently written by `lfc_prewarm`.
+				 * We should give-up not to interfere with it.
+				 * But clearing `prewarm_requested` flag also will not allow `lfc_prewarm` to fix it result.
+				 */
+				entry->prewarm_requested = false;
+				/* cleanup all affected pages of the chunk: we do not know which one of them is conflicting with prewarm */
+				for (int i = 0; i < blocks_in_chunk; i++)
+				{
+					lfc_ctl->used_pages -= ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1);
+					entry->bitmap[(chunk_offs + i) >> 5] &= ~(1 << ((chunk_offs + i) & 31));
+				}
+				goto next_chunk;
+			}
 			/*
 			 * Unlink entry from LRU list to pin it for the duration of IO
 			 * operation
@@ -933,7 +1245,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			{
 				/* Cache overflow: evict least recently used chunk */
 				FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
-	
+
 				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
 				{
 					lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
@@ -949,10 +1261,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
 				uint32		offset = hole->offset;
 				bool		hole_found;
-	
+
 				hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found);
 				CriticalAssert(hole_found);
-	
+
 				lfc_ctl->used += 1;
 				entry->offset = offset;	/* reuse the hole */
 			}
@@ -964,9 +1276,11 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 			}
 			entry->access_count = 1;
 			entry->hash = hash;
+			entry->prewarm_started = false;
 			memset(entry->bitmap, 0, sizeof entry->bitmap);
 		}

+		entry->prewarm_requested = false; /* prohibit prewarm if LFC entry is updated by some backend */
 		generation = lfc_ctl->generation;
 		entry_offset = entry->offset;
 		LWLockRelease(lfc_lock);
@@ -981,6 +1295,7 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 		if (rc != BLCKSZ * blocks_in_chunk)
 		{
 			lfc_disable("write");
+			return;
 		}
 		else
 		{
@@ -1010,12 +1325,13 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				}
 			}

-			LWLockRelease(lfc_lock);
 		}
+	  next_chunk:
 		blkno += blocks_in_chunk;
 		buf_offset += blocks_in_chunk;
 		nblocks -= blocks_in_chunk;
 	}
+	LWLockRelease(lfc_lock);
 }

 typedef struct
@@ -1339,3 +1655,69 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 	}
 	PG_RETURN_NULL();
 }
+
+PG_FUNCTION_INFO_V1(get_local_cache_state);
+
+Datum
+get_local_cache_state(PG_FUNCTION_ARGS)
+{
+	size_t n_entries = PG_ARGISNULL(0) ? lfc_prewarm_limit : PG_GETARG_INT32(0);
+	FileCacheStateEntry* fs = lfc_get_state(&n_entries);
+	if (fs != NULL)
+	{
+		size_t size_in_bytes = sizeof(FileCacheStateEntry) * n_entries;
+		bytea* res = (bytea*)palloc(VARHDRSZ + size_in_bytes);
+
+		SET_VARSIZE(res, VARHDRSZ + size_in_bytes);
+		memcpy(VARDATA(res), fs, size_in_bytes);
+		pfree(fs);
+
+		PG_RETURN_BYTEA_P(res);
+	}
+	PG_RETURN_NULL();
+}
+
+PG_FUNCTION_INFO_V1(prewarm_local_cache);
+
+Datum
+prewarm_local_cache(PG_FUNCTION_ARGS)
+{
+	bytea* state = PG_GETARG_BYTEA_PP(0);
+	uint32 n_entries = VARSIZE_ANY_EXHDR(state)/sizeof(FileCacheStateEntry);
+	FileCacheStateEntry* fs = (FileCacheStateEntry*)VARDATA_ANY(state);
+
+	lfc_prewarm(fs, n_entries);
+
+	PG_RETURN_NULL();
+}
+
+PG_FUNCTION_INFO_V1(get_prewarm_info);
+
+Datum
+get_prewarm_info(PG_FUNCTION_ARGS)
+{
+	Datum		values[4];
+	bool		nulls[4];
+	TupleDesc	tupdesc;
+
+	if (lfc_size_limit == 0)
+		PG_RETURN_NULL();
+
+	tupdesc = CreateTemplateTupleDesc(4);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 1, "total_chunks", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 2, "curr_chunk", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 3, "prewarmed_pages", INT4OID, -1, 0);
+	TupleDescInitEntry(tupdesc, (AttrNumber) 4, "skipped_pages", INT4OID, -1, 0);
+	tupdesc = BlessTupleDesc(tupdesc);
+
+	MemSet(nulls, 0, sizeof(nulls));
+	LWLockAcquire(lfc_lock, LW_SHARED);
+	values[0] = Int32GetDatum(lfc_ctl->prewarm_total_chunks);
+	values[1] = Int32GetDatum(lfc_ctl->prewarm_curr_chunk);
+	values[2] = Int32GetDatum(lfc_ctl->prewarmed_pages);
+	values[3] = Int32GetDatum(lfc_ctl->skipped_pages);
+	LWLockRelease(lfc_lock);
+
+	PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Konstantin Knizhnik	cca517fb94	Add test for prewarm under workload	2024-12-23 11:36:47 +02:00
Konstantin Knizhnik	5f74ff1307	Make ruffhappy	2024-12-15 08:02:35 +02:00
Konstantin Knizhnik	1d77fb0dea	Eliminate stale reads from LFC in case of prewarm conflict	2024-12-14 21:58:26 +02:00
Konstantin Knizhnik	07027bde7d	Do not run test_lfc_prewarm test without LFC	2024-12-14 21:11:20 +02:00
Konstantin Knizhnik	1f2b47c70f	Set LFC path in test-lfc_prewarm test	2024-12-14 21:09:58 +02:00
Konstantin Knizhnik	7b80ad4950	Fix format warning	2024-12-14 21:09:58 +02:00
Konstantin Knizhnik	e07eedca5d	correctly handle PS disconect duriug prewarm	2024-12-14 21:09:58 +02:00
Konstantin Knizhnik	7e2fb10cca	Fix handling zero neon.file_cache_prewarm_limit	2024-12-14 21:09:57 +02:00
Konstantin Knizhnik	dc1684efcc	Add delay between upgrade of extension version	2024-12-14 21:09:57 +02:00
Konstantin Knizhnik	ec8b8b941d	Add functions to get LFC state, prewarm LFC and monitor prewarm process	2024-12-14 21:09:57 +02:00