Merge pull request #9056 from neondatabase/rc/proxy/2024-09-19

Proxy release 2024-09-19
fix(storage-scrubber): log version after initialize the logger (#9049 )
2026-03-07 10:20:38 +00:00 · 2024-09-19 10:41:17 +01:00 · 2024-09-18 14:13:57 -04:00 · 2024-09-18 18:17:55 +02:00 · 2024-09-18 17:14:53 +02:00 · 2024-09-18 14:51:59 +01:00
128 changed files with 4130 additions and 1691 deletions
--- a/.devcontainer/Dockerfile.devcontainer
+++ b/.devcontainer/Dockerfile.devcontainer
@@ -1 +0,0 @@
-FROM neondatabase/build-tools:pinned
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,23 +0,0 @@
-// https://containers.dev/implementors/json_reference/
-{
-  "name": "Neon",
-  "build": {
-    "context": "..",
-    "dockerfile": "Dockerfile.devcontainer"
-  },
-
-  "postCreateCommand": {
-    "build neon": "BUILD_TYPE=debug CARGO_BUILD_FLAGS='--features=testing' mold -run make -s -j`nproc`",
-    "install python deps": "./scripts/pysync"
-  },
-
-  "customizations": {
-    "vscode": {
-      "extensions": [
-        "charliermarsh.ruff",
-        "github.vscode-github-actions",
-        "rust-lang.rust-analyzer"
-      ]
-    }
-  }
-}
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -62,7 +62,7 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16; do
+          for r in 14 15 16 17; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done
@@ -83,6 +83,10 @@ jobs:
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

+      - name: Set pg 17 revision for caching
+        id: pg_v17_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
+
      # Set some environment variables used by all the steps.
      #
      # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
@@ -136,6 +140,13 @@ jobs:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}

+      - name: Cache postgres v17 build
+        id: cache_pg_17
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v17
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
+
      - name: Build postgres v14
        if: steps.cache_pg_14.outputs.cache-hit != 'true'
        run: mold -run make postgres-v14 -j$(nproc)
@@ -148,6 +159,10 @@ jobs:
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: mold -run make postgres-v16 -j$(nproc)

+      - name: Build postgres v17
+        if: steps.cache_pg_17.outputs.cache-hit != 'true'
+        run: mold -run make postgres-v17 -j$(nproc)
+
      - name: Build neon extensions
        run: mold -run make neon-pg-ext -j$(nproc)

@@ -210,7 +225,7 @@ jobs:
        run: |
          PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
          export PQ_LIB_DIR
-          LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
+          LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
          export LD_LIBRARY_PATH

          #nextest does not yet support running doctests
--- a/.github/workflows/_push-to-acr.yml
+++ b/.github/workflows/_push-to-acr.yml
@@ -52,5 +52,5 @@ jobs:
          for image in ${images}; do
            docker buildx imagetools create \
              -t ${{ inputs.registry_name }}.azurecr.io/neondatabase/${image}:${{ inputs.image_tag }} \
-                                        neondatabase/${image}:${{ inputs.image_tag }}
+                                                        neondatabase/${image}:${{ inputs.image_tag }}
          done
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -54,8 +54,8 @@ jobs:
      build-tag: ${{steps.build-tag.outputs.tag}}

    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      # Need `fetch-depth: 0` to count the number of commits in the branch
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

@@ -211,7 +211,7 @@ jobs:
      build-tag: ${{ needs.tag.outputs.build-tag }}
      build-type: ${{ matrix.build-type }}
      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds
-      pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }}
+      pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16", "v17"]' || '["v17"]' }}
    secrets: inherit

  # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking
@@ -357,6 +357,7 @@ jobs:
            })

  coverage-report:
+    if: ${{ !startsWith(github.ref_name, 'release') }}
    needs: [ check-permissions, build-build-tools-image, build-and-test-locally ]
    runs-on: [ self-hosted, small ]
    container:
@@ -373,8 +374,8 @@ jobs:
        coverage-html: ${{ steps.upload-coverage-report-new.outputs.report-url }}
        coverage-json: ${{ steps.upload-coverage-report-new.outputs.summary-json }}
    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      # Need `fetch-depth: 0` for differential coverage (to get diff between two commits)
+      - uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 0
@@ -475,11 +476,9 @@ jobs:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v4
        with:
          submodules: true
-          fetch-depth: 0

      - uses: ./.github/actions/set-docker-config-dir
      - uses: docker/setup-buildx-action@v3
@@ -548,17 +547,15 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        version: [ v14, v15, v16 ]
+        version: [ v14, v15, v16, v17 ]
        arch: [ x64, arm64 ]

    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}

    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      - uses: actions/checkout@v4
        with:
          submodules: true
-          fetch-depth: 0

      - uses: ./.github/actions/set-docker-config-dir
      - uses: docker/setup-buildx-action@v3
@@ -627,7 +624,7 @@ jobs:

      - name: Build compute-tools image
        # compute-tools are Postgres independent, so build it only once
-        if: matrix.version == 'v16'
+        if: matrix.version == 'v17'
        uses: docker/build-push-action@v6
        with:
          target: compute-tools-image
@@ -649,7 +646,7 @@ jobs:

    strategy:
      matrix:
-        version: [ v14, v15, v16 ]
+        version: [ v14, v15, v16, v17 ]

    steps:
      - uses: docker/login-action@v3
@@ -671,7 +668,7 @@ jobs:
                                             neondatabase/neon-test-extensions-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}-arm64

      - name: Create multi-arch compute-tools image
-        if: matrix.version == 'v16'
+        if: matrix.version == 'v17'
        run: |
          docker buildx imagetools create -t neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }} \
                                             neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-x64 \
@@ -689,7 +686,7 @@ jobs:
                                                                                neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}

      - name: Push multi-arch compute-tools image to ECR
-        if: matrix.version == 'v16'
+        if: matrix.version == 'v17'
        run: |
          docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{ needs.tag.outputs.build-tag }} \
                                                                                neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}
@@ -700,15 +697,12 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        version: [ v14, v15, v16 ]
+        version: [ v14, v15, v16, v17 ]
    env:
      VM_BUILDER_VERSION: v0.29.3

    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
+      - uses: actions/checkout@v4

      - name: Downloading vm-builder
        run: |
@@ -748,10 +742,7 @@ jobs:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'small-arm64' || 'small')) }}

    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
+      - uses: actions/checkout@v4

      - uses: ./.github/actions/set-docker-config-dir
      - uses: docker/login-action@v3
@@ -798,7 +789,7 @@ jobs:
    runs-on: ubuntu-22.04

    env:
-      VERSIONS: v14 v15 v16
+      VERSIONS: v14 v15 v16 v17

    steps:
      - uses: docker/login-action@v3
@@ -839,7 +830,7 @@ jobs:
            done
          done
          docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
-                                             neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}
+                                              neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}

      - name: Login to prod ECR
        uses: docker/login-action@v3
@@ -852,7 +843,7 @@ jobs:
      - name: Copy all images to prod ECR
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
        run: |
-          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do
+          for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16,v17}; do
            docker buildx imagetools create -t 093970136003.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }} \
                                               369495373322.dkr.ecr.eu-central-1.amazonaws.com/${image}:${{ needs.tag.outputs.build-tag }}
          done
@@ -864,7 +855,7 @@ jobs:
    with:
      client_id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
+      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
      registry_name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -876,7 +867,7 @@ jobs:
    with:
      client_id: ${{ vars.AZURE_PROD_CLIENT_ID }}
      image_tag: ${{ needs.tag.outputs.build-tag }}
-      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 compute-node-v14 compute-node-v15 compute-node-v16
+      images: neon compute-tools vm-compute-node-v14 vm-compute-node-v15 vm-compute-node-v16 vm-compute-node-v17 compute-node-v14 compute-node-v15 compute-node-v16 compute-node-v17
      registry_name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
      subscription_id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
      tenant_id: ${{ vars.AZURE_TENANT_ID }}
@@ -957,6 +948,7 @@ jobs:

  deploy:
    needs: [ check-permissions, promote-images, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait, push-to-acr-dev, push-to-acr-prod ]
+    # `!failure() && !cancelled()` is required because this job depends on jobs that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy') && !failure() && !cancelled()

    runs-on: [ self-hosted, small ]
@@ -971,15 +963,12 @@ jobs:
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-          for r in 14 15 16; do
+          for r in 14 15 16 17; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done

-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
+      - uses: actions/checkout@v4

      - name: Trigger deploy workflow
        env:
@@ -1058,7 +1047,8 @@ jobs:
  # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory
  promote-compatibility-data:
    needs: [ deploy ]
-    if: github.ref_name == 'release'
+    # `!failure() && !cancelled()` is required because this job transitively depends on jobs that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
+    if: github.ref_name == 'release' && !failure() && !cancelled()

    runs-on: ubuntu-22.04
    steps:
@@ -1117,6 +1107,7 @@ jobs:

              files_to_promote+=("s3://${BUCKET}/${s3_key}")

+              # TODO Add v17
              for pg_version in v14 v15 v16; do
                # We run less tests for debug builds, so we don't need to promote them
                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -72,6 +72,10 @@ jobs:
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

+      - name: Set pg 17 revision for caching
+        id: pg_v17_rev
+        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT
+
      - name: Cache postgres v14 build
        id: cache_pg_14
        uses: actions/cache@v4
@@ -93,6 +97,13 @@ jobs:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

+      - name: Cache postgres v17 build
+        id: cache_pg_17
+        uses: actions/cache@v4
+        with:
+          path: pg_install/v17
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+
      - name: Set extra env for macOS
        run: |
          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
@@ -120,6 +131,10 @@ jobs:
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: make postgres-v16 -j$(sysctl -n hw.ncpu)

+      - name: Build postgres v17
+        if: steps.cache_pg_17.outputs.cache-hit != 'true'
+        run: make postgres-v17 -j$(sysctl -n hw.ncpu)
+
      - name: Build neon extensions
        run: make neon-pg-ext -j$(sysctl -n hw.ncpu)

@@ -166,7 +181,7 @@ jobs:
        run: make walproposer-lib -j$(nproc)

      - name: Produce the build stats
-        run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release --timings -j$(nproc)
+        run: PQ_LIB_DIR=$(pwd)/pg_install/v17/lib cargo build --all --release --timings -j$(nproc)

      - name: Upload the build stats
        id: upload-stats
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -34,8 +34,8 @@ jobs:
      build-tag: ${{ steps.build-tag.outputs.tag }}

    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
+      # Need `fetch-depth: 0` to count the number of commits in the branch
+      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

--- a/.gitmodules
+++ b/.gitmodules
@@ -10,3 +10,7 @@
 	path = vendor/postgres-v16
 	url = https://github.com/neondatabase/postgres.git
 	branch = REL_16_STABLE_neon
+[submodule "vendor/postgres-v17"]
+	path = vendor/postgres-v17
+	url = https://github.com/neondatabase/postgres.git
+	branch = REL_17_STABLE_neon
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1209,7 +1209,6 @@ dependencies = [
 "remote_storage",
 "serde",
 "serde_json",
- "serde_with",
 "utils",
 ]

@@ -1218,7 +1217,6 @@ name = "compute_tools"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "async-compression",
 "bytes",
 "cfg-if",
 "chrono",
@@ -1237,7 +1235,6 @@ dependencies = [
 "reqwest 0.12.4",
 "rlimit",
 "rust-ini",
- "serde",
 "serde_json",
 "signal-hook",
 "tar",
@@ -1246,7 +1243,6 @@ dependencies = [
 "tokio-postgres",
 "tokio-stream",
 "tokio-util",
- "toml_edit",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
@@ -1317,12 +1313,9 @@ dependencies = [
 name = "consumption_metrics"
 version = "0.1.0"
 dependencies = [
- "anyhow",
 "chrono",
 "rand 0.8.5",
 "serde",
- "serde_with",
- "utils",
 ]

 [[package]]
@@ -1334,9 +1327,7 @@ dependencies = [
 "clap",
 "comfy-table",
 "compute_api",
- "futures",
 "git-version",
- "hex",
 "humantime",
 "humantime-serde",
 "hyper 0.14.26",
@@ -1344,7 +1335,6 @@ dependencies = [
 "once_cell",
 "pageserver_api",
 "pageserver_client",
- "postgres",
 "postgres_backend",
 "postgres_connection",
 "regex",
@@ -1353,9 +1343,7 @@ dependencies = [
 "scopeguard",
 "serde",
 "serde_json",
- "serde_with",
 "storage_broker",
- "tar",
 "thiserror",
 "tokio",
 "tokio-postgres",
@@ -1663,7 +1651,6 @@ dependencies = [
 "hex",
 "parking_lot 0.12.1",
 "rand 0.8.5",
- "scopeguard",
 "smallvec",
 "tracing",
 "utils",
@@ -2233,24 +2220,22 @@ checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"

 [[package]]
 name = "git-version"
-version = "0.3.5"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6b0decc02f4636b9ccad390dcbe77b722a77efedfa393caf8379a51d5c61899"
+checksum = "1ad568aa3db0fcbc81f2f116137f263d7304f512a1209b35b85150d3ef88ad19"
 dependencies = [
 "git-version-macro",
- "proc-macro-hack",
 ]

 [[package]]
 name = "git-version-macro"
-version = "0.3.5"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe69f1cbdb6e28af2bac214e943b99ce8a0a06b447d15d3e61161b0423139f3f"
+checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0"
 dependencies = [
- "proc-macro-hack",
 "proc-macro2",
 "quote",
- "syn 1.0.109",
+ "syn 2.0.52",
 ]

 [[package]]
@@ -2744,19 +2729,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "inotify"
-version = "0.10.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdd168d97690d0b8c412d6b6c10360277f4d7ee495c5d0d5d5fe0854923255cc"
-dependencies = [
- "bitflags 1.3.2",
- "futures-core",
- "inotify-sys",
- "libc",
- "tokio",
-]
-
 [[package]]
 name = "inotify-sys"
 version = "0.1.5"
@@ -3251,7 +3223,7 @@ dependencies = [
 "crossbeam-channel",
 "filetime",
 "fsevent-sys",
- "inotify 0.9.6",
+ "inotify",
 "kqueue",
 "libc",
 "log",
@@ -3642,7 +3614,6 @@ name = "pagectl"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "bytes",
 "camino",
 "clap",
 "git-version",
@@ -3651,7 +3622,6 @@ dependencies = [
 "pageserver_api",
 "postgres_ffi",
 "remote_storage",
- "serde",
 "serde_json",
 "svg_fmt",
 "thiserror",
@@ -3670,7 +3640,6 @@ dependencies = [
 "arc-swap",
 "async-compression",
 "async-stream",
- "async-trait",
 "bit_field",
 "byteorder",
 "bytes",
@@ -3678,16 +3647,13 @@ dependencies = [
 "camino-tempfile",
 "chrono",
 "clap",
- "const_format",
 "consumption_metrics",
 "crc32c",
 "criterion",
- "crossbeam-utils",
 "either",
 "enum-map",
 "enumset",
 "fail",
- "flate2",
 "futures",
 "git-version",
 "hex",
@@ -3726,13 +3692,9 @@ dependencies = [
 "serde_json",
 "serde_path_to_error",
 "serde_with",
- "signal-hook",
- "smallvec",
 "storage_broker",
 "strum",
 "strum_macros",
- "svg_fmt",
- "sync_wrapper",
 "sysinfo",
 "tenant_size_model",
 "thiserror",
@@ -3746,7 +3708,6 @@ dependencies = [
 "tokio-util",
 "toml_edit",
 "tracing",
- "twox-hash",
 "url",
 "utils",
 "walkdir",
@@ -3810,44 +3771,22 @@ name = "pageserver_compaction"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "async-compression",
 "async-stream",
- "byteorder",
- "bytes",
- "chrono",
 "clap",
- "const_format",
- "consumption_metrics",
 "criterion",
- "crossbeam-utils",
- "either",
- "fail",
- "flate2",
 "futures",
 "git-version",
- "hex",
 "hex-literal",
- "humantime",
- "humantime-serde",
 "itertools 0.10.5",
- "metrics",
 "once_cell",
 "pageserver_api",
 "pin-project-lite",
 "rand 0.8.5",
- "smallvec",
 "svg_fmt",
- "sync_wrapper",
- "thiserror",
 "tokio",
- "tokio-io-timeout",
- "tokio-util",
 "tracing",
- "tracing-error",
 "tracing-subscriber",
- "url",
 "utils",
- "walkdir",
 "workspace_hack",
 ]

@@ -4164,9 +4103,7 @@ name = "postgres_backend"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "async-trait",
 "bytes",
- "futures",
 "once_cell",
 "pq_proto",
 "rustls 0.22.4",
@@ -4199,16 +4136,13 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "bindgen",
- "byteorder",
 "bytes",
 "crc32c",
 "env_logger",
- "hex",
 "log",
 "memoffset 0.8.0",
 "once_cell",
 "postgres",
- "rand 0.8.5",
 "regex",
 "serde",
 "thiserror",
@@ -4243,13 +4177,11 @@ dependencies = [
 "byteorder",
 "bytes",
 "itertools 0.10.5",
- "pin-project-lite",
 "postgres-protocol",
 "rand 0.8.5",
 "serde",
 "thiserror",
 "tokio",
- "tracing",
 ]

 [[package]]
@@ -4281,12 +4213,6 @@ dependencies = [
 "elliptic-curve 0.13.8",
 ]

-[[package]]
-name = "proc-macro-hack"
-version = "0.5.20+deprecated"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
-
 [[package]]
 name = "proc-macro2"
 version = "1.0.78"
@@ -4405,7 +4331,6 @@ dependencies = [
 "aws-config",
 "aws-sdk-iam",
 "aws-sigv4",
- "aws-types",
 "base64 0.13.1",
 "bstr",
 "bytes",
@@ -4414,7 +4339,6 @@ dependencies = [
 "chrono",
 "clap",
 "consumption_metrics",
- "crossbeam-deque",
 "dashmap",
 "ecdsa 0.16.9",
 "env_logger",
@@ -4440,11 +4364,9 @@ dependencies = [
 "jose-jwa",
 "jose-jwk",
 "lasso",
- "md5",
 "measured",
 "metrics",
 "once_cell",
- "opentelemetry",
 "p256 0.13.2",
 "parking_lot 0.12.1",
 "parquet",
@@ -4465,7 +4387,6 @@ dependencies = [
 "reqwest-middleware",
 "reqwest-retry",
 "reqwest-tracing",
- "routerify",
 "rsa",
 "rstest",
 "rustc-hash",
@@ -4481,7 +4402,6 @@ dependencies = [
 "smol_str",
 "socket2 0.5.5",
 "subtle",
- "task-local-extensions",
 "thiserror",
 "tikv-jemalloc-ctl",
 "tikv-jemallocator",
@@ -4491,7 +4411,6 @@ dependencies = [
 "tokio-rustls 0.25.0",
 "tokio-tungstenite",
 "tokio-util",
- "tower-service",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
@@ -4781,7 +4700,6 @@ dependencies = [
 "async-stream",
 "async-trait",
 "aws-config",
- "aws-credential-types",
 "aws-sdk-s3",
 "aws-smithy-async",
 "aws-smithy-types",
@@ -4795,7 +4713,6 @@ dependencies = [
 "futures",
 "futures-util",
 "http-types",
- "humantime",
 "humantime-serde",
 "hyper 0.14.26",
 "itertools 0.10.5",
@@ -5275,14 +5192,12 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-stream",
- "async-trait",
 "byteorder",
 "bytes",
 "camino",
 "camino-tempfile",
 "chrono",
 "clap",
- "const_format",
 "crc32c",
 "desim",
 "fail",
@@ -5308,9 +5223,7 @@ dependencies = [
 "sd-notify",
 "serde",
 "serde_json",
- "serde_with",
 "sha2",
- "signal-hook",
 "storage_broker",
 "strum",
 "strum_macros",
@@ -5321,7 +5234,6 @@ dependencies = [
 "tokio-stream",
 "tokio-tar",
 "tokio-util",
- "toml_edit",
 "tracing",
 "tracing-subscriber",
 "url",
@@ -5336,7 +5248,6 @@ version = "0.1.0"
 dependencies = [
 "const_format",
 "serde",
- "serde_with",
 "utils",
 ]

@@ -5865,7 +5776,6 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-stream",
- "bytes",
 "clap",
 "const_format",
 "futures",
@@ -5879,7 +5789,6 @@ dependencies = [
 "parking_lot 0.12.1",
 "prost",
 "tokio",
- "tokio-stream",
 "tonic",
 "tonic-build",
 "tracing",
@@ -5892,9 +5801,7 @@ name = "storage_controller"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "aws-config",
 "bytes",
- "camino",
 "chrono",
 "clap",
 "control_plane",
@@ -5935,20 +5842,9 @@ dependencies = [
 name = "storage_controller_client"
 version = "0.1.0"
 dependencies = [
- "anyhow",
- "bytes",
- "futures",
- "pageserver_api",
 "pageserver_client",
- "postgres",
 "reqwest 0.12.4",
 "serde",
- "thiserror",
- "tokio",
- "tokio-postgres",
- "tokio-stream",
- "tokio-util",
- "utils",
 "workspace_hack",
 ]

@@ -5960,13 +5856,9 @@ dependencies = [
 "async-stream",
 "aws-config",
 "aws-sdk-s3",
- "aws-smithy-async",
- "bincode",
- "bytes",
 "camino",
 "chrono",
 "clap",
- "crc32c",
 "either",
 "futures",
 "futures-util",
@@ -5978,20 +5870,16 @@ dependencies = [
 "pageserver",
 "pageserver_api",
 "postgres_ffi",
- "rand 0.8.5",
 "remote_storage",
 "reqwest 0.12.4",
 "rustls 0.22.4",
 "rustls-native-certs 0.7.0",
 "serde",
 "serde_json",
- "serde_with",
 "storage_controller_client",
- "thiserror",
 "tokio",
 "tokio-postgres",
 "tokio-postgres-rustls",
- "tokio-rustls 0.25.0",
 "tokio-stream",
 "tokio-util",
 "tracing",
@@ -6010,14 +5898,11 @@ dependencies = [
 "comfy-table",
 "futures",
 "humantime",
- "hyper 0.14.26",
 "pageserver_api",
 "pageserver_client",
 "reqwest 0.12.4",
- "serde",
 "serde_json",
 "storage_controller_client",
- "thiserror",
 "tokio",
 "tracing",
 "utils",
@@ -6140,15 +6025,6 @@ dependencies = [
 "xattr",
 ]

-[[package]]
-name = "task-local-extensions"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba323866e5d033818e3240feeb9f7db2c4296674e4d9e16b97b7bf8f490434e8"
-dependencies = [
- "pin-utils",
-]
-
 [[package]]
 name = "tempfile"
 version = "3.9.0"
@@ -6739,7 +6615,6 @@ dependencies = [
 "opentelemetry",
 "opentelemetry-otlp",
 "opentelemetry-semantic-conventions",
- "reqwest 0.12.4",
 "tokio",
 "tracing",
 "tracing-opentelemetry",
@@ -6943,7 +6818,6 @@ dependencies = [
 "serde_assert",
 "serde_json",
 "serde_path_to_error",
- "serde_with",
 "signal-hook",
 "strum",
 "strum_macros",
@@ -6999,13 +6873,11 @@ dependencies = [
 "cgroups-rs",
 "clap",
 "futures",
- "inotify 0.10.2",
 "serde",
 "serde_json",
 "sysinfo",
 "tokio",
 "tokio-postgres",
- "tokio-stream",
 "tokio-util",
 "tracing",
 "tracing-subscriber",
@@ -7032,7 +6904,6 @@ dependencies = [
 "clap",
 "env_logger",
 "log",
- "once_cell",
 "postgres",
 "postgres_ffi",
 "regex",
@@ -7555,6 +7426,7 @@ dependencies = [
 "digest",
 "either",
 "fail",
+ "futures",
 "futures-channel",
 "futures-executor",
 "futures-io",
@@ -7610,6 +7482,8 @@ dependencies = [
 "tower",
 "tracing",
 "tracing-core",
+ "tracing-log",
+ "tracing-subscriber",
 "url",
 "uuid",
 "zeroize",
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,6 +5,8 @@
 ARG REPOSITORY=neondatabase
 ARG IMAGE=build-tools
 ARG TAG=pinned
+ARG DEFAULT_PG_VERSION=17
+ARG STABLE_PG_VERSION=16

 # Build Postgres
 FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
@@ -13,6 +15,7 @@ WORKDIR /home/nonroot
 COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
 COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
 COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
+COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
 COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
@@ -28,16 +31,19 @@ FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
+ARG STABLE_PG_VERSION

 COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
+COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/lib                       pg_install/v16/lib
+COPY --from=pg-build /home/nonroot/pg_install/v17/lib                       pg_install/v17/lib
 COPY --chown=nonroot . .

 ARG ADDITIONAL_RUSTFLAGS
 RUN set -e \
-    && PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
+    && PQ_LIB_DIR=$(pwd)/pg_install/v${STABLE_PG_VERSION}/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
@@ -52,6 +58,7 @@ RUN set -e \
 # Build final image
 #
 FROM debian:bullseye-slim
+ARG DEFAULT_PG_VERSION
 WORKDIR /data

 RUN set -e \
@@ -77,6 +84,7 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubbe
 COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
 COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
 COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
+COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
 COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/

 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
@@ -93,7 +101,7 @@ RUN mkdir -p /data/.neon/ && \

 # When running a binary that links with libpq, default to using our most recent postgres version.  Binaries
 # that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH=/usr/local/v16/lib
+ENV LD_LIBRARY_PATH=/usr/local/v${DEFAULT_PG_VERSION}/lib


 VOLUME ["/data"]
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -55,22 +55,27 @@ RUN cd postgres && \
    # We could add the additional grant statements to the postgres repository but it would be hard to maintain,
    # whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
    # so we do it here.
-    old_list="pg_stat_statements--1.0--1.1.sql pg_stat_statements--1.1--1.2.sql pg_stat_statements--1.2--1.3.sql pg_stat_statements--1.3--1.4.sql pg_stat_statements--1.4--1.5.sql pg_stat_statements--1.4.sql pg_stat_statements--1.5--1.6.sql"; \
-    # the first loop is for pg_stat_statement extension version <= 1.6
    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
        filename=$(basename "$file"); \
-        if echo "$old_list" | grep -q -F "$filename"; then \
+        # Note that there are no downgrade scripts for pg_stat_statements, so we \
+        # don't have to modify any downgrade paths or (much) older versions: we only \
+        # have to make sure every creation of the pg_stat_statements_reset function \
+        # also adds execute permissions to the neon_superuser.
+        case $filename in \
+          pg_stat_statements--1.4.sql) \
+            # pg_stat_statements_reset is first created with 1.4
            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
-        fi; \
-    done; \
-    # the second loop is for pg_stat_statement extension versions >= 1.7,
-    # where pg_stat_statement_reset() got 3 additional arguments
-    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
-        filename=$(basename "$file"); \
-        if ! echo "$old_list" | grep -q -F "$filename"; then \
+            ;; \
+          pg_stat_statements--1.6--1.7.sql) \
+            # Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
-        fi; \
-    done
+            ;; \
+          pg_stat_statements--1.10--1.11.sql) \
+            # Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
+            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
+            ;; \
+        esac; \
+    done;

 #########################################################################################
 #
@@ -79,6 +84,7 @@ RUN cd postgres && \
 #
 #########################################################################################
 FROM build-deps AS postgis-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
    apt install -y cmake gdal-bin libboost-dev libboost-thread-dev libboost-filesystem-dev \
@@ -87,7 +93,11 @@ RUN apt update && \
    protobuf-c-compiler xsltproc

 # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
-RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    mkdir -p /sfcgal && \
+    echo "Postgis doesn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
+    esac && \
+    wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
    echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
    mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
    cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -96,7 +106,10 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar

 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "Postgis doesn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
+    esac && \
+    wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
    echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
    mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -122,7 +135,10 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
    cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
    cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis

-RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
    echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
    mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
@@ -142,12 +158,19 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
 #
 #########################################################################################
 FROM build-deps AS plv8-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN apt update && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    apt update && \
    apt install -y ninja-build python3-dev libncurses5 binutils clang

-RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
    echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
    mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
    # generate and copy upgrade scripts
@@ -172,9 +195,13 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.t
 #
 #########################################################################################
 FROM build-deps AS h3-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "$(uname -m)" in \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    case "$(uname -m)" in \
      "x86_64") \
        export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
        ;; \
@@ -192,7 +219,11 @@ RUN case "$(uname -m)" in \
      && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
      && rm /tmp/cmake-install.sh

-RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+        mkdir -p /h3/usr/ && \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
    echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
    mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
@@ -202,7 +233,10 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
    cp -R /h3/usr / && \
    rm -rf build

-RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
    echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
    mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
@@ -218,9 +252,13 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3
 #
 #########################################################################################
 FROM build-deps AS unit-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
    echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -239,6 +277,7 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
 #
 #########################################################################################
 FROM build-deps AS vector-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 COPY patches/pgvector.patch /pgvector.patch
@@ -246,7 +285,10 @@ COPY patches/pgvector.patch /pgvector.patch
 # By default, pgvector Makefile uses `-march=native`. We don't want that,
 # because we build the images on different machines than where we run them.
 # Pass OPTFLAGS="" to remove it.
-RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
    echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
    mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
    patch -p1 < /pgvector.patch && \
@@ -261,10 +303,14 @@ RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O
 #
 #########################################################################################
 FROM build-deps AS pgjwt-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
-RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
    echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
    mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -277,9 +323,13 @@ RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b214
 #
 #########################################################################################
 FROM build-deps AS hypopg-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
    echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
    mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -293,9 +343,13 @@ RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypo
 #
 #########################################################################################
 FROM build-deps AS pg-hashids-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
    echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
    mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -309,11 +363,15 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
 #
 #########################################################################################
 FROM build-deps AS rum-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 COPY patches/rum.patch /rum.patch

-RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
    echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
    mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
    patch -p1 < /rum.patch && \
@@ -328,9 +386,13 @@ RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O r
 #
 #########################################################################################
 FROM build-deps AS pgtap-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
    echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
    mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -344,9 +406,13 @@ RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgta
 #
 #########################################################################################
 FROM build-deps AS ip4r-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
    echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
    mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -360,9 +426,13 @@ RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O i
 #
 #########################################################################################
 FROM build-deps AS prefix-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
    echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
    mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -376,9 +446,13 @@ RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O p
 #
 #########################################################################################
 FROM build-deps AS hll-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
    echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
    mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -392,9 +466,13 @@ RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar
 #
 #########################################################################################
 FROM build-deps AS plpgsql-check-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
    echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
    mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -413,7 +491,10 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    case "${PG_VERSION}" in \
      "v14" | "v15") \
        export TIMESCALEDB_VERSION=2.10.1 \
        export TIMESCALEDB_CHECKSUM=6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 \
@@ -446,7 +527,10 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    case "${PG_VERSION}" in \
      "v14") \
        export PG_HINT_PLAN_VERSION=14_1_4_1 \
        export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
@@ -459,6 +543,9 @@ RUN case "${PG_VERSION}" in \
        export PG_HINT_PLAN_VERSION=16_1_6_0 \
        export PG_HINT_PLAN_CHECKSUM=fc85a9212e7d2819d4ae4ac75817481101833c3cfa9f0fe1f980984e12347d00 \
        ;; \
+      "v17") \
+        echo "TODO: PG17 pg_hint_plan support" && exit 0 \
+        ;; \
      *) \
        echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
        ;; \
@@ -478,10 +565,14 @@ RUN case "${PG_VERSION}" in \
 #
 #########################################################################################
 FROM build-deps AS pg-cron-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
    echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
    mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -495,9 +586,13 @@ RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O
 #
 #########################################################################################
 FROM build-deps AS rdkit-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN apt-get update && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    apt-get update && \
    apt-get install -y \
        cmake \
        libboost-iostreams1.74-dev \
@@ -507,7 +602,10 @@ RUN apt-get update && \
        libeigen3-dev

 ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
-RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
    echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
    mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
    cmake \
@@ -544,10 +642,14 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
 #
 #########################################################################################
 FROM build-deps AS pg-uuidv7-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
    echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -561,10 +663,14 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz
 #
 #########################################################################################
 FROM build-deps AS pg-roaringbitmap-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 is not supported yet by pg_roaringbitmap. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
    echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -578,10 +684,14 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
 #
 #########################################################################################
 FROM build-deps AS pg-semver-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 is not supported yet by pg_semver. Quit" && exit 0;; \
+    esac && \
+    wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
    echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
    mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -620,10 +730,14 @@ RUN case "${PG_VERSION}" in \
 #
 #########################################################################################
 FROM build-deps AS pg-anon-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "postgresql_anonymizer does not yet support PG17" && exit 0;; \
+    esac && \
+    wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
    echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9  pg_anon.tar.gz" | sha256sum --check && \
    mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
@@ -641,6 +755,7 @@ RUN wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tag
 #
 #########################################################################################
 FROM build-deps AS rust-extensions-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 RUN apt-get update && \
@@ -651,9 +766,11 @@ ENV HOME=/home/nonroot
 ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
 USER nonroot
 WORKDIR /home/nonroot
-ARG PG_VERSION

-RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 is not supported yet by pgrx. Quit" && exit 0;; \
+    esac && \
+    curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
    rm rustup-init && \
@@ -672,7 +789,10 @@ USER root
 FROM rust-extensions-build AS pg-jsonschema-pg-build
 ARG PG_VERSION

-RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "pg_jsonschema does not yet have a release that supports pg17" && exit 0;; \
+    esac && \
+    wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
    echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
    mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
    # see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
@@ -694,7 +814,10 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.
 FROM rust-extensions-build AS pg-graphql-pg-build
 ARG PG_VERSION

-RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "pg_graphql does not yet have a release that supports pg17 as of now" && exit 0;; \
+    esac && \
+    wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
    echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
    mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
    sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -714,7 +837,10 @@ FROM rust-extensions-build AS pg-tiktoken-pg-build
 ARG PG_VERSION

 # 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
-RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "pg_tiktoken does not have versions, nor support for pg17" && exit 0;; \
+    esac && \
+    wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
    echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
    mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
    # TODO update pgrx version in the pg_tiktoken repo and remove this line
@@ -733,7 +859,10 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6
 FROM rust-extensions-build AS pg-pgx-ulid-build
 ARG PG_VERSION

-RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "pgx_ulid does not support pg17 as of the latest version (0.1.5)" && exit 0;; \
+    esac && \
+    wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
    echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
    sed -i 's/pgrx       = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
@@ -748,10 +877,14 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -
 #########################################################################################

 FROM build-deps AS wal2json-pg-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "We'll need to update wal2json to 2.6+ for pg17 support" && exit 0;; \
+    esac && \
+    wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
    echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
    mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -764,10 +897,14 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
 #
 #########################################################################################
 FROM build-deps AS pg-ivm-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "We'll need to update pg_ivm to 1.9+ for pg17 support" && exit 0;; \
+    esac && \
+    wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
    echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
    mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -781,10 +918,14 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_iv
 #
 #########################################################################################
 FROM build-deps AS pg-partman-build
+ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "pg_partman doesn't support PG17 yet" && exit 0;; \
+    esac && \
+    wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
    echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
    mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -854,8 +995,8 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
    case "${PG_VERSION}" in \
        "v14" | "v15") \
        ;; \
-        "v16") \
-            echo "Skipping HNSW for PostgreSQL 16" && exit 0 \
+        "v16" | "v17") \
+            echo "Skipping HNSW for PostgreSQL ${PG_VERSION}" && exit 0 \
        ;; \
        *) \
            echo "unexpected PostgreSQL version" && exit 1 \
@@ -899,7 +1040,7 @@ FROM neon-pg-ext-build AS postgres-cleanup-layer
 COPY --from=neon-pg-ext-build /usr/local/pgsql /usr/local/pgsql

 # Remove binaries from /bin/ that we won't use (or would manually copy & install otherwise)
-RUN cd /usr/local/pgsql/bin && rm ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp
+RUN cd /usr/local/pgsql/bin && rm -f ecpg raster2pgsql shp2pgsql pgtopo_export pgtopo_import pgsql2shp

 # Remove headers that we won't need anymore - we've completed installation of all extensions
 RUN rm -r /usr/local/pgsql/include
@@ -918,7 +1059,10 @@ RUN rm /usr/local/pgsql/lib/lib*.a

 FROM neon-pg-ext-build AS neon-pg-ext-test
 ARG PG_VERSION
-RUN mkdir /ext-src
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    mkdir /ext-src

 #COPY --from=postgis-build /postgis.tar.gz /ext-src/
 #COPY --from=postgis-build /sfcgal/* /usr
@@ -956,18 +1100,39 @@ COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
 COPY patches/pg_anon.patch /ext-src
 COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
 COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
-RUN cd /ext-src/ && for f in *.tar.gz; \
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    cd /ext-src/ && for f in *.tar.gz; \
    do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
    rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
    || exit 1; rm -f $f; done
-RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
-RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    cd /ext-src/rum-src && patch -p1 <../rum.patch
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
 # cmake is required for the h3 test
-RUN apt-get update && apt-get install -y cmake
-RUN cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    apt-get update && apt-get install -y cmake
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    cd /ext-src/pg_hint_plan-src && patch -p1 < /ext-src/pg_hint_plan.patch
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
-RUN patch -p1 </ext-src/pg_anon.patch
-RUN patch -p1 </ext-src/pg_cron.patch
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    patch -p1 </ext-src/pg_anon.patch
+RUN case "${PG_VERSION}" in "v17") \
+    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+    esac && \
+    patch -p1 </ext-src/pg_cron.patch
 ENV PATH=/usr/local/pgsql/bin:$PATH
 ENV PGHOST=compute
 ENV PGPORT=55433
--- a/56
+++ b/56
@@ -119,6 +119,8 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
 # I'm not sure why it wouldn't work, but this is the only place (apart from
 # the "build-all-versions" entry points) where direct mention of PostgreSQL
 # versions is used.
+.PHONY: postgres-configure-v17
+postgres-configure-v17: $(POSTGRES_INSTALL_DIR)/build/v17/config.status
 .PHONY: postgres-configure-v16
 postgres-configure-v16: $(POSTGRES_INSTALL_DIR)/build/v16/config.status
 .PHONY: postgres-configure-v15
@@ -215,29 +217,31 @@ neon-pg-clean-ext-%:
 # they depend on openssl and other libraries that are not included in our
 # Rust build.
 .PHONY: walproposer-lib
-walproposer-lib: neon-pg-ext-v16
+walproposer-lib: neon-pg-ext-v17
 	+@echo "Compiling walproposer-lib"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
-	cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	cp $(POSTGRES_INSTALL_DIR)/v16/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-ifeq ($(UNAME_S),Linux)
+	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
+	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgcommon.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
 	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgport.a \
 		pg_strong_random.o
 	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
-		pg_crc32c.o \
-		hmac_openssl.o \
+		checksum_helper.o \
 		cryptohash_openssl.o \
-		scram-common.o \
+		hmac_openssl.o \
 		md5_common.o \
-		checksum_helper.o
+		parse_manifest.o \
+		scram-common.o
+ifeq ($(UNAME_S),Linux)
+	$(AR) d $(POSTGRES_INSTALL_DIR)/build/walproposer-lib/libpgcommon.a \
+		pg_crc32c.o
 endif

 .PHONY: walproposer-lib-clean
 walproposer-lib-clean:
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config \
 		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean

@@ -245,38 +249,44 @@ walproposer-lib-clean:
 neon-pg-ext: \
 	neon-pg-ext-v14 \
 	neon-pg-ext-v15 \
-	neon-pg-ext-v16
+	neon-pg-ext-v16 \
+	neon-pg-ext-v17

 .PHONY: neon-pg-clean-ext
 neon-pg-clean-ext: \
 	neon-pg-clean-ext-v14 \
 	neon-pg-clean-ext-v15 \
-	neon-pg-clean-ext-v16
+	neon-pg-clean-ext-v16 \
+	neon-pg-clean-ext-v17

 # shorthand to build all Postgres versions
 .PHONY: postgres
 postgres: \
 	postgres-v14 \
 	postgres-v15 \
-	postgres-v16
+	postgres-v16 \
+	postgres-v17

 .PHONY: postgres-headers
 postgres-headers: \
 	postgres-headers-v14 \
 	postgres-headers-v15 \
-	postgres-headers-v16
+	postgres-headers-v16 \
+	postgres-headers-v17

 .PHONY: postgres-clean
 postgres-clean: \
 	postgres-clean-v14 \
 	postgres-clean-v15 \
-	postgres-clean-v16
+	postgres-clean-v16 \
+	postgres-clean-v17

 .PHONY: postgres-check
 postgres-check: \
 	postgres-check-v14 \
 	postgres-check-v15 \
-	postgres-check-v16
+	postgres-check-v16 \
+	postgres-check-v17

 # This doesn't remove the effects of 'configure'.
 .PHONY: clean
@@ -321,13 +331,13 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
 	rm -f pg*.BAK

 # Indent pxgn/neon.
-.PHONY: pgindent
-neon-pgindent: postgres-v16-pg-bsd-indent neon-pg-ext-v16
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v16/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/find_typedef \
-		INDENT=$(POSTGRES_INSTALL_DIR)/build/v16/src/tools/pg_bsd_indent/pg_bsd_indent \
-		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v16/src/tools/pgindent/pgindent \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-v16 \
+.PHONY: neon-pgindent
+neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
+		INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
+		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
+		-C $(POSTGRES_INSTALL_DIR)/build/neon-v17 \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent


--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -11,7 +11,6 @@ testing = []

 [dependencies]
 anyhow.workspace = true
-async-compression.workspace = true
 chrono.workspace = true
 cfg-if.workspace = true
 clap.workspace = true
@@ -24,7 +23,6 @@ num_cpus.workspace = true
 opentelemetry.workspace = true
 postgres.workspace = true
 regex.workspace = true
-serde.workspace = true
 serde_json.workspace = true
 signal-hook.workspace = true
 tar.workspace = true
@@ -43,7 +41,6 @@ url.workspace = true
 compute_api.workspace = true
 utils.workspace = true
 workspace_hack.workspace = true
-toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
 vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
 zstd = "0.13"
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1052,26 +1052,19 @@ impl ComputeNode {
        let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?;

        let config_time = Utc::now();
-        if pspec.spec.mode == ComputeMode::Primary {
-            if !pspec.spec.skip_pg_catalog_updates {
-                let pgdata_path = Path::new(&self.pgdata);
-                // temporarily reset max_cluster_size in config
-                // to avoid the possibility of hitting the limit, while we are applying config:
-                // creating new extensions, roles, etc...
-                config::with_compute_ctl_tmp_override(
-                    pgdata_path,
-                    "neon.max_cluster_size=-1",
-                    || {
-                        self.pg_reload_conf()?;
-
-                        self.apply_config(&compute_state)?;
-
-                        Ok(())
-                    },
-                )?;
+        if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
+            let pgdata_path = Path::new(&self.pgdata);
+            // temporarily reset max_cluster_size in config
+            // to avoid the possibility of hitting the limit, while we are applying config:
+            // creating new extensions, roles, etc...
+            config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
                self.pg_reload_conf()?;
-            }
-            self.post_apply_config()?;
+
+                self.apply_config(&compute_state)?;
+
+                Ok(())
+            })?;
+            self.pg_reload_conf()?;
        }

        let startup_end_time = Utc::now();
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -124,6 +124,7 @@ fn parse_pg_version(human_version: &str) -> &str {
            "14" => return "v14",
            "15" => return "v15",
            "16" => return "v16",
+            "17" => return "v17",
            _ => {}
        },
        _ => {}
--- a/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -0,0 +1 @@
+GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -793,6 +793,9 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
        include_str!(
            "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
        ),
+        include_str!(
+            "./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
+        ),
    ];

    MigrationRunner::new(client, &migrations).run_migrations()?;
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -9,13 +9,10 @@ anyhow.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
-futures.workspace = true
 git-version.workspace = true
 humantime.workspace = true
 nix.workspace = true
 once_cell.workspace = true
-postgres.workspace = true
-hex.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 regex.workspace = true
@@ -23,8 +20,6 @@ reqwest = { workspace = true, features = ["blocking", "json"] }
 scopeguard.workspace = true
 serde.workspace = true
 serde_json.workspace = true
-serde_with.workspace = true
-tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 toml_edit.workspace = true
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -151,7 +151,7 @@ where
                    print!(".");
                    io::stdout().flush().unwrap();
                }
-                thread::sleep(RETRY_INTERVAL);
+                tokio::time::sleep(RETRY_INTERVAL).await;
            }
            Err(e) => {
                println!("error starting process {process_name:?}: {e:#}");
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -34,12 +34,14 @@ use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
+use std::borrow::Cow;
 use std::collections::{BTreeSet, HashMap};
 use std::path::PathBuf;
 use std::process::exit;
 use std::str::FromStr;
 use std::time::Duration;
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
+use tokio::task::JoinSet;
 use url::Host;
 use utils::{
    auth::{Claims, Scope},
@@ -87,34 +89,35 @@ fn main() -> Result<()> {

    // Check for 'neon init' command first.
    let subcommand_result = if sub_name == "init" {
-        handle_init(sub_args).map(Some)
+        handle_init(sub_args).map(|env| Some(Cow::Owned(env)))
    } else {
        // all other commands need an existing config
-        let mut env =
-            LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
-        let original_env = env.clone();

+        let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
+        let original_env = env.clone();
+        let env = Box::leak(Box::new(env));
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();

        let subcommand_result = match sub_name {
-            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
-            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
-            "start" => rt.block_on(handle_start_all(&env, get_start_timeout(sub_args))),
-            "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
-            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
-            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
-            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
-            "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
-            "mappings" => handle_mappings(sub_args, &mut env),
+            "tenant" => rt.block_on(handle_tenant(sub_args, env)),
+            "timeline" => rt.block_on(handle_timeline(sub_args, env)),
+            "start" => rt.block_on(handle_start_all(env, get_start_timeout(sub_args))),
+            "stop" => rt.block_on(handle_stop_all(sub_args, env)),
+            "pageserver" => rt.block_on(handle_pageserver(sub_args, env)),
+            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, env)),
+            "storage_broker" => rt.block_on(handle_storage_broker(sub_args, env)),
+            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, env)),
+            "endpoint" => rt.block_on(handle_endpoint(sub_args, env)),
+            "mappings" => handle_mappings(sub_args, env),
            "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
            _ => bail!("unexpected subcommand {sub_name}"),
        };

-        if original_env != env {
-            subcommand_result.map(|()| Some(env))
+        if &original_env != env {
+            subcommand_result.map(|()| Some(Cow::Borrowed(env)))
        } else {
            subcommand_result.map(|()| None)
        }
@@ -1245,49 +1248,122 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }

+async fn handle_storage_broker(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+    let (sub_name, sub_args) = match sub_match.subcommand() {
+        Some(broker_command_data) => broker_command_data,
+        None => bail!("no broker subcommand provided"),
+    };
+
+    match sub_name {
+        "start" => {
+            if let Err(e) = broker::start_broker_process(env, get_start_timeout(sub_args)).await {
+                eprintln!("broker start failed: {e}");
+                exit(1);
+            }
+        }
+
+        "stop" => {
+            if let Err(e) = broker::stop_broker_process(env) {
+                eprintln!("broker stop failed: {e}");
+                exit(1);
+            }
+        }
+
+        _ => bail!("Unexpected broker subcommand '{}'", sub_name),
+    }
+    Ok(())
+}
+
 async fn handle_start_all(
-    env: &local_env::LocalEnv,
+    env: &'static local_env::LocalEnv,
    retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
+    let Err(errors) = handle_start_all_impl(env, *retry_timeout).await else {
+        neon_start_status_check(env, retry_timeout)
+            .await
+            .context("status check after successful startup of all services")?;
+        return Ok(());
+    };
+
+    eprintln!("startup failed because one or more services could not be started");
+
+    for e in errors {
+        eprintln!("{e}");
+        let debug_repr = format!("{e:?}");
+        for line in debug_repr.lines() {
+            eprintln!("  {line}");
+        }
+    }
+
+    try_stop_all(env, true).await;
+
+    exit(2);
+}
+
+/// Returns Ok() if and only if all services could be started successfully.
+/// Otherwise, returns the list of errors that occurred during startup.
+async fn handle_start_all_impl(
+    env: &'static local_env::LocalEnv,
+    retry_timeout: Duration,
+) -> Result<(), Vec<anyhow::Error>> {
    // Endpoints are not started automatically

-    broker::start_broker_process(env, retry_timeout).await?;
+    let mut js = JoinSet::new();

-    // Only start the storage controller if the pageserver is configured to need it
-    if env.control_plane_api.is_some() {
-        let storage_controller = StorageController::from_env(env);
-        if let Err(e) = storage_controller
-            .start(NeonStorageControllerStartArgs::with_default_instance_id(
-                (*retry_timeout).into(),
-            ))
-            .await
-        {
-            eprintln!("storage_controller start failed: {:#}", e);
-            try_stop_all(env, true).await;
-            exit(1);
+    // force infallibility through closure
+    #[allow(clippy::redundant_closure_call)]
+    (|| {
+        js.spawn(async move {
+            let retry_timeout = retry_timeout;
+            broker::start_broker_process(env, &retry_timeout).await
+        });
+
+        // Only start the storage controller if the pageserver is configured to need it
+        if env.control_plane_api.is_some() {
+            js.spawn(async move {
+                let storage_controller = StorageController::from_env(env);
+                storage_controller
+                    .start(NeonStorageControllerStartArgs::with_default_instance_id(
+                        retry_timeout.into(),
+                    ))
+                    .await
+                    .map_err(|e| e.context("start storage_controller"))
+            });
+        }
+
+        for ps_conf in &env.pageservers {
+            js.spawn(async move {
+                let pageserver = PageServerNode::from_env(env, ps_conf);
+                pageserver
+                    .start(&retry_timeout)
+                    .await
+                    .map_err(|e| e.context(format!("start pageserver {}", ps_conf.id)))
+            });
+        }
+
+        for node in env.safekeepers.iter() {
+            js.spawn(async move {
+                let safekeeper = SafekeeperNode::from_env(env, node);
+                safekeeper
+                    .start(vec![], &retry_timeout)
+                    .await
+                    .map_err(|e| e.context(format!("start safekeeper {}", safekeeper.id)))
+            });
+        }
+    })();
+
+    let mut errors = Vec::new();
+    while let Some(result) = js.join_next().await {
+        let result = result.expect("we don't panic or cancel the tasks");
+        if let Err(e) = result {
+            errors.push(e);
        }
    }

-    for ps_conf in &env.pageservers {
-        let pageserver = PageServerNode::from_env(env, ps_conf);
-        if let Err(e) = pageserver.start(retry_timeout).await {
-            eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
-            try_stop_all(env, true).await;
-            exit(1);
-        }
+    if !errors.is_empty() {
+        return Err(errors);
    }

-    for node in env.safekeepers.iter() {
-        let safekeeper = SafekeeperNode::from_env(env, node);
-        if let Err(e) = safekeeper.start(vec![], retry_timeout).await {
-            eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
-            try_stop_all(env, false).await;
-            exit(1);
-        }
-    }
-
-    neon_start_status_check(env, retry_timeout).await?;
-
    Ok(())
 }

@@ -1672,6 +1748,19 @@ fn cli() -> Command {
                            .arg(stop_mode_arg.clone())
                            .arg(instance_id))
        )
+        .subcommand(
+            Command::new("storage_broker")
+                .arg_required_else_help(true)
+                .about("Manage broker")
+                .subcommand(Command::new("start")
+                            .about("Start broker")
+                            .arg(timeout_arg.clone())
+                )
+                .subcommand(Command::new("stop")
+                            .about("Stop broker")
+                            .arg(stop_mode_arg.clone())
+                )
+        )
        .subcommand(
            Command::new("safekeeper")
                .arg_required_else_help(true)
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -702,7 +702,7 @@ impl Endpoint {
                    }
                }
            }
-            std::thread::sleep(ATTEMPT_INTERVAL);
+            tokio::time::sleep(ATTEMPT_INTERVAL).await;
        }

        // disarm the scopeguard, let the child outlive this function (and neon_local invoction)
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -342,7 +342,7 @@ impl LocalEnv {

        #[allow(clippy::manual_range_patterns)]
        match pg_version {
-            14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
+            14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
            _ => bail!("Unsupported postgres version: {}", pg_version),
        }
    }
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -28,6 +28,7 @@ use utils::{
    auth::{encode_from_key_file, Claims, Scope},
    id::{NodeId, TenantId},
 };
+use whoami::username;

 pub struct StorageController {
    env: LocalEnv,
@@ -183,7 +184,7 @@ impl StorageController {
    /// to other versions if that one isn't found.  Some automated tests create circumstances
    /// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
    async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
-        let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
+        let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 16, 15, 14];

        for v in prefer_versions {
            let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
@@ -211,7 +212,16 @@ impl StorageController {
    /// Readiness check for our postgres process
    async fn pg_isready(&self, pg_bin_dir: &Utf8Path, postgres_port: u16) -> anyhow::Result<bool> {
        let bin_path = pg_bin_dir.join("pg_isready");
-        let args = ["-h", "localhost", "-p", &format!("{}", postgres_port)];
+        let args = [
+            "-h",
+            "localhost",
+            "-U",
+            &username(),
+            "-d",
+            DB_NAME,
+            "-p",
+            &format!("{}", postgres_port),
+        ];
        let exitcode = Command::new(bin_path).args(args).spawn()?.wait().await?;

        Ok(exitcode.success())
@@ -225,7 +235,11 @@ impl StorageController {
    ///
    /// Returns the database url
    pub async fn setup_database(&self, postgres_port: u16) -> anyhow::Result<String> {
-        let database_url = format!("postgresql://localhost:{}/{DB_NAME}", postgres_port);
+        let database_url = format!(
+            "postgresql://{}@localhost:{}/{DB_NAME}",
+            &username(),
+            postgres_port
+        );

        let pg_bin_dir = self.get_pg_bin_dir().await?;
        let createdb_path = pg_bin_dir.join("createdb");
@@ -235,6 +249,10 @@ impl StorageController {
                "localhost",
                "-p",
                &format!("{}", postgres_port),
+                "-U",
+                &username(),
+                "-O",
+                &username(),
                DB_NAME,
            ])
            .output()
@@ -271,7 +289,7 @@ impl StorageController {
            // But tokio-postgres fork doesn't have this upstream commit:
            // https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
            // => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
-            .user(&whoami::username())
+            .user(&username())
            .dbname(DB_NAME)
            .connect(tokio_postgres::NoTls)
            .await
@@ -328,6 +346,12 @@ impl StorageController {
            let pg_log_path = pg_data_path.join("postgres.log");

            if !tokio::fs::try_exists(&pg_data_path).await? {
+                let initdb_args = ["-D", pg_data_path.as_ref(), "--username", &username()];
+                tracing::info!(
+                    "Initializing storage controller database with args: {:?}",
+                    initdb_args
+                );
+
                // Initialize empty database
                let initdb_path = pg_bin_dir.join("initdb");
                let mut child = Command::new(&initdb_path)
@@ -335,7 +359,7 @@ impl StorageController {
                        ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
                        ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
                    ])
-                    .args(["-D", pg_data_path.as_ref()])
+                    .args(initdb_args)
                    .spawn()
                    .expect("Failed to spawn initdb");
                let status = child.wait().await?;
@@ -364,8 +388,14 @@ impl StorageController {
                pg_data_path.as_ref(),
                "-l",
                pg_log_path.as_ref(),
+                "-U",
+                &username(),
                "start",
            ];
+            tracing::info!(
+                "Starting storage controller database with args: {:?}",
+                db_start_args
+            );

            background_process::start_process(
                "storage_controller_db",
--- a/control_plane/storcon_cli/Cargo.toml
+++ b/control_plane/storcon_cli/Cargo.toml
@@ -11,14 +11,11 @@ clap.workspace = true
 comfy-table.workspace = true
 futures.workspace = true
 humantime.workspace = true
-hyper.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 reqwest.workspace = true
-serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 storage_controller_client.workspace = true
-thiserror.workspace = true
 tokio.workspace = true
 tracing.workspace = true
 utils.workspace = true
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -8,7 +8,6 @@ license.workspace = true
 anyhow.workspace = true
 chrono.workspace = true
 serde.workspace = true
-serde_with.workspace = true
 serde_json.workspace = true
 regex.workspace = true

--- a/libs/consumption_metrics/Cargo.toml
+++ b/libs/consumption_metrics/Cargo.toml
@@ -5,9 +5,6 @@ edition = "2021"
 license = "Apache-2.0"

 [dependencies]
-anyhow.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 rand.workspace = true
 serde.workspace = true
-serde_with.workspace = true
-utils.workspace = true
--- a/libs/desim/Cargo.toml
+++ b/libs/desim/Cargo.toml
@@ -12,5 +12,4 @@ bytes.workspace = true
 utils.workspace = true
 parking_lot.workspace = true
 hex.workspace = true
-scopeguard.workspace = true
 smallvec = { workspace = true, features = ["write"] }
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -1,8 +1,8 @@
 use anyhow::{bail, Result};
 use byteorder::{ByteOrder, BE};
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
+use postgres_ffi::Oid;
 use postgres_ffi::RepOriginId;
-use postgres_ffi::{Oid, TransactionId};
 use serde::{Deserialize, Serialize};
 use std::{fmt, ops::Range};

@@ -350,7 +350,17 @@ impl Key {
 // 02 00000000 00000000 00000000 00   00000000
 //
 // TwoPhaseFile:
-// 02 00000000 00000000 00000000 00   XID
+//
+// 02 00000000 00000000 00XXXXXX XX   XXXXXXXX
+//
+//                        \______XID_________/
+//
+// The 64-bit XID is stored a little awkwardly in field6, field5 and
+// field4. PostgreSQL v16 and below only stored a 32-bit XID, which
+// fit completely in field6, but starting with PostgreSQL v17, a full
+// 64-bit XID is used. Most pageserver code that accesses
+// TwoPhaseFiles now deals with 64-bit XIDs even on v16; the high bits
+// are just unused.
 //
 // ControlFile:
 // 03 00000000 00000000 00000000 00   00000000
@@ -582,35 +592,36 @@ pub const TWOPHASEDIR_KEY: Key = Key {
 };

 #[inline(always)]
-pub fn twophase_file_key(xid: TransactionId) -> Key {
+pub fn twophase_file_key(xid: u64) -> Key {
    Key {
        field1: 0x02,
        field2: 0,
        field3: 0,
-        field4: 0,
-        field5: 0,
-        field6: xid,
+        field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
+        field5: ((xid & 0x000000FF00000000) >> 32) as u8,
+        field6: (xid & 0x00000000FFFFFFFF) as u32,
    }
 }

 #[inline(always)]
-pub fn twophase_key_range(xid: TransactionId) -> Range<Key> {
+pub fn twophase_key_range(xid: u64) -> Range<Key> {
+    // 64-bit XIDs really should not overflow
    let (next_xid, overflowed) = xid.overflowing_add(1);

    Key {
        field1: 0x02,
        field2: 0,
        field3: 0,
-        field4: 0,
-        field5: 0,
-        field6: xid,
+        field4: ((xid & 0xFFFFFF0000000000) >> 40) as u32,
+        field5: ((xid & 0x000000FF00000000) >> 32) as u8,
+        field6: (xid & 0x00000000FFFFFFFF) as u32,
    }..Key {
        field1: 0x02,
        field2: 0,
-        field3: 0,
-        field4: 0,
-        field5: u8::from(overflowed),
-        field6: next_xid,
+        field3: u32::from(overflowed),
+        field4: ((next_xid & 0xFFFFFF0000000000) >> 40) as u32,
+        field5: ((next_xid & 0x000000FF00000000) >> 32) as u8,
+        field6: (next_xid & 0x00000000FFFFFFFF) as u32,
    }
 }

--- a/libs/postgres_backend/Cargo.toml
+++ b/libs/postgres_backend/Cargo.toml
@@ -5,10 +5,8 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-async-trait.workspace = true
 anyhow.workspace = true
 bytes.workspace = true
-futures.workspace = true
 rustls.workspace = true
 serde.workspace = true
 thiserror.workspace = true
--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -5,13 +5,10 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-rand.workspace = true
 regex.workspace = true
 bytes.workspace = true
-byteorder.workspace = true
 anyhow.workspace = true
 crc32c.workspace = true
-hex.workspace = true
 once_cell.workspace = true
 log.workspace = true
 memoffset.workspace = true
--- a/libs/postgres_ffi/build.rs
+++ b/libs/postgres_ffi/build.rs
@@ -56,7 +56,7 @@ fn main() -> anyhow::Result<()> {
        PathBuf::from("pg_install")
    };

-    for pg_version in &["v14", "v15", "v16"] {
+    for pg_version in &["v14", "v15", "v16", "v17"] {
        let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
        if pg_install_dir_versioned.is_relative() {
            let cwd = env::current_dir().context("Failed to get current_dir")?;
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -57,6 +57,7 @@ macro_rules! for_all_postgres_versions {
        $macro!(v14);
        $macro!(v15);
        $macro!(v16);
+        $macro!(v17);
    };
 }

@@ -91,6 +92,7 @@ macro_rules! dispatch_pgversion {
                14 : v14,
                15 : v15,
                16 : v16,
+                17 : v17,
            ]
        )
    };
@@ -121,6 +123,7 @@ macro_rules! enum_pgversion_dispatch {
                V14 : v14,
                V15 : v15,
                V16 : v16,
+                V17 : v17,
            ]
        )
    };
@@ -150,6 +153,7 @@ macro_rules! enum_pgversion {
                V14 : v14,
                V15 : v15,
                V16 : v16,
+                V17 : v17,
            ]
        }
    };
@@ -162,6 +166,7 @@ macro_rules! enum_pgversion {
                V14 : v14,
                V15 : v15,
                V16 : v16,
+                V17 : v17,
            ]
        }
    };
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -152,6 +152,9 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
 pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
 pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;

+// From heapam_xlog.h
+pub const XLOG_HEAP2_REWRITE: u8 = 0x00;
+
 // From replication/message.h
 pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;

@@ -219,15 +222,20 @@ pub const INVALID_TRANSACTION_ID: u32 = 0;
 pub const FIRST_BOOTSTRAP_OBJECT_ID: u32 = 12000;
 pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;

+/* pg_control.h */
 pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
 pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
-pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
-pub const XLP_LONG_HEADER: u16 = 0x0002;
+pub const XLOG_PARAMETER_CHANGE: u8 = 0x60;
+pub const XLOG_END_OF_RECOVERY: u8 = 0x90;

 /* From xlog.h */
 pub const XLOG_REPLORIGIN_SET: u8 = 0x00;
 pub const XLOG_REPLORIGIN_DROP: u8 = 0x10;

+/* xlog_internal.h */
+pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
+pub const XLP_LONG_HEADER: u16 = 0x0002;
+
 /* From replication/slot.h */
 pub const REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN: usize = 4*4  /* offset of `slotdata` in ReplicationSlotOnDisk  */
   + 64 /* NameData */  + 4*4;
@@ -245,33 +253,6 @@ pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
 /* From origin.c */
 pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;

-// List of subdirectories inside pgdata.
-// Copied from src/bin/initdb/initdb.c
-pub const PGDATA_SUBDIRS: [&str; 22] = [
-    "global",
-    "pg_wal/archive_status",
-    "pg_commit_ts",
-    "pg_dynshmem",
-    "pg_notify",
-    "pg_serial",
-    "pg_snapshots",
-    "pg_subtrans",
-    "pg_twophase",
-    "pg_multixact",
-    "pg_multixact/members",
-    "pg_multixact/offsets",
-    "base",
-    "base/1",
-    "pg_replslot",
-    "pg_tblspc",
-    "pg_stat",
-    "pg_stat_tmp",
-    "pg_xact",
-    "pg_logical",
-    "pg_logical/snapshots",
-    "pg_logical/mappings",
-];
-
 // Don't include postgresql.conf as it is inconvenient on node start:
 // we need postgresql.conf before basebackup to synchronize safekeepers
 // so no point in overwriting it during backup restore. Rest of the files
--- a/libs/postgres_ffi/src/pg_constants_v14.rs
+++ b/libs/postgres_ffi/src/pg_constants_v14.rs
@@ -5,6 +5,33 @@ pub const BKPIMAGE_IS_COMPRESSED: u8 = 0x02; /* page image is compressed */
 pub const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay */
 pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */

+// List of subdirectories inside pgdata.
+// Copied from src/bin/initdb/initdb.c
+pub const PGDATA_SUBDIRS: [&str; 22] = [
+    "global",
+    "pg_wal/archive_status",
+    "pg_commit_ts",
+    "pg_dynshmem",
+    "pg_notify",
+    "pg_serial",
+    "pg_snapshots",
+    "pg_subtrans",
+    "pg_twophase",
+    "pg_multixact",
+    "pg_multixact/members",
+    "pg_multixact/offsets",
+    "base",
+    "base/1",
+    "pg_replslot",
+    "pg_tblspc",
+    "pg_stat",
+    "pg_stat_tmp",
+    "pg_xact",
+    "pg_logical",
+    "pg_logical/snapshots",
+    "pg_logical/mappings",
+];
+
 pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
    (bimg_info & BKPIMAGE_IS_COMPRESSED) != 0
 }
--- a/libs/postgres_ffi/src/pg_constants_v15.rs
+++ b/libs/postgres_ffi/src/pg_constants_v15.rs
@@ -11,6 +11,8 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */

 pub const SIZEOF_RELMAPFILE: usize = 512; /* sizeof(RelMapFile) in relmapper.c */

+pub use super::super::v14::bindings::PGDATA_SUBDIRS;
+
 pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

--- a/libs/postgres_ffi/src/pg_constants_v16.rs
+++ b/libs/postgres_ffi/src/pg_constants_v16.rs
@@ -11,6 +11,8 @@ pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */

 pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */

+pub use super::super::v14::bindings::PGDATA_SUBDIRS;
+
 pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;

--- a/libs/postgres_ffi/src/pg_constants_v17.rs
+++ b/libs/postgres_ffi/src/pg_constants_v17.rs
@@ -0,0 +1,55 @@
+pub const XACT_XINFO_HAS_DROPPED_STATS: u32 = 1u32 << 8;
+
+pub const XLOG_DBASE_CREATE_FILE_COPY: u8 = 0x00;
+pub const XLOG_DBASE_CREATE_WAL_LOG: u8 = 0x10;
+pub const XLOG_DBASE_DROP: u8 = 0x20;
+
+pub const BKPIMAGE_APPLY: u8 = 0x02; /* page image should be restored during replay */
+pub const BKPIMAGE_COMPRESS_PGLZ: u8 = 0x04; /* page image is compressed */
+pub const BKPIMAGE_COMPRESS_LZ4: u8 = 0x08; /* page image is compressed */
+pub const BKPIMAGE_COMPRESS_ZSTD: u8 = 0x10; /* page image is compressed */
+
+pub const SIZEOF_RELMAPFILE: usize = 524; /* sizeof(RelMapFile) in relmapper.c */
+
+// List of subdirectories inside pgdata.
+// Copied from src/bin/initdb/initdb.c
+pub const PGDATA_SUBDIRS: [&str; 23] = [
+    "global",
+    "pg_wal/archive_status",
+    "pg_wal/summaries",
+    "pg_commit_ts",
+    "pg_dynshmem",
+    "pg_notify",
+    "pg_serial",
+    "pg_snapshots",
+    "pg_subtrans",
+    "pg_twophase",
+    "pg_multixact",
+    "pg_multixact/members",
+    "pg_multixact/offsets",
+    "base",
+    "base/1",
+    "pg_replslot",
+    "pg_tblspc",
+    "pg_stat",
+    "pg_stat_tmp",
+    "pg_xact",
+    "pg_logical",
+    "pg_logical/snapshots",
+    "pg_logical/mappings",
+];
+
+pub fn bkpimg_is_compressed(bimg_info: u8) -> bool {
+    const ANY_COMPRESS_FLAG: u8 = BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4 | BKPIMAGE_COMPRESS_ZSTD;
+
+    (bimg_info & ANY_COMPRESS_FLAG) != 0
+}
+
+
+pub const XLOG_HEAP2_PRUNE_ON_ACCESS: u8 = 0x10;
+pub const XLOG_HEAP2_PRUNE_VACUUM_SCAN: u8 = 0x20;
+pub const XLOG_HEAP2_PRUNE_VACUUM_CLEANUP: u8 = 0x30;
+
+
+pub const XLOG_OVERWRITE_CONTRECORD: u8 = 0xD0;
+pub const XLOG_CHECKPOINT_REDO: u8 = 0xE0;
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -30,7 +30,7 @@ use std::fs::File;
 use std::io::prelude::*;
 use std::io::ErrorKind;
 use std::io::SeekFrom;
-use std::path::{Path, PathBuf};
+use std::path::Path;
 use std::time::SystemTime;
 use utils::bin_ser::DeserializeError;
 use utils::bin_ser::SerializeError;
@@ -260,13 +260,6 @@ fn open_wal_segment(seg_file_path: &Path) -> anyhow::Result<Option<File>> {
    }
 }

-pub fn main() {
-    let mut data_dir = PathBuf::new();
-    data_dir.push(".");
-    let wal_end = find_end_of_wal(&data_dir, WAL_SEGMENT_SIZE, Lsn(0)).unwrap();
-    println!("wal_end={:?}", wal_end);
-}
-
 impl XLogRecord {
    pub fn from_slice(buf: &[u8]) -> Result<XLogRecord, DeserializeError> {
        use utils::bin_ser::LeSer;
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -9,7 +9,6 @@ anyhow.workspace = true
 clap.workspace = true
 env_logger.workspace = true
 log.workspace = true
-once_cell.workspace = true
 postgres.workspace = true
 postgres_ffi.workspace = true
 camino-tempfile.workspace = true
--- a/libs/postgres_ffi/wal_craft/src/lib.rs
+++ b/libs/postgres_ffi/wal_craft/src/lib.rs
@@ -53,7 +53,7 @@ impl Conf {

        #[allow(clippy::manual_range_patterns)]
        match self.pg_version {
-            14 | 15 | 16 => Ok(path.join(format!("v{}", self.pg_version))),
+            14 | 15 | 16 | 17 => Ok(path.join(format!("v{}", self.pg_version))),
            _ => bail!("Unsupported postgres version: {}", self.pg_version),
        }
    }
--- a/libs/pq_proto/Cargo.toml
+++ b/libs/pq_proto/Cargo.toml
@@ -8,10 +8,8 @@ license.workspace = true
 bytes.workspace = true
 byteorder.workspace = true
 itertools.workspace = true
-pin-project-lite.workspace = true
 postgres-protocol.workspace = true
 rand.workspace = true
 tokio = { workspace = true, features = ["io-util"] }
-tracing.workspace = true
 thiserror.workspace = true
 serde.workspace = true
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -13,14 +13,11 @@ aws-smithy-async.workspace = true
 aws-smithy-types.workspace = true
 aws-config.workspace = true
 aws-sdk-s3.workspace = true
-aws-credential-types.workspace = true
 bytes.workspace = true
 camino = { workspace = true, features = ["serde1"] }
-humantime.workspace = true
 humantime-serde.workspace = true
 hyper = { workspace = true, features = ["stream"] }
 futures.workspace = true
-rand.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tokio = { workspace = true, features = ["sync", "fs", "io-util"] }
--- a/libs/safekeeper_api/Cargo.toml
+++ b/libs/safekeeper_api/Cargo.toml
@@ -6,6 +6,5 @@ license.workspace = true

 [dependencies]
 serde.workspace = true
-serde_with.workspace = true
 const_format.workspace = true
 utils.workspace = true
--- a/libs/tracing-utils/Cargo.toml
+++ b/libs/tracing-utils/Cargo.toml
@@ -9,8 +9,9 @@ hyper.workspace = true
 opentelemetry = { workspace = true, features=["rt-tokio"] }
 opentelemetry-otlp = { workspace = true, default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions.workspace = true
-reqwest = { workspace = true, default-features = false, features = ["rustls-tls"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
-tracing-subscriber.workspace = true
+
+[dev-dependencies]
+tracing-subscriber.workspace = true    # For examples in docs
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -42,7 +42,6 @@ tracing.workspace = true
 tracing-error.workspace = true
 tracing-subscriber = { workspace = true, features = ["json", "registry"] }
 rand.workspace = true
-serde_with.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 url.workspace = true
--- a/libs/utils/src/accum.rs
+++ b/libs/utils/src/accum.rs
@@ -1,33 +0,0 @@
-/// A helper to "accumulate" a value similar to `Iterator::reduce`, but lets you
-/// feed the accumulated values by calling the 'accum' function, instead of having an
-/// iterator.
-///
-/// For example, to calculate the smallest value among some integers:
-///
-/// ```
-/// use utils::accum::Accum;
-///
-/// let values = [1, 2, 3];
-///
-/// let mut min_value: Accum<u32> = Accum(None);
-/// for new_value in &values {
-///     min_value.accum(std::cmp::min, *new_value);
-/// }
-///
-/// assert_eq!(min_value.0.unwrap(), 1);
-/// ```
-pub struct Accum<T>(pub Option<T>);
-impl<T: Copy> Accum<T> {
-    pub fn accum<F>(&mut self, func: F, new_value: T)
-    where
-        F: FnOnce(T, T) -> T,
-    {
-        // If there is no previous value, just store the new value.
-        // Otherwise call the function to decide which one to keep.
-        self.0 = Some(if let Some(accum) = self.0 {
-            func(accum, new_value)
-        } else {
-            new_value
-        });
-    }
-}
--- a/libs/utils/src/id.rs
+++ b/libs/utils/src/id.rs
@@ -88,12 +88,6 @@ impl<'de> Deserialize<'de> for Id {
 }

 impl Id {
-    pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id {
-        let mut arr = [0u8; 16];
-        buf.copy_to_slice(&mut arr);
-        Id::from(arr)
-    }
-
    pub fn from_slice(src: &[u8]) -> Result<Id, IdError> {
        if src.len() != 16 {
            return Err(IdError::SliceParseError(src.len()));
@@ -179,10 +173,6 @@ impl fmt::Debug for Id {
 macro_rules! id_newtype {
    ($t:ident) => {
        impl $t {
-            pub fn get_from_buf(buf: &mut impl bytes::Buf) -> $t {
-                $t(Id::get_from_buf(buf))
-            }
-
            pub fn from_slice(src: &[u8]) -> Result<$t, IdError> {
                Ok($t(Id::from_slice(src)?))
            }
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -43,16 +43,9 @@ pub mod logging;
 pub mod lock_file;
 pub mod pid_file;

-// Misc
-pub mod accum;
-pub mod shutdown;
-
 // Utility for binding TcpListeners with proper socket options.
 pub mod tcp_listener;

-// Utility for putting a raw file descriptor into non-blocking mode
-pub mod nonblock;
-
 // Default signal handling
 pub mod sentry_init;
 pub mod signals;
--- a/libs/utils/src/lsn.rs
+++ b/libs/utils/src/lsn.rs
@@ -1,6 +1,5 @@
 #![warn(missing_docs)]

-use camino::Utf8Path;
 use serde::{de::Visitor, Deserialize, Serialize};
 use std::fmt;
 use std::ops::{Add, AddAssign};
@@ -145,14 +144,6 @@ impl Lsn {
        i128::from(self.0) - i128::from(other)
    }

-    /// Parse an LSN from a filename in the form `0000000000000000`
-    pub fn from_filename<F>(filename: F) -> Result<Self, LsnParseError>
-    where
-        F: AsRef<Utf8Path>,
-    {
-        Lsn::from_hex(filename.as_ref().as_str())
-    }
-
    /// Parse an LSN from a string in the form `0000000000000000`
    pub fn from_hex<S>(s: S) -> Result<Self, LsnParseError>
    where
--- a/libs/utils/src/nonblock.rs
+++ b/libs/utils/src/nonblock.rs
@@ -1,17 +0,0 @@
-use nix::fcntl::{fcntl, OFlag, F_GETFL, F_SETFL};
-use std::os::unix::io::RawFd;
-
-/// Put a file descriptor into non-blocking mode
-pub fn set_nonblock(fd: RawFd) -> Result<(), std::io::Error> {
-    let bits = fcntl(fd, F_GETFL)?;
-
-    // If F_GETFL returns some unknown bits, they should be valid
-    // for passing back to F_SETFL, too. If we left them out, the F_SETFL
-    // would effectively clear them, which is not what we want.
-    let mut flags = OFlag::from_bits_retain(bits);
-    flags |= OFlag::O_NONBLOCK;
-
-    fcntl(fd, F_SETFL(flags))?;
-
-    Ok(())
-}
--- a/libs/utils/src/shutdown.rs
+++ b/libs/utils/src/shutdown.rs
@@ -1,7 +0,0 @@
-/// Immediately terminate the calling process without calling
-/// atexit callbacks, C runtime destructors etc. We mainly use
-/// this to protect coverage data from concurrent writes.
-pub fn exit_now(code: u8) -> ! {
-    // SAFETY: exiting is safe, the ffi is not safe
-    unsafe { nix::libc::_exit(code as _) };
-}
--- a/libs/vm_monitor/Cargo.toml
+++ b/libs/vm_monitor/Cargo.toml
@@ -15,13 +15,11 @@ anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
 futures.workspace = true
-inotify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 sysinfo.workspace = true
 tokio = { workspace = true, features = ["rt-multi-thread"] }
 tokio-postgres.workspace = true
-tokio-stream.workspace = true
 tokio-util.workspace = true
 tracing.workspace = true
 tracing-subscriber.workspace = true
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -5,6 +5,8 @@ use std::{env, path::PathBuf, process::Command};

 use anyhow::{anyhow, Context};

+const WALPROPOSER_PG_VERSION: &str = "v17";
+
 fn main() -> anyhow::Result<()> {
    // Tell cargo to invalidate the built crate whenever the wrapper changes
    println!("cargo:rerun-if-changed=bindgen_deps.h");
@@ -36,7 +38,10 @@ fn main() -> anyhow::Result<()> {
    // Rebuild crate when libwalproposer.a changes
    println!("cargo:rerun-if-changed={walproposer_lib_search_str}/libwalproposer.a");

-    let pg_config_bin = pg_install_abs.join("v16").join("bin").join("pg_config");
+    let pg_config_bin = pg_install_abs
+        .join(WALPROPOSER_PG_VERSION)
+        .join("bin")
+        .join("pg_config");
    let inc_server_path: String = if pg_config_bin.exists() {
        let output = Command::new(pg_config_bin)
            .arg("--includedir-server")
@@ -53,7 +58,7 @@ fn main() -> anyhow::Result<()> {
            .into()
    } else {
        let server_path = pg_install_abs
-            .join("v16")
+            .join(WALPROPOSER_PG_VERSION)
            .join("include")
            .join("postgresql")
            .join("server")
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -15,7 +15,6 @@ anyhow.workspace = true
 arc-swap.workspace = true
 async-compression.workspace = true
 async-stream.workspace = true
-async-trait.workspace = true
 bit_field.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
@@ -23,12 +22,9 @@ camino.workspace = true
 camino-tempfile.workspace = true
 chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
-const_format.workspace = true
 consumption_metrics.workspace = true
 crc32c.workspace = true
-crossbeam-utils.workspace = true
 either.workspace = true
-flate2.workspace = true
 fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
@@ -57,10 +53,6 @@ serde.workspace = true
 serde_json = { workspace = true, features = ["raw_value"] }
 serde_path_to_error.workspace = true
 serde_with.workspace = true
-signal-hook.workspace = true
-smallvec = { workspace = true, features = ["write"] }
-svg_fmt.workspace = true
-sync_wrapper.workspace = true
 sysinfo.workspace = true
 tokio-tar.workspace = true
 thiserror.workspace = true
@@ -73,7 +65,6 @@ tokio-stream.workspace = true
 tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
 tracing.workspace = true
-twox-hash.workspace = true
 url.workspace = true
 walkdir.workspace = true
 metrics.workspace = true
--- a/pageserver/compaction/Cargo.toml
+++ b/pageserver/compaction/Cargo.toml
@@ -9,41 +9,19 @@ default = []

 [dependencies]
 anyhow.workspace = true
-async-compression.workspace = true
 async-stream.workspace = true
-byteorder.workspace = true
-bytes.workspace = true
-chrono = { workspace = true, features = ["serde"] }
 clap = { workspace = true, features = ["string"] }
-const_format.workspace = true
-consumption_metrics.workspace = true
-crossbeam-utils.workspace = true
-either.workspace = true
-flate2.workspace = true
-fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
-hex.workspace = true
-humantime.workspace = true
-humantime-serde.workspace = true
 itertools.workspace = true
 once_cell.workspace = true
 pageserver_api.workspace = true
 pin-project-lite.workspace = true
 rand.workspace = true
-smallvec = { workspace = true, features = ["write"] }
 svg_fmt.workspace = true
-sync_wrapper.workspace = true
-thiserror.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
-tokio-io-timeout.workspace = true
-tokio-util.workspace = true
 tracing.workspace = true
-tracing-error.workspace = true
 tracing-subscriber.workspace = true
-url.workspace = true
-walkdir.workspace = true
-metrics.workspace = true
 utils.workspace = true
 workspace_hack.workspace = true

--- a/pageserver/ctl/Cargo.toml
+++ b/pageserver/ctl/Cargo.toml
@@ -8,7 +8,6 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-bytes.workspace = true
 camino.workspace = true
 clap = { workspace = true, features = ["string"] }
 git-version.workspace = true
@@ -24,5 +23,4 @@ toml_edit.workspace = true
 utils.workspace = true
 svg_fmt.workspace = true
 workspace_hack.workspace = true
-serde.workspace = true
 serde_json.workspace = true
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -79,16 +79,24 @@ pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
        return None;
    }
    let keys: Vec<&str> = split[0].split('-').collect();
-    let mut lsns: Vec<&str> = split[1].split('-').collect();
-    let is_delta = if lsns.len() == 1 {
-        lsns.push(lsns[0]);
+    let lsn_and_opt_generation: Vec<&str> = split[1].split('v').collect();
+    let lsns: Vec<&str> = lsn_and_opt_generation[0].split('-').collect();
+    let the_lsns: [&str; 2];
+
+    /*
+     * Generations add a -vX-XXXXXX postfix, which causes issues when we try to
+     * parse 'vX' as an LSN.
+     */
+    let is_delta = if lsns.len() == 1 || lsns[1].is_empty() {
+        the_lsns = [lsns[0], lsns[0]];
        false
    } else {
+        the_lsns = [lsns[0], lsns[1]];
        true
    };

    let key_range = Key::from_hex(keys[0]).unwrap()..Key::from_hex(keys[1]).unwrap();
-    let lsn_range = Lsn::from_hex(lsns[0]).unwrap()..Lsn::from_hex(lsns[1]).unwrap();
+    let lsn_range = Lsn::from_hex(the_lsns[0]).unwrap()..Lsn::from_hex(the_lsns[1]).unwrap();
    let holes = Vec::new();
    Some(LayerFile {
        key_range,
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -30,9 +30,8 @@ use pageserver_api::reltag::{RelTag, SlruKind};

 use postgres_ffi::dispatch_pgversion;
 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
-use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
+use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA};
 use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
-use postgres_ffi::TransactionId;
 use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
 use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
@@ -255,8 +254,11 @@ where

        let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;

+        let pgversion = self.timeline.pg_version;
+        let subdirs = dispatch_pgversion!(pgversion, &pgv::bindings::PGDATA_SUBDIRS[..]);
+
        // Create pgdata subdirs structure
-        for dir in PGDATA_SUBDIRS.iter() {
+        for dir in subdirs.iter() {
            let header = new_tar_header_dir(dir)?;
            self.ar
                .append(&header, &mut io::empty())
@@ -606,7 +608,7 @@ where
    //
    // Extract twophase state files
    //
-    async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
+    async fn add_twophase_file(&mut self, xid: u64) -> Result<(), BasebackupError> {
        let img = self
            .timeline
            .get_twophase_file(xid, self.lsn, self.ctx)
@@ -617,7 +619,11 @@ where
        buf.extend_from_slice(&img[..]);
        let crc = crc32c::crc32c(&img[..]);
        buf.put_u32_le(crc);
-        let path = format!("pg_twophase/{:>08X}", xid);
+        let path = if self.timeline.pg_version < 17 {
+            format!("pg_twophase/{:>08X}", xid)
+        } else {
+            format!("pg_twophase/{:>016X}", xid)
+        };
        let header = new_tar_header(&path, buf.len() as u64)?;
        self.ar
            .append(&header, &buf[..])
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -281,7 +281,7 @@ impl PageServerConf {

        #[allow(clippy::manual_range_patterns)]
        match pg_version {
-            14 | 15 | 16 => Ok(path.join(format!("v{pg_version}"))),
+            14 | 15 | 16 | 17 => Ok(path.join(format!("v{pg_version}"))),
            _ => bail!("Unsupported postgres version: {}", pg_version),
        }
    }
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -178,7 +178,7 @@ async fn collect_metrics(
                )
                .await;
                if let Err(e) = res {
-                    tracing::error!("failed to upload to S3: {e:#}");
+                    tracing::error!("failed to upload to remote storage: {e:#}");
                }
            }
        };
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -580,9 +580,11 @@ async fn import_file(
        import_slru(modification, slru, file_path, reader, len, ctx).await?;
        debug!("imported multixact members slru");
    } else if file_path.starts_with("pg_twophase") {
-        let xid = u32::from_str_radix(file_name.as_ref(), 16)?;
-
        let bytes = read_all_bytes(reader).await?;
+
+        // In PostgreSQL v17, this is a 64-bit FullTransactionId. In previous versions,
+        // it's a 32-bit TransactionId, which fits in u64 anyway.
+        let xid = u64::from_str_radix(file_name.as_ref(), 16)?;
        modification
            .put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)
            .await?;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1777,7 +1777,7 @@ pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| {
    .expect("failed to define a metric"),
    upload_heatmap_duration: register_histogram!(
        "pageserver_secondary_upload_heatmap_duration",
-        "Time to build and upload a heatmap, including any waiting inside the S3 client"
+        "Time to build and upload a heatmap, including any waiting inside the remote storage client"
    )
    .expect("failed to define a metric"),
    download_heatmap: register_int_counter!(
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -633,7 +633,7 @@ impl Timeline {

    pub(crate) async fn get_twophase_file(
        &self,
-        xid: TransactionId,
+        xid: u64,
        lsn: Lsn,
        ctx: &RequestContext,
    ) -> Result<Bytes, PageReconstructError> {
@@ -646,11 +646,19 @@ impl Timeline {
        &self,
        lsn: Lsn,
        ctx: &RequestContext,
-    ) -> Result<HashSet<TransactionId>, PageReconstructError> {
+    ) -> Result<HashSet<u64>, PageReconstructError> {
        // fetch directory entry
        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;

-        Ok(TwoPhaseDirectory::des(&buf)?.xids)
+        if self.pg_version >= 17 {
+            Ok(TwoPhaseDirectoryV17::des(&buf)?.xids)
+        } else {
+            Ok(TwoPhaseDirectory::des(&buf)?
+                .xids
+                .iter()
+                .map(|x| u64::from(*x))
+                .collect())
+        }
    }

    pub(crate) async fn get_control_file(
@@ -902,9 +910,13 @@ impl Timeline {

        // Then pg_twophase
        result.add_key(TWOPHASEDIR_KEY);
-        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
-        let twophase_dir = TwoPhaseDirectory::des(&buf)?;
-        let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
+
+        let mut xids: Vec<u64> = self
+            .list_twophase_files(lsn, ctx)
+            .await?
+            .iter()
+            .cloned()
+            .collect();
        xids.sort_unstable();
        for xid in xids {
            result.add_key(twophase_file_key(xid));
@@ -1127,9 +1139,15 @@ impl<'a> DatadirModification<'a> {
        // Create AuxFilesDirectory
        self.init_aux_dir()?;

-        let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
-            xids: HashSet::new(),
-        })?;
+        let buf = if self.tline.pg_version >= 17 {
+            TwoPhaseDirectoryV17::ser(&TwoPhaseDirectoryV17 {
+                xids: HashSet::new(),
+            })
+        } else {
+            TwoPhaseDirectory::ser(&TwoPhaseDirectory {
+                xids: HashSet::new(),
+            })
+        }?;
        self.pending_directory_entries
            .push((DirectoryKind::TwoPhase, 0));
        self.put(TWOPHASEDIR_KEY, Value::Image(buf.into()));
@@ -1321,22 +1339,31 @@ impl<'a> DatadirModification<'a> {

    pub async fn put_twophase_file(
        &mut self,
-        xid: TransactionId,
+        xid: u64,
        img: Bytes,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // Add it to the directory entry
-        let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
-        let mut dir = TwoPhaseDirectory::des(&buf)?;
-        if !dir.xids.insert(xid) {
-            anyhow::bail!("twophase file for xid {} already exists", xid);
-        }
-        self.pending_directory_entries
-            .push((DirectoryKind::TwoPhase, dir.xids.len()));
-        self.put(
-            TWOPHASEDIR_KEY,
-            Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
-        );
+        let dirbuf = self.get(TWOPHASEDIR_KEY, ctx).await?;
+        let newdirbuf = if self.tline.pg_version >= 17 {
+            let mut dir = TwoPhaseDirectoryV17::des(&dirbuf)?;
+            if !dir.xids.insert(xid) {
+                anyhow::bail!("twophase file for xid {} already exists", xid);
+            }
+            self.pending_directory_entries
+                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
+        } else {
+            let xid = u32::try_from(xid)?; // pre-v17 XIDs are 32-bit: error, don't truncate
+            let mut dir = TwoPhaseDirectory::des(&dirbuf)?;
+            if !dir.xids.insert(xid) {
+                anyhow::bail!("twophase file for xid {} already exists", xid);
+            }
+            self.pending_directory_entries
+                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            Bytes::from(TwoPhaseDirectory::ser(&dir)?)
+        };
+        self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));

        self.put(twophase_file_key(xid), Value::Image(img));
        Ok(())
@@ -1639,22 +1666,32 @@ impl<'a> DatadirModification<'a> {
    /// This method is used for marking truncated SLRU files
    pub async fn drop_twophase_file(
        &mut self,
-        xid: TransactionId,
+        xid: u64,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
        // Remove it from the directory entry
        let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
-        let mut dir = TwoPhaseDirectory::des(&buf)?;
+        let newdirbuf = if self.tline.pg_version >= 17 {
+            let mut dir = TwoPhaseDirectoryV17::des(&buf)?;

-        if !dir.xids.remove(&xid) {
-            warn!("twophase file for xid {} does not exist", xid);
-        }
-        self.pending_directory_entries
-            .push((DirectoryKind::TwoPhase, dir.xids.len()));
-        self.put(
-            TWOPHASEDIR_KEY,
-            Value::Image(Bytes::from(TwoPhaseDirectory::ser(&dir)?)),
-        );
+            if !dir.xids.remove(&xid) {
+                warn!("twophase file for xid {} does not exist", xid);
+            }
+            self.pending_directory_entries
+                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            Bytes::from(TwoPhaseDirectoryV17::ser(&dir)?)
+        } else {
+            let xid: u32 = u32::try_from(xid)?;
+            let mut dir = TwoPhaseDirectory::des(&buf)?;
+
+            if !dir.xids.remove(&xid) {
+                warn!("twophase file for xid {} does not exist", xid);
+            }
+            self.pending_directory_entries
+                .push((DirectoryKind::TwoPhase, dir.xids.len()));
+            Bytes::from(TwoPhaseDirectory::ser(&dir)?)
+        };
+        self.put(TWOPHASEDIR_KEY, Value::Image(newdirbuf));

        // Delete it
        self.delete(twophase_key_range(xid));
@@ -2124,11 +2161,21 @@ struct DbDirectory {
    dbdirs: HashMap<(Oid, Oid), bool>,
 }

+// The format of TwoPhaseDirectory changed in PostgreSQL v17, because the filenames of
+// pg_twophase files were expanded from 32-bit XIDs to 64-bit XIDs.  Previously, the files
+// were named like "pg_twophase/000002E5", now they're like
+// "pg_twophase/0000000A000002E4".
+
 #[derive(Debug, Serialize, Deserialize)]
 struct TwoPhaseDirectory {
    xids: HashSet<TransactionId>,
 }

+#[derive(Debug, Serialize, Deserialize)]
+struct TwoPhaseDirectoryV17 {
+    xids: HashSet<u64>,
+}
+
 #[derive(Debug, Serialize, Deserialize, Default)]
 struct RelDirectory {
    // Set of relations that exist. (relfilenode, forknum)
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -38,7 +38,7 @@ use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
    BlobFlag, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead, VectoredReadPlanner,
 };
-use crate::tenant::{PageReconstructError, Timeline};
+use crate::tenant::PageReconstructError;
 use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
 use crate::virtual_file::{self, VirtualFile};
 use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
@@ -58,7 +58,6 @@ use std::io::SeekFrom;
 use std::ops::Range;
 use std::os::unix::prelude::FileExt;
 use std::str::FromStr;
-use std::sync::Arc;
 use tokio::sync::OnceCell;
 use tokio_stream::StreamExt;
 use tracing::*;
@@ -70,9 +69,7 @@ use utils::{
 };

 use super::layer_name::ImageLayerName;
-use super::{
-    AsLayerDesc, Layer, LayerName, PersistentLayerDesc, ResidentLayer, ValuesReconstructState,
-};
+use super::{AsLayerDesc, LayerName, PersistentLayerDesc, ValuesReconstructState};

 ///
 /// Header stored in the beginning of the file
@@ -800,10 +797,9 @@ impl ImageLayerWriterInner {
    ///
    async fn finish(
        self,
-        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
        end_key: Option<Key>,
-    ) -> anyhow::Result<ResidentLayer> {
+    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
        let index_start_blk =
            ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;

@@ -879,12 +875,9 @@ impl ImageLayerWriterInner {
        // fsync the file
        file.sync_all().await?;

-        // FIXME: why not carry the virtualfile here, it supports renaming?
-        let layer = Layer::finish_creating(self.conf, timeline, desc, &self.path)?;
+        trace!("created image layer {}", self.path);

-        info!("created image layer {}", layer.local_path());
-
-        Ok(layer)
+        Ok((desc, self.path))
    }
 }

@@ -963,24 +956,18 @@ impl ImageLayerWriter {
    ///
    pub(crate) async fn finish(
        mut self,
-        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
-    ) -> anyhow::Result<super::ResidentLayer> {
-        self.inner.take().unwrap().finish(timeline, ctx, None).await
+    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
+        self.inner.take().unwrap().finish(ctx, None).await
    }

    /// Finish writing the image layer with an end key, used in [`super::split_writer::SplitImageLayerWriter`]. The end key determines the end of the image layer's covered range and is exclusive.
    pub(super) async fn finish_with_end_key(
        mut self,
-        timeline: &Arc<Timeline>,
        end_key: Key,
        ctx: &RequestContext,
-    ) -> anyhow::Result<super::ResidentLayer> {
-        self.inner
-            .take()
-            .unwrap()
-            .finish(timeline, ctx, Some(end_key))
-            .await
+    ) -> anyhow::Result<(PersistentLayerDesc, Utf8PathBuf)> {
+        self.inner.take().unwrap().finish(ctx, Some(end_key)).await
    }
 }

@@ -1084,7 +1071,7 @@ mod test {
        tenant::{
            config::TenantConf,
            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::ResidentLayer,
+            storage_layer::{Layer, ResidentLayer},
            vectored_blob_io::StreamingVectoredReadPlanner,
            Tenant, Timeline,
        },
@@ -1155,7 +1142,8 @@ mod test {

                key = key.next();
            }
-            writer.finish(&timeline, &ctx).await.unwrap()
+            let (desc, path) = writer.finish(&ctx).await.unwrap();
+            Layer::finish_creating(tenant.conf, &timeline, desc, &path).unwrap()
        };
        let original_size = resident.metadata().file_size;

@@ -1217,7 +1205,9 @@ mod test {
                .await
                .unwrap();
            let replacement = if wrote_keys > 0 {
-                Some(filtered_writer.finish(&timeline, &ctx).await.unwrap())
+                let (desc, path) = filtered_writer.finish(&ctx).await.unwrap();
+                let resident = Layer::finish_creating(tenant.conf, &timeline, desc, &path).unwrap();
+                Some(resident)
            } else {
                None
            };
@@ -1290,7 +1280,8 @@ mod test {
        for (key, img) in images {
            writer.put_image(key, img, ctx).await?;
        }
-        let img_layer = writer.finish(tline, ctx).await?;
+        let (desc, path) = writer.finish(ctx).await?;
+        let img_layer = Layer::finish_creating(tenant.conf, tline, desc, &path)?;

        Ok::<_, anyhow::Error>(img_layer)
    }
--- a/pageserver/src/tenant/storage_layer/split_writer.rs
+++ b/pageserver/src/tenant/storage_layer/split_writer.rs
@@ -121,11 +121,11 @@ impl SplitImageLayerWriter {
                self.generated_layers
                    .push(SplitWriterResult::Discarded(layer_key));
            } else {
-                self.generated_layers.push(SplitWriterResult::Produced(
-                    prev_image_writer
-                        .finish_with_end_key(tline, key, ctx)
-                        .await?,
-                ));
+                let (desc, path) = prev_image_writer.finish_with_end_key(key, ctx).await?;
+
+                let layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
+                self.generated_layers
+                    .push(SplitWriterResult::Produced(layer));
            }
        }
        self.inner.put_image(key, img, ctx).await
@@ -170,9 +170,9 @@ impl SplitImageLayerWriter {
        if discard(&layer_key).await {
            generated_layers.push(SplitWriterResult::Discarded(layer_key));
        } else {
-            generated_layers.push(SplitWriterResult::Produced(
-                inner.finish_with_end_key(tline, end_key, ctx).await?,
-            ));
+            let (desc, path) = inner.finish_with_end_key(end_key, ctx).await?;
+            let layer = Layer::finish_creating(self.conf, tline, desc, &path)?;
+            generated_layers.push(SplitWriterResult::Produced(layer));
        }
        Ok(generated_layers)
    }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -4013,7 +4013,8 @@ impl Timeline {
        if wrote_keys {
            // Normal path: we have written some data into the new image layer for this
            // partition, so flush it to disk.
-            let image_layer = image_layer_writer.finish(self, ctx).await?;
+            let (desc, path) = image_layer_writer.finish(ctx).await?;
+            let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
            Ok(ImageLayerCreationOutcome {
                image: Some(image_layer),
                next_start_key: img_range.end,
@@ -4101,7 +4102,8 @@ impl Timeline {
        if wrote_any_image {
            // Normal path: we have written some data into the new image layer for this
            // partition, so flush it to disk.
-            let image_layer = image_layer_writer.finish(self, ctx).await?;
+            let (desc, path) = image_layer_writer.finish(ctx).await?;
+            let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;
            Ok(ImageLayerCreationOutcome {
                image: Some(image_layer),
                next_start_key: img_range.end,
@@ -5403,7 +5405,8 @@ impl Timeline {
        for (key, img) in images {
            image_layer_writer.put_image(key, img, ctx).await?;
        }
-        let image_layer = image_layer_writer.finish(self, ctx).await?;
+        let (desc, path) = image_layer_writer.finish(ctx).await?;
+        let image_layer = Layer::finish_creating(self.conf, self, desc, &path)?;

        {
            let mut guard = self.layers.write().await;
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -563,10 +563,12 @@ impl Timeline {
                .await?;

            if keys_written > 0 {
-                let new_layer = image_layer_writer
-                    .finish(self, ctx)
+                let (desc, path) = image_layer_writer
+                    .finish(ctx)
                    .await
                    .map_err(CompactionError::Other)?;
+                let new_layer = Layer::finish_creating(self.conf, self, desc, &path)
+                    .map_err(CompactionError::Other)?;
                tracing::info!(layer=%new_layer, "Rewrote layer, {} -> {} bytes",
                    layer.metadata().file_size,
                    new_layer.metadata().file_size);
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -237,6 +237,26 @@ impl WalIngest {
                                .await?;
                        }
                    }
+                } else if pg_version == 17 {
+                    if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_WAL_LOG {
+                        debug!("XLOG_DBASE_CREATE_WAL_LOG: noop");
+                    } else if info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY {
+                        // The XLOG record was renamed between v14 and v15,
+                        // but the record format is the same.
+                        // So we can reuse XlCreateDatabase here.
+                        debug!("XLOG_DBASE_CREATE_FILE_COPY");
+                        let createdb = XlCreateDatabase::decode(&mut buf);
+                        self.ingest_xlog_dbase_create(modification, &createdb, ctx)
+                            .await?;
+                    } else if info == postgres_ffi::v17::bindings::XLOG_DBASE_DROP {
+                        let dropdb = XlDropDatabase::decode(&mut buf);
+                        for tablespace_id in dropdb.tablespace_ids {
+                            trace!("Drop db {}, {}", tablespace_id, dropdb.db_id);
+                            modification
+                                .drop_dbdir(tablespace_id, dropdb.db_id, ctx)
+                                .await?;
+                        }
+                    }
                }
            }
            pg_constants::RM_TBLSPC_ID => {
@@ -246,7 +266,11 @@ impl WalIngest {
                let info = decoded.xl_info & !pg_constants::XLR_INFO_MASK;

                if info == pg_constants::CLOG_ZEROPAGE {
-                    let pageno = buf.get_u32_le();
+                    let pageno = if pg_version < 17 {
+                        buf.get_u32_le()
+                    } else {
+                        buf.get_u64_le() as u32
+                    };
                    let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                    let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                    self.put_slru_page_image(
@@ -260,7 +284,7 @@ impl WalIngest {
                    .await?;
                } else {
                    assert!(info == pg_constants::CLOG_TRUNCATE);
-                    let xlrec = XlClogTruncate::decode(&mut buf);
+                    let xlrec = XlClogTruncate::decode(&mut buf, pg_version);
                    self.ingest_clog_truncate_record(modification, &xlrec, ctx)
                        .await?;
                }
@@ -299,12 +323,21 @@ impl WalIngest {
                        parsed_xact.xid,
                        lsn,
                    );
-                    modification
-                        .drop_twophase_file(parsed_xact.xid, ctx)
-                        .await?;
+
+                    let xid: u64 = if pg_version >= 17 {
+                        self.adjust_to_full_transaction_id(parsed_xact.xid)?
+                    } else {
+                        parsed_xact.xid as u64
+                    };
+                    modification.drop_twophase_file(xid, ctx).await?;
                } else if info == pg_constants::XLOG_XACT_PREPARE {
+                    let xid: u64 = if pg_version >= 17 {
+                        self.adjust_to_full_transaction_id(decoded.xl_xid)?
+                    } else {
+                        decoded.xl_xid as u64
+                    };
                    modification
-                        .put_twophase_file(decoded.xl_xid, Bytes::copy_from_slice(&buf[..]), ctx)
+                        .put_twophase_file(xid, Bytes::copy_from_slice(&buf[..]), ctx)
                        .await?;
                }
            }
@@ -312,7 +345,11 @@ impl WalIngest {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;

                if info == pg_constants::XLOG_MULTIXACT_ZERO_OFF_PAGE {
-                    let pageno = buf.get_u32_le();
+                    let pageno = if pg_version < 17 {
+                        buf.get_u32_le()
+                    } else {
+                        buf.get_u64_le() as u32
+                    };
                    let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                    let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                    self.put_slru_page_image(
@@ -325,7 +362,11 @@ impl WalIngest {
                    )
                    .await?;
                } else if info == pg_constants::XLOG_MULTIXACT_ZERO_MEM_PAGE {
-                    let pageno = buf.get_u32_le();
+                    let pageno = if pg_version < 17 {
+                        buf.get_u32_le()
+                    } else {
+                        buf.get_u64_le() as u32
+                    };
                    let segno = pageno / pg_constants::SLRU_PAGES_PER_SEGMENT;
                    let rpageno = pageno % pg_constants::SLRU_PAGES_PER_SEGMENT;
                    self.put_slru_page_image(
@@ -354,6 +395,20 @@ impl WalIngest {
            pg_constants::RM_XLOG_ID => {
                let info = decoded.xl_info & pg_constants::XLR_RMGR_INFO_MASK;

+                if info == pg_constants::XLOG_PARAMETER_CHANGE {
+                    if let CheckPoint::V17(cp) = &mut self.checkpoint {
+                        let rec = v17::XlParameterChange::decode(&mut buf);
+                        cp.wal_level = rec.wal_level;
+                        self.checkpoint_modified = true;
+                    }
+                } else if info == pg_constants::XLOG_END_OF_RECOVERY {
+                    if let CheckPoint::V17(cp) = &mut self.checkpoint {
+                        let rec = v17::XlEndOfRecovery::decode(&mut buf);
+                        cp.wal_level = rec.wal_level;
+                        self.checkpoint_modified = true;
+                    }
+                }
+
                enum_pgversion_dispatch!(&mut self.checkpoint, CheckPoint, cp, {
                    if info == pg_constants::XLOG_NEXTOID {
                        let next_oid = buf.get_u32_le();
@@ -397,12 +452,24 @@ impl WalIngest {
                        if xlog_checkpoint.oldestActiveXid == pg_constants::INVALID_TRANSACTION_ID
                            && info == pg_constants::XLOG_CHECKPOINT_SHUTDOWN
                        {
-                            let mut oldest_active_xid = cp.nextXid.value as u32;
-                            for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
-                                if (xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
-                                    oldest_active_xid = xid;
+                            let oldest_active_xid = if pg_version >= 17 {
+                                let mut oldest_active_full_xid = cp.nextXid.value;
+                                for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
+                                    if xid < oldest_active_full_xid {
+                                        oldest_active_full_xid = xid;
+                                    }
                                }
-                            }
+                                oldest_active_full_xid as u32
+                            } else {
+                                let mut oldest_active_xid = cp.nextXid.value as u32;
+                                for xid in modification.tline.list_twophase_files(lsn, ctx).await? {
+                                    let narrow_xid = xid as u32;
+                                    if (narrow_xid.wrapping_sub(oldest_active_xid) as i32) < 0 {
+                                        oldest_active_xid = narrow_xid;
+                                    }
+                                }
+                                oldest_active_xid
+                            };
                            cp.oldestActiveXid = oldest_active_xid;
                        } else {
                            cp.oldestActiveXid = xlog_checkpoint.oldestActiveXid;
@@ -515,6 +582,25 @@ impl WalIngest {
        Ok(modification.len() > prev_len)
    }

+    /// This is the same as AdjustToFullTransactionId(xid) in PostgreSQL
+    fn adjust_to_full_transaction_id(&self, xid: TransactionId) -> Result<u64> {
+        let next_full_xid =
+            enum_pgversion_dispatch!(&self.checkpoint, CheckPoint, cp, { cp.nextXid.value });
+
+        let next_xid = (next_full_xid) as u32;
+        let mut epoch = (next_full_xid >> 32) as u32;
+
+        if xid > next_xid {
+            // Wraparound occurred, must be from a prev epoch.
+            if epoch == 0 {
+                bail!("apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}");
+            }
+            epoch -= 1;
+        }
+
+        Ok((epoch as u64) << 32 | xid as u64)
+    }
+
    /// Do not store this block, but observe it for the purposes of updating our relation size state.
    async fn observe_decoded_block(
        &mut self,
@@ -815,6 +901,73 @@ impl WalIngest {
                    bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
                }
            }
+            17 => {
+                if decoded.xl_rmid == pg_constants::RM_HEAP_ID {
+                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
+
+                    if info == pg_constants::XLOG_HEAP_INSERT {
+                        let xlrec = v17::XlHeapInsert::decode(buf);
+                        assert_eq!(0, buf.remaining());
+                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
+                            new_heap_blkno = Some(decoded.blocks[0].blkno);
+                        }
+                    } else if info == pg_constants::XLOG_HEAP_DELETE {
+                        let xlrec = v17::XlHeapDelete::decode(buf);
+                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
+                            new_heap_blkno = Some(decoded.blocks[0].blkno);
+                        }
+                    } else if info == pg_constants::XLOG_HEAP_UPDATE
+                        || info == pg_constants::XLOG_HEAP_HOT_UPDATE
+                    {
+                        let xlrec = v17::XlHeapUpdate::decode(buf);
+                        // the size of tuple data is inferred from the size of the record.
+                        // we can't validate the remaining number of bytes without parsing
+                        // the tuple data.
+                        if (xlrec.flags & pg_constants::XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) != 0 {
+                            old_heap_blkno = Some(decoded.blocks.last().unwrap().blkno);
+                        }
+                        if (xlrec.flags & pg_constants::XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) != 0 {
+                            // PostgreSQL only uses XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED on a
+                            // non-HOT update where the new tuple goes to different page than
+                            // the old one. Otherwise, only XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED is
+                            // set.
+                            new_heap_blkno = Some(decoded.blocks[0].blkno);
+                        }
+                    } else if info == pg_constants::XLOG_HEAP_LOCK {
+                        let xlrec = v17::XlHeapLock::decode(buf);
+                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
+                            old_heap_blkno = Some(decoded.blocks[0].blkno);
+                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
+                        }
+                    }
+                } else if decoded.xl_rmid == pg_constants::RM_HEAP2_ID {
+                    let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;
+                    if info == pg_constants::XLOG_HEAP2_MULTI_INSERT {
+                        let xlrec = v17::XlHeapMultiInsert::decode(buf);
+
+                        let offset_array_len =
+                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
+                                // the offsets array is omitted if XLOG_HEAP_INIT_PAGE is set
+                                0
+                            } else {
+                                size_of::<u16>() * xlrec.ntuples as usize
+                            };
+                        assert_eq!(offset_array_len, buf.remaining());
+
+                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
+                            new_heap_blkno = Some(decoded.blocks[0].blkno);
+                        }
+                    } else if info == pg_constants::XLOG_HEAP2_LOCK_UPDATED {
+                        let xlrec = v17::XlHeapLockUpdated::decode(buf);
+                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
+                            old_heap_blkno = Some(decoded.blocks[0].blkno);
+                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
+                        }
+                    }
+                } else {
+                    bail!("Unknown RMGR {} for Heap decoding", decoded.xl_rmid);
+                }
+            }
            _ => {}
        }

@@ -923,26 +1076,26 @@ impl WalIngest {
        assert_eq!(decoded.xl_rmid, pg_constants::RM_NEON_ID);

        match pg_version {
-            16 => {
+            16 | 17 => {
                let info = decoded.xl_info & pg_constants::XLOG_HEAP_OPMASK;

                match info {
                    pg_constants::XLOG_NEON_HEAP_INSERT => {
-                        let xlrec = v16::rm_neon::XlNeonHeapInsert::decode(buf);
+                        let xlrec = v17::rm_neon::XlNeonHeapInsert::decode(buf);
                        assert_eq!(0, buf.remaining());
                        if (xlrec.flags & pg_constants::XLH_INSERT_ALL_VISIBLE_CLEARED) != 0 {
                            new_heap_blkno = Some(decoded.blocks[0].blkno);
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_DELETE => {
-                        let xlrec = v16::rm_neon::XlNeonHeapDelete::decode(buf);
+                        let xlrec = v17::rm_neon::XlNeonHeapDelete::decode(buf);
                        if (xlrec.flags & pg_constants::XLH_DELETE_ALL_VISIBLE_CLEARED) != 0 {
                            new_heap_blkno = Some(decoded.blocks[0].blkno);
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_UPDATE
                    | pg_constants::XLOG_NEON_HEAP_HOT_UPDATE => {
-                        let xlrec = v16::rm_neon::XlNeonHeapUpdate::decode(buf);
+                        let xlrec = v17::rm_neon::XlNeonHeapUpdate::decode(buf);
                        // the size of tuple data is inferred from the size of the record.
                        // we can't validate the remaining number of bytes without parsing
                        // the tuple data.
@@ -958,7 +1111,7 @@ impl WalIngest {
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_MULTI_INSERT => {
-                        let xlrec = v16::rm_neon::XlNeonHeapMultiInsert::decode(buf);
+                        let xlrec = v17::rm_neon::XlNeonHeapMultiInsert::decode(buf);

                        let offset_array_len =
                            if decoded.xl_info & pg_constants::XLOG_HEAP_INIT_PAGE > 0 {
@@ -974,7 +1127,7 @@ impl WalIngest {
                        }
                    }
                    pg_constants::XLOG_NEON_HEAP_LOCK => {
-                        let xlrec = v16::rm_neon::XlNeonHeapLock::decode(buf);
+                        let xlrec = v17::rm_neon::XlNeonHeapLock::decode(buf);
                        if (xlrec.flags & pg_constants::XLH_LOCK_ALL_FROZEN_CLEARED) != 0 {
                            old_heap_blkno = Some(decoded.blocks[0].blkno);
                            flags = pg_constants::VISIBILITYMAP_ALL_FROZEN;
--- a/pageserver/src/walrecord.rs
+++ b/pageserver/src/walrecord.rs
@@ -174,6 +174,7 @@ impl DecodedWALRecord {
                }
                15 => info == postgres_ffi::v15::bindings::XLOG_DBASE_CREATE_FILE_COPY,
                16 => info == postgres_ffi::v16::bindings::XLOG_DBASE_CREATE_FILE_COPY,
+                17 => info == postgres_ffi::v17::bindings::XLOG_DBASE_CREATE_FILE_COPY,
                _ => {
                    panic!("Unsupported postgres version {pg_version}")
                }
@@ -341,16 +342,47 @@ pub mod v14 {
            }
        }
    }
+
+    #[repr(C)]
+    #[derive(Debug)]
+    pub struct XlParameterChange {
+        pub max_connections: i32,
+        pub max_worker_processes: i32,
+        pub max_wal_senders: i32,
+        pub max_prepared_xacts: i32,
+        pub max_locks_per_xact: i32,
+        pub wal_level: i32,
+        pub wal_log_hints: bool,
+        pub track_commit_timestamp: bool,
+        pub _padding: [u8; 2],
+    }
+
+    impl XlParameterChange {
+        pub fn decode(buf: &mut Bytes) -> XlParameterChange {
+            XlParameterChange {
+                max_connections: buf.get_i32_le(),
+                max_worker_processes: buf.get_i32_le(),
+                max_wal_senders: buf.get_i32_le(),
+                max_prepared_xacts: buf.get_i32_le(),
+                max_locks_per_xact: buf.get_i32_le(),
+                wal_level: buf.get_i32_le(),
+                wal_log_hints: buf.get_u8() != 0,
+                track_commit_timestamp: buf.get_u8() != 0,
+                _padding: [buf.get_u8(), buf.get_u8()],
+            }
+        }
+    }
 }

 pub mod v15 {
    pub use super::v14::{
        XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapLockUpdated, XlHeapMultiInsert, XlHeapUpdate,
+        XlParameterChange,
    };
 }

 pub mod v16 {
-    pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert};
+    pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange};
    use bytes::{Buf, Bytes};
    use postgres_ffi::{OffsetNumber, TransactionId};

@@ -529,6 +561,37 @@ pub mod v16 {
    }
 }

+pub mod v17 {
+    pub use super::v14::XlHeapLockUpdated;
+    use bytes::{Buf, Bytes};
+    pub use postgres_ffi::{TimeLineID, TimestampTz};
+
+    pub use super::v16::rm_neon;
+    pub use super::v16::{
+        XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange,
+    };
+
+    #[repr(C)]
+    #[derive(Debug)]
+    pub struct XlEndOfRecovery {
+        pub end_time: TimestampTz,
+        pub this_time_line_id: TimeLineID,
+        pub prev_time_line_id: TimeLineID,
+        pub wal_level: i32,
+    }
+
+    impl XlEndOfRecovery {
+        pub fn decode(buf: &mut Bytes) -> XlEndOfRecovery {
+            XlEndOfRecovery {
+                end_time: buf.get_i64_le(),
+                this_time_line_id: buf.get_u32_le(),
+                prev_time_line_id: buf.get_u32_le(),
+                wal_level: buf.get_i32_le(),
+            }
+        }
+    }
+}
+
 #[repr(C)]
 #[derive(Debug)]
 pub struct XlSmgrCreate {
@@ -746,9 +809,13 @@ pub struct XlClogTruncate {
 }

 impl XlClogTruncate {
-    pub fn decode(buf: &mut Bytes) -> XlClogTruncate {
+    pub fn decode(buf: &mut Bytes, pg_version: u32) -> XlClogTruncate {
        XlClogTruncate {
-            pageno: buf.get_u32_le(),
+            pageno: if pg_version < 17 {
+                buf.get_u32_le()
+            } else {
+                buf.get_u64_le() as u32
+            },
            oldest_xid: buf.get_u32_le(),
            oldest_xid_db: buf.get_u32_le(),
        }
--- a/pageserver/src/walredo.rs
+++ b/pageserver/src/walredo.rs
@@ -35,6 +35,7 @@ use anyhow::Context;
 use bytes::{Bytes, BytesMut};
 use pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus};
 use pageserver_api::shard::TenantShardId;
+use std::future::Future;
 use std::sync::Arc;
 use std::time::Duration;
 use std::time::Instant;
@@ -296,6 +297,97 @@ impl PostgresRedoManager {
        }
    }

+    async fn do_with_walredo_process<
+        F: FnOnce(Arc<Process>) -> Fut,
+        Fut: Future<Output = Result<O, Error>>,
+        O,
+    >(
+        &self,
+        pg_version: u32,
+        closure: F,
+    ) -> Result<O, Error> {
+        let proc: Arc<Process> = match self.redo_process.get_or_init_detached().await {
+            Ok(guard) => match &*guard {
+                ProcessOnceCell::Spawned(proc) => Arc::clone(proc),
+                ProcessOnceCell::ManagerShutDown => {
+                    return Err(Error::Cancelled);
+                }
+            },
+            Err(permit) => {
+                let start = Instant::now();
+                // acquire guard before spawning process, so that we don't spawn new processes
+                // if the gate is already closed.
+                let _launched_processes_guard = match self.launched_processes.enter() {
+                    Ok(guard) => guard,
+                    Err(GateError::GateClosed) => unreachable!(
+                        "shutdown sets the once cell to `ManagerShutDown` state before closing the gate"
+                    ),
+                };
+                let proc = Arc::new(Process {
+                    process: process::WalRedoProcess::launch(
+                        self.conf,
+                        self.tenant_shard_id,
+                        pg_version,
+                    )
+                    .context("launch walredo process")?,
+                    _launched_processes_guard,
+                });
+                let duration = start.elapsed();
+                WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());
+                info!(
+                    elapsed_ms = duration.as_millis(),
+                    pid = proc.id(),
+                    "launched walredo process"
+                );
+                self.redo_process
+                    .set(ProcessOnceCell::Spawned(Arc::clone(&proc)), permit);
+                proc
+            }
+        };
+
+        // Async closures are unstable; once stable, the closure could borrow `&Process` instead.
+        let result = closure(proc.clone()).await;
+
+        if result.is_err() {
+            // Avoid concurrent callers hitting the same issue by taking `proc` out of the rotation.
+            // Note that there may be other tasks concurrent with us that also hold `proc`.
+            // We have to deal with that here.
+            // Also read the doc comment on field `self.redo_process`.
+            //
+            // NB: there may still be other concurrent threads using `proc`.
+            // The last one will send SIGKILL when the underlying Arc reaches refcount 0.
+            //
+            // NB: the drop impl blocks the dropping thread with a wait() system call for
+            // the child process. In some ways the blocking is actually good: if we
+            // deferred the waiting into the background / to tokio if we used `tokio::process`,
+            // it could happen that if walredo always fails immediately, we spawn processes faster
+            // than we can SIGKILL & `wait` for them to exit. By doing it the way we do here,
+            // we limit this risk of run-away to at most $num_runtimes * $num_executor_threads.
+            // This probably needs revisiting at some later point.
+            match self.redo_process.get() {
+                None => (),
+                Some(guard) => {
+                    match &*guard {
+                        ProcessOnceCell::ManagerShutDown => {}
+                        ProcessOnceCell::Spawned(guard_proc) => {
+                            if Arc::ptr_eq(&proc, guard_proc) {
+                                // We're the first to observe an error from `proc`, it's our job to take it out of rotation.
+                                guard.take_and_deinit();
+                            } else {
+                                // Another task already spawned another redo process (further up in this method)
+                                // and put it into `redo_process`. Do nothing, our view of the world is behind.
+                            }
+                        }
+                    }
+                }
+            }
+            // The last task that does this `drop()` of `proc` will do a blocking `wait()` syscall.
+            drop(proc);
+        }
+
+        result
+    }
+
    ///
    /// Process one request for WAL redo using wal-redo postgres
    ///
@@ -319,130 +411,63 @@ impl PostgresRedoManager {
        const MAX_RETRY_ATTEMPTS: u32 = 1;
        let mut n_attempts = 0u32;
        loop {
-            let proc: Arc<Process> = match self.redo_process.get_or_init_detached().await {
-                Ok(guard) => match &*guard {
-                    ProcessOnceCell::Spawned(proc) => Arc::clone(proc),
-                    ProcessOnceCell::ManagerShutDown => {
-                        return Err(Error::Cancelled);
-                    }
-                },
-                Err(permit) => {
-                    let start = Instant::now();
-                    // acquire guard before spawning process, so that we don't spawn new processes
-                    // if the gate is already closed.
-                    let _launched_processes_guard = match self.launched_processes.enter() {
-                                Ok(guard) => guard,
-                                Err(GateError::GateClosed) => unreachable!(
-                                    "shutdown sets the once cell to `ManagerShutDown` state before closing the gate"
-                                ),
-                            };
-                    let proc = Arc::new(Process {
-                        process: process::WalRedoProcess::launch(
-                            self.conf,
-                            self.tenant_shard_id,
-                            pg_version,
-                        )
-                        .context("launch walredo process")?,
-                        _launched_processes_guard,
-                    });
-                    let duration = start.elapsed();
-                    WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM.observe(duration.as_secs_f64());
-                    info!(
-                        duration_ms = duration.as_millis(),
-                        pid = proc.id(),
-                        "launched walredo process"
-                    );
-                    self.redo_process
-                        .set(ProcessOnceCell::Spawned(Arc::clone(&proc)), permit);
-                    proc
-                }
-            };
+            let base_img = &base_img;
+            let closure = |proc: Arc<Process>| async move {
+                let started_at = std::time::Instant::now();

-            let started_at = std::time::Instant::now();
+                // Relational WAL records are applied using wal-redo-postgres
+                let result = proc
+                    .apply_wal_records(rel, blknum, base_img, records, wal_redo_timeout)
+                    .await
+                    .context("apply_wal_records");

-            // Relational WAL records are applied using wal-redo-postgres
-            let result = proc
-                .apply_wal_records(rel, blknum, &base_img, records, wal_redo_timeout)
-                .await
-                .context("apply_wal_records");
+                let duration = started_at.elapsed();

-            let duration = started_at.elapsed();
-
-            let len = records.len();
-            let nbytes = records.iter().fold(0, |acumulator, record| {
-                acumulator
-                    + match &record.1 {
-                        NeonWalRecord::Postgres { rec, .. } => rec.len(),
-                        _ => unreachable!("Only PostgreSQL records are accepted in this batch"),
-                    }
-            });
-
-            WAL_REDO_TIME.observe(duration.as_secs_f64());
-            WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);
-            WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);
-
-            debug!(
-                "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}",
-                len,
-                nbytes,
-                duration.as_micros(),
-                lsn
-            );
-
-            // If something went wrong, don't try to reuse the process. Kill it, and
-            // next request will launch a new one.
-            if let Err(e) = result.as_ref() {
-                error!(
-                    "error applying {} WAL records {}..{} ({} bytes) to key {key}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}",
-                    records.len(),
-                    records.first().map(|p| p.0).unwrap_or(Lsn(0)),
-                    records.last().map(|p| p.0).unwrap_or(Lsn(0)),
-                    nbytes,
-                    base_img_lsn,
-                    lsn,
-                    n_attempts,
-                    e,
-                );
-                // Avoid concurrent callers hitting the same issue by taking `proc` out of the rotation.
-                // Note that there may be other tasks concurrent with us that also hold `proc`.
-                // We have to deal with that here.
-                // Also read the doc comment on field `self.redo_process`.
-                //
-                // NB: there may still be other concurrent threads using `proc`.
-                // The last one will send SIGKILL when the underlying Arc reaches refcount 0.
-                //
-                // NB: the drop impl blocks the dropping thread with a wait() system call for
-                // the child process. In some ways the blocking is actually good: if we
-                // deferred the waiting into the background / to tokio if we used `tokio::process`,
-                // it could happen that if walredo always fails immediately, we spawn processes faster
-                // than we can SIGKILL & `wait` for them to exit. By doing it the way we do here,
-                // we limit this risk of run-away to at most $num_runtimes * $num_executor_threads.
-                // This probably needs revisiting at some later point.
-                match self.redo_process.get() {
-                    None => (),
-                    Some(guard) => {
-                        match &*guard {
-                            ProcessOnceCell::ManagerShutDown => {}
-                            ProcessOnceCell::Spawned(guard_proc) => {
-                                if Arc::ptr_eq(&proc, guard_proc) {
-                                    // We're the first to observe an error from `proc`, it's our job to take it out of rotation.
-                                    guard.take_and_deinit();
-                                } else {
-                                    // Another task already spawned another redo process (further up in this method)
-                                    // and put it into `redo_process`. Do nothing, our view of the world is behind.
-                                }
-                            }
+                let len = records.len();
+                let nbytes = records.iter().fold(0, |acumulator, record| {
+                    acumulator
+                        + match &record.1 {
+                            NeonWalRecord::Postgres { rec, .. } => rec.len(),
+                            _ => unreachable!("Only PostgreSQL records are accepted in this batch"),
                        }
-                    }
+                });
+
+                WAL_REDO_TIME.observe(duration.as_secs_f64());
+                WAL_REDO_RECORDS_HISTOGRAM.observe(len as f64);
+                WAL_REDO_BYTES_HISTOGRAM.observe(nbytes as f64);
+
+                debug!(
+                    "postgres applied {} WAL records ({} bytes) in {} us to reconstruct page image at LSN {}",
+                    len,
+                    nbytes,
+                    duration.as_micros(),
+                    lsn
+                );
+
+                if let Err(e) = result.as_ref() {
+                    error!(
+                        "error applying {} WAL records {}..{} ({} bytes) to key {key}, from base image with LSN {} to reconstruct page image at LSN {} n_attempts={}: {:?}",
+                        records.len(),
+                        records.first().map(|p| p.0).unwrap_or(Lsn(0)),
+                        records.last().map(|p| p.0).unwrap_or(Lsn(0)),
+                        nbytes,
+                        base_img_lsn,
+                        lsn,
+                        n_attempts,
+                        e,
+                    );
                }
-                // The last task that does this `drop()` of `proc` will do a blocking `wait()` syscall.
-                drop(proc);
-            } else if n_attempts != 0 {
+
+                result.map_err(Error::Other)
+            };
+            let result = self.do_with_walredo_process(pg_version, closure).await;
+
+            if result.is_ok() && n_attempts != 0 {
                info!(n_attempts, "retried walredo succeeded");
            }
            n_attempts += 1;
            if n_attempts > MAX_RETRY_ATTEMPTS || result.is_ok() {
-                return result.map_err(Error::Other);
+                return result;
            }
        }
    }
--- a/pgxn/neon/bitmap.h
+++ b/pgxn/neon/bitmap.h
@@ -0,0 +1,12 @@
+#ifndef NEON_BITMAP_H
+#define NEON_BITMAP_H
+
+/*
+ * Utilities for manipulating bits8* as bitmaps: bit N lives in byte N >> 3 at
+ * position N & 7. Each macro is one parenthesized expression; 'bit' is
+ * evaluated more than once, so do not pass arguments with side effects.
+ */
+#define BITMAP_ISSET(bm, bit) ((bm)[(bit) >> 3] & (1 << ((bit) & 7)))
+#define BITMAP_SET(bm, bit) ((bm)[(bit) >> 3] |= (1 << ((bit) & 7)))
+#define BITMAP_CLR(bm, bit) ((bm)[(bit) >> 3] &= ~(1 << ((bit) & 7)))
+
+#endif							/* NEON_BITMAP_H */
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -27,6 +27,7 @@
 #include "pagestore_client.h"
 #include "common/hashfn.h"
 #include "pgstat.h"
+#include "port/pg_iovec.h"
 #include "postmaster/bgworker.h"
 #include RELFILEINFO_HDR
 #include "storage/buf_internals.h"
@@ -40,6 +41,7 @@
 #include "utils/guc.h"

 #include "hll.h"
+#include "bitmap.h"

 #define CriticalAssert(cond) do if (!(cond)) elog(PANIC, "Assertion %s failed at %s:%d: ", #cond, __FILE__, __LINE__); while (0)

@@ -469,6 +471,99 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	return found;
 }

+/*
+ * Check which of the nblocks pages starting at blkno are present in the cache.
+ * Sets bit i of 'bitmap' for every cached page blkno + i (bits of absent pages
+ * are left untouched) and returns the number of pages found.
+ */
+int
+lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
+					int nblocks, bits8 *bitmap)
+{
+	BufferTag	tag;
+	FileCacheEntry *entry;
+	uint32		chunk_offs;
+	int			found = 0;
+	uint32		hash;
+	int			i = 0;
+
+	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
+		return 0;
+
+	CopyNRelFileInfoToBufTag(tag, rinfo);
+	tag.forkNum = forkNum;
+
+	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
+
+	tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
+	hash = get_hash_value(lfc_hash, &tag);
+	chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
+
+	LWLockAcquire(lfc_lock, LW_SHARED);
+
+	while (true)
+	{
+		int		this_chunk = Min(nblocks - i, BLOCKS_PER_CHUNK - chunk_offs);
+
+		/* Cache got disabled: stop, but leave via the common unlock path. */
+		if (!LFC_ENABLED())
+			break;
+
+		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+
+		if (entry != NULL)
+		{
+			for (; chunk_offs < BLOCKS_PER_CHUNK && i < nblocks; chunk_offs++, i++)
+			{
+				if ((entry->bitmap[chunk_offs >> 5] &
+					(1 << (chunk_offs & 31))) != 0)
+				{
+					BITMAP_SET(bitmap, i);
+					found++;
+				}
+			}
+		}
+		else
+		{
+			/* Chunk is not cached at all: skip its blocks. */
+			i += this_chunk;
+		}
+
+		/*
+		 * Stop once every requested block was examined, before doing
+		 * expensive stuff for a next iteration.
+		 */
+		if (i >= nblocks)
+			break;
+
+		/*
+		 * Prepare for the next iteration. We don't unlock here, as that'd
+		 * probably be more expensive than the gains it'd get us.
+		 */
+		tag.blockNum = (blkno + i) & ~(BLOCKS_PER_CHUNK - 1);
+		hash = get_hash_value(lfc_hash, &tag);
+		chunk_offs = (blkno + i) & (BLOCKS_PER_CHUNK - 1);
+	}
+
+	LWLockRelease(lfc_lock);
+
+#if USE_ASSERT_CHECKING
+	do {
+		int count = 0;
+
+		for (int j = 0; j < nblocks; j++)
+		{
+			if (BITMAP_ISSET(bitmap, j))
+				count++;
+		}
+
+		Assert(count == found);
+	} while (false);
+#endif
+
+	return found;
+}
+
 /*
 * Evict a page (if present) from the local file cache
 */
@@ -548,91 +643,171 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 }

 /*
- * Try to read page from local cache.
- * Returns true if page is found in local cache.
- * In case of error local file cache is disabled (lfc->limit is set to zero).
+ * Try to read pages from local cache.
+ * Returns the number of pages read from the local cache, and sets bits in
+ * 'mask' for the pages which were read. This may scribble over buffers not
+ * marked in 'mask', so be careful with operation ordering.
+ *
+ * In case of error local file cache is disabled (lfc->limit is set to zero),
+ * and -1 is returned. Note that 'mask' and the buffers may be touched and in
+ * an otherwise invalid state.
+ *
+ * The mask argument must point to a bitmap of at least nblocks bits; bits are
+ * set at the offsets of pages that were present and read from the LFC.
 */
-bool
-lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-		 char *buffer)
+int
+lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
+				 void **buffers, BlockNumber nblocks, bits8 *mask)
 {
 	BufferTag	tag;
 	FileCacheEntry *entry;
 	ssize_t		rc;
-	int			chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
 	bool		result = true;
 	uint32		hash;
 	uint64		generation;
 	uint32		entry_offset;
+	int			blocks_read = 0;
+	int			buf_offset = 0;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
-		return false;
+		return 0;

 	if (!lfc_ensure_opened())
-		return false;
+		return 0;

 	CopyNRelFileInfoToBufTag(tag, rinfo);
 	tag.forkNum = forkNum;
-	tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
-	hash = get_hash_value(lfc_hash, &tag);

-	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
+	/* 
+	 * For every chunk that has blocks we're interested in, we
+	 * 1. get the chunk header
+	 * 2. Check if the chunk actually has the blocks we're interested in
+	 * 3. Read the blocks we're looking for (in one preadv), assuming they exist
+	 * 4. Update the statistics for the read call.
+	 *
+	 * If there is an error, we do an early return.
+	 */
+	while (nblocks > 0)
 	{
+		struct iovec iov[PG_IOV_MAX];
+		int		chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
+		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
+		int		iteration_hits = 0;
+		int		iteration_misses = 0;
+		Assert(blocks_in_chunk > 0);
+
+		for (int i = 0; i < blocks_in_chunk; i++)
+		{
+			iov[i].iov_base = buffers[buf_offset + i];
+			iov[i].iov_len = BLCKSZ;
+		}
+
+		tag.blockNum = blkno - chunk_offs;
+		hash = get_hash_value(lfc_hash, &tag);
+
+		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+		/* We can return the blocks we've read before LFC got disabled;
+		 * assuming we read any. */
+		if (!LFC_ENABLED())
+		{
+			LWLockRelease(lfc_lock);
+			return blocks_read;
+		}
+
+		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+
+		/* Approximate working set for the blocks assumed in this entry */
+		for (int i = 0; i < blocks_in_chunk; i++)
+		{
+			tag.blockNum = blkno + i;
+			addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
+		}
+
+		if (entry == NULL)
+		{
+			/* Pages are not cached */
+			lfc_ctl->misses += blocks_in_chunk;
+			pgBufferUsage.file_cache.misses += blocks_in_chunk;
+			LWLockRelease(lfc_lock);
+
+			buf_offset += blocks_in_chunk;
+			nblocks -= blocks_in_chunk;
+			blkno += blocks_in_chunk;
+
+			continue;
+		}
+
+		/* Unlink entry from LRU list to pin it for the duration of IO operation */
+		if (entry->access_count++ == 0)
+			dlist_delete(&entry->list_node);
+
+		generation = lfc_ctl->generation;
+		entry_offset = entry->offset;
+
 		LWLockRelease(lfc_lock);
-		return false;
-	}

-	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL);
+		for (int i = 0; i < blocks_in_chunk; i++)
+		{
+			/*
+			 * If the page is valid, we consider it "read".
+			 * All other pages will be fetched separately by the next cache
+			 */
+			if (entry->bitmap[(chunk_offs + i) / 32] & (1 << ((chunk_offs + i) % 32)))
+			{
+				BITMAP_SET(mask, buf_offset + i);
+				iteration_hits++;
+			}
+			else
+				iteration_misses++;
+		}

-	/* Approximate working set */
-	tag.blockNum = blkno;
-	addSHLL(&lfc_ctl->wss_estimation, hash_bytes((uint8_t const*)&tag, sizeof(tag)));
+		Assert(iteration_hits + iteration_misses > 0);
+
+		if (iteration_hits != 0)
+		{
+			rc = preadv(lfc_desc, iov, blocks_in_chunk,
+						((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
+
+			if (rc != (BLCKSZ * blocks_in_chunk))
+			{
+				lfc_disable("read");
+				return -1;
+			}
+		}
+
+		/* Place entry to the head of LRU list */
+		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+		if (lfc_ctl->generation == generation)
+		{
+			CriticalAssert(LFC_ENABLED());
+			lfc_ctl->hits += iteration_hits;
+			lfc_ctl->misses += iteration_misses;
+			pgBufferUsage.file_cache.hits += iteration_hits;
+			pgBufferUsage.file_cache.misses += iteration_misses;
+			CriticalAssert(entry->access_count > 0);
+			if (--entry->access_count == 0)
+				dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
+		}
+		else
+		{
+			/* generation mismatch, assume error condition */
+			LWLockRelease(lfc_lock);
+			return -1;
+		}

-	if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0)
-	{
-		/* Page is not cached */
-		lfc_ctl->misses += 1;
-		pgBufferUsage.file_cache.misses += 1;
 		LWLockRelease(lfc_lock);
-		return false;
-	}
-	/* Unlink entry from LRU list to pin it for the duration of IO operation */
-	if (entry->access_count++ == 0)
-		dlist_delete(&entry->list_node);
-	generation = lfc_ctl->generation;
-	entry_offset = entry->offset;

-	LWLockRelease(lfc_lock);
-
-	rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
-	if (rc != BLCKSZ)
-	{
-		lfc_disable("read");
-		return false;
+		buf_offset += blocks_in_chunk;
+		nblocks -= blocks_in_chunk;
+		blkno += blocks_in_chunk;
+		blocks_read += iteration_hits;
 	}

-	/* Place entry to the head of LRU list */
-	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (lfc_ctl->generation == generation)
-	{
-		CriticalAssert(LFC_ENABLED());
-		lfc_ctl->hits += 1;
-		pgBufferUsage.file_cache.hits += 1;
-		CriticalAssert(entry->access_count > 0);
-		if (--entry->access_count == 0)
-			dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
-	}
-	else
-		result = false;
-
-	LWLockRelease(lfc_lock);
-
-	return result;
+	return blocks_read;
 }

 /*
@@ -640,20 +815,17 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 * If cache is full then evict some other page.
 */
 void
-#if PG_MAJORVERSION_NUM < 16
-lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer)
-#else
-lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void *buffer)
-#endif
+lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
+		   const void *const *buffers, BlockNumber nblocks)
 {
 	BufferTag	tag;
 	FileCacheEntry *entry;
 	ssize_t		rc;
 	bool		found;
-	int			chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
 	uint32		hash;
 	uint64		generation;
 	uint32		entry_offset;
+	int			buf_offset = 0;

 	if (lfc_maybe_disabled())	/* fast exit if file cache is disabled */
 		return;
@@ -661,110 +833,142 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void
 	if (!lfc_ensure_opened())
 		return;

-	tag.forkNum = forkNum;
-	tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
 	CopyNRelFileInfoToBufTag(tag, rinfo);
+	tag.forkNum = forkNum;

 	CriticalAssert(BufTagGetRelNumber(&tag) != InvalidRelFileNumber);
-	hash = get_hash_value(lfc_hash, &tag);

-	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-	if (!LFC_ENABLED())
+	/*
+	 * For every chunk that has blocks we're writing, we
+	 * 1. find the chunk's entry, or create one (evicting the least recently
+	 *    used chunk, reusing a hole, or extending the file)
+	 * 2. write the blocks into the chunk (in one pwritev)
+	 * 3. mark the written blocks as valid in the chunk's bitmap.
+	 *
+	 * If the LFC is found disabled we return early; a failed write disables it.
+	 */
+	while (nblocks > 0)
 	{
-		LWLockRelease(lfc_lock);
-		return;
-	}
+		struct iovec iov[PG_IOV_MAX];
+		int		chunk_offs = blkno & (BLOCKS_PER_CHUNK - 1);
+		int		blocks_in_chunk = Min(nblocks, BLOCKS_PER_CHUNK - (blkno % BLOCKS_PER_CHUNK));
+		Assert(blocks_in_chunk > 0);

-	entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);
-
-	if (found)
-	{
-		/*
-		 * Unlink entry from LRU list to pin it for the duration of IO
-		 * operation
-		 */
-		if (entry->access_count++ == 0)
-			dlist_delete(&entry->list_node);
-	}
-	else
-	{
-		/*
-		 * We have two choices if all cache pages are pinned (i.e. used in IO
-		 * operations):
-		 *
-		 * 1) Wait until some of this operation is completed and pages is
-		 * unpinned.
-		 *
-		 * 2) Allocate one more chunk, so that specified cache size is more
-		 * recommendation than hard limit.
-		 *
-		 * As far as probability of such event (that all pages are pinned) is
-		 * considered to be very very small: there are should be very large
-		 * number of concurrent IO operations and them are limited by
-		 * max_connections, we prefer not to complicate code and use second
-		 * approach.
-		 */
-		if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
+		for (int i = 0; i < blocks_in_chunk; i++)
 		{
-			/* Cache overflow: evict least recently used chunk */
-			FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
-
-			CriticalAssert(victim->access_count == 0);
-			entry->offset = victim->offset; /* grab victim's chunk */
-			hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
-			neon_log(DEBUG2, "Swap file cache page");
+			iov[i].iov_base = unconstify(void *, buffers[buf_offset + i]);
+			iov[i].iov_len = BLCKSZ;
 		}
-		else if (!dlist_is_empty(&lfc_ctl->holes))
+
+		tag.blockNum = blkno & ~(BLOCKS_PER_CHUNK - 1);
+		hash = get_hash_value(lfc_hash, &tag);
+
+		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+		if (!LFC_ENABLED())
 		{
-			/* We can reuse a hole that was left behind when the LFC was shrunk previously */
-			FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
-			uint32		offset = hole->offset;
-			bool		found;
+			LWLockRelease(lfc_lock);
+			return;
+		}

-			hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
-			CriticalAssert(found);
+		entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_ENTER, &found);

-			lfc_ctl->used += 1;
-			entry->offset = offset;	/* reuse the hole */
+		if (found)
+		{
+			/*
+			 * Unlink entry from LRU list to pin it for the duration of IO
+			 * operation
+			 */
+			if (entry->access_count++ == 0)
+				dlist_delete(&entry->list_node);
 		}
 		else
 		{
-			lfc_ctl->used += 1;
-			entry->offset = lfc_ctl->size++;	/* allocate new chunk at end
-												 * of file */
-		}
-		entry->access_count = 1;
-		entry->hash = hash;
-		memset(entry->bitmap, 0, sizeof entry->bitmap);
-	}
-
-	generation = lfc_ctl->generation;
-	entry_offset = entry->offset;
-	lfc_ctl->writes += 1;
-	LWLockRelease(lfc_lock);
-
-	rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
-	if (rc != BLCKSZ)
-	{
-		lfc_disable("write");
-	}
-	else
-	{
-		LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-
-		if (lfc_ctl->generation == generation)
-		{
-			CriticalAssert(LFC_ENABLED());
-			/* Place entry to the head of LRU list */
-			CriticalAssert(entry->access_count > 0);
-			if (--entry->access_count == 0)
-				dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
-
-			entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31));
+			/*
+			 * We have two choices if all cache pages are pinned (i.e. used in IO
+			 * operations):
+			 *
+			 * 1) Wait until some of this operation is completed and pages is
+			 * unpinned.
+			 *
+			 * 2) Allocate one more chunk, so that specified cache size is more
+			 * recommendation than hard limit.
+			 *
+			 * As far as probability of such event (that all pages are pinned) is
+			 * considered to be very very small: there are should be very large
+			 * number of concurrent IO operations and them are limited by
+			 * max_connections, we prefer not to complicate code and use second
+			 * approach.
+			 */
+			if (lfc_ctl->used >= lfc_ctl->limit && !dlist_is_empty(&lfc_ctl->lru))
+			{
+				/* Cache overflow: evict least recently used chunk */
+				FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
+	
+				CriticalAssert(victim->access_count == 0);
+				entry->offset = victim->offset; /* grab victim's chunk */
+				hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
+				neon_log(DEBUG2, "Swap file cache page");
+			}
+			else if (!dlist_is_empty(&lfc_ctl->holes))
+			{
+				/* We can reuse a hole that was left behind when the LFC was shrunk previously */
+				FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
+				uint32		offset = hole->offset;
+				bool		found;
+	
+				hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
+				CriticalAssert(found);
+	
+				lfc_ctl->used += 1;
+				entry->offset = offset;	/* reuse the hole */
+			}
+			else
+			{
+				lfc_ctl->used += 1;
+				entry->offset = lfc_ctl->size++;	/* allocate new chunk at end
+													 * of file */
+			}
+			entry->access_count = 1;
+			entry->hash = hash;
+			memset(entry->bitmap, 0, sizeof entry->bitmap);
 		}

+		generation = lfc_ctl->generation;
+		entry_offset = entry->offset;
+		lfc_ctl->writes += blocks_in_chunk;
 		LWLockRelease(lfc_lock);
+
+		rc = pwritev(lfc_desc, iov, blocks_in_chunk,
+					 ((off_t) entry_offset * BLOCKS_PER_CHUNK + chunk_offs) * BLCKSZ);
+		if (rc != BLCKSZ * blocks_in_chunk)
+		{
+			lfc_disable("write");
+		}
+		else
+		{
+			LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
+
+			if (lfc_ctl->generation == generation)
+			{
+				CriticalAssert(LFC_ENABLED());
+				/* Place entry to the head of LRU list */
+				CriticalAssert(entry->access_count > 0);
+				if (--entry->access_count == 0)
+					dlist_push_tail(&lfc_ctl->lru, &entry->list_node);
+
+				for (int i = 0; i < blocks_in_chunk; i++)
+				{
+					entry->bitmap[(chunk_offs + i) >> 5] |=
+						(1 << ((chunk_offs + i) & 31));
+				}
+			}
+
+			LWLockRelease(lfc_lock);
+		}
+		blkno += blocks_in_chunk;
+		buf_offset += blocks_in_chunk;
+		nblocks -= blocks_in_chunk;
 	}
 }

--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -537,7 +537,11 @@ pageserver_connect(shardno_t shard_no, int elevel)
 		/* No more polling needed; connection succeeded */
 		shard->last_connect_time = GetCurrentTimestamp();

+#if PG_MAJORVERSION_NUM >= 17
+		shard->wes_read = CreateWaitEventSet(NULL, 3);
+#else
 		shard->wes_read = CreateWaitEventSet(TopMemoryContext, 3);
+#endif
 		AddWaitEventToSet(shard->wes_read, WL_LATCH_SET, PGINVALID_SOCKET,
 						  MyLatch, NULL);
 		AddWaitEventToSet(shard->wes_read, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -6,7 +6,11 @@
 #ifndef NEON_PGVERSIONCOMPAT_H
 #define NEON_PGVERSIONCOMPAT_H

+#if PG_MAJORVERSION_NUM < 17
 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
+#else
+#define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != INVALID_PROC_NUMBER)
+#endif

 #define RelFileInfoEquals(a, b) ( \
 	NInfoGetSpcOid(a) == NInfoGetSpcOid(b) && \
@@ -50,7 +54,7 @@
 #define CopyNRelFileInfoToBufTag(tag, rinfo) \
 	do { \
 		(tag).rnode = (rinfo); \
-	} while (false);
+	} while (false)

 #define BufTagGetNRelFileInfo(tag) tag.rnode

@@ -98,7 +102,7 @@
 		(tag).spcOid = (rinfo).spcOid; \
 		(tag).dbOid = (rinfo).dbOid; \
 		(tag).relNumber = (rinfo).relNumber; \
-	} while (false);
+	} while (false)

 #define BufTagGetNRelFileInfo(tag) \
 	((RelFileLocator) { \
@@ -113,4 +117,10 @@
 #define DropRelationAllLocalBuffers DropRelationAllLocalBuffers
 #endif

+#if PG_MAJORVERSION_NUM < 17
+#define ProcNumber BackendId
+#define INVALID_PROC_NUMBER InvalidBackendId
+#define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
+#endif
+
 #endif							/* NEON_PGVERSIONCOMPAT_H */
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -6,8 +6,6 @@
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * contrib/neon/pagestore_client.h
- *
 *-------------------------------------------------------------------------
 */
 #ifndef pageserver_h
@@ -187,7 +185,7 @@ extern char *nm_to_string(NeonMessage *msg);
 * API
 */

-typedef unsigned shardno_t;
+typedef uint16 shardno_t;

 typedef struct
 {
@@ -211,7 +209,7 @@ extern int  neon_protocol_version;

 extern shardno_t get_shard_number(BufferTag* tag);

-extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
+extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
 extern void smgr_init_neon(void);
 extern void readahead_buffer_resize(int newsize, void *extra);

@@ -233,8 +231,13 @@ extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
 							BlockNumber blocknum, int nbuffers, bool skipFsync);
 #endif

+#if PG_MAJORVERSION_NUM >=17
+extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
+						  BlockNumber blocknum, int nblocks);
+#else
 extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
 						  BlockNumber blocknum);
+#endif

 /*
 * LSN values associated with each request to the pageserver
@@ -269,19 +272,11 @@ typedef struct
 } neon_request_lsns;

 #if PG_MAJORVERSION_NUM < 16
-extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-					  char *buffer);
 extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
 										 neon_request_lsns request_lsns, char *buffer);
-extern void neon_write(SMgrRelation reln, ForkNumber forknum,
-					   BlockNumber blocknum, char *buffer, bool skipFsync);
 #else
-extern void neon_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-					  void *buffer);
 extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
 										 neon_request_lsns request_lsns, void *buffer);
-extern void neon_write(SMgrRelation reln, ForkNumber forknum,
-					   BlockNumber blocknum, const void *buffer, bool skipFsync);
 #endif
 extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
 						   BlockNumber blocknum, BlockNumber nblocks);
@@ -299,17 +294,34 @@ extern void update_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum, BlockN
 extern void forget_cached_relsize(NRelFileInfo rinfo, ForkNumber forknum);

 /* functions for local file cache */
-#if PG_MAJORVERSION_NUM < 16
-extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-					  char *buffer);
-#else
-extern void lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
-					  const void *buffer);
-#endif
-extern bool lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, char *buffer);
-extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
+extern void lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum,
+					   BlockNumber blkno, const void *const *buffers,
+					   BlockNumber nblocks);
+/* returns number of blocks read, with one bit set in *mask for each block read */
+extern int lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum,
+							BlockNumber blkno, void **buffers,
+							BlockNumber nblocks, bits8 *mask);
+
+extern bool lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum,
+							   BlockNumber blkno);
+extern int lfc_cache_containsv(NRelFileInfo rinfo, ForkNumber forkNum,
+							   BlockNumber blkno, int nblocks, bits8 *bitmap);
 extern void lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno);
 extern void lfc_init(void);

+static inline bool
+lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
+		 void *buffer)
+{
+	bits8		rv = 0;	/* mask argument for lfc_readv_select(); not inspected here */
+	return lfc_readv_select(rinfo, forkNum, blkno, &buffer, 1, &rv) == 1;	/* true iff exactly one block is reported read */
+}
+
+static inline void
+lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
+		  const void *buffer)
+{
+	lfc_writev(rinfo, forkNum, blkno, &buffer, 1);	/* single-block wrapper around lfc_writev() */
+}

 #endif
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -81,6 +81,7 @@ static void nwp_register_gucs(void);
 static void assign_neon_safekeepers(const char *newval, void *extra);
 static void nwp_prepare_shmem(void);
 static uint64 backpressure_lag_impl(void);
+static uint64 startup_backpressure_wrap(void);
 static bool backpressure_throttling_impl(void);
 static void walprop_register_bgworker(void);

@@ -90,7 +91,7 @@ static void walprop_pg_init_bgworker(void);
 static TimestampTz walprop_pg_get_current_timestamp(WalProposer *wp);
 static void walprop_pg_load_libpqwalreceiver(void);

-static process_interrupts_callback_t PrevProcessInterruptsCallback;
+static process_interrupts_callback_t PrevProcessInterruptsCallback = NULL;
 static shmem_startup_hook_type prev_shmem_startup_hook_type;
 #if PG_VERSION_NUM >= 150000
 static shmem_request_hook_type prev_shmem_request_hook = NULL;
@@ -178,7 +179,7 @@ pg_init_walproposer(void)

 	nwp_prepare_shmem();

-	delay_backend_us = &backpressure_lag_impl;
+	delay_backend_us = &startup_backpressure_wrap;
 	PrevProcessInterruptsCallback = ProcessInterruptsCallback;
 	ProcessInterruptsCallback = backpressure_throttling_impl;

@@ -352,6 +353,22 @@ backpressure_lag_impl(void)
 	return 0;
 }

+/*
+ * We don't apply backpressure when we're the postmaster, or the startup
+ * process: the postmaster can't be throttled and the startup process can't
+ * afford to slow down. Elsewhere, switch to backpressure_lag_impl for good.
+ */
+static uint64
+startup_backpressure_wrap(void)
+{
+	if (AmStartupProcess() || !IsUnderPostmaster)
+		return 0;
+
+	delay_backend_us = &backpressure_lag_impl;	/* later calls bypass this wrapper */
+
+	return backpressure_lag_impl();
+}
+
 /*
 * WalproposerShmemSize --- report amount of shared memory space needed
 */
@@ -401,12 +418,13 @@ WalproposerShmemInit_SyncSafekeeper(void)
 static bool
 backpressure_throttling_impl(void)
 {
-	int64		lag;
+	uint64		lag;
 	TimestampTz start,
 				stop;
-	bool		retry = PrevProcessInterruptsCallback
-		? PrevProcessInterruptsCallback()
-		: false;
+	bool		retry = false;
+
+	if (PointerIsValid(PrevProcessInterruptsCallback))
+		retry = PrevProcessInterruptsCallback();

 	/*
 	 * Don't throttle read only transactions or wal sender. Do throttle CREATE
@@ -602,7 +620,12 @@ walprop_pg_init_walsender(void)
 	/* Create replication slot for WAL proposer if not exists */
 	if (SearchNamedReplicationSlot(WAL_PROPOSER_SLOT_NAME, false) == NULL)
 	{
+#if PG_MAJORVERSION_NUM >= 17
+		ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT,
+							  false, false, false);
+#else
 		ReplicationSlotCreate(WAL_PROPOSER_SLOT_NAME, false, RS_PERSISTENT, false);
+#endif
 		ReplicationSlotReserveWal();
 		/* Write this slot to disk */
 		ReplicationSlotMarkDirty();
@@ -1509,7 +1532,11 @@ walprop_pg_init_event_set(WalProposer *wp)
 		wpg_log(FATAL, "double-initialization of event set");

 	/* for each sk, we have socket plus potentially socket for neon walreader */
+#if PG_MAJORVERSION_NUM >= 17
+	waitEvents = CreateWaitEventSet(NULL, 2 + 2 * wp->n_safekeepers);
+#else
 	waitEvents = CreateWaitEventSet(TopMemoryContext, 2 + 2 * wp->n_safekeepers);
+#endif
 	AddWaitEventToSet(waitEvents, WL_LATCH_SET, PGINVALID_SOCKET,
 					  MyLatch, NULL);
 	AddWaitEventToSet(waitEvents, WL_EXIT_ON_PM_DEATH, PGINVALID_SOCKET,
--- a/pgxn/neon_rmgr/neon_rmgr_decode.c
+++ b/pgxn/neon_rmgr/neon_rmgr_decode.c
@@ -1,6 +1,7 @@
 #include "postgres.h"

 #if PG_MAJORVERSION_NUM >= 16
+
 #include "access/heapam_xlog.h"
 #include "access/neon_xlog.h"
 #include "replication/decode.h"
@@ -9,6 +10,10 @@

 #include "neon_rmgr.h"

+#endif /* PG >= 16 */
+
+#if PG_MAJORVERSION_NUM == 16
+
 /* individual record(group)'s handlers */
 static void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
 static void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
@@ -399,6 +404,398 @@ DecodeXLogTuple(char *data, Size len, ReorderBufferTupleBuf *tuple)
 	header->t_infomask2 = xlhdr.t_infomask2;
 	header->t_hoff = xlhdr.t_hoff;
 }
+#endif
+
+#if PG_MAJORVERSION_NUM == 17
+
+/* individual record(group)'s handlers */
+static void DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+static void DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf);
+
+/* common function to decode tuples */
+static void DecodeXLogTuple(char *data, Size len, HeapTuple tuple);


-#endif
+void
+neon_rm_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+	uint8		info = XLogRecGetInfo(buf->record) & XLOG_NEON_OPMASK;
+	TransactionId xid = XLogRecGetXid(buf->record);
+	SnapBuild  *builder = ctx->snapshot_builder;
+
+	ReorderBufferProcessXid(ctx->reorder, xid, buf->origptr);
+
+	/*
+	 * If we don't have snapshot or we are just fast-forwarding, there is no
+	 * point in decoding data changes.
+	 */
+	if (SnapBuildCurrentState(builder) < SNAPBUILD_FULL_SNAPSHOT ||
+		ctx->fast_forward)
+		return;
+
+	switch (info)	/* dispatch on the Neon heap opcode extracted above */
+	{
+		case XLOG_NEON_HEAP_INSERT:
+			if (SnapBuildProcessChange(builder, xid, buf->origptr))
+				DecodeNeonInsert(ctx, buf);
+			break;
+		case XLOG_NEON_HEAP_DELETE:
+			if (SnapBuildProcessChange(builder, xid, buf->origptr))
+				DecodeNeonDelete(ctx, buf);
+			break;
+		case XLOG_NEON_HEAP_UPDATE:
+		case XLOG_NEON_HEAP_HOT_UPDATE:	/* both update flavours share one decoder */
+			if (SnapBuildProcessChange(builder, xid, buf->origptr))
+				DecodeNeonUpdate(ctx, buf);
+			break;
+		case XLOG_NEON_HEAP_LOCK:	/* nothing is decoded for lock records */
+			break;
+		case XLOG_NEON_HEAP_MULTI_INSERT:
+			if (SnapBuildProcessChange(builder, xid, buf->origptr))
+				DecodeNeonMultiInsert(ctx, buf);
+			break;
+		default:
+			elog(ERROR, "unexpected RM_HEAP_ID record type: %u", info);	/* NOTE(review): message names RM_HEAP_ID although this decodes XLOG_NEON_* records -- confirm wording */
+			break;
+	}
+}
+
+static inline bool
+FilterByOrigin(LogicalDecodingContext *ctx, RepOriginId origin_id)
+{
+	if (ctx->callbacks.filter_by_origin_cb == NULL)
+		return false;	/* no filter callback registered: never skip the change */
+
+	return filter_by_origin_cb_wrapper(ctx, origin_id);	/* callers skip the change when this is true */
+}
+
+/*
+ * Parse XLOG_NEON_HEAP_INSERT (not MULTI_INSERT!) records into tuplebufs.
+ *
+ * Inserts can contain the new tuple.
+ */
+static void
+DecodeNeonInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+	Size		datalen;
+	char	   *tupledata;
+	Size		tuplelen;
+	XLogReaderState *r = buf->record;
+	xl_neon_heap_insert *xlrec;
+	ReorderBufferChange *change;
+	RelFileLocator target_locator;
+
+	xlrec = (xl_neon_heap_insert *) XLogRecGetData(r);
+
+	/*
+	 * Ignore insert records without new tuples (this does happen when
+	 * raw_heap_insert marks the TOAST record as HEAP_INSERT_NO_LOGICAL).
+	 */
+	if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
+		return;
+
+	/* only interested in our database */
+	XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
+	if (target_locator.dbOid != ctx->slot->data.database)
+		return;
+
+	/* output plugin doesn't look for this origin, no need to queue */
+	if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
+		return;
+
+	change = ReorderBufferGetChange(ctx->reorder);
+	if (!(xlrec->flags & XLH_INSERT_IS_SPECULATIVE))
+		change->action = REORDER_BUFFER_CHANGE_INSERT;
+	else
+		change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_INSERT;
+	change->origin_id = XLogRecGetOrigin(r);
+
+	memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
+
+	tupledata = XLogRecGetBlockData(r, 0, &datalen);
+	tuplelen = datalen - SizeOfNeonHeapHeader;	/* same header size DecodeXLogTuple() strips */
+
+	change->data.tp.newtuple =
+		ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
+
+	DecodeXLogTuple(tupledata, datalen, change->data.tp.newtuple);
+
+	change->data.tp.clear_toast_afterwards = true;
+
+	ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
+							 change,
+							 xlrec->flags & XLH_INSERT_ON_TOAST_RELATION);
+}
+
+/*
+ * Parse XLOG_NEON_HEAP_DELETE from wal into proper tuplebufs.
+ *
+ * Deletes can possibly contain the old primary key.
+ */
+static void
+DecodeNeonDelete(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+	XLogReaderState *r = buf->record;
+	xl_neon_heap_delete *xlrec;
+	ReorderBufferChange *change;
+	RelFileLocator target_locator;
+
+	xlrec = (xl_neon_heap_delete *) XLogRecGetData(r);
+
+	/* only interested in our database */
+	XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
+	if (target_locator.dbOid != ctx->slot->data.database)
+		return;
+
+	/* output plugin doesn't look for this origin, no need to queue */
+	if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
+		return;
+
+	change = ReorderBufferGetChange(ctx->reorder);
+
+	if (xlrec->flags & XLH_DELETE_IS_SUPER)
+		change->action = REORDER_BUFFER_CHANGE_INTERNAL_SPEC_ABORT;
+	else
+		change->action = REORDER_BUFFER_CHANGE_DELETE;
+
+	change->origin_id = XLogRecGetOrigin(r);
+
+	memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
+
+	/* old primary key stored right after the fixed-size delete record */
+	if (xlrec->flags & XLH_DELETE_CONTAINS_OLD)
+	{
+		Size		datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapDelete;	/* bytes following the delete record */
+		Size		tuplelen = datalen - SizeOfNeonHeapHeader;
+
+		Assert(XLogRecGetDataLen(r) > (SizeOfNeonHeapDelete + SizeOfNeonHeapHeader));
+
+		change->data.tp.oldtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
+
+		DecodeXLogTuple((char *) xlrec + SizeOfNeonHeapDelete,
+						datalen, change->data.tp.oldtuple);
+	}
+
+	change->data.tp.clear_toast_afterwards = true;
+
+	ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
+							 change, false);
+}
+
+/*
+ * Parse XLOG_NEON_HEAP_UPDATE and XLOG_NEON_HEAP_HOT_UPDATE, which are
+ * decoded identically here, from wal into proper tuplebufs.
+ *
+ * Updates can possibly contain a new tuple and the old primary key.
+ */
+static void
+DecodeNeonUpdate(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+	XLogReaderState *r = buf->record;
+	xl_neon_heap_update *xlrec;
+	ReorderBufferChange *change;
+	char	   *data;
+	RelFileLocator target_locator;
+
+	xlrec = (xl_neon_heap_update *) XLogRecGetData(r);
+
+	/* only interested in our database */
+	XLogRecGetBlockTag(r, 0, &target_locator, NULL, NULL);
+	if (target_locator.dbOid != ctx->slot->data.database)
+		return;
+
+	/* output plugin doesn't look for this origin, no need to queue */
+	if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
+		return;
+
+	change = ReorderBufferGetChange(ctx->reorder);
+	change->action = REORDER_BUFFER_CHANGE_UPDATE;
+	change->origin_id = XLogRecGetOrigin(r);
+	memcpy(&change->data.tp.rlocator, &target_locator, sizeof(RelFileLocator));
+
+	if (xlrec->flags & XLH_UPDATE_CONTAINS_NEW_TUPLE)
+	{
+		Size		datalen;
+		Size		tuplelen;
+
+		data = XLogRecGetBlockData(r, 0, &datalen);	/* new tuple is taken from block 0's data */
+
+		tuplelen = datalen - SizeOfNeonHeapHeader;
+
+		change->data.tp.newtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
+
+		DecodeXLogTuple(data, datalen, change->data.tp.newtuple);
+	}
+
+	if (xlrec->flags & XLH_UPDATE_CONTAINS_OLD)
+	{
+		Size		datalen;
+		Size		tuplelen;
+
+		/* caution, remaining data in record is not aligned */
+		data = XLogRecGetData(r) + SizeOfNeonHeapUpdate;	/* old tuple follows the update record in main data */
+		datalen = XLogRecGetDataLen(r) - SizeOfNeonHeapUpdate;
+		tuplelen = datalen - SizeOfNeonHeapHeader;
+
+		change->data.tp.oldtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, tuplelen);
+
+		DecodeXLogTuple(data, datalen, change->data.tp.oldtuple);
+	}
+
+	change->data.tp.clear_toast_afterwards = true;
+
+	ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r), buf->origptr,
+							 change, false);
+}
+
+/*
+ * Decode XLOG_NEON_HEAP_MULTI_INSERT record into multiple tuplebufs.
+ *
+ * Currently MULTI_INSERT will always contain the full tuples.
+ */
+static void
+DecodeNeonMultiInsert(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
+{
+	XLogReaderState *r = buf->record;
+	xl_neon_heap_multi_insert *xlrec;
+	int			i;
+	char	   *data;
+	char	   *tupledata;
+	Size		tuplelen;
+	RelFileLocator rlocator;
+
+	xlrec = (xl_neon_heap_multi_insert *) XLogRecGetData(r);
+
+	/*
+	 * Ignore insert records without new tuples.  This happens when a
+	 * multi_insert is done on a catalog or on a non-persistent relation.
+	 */
+	if (!(xlrec->flags & XLH_INSERT_CONTAINS_NEW_TUPLE))
+		return;
+
+	/* only interested in our database */
+	XLogRecGetBlockTag(r, 0, &rlocator, NULL, NULL);
+	if (rlocator.dbOid != ctx->slot->data.database)
+		return;
+
+	/* output plugin doesn't look for this origin, no need to queue */
+	if (FilterByOrigin(ctx, XLogRecGetOrigin(r)))
+		return;
+
+	/*
+	 * We know that this multi_insert isn't for a catalog, so the block should
+	 * always have data even if a full-page write of it is taken.
+	 */
+	tupledata = XLogRecGetBlockData(r, 0, &tuplelen);
+	Assert(tupledata != NULL);
+
+	data = tupledata;	/* cursor that walks the packed tuple entries */
+	for (i = 0; i < xlrec->ntuples; i++)
+	{
+		ReorderBufferChange *change;
+		xl_neon_multi_insert_tuple *xlhdr;
+		int			datalen;
+		HeapTuple	tuple;
+		HeapTupleHeader header;
+
+		change = ReorderBufferGetChange(ctx->reorder);
+		change->action = REORDER_BUFFER_CHANGE_INSERT;
+		change->origin_id = XLogRecGetOrigin(r);
+
+		memcpy(&change->data.tp.rlocator, &rlocator, sizeof(RelFileLocator));
+
+		xlhdr = (xl_neon_multi_insert_tuple *) SHORTALIGN(data);	/* each entry header is read at a SHORTALIGN'd address */
+		data = ((char *) xlhdr) + SizeOfNeonMultiInsertTuple;
+		datalen = xlhdr->datalen;
+
+		change->data.tp.newtuple =
+			ReorderBufferGetTupleBuf(ctx->reorder, datalen);
+
+		tuple = change->data.tp.newtuple;
+		header = tuple->t_data;
+
+		/* not a disk based tuple */
+		ItemPointerSetInvalid(&tuple->t_self);
+
+		/*
+		 * We can only figure this out after reassembling the transactions.
+		 */
+		tuple->t_tableOid = InvalidOid;
+
+		tuple->t_len = datalen + SizeofHeapTupleHeader;
+
+		memset(header, 0, SizeofHeapTupleHeader);	/* header fields not set below stay zero */
+
+		memcpy((char *) tuple->t_data + SizeofHeapTupleHeader,
+			   (char *) data,
+			   datalen);
+		header->t_infomask = xlhdr->t_infomask;
+		header->t_infomask2 = xlhdr->t_infomask2;
+		header->t_hoff = xlhdr->t_hoff;
+
+		/*
+		 * Reset toast reassembly state only after the last row in the last
+		 * xl_multi_insert_tuple record emitted by one heap_multi_insert()
+		 * call.
+		 */
+		if (xlrec->flags & XLH_INSERT_LAST_IN_MULTI &&
+			(i + 1) == xlrec->ntuples)
+			change->data.tp.clear_toast_afterwards = true;
+		else
+			change->data.tp.clear_toast_afterwards = false;
+
+		ReorderBufferQueueChange(ctx->reorder, XLogRecGetXid(r),
+								 buf->origptr, change, false);
+
+		/* move to the next xl_neon_multi_insert_tuple entry */
+		data += datalen;
+	}
+	Assert(data == tupledata + tuplelen);	/* every byte of the block data must have been consumed */
+}
+
+/*
+ * Read a HeapTuple as WAL logged by heap_insert, heap_update and heap_delete
+ * (but not by heap_multi_insert) into a tuplebuf.
+ *
+ * The size 'len' and the pointer 'data' in the record need to be
+ * computed outside as they are record specific.
+ */
+static void
+DecodeXLogTuple(char *data, Size len, HeapTuple tuple)
+{
+	xl_neon_heap_header xlhdr;
+	int			datalen = len - SizeOfNeonHeapHeader;	/* payload bytes after the xl_neon_heap_header */
+	HeapTupleHeader header;
+
+	Assert(datalen >= 0);
+
+	tuple->t_len = datalen + SizeofHeapTupleHeader;
+	header = tuple->t_data;
+
+	/* not a disk based tuple */
+	ItemPointerSetInvalid(&tuple->t_self);
+
+	/* we can only figure this out after reassembling the transactions */
+	tuple->t_tableOid = InvalidOid;
+
+	/* data is not stored aligned, copy to aligned storage */
+	memcpy((char *) &xlhdr,
+		   data,
+		   SizeOfNeonHeapHeader);
+
+	memset(header, 0, SizeofHeapTupleHeader);	/* header fields not set below stay zero */
+
+	memcpy(((char *) tuple->t_data) + SizeofHeapTupleHeader,
+		   data + SizeOfNeonHeapHeader,
+		   datalen);
+
+	header->t_infomask = xlhdr.t_infomask;
+	header->t_infomask2 = xlhdr.t_infomask2;
+	header->t_hoff = xlhdr.t_hoff;
+}
+#endif
--- a/pgxn/neon_walredo/inmem_smgr.c
+++ b/pgxn/neon_walredo/inmem_smgr.c
@@ -68,8 +68,13 @@ static void inmem_close(SMgrRelation reln, ForkNumber forknum);
 static void inmem_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 static bool inmem_exists(SMgrRelation reln, ForkNumber forknum);
 static void inmem_unlink(NRelFileInfoBackend rinfo, ForkNumber forknum, bool isRedo);
+#if PG_MAJORVERSION_NUM >= 17
+static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
+						   BlockNumber blocknum, int nblocks);
+#else
 static bool inmem_prefetch(SMgrRelation reln, ForkNumber forknum,
 						   BlockNumber blocknum);
+#endif
 #if PG_MAJORVERSION_NUM < 16
 static void inmem_extend(SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum, char *buffer, bool skipFsync);
@@ -93,7 +98,9 @@ static BlockNumber inmem_nblocks(SMgrRelation reln, ForkNumber forknum);
 static void inmem_truncate(SMgrRelation reln, ForkNumber forknum,
 						   BlockNumber nblocks);
 static void inmem_immedsync(SMgrRelation reln, ForkNumber forknum);
-
+#if PG_MAJORVERSION_NUM >= 17
+static void inmem_registersync(SMgrRelation reln, ForkNumber forknum);
+#endif

 /*
 *	inmem_init() -- Initialize private state
@@ -190,6 +197,14 @@ inmem_close(SMgrRelation reln, ForkNumber forknum)
 {
 }

+#if PG_MAJORVERSION_NUM >= 17
+static bool
+inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+			   int nblocks)
+{
+	return true;	/* no prefetching is performed; unconditionally returns true */
+}
+#else
 /*
 *	inmem_prefetch() -- Initiate asynchronous read of the specified block of a relation
 */
@@ -198,6 +213,7 @@ inmem_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
 	return true;
 }
+#endif

 /*
 * inmem_writeback() -- Tell the kernel to write pages back to storage.
@@ -211,11 +227,13 @@ inmem_writeback(SMgrRelation reln, ForkNumber forknum,
 /*
 *	inmem_read() -- Read the specified block from a relation.
 */
+#if PG_MAJORVERSION_NUM < 16
 static void
 inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
-#if PG_MAJORVERSION_NUM < 16
 		   char *buffer)
 #else
+static void
+inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 		   void *buffer)
 #endif
 {
@@ -228,6 +246,18 @@ inmem_read(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 		memcpy(buffer, page_body[pg], BLCKSZ);
 }

+#if PG_MAJORVERSION_NUM >= 17
+static void
+inmem_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
+			void **buffers, BlockNumber nblocks)
+{
+	for (BlockNumber i = 0; i < nblocks; i++)	/* unsigned index matches nblocks */
+	{
+		inmem_read(reln, forknum, blkno + i, buffers[i]);	/* buffers[i] receives block blkno + i */
+	}
+}
+#endif
+
 /*
 *	inmem_write() -- Write the supplied block at the appropriate location.
 *
@@ -280,6 +310,18 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	memcpy(page_body[pg], buffer, BLCKSZ);
 }

+#if PG_MAJORVERSION_NUM >= 17
+static void
+inmem_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
+			 const void **buffers, BlockNumber nblocks, bool skipFsync)
+{
+	for (BlockNumber i = 0; i < nblocks; i++)	/* unsigned index matches nblocks */
+	{
+		inmem_write(reln, forknum, blkno + i, buffers[i], skipFsync);	/* buffers[i] is written as block blkno + i */
+	}
+}
+#endif
+
 /*
 *	inmem_nblocks() -- Get the number of blocks stored in a relation.
 */
@@ -315,6 +357,13 @@ inmem_immedsync(SMgrRelation reln, ForkNumber forknum)
 {
 }

+#if PG_MAJORVERSION_NUM >= 17
+static void
+inmem_registersync(SMgrRelation reln, ForkNumber forknum)	/* no-op: empty body */
+{
+}
+#endif
+
 static const struct f_smgr inmem_smgr =
 {
 	.smgr_init = inmem_init,
@@ -328,23 +377,39 @@ static const struct f_smgr inmem_smgr =
 #if PG_MAJORVERSION_NUM >= 16
 	.smgr_zeroextend = inmem_zeroextend,
 #endif
+#if PG_MAJORVERSION_NUM >= 17
+	.smgr_prefetch = inmem_prefetch,
+	.smgr_readv = inmem_readv,
+	.smgr_writev = inmem_writev,
+#else
 	.smgr_prefetch = inmem_prefetch,
 	.smgr_read = inmem_read,
 	.smgr_write = inmem_write,
+#endif
 	.smgr_writeback = inmem_writeback,
 	.smgr_nblocks = inmem_nblocks,
 	.smgr_truncate = inmem_truncate,
 	.smgr_immedsync = inmem_immedsync,
+
+#if PG_MAJORVERSION_NUM >= 17
+	.smgr_registersync = inmem_registersync,
+#endif
+
+	.smgr_start_unlogged_build = NULL,
+	.smgr_finish_unlogged_build_phase_1 = NULL,
+	.smgr_end_unlogged_build = NULL,
+	.smgr_read_slru_segment = NULL,
 };

 const f_smgr *
-smgr_inmem(BackendId backend, NRelFileInfo rinfo)
+smgr_inmem(ProcNumber backend, NRelFileInfo rinfo)
 {
 	Assert(InRecovery);
-	if (backend != InvalidBackendId)
-		return smgr_standard(backend, rinfo);
-	else
-		return &inmem_smgr;
+	// // What does this code do?
+	// if (backend != INVALID_PROC_NUMBER)
+	// 	return smgr_standard(backend, rinfo);
+	// else
+	return &inmem_smgr;
 }

 void
--- a/pgxn/neon_walredo/inmem_smgr.h
+++ b/pgxn/neon_walredo/inmem_smgr.h
@@ -11,7 +11,7 @@
 #ifndef INMEM_SMGR_H
 #define INMEM_SMGR_H

-extern const f_smgr *smgr_inmem(BackendId backend, NRelFileInfo rinfo);
+extern const f_smgr *smgr_inmem(ProcNumber backend, NRelFileInfo rinfo);
 extern void smgr_init_inmem(void);

 #endif /* INMEM_SMGR_H */
--- a/pgxn/neon_walredo/walredoproc.c
+++ b/pgxn/neon_walredo/walredoproc.c
@@ -100,6 +100,9 @@
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
 #include "storage/dsm.h"
+#if PG_MAJORVERSION_NUM >= 17
+#include "storage/dsm_registry.h"
+#endif
 #include "storage/ipc.h"
 #include "storage/pg_shmem.h"
 #include "storage/pmsignal.h"
@@ -137,7 +140,7 @@ static BufferTag target_redo_tag;

 static XLogReaderState *reader_state;

-#define TRACE DEBUG5
+#define TRACE LOG

 #ifdef HAVE_LIBSECCOMP

@@ -517,6 +520,10 @@ CreateFakeSharedMemoryAndSemaphores()
 	/*
 	 * Set up xlog, clog, and buffers
 	 */
+#if PG_MAJORVERSION_NUM >= 17
+	DSMRegistryShmemInit();
+	VarsupShmemInit();
+#endif
 	XLOGShmemInit();
 	CLOGShmemInit();
 	CommitTsShmemInit();
@@ -566,7 +573,10 @@ CreateFakeSharedMemoryAndSemaphores()
 	/*
 	 * Set up other modules that need some shared memory space
 	 */
+#if PG_MAJORVERSION_NUM < 17
+	/* "snapshot too old" was removed in PG17, and with it the SnapMgr */
 	SnapMgrInit();
+#endif
 	BTreeShmemInit();
 	SyncScanShmemInit();
 	/* Skip due to the 'pg_notify' directory check */
@@ -742,7 +752,7 @@ BeginRedoForBlock(StringInfo input_message)
 		 target_redo_tag.forkNum,
 		 target_redo_tag.blockNum);

-	reln = smgropen(rinfo, InvalidBackendId, RELPERSISTENCE_PERMANENT);
+	reln = smgropen(rinfo, INVALID_PROC_NUMBER, RELPERSISTENCE_PERMANENT);
 	if (reln->smgr_cached_nblocks[forknum] == InvalidBlockNumber ||
 		reln->smgr_cached_nblocks[forknum] < blknum + 1)
 	{
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -18,7 +18,6 @@ atomic-take.workspace = true
 aws-config.workspace = true
 aws-sdk-iam.workspace = true
 aws-sigv4.workspace = true
-aws-types.workspace = true
 base64.workspace = true
 bstr.workspace = true
 bytes = { workspace = true, features = ["serde"] }
@@ -26,7 +25,6 @@ camino.workspace = true
 chrono.workspace = true
 clap.workspace = true
 consumption_metrics.workspace = true
-crossbeam-deque.workspace = true
 dashmap.workspace = true
 env_logger.workspace = true
 framed-websockets.workspace = true
@@ -48,11 +46,9 @@ indexmap.workspace = true
 ipnet.workspace = true
 itertools.workspace = true
 lasso = { workspace = true, features = ["multi-threaded"] }
-md5.workspace = true
 measured = { workspace = true, features = ["lasso"] }
 metrics.workspace = true
 once_cell.workspace = true
-opentelemetry.workspace = true
 parking_lot.workspace = true
 parquet.workspace = true
 parquet_derive.workspace = true
@@ -67,7 +63,6 @@ reqwest.workspace = true
 reqwest-middleware = { workspace = true, features = ["json"] }
 reqwest-retry.workspace = true
 reqwest-tracing.workspace = true
-routerify.workspace = true
 rustc-hash.workspace = true
 rustls-pemfile.workspace = true
 rustls.workspace = true
@@ -79,7 +74,6 @@ smol_str.workspace = true
 smallvec.workspace = true
 socket2.workspace = true
 subtle.workspace = true
-task-local-extensions.workspace = true
 thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
@@ -88,7 +82,6 @@ tokio-postgres-rustls.workspace = true
 tokio-rustls.workspace = true
 tokio-util.workspace = true
 tokio = { workspace = true, features = ["signal"] }
-tower-service.workspace = true
 tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -92,6 +92,12 @@ struct SqlOverHttpArgs {

    #[clap(long, default_value_t = 16)]
    sql_over_http_cancel_set_shards: usize,
+
+    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
+    sql_over_http_max_request_size_bytes: u64,
+
+    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
+    sql_over_http_max_response_size_bytes: usize,
 }

 #[tokio::main]
@@ -208,6 +214,8 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
        },
        cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),
        client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,
+        max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes,
+        max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
    };

    Ok(Box::leak(Box::new(ProxyConfig {
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -62,12 +62,13 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 #[derive(Clone, Debug, ValueEnum)]
 enum AuthBackendType {
    Console,
-    #[cfg(feature = "testing")]
-    Postgres,
    // clap only shows the name, not the alias, in usage text.
    // TODO: swap name/alias and deprecate "link"
    #[value(name("link"), alias("web"))]
    Web,
+
+    #[cfg(feature = "testing")]
+    Postgres,
 }

 /// Neon proxy/router
@@ -268,6 +269,12 @@ struct SqlOverHttpArgs {

    #[clap(long, default_value_t = 64)]
    sql_over_http_cancel_set_shards: usize,
+
+    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
+    sql_over_http_max_request_size_bytes: u64,
+
+    #[clap(long, default_value_t = 10 * 1024 * 1024)] // 10 MiB
+    sql_over_http_max_response_size_bytes: usize,
 }

 #[tokio::main]
@@ -633,17 +640,19 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
            let api = console::provider::ConsoleBackend::Console(api);
            auth::Backend::Console(MaybeOwned::Owned(api), ())
        }
-        #[cfg(feature = "testing")]
-        AuthBackendType::Postgres => {
-            let url = args.auth_endpoint.parse()?;
-            let api = console::provider::mock::Api::new(url);
-            let api = console::provider::ConsoleBackend::Postgres(api);
-            auth::Backend::Console(MaybeOwned::Owned(api), ())
-        }
+
        AuthBackendType::Web => {
            let url = args.uri.parse()?;
            auth::Backend::Web(MaybeOwned::Owned(url), ())
        }
+
+        #[cfg(feature = "testing")]
+        AuthBackendType::Postgres => {
+            let url = args.auth_endpoint.parse()?;
+            let api = console::provider::mock::Api::new(url, !args.is_private_access_proxy);
+            let api = console::provider::ConsoleBackend::Postgres(api);
+            auth::Backend::Console(MaybeOwned::Owned(api), ())
+        }
    };

    let config::ConcurrencyLockOptions {
@@ -679,6 +688,8 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        },
        cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),
        client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,
+        max_request_size_bytes: args.sql_over_http.sql_over_http_max_request_size_bytes,
+        max_response_size_bytes: args.sql_over_http.sql_over_http_max_response_size_bytes,
    };
    let authentication_config = AuthenticationConfig {
        thread_pool,
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -56,6 +56,8 @@ pub struct HttpConfig {
    pub pool_options: GlobalConnPoolOptions,
    pub cancel_set: CancelSet,
    pub client_conn_threshold: u64,
+    pub max_request_size_bytes: u64,
+    pub max_response_size_bytes: usize,
 }

 pub struct AuthenticationConfig {
--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -41,11 +41,15 @@ impl From<tokio_postgres::Error> for ApiError {
 #[derive(Clone)]
 pub struct Api {
    endpoint: ApiUrl,
+    ip_allowlist_check_enabled: bool,
 }

 impl Api {
-    pub fn new(endpoint: ApiUrl) -> Self {
-        Self { endpoint }
+    pub fn new(endpoint: ApiUrl, ip_allowlist_check_enabled: bool) -> Self {
+        Self {
+            endpoint,
+            ip_allowlist_check_enabled,
+        }
    }

    pub(crate) fn url(&self) -> &str {
@@ -64,6 +68,7 @@ impl Api {
                tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;

            tokio::spawn(connection);
+
            let secret = if let Some(entry) = get_execute_postgres_query(
                &client,
                "select rolpassword from pg_catalog.pg_authid where rolname = $1",
@@ -79,21 +84,26 @@ impl Api {
                warn!("user '{}' does not exist", user_info.user);
                None
            };
-            let allowed_ips = match get_execute_postgres_query(
-                &client,
-                "select allowed_ips from neon_control_plane.endpoints where endpoint_id = $1",
-                &[&user_info.endpoint.as_str()],
-                "allowed_ips",
-            )
-            .await?
-            {
-                Some(s) => {
-                    info!("got allowed_ips: {s}");
-                    s.split(',')
-                        .map(|s| IpPattern::from_str(s).unwrap())
-                        .collect()
+
+            let allowed_ips = if self.ip_allowlist_check_enabled {
+                match get_execute_postgres_query(
+                    &client,
+                    "select allowed_ips from neon_control_plane.endpoints where endpoint_id = $1",
+                    &[&user_info.endpoint.as_str()],
+                    "allowed_ips",
+                )
+                .await?
+                {
+                    Some(s) => {
+                        info!("got allowed_ips: {s}");
+                        s.split(',')
+                            .map(|s| IpPattern::from_str(s).unwrap())
+                            .collect()
+                    }
+                    None => vec![],
                }
-                None => vec![],
+            } else {
+                vec![]
            };

            Ok((secret, allowed_ips))
--- a/proxy/src/serverless/conn_pool.rs
+++ b/proxy/src/serverless/conn_pool.rs
@@ -776,6 +776,8 @@ mod tests {
            },
            cancel_set: CancelSet::new(0),
            client_conn_threshold: u64::MAX,
+            max_request_size_bytes: u64::MAX,
+            max_response_size_bytes: usize::MAX,
        }));
        let pool = GlobalConnPool::new(config);
        let conn_info = ConnInfo {
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -87,9 +87,6 @@ enum Payload {
    Batch(BatchQueryData),
 }

-const MAX_RESPONSE_SIZE: usize = 10 * 1024 * 1024; // 10 MiB
-const MAX_REQUEST_SIZE: u64 = 10 * 1024 * 1024; // 10 MiB
-
 static CONN_STRING: HeaderName = HeaderName::from_static("neon-connection-string");
 static RAW_TEXT_OUTPUT: HeaderName = HeaderName::from_static("neon-raw-text-output");
 static ARRAY_MODE: HeaderName = HeaderName::from_static("neon-array-mode");
@@ -366,10 +363,10 @@ pub(crate) enum SqlOverHttpError {
    ConnectCompute(#[from] HttpConnError),
    #[error("{0}")]
    ConnInfo(#[from] ConnInfoError),
-    #[error("request is too large (max is {MAX_REQUEST_SIZE} bytes)")]
-    RequestTooLarge,
-    #[error("response is too large (max is {MAX_RESPONSE_SIZE} bytes)")]
-    ResponseTooLarge,
+    #[error("request is too large (max is {0} bytes)")]
+    RequestTooLarge(u64),
+    #[error("response is too large (max is {0} bytes)")]
+    ResponseTooLarge(usize),
    #[error("invalid isolation level")]
    InvalidIsolationLevel,
    #[error("{0}")]
@@ -386,8 +383,8 @@ impl ReportableError for SqlOverHttpError {
            SqlOverHttpError::ReadPayload(e) => e.get_error_kind(),
            SqlOverHttpError::ConnectCompute(e) => e.get_error_kind(),
            SqlOverHttpError::ConnInfo(e) => e.get_error_kind(),
-            SqlOverHttpError::RequestTooLarge => ErrorKind::User,
-            SqlOverHttpError::ResponseTooLarge => ErrorKind::User,
+            SqlOverHttpError::RequestTooLarge(_) => ErrorKind::User,
+            SqlOverHttpError::ResponseTooLarge(_) => ErrorKind::User,
            SqlOverHttpError::InvalidIsolationLevel => ErrorKind::User,
            SqlOverHttpError::Postgres(p) => p.get_error_kind(),
            SqlOverHttpError::JsonConversion(_) => ErrorKind::Postgres,
@@ -402,8 +399,8 @@ impl UserFacingError for SqlOverHttpError {
            SqlOverHttpError::ReadPayload(p) => p.to_string(),
            SqlOverHttpError::ConnectCompute(c) => c.to_string_client(),
            SqlOverHttpError::ConnInfo(c) => c.to_string_client(),
-            SqlOverHttpError::RequestTooLarge => self.to_string(),
-            SqlOverHttpError::ResponseTooLarge => self.to_string(),
+            SqlOverHttpError::RequestTooLarge(_) => self.to_string(),
+            SqlOverHttpError::ResponseTooLarge(_) => self.to_string(),
            SqlOverHttpError::InvalidIsolationLevel => self.to_string(),
            SqlOverHttpError::Postgres(p) => p.to_string(),
            SqlOverHttpError::JsonConversion(_) => "could not parse postgres response".to_string(),
@@ -537,7 +534,7 @@ async fn handle_inner(

    let request_content_length = match request.body().size_hint().upper() {
        Some(v) => v,
-        None => MAX_REQUEST_SIZE + 1,
+        None => config.http_config.max_request_size_bytes + 1,
    };
    info!(request_content_length, "request size in bytes");
    Metrics::get()
@@ -547,8 +544,10 @@ async fn handle_inner(

    // we don't have a streaming request support yet so this is to prevent OOM
    // from a malicious user sending an extremely large request body
-    if request_content_length > MAX_REQUEST_SIZE {
-        return Err(SqlOverHttpError::RequestTooLarge);
+    if request_content_length > config.http_config.max_request_size_bytes {
+        return Err(SqlOverHttpError::RequestTooLarge(
+            config.http_config.max_request_size_bytes,
+        ));
    }

    let fetch_and_process_request = Box::pin(
@@ -612,7 +611,10 @@ async fn handle_inner(

    // Now execute the query and return the result.
    let json_output = match payload {
-        Payload::Single(stmt) => stmt.process(cancel, &mut client, parsed_headers).await?,
+        Payload::Single(stmt) => {
+            stmt.process(config, cancel, &mut client, parsed_headers)
+                .await?
+        }
        Payload::Batch(statements) => {
            if parsed_headers.txn_read_only {
                response = response.header(TXN_READ_ONLY.clone(), &HEADER_VALUE_TRUE);
@@ -628,7 +630,7 @@ async fn handle_inner(
            }

            statements
-                .process(cancel, &mut client, parsed_headers)
+                .process(config, cancel, &mut client, parsed_headers)
                .await?
        }
    };
@@ -656,6 +658,7 @@ async fn handle_inner(
 impl QueryData {
    async fn process(
        self,
+        config: &'static ProxyConfig,
        cancel: CancellationToken,
        client: &mut Client<tokio_postgres::Client>,
        parsed_headers: HttpHeaders,
@@ -664,7 +667,7 @@ impl QueryData {
        let cancel_token = inner.cancel_token();

        let res = match select(
-            pin!(query_to_json(&*inner, self, &mut 0, parsed_headers)),
+            pin!(query_to_json(config, &*inner, self, &mut 0, parsed_headers)),
            pin!(cancel.cancelled()),
        )
        .await
@@ -727,6 +730,7 @@ impl QueryData {
 impl BatchQueryData {
    async fn process(
        self,
+        config: &'static ProxyConfig,
        cancel: CancellationToken,
        client: &mut Client<tokio_postgres::Client>,
        parsed_headers: HttpHeaders,
@@ -751,44 +755,52 @@ impl BatchQueryData {
            discard.discard();
        })?;

-        let json_output =
-            match query_batch(cancel.child_token(), &transaction, self, parsed_headers).await {
-                Ok(json_output) => {
-                    info!("commit");
-                    let status = transaction.commit().await.inspect_err(|_| {
-                        // if we cannot commit - for now don't return connection to pool
-                        // TODO: get a query status from the error
-                        discard.discard();
-                    })?;
-                    discard.check_idle(status);
-                    json_output
-                }
-                Err(SqlOverHttpError::Cancelled(_)) => {
-                    if let Err(err) = cancel_token.cancel_query(NoTls).await {
-                        tracing::error!(?err, "could not cancel query");
-                    }
-                    // TODO: after cancelling, wait to see if we can get a status. maybe the connection is still safe.
+        let json_output = match query_batch(
+            config,
+            cancel.child_token(),
+            &transaction,
+            self,
+            parsed_headers,
+        )
+        .await
+        {
+            Ok(json_output) => {
+                info!("commit");
+                let status = transaction.commit().await.inspect_err(|_| {
+                    // if we cannot commit - for now don't return connection to pool
+                    // TODO: get a query status from the error
                    discard.discard();
+                })?;
+                discard.check_idle(status);
+                json_output
+            }
+            Err(SqlOverHttpError::Cancelled(_)) => {
+                if let Err(err) = cancel_token.cancel_query(NoTls).await {
+                    tracing::error!(?err, "could not cancel query");
+                }
+                // TODO: after cancelling, wait to see if we can get a status. maybe the connection is still safe.
+                discard.discard();

-                    return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Postgres));
-                }
-                Err(err) => {
-                    info!("rollback");
-                    let status = transaction.rollback().await.inspect_err(|_| {
-                        // if we cannot rollback - for now don't return connection to pool
-                        // TODO: get a query status from the error
-                        discard.discard();
-                    })?;
-                    discard.check_idle(status);
-                    return Err(err);
-                }
-            };
+                return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Postgres));
+            }
+            Err(err) => {
+                info!("rollback");
+                let status = transaction.rollback().await.inspect_err(|_| {
+                    // if we cannot rollback - for now don't return connection to pool
+                    // TODO: get a query status from the error
+                    discard.discard();
+                })?;
+                discard.check_idle(status);
+                return Err(err);
+            }
+        };

        Ok(json_output)
    }
 }

 async fn query_batch(
+    config: &'static ProxyConfig,
    cancel: CancellationToken,
    transaction: &Transaction<'_>,
    queries: BatchQueryData,
@@ -798,6 +810,7 @@ async fn query_batch(
    let mut current_size = 0;
    for stmt in queries.queries {
        let query = pin!(query_to_json(
+            config,
            transaction,
            stmt,
            &mut current_size,
@@ -826,6 +839,7 @@ async fn query_batch(
 }

 async fn query_to_json<T: GenericClient>(
+    config: &'static ProxyConfig,
    client: &T,
    data: QueryData,
    current_size: &mut usize,
@@ -846,8 +860,10 @@ async fn query_to_json<T: GenericClient>(
        rows.push(row);
        // we don't have a streaming response support yet so this is to prevent OOM
        // from a malicious query (eg a cross join)
-        if *current_size > MAX_RESPONSE_SIZE {
-            return Err(SqlOverHttpError::ResponseTooLarge);
+        if *current_size > config.http_config.max_response_size_bytes {
+            return Err(SqlOverHttpError::ResponseTooLarge(
+                config.http_config.max_response_size_bytes,
+            ));
        }
    }

--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -13,14 +13,12 @@ testing = ["fail/failpoints"]
 [dependencies]
 async-stream.workspace = true
 anyhow.workspace = true
-async-trait.workspace = true
 byteorder.workspace = true
 bytes.workspace = true
 camino.workspace = true
 camino-tempfile.workspace = true
 chrono.workspace = true
 clap = { workspace = true, features = ["derive"] }
-const_format.workspace = true
 crc32c.workspace = true
 fail.workspace = true
 git-version.workspace = true
@@ -38,8 +36,6 @@ scopeguard.workspace = true
 reqwest = { workspace = true, features = ["json"] }
 serde.workspace = true
 serde_json.workspace = true
-serde_with.workspace = true
-signal-hook.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
 thiserror.workspace = true
@@ -48,7 +44,6 @@ tokio-util = { workspace = true }
 tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
 tokio-tar.workspace = true
-toml_edit.workspace = true
 tracing.workspace = true
 url.workspace = true
 metrics.workspace = true
--- a/storage_broker/Cargo.toml
+++ b/storage_broker/Cargo.toml
@@ -10,7 +10,6 @@ bench = []
 [dependencies]
 anyhow.workspace = true
 async-stream.workspace = true
-bytes.workspace = true
 clap = { workspace = true, features = ["derive"] }
 const_format.workspace = true
 futures.workspace = true
@@ -24,7 +23,6 @@ parking_lot.workspace = true
 prost.workspace = true
 tonic.workspace = true
 tokio = { workspace = true, features = ["rt-multi-thread"] }
-tokio-stream.workspace = true
 tracing.workspace = true
 metrics.workspace = true
 utils.workspace = true
--- a/storage_controller/Cargo.toml
+++ b/storage_controller/Cargo.toml
@@ -15,9 +15,7 @@ testing = []

 [dependencies]
 anyhow.workspace = true
-aws-config.workspace = true
 bytes.workspace = true
-camino.workspace = true
 chrono.workspace = true
 clap.workspace = true
 fail.workspace = true
--- a/storage_controller/client/Cargo.toml
+++ b/storage_controller/client/Cargo.toml
@@ -5,18 +5,7 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
-pageserver_api.workspace = true
 pageserver_client.workspace = true
-thiserror.workspace = true
 reqwest.workspace = true
-utils.workspace = true
 serde.workspace = true
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
-tokio-postgres.workspace = true
-tokio-stream.workspace = true
-tokio.workspace = true
-futures.workspace = true
-tokio-util.workspace = true
-anyhow.workspace = true
-postgres.workspace = true
-bytes.workspace = true
--- a/storage_controller/src/http.rs
+++ b/storage_controller/src/http.rs
@@ -1,10 +1,11 @@
+use crate::http;
 use crate::metrics::{
    HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, PageserverRequestLabelGroup,
    METRICS_REGISTRY,
 };
 use crate::persistence::SafekeeperPersistence;
 use crate::reconciler::ReconcileError;
-use crate::service::{LeadershipStatus, Service, STARTUP_RECONCILE_TIMEOUT};
+use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT};
 use anyhow::Context;
 use futures::Future;
 use hyper::header::CONTENT_TYPE;
@@ -22,6 +23,7 @@ use pageserver_api::models::{
 };
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::{mgmt_api, BlockUnblock};
+use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
 use tokio_util::sync::CancellationToken;
@@ -87,9 +89,16 @@ fn get_state(request: &Request<Body>) -> &HttpState {
 }

 /// Pageserver calls into this on startup, to learn which tenants it should attach
-async fn handle_re_attach(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_re_attach(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::GenerationsApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let reattach_req = json_request::<ReAttachRequest>(&mut req).await?;
    let state = get_state(&req);
    json_response(StatusCode::OK, state.service.re_attach(reattach_req).await?)
@@ -97,9 +106,16 @@ async fn handle_re_attach(mut req: Request<Body>) -> Result<Response<Body>, ApiE

 /// Pageserver calls into this before doing deletions, to confirm that it still
 /// holds the latest generation for the tenants with deletions enqueued
-async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_validate(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::GenerationsApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let validate_req = json_request::<ValidateRequest>(&mut req).await?;
    let state = get_state(&req);
    json_response(StatusCode::OK, state.service.validate(validate_req).await?)
@@ -108,9 +124,16 @@ async fn handle_validate(mut req: Request<Body>) -> Result<Response<Body>, ApiEr
 /// Call into this before attaching a tenant to a pageserver, to acquire a generation number
 /// (in the real control plane this is unnecessary, because the same program is managing
 ///  generation numbers and doing attachments).
-async fn handle_attach_hook(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_attach_hook(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let attach_req = json_request::<AttachHookRequest>(&mut req).await?;
    let state = get_state(&req);

@@ -124,9 +147,16 @@ async fn handle_attach_hook(mut req: Request<Body>) -> Result<Response<Body>, Ap
    )
 }

-async fn handle_inspect(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_inspect(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let inspect_req = json_request::<InspectRequest>(&mut req).await?;

    let state = get_state(&req);
@@ -136,10 +166,17 @@ async fn handle_inspect(mut req: Request<Body>) -> Result<Response<Body>, ApiErr

 async fn handle_tenant_create(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::PageServerApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let create_req = json_request::<TenantCreateRequest>(&mut req).await?;

    json_response(
@@ -150,11 +187,18 @@ async fn handle_tenant_create(

 async fn handle_tenant_location_config(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let config_req = json_request::<TenantLocationConfigRequest>(&mut req).await?;
    json_response(
        StatusCode::OK,
@@ -166,10 +210,17 @@ async fn handle_tenant_location_config(

 async fn handle_tenant_config_set(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::PageServerApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let config_req = json_request::<TenantConfigRequest>(&mut req).await?;

    json_response(StatusCode::OK, service.tenant_config_set(config_req).await?)
@@ -182,16 +233,30 @@ async fn handle_tenant_config_get(
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    json_response(StatusCode::OK, service.tenant_config_get(tenant_id)?)
 }

 async fn handle_tenant_time_travel_remote_storage(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let time_travel_req = json_request::<TenantTimeTravelRequest>(&mut req).await?;

    let timestamp_raw = must_get_query_param(&req, "travel_to")?;
@@ -232,6 +297,13 @@ async fn handle_tenant_secondary_download(
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);

+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?;
    json_response(map_reqwest_hyper_status(status)?, progress)
 }
@@ -243,6 +315,13 @@ async fn handle_tenant_delete(
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    let status_code = service
        .tenant_delete(tenant_id)
        .await
@@ -258,11 +337,18 @@ async fn handle_tenant_delete(

 async fn handle_tenant_timeline_create(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let create_req = json_request::<TimelineCreateRequest>(&mut req).await?;
    json_response(
        StatusCode::CREATED,
@@ -277,9 +363,16 @@ async fn handle_tenant_timeline_delete(
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+
    check_permissions(&req, Scope::PageServerApi)?;

-    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };

    // For timeline deletions, which both implement an "initially return 202, then 404 once
    // we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
@@ -337,12 +430,19 @@ async fn handle_tenant_timeline_delete(

 async fn handle_tenant_timeline_archival_config(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+
    check_permissions(&req, Scope::PageServerApi)?;

-    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };

    let create_req = json_request::<TimelineArchivalConfigRequest>(&mut req).await?;

@@ -358,9 +458,16 @@ async fn handle_tenant_timeline_detach_ancestor(
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+
    check_permissions(&req, Scope::PageServerApi)?;

-    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };

    let res = service
        .tenant_timeline_detach_ancestor(tenant_id, timeline_id)
@@ -393,6 +500,13 @@ async fn handle_tenant_timeline_passthrough(
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let Some(path) = req.uri().path_and_query() else {
        // This should never happen, our request router only calls us if there is a path
        return Err(ApiError::BadRequest(anyhow::anyhow!("Missing path")));
@@ -460,9 +574,17 @@ async fn handle_tenant_locate(
    service: Arc<Service>,
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+
    check_permissions(&req, Scope::Admin)?;

-    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    json_response(StatusCode::OK, service.tenant_locate(tenant_id)?)
 }

@@ -473,6 +595,14 @@ async fn handle_tenant_describe(
    check_permissions(&req, Scope::Scrubber)?;

    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+
+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
 }

@@ -482,12 +612,26 @@ async fn handle_tenant_list(
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    json_response(StatusCode::OK, service.tenant_list())
 }

-async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_node_register(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let register_req = json_request::<NodeRegisterRequest>(&mut req).await?;
    let state = get_state(&req);
    state.service.node_register(register_req).await?;
@@ -497,6 +641,13 @@ async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>,
 async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let nodes = state.service.node_list().await?;
    let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
@@ -507,6 +658,13 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError
 async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;
    json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
@@ -515,14 +673,28 @@ async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError
 async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;
    json_response(StatusCode::OK, state.service.node_delete(node_id).await?)
 }

-async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_node_configure(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let node_id: NodeId = parse_request_param(&req, "node_id")?;
    let config_req = json_request::<NodeConfigureRequest>(&mut req).await?;
    if node_id != config_req.node_id {
@@ -548,6 +720,13 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
 async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;

@@ -570,6 +749,13 @@ async fn handle_node_shards(req: Request<Body>) -> Result<Response<Body>, ApiErr
 async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let leader = state.service.get_leader().await.map_err(|err| {
        ApiError::InternalServerError(anyhow::anyhow!(
@@ -583,6 +769,13 @@ async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiErro
 async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;

@@ -594,6 +787,13 @@ async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiErro
 async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;

@@ -605,6 +805,13 @@ async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>,
 async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;

@@ -616,6 +823,13 @@ async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError
 async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let node_id: NodeId = parse_request_param(&req, "node_id")?;

@@ -624,9 +838,16 @@ async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, A
    json_response(StatusCode::ACCEPTED, ())
 }

-async fn handle_metadata_health_update(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_metadata_health_update(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Scrubber)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let update_req = json_request::<MetadataHealthUpdateRequest>(&mut req).await?;
    let state = get_state(&req);

@@ -640,6 +861,13 @@ async fn handle_metadata_health_list_unhealthy(
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let unhealthy_tenant_shards = state.service.metadata_health_list_unhealthy().await?;

@@ -652,10 +880,17 @@ async fn handle_metadata_health_list_unhealthy(
 }

 async fn handle_metadata_health_list_outdated(
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let list_outdated_req = json_request::<MetadataHealthListOutdatedRequest>(&mut req).await?;
    let state = get_state(&req);
    let health_records = state
@@ -671,10 +906,17 @@ async fn handle_metadata_health_list_outdated(

 async fn handle_tenant_shard_split(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    let split_req = json_request::<TenantShardSplitRequest>(&mut req).await?;

@@ -686,10 +928,17 @@ async fn handle_tenant_shard_split(

 async fn handle_tenant_shard_migrate(
    service: Arc<Service>,
-    mut req: Request<Body>,
+    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?;
    let migrate_req = json_request::<TenantShardMigrateRequest>(&mut req).await?;
    json_response(
@@ -700,9 +949,16 @@ async fn handle_tenant_shard_migrate(
    )
 }

-async fn handle_tenant_update_policy(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_tenant_update_policy(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    let update_req = json_request::<TenantPolicyRequest>(&mut req).await?;
    let state = get_state(&req);
@@ -716,9 +972,16 @@ async fn handle_tenant_update_policy(mut req: Request<Body>) -> Result<Response<
    )
 }

-async fn handle_update_preferred_azs(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_update_preferred_azs(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let mut req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let azs_req = json_request::<ShardsPreferredAzsRequest>(&mut req).await?;
    let state = get_state(&req);

@@ -731,23 +994,46 @@ async fn handle_update_preferred_azs(mut req: Request<Body>) -> Result<Response<
 async fn handle_step_down(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    json_response(StatusCode::OK, state.service.step_down().await)
 }

 async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+
+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);

    json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
 }

 async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;

+    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
+
+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);

    json_response(
@@ -759,6 +1045,13 @@ async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiE
 async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    state.service.tenants_dump()
 }
@@ -766,6 +1059,13 @@ async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiEr
 async fn handle_scheduler_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    state.service.scheduler_dump()
 }
@@ -773,6 +1073,13 @@ async fn handle_scheduler_dump(req: Request<Body>) -> Result<Response<Body>, Api
 async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);

    json_response(StatusCode::OK, state.service.consistency_check().await?)
@@ -781,19 +1088,40 @@ async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>,
 async fn handle_reconcile_all(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);

    json_response(StatusCode::OK, state.service.reconcile_all_now().await?)
 }

 /// Status endpoint is just used for checking that our HTTP listener is up
-async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
+async fn handle_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(_req) => {}
+    };
+
    json_response(StatusCode::OK, ())
 }

 /// Readiness endpoint indicates when we're done doing startup I/O (e.g. reconciling
 /// with remote pageserver nodes).  This is intended for use as a kubernetes readiness probe.
 async fn handle_ready(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    if state.service.startup_complete.is_ready() {
        json_response(StatusCode::OK, ())
@@ -816,6 +1144,13 @@ async fn handle_get_safekeeper(req: Request<Body>) -> Result<Response<Body>, Api

    let id = parse_request_param::<i64>(&req, "id")?;

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);

    let res = state.service.get_safekeeper(id).await;
@@ -847,6 +1182,13 @@ async fn handle_upsert_safekeeper(mut req: Request<Body>) -> Result<Response<Bod
        )));
    }

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);

    state.service.upsert_safekeeper(body).await?;
@@ -925,10 +1267,7 @@ pub fn prologue_leadership_status_check_middleware<

        let allowed_routes = match leadership_status {
            LeadershipStatus::Leader => AllowedRoutes::All,
-            LeadershipStatus::SteppedDown => {
-                // TODO: does it make sense to allow /status here?
-                AllowedRoutes::Some(["/control/v1/step_down", "/status", "/metrics"].to_vec())
-            }
+            LeadershipStatus::SteppedDown => AllowedRoutes::All,
            LeadershipStatus::Candidate => {
                AllowedRoutes::Some(["/ready", "/status", "/metrics"].to_vec())
            }
@@ -1005,6 +1344,13 @@ fn epilogue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>
 pub async fn measured_metrics_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4";

+    let req = match maybe_forward(req).await {
+        ForwardOutcome::Forwarded(res) => {
+            return res;
+        }
+        ForwardOutcome::NotForwarded(req) => req,
+    };
+
    let state = get_state(&req);
    let payload = crate::metrics::METRICS_REGISTRY.encode(&state.neon_metrics);
    let response = Response::builder()
@@ -1032,6 +1378,220 @@ where
    request_span(request, handler).await
 }

+enum ForwardOutcome {
+    Forwarded(Result<Response<Body>, ApiError>),
+    NotForwarded(Request<Body>),
+}
+
+/// Potentially forward the request to the current storage controller leader.
+/// More specifically we forward when:
+/// 1. Request is not one of ["/control/v1/step_down", "/status", "/ready", "/metrics"]
+/// 2. Current instance is in [`LeadershipStatus::SteppedDown`] state
+/// 3. There is a leader in the database to forward to
+/// 4. Leader from step (3) is not the current instance
+///
+/// Why forward?
+/// It turns out that we can't rely on external orchestration to promptly route traffic to the
+/// new leader. This is downtime inducing. Forwarding provides a safe way out.
+///
+/// Why is it safe?
+/// If a storcon instance is persisted in the database, then we know that it is the current leader.
+/// There's one exception: time between handling step-down request and the new leader updating the
+/// database.
+///
+/// Let's treat the happy case first. The stepped down node does not produce any side effects,
+/// since all request handling happens on the leader.
+///
+/// As for the edge case, we are guaranteed to always have a maximum of two running instances.
+/// Hence, if we are in the edge case scenario the leader persisted in the database is the
+/// stepped down instance that received the request. Condition (4) above covers this scenario.
+async fn maybe_forward(req: Request<Body>) -> ForwardOutcome {
+    const NOT_FOR_FORWARD: [&str; 4] = ["/control/v1/step_down", "/status", "/ready", "/metrics"];
+
+    let uri = req.uri().to_string();
+    let uri_for_forward = !NOT_FOR_FORWARD.contains(&uri.as_str());
+
+    let state = get_state(&req);
+    let leadership_status = state.service.get_leadership_status();
+
+    if leadership_status != LeadershipStatus::SteppedDown || !uri_for_forward {
+        return ForwardOutcome::NotForwarded(req);
+    }
+
+    let leader = state.service.get_leader().await;
+    let leader = {
+        match leader {
+            Ok(Some(leader)) => leader,
+            Ok(None) => {
+                return ForwardOutcome::Forwarded(Err(ApiError::ResourceUnavailable(
+                    "No leader to forward to while in stepped down state".into(),
+                )));
+            }
+            Err(err) => {
+                return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(
+                    anyhow::anyhow!(
+                        "Failed to get leader for forwarding while in stepped down state: {err}"
+                    ),
+                )));
+            }
+        }
+    };
+
+    let cfg = state.service.get_config();
+    if let Some(ref self_addr) = cfg.address_for_peers {
+        let leader_addr = match Uri::from_str(leader.address.as_str()) {
+            Ok(uri) => uri,
+            Err(err) => {
+                return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(
+                    anyhow::anyhow!(
+                    "Failed to parse leader uri for forwarding while in stepped down state: {err}"
+                ),
+                )));
+            }
+        };
+
+        if *self_addr == leader_addr {
+            return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
+                "Leader is stepped down instance"
+            ))));
+        }
+    }
+
+    tracing::info!("Forwarding {} to leader at {}", uri, leader.address);
+
+    // Use [`RECONCILE_TIMEOUT`] as the max amount of time a request should block for and
+    // include some leeway to get the timeout for proxied requests.
+    const PROXIED_REQUEST_TIMEOUT: Duration = Duration::from_secs(RECONCILE_TIMEOUT.as_secs() + 10);
+    let client = reqwest::ClientBuilder::new()
+        .timeout(PROXIED_REQUEST_TIMEOUT)
+        .build();
+    let client = match client {
+        Ok(client) => client,
+        Err(err) => {
+            return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
+                "Failed to build leader client for forwarding while in stepped down state: {err}"
+            ))));
+        }
+    };
+
+    let request: reqwest::Request = match convert_request(req, &client, leader.address).await {
+        Ok(r) => r,
+        Err(err) => {
+            return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
+                "Failed to convert request for forwarding while in stepped down state: {err}"
+            ))));
+        }
+    };
+
+    let response = match client.execute(request).await {
+        Ok(r) => r,
+        Err(err) => {
+            return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError(anyhow::anyhow!(
+                "Failed to forward while in stepped down state: {err}"
+            ))));
+        }
+    };
+
+    ForwardOutcome::Forwarded(convert_response(response).await)
+}
+
+/// Convert a [`reqwest::Response`] to a [`hyper::Response`] by passing through
+/// a stable representation (string, bytes or integer)
+///
+/// Ideally, we would not have to do this since both types use the http crate
+/// under the hood. However, they use different versions of the crate and keeping
+/// second order dependencies in sync is difficult.
+async fn convert_response(resp: reqwest::Response) -> Result<hyper::Response<Body>, ApiError> {
+    use std::str::FromStr;
+
+    let mut builder = hyper::Response::builder().status(resp.status().as_u16());
+    for (key, value) in resp.headers().into_iter() {
+        let key = hyper::header::HeaderName::from_str(key.as_str()).map_err(|err| {
+            ApiError::InternalServerError(anyhow::anyhow!("Response conversion failed: {err}"))
+        })?;
+
+        let value = hyper::header::HeaderValue::from_bytes(value.as_bytes()).map_err(|err| {
+            ApiError::InternalServerError(anyhow::anyhow!("Response conversion failed: {err}"))
+        })?;
+
+        builder = builder.header(key, value);
+    }
+
+    let body = http::Body::wrap_stream(resp.bytes_stream());
+
+    builder.body(body).map_err(|err| {
+        ApiError::InternalServerError(anyhow::anyhow!("Response conversion failed: {err}"))
+    })
+}
+
+/// Convert a [`hyper::Request`] to a [`reqwest::Request`] by passing through
+/// a stable representation (string, bytes or integer)
+///
+/// See [`convert_response`] for why we are doing it this way.
+async fn convert_request(
+    req: hyper::Request<Body>,
+    client: &reqwest::Client,
+    to_address: String,
+) -> Result<reqwest::Request, ApiError> {
+    use std::str::FromStr;
+
+    let (parts, body) = req.into_parts();
+    let method = reqwest::Method::from_str(parts.method.as_str()).map_err(|err| {
+        ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
+    })?;
+
+    let path_and_query = parts.uri.path_and_query().ok_or_else(|| {
+        ApiError::InternalServerError(anyhow::anyhow!(
+            "Request conversion failed: no path and query"
+        ))
+    })?;
+
+    let uri = reqwest::Url::from_str(
+        format!(
+            "{}{}",
+            to_address.trim_end_matches("/"),
+            path_and_query.as_str()
+        )
+        .as_str(),
+    )
+    .map_err(|err| {
+        ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
+    })?;
+
+    let mut headers = reqwest::header::HeaderMap::new();
+    for (key, value) in parts.headers.into_iter() {
+        let key = match key {
+            Some(k) => k,
+            None => {
+                continue;
+            }
+        };
+
+        let key = reqwest::header::HeaderName::from_str(key.as_str()).map_err(|err| {
+            ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
+        })?;
+
+        let value = reqwest::header::HeaderValue::from_bytes(value.as_bytes()).map_err(|err| {
+            ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
+        })?;
+
+        headers.insert(key, value);
+    }
+
+    let body = hyper::body::to_bytes(body).await.map_err(|err| {
+        ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
+    })?;
+
+    client
+        .request(method, uri)
+        .headers(headers)
+        .body(body)
+        .build()
+        .map_err(|err| {
+            ApiError::InternalServerError(anyhow::anyhow!("Request conversion failed: {err}"))
+        })
+}
+
 pub fn make_router(
    service: Arc<Service>,
    auth: Option<Arc<SwappableJwtAuth>>,
--- a/storage_scrubber/Cargo.toml
+++ b/storage_scrubber/Cargo.toml
@@ -6,21 +6,13 @@ license.workspace = true

 [dependencies]
 aws-sdk-s3.workspace = true
-aws-smithy-async.workspace = true
 either.workspace = true
-tokio-rustls.workspace = true
 anyhow.workspace = true
 git-version.workspace = true
 hex.workspace = true
 humantime.workspace = true
-thiserror.workspace = true
-rand.workspace = true
-bytes.workspace = true
-bincode.workspace = true
-crc32c.workspace = true
 serde.workspace = true
 serde_json.workspace = true
-serde_with.workspace = true
 workspace_hack.workspace = true
 utils.workspace = true
 async-stream.workspace = true
--- a/storage_scrubber/src/main.rs
+++ b/storage_scrubber/src/main.rs
@@ -121,8 +121,6 @@ enum Command {
 async fn main() -> anyhow::Result<()> {
    let cli = Cli::parse();

-    tracing::info!("version: {}, build_tag {}", GIT_VERSION, BUILD_TAG);
-
    let bucket_config = BucketConfig::from_env()?;

    let command_log_name = match &cli.command {
@@ -142,6 +140,8 @@ async fn main() -> anyhow::Result<()> {
        chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S")
    ));

+    tracing::info!("version: {}, build_tag {}", GIT_VERSION, BUILD_TAG);
+
    let controller_client = cli.controller_api.map(|controller_api| {
        ControllerClientConfig {
            controller_api,
--- a/test_runner/fixtures/auth_tokens.py
+++ b/test_runner/fixtures/auth_tokens.py
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any
+
+import jwt
+
+from fixtures.common_types import TenantId
+
+
+@dataclass
+class AuthKeys:
+    priv: str
+
+    def generate_token(self, *, scope: TokenScope, **token_data: Any) -> str:
+        token_data = {key: str(val) for key, val in token_data.items()}
+        token = jwt.encode({"scope": scope, **token_data}, self.priv, algorithm="EdDSA")
+        # cast(Any, self.priv)
+
+        # jwt.encode can return 'bytes' or 'str', depending on Python version or type
+        # hinting or something (not sure what). If it returned 'bytes', convert it to 'str'
+        # explicitly.
+        if isinstance(token, bytes):
+            token = token.decode()
+
+        return token
+
+    def generate_pageserver_token(self) -> str:
+        return self.generate_token(scope=TokenScope.PAGE_SERVER_API)
+
+    def generate_safekeeper_token(self) -> str:
+        return self.generate_token(scope=TokenScope.SAFEKEEPER_DATA)
+
+    # generate token giving access to only one tenant
+    def generate_tenant_token(self, tenant_id: TenantId) -> str:
+        return self.generate_token(scope=TokenScope.TENANT, tenant_id=str(tenant_id))
+
+
+# TODO: Replace with `StrEnum` when we upgrade to python 3.11
+class TokenScope(str, Enum):
+    ADMIN = "admin"
+    PAGE_SERVER_API = "pageserverapi"
+    GENERATIONS_API = "generations_api"
+    SAFEKEEPER_DATA = "safekeeperdata"
+    TENANT = "tenant"
+    SCRUBBER = "scrubber"
--- a/test_runner/fixtures/broker.py
+++ b/test_runner/fixtures/broker.py
@@ -1,63 +0,0 @@
-import subprocess
-import time
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Any, Optional
-
-from fixtures.log_helper import log
-
-
-@dataclass
-class NeonBroker:
-    """An object managing storage_broker instance"""
-
-    logfile: Path
-    port: int
-    neon_binpath: Path
-    handle: Optional[subprocess.Popen[Any]] = None  # handle of running daemon
-
-    def listen_addr(self):
-        return f"127.0.0.1:{self.port}"
-
-    def client_url(self):
-        return f"http://{self.listen_addr()}"
-
-    def check_status(self):
-        return True  # TODO
-
-    def try_start(self):
-        if self.handle is not None:
-            log.debug(f"storage_broker is already running on port {self.port}")
-            return
-
-        listen_addr = self.listen_addr()
-        log.info(f'starting storage_broker to listen incoming connections at "{listen_addr}"')
-        with open(self.logfile, "wb") as logfile:
-            args = [
-                str(self.neon_binpath / "storage_broker"),
-                f"--listen-addr={listen_addr}",
-            ]
-            self.handle = subprocess.Popen(args, stdout=logfile, stderr=logfile)
-
-        # wait for start
-        started_at = time.time()
-        while True:
-            try:
-                self.check_status()
-            except Exception as e:
-                elapsed = time.time() - started_at
-                if elapsed > 5:
-                    raise RuntimeError(
-                        f"timed out waiting {elapsed:.0f}s for storage_broker start: {e}"
-                    ) from e
-                time.sleep(0.5)
-            else:
-                break  # success
-
-    def stop(self, immediate: bool = False):
-        if self.handle is not None:
-            if immediate:
-                self.handle.kill()
-            else:
-                self.handle.terminate()
-            self.handle.wait()
--- a/test_runner/fixtures/common_types.py
+++ b/test_runner/fixtures/common_types.py
@@ -13,7 +13,7 @@ DEFAULT_WAL_SEG_SIZE = 16 * 1024 * 1024
 class Lsn:
    """
    Datatype for an LSN. Internally it is a 64-bit integer, but the string
-    representation is like "1/123abcd". See also pg_lsn datatype in Postgres
+    representation is like "1/0123abcd". See also pg_lsn datatype in Postgres
    """

    def __init__(self, x: Union[int, str]):
--- a/test_runner/fixtures/compare_fixtures.py
+++ b/test_runner/fixtures/compare_fixtures.py
@@ -4,6 +4,7 @@ from abc import ABC, abstractmethod
 from contextlib import _GeneratorContextManager, contextmanager

 # Type-related stuff
+from pathlib import Path
 from typing import Dict, Iterator, List

 import pytest
@@ -229,11 +230,11 @@ class VanillaCompare(PgCompare):
        pass  # TODO find something

    def report_size(self):
-        data_size = self.pg.get_subdir_size("base")
+        data_size = self.pg.get_subdir_size(Path("base"))
        self.zenbenchmark.record(
            "data_size", data_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
        )
-        wal_size = self.pg.get_subdir_size("pg_wal")
+        wal_size = self.pg.get_subdir_size(Path("pg_wal"))
        self.zenbenchmark.record(
            "wal_size", wal_size / (1024 * 1024), "MB", report=MetricReport.LOWER_IS_BETTER
        )
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -43,7 +43,6 @@ from urllib.parse import quote, urlparse
 import asyncpg
 import backoff
 import httpx
-import jwt
 import psycopg2
 import psycopg2.sql
 import pytest
@@ -60,7 +59,7 @@ from psycopg2.extensions import make_dsn, parse_dsn
 from urllib3.util.retry import Retry

 from fixtures import overlayfs
-from fixtures.broker import NeonBroker
+from fixtures.auth_tokens import AuthKeys, TokenScope
 from fixtures.common_types import Lsn, NodeId, TenantId, TenantShardId, TimelineId
 from fixtures.endpoint.http import EndpointHttpClient
 from fixtures.log_helper import log
@@ -93,6 +92,7 @@ from fixtures.utils import (
    allure_add_grafana_links,
    allure_attach_from_dir,
    assert_no_errors,
+    get_dir_size,
    get_self_dir,
    print_gc_result,
    subprocess_capture,
@@ -158,7 +158,7 @@ def neon_binpath(base_dir: Path, build_type: str) -> Iterator[Path]:
    yield binpath


-@pytest.fixture(scope="function")
+@pytest.fixture(scope="session")
 def pg_distrib_dir(base_dir: Path) -> Iterator[Path]:
    if env_postgres_bin := os.environ.get("POSTGRES_DISTRIB_DIR"):
        distrib_dir = Path(env_postgres_bin).resolve()
@@ -182,25 +182,6 @@ def top_output_dir(base_dir: Path) -> Iterator[Path]:
    yield output_dir


-@pytest.fixture(scope="function")
-def versioned_pg_distrib_dir(pg_distrib_dir: Path, pg_version: PgVersion) -> Iterator[Path]:
-    versioned_dir = pg_distrib_dir / pg_version.v_prefixed
-
-    psql_bin_path = versioned_dir / "bin/psql"
-    postgres_bin_path = versioned_dir / "bin/postgres"
-
-    if os.getenv("REMOTE_ENV"):
-        # When testing against a remote server, we only need the client binary.
-        if not psql_bin_path.exists():
-            raise Exception(f"psql not found at '{psql_bin_path}'")
-    else:
-        if not postgres_bin_path.exists():
-            raise Exception(f"postgres not found at '{postgres_bin_path}'")
-
-    log.info(f"versioned_pg_distrib_dir is {versioned_dir}")
-    yield versioned_dir
-
-
@pytest.fixture(scope="session")
 def neon_api_key() -> str:
    api_key = os.getenv("NEON_API_KEY")
@@ -243,36 +224,11 @@ def worker_base_port(worker_seq_no: int, worker_port_num: int) -> int:
    return BASE_PORT + worker_seq_no * worker_port_num


-def get_dir_size(path: str) -> int:
-    """Return size in bytes."""
-    totalbytes = 0
-    for root, _dirs, files in os.walk(path):
-        for name in files:
-            totalbytes += os.path.getsize(os.path.join(root, name))
-
-    return totalbytes
-
-
@pytest.fixture(scope="session")
 def port_distributor(worker_base_port: int, worker_port_num: int) -> PortDistributor:
    return PortDistributor(base_port=worker_base_port, port_number=worker_port_num)


-@pytest.fixture(scope="function")
-def default_broker(
-    port_distributor: PortDistributor,
-    test_output_dir: Path,
-    neon_binpath: Path,
-) -> Iterator[NeonBroker]:
-    # multiple pytest sessions could get launched in parallel, get them different ports/datadirs
-    client_port = port_distributor.get_port()
-    broker_logfile = test_output_dir / "repo" / "storage_broker.log"
-
-    broker = NeonBroker(logfile=broker_logfile, port=client_port, neon_binpath=neon_binpath)
-    yield broker
-    broker.stop()
-
-
@pytest.fixture(scope="session")
 def run_id() -> Iterator[uuid.UUID]:
    yield uuid.uuid4()
@@ -401,44 +357,6 @@ class PgProtocol:
        return self.safe_psql(query, log_query=log_query)[0][0]


-@dataclass
-class AuthKeys:
-    priv: str
-
-    def generate_token(self, *, scope: TokenScope, **token_data: Any) -> str:
-        token_data = {key: str(val) for key, val in token_data.items()}
-        token = jwt.encode({"scope": scope, **token_data}, self.priv, algorithm="EdDSA")
-        # cast(Any, self.priv)
-
-        # jwt.encode can return 'bytes' or 'str', depending on Python version or type
-        # hinting or something (not sure what). If it returned 'bytes', convert it to 'str'
-        # explicitly.
-        if isinstance(token, bytes):
-            token = token.decode()
-
-        return token
-
-    def generate_pageserver_token(self) -> str:
-        return self.generate_token(scope=TokenScope.PAGE_SERVER_API)
-
-    def generate_safekeeper_token(self) -> str:
-        return self.generate_token(scope=TokenScope.SAFEKEEPER_DATA)
-
-    # generate token giving access to only one tenant
-    def generate_tenant_token(self, tenant_id: TenantId) -> str:
-        return self.generate_token(scope=TokenScope.TENANT, tenant_id=str(tenant_id))
-
-
-# TODO: Replace with `StrEnum` when we upgrade to python 3.11
-class TokenScope(str, Enum):
-    ADMIN = "admin"
-    PAGE_SERVER_API = "pageserverapi"
-    GENERATIONS_API = "generations_api"
-    SAFEKEEPER_DATA = "safekeeperdata"
-    TENANT = "tenant"
-    SCRUBBER = "scrubber"
-
-
 class NeonEnvBuilder:
    """
    Builder object to create a Neon runtime environment
@@ -453,7 +371,6 @@ class NeonEnvBuilder:
        self,
        repo_dir: Path,
        port_distributor: PortDistributor,
-        broker: NeonBroker,
        run_id: uuid.UUID,
        mock_s3_server: MockS3Server,
        neon_binpath: Path,
@@ -494,7 +411,6 @@ class NeonEnvBuilder:
        # Safekeepers remote storage
        self.safekeepers_remote_storage: Optional[RemoteStorage] = None

-        self.broker = broker
        self.run_id = run_id
        self.mock_s3_server: MockS3Server = mock_s3_server
        self.pageserver_config_override = pageserver_config_override
@@ -933,8 +849,11 @@ class NeonEnvBuilder:

        for directory_to_clean in reversed(directories_to_clean):
            if not os.listdir(directory_to_clean):
-                log.debug(f"Removing empty directory {directory_to_clean}")
-                directory_to_clean.rmdir()
+                log.info(f"Removing empty directory {directory_to_clean}")
+                try:
+                    directory_to_clean.rmdir()
+                except Exception as e:
+                    log.error(f"Error removing empty directory {directory_to_clean}: {e}")

    def cleanup_remote_storage(self):
        for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
@@ -1003,6 +922,8 @@ class NeonEnvBuilder:

            self.env.storage_controller.assert_no_errors()

+            self.env.broker.assert_no_errors()
+
        try:
            self.overlay_cleanup_teardown()
        except Exception as e:
@@ -1056,7 +977,7 @@ class NeonEnv:
        self.endpoints = EndpointFactory(self)
        self.safekeepers: List[Safekeeper] = []
        self.pageservers: List[NeonPageserver] = []
-        self.broker = config.broker
+        self.broker = NeonBroker(self)
        self.pageserver_remote_storage = config.pageserver_remote_storage
        self.safekeepers_remote_storage = config.safekeepers_remote_storage
        self.pg_version = config.pg_version
@@ -1231,7 +1152,7 @@ class NeonEnv:
            max_workers=2 + len(self.pageservers) + len(self.safekeepers)
        ) as executor:
            futs.append(
-                executor.submit(lambda: self.broker.try_start() or None)
+                executor.submit(lambda: self.broker.start() or None)
            )  # The `or None` is for the linter

            for pageserver in self.pageservers:
@@ -1288,7 +1209,7 @@ class NeonEnv:
                pageserver.stop(immediate=immediate)
            except RuntimeError:
                stop_later.append(pageserver)
-        self.broker.stop(immediate=immediate)
+        self.broker.stop()

        # TODO: for nice logging we need python 3.11 ExceptionGroup
        for ps in stop_later:
@@ -1402,7 +1323,6 @@ def neon_simple_env(
    pytestconfig: Config,
    port_distributor: PortDistributor,
    mock_s3_server: MockS3Server,
-    default_broker: NeonBroker,
    run_id: uuid.UUID,
    top_output_dir: Path,
    test_output_dir: Path,
@@ -1427,7 +1347,6 @@ def neon_simple_env(
        top_output_dir=top_output_dir,
        repo_dir=repo_dir,
        port_distributor=port_distributor,
-        broker=default_broker,
        mock_s3_server=mock_s3_server,
        neon_binpath=neon_binpath,
        pg_distrib_dir=pg_distrib_dir,
@@ -1455,7 +1374,6 @@ def neon_env_builder(
    neon_binpath: Path,
    pg_distrib_dir: Path,
    pg_version: PgVersion,
-    default_broker: NeonBroker,
    run_id: uuid.UUID,
    request: FixtureRequest,
    test_overlay_dir: Path,
@@ -1491,7 +1409,6 @@ def neon_env_builder(
        neon_binpath=neon_binpath,
        pg_distrib_dir=pg_distrib_dir,
        pg_version=pg_version,
-        broker=default_broker,
        run_id=run_id,
        preserve_database_files=cast(bool, pytestconfig.getoption("--preserve-database-files")),
        pageserver_virtual_file_io_engine=pageserver_virtual_file_io_engine,
@@ -1913,6 +1830,18 @@ class NeonCli(AbstractNeonCli):
            args.extend(["-m", "immediate"])
        return self.raw_cli(args)

+    def broker_start(
+        self, timeout_in_seconds: Optional[int] = None
+    ) -> "subprocess.CompletedProcess[str]":
+        """Run `storage_broker start`; pass --start-timeout only if a timeout is given."""
+        no_timeout = timeout_in_seconds is None
+        extra = [] if no_timeout else [f"--start-timeout={timeout_in_seconds}s"]
+        return self.raw_cli(["storage_broker", "start", *extra])
+
+    def broker_stop(self) -> "subprocess.CompletedProcess[str]":
+        """Run `storage_broker stop` through the CLI wrapper and return its result."""
+        return self.raw_cli(["storage_broker", "stop"])
+
    def endpoint_create(
        self,
        branch_name: str,
@@ -3335,12 +3264,12 @@ class PgBin:
        )
        return base_path

-    def get_pg_controldata_checkpoint_lsn(self, pgdata: str) -> Lsn:
+    def get_pg_controldata_checkpoint_lsn(self, pgdata: Path) -> Lsn:
        """
        Run pg_controldata on given datadir and extract checkpoint lsn.
        """

-        pg_controldata_path = os.path.join(self.pg_bin_path, "pg_controldata")
+        pg_controldata_path = self.pg_bin_path / "pg_controldata"
        cmd = f"{pg_controldata_path} -D {pgdata}"
        result = subprocess.run(cmd, capture_output=True, text=True, shell=True)
        checkpoint_lsn = re.findall(
@@ -3423,6 +3352,7 @@ class VanillaPostgres(PgProtocol):
        assert not self.running
        with open(os.path.join(self.pgdatadir, "postgresql.conf"), "a") as conf_file:
            conf_file.write("\n".join(options))
+            conf_file.write("\n")

    def edit_hba(self, hba: List[str]):
        """Prepend hba lines into pg_hba.conf file."""
@@ -3448,9 +3378,9 @@ class VanillaPostgres(PgProtocol):
        self.running = False
        self.pg_bin.run_capture(["pg_ctl", "-w", "-D", str(self.pgdatadir), "stop"])

-    def get_subdir_size(self, subdir) -> int:
+    def get_subdir_size(self, subdir: Path) -> int:
        """Return size of pgdatadir subdirectory in bytes."""
-        return get_dir_size(os.path.join(self.pgdatadir, subdir))
+        return get_dir_size(self.pgdatadir / subdir)

    def __enter__(self) -> "VanillaPostgres":
        return self
@@ -3476,6 +3406,7 @@ def vanilla_pg(
    pg_bin = PgBin(test_output_dir, pg_distrib_dir, pg_version)
    port = port_distributor.get_port()
    with VanillaPostgres(pgdatadir, pg_bin, port) as vanilla_pg:
+        vanilla_pg.configure(["shared_preload_libraries='neon_rmgr'"])
        yield vanilla_pg


@@ -3976,7 +3907,7 @@ class Endpoint(PgProtocol, LogUtils):
        self.env = env
        self.branch_name: Optional[str] = None  # dubious
        self.endpoint_id: Optional[str] = None  # dubious, see asserts below
-        self.pgdata_dir: Optional[str] = None  # Path to computenode PGDATA
+        self.pgdata_dir: Optional[Path] = None  # Path to computenode PGDATA
        self.tenant_id = tenant_id
        self.pg_port = pg_port
        self.http_port = http_port
@@ -4033,7 +3964,7 @@ class Endpoint(PgProtocol, LogUtils):
            allow_multiple=allow_multiple,
        )
        path = Path("endpoints") / self.endpoint_id / "pgdata"
-        self.pgdata_dir = os.path.join(self.env.repo_dir, path)
+        self.pgdata_dir = self.env.repo_dir / path
        self.logfile = self.endpoint_path() / "compute.log"

        config_lines = config_lines or []
@@ -4086,21 +4017,21 @@ class Endpoint(PgProtocol, LogUtils):
        path = Path("endpoints") / self.endpoint_id
        return self.env.repo_dir / path

-    def pg_data_dir_path(self) -> str:
+    def pg_data_dir_path(self) -> Path:
        """Path to Postgres data directory"""
-        return os.path.join(self.endpoint_path(), "pgdata")
+        return self.endpoint_path() / "pgdata"

-    def pg_xact_dir_path(self) -> str:
+    def pg_xact_dir_path(self) -> Path:
        """Path to pg_xact dir"""
-        return os.path.join(self.pg_data_dir_path(), "pg_xact")
+        return self.pg_data_dir_path() / "pg_xact"

-    def pg_twophase_dir_path(self) -> str:
+    def pg_twophase_dir_path(self) -> Path:
        """Path to pg_twophase dir"""
-        return os.path.join(self.pg_data_dir_path(), "pg_twophase")
+        return self.pg_data_dir_path() / "pg_twophase"

-    def config_file_path(self) -> str:
+    def config_file_path(self) -> Path:
        """Path to the postgresql.conf in the endpoint directory (not the one in pgdata)"""
-        return os.path.join(self.endpoint_path(), "postgresql.conf")
+        return self.endpoint_path() / "postgresql.conf"

    def config(self, lines: List[str]) -> "Endpoint":
        """
@@ -4155,7 +4086,7 @@ class Endpoint(PgProtocol, LogUtils):
            json.dump(dict(data_dict, **kwargs), file, indent=4)

    # Please note: Migrations only run if pg_skip_catalog_updates is false
-    def wait_for_migrations(self, num_migrations: int = 10):
+    def wait_for_migrations(self, num_migrations: int = 11):
        with self.cursor() as cur:

            def check_migrations_done():
@@ -4265,7 +4196,7 @@ class Endpoint(PgProtocol, LogUtils):
        log.info(f'checkpointing at LSN {self.safe_psql("select pg_current_wal_lsn()")[0][0]}')
        self.safe_psql("checkpoint")
        assert self.pgdata_dir is not None  # please mypy
-        return get_dir_size(os.path.join(self.pgdata_dir, "pg_wal")) / 1024 / 1024
+        return get_dir_size(self.pgdata_dir / "pg_wal") / 1024 / 1024

    def clear_shared_buffers(self, cursor: Optional[Any] = None):
        """
@@ -4634,6 +4565,40 @@ class Safekeeper(LogUtils):
        wait_until(20, 0.5, paused)


+class NeonBroker(LogUtils):
+    """Manages a storage_broker process through env.neon_cli (log: storage_broker.log)"""
+
+    def __init__(self, env: NeonEnv):
+        super().__init__(logfile=env.repo_dir / "storage_broker.log")
+        self.env = env
+        self.port: int = self.env.port_distributor.get_port()
+        self.running = False  # flipped by start()/stop(); guards double start and makes stop a no-op
+
+    def start(
+        self,
+        timeout_in_seconds: Optional[int] = None,
+    ) -> "NeonBroker":
+        assert not self.running  # starting an already-started broker is a caller error
+        self.env.neon_cli.broker_start(timeout_in_seconds)
+        self.running = True
+        return self
+
+    def stop(self) -> "NeonBroker":
+        if self.running:  # nothing to do when the broker was never started
+            self.env.neon_cli.broker_stop()
+            self.running = False
+        return self
+
+    def listen_addr(self) -> str:
+        return f"127.0.0.1:{self.port}"
+
+    def client_url(self) -> str:
+        return f"http://{self.listen_addr()}"
+
+    def assert_no_errors(self) -> None:
+        # Second argument presumably labels the service in reports; it was "storage_controller".
+        assert_no_errors(self.logfile, "storage_broker", [])
+
+
 # TODO: Replace with `StrEnum` when we upgrade to python 3.11
 class NodeKind(str, Enum):
    PAGESERVER = "pageserver"
--- a/Show More
+++ b/Show More
@@ -0,0 +1 @@
+GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;