fix Payload deser

turns out we don't actually need to deser everything
proxy: use RawValue to lazily process inputs
2026-01-28 07:40:37 +00:00 · 2024-10-14 14:02:46 +01:00 · 2024-10-14 11:58:53 +01:00 · 2024-10-14 11:48:58 +01:00 · 2024-10-14 11:44:18 +01:00 · 2024-10-14 11:42:20 +01:00
319 changed files with 5846 additions and 3239 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -5,9 +5,7 @@
 !Cargo.toml
 !Makefile
 !rust-toolchain.toml
-!scripts/combine_control_files.py
 !scripts/ninstall.sh
-!vm-cgconfig.conf
 !docker-compose/run-tests.sh

 # Directories
@@ -17,15 +15,12 @@
 !compute_tools/
 !control_plane/
 !libs/
-!neon_local/
 !pageserver/
-!patches/
 !pgxn/
 !proxy/
 !storage_scrubber/
 !safekeeper/
 !storage_broker/
 !storage_controller/
-!trace/
 !vendor/postgres-*/
 !workspace_hack/
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -218,6 +218,9 @@ runs:
        name: compatibility-snapshot-${{ runner.arch }}-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
        # Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
        path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
+        # The lack of compatibility snapshot shouldn't fail the job
+        # (for example if we didn't run the test for non build-and-test workflow)
+        skip-if-does-not-exist: true

    - name: Upload test results
      if: ${{ !cancelled() }}
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -7,6 +7,10 @@ inputs:
  path:
    description: "A directory or file to upload"
    required: true
+  skip-if-does-not-exist:
+    description: "Allow to skip if path doesn't exist, fail otherwise"
+    default: false
+    required: false
  prefix:
    description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
    required: false
@@ -15,10 +19,12 @@ runs:
  using: "composite"
  steps:
    - name: Prepare artifact
+      id: prepare-artifact
      shell: bash -euxo pipefail {0}
      env:
        SOURCE: ${{ inputs.path }}
        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
+        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
      run: |
        mkdir -p $(dirname $ARCHIVE)

@@ -33,14 +39,22 @@ runs:
        elif [ -f ${SOURCE} ]; then
          time tar -cf ${ARCHIVE} --zstd ${SOURCE}
        elif ! ls ${SOURCE} > /dev/null 2>&1; then
-          echo >&2 "${SOURCE} does not exist"
-          exit 2
+          if [ "${SKIP_IF_DOES_NOT_EXIST}" = "true" ]; then
+            echo 'SKIPPED=true' >> $GITHUB_OUTPUT
+            exit 0
+          else
+            echo >&2 "${SOURCE} does not exist"
+            exit 2
+          fi
        else
          echo >&2 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
          exit 3
        fi

+        echo 'SKIPPED=false' >> $GITHUB_OUTPUT
+
    - name: Upload artifact
+      if: ${{ steps.prepare-artifact.outputs.SKIPPED == 'false' }}
      shell: bash -euxo pipefail {0}
      env:
        SOURCE: ${{ inputs.path }}
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -193,16 +193,15 @@ jobs:
        with:
          submodules: true

-#      Disabled for now
-#      - name: Restore cargo deps cache
-#        id: cache_cargo
-#        uses: actions/cache@v4
-#        with:
-#          path: |
-#            !~/.cargo/registry/src
-#            ~/.cargo/git/
-#            target/
-#          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-clippy-${{ hashFiles('rust-toolchain.toml') }}-${{ hashFiles('Cargo.lock') }}
+      - name: Cache cargo deps
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            !~/.cargo/registry/src
+            ~/.cargo/git
+            target
+          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

      # Some of our rust modules use FFI and need those to be checked
      - name: Get postgres headers
--- a/.github/workflows/report-workflow-stats.yml
+++ b/.github/workflows/report-workflow-stats.yml
@@ -0,0 +1,41 @@
+name: Report Workflow Stats
+
+on:
+  workflow_run:
+    workflows:
+    - Add `external` label to issues and PRs created by external users
+    - Benchmarking
+    - Build and Test
+    - Build and Test Locally
+    - Build build-tools image
+    - Check Permissions
+    - Check build-tools image
+    - Check neon with extra platform builds
+    - Cloud Regression Test
+    - Create Release Branch
+    - Handle `approved-for-ci-run` label
+    - Lint GitHub Workflows
+    - Notify Slack channel about upcoming release
+    - Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
+    - Pin build-tools image
+    - Prepare benchmarking databases by restoring dumps
+    - Push images to ACR
+    - Test Postgres client libraries
+    - Trigger E2E Tests
+    - cleanup caches by a branch
+    types: [completed]
+
+jobs:
+  gh-workflow-stats:
+    name: Github Workflow Stats
+    runs-on: ubuntu-22.04
+    permissions:
+      actions: read
+    steps:
+    - name: Export GH Workflow Stats
+      uses: neondatabase/gh-workflow-stats-action@v0.1.4
+      with:
+        DB_URI: ${{ secrets.GH_REPORT_STATS_DB_RW_CONNSTR }}
+        DB_TABLE: "gh_workflow_stats_neon"
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GH_RUN_ID: ${{ github.event.workflow_run.id }}
--- a/1
+++ b/1
@@ -1,5 +1,6 @@
 /compute_tools/ @neondatabase/control-plane @neondatabase/compute
 /storage_controller @neondatabase/storage
+/storage_scrubber @neondatabase/storage
 /libs/pageserver_api/ @neondatabase/storage
 /libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
 /libs/remote_storage/ @neondatabase/storage
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -666,34 +666,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum"
-version = "0.6.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf"
-dependencies = [
- "async-trait",
- "axum-core 0.3.4",
- "bitflags 1.3.2",
- "bytes",
- "futures-util",
- "http 0.2.9",
- "http-body 0.4.5",
- "hyper 0.14.30",
- "itoa",
- "matchit 0.7.0",
- "memchr",
- "mime",
- "percent-encoding",
- "pin-project-lite",
- "rustversion",
- "serde",
- "sync_wrapper 0.1.2",
- "tower",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum"
 version = "0.7.5"
@@ -701,7 +673,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
 dependencies = [
 "async-trait",
- "axum-core 0.4.5",
+ "axum-core",
 "base64 0.21.1",
 "bytes",
 "futures-util",
@@ -731,23 +703,6 @@ dependencies = [
 "tracing",
 ]

-[[package]]
-name = "axum-core"
-version = "0.3.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c"
-dependencies = [
- "async-trait",
- "bytes",
- "futures-util",
- "http 0.2.9",
- "http-body 0.4.5",
- "mime",
- "rustversion",
- "tower-layer",
- "tower-service",
-]
-
 [[package]]
 name = "axum-core"
 version = "0.4.5"
@@ -971,7 +926,7 @@ dependencies = [
 "clang-sys",
 "itertools 0.12.1",
 "log",
- "prettyplease 0.2.17",
+ "prettyplease",
 "proc-macro2",
 "quote",
 "regex",
@@ -1865,6 +1820,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47"
 dependencies = [
 "base16ct 0.2.0",
+ "base64ct",
 "crypto-bigint 0.5.5",
 "digest",
 "ff 0.13.0",
@@ -1874,6 +1830,8 @@ dependencies = [
 "pkcs8 0.10.2",
 "rand_core 0.6.4",
 "sec1 0.7.3",
+ "serde_json",
+ "serdect",
 "subtle",
 "zeroize",
 ]
@@ -2454,15 +2412,6 @@ dependencies = [
 "digest",
 ]

-[[package]]
-name = "home"
-version = "0.5.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
-dependencies = [
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "hostname"
 version = "0.4.0"
@@ -2657,14 +2606,15 @@ dependencies = [

 [[package]]
 name = "hyper-timeout"
-version = "0.4.1"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1"
+checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
 dependencies = [
- "hyper 0.14.30",
+ "hyper 1.4.1",
+ "hyper-util",
 "pin-project-lite",
 "tokio",
- "tokio-io-timeout",
+ "tower-service",
 ]

 [[package]]
@@ -3470,7 +3420,7 @@ dependencies = [
 "opentelemetry-http",
 "opentelemetry-proto",
 "opentelemetry_sdk",
- "prost 0.13.3",
+ "prost",
 "reqwest 0.12.4",
 "thiserror",
 ]
@@ -3483,8 +3433,8 @@ checksum = "30ee9f20bff9c984511a02f082dc8ede839e4a9bf15cc2487c8d6fea5ad850d9"
 dependencies = [
 "opentelemetry",
 "opentelemetry_sdk",
- "prost 0.13.3",
- "tonic 0.12.3",
+ "prost",
+ "tonic",
 ]

 [[package]]
@@ -4090,6 +4040,8 @@ dependencies = [
 "bytes",
 "fallible-iterator",
 "postgres-protocol",
+ "serde",
+ "serde_json",
 ]

 [[package]]
@@ -4178,16 +4130,6 @@ dependencies = [
 "tokio",
 ]

-[[package]]
-name = "prettyplease"
-version = "0.1.25"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86"
-dependencies = [
- "proc-macro2",
- "syn 1.0.109",
-]
-
 [[package]]
 name = "prettyplease"
 version = "0.2.17"
@@ -4258,16 +4200,6 @@ dependencies = [
 "thiserror",
 ]

-[[package]]
-name = "prost"
-version = "0.11.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd"
-dependencies = [
- "bytes",
- "prost-derive 0.11.9",
-]
-
 [[package]]
 name = "prost"
 version = "0.13.3"
@@ -4275,42 +4207,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f"
 dependencies = [
 "bytes",
- "prost-derive 0.13.3",
+ "prost-derive",
 ]

 [[package]]
 name = "prost-build"
-version = "0.11.9"
+version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270"
+checksum = "0c1318b19085f08681016926435853bbf7858f9c082d0999b80550ff5d9abe15"
 dependencies = [
 "bytes",
- "heck 0.4.1",
- "itertools 0.10.5",
- "lazy_static",
+ "heck 0.5.0",
+ "itertools 0.12.1",
 "log",
 "multimap",
+ "once_cell",
 "petgraph",
- "prettyplease 0.1.25",
- "prost 0.11.9",
+ "prettyplease",
+ "prost",
 "prost-types",
 "regex",
- "syn 1.0.109",
+ "syn 2.0.52",
 "tempfile",
- "which",
-]
-
-[[package]]
-name = "prost-derive"
-version = "0.11.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4"
-dependencies = [
- "anyhow",
- "itertools 0.10.5",
- "proc-macro2",
- "quote",
- "syn 1.0.109",
 ]

 [[package]]
@@ -4328,11 +4246,11 @@ dependencies = [

 [[package]]
 name = "prost-types"
-version = "0.11.9"
+version = "0.13.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13"
+checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670"
 dependencies = [
- "prost 0.11.9",
+ "prost",
 ]

 [[package]]
@@ -5094,6 +5012,21 @@ dependencies = [
 "zeroize",
 ]

+[[package]]
+name = "rustls"
+version = "0.23.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebbbdb961df0ad3f2652da8f3fdc4b36122f568f968f45ad3316f26c025c677b"
+dependencies = [
+ "log",
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki 0.102.2",
+ "subtle",
+ "zeroize",
+]
+
 [[package]]
 name = "rustls-native-certs"
 version = "0.6.2"
@@ -5119,6 +5052,19 @@ dependencies = [
 "security-framework",
 ]

+[[package]]
+name = "rustls-native-certs"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fcaf18a4f2be7326cd874a5fa579fae794320a0f388d365dca7e480e55f83f8a"
+dependencies = [
+ "openssl-probe",
+ "rustls-pemfile 2.1.1",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework",
+]
+
 [[package]]
 name = "rustls-pemfile"
 version = "1.0.2"
@@ -5194,6 +5140,7 @@ dependencies = [
 "fail",
 "futures",
 "hex",
+ "http 1.1.0",
 "humantime",
 "hyper 0.14.30",
 "metrics",
@@ -5314,6 +5261,7 @@ dependencies = [
 "der 0.7.8",
 "generic-array",
 "pkcs8 0.10.2",
+ "serdect",
 "subtle",
 "zeroize",
 ]
@@ -5568,6 +5516,16 @@ dependencies = [
 "syn 2.0.52",
 ]

+[[package]]
+name = "serdect"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a84f14a19e9a014bb9f4512488d9829a68e04ecabffb0f9904cd1ace94598177"
+dependencies = [
+ "base16ct 0.2.0",
+ "serde",
+]
+
 [[package]]
 name = "sha1"
 version = "0.10.5"
@@ -5750,19 +5708,22 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "async-stream",
+ "bytes",
 "clap",
 "const_format",
 "futures",
 "futures-core",
 "futures-util",
+ "http-body-util",
 "humantime",
- "hyper 0.14.30",
+ "hyper 1.4.1",
+ "hyper-util",
 "metrics",
 "once_cell",
 "parking_lot 0.12.1",
- "prost 0.11.9",
+ "prost",
 "tokio",
- "tonic 0.9.2",
+ "tonic",
 "tonic-build",
 "tracing",
 "utils",
@@ -6306,6 +6267,17 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "tokio-rustls"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4"
+dependencies = [
+ "rustls 0.23.7",
+ "rustls-pki-types",
+ "tokio",
+]
+
 [[package]]
 name = "tokio-stream"
 version = "0.1.16"
@@ -6397,29 +6369,30 @@ dependencies = [

 [[package]]
 name = "tonic"
-version = "0.9.2"
+version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
+checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
 dependencies = [
 "async-stream",
 "async-trait",
- "axum 0.6.20",
- "base64 0.21.1",
+ "axum",
+ "base64 0.22.1",
 "bytes",
- "futures-core",
- "futures-util",
- "h2 0.3.26",
- "http 0.2.9",
- "http-body 0.4.5",
- "hyper 0.14.30",
+ "h2 0.4.4",
+ "http 1.1.0",
+ "http-body 1.0.0",
+ "http-body-util",
+ "hyper 1.4.1",
 "hyper-timeout",
+ "hyper-util",
 "percent-encoding",
 "pin-project",
- "prost 0.11.9",
- "rustls-native-certs 0.6.2",
- "rustls-pemfile 1.0.2",
+ "prost",
+ "rustls-native-certs 0.8.0",
+ "rustls-pemfile 2.1.1",
+ "socket2",
 "tokio",
- "tokio-rustls 0.24.0",
+ "tokio-rustls 0.26.0",
 "tokio-stream",
 "tower",
 "tower-layer",
@@ -6428,37 +6401,17 @@ dependencies = [
 ]

 [[package]]
-name = "tonic"
+name = "tonic-build"
 version = "0.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52"
+checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11"
 dependencies = [
- "async-trait",
- "base64 0.22.1",
- "bytes",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "percent-encoding",
- "pin-project",
- "prost 0.13.3",
- "tokio-stream",
- "tower-layer",
- "tower-service",
- "tracing",
-]
-
-[[package]]
-name = "tonic-build"
-version = "0.9.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6fdaae4c2c638bb70fe42803a26fbd6fc6ac8c72f5c59f67ecc2a2dcabf4b07"
-dependencies = [
- "prettyplease 0.1.25",
+ "prettyplease",
 "proc-macro2",
 "prost-build",
+ "prost-types",
 "quote",
- "syn 1.0.109",
+ "syn 2.0.52",
 ]

 [[package]]
@@ -6864,7 +6817,7 @@ name = "vm_monitor"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "axum 0.7.5",
+ "axum",
 "cgroups-rs",
 "clap",
 "futures",
@@ -7095,18 +7048,6 @@ dependencies = [
 "rustls-pki-types",
 ]

-[[package]]
-name = "which"
-version = "4.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
-dependencies = [
- "either",
- "home",
- "once_cell",
- "rustix",
-]
-
 [[package]]
 name = "whoami"
 version = "1.5.1"
@@ -7335,9 +7276,10 @@ version = "0.1.0"
 dependencies = [
 "ahash",
 "anyhow",
+ "axum",
+ "axum-core",
 "base64 0.21.1",
 "base64ct",
- "bitflags 2.4.1",
 "bytes",
 "camino",
 "cc",
@@ -7365,7 +7307,6 @@ dependencies = [
 "hyper 1.4.1",
 "hyper-util",
 "indexmap 1.9.3",
- "itertools 0.10.5",
 "itertools 0.12.1",
 "lazy_static",
 "libc",
@@ -7377,15 +7318,16 @@ dependencies = [
 "num-traits",
 "once_cell",
 "parquet",
+ "postgres-types",
+ "prettyplease",
 "proc-macro2",
- "prost 0.11.9",
+ "prost",
 "quote",
 "rand 0.8.5",
 "regex",
 "regex-automata 0.4.3",
 "regex-syntax 0.8.2",
 "reqwest 0.12.4",
- "rustls 0.21.11",
 "scopeguard",
 "serde",
 "serde_json",
@@ -7401,9 +7343,11 @@ dependencies = [
 "time",
 "time-macros",
 "tokio",
- "tokio-rustls 0.24.0",
+ "tokio-postgres",
+ "tokio-stream",
 "tokio-util",
 "toml_edit",
+ "tonic",
 "tower",
 "tracing",
 "tracing-core",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -130,7 +130,7 @@ pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
 procfs = "0.16"
 prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
-prost = "0.11"
+prost = "0.13"
 rand = "0.8"
 redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
@@ -178,7 +178,7 @@ tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.8"
 toml_edit = "0.22"
-tonic = {version = "0.9", features = ["tls", "tls-roots"]}
+tonic = {version = "0.12.3", features = ["tls", "tls-roots"]}
 tower-service = "0.3.2"
 tracing = "0.1"
 tracing-error = "0.2"
@@ -246,7 +246,7 @@ criterion = "0.5.1"
 rcgen = "0.12"
 rstest = "0.18"
 camino-tempfile = "1.0.2"
-tonic-build = "0.9"
+tonic-build = "0.12"

 [patch.crates-io]

--- a/14
+++ b/14
@@ -168,27 +168,27 @@ postgres-check-%: postgres-%
 neon-pg-ext-%: postgres-%
 	+@echo "Compiling neon $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
 	+@echo "Compiling neon_walredo $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile install
 	+@echo "Compiling neon_rmgr $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-rmgr-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_rmgr/Makefile install
 	+@echo "Compiling neon_test_utils $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install
 	+@echo "Compiling neon_utils $*"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/neon-utils-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install

@@ -220,7 +220,7 @@ neon-pg-clean-ext-%:
 walproposer-lib: neon-pg-ext-v17
 	+@echo "Compiling walproposer-lib"
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/walproposer-lib \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile walproposer-lib
 	cp $(POSTGRES_INSTALL_DIR)/v17/lib/libpgport.a $(POSTGRES_INSTALL_DIR)/build/walproposer-lib
@@ -333,7 +333,7 @@ postgres-%-pgindent: postgres-%-pg-bsd-indent postgres-%-typedefs.list
 # Indent pxgn/neon.
 .PHONY: neon-pgindent
 neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v17/bin/pg_config COPT='$(COPT)' \
 		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
 		INDENT=$(POSTGRES_INSTALL_DIR)/build/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
 		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
--- a/compute/Dockerfile.compute-node
+++ b/compute/Dockerfile.compute-node
@@ -109,13 +109,30 @@ RUN apt update && \
    libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
    protobuf-c-compiler xsltproc

+
+# Postgis 3.5.0 requires SFCGAL 1.4+
+#
+# It would be nice to update all versions together, but we must solve the SFCGAL dependency first.
 # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
-RUN case "${PG_VERSION}" in "v17") \
-    mkdir -p /sfcgal && \
-    echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
+# and also we must check backward compatibility with older versions of PostGIS.
+#
+# Use new version only for v17
+RUN case "${PG_VERSION}" in \
+    "v17") \
+        export SFCGAL_VERSION=1.4.1 \
+        export SFCGAL_CHECKSUM=1800c8a26241588f11cddcf433049e9b9aea902e923414d2ecef33a3295626c3 \
+    ;; \
+    "v14" | "v15" | "v16") \
+        export SFCGAL_VERSION=1.3.10 \
+        export SFCGAL_CHECKSUM=4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 \
+    ;; \
+    *) \
+        echo "unexpected PostgreSQL version" && exit 1 \
+    ;; \
    esac && \
-    wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
-    echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
+    mkdir -p /sfcgal && \
+    wget https://gitlab.com/sfcgal/SFCGAL/-/archive/v${SFCGAL_VERSION}/SFCGAL-v${SFCGAL_VERSION}.tar.gz -O SFCGAL.tar.gz && \
+    echo "${SFCGAL_CHECKSUM} SFCGAL.tar.gz" | sha256sum --check && \
    mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
    cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
@@ -123,15 +140,27 @@ RUN case "${PG_VERSION}" in "v17") \

 ENV PATH="/usr/local/pgsql/bin:$PATH"

-RUN case "${PG_VERSION}" in "v17") \
-    echo "Postgis doensn't yet support PG17 (needs 3.4.3, if not higher)" && exit 0;; \
+# Postgis 3.5.0 supports v17
+RUN case "${PG_VERSION}" in \
+    "v17") \
+        export POSTGIS_VERSION=3.5.0 \
+        export POSTGIS_CHECKSUM=ca698a22cc2b2b3467ac4e063b43a28413f3004ddd505bdccdd74c56a647f510 \
+    ;; \
+    "v14" | "v15" | "v16") \
+        export POSTGIS_VERSION=3.3.3 \
+        export POSTGIS_CHECKSUM=74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 \
+    ;; \
+    *) \
+        echo "unexpected PostgreSQL version" && exit 1 \
+    ;; \
    esac && \
-    wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
-    echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
+    wget https://download.osgeo.org/postgis/source/postgis-${POSTGIS_VERSION}.tar.gz -O postgis.tar.gz && \
+    echo "${POSTGIS_CHECKSUM} postgis.tar.gz" | sha256sum --check && \
    mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
    ./autogen.sh && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    cd extensions/postgis && \
    make clean && \
@@ -152,11 +181,27 @@ RUN case "${PG_VERSION}" in "v17") \
    cp /usr/local/pgsql/share/extension/address_standardizer.control /extensions/postgis && \
    cp /usr/local/pgsql/share/extension/address_standardizer_data_us.control /extensions/postgis

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
+# Uses versioned libraries, i.e. libpgrouting-3.4
+# and may introduce function signature changes between releases
+# i.e. release 3.5.0 has new signature for pg_dijkstra function
+#
+# Use new version only for v17
+# last release v3.6.2 - Mar 30, 2024
+RUN case "${PG_VERSION}" in \
+    "v17") \
+        export PGROUTING_VERSION=3.6.2 \
+        export PGROUTING_CHECKSUM=f4a1ed79d6f714e52548eca3bb8e5593c6745f1bde92eb5fb858efd8984dffa2 \
+    ;; \
+    "v14" | "v15" | "v16") \
+        export PGROUTING_VERSION=3.4.2 \
+        export PGROUTING_CHECKSUM=cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e \
+    ;; \
+    *) \
+        echo "unexpected PostgreSQL version" && exit 1 \
+    ;; \
    esac && \
-    wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
-    echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
+    wget https://github.com/pgRouting/pgrouting/archive/v${PGROUTING_VERSION}.tar.gz -O pgrouting.tar.gz && \
+    echo "${PGROUTING_CHECKSUM} pgrouting.tar.gz" | sha256sum --check && \
    mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release .. && \
@@ -215,10 +260,9 @@ FROM build-deps AS h3-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-        mkdir -p /h3/usr/ && \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
+# not version-specific
+# last release v4.1.0 - Jan 18, 2023
+RUN mkdir -p /h3/usr/ && \
    wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
    echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
    mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
@@ -229,10 +273,9 @@ RUN case "${PG_VERSION}" in "v17") \
    cp -R /h3/usr / && \
    rm -rf build

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
+# not version-specific
+# last release v4.1.3 - Jul 26, 2023
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
    echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
    mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
@@ -251,11 +294,10 @@ FROM build-deps AS unit-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
-    echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
+# not version-specific
+# last release 7.9 - Sep 15, 2024
+RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.9.tar.gz -O postgresql-unit.tar.gz && \
+    echo "e46de6245dcc8b2c2ecf29873dbd43b2b346773f31dd5ce4b8315895a052b456 postgresql-unit.tar.gz" | sha256sum --check && \
    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -302,12 +344,10 @@ FROM build-deps AS pgjwt-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
-    echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
+# not version-specific
+# doesn't use releases, last commit f3d82fd - Mar 2, 2023 
+RUN wget https://github.com/michelp/pgjwt/archive/f3d82fd30151e754e19ce5d6a06c71c20689ce3d.tar.gz -O pgjwt.tar.gz && \
+    echo "dae8ed99eebb7593b43013f6532d772b12dfecd55548d2673f2dfd0163f6d2b9 pgjwt.tar.gz" | sha256sum --check && \
    mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
@@ -342,10 +382,9 @@ FROM build-deps AS pg-hashids-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
+# not version-specific
+# last release v1.2.1 -Jan 12, 2018
+RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
    echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
    mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -405,10 +444,9 @@ FROM build-deps AS ip4r-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
+# not version-specific
+# last release v2.4.2 - Jul 29, 2023
+RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
    echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
    mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -425,10 +463,9 @@ FROM build-deps AS prefix-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
+# not version-specific
+# last release v1.2.10  - Jul 5, 2023
+RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
    echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
    mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -445,10 +482,9 @@ FROM build-deps AS hll-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
+# not version-specific
+# last release v2.18 - Aug 29, 2023
+RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
    echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
    mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -659,11 +695,10 @@ FROM build-deps AS pg-roaringbitmap-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# not version-specific
+# last release v0.5.4 - Jun 28, 2022
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 extensions is not supported yet by pg_roaringbitmap. Quit" && exit 0;; \
-    esac && \
-    wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
+RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
    echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -680,12 +715,27 @@ FROM build-deps AS pg-semver-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

+# Release 0.40.0 breaks backward compatibility with previous versions
+# see release note https://github.com/theory/pg-semver/releases/tag/v0.40.0
+# Use new version only for v17
+#
+# last release v0.40.0 - Jul 22, 2024
 ENV PATH="/usr/local/pgsql/bin/:$PATH"
-RUN case "${PG_VERSION}" in "v17") \
-    echo "v17 is not supported yet by pg_semver. Quit" && exit 0;; \
+RUN case "${PG_VERSION}" in \
+    "v17") \
+        export SEMVER_VERSION=0.40.0 \
+        export SEMVER_CHECKSUM=3e50bcc29a0e2e481e7b6d2bc937cadc5f5869f55d983b5a1aafeb49f5425cfc \
+    ;; \
+    "v14" | "v15" | "v16") \
+        export SEMVER_VERSION=0.32.1 \
+        export SEMVER_CHECKSUM=fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 \
+    ;; \
+    *) \
+        echo "unexpected PostgreSQL version" && exit 1 \
+    ;; \
    esac && \
-    wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
-    echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
+    wget https://github.com/theory/pg-semver/archive/refs/tags/v${SEMVER_VERSION}.tar.gz -O pg_semver.tar.gz && \
+    echo "${SEMVER_CHECKSUM} pg_semver.tar.gz" | sha256sum --check && \
    mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -402,8 +402,7 @@ fn start_postgres(
 ) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
-    state.status = ComputeStatus::Init;
-    compute.state_changed.notify_all();
+    state.set_status(ComputeStatus::Init, &compute.state_changed);

    info!(
        "running compute with features: {:?}",
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -109,6 +109,18 @@ impl ComputeState {
            metrics: ComputeMetrics::default(),
        }
    }
+
+    pub fn set_status(&mut self, status: ComputeStatus, state_changed: &Condvar) {
+        let prev = self.status;
+        info!("Changing compute status from {} to {}", prev, status);
+        self.status = status;
+        state_changed.notify_all();
+    }
+
+    pub fn set_failed_status(&mut self, err: anyhow::Error, state_changed: &Condvar) {
+        self.error = Some(format!("{err:?}"));
+        self.set_status(ComputeStatus::Failed, state_changed);
+    }
 }

 impl Default for ComputeState {
@@ -303,15 +315,12 @@ impl ComputeNode {

    pub fn set_status(&self, status: ComputeStatus) {
        let mut state = self.state.lock().unwrap();
-        state.status = status;
-        self.state_changed.notify_all();
+        state.set_status(status, &self.state_changed);
    }

    pub fn set_failed_status(&self, err: anyhow::Error) {
        let mut state = self.state.lock().unwrap();
-        state.error = Some(format!("{err:?}"));
-        state.status = ComputeStatus::Failed;
-        self.state_changed.notify_all();
+        state.set_failed_status(err, &self.state_changed);
    }

    pub fn get_status(&self) -> ComputeStatus {
@@ -1475,6 +1484,28 @@ LIMIT 100",
            info!("Pageserver config changed");
        }
    }
+
+    // Gather info about installed extensions
+    pub fn get_installed_extensions(&self) -> Result<()> {
+        let connstr = self.connstr.clone();
+
+        let rt = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .expect("failed to create runtime");
+        let result = rt
+            .block_on(crate::installed_extensions::get_installed_extensions(
+                connstr,
+            ))
+            .expect("failed to get installed extensions");
+
+        info!(
+            "{}",
+            serde_json::to_string(&result).expect("failed to serialize extensions list")
+        );
+
+        Ok(())
+    }
 }

 pub fn forward_termination_signal() {
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -24,8 +24,7 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
        // Re-check the status after waking up
        if state.status == ComputeStatus::ConfigurationPending {
            info!("got configuration request");
-            state.status = ComputeStatus::Configuration;
-            compute.state_changed.notify_all();
+            state.set_status(ComputeStatus::Configuration, &compute.state_changed);
            drop(state);

            let mut new_status = ComputeStatus::Failed;
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -165,6 +165,32 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }

+        // get the list of installed extensions
+        // currently only used in python tests
+        // TODO: call it from cplane
+        (&Method::GET, "/installed_extensions") => {
+            info!("serving /installed_extensions GET request");
+            let status = compute.get_status();
+            if status != ComputeStatus::Running {
+                let msg = format!(
+                    "invalid compute status for extensions request: {:?}",
+                    status
+                );
+                error!(msg);
+                return Response::new(Body::from(msg));
+            }
+
+            let connstr = compute.connstr.clone();
+            let res = crate::installed_extensions::get_installed_extensions(connstr).await;
+            match res {
+                Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
+                Err(e) => render_json_error(
+                    &format!("could not get list of installed extensions: {}", e),
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                ),
+            }
+        }
+
        // download extension files from remote extension storage on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
@@ -288,8 +314,7 @@ async fn handle_configure_request(
                return Err((msg, StatusCode::PRECONDITION_FAILED));
            }
            state.pspec = Some(parsed_spec);
-            state.status = ComputeStatus::ConfigurationPending;
-            compute.state_changed.notify_all();
+            state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
            drop(state);
            info!("set new spec and notified waiters");
        }
@@ -362,15 +387,15 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
        }
        if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
            let msg = format!(
-                "invalid compute status for termination request: {:?}",
-                state.status.clone()
+                "invalid compute status for termination request: {}",
+                state.status
            );
            return Err((msg, StatusCode::PRECONDITION_FAILED));
        }
-        state.status = ComputeStatus::TerminationPending;
-        compute.state_changed.notify_all();
+        state.set_status(ComputeStatus::TerminationPending, &compute.state_changed);
        drop(state);
    }
+
    forward_termination_signal();
    info!("sent signal and notified waiters");

@@ -384,7 +409,8 @@ async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (Str
        while state.status != ComputeStatus::Terminated {
            state = c.state_changed.wait(state).unwrap();
            info!(
-                "waiting for compute to become Terminated, current status: {:?}",
+                "waiting for compute to become {}, current status: {:?}",
+                ComputeStatus::Terminated,
                state.status
            );
        }
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -53,6 +53,20 @@ paths:
              schema:
                $ref: "#/components/schemas/ComputeInsights"

+  /installed_extensions:
+    get:
+      tags:
+      - Info
+      summary: Get installed extensions.
+      description: ""
+      operationId: getInstalledExtensions
+      responses:
+        200:
+          description: List of installed extensions
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/InstalledExtensions"
  /info:
    get:
      tags:
@@ -395,6 +409,24 @@ components:
        - configuration
      example: running

+    InstalledExtensions:
+      type: object
+      properties:
+        extensions:
+          description: Contains list of installed extensions.
+          type: array
+          items:
+            type: object
+            properties:
+              extname:
+                type: string
+              versions:
+                type: array
+                items:
+                  type: string
+              n_databases:
+                type: integer
+
    #
    # Errors
    #
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -0,0 +1,80 @@
+use compute_api::responses::{InstalledExtension, InstalledExtensions};
+use std::collections::HashMap;
+use std::collections::HashSet;
+use url::Url;
+
+use anyhow::Result;
+use postgres::{Client, NoTls};
+use tokio::task;
+
+/// We don't reuse get_existing_dbs() just for code clarity
+/// and to make database listing query here more explicit.
+///
+/// Limit the number of databases to 500 to avoid excessive load.
+fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
+    // `pg_database.datconnlimit = -2` means that the database is in the
+    // invalid state
+    let databases = client
+        .query(
+            "SELECT datname FROM pg_catalog.pg_database
+                WHERE datallowconn
+                AND datconnlimit <> - 2
+                LIMIT 500",
+            &[],
+        )?
+        .iter()
+        .map(|row| {
+            let db: String = row.get("datname");
+            db
+        })
+        .collect();
+
+    Ok(databases)
+}
+
+/// Connect to every database (see list_dbs above) and get the list of installed extensions.
+/// Same extension can be installed in multiple databases with different versions,
+/// we only keep the highest and lowest version across all databases.
+pub async fn get_installed_extensions(connstr: Url) -> Result<InstalledExtensions> {
+    let mut connstr = connstr.clone();
+
+    task::spawn_blocking(move || {
+        let mut client = Client::connect(connstr.as_str(), NoTls)?;
+        let databases: Vec<String> = list_dbs(&mut client)?;
+
+        let mut extensions_map: HashMap<String, InstalledExtension> = HashMap::new();
+        for db in databases.iter() {
+            connstr.set_path(db);
+            let mut db_client = Client::connect(connstr.as_str(), NoTls)?;
+            let extensions: Vec<(String, String)> = db_client
+                .query(
+                    "SELECT extname, extversion FROM pg_catalog.pg_extension;",
+                    &[],
+                )?
+                .iter()
+                .map(|row| (row.get("extname"), row.get("extversion")))
+                .collect();
+
+            for (extname, v) in extensions.iter() {
+                let version = v.to_string();
+                extensions_map
+                    .entry(extname.to_string())
+                    .and_modify(|e| {
+                        e.versions.insert(version.clone());
+                        // count the number of databases where the extension is installed
+                        e.n_databases += 1;
+                    })
+                    .or_insert(InstalledExtension {
+                        extname: extname.to_string(),
+                        versions: HashSet::from([version.clone()]),
+                        n_databases: 1,
+                    });
+            }
+        }
+
+        Ok(InstalledExtensions {
+            extensions: extensions_map.values().cloned().collect(),
+        })
+    })
+    .await?
+}
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -15,6 +15,7 @@ pub mod catalog;
 pub mod compute;
 pub mod disk_quota;
 pub mod extension_server;
+pub mod installed_extensions;
 pub mod local_proxy;
 pub mod lsn_lease;
 mod migration;
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,3 +1,4 @@
+use std::collections::HashSet;
 use std::fs::File;
 use std::path::Path;
 use std::str::FromStr;
@@ -189,6 +190,15 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let mut xact = client.transaction()?;
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

+    let mut jwks_roles = HashSet::new();
+    if let Some(local_proxy) = &spec.local_proxy_config {
+        for jwks_setting in local_proxy.jwks.iter().flatten() {
+            for role_name in &jwks_setting.role_names {
+                jwks_roles.insert(role_name.clone());
+            }
+        }
+    }
+
    // Print a list of existing Postgres roles (only in debug mode)
    if span_enabled!(Level::INFO) {
        let mut vec = Vec::new();
@@ -308,6 +318,9 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
                    name.pg_quote()
                );
+                if jwks_roles.contains(name.as_str()) {
+                    query = format!("CREATE ROLE {}", name.pg_quote());
+                }
                info!("running role create query: '{}'", &query);
                query.push_str(&role.to_pg_options());
                xact.execute(query.as_str(), &[])?;
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -1,5 +1,8 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.

+use std::collections::HashSet;
+use std::fmt::Display;
+
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize, Serializer};

@@ -58,6 +61,21 @@ pub enum ComputeStatus {
    Terminated,
 }

+impl Display for ComputeStatus {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            ComputeStatus::Empty => f.write_str("empty"),
+            ComputeStatus::ConfigurationPending => f.write_str("configuration-pending"),
+            ComputeStatus::Init => f.write_str("init"),
+            ComputeStatus::Running => f.write_str("running"),
+            ComputeStatus::Configuration => f.write_str("configuration"),
+            ComputeStatus::Failed => f.write_str("failed"),
+            ComputeStatus::TerminationPending => f.write_str("termination-pending"),
+            ComputeStatus::Terminated => f.write_str("terminated"),
+        }
+    }
+}
+
 fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
 where
    S: Serializer,
@@ -138,3 +156,15 @@ pub enum ControlPlaneComputeStatus {
    // should be able to start with provided spec.
    Attached,
 }
+
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct InstalledExtension {
+    pub extname: String,
+    pub versions: HashSet<String>,
+    pub n_databases: u32, // Number of databases using this extension
+}
+
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct InstalledExtensions {
+    pub extensions: Vec<InstalledExtension>,
+}
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -104,8 +104,7 @@ pub struct ConfigToml {
    pub image_compression: ImageCompressionAlgorithm,
    pub ephemeral_bytes_per_memory_kb: usize,
    pub l0_flush: Option<crate::models::L0FlushConfig>,
-    pub virtual_file_direct_io: crate::models::virtual_file::DirectIoMode,
-    pub io_buffer_alignment: usize,
+    pub virtual_file_io_mode: Option<crate::models::virtual_file::IoMode>,
 }

 #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
@@ -388,10 +387,7 @@ impl Default for ConfigToml {
            image_compression: (DEFAULT_IMAGE_COMPRESSION),
            ephemeral_bytes_per_memory_kb: (DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB),
            l0_flush: None,
-            virtual_file_direct_io: crate::models::virtual_file::DirectIoMode::default(),
-
-            io_buffer_alignment: DEFAULT_IO_BUFFER_ALIGNMENT,
-
+            virtual_file_io_mode: None,
            tenant_config: TenantConfigToml::default(),
        }
    }
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -972,8 +972,6 @@ pub struct TopTenantShardsResponse {
 }

 pub mod virtual_file {
-    use std::path::PathBuf;
-
    #[derive(
        Copy,
        Clone,
@@ -994,50 +992,45 @@ pub mod virtual_file {
    }

    /// Direct IO modes for a pageserver.
-    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
-    #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)]
-    pub enum DirectIoMode {
-        /// Direct IO disabled (uses usual buffered IO).
-        #[default]
-        Disabled,
-        /// Direct IO disabled (performs checks and perf simulations).
-        Evaluate {
-            /// Alignment check level
-            alignment_check: DirectIoAlignmentCheckLevel,
-            /// Latency padded for performance simulation.
-            latency_padding: DirectIoLatencyPadding,
-        },
-        /// Direct IO enabled.
-        Enabled {
-            /// Actions to perform on alignment error.
-            on_alignment_error: DirectIoOnAlignmentErrorAction,
-        },
+    #[derive(
+        Copy,
+        Clone,
+        PartialEq,
+        Eq,
+        Hash,
+        strum_macros::EnumString,
+        strum_macros::Display,
+        serde_with::DeserializeFromStr,
+        serde_with::SerializeDisplay,
+        Debug,
+    )]
+    #[strum(serialize_all = "kebab-case")]
+    #[repr(u8)]
+    pub enum IoMode {
+        /// Uses buffered IO.
+        Buffered,
+        /// Uses direct IO, error out if the operation fails.
+        #[cfg(target_os = "linux")]
+        Direct,
    }

-    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
-    #[serde(rename_all = "kebab-case")]
-    pub enum DirectIoAlignmentCheckLevel {
-        #[default]
-        Error,
-        Log,
-        None,
+    impl IoMode {
+        pub const fn preferred() -> Self {
+            Self::Buffered
+        }
    }

-    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
-    #[serde(rename_all = "kebab-case")]
-    pub enum DirectIoOnAlignmentErrorAction {
-        Error,
-        #[default]
-        FallbackToBuffered,
-    }
+    impl TryFrom<u8> for IoMode {
+        type Error = u8;

-    #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize, Default)]
-    #[serde(tag = "type", rename_all = "kebab-case")]
-    pub enum DirectIoLatencyPadding {
-        /// Pad virtual file operations with IO to a fake file.
-        FakeFileRW { path: PathBuf },
-        #[default]
-        None,
+        fn try_from(value: u8) -> Result<Self, Self::Error> {
+            Ok(match value {
+                v if v == (IoMode::Buffered as u8) => IoMode::Buffered,
+                #[cfg(target_os = "linux")]
+                v if v == (IoMode::Direct as u8) => IoMode::Direct,
+                x => return Err(x),
+            })
+        }
    }
 }

--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -496,26 +496,12 @@ impl RemoteStorage for AzureBlobStorage {
            builder = builder.if_match(IfMatchCondition::NotMatch(etag.to_string()))
        }

-        self.download_for_builder(builder, cancel).await
-    }
-
-    async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError> {
-        let blob_client = self.client.blob_client(self.relative_path_to_name(from));
-
-        let mut builder = blob_client.get();
-
-        let range: Range = if let Some(end_exclusive) = end_exclusive {
-            (start_inclusive..end_exclusive).into()
-        } else {
-            (start_inclusive..).into()
-        };
-        builder = builder.range(range);
+        if let Some((start, end)) = opts.byte_range() {
+            builder = builder.range(match end {
+                Some(end) => Range::Range(start..end),
+                None => Range::RangeFrom(start..),
+            });
+        }

        self.download_for_builder(builder, cancel).await
    }
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -19,7 +19,8 @@ mod simulate_failures;
 mod support;

 use std::{
-    collections::HashMap, fmt::Debug, num::NonZeroU32, pin::Pin, sync::Arc, time::SystemTime,
+    collections::HashMap, fmt::Debug, num::NonZeroU32, ops::Bound, pin::Pin, sync::Arc,
+    time::SystemTime,
 };

 use anyhow::Context;
@@ -162,11 +163,60 @@ pub struct Listing {
 }

 /// Options for downloads. The default value is a plain GET.
-#[derive(Default)]
 pub struct DownloadOpts {
    /// If given, returns [`DownloadError::Unmodified`] if the object still has
    /// the same ETag (using If-None-Match).
    pub etag: Option<Etag>,
+    /// The start of the byte range to download, or unbounded.
+    pub byte_start: Bound<u64>,
+    /// The end of the byte range to download, or unbounded. Must be after the
+    /// start bound.
+    pub byte_end: Bound<u64>,
+}
+
+impl Default for DownloadOpts {
+    fn default() -> Self {
+        Self {
+            etag: Default::default(),
+            byte_start: Bound::Unbounded,
+            byte_end: Bound::Unbounded,
+        }
+    }
+}
+
+impl DownloadOpts {
+    /// Returns the byte range with inclusive start and exclusive end, or None
+    /// if unbounded.
+    pub fn byte_range(&self) -> Option<(u64, Option<u64>)> {
+        if self.byte_start == Bound::Unbounded && self.byte_end == Bound::Unbounded {
+            return None;
+        }
+        let start = match self.byte_start {
+            Bound::Excluded(i) => i + 1,
+            Bound::Included(i) => i,
+            Bound::Unbounded => 0,
+        };
+        let end = match self.byte_end {
+            Bound::Excluded(i) => Some(i),
+            Bound::Included(i) => Some(i + 1),
+            Bound::Unbounded => None,
+        };
+        if let Some(end) = end {
+            assert!(start < end, "range end {end} at or before start {start}");
+        }
+        Some((start, end))
+    }
+
+    /// Returns the byte range as an RFC 2616 Range header value with inclusive
+    /// bounds, or None if unbounded.
+    pub fn byte_range_header(&self) -> Option<String> {
+        self.byte_range()
+            .map(|(start, end)| (start, end.map(|end| end - 1))) // make end inclusive
+            .map(|(start, end)| match end {
+                Some(end) => format!("bytes={start}-{end}"),
+                None => format!("bytes={start}-"),
+            })
+    }
 }

 /// Storage (potentially remote) API to manage its state.
@@ -257,21 +307,6 @@ pub trait RemoteStorage: Send + Sync + 'static {
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError>;

-    /// Streams a given byte range of the remote storage entry contents.
-    ///
-    /// The returned download stream will obey initial timeout and cancellation signal by erroring
-    /// on whichever happens first. Only one of the reasons will fail the stream, which is usually
-    /// enough for `tokio::io::copy_buf` usage. If needed the error can be filtered out.
-    ///
-    /// Returns the metadata, if any was stored with the file previously.
-    async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError>;
-
    /// Delete a single path from remote storage.
    ///
    /// If the operation fails because of timeout or cancellation, the root cause of the error will be
@@ -425,33 +460,6 @@ impl<Other: RemoteStorage> GenericRemoteStorage<Arc<Other>> {
        }
    }

-    pub async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError> {
-        match self {
-            Self::LocalFs(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
-                    .await
-            }
-            Self::AwsS3(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
-                    .await
-            }
-            Self::AzureBlob(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
-                    .await
-            }
-            Self::Unreliable(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive, cancel)
-                    .await
-            }
-        }
-    }
-
    /// See [`RemoteStorage::delete`]
    pub async fn delete(
        &self,
@@ -573,20 +581,6 @@ impl GenericRemoteStorage {
            })
    }

-    /// Downloads the storage object into the `to_path` provided.
-    /// `byte_range` could be specified to dowload only a part of the file, if needed.
-    pub async fn download_storage_object(
-        &self,
-        byte_range: Option<(u64, Option<u64>)>,
-        from: &RemotePath,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError> {
-        match byte_range {
-            Some((start, end)) => self.download_byte_range(from, start, end, cancel).await,
-            None => self.download(from, &DownloadOpts::default(), cancel).await,
-        }
-    }
-
    /// The name of the bucket/container/etc.
    pub fn bucket_name(&self) -> Option<&str> {
        match self {
@@ -660,6 +654,76 @@ impl ConcurrencyLimiter {
 mod tests {
    use super::*;

+    /// DownloadOpts::byte_range() should generate (inclusive, exclusive) ranges
+    /// with optional end bound, or None when unbounded.
+    #[test]
+    fn download_opts_byte_range() {
+        // Consider using test_case or a similar table-driven test framework.
+        let cases = [
+            // (byte_start, byte_end, expected)
+            (Bound::Unbounded, Bound::Unbounded, None),
+            (Bound::Unbounded, Bound::Included(7), Some((0, Some(8)))),
+            (Bound::Unbounded, Bound::Excluded(7), Some((0, Some(7)))),
+            (Bound::Included(3), Bound::Unbounded, Some((3, None))),
+            (Bound::Included(3), Bound::Included(7), Some((3, Some(8)))),
+            (Bound::Included(3), Bound::Excluded(7), Some((3, Some(7)))),
+            (Bound::Excluded(3), Bound::Unbounded, Some((4, None))),
+            (Bound::Excluded(3), Bound::Included(7), Some((4, Some(8)))),
+            (Bound::Excluded(3), Bound::Excluded(7), Some((4, Some(7)))),
+            // 1-sized ranges are fine, 0 aren't and will panic (separate test).
+            (Bound::Included(3), Bound::Included(3), Some((3, Some(4)))),
+            (Bound::Included(3), Bound::Excluded(4), Some((3, Some(4)))),
+        ];
+
+        for (byte_start, byte_end, expect) in cases {
+            let opts = DownloadOpts {
+                byte_start,
+                byte_end,
+                ..Default::default()
+            };
+            let result = opts.byte_range();
+            assert_eq!(
+                result, expect,
+                "byte_start={byte_start:?} byte_end={byte_end:?}"
+            );
+
+            // Check generated HTTP header, which uses an inclusive range.
+            let expect_header = expect.map(|(start, end)| match end {
+                Some(end) => format!("bytes={start}-{}", end - 1), // inclusive end
+                None => format!("bytes={start}-"),
+            });
+            assert_eq!(
+                opts.byte_range_header(),
+                expect_header,
+                "byte_start={byte_start:?} byte_end={byte_end:?}"
+            );
+        }
+    }
+
+    /// DownloadOpts::byte_range() zero-sized byte range should panic.
+    #[test]
+    #[should_panic]
+    fn download_opts_byte_range_zero() {
+        DownloadOpts {
+            byte_start: Bound::Included(3),
+            byte_end: Bound::Excluded(3),
+            ..Default::default()
+        }
+        .byte_range();
+    }
+
+    /// DownloadOpts::byte_range() negative byte range should panic.
+    #[test]
+    #[should_panic]
+    fn download_opts_byte_range_negative() {
+        DownloadOpts {
+            byte_start: Bound::Included(3),
+            byte_end: Bound::Included(2),
+            ..Default::default()
+        }
+        .byte_range();
+    }
+
    #[test]
    fn test_object_name() {
        let k = RemotePath::new(Utf8Path::new("a/b/c")).unwrap();
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -506,54 +506,7 @@ impl RemoteStorage for LocalFs {
            return Err(DownloadError::Unmodified);
        }

-        let source = ReaderStream::new(
-            fs::OpenOptions::new()
-                .read(true)
-                .open(&target_path)
-                .await
-                .with_context(|| {
-                    format!("Failed to open source file {target_path:?} to use in the download")
-                })
-                .map_err(DownloadError::Other)?,
-        );
-
-        let metadata = self
-            .read_storage_metadata(&target_path)
-            .await
-            .map_err(DownloadError::Other)?;
-
-        let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
-        let source = crate::support::DownloadStream::new(cancel_or_timeout, source);
-
-        Ok(Download {
-            metadata,
-            last_modified: file_metadata
-                .modified()
-                .map_err(|e| DownloadError::Other(anyhow::anyhow!(e).context("Reading mtime")))?,
-            etag,
-            download_stream: Box::pin(source),
-        })
-    }
-
-    async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError> {
-        if let Some(end_exclusive) = end_exclusive {
-            if end_exclusive <= start_inclusive {
-                return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) is not less than end_exclusive ({end_exclusive:?})")));
-            };
-            if start_inclusive == end_exclusive.saturating_sub(1) {
-                return Err(DownloadError::Other(anyhow::anyhow!("Invalid range, start ({start_inclusive}) and end_exclusive ({end_exclusive:?}) difference is zero bytes")));
-            }
-        }
-
-        let target_path = from.with_base(&self.storage_root);
-        let file_metadata = file_metadata(&target_path).await?;
-        let mut source = tokio::fs::OpenOptions::new()
+        let mut file = fs::OpenOptions::new()
            .read(true)
            .open(&target_path)
            .await
@@ -562,31 +515,29 @@ impl RemoteStorage for LocalFs {
            })
            .map_err(DownloadError::Other)?;

-        let len = source
-            .metadata()
-            .await
-            .context("query file length")
-            .map_err(DownloadError::Other)?
-            .len();
+        let mut take = file_metadata.len();
+        if let Some((start, end)) = opts.byte_range() {
+            if start > 0 {
+                file.seek(io::SeekFrom::Start(start))
+                    .await
+                    .context("Failed to seek to the range start in a local storage file")
+                    .map_err(DownloadError::Other)?;
+            }
+            if let Some(end) = end {
+                take = end - start;
+            }
+        }

-        source
-            .seek(io::SeekFrom::Start(start_inclusive))
-            .await
-            .context("Failed to seek to the range start in a local storage file")
-            .map_err(DownloadError::Other)?;
+        let source = ReaderStream::new(file.take(take));

        let metadata = self
            .read_storage_metadata(&target_path)
            .await
            .map_err(DownloadError::Other)?;

-        let source = source.take(end_exclusive.unwrap_or(len) - start_inclusive);
-        let source = ReaderStream::new(source);
-
        let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
        let source = crate::support::DownloadStream::new(cancel_or_timeout, source);

-        let etag = mock_etag(&file_metadata);
        Ok(Download {
            metadata,
            last_modified: file_metadata
@@ -688,7 +639,7 @@ mod fs_tests {
    use super::*;

    use camino_tempfile::tempdir;
-    use std::{collections::HashMap, io::Write};
+    use std::{collections::HashMap, io::Write, ops::Bound};

    async fn read_and_check_metadata(
        storage: &LocalFs,
@@ -804,10 +755,12 @@ mod fs_tests {
        let (first_part_local, second_part_local) = uploaded_bytes.split_at(3);

        let first_part_download = storage
-            .download_byte_range(
+            .download(
                &upload_target,
-                0,
-                Some(first_part_local.len() as u64),
+                &DownloadOpts {
+                    byte_end: Bound::Excluded(first_part_local.len() as u64),
+                    ..Default::default()
+                },
                &cancel,
            )
            .await?;
@@ -823,10 +776,15 @@ mod fs_tests {
        );

        let second_part_download = storage
-            .download_byte_range(
+            .download(
                &upload_target,
-                first_part_local.len() as u64,
-                Some((first_part_local.len() + second_part_local.len()) as u64),
+                &DownloadOpts {
+                    byte_start: Bound::Included(first_part_local.len() as u64),
+                    byte_end: Bound::Excluded(
+                        (first_part_local.len() + second_part_local.len()) as u64,
+                    ),
+                    ..Default::default()
+                },
                &cancel,
            )
            .await?;
@@ -842,7 +800,14 @@ mod fs_tests {
        );

        let suffix_bytes = storage
-            .download_byte_range(&upload_target, 13, None, &cancel)
+            .download(
+                &upload_target,
+                &DownloadOpts {
+                    byte_start: Bound::Included(13),
+                    ..Default::default()
+                },
+                &cancel,
+            )
            .await?
            .download_stream;
        let suffix_bytes = aggregate(suffix_bytes).await?;
@@ -850,7 +815,7 @@ mod fs_tests {
        assert_eq!(upload_name, suffix);

        let all_bytes = storage
-            .download_byte_range(&upload_target, 0, None, &cancel)
+            .download(&upload_target, &DownloadOpts::default(), &cancel)
            .await?
            .download_stream;
        let all_bytes = aggregate(all_bytes).await?;
@@ -861,48 +826,26 @@ mod fs_tests {
    }

    #[tokio::test]
-    async fn download_file_range_negative() -> anyhow::Result<()> {
-        let (storage, cancel) = create_storage()?;
+    #[should_panic(expected = "at or before start")]
+    async fn download_file_range_negative() {
+        let (storage, cancel) = create_storage().unwrap();
        let upload_name = "upload_1";
-        let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel).await?;
+        let upload_target = upload_dummy_file(&storage, upload_name, None, &cancel)
+            .await
+            .unwrap();

-        let start = 1_000_000_000;
-        let end = start + 1;
-        match storage
-            .download_byte_range(
+        storage
+            .download(
                &upload_target,
-                start,
-                Some(end), // exclusive end
+                &DownloadOpts {
+                    byte_start: Bound::Included(10),
+                    byte_end: Bound::Excluded(10),
+                    ..Default::default()
+                },
                &cancel,
            )
            .await
-        {
-            Ok(_) => panic!("Should not allow downloading wrong ranges"),
-            Err(e) => {
-                let error_string = e.to_string();
-                assert!(error_string.contains("zero bytes"));
-                assert!(error_string.contains(&start.to_string()));
-                assert!(error_string.contains(&end.to_string()));
-            }
-        }
-
-        let start = 10000;
-        let end = 234;
-        assert!(start > end, "Should test an incorrect range");
-        match storage
-            .download_byte_range(&upload_target, start, Some(end), &cancel)
-            .await
-        {
-            Ok(_) => panic!("Should not allow downloading wrong ranges"),
-            Err(e) => {
-                let error_string = e.to_string();
-                assert!(error_string.contains("Invalid range"));
-                assert!(error_string.contains(&start.to_string()));
-                assert!(error_string.contains(&end.to_string()));
-            }
-        }
-
-        Ok(())
+            .unwrap();
    }

    #[tokio::test]
@@ -945,10 +888,12 @@ mod fs_tests {
        let (first_part_local, _) = uploaded_bytes.split_at(3);

        let partial_download_with_metadata = storage
-            .download_byte_range(
+            .download(
                &upload_target,
-                0,
-                Some(first_part_local.len() as u64),
+                &DownloadOpts {
+                    byte_end: Bound::Excluded(first_part_local.len() as u64),
+                    ..Default::default()
+                },
                &cancel,
            )
            .await?;
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -804,34 +804,7 @@ impl RemoteStorage for S3Bucket {
                bucket: self.bucket_name.clone(),
                key: self.relative_path_to_s3_object(from),
                etag: opts.etag.as_ref().map(|e| e.to_string()),
-                range: None,
-            },
-            cancel,
-        )
-        .await
-    }
-
-    async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError> {
-        // S3 accepts ranges as https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35
-        // and needs both ends to be exclusive
-        let end_inclusive = end_exclusive.map(|end| end.saturating_sub(1));
-        let range = Some(match end_inclusive {
-            Some(end_inclusive) => format!("bytes={start_inclusive}-{end_inclusive}"),
-            None => format!("bytes={start_inclusive}-"),
-        });
-
-        self.download_object(
-            GetObjectRequest {
-                bucket: self.bucket_name.clone(),
-                key: self.relative_path_to_s3_object(from),
-                etag: None,
-                range,
+                range: opts.byte_range_header(),
            },
            cancel,
        )
--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -170,28 +170,13 @@ impl RemoteStorage for UnreliableWrapper {
        opts: &DownloadOpts,
        cancel: &CancellationToken,
    ) -> Result<Download, DownloadError> {
+        // Note: We treat any byte range as an "attempt" of the same operation.
+        // We don't pay attention to the ranges. That's good enough for now.
        self.attempt(RemoteOp::Download(from.clone()))
            .map_err(DownloadError::Other)?;
        self.inner.download(from, opts, cancel).await
    }

-    async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-        cancel: &CancellationToken,
-    ) -> Result<Download, DownloadError> {
-        // Note: We treat any download_byte_range as an "attempt" of the same
-        // operation. We don't pay attention to the ranges. That's good enough
-        // for now.
-        self.attempt(RemoteOp::Download(from.clone()))
-            .map_err(DownloadError::Other)?;
-        self.inner
-            .download_byte_range(from, start_inclusive, end_exclusive, cancel)
-            .await
-    }
-
    async fn delete(&self, path: &RemotePath, cancel: &CancellationToken) -> anyhow::Result<()> {
        self.delete_inner(path, true, cancel).await
    }
--- a/libs/remote_storage/tests/common/tests.rs
+++ b/libs/remote_storage/tests/common/tests.rs
@@ -2,6 +2,7 @@ use anyhow::Context;
 use camino::Utf8Path;
 use futures::StreamExt;
 use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath};
+use std::ops::Bound;
 use std::sync::Arc;
 use std::{collections::HashSet, num::NonZeroU32};
 use test_context::test_context;
@@ -293,7 +294,15 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
    // Full range (end specified)
    let dl = ctx
        .client
-        .download_byte_range(&path, 0, Some(len as u64), &cancel)
+        .download(
+            &path,
+            &DownloadOpts {
+                byte_start: Bound::Included(0),
+                byte_end: Bound::Excluded(len as u64),
+                ..Default::default()
+            },
+            &cancel,
+        )
        .await?;
    let buf = download_to_vec(dl).await?;
    assert_eq!(&buf, &orig);
@@ -301,7 +310,15 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
    // partial range (end specified)
    let dl = ctx
        .client
-        .download_byte_range(&path, 4, Some(10), &cancel)
+        .download(
+            &path,
+            &DownloadOpts {
+                byte_start: Bound::Included(4),
+                byte_end: Bound::Excluded(10),
+                ..Default::default()
+            },
+            &cancel,
+        )
        .await?;
    let buf = download_to_vec(dl).await?;
    assert_eq!(&buf, &orig[4..10]);
@@ -309,7 +326,15 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
    // partial range (end beyond real end)
    let dl = ctx
        .client
-        .download_byte_range(&path, 8, Some(len as u64 * 100), &cancel)
+        .download(
+            &path,
+            &DownloadOpts {
+                byte_start: Bound::Included(8),
+                byte_end: Bound::Excluded(len as u64 * 100),
+                ..Default::default()
+            },
+            &cancel,
+        )
        .await?;
    let buf = download_to_vec(dl).await?;
    assert_eq!(&buf, &orig[8..]);
@@ -317,7 +342,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
    // Partial range (end unspecified)
    let dl = ctx
        .client
-        .download_byte_range(&path, 4, None, &cancel)
+        .download(
+            &path,
+            &DownloadOpts {
+                byte_start: Bound::Included(4),
+                ..Default::default()
+            },
+            &cancel,
+        )
        .await?;
    let buf = download_to_vec(dl).await?;
    assert_eq!(&buf, &orig[4..]);
@@ -325,7 +357,14 @@ async fn upload_download_works(ctx: &mut MaybeEnabledStorage) -> anyhow::Result<
    // Full range (end unspecified)
    let dl = ctx
        .client
-        .download_byte_range(&path, 0, None, &cancel)
+        .download(
+            &path,
+            &DownloadOpts {
+                byte_start: Bound::Included(0),
+                ..Default::default()
+            },
+            &cancel,
+        )
        .await?;
    let buf = download_to_vec(dl).await?;
    assert_eq!(&buf, &orig);
--- a/libs/utils/src/auth.rs
+++ b/libs/utils/src/auth.rs
@@ -31,9 +31,12 @@ pub enum Scope {
    /// The scope used by pageservers in upcalls to storage controller and cloud control plane
    #[serde(rename = "generations_api")]
    GenerationsApi,
-    /// Allows access to control plane managment API and some storage controller endpoints.
+    /// Allows access to control plane managment API and all storage controller endpoints.
    Admin,

+    /// Allows access to control plane & storage controller endpoints used in infrastructure automation (e.g. node registration)
+    Infra,
+
    /// Allows access to storage controller APIs used by the scrubber, to interrogate the state
    /// of a tenant & post scrub results.
    Scrubber,
--- a/libs/vm_monitor/src/runner.rs
+++ b/libs/vm_monitor/src/runner.rs
@@ -79,8 +79,7 @@ pub struct Config {
    /// memory.
    ///
    /// The default value of `0.15` means that we *guarantee* sending upscale requests if the
-    /// cgroup is using more than 85% of total memory (even if we're *not* separately reserving
-    /// memory for the file cache).
+    /// cgroup is using more than 85% of total memory.
    cgroup_min_overhead_fraction: f64,

    cgroup_downscale_threshold_buffer_bytes: u64,
@@ -97,24 +96,12 @@ impl Default for Config {
 }

 impl Config {
-    fn cgroup_threshold(&self, total_mem: u64, file_cache_disk_size: u64) -> u64 {
-        // If the file cache is in tmpfs, then it will count towards shmem usage of the cgroup,
-        // and thus be non-reclaimable, so we should allow for additional memory usage.
-        //
-        // If the file cache sits on disk, our desired stable system state is for it to be fully
-        // page cached (its contents should only be paged to/from disk in situations where we can't
-        // upscale fast enough). Page-cached memory is reclaimable, so we need to lower the
-        // threshold for non-reclaimable memory so we scale up *before* the kernel starts paging
-        // out the file cache.
-        let memory_remaining_for_cgroup = total_mem.saturating_sub(file_cache_disk_size);
-
-        // Even if we're not separately making room for the file cache (if it's in tmpfs), we still
-        // want our threshold to be met gracefully instead of letting postgres get OOM-killed.
+    fn cgroup_threshold(&self, total_mem: u64) -> u64 {
+        // We want our threshold to be met gracefully instead of letting postgres get OOM-killed
+        // (or if there's room, spilling to swap).
        // So we guarantee that there's at least `cgroup_min_overhead_fraction` of total memory
        // remaining above the threshold.
-        let max_threshold = (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64;
-
-        memory_remaining_for_cgroup.min(max_threshold)
+        (total_mem as f64 * (1.0 - self.cgroup_min_overhead_fraction)) as u64
    }
 }

@@ -149,11 +136,6 @@ impl Runner {

        let mem = get_total_system_memory();

-        let mut file_cache_disk_size = 0;
-
-        // We need to process file cache initialization before cgroup initialization, so that the memory
-        // allocated to the file cache is appropriately taken into account when we decide the cgroup's
-        // memory limits.
        if let Some(connstr) = &args.pgconnstr {
            info!("initializing file cache");
            let config = FileCacheConfig::default();
@@ -184,7 +166,6 @@ impl Runner {
                info!("file cache size actually got set to {actual_size}")
            }

-            file_cache_disk_size = actual_size;
            state.filecache = Some(file_cache);
        }

@@ -207,7 +188,7 @@ impl Runner {
                cgroup.watch(hist_tx).await
            });

-            let threshold = state.config.cgroup_threshold(mem, file_cache_disk_size);
+            let threshold = state.config.cgroup_threshold(mem);
            info!(threshold, "set initial cgroup threshold",);

            state.cgroup = Some(CgroupState {
@@ -259,9 +240,7 @@ impl Runner {
                return Ok((false, status.to_owned()));
            }

-            let new_threshold = self
-                .config
-                .cgroup_threshold(usable_system_memory, expected_file_cache_size);
+            let new_threshold = self.config.cgroup_threshold(usable_system_memory);

            let current = last_history.avg_non_reclaimable;

@@ -282,13 +261,11 @@ impl Runner {

        // The downscaling has been approved. Downscale the file cache, then the cgroup.
        let mut status = vec![];
-        let mut file_cache_disk_size = 0;
        if let Some(file_cache) = &mut self.filecache {
            let actual_usage = file_cache
                .set_file_cache_size(expected_file_cache_size)
                .await
                .context("failed to set file cache size")?;
-            file_cache_disk_size = actual_usage;
            let message = format!(
                "set file cache size to {} MiB",
                bytes_to_mebibytes(actual_usage),
@@ -298,9 +275,7 @@ impl Runner {
        }

        if let Some(cgroup) = &mut self.cgroup {
-            let new_threshold = self
-                .config
-                .cgroup_threshold(usable_system_memory, file_cache_disk_size);
+            let new_threshold = self.config.cgroup_threshold(usable_system_memory);

            let message = format!(
                "set cgroup memory threshold from {} MiB to {} MiB, of new total {} MiB",
@@ -329,7 +304,6 @@ impl Runner {
        let new_mem = resources.mem;
        let usable_system_memory = new_mem.saturating_sub(self.config.sys_buffer_bytes);

-        let mut file_cache_disk_size = 0;
        if let Some(file_cache) = &mut self.filecache {
            let expected_usage = file_cache.config.calculate_cache_size(usable_system_memory);
            info!(
@@ -342,7 +316,6 @@ impl Runner {
                .set_file_cache_size(expected_usage)
                .await
                .context("failed to set file cache size")?;
-            file_cache_disk_size = actual_usage;

            if actual_usage != expected_usage {
                warn!(
@@ -354,9 +327,7 @@ impl Runner {
        }

        if let Some(cgroup) = &mut self.cgroup {
-            let new_threshold = self
-                .config
-                .cgroup_threshold(usable_system_memory, file_cache_disk_size);
+            let new_threshold = self.config.cgroup_threshold(usable_system_memory);

            info!(
                "set cgroup memory threshold from {} MiB to {} MiB of new total {} MiB",
--- a/pageserver/benches/bench_ingest.rs
+++ b/pageserver/benches/bench_ingest.rs
@@ -164,11 +164,7 @@ fn criterion_benchmark(c: &mut Criterion) {
    let conf: &'static PageServerConf = Box::leak(Box::new(
        pageserver::config::PageServerConf::dummy_conf(temp_dir.path().to_path_buf()),
    ));
-    virtual_file::init(
-        16384,
-        virtual_file::io_engine_for_bench(),
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
-    );
+    virtual_file::init(16384, virtual_file::io_engine_for_bench());
    page_cache::init(conf.page_cache_size);

    {
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -540,10 +540,13 @@ impl Client {
            .map_err(Error::ReceiveBody)
    }

-    /// Configs io buffer alignment at runtime.
-    pub async fn put_io_alignment(&self, align: usize) -> Result<()> {
-        let uri = format!("{}/v1/io_alignment", self.mgmt_api_endpoint);
-        self.request(Method::PUT, uri, align)
+    /// Configs io mode at runtime.
+    pub async fn put_io_mode(
+        &self,
+        mode: &pageserver_api::models::virtual_file::IoMode,
+    ) -> Result<()> {
+        let uri = format!("{}/v1/io_mode", self.mgmt_api_endpoint);
+        self.request(Method::PUT, uri, mode)
            .await?
            .json()
            .await
--- a/pageserver/ctl/src/layer_map_analyzer.rs
+++ b/pageserver/ctl/src/layer_map_analyzer.rs
@@ -152,11 +152,7 @@ pub(crate) async fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);

    // Initialize virtual_file (file desriptor cache) and page cache which are needed to access layer persistent B-Tree.
-    pageserver::virtual_file::init(
-        10,
-        virtual_file::api::IoEngineKind::StdFs,
-        pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
-    );
+    pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    pageserver::page_cache::init(100);

    let mut total_delta_layers = 0usize;
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -59,7 +59,7 @@ pub(crate) enum LayerCmd {

 async fn read_delta_file(path: impl AsRef<Path>, ctx: &RequestContext) -> Result<()> {
    let path = Utf8Path::from_path(path.as_ref()).expect("non-Unicode path");
-    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs, 1);
+    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    page_cache::init(100);
    let file = VirtualFile::open(path, ctx).await?;
    let file_id = page_cache::next_file_id();
@@ -190,11 +190,7 @@ pub(crate) async fn main(cmd: &LayerCmd) -> Result<()> {
            new_tenant_id,
            new_timeline_id,
        } => {
-            pageserver::virtual_file::init(
-                10,
-                virtual_file::api::IoEngineKind::StdFs,
-                pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT,
-            );
+            pageserver::virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
            pageserver::page_cache::init(100);

            let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
--- a/pageserver/ctl/src/main.rs
+++ b/pageserver/ctl/src/main.rs
@@ -26,7 +26,7 @@ use pageserver::{
    tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
    virtual_file,
 };
-use pageserver_api::{config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT, shard::TenantShardId};
+use pageserver_api::shard::TenantShardId;
 use postgres_ffi::ControlFileData;
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use tokio_util::sync::CancellationToken;
@@ -205,11 +205,7 @@ fn read_pg_control_file(control_file_path: &Utf8Path) -> anyhow::Result<()> {

 async fn print_layerfile(path: &Utf8Path) -> anyhow::Result<()> {
    // Basic initialization of things that don't change after startup
-    virtual_file::init(
-        10,
-        virtual_file::api::IoEngineKind::StdFs,
-        DEFAULT_IO_BUFFER_ALIGNMENT,
-    );
+    virtual_file::init(10, virtual_file::api::IoEngineKind::StdFs);
    page_cache::init(100);
    let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
    dump_layerfile_from_path(path, true, &ctx).await
--- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
+++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs
@@ -59,9 +59,9 @@ pub(crate) struct Args {
    #[clap(long)]
    set_io_engine: Option<pageserver_api::models::virtual_file::IoEngineKind>,

-    /// Before starting the benchmark, live-reconfigure the pageserver to use specified alignment for io buffers.
+    /// Before starting the benchmark, live-reconfigure the pageserver to use specified io mode (buffered vs. direct).
    #[clap(long)]
-    set_io_alignment: Option<usize>,
+    set_io_mode: Option<pageserver_api::models::virtual_file::IoMode>,

    targets: Option<Vec<TenantTimelineId>>,
 }
@@ -129,8 +129,8 @@ async fn main_impl(
        mgmt_api_client.put_io_engine(engine_str).await?;
    }

-    if let Some(align) = args.set_io_alignment {
-        mgmt_api_client.put_io_alignment(align).await?;
+    if let Some(mode) = &args.set_io_mode {
+        mgmt_api_client.put_io_mode(mode).await?;
    }

    // discover targets
--- a/pageserver/src/auth.rs
+++ b/pageserver/src/auth.rs
@@ -14,14 +14,19 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
        }
        (Scope::PageServerApi, None) => Ok(()), // access to management api for PageServerApi scope
        (Scope::PageServerApi, Some(_)) => Ok(()), // access to tenant api using PageServerApi scope
-        (Scope::Admin | Scope::SafekeeperData | Scope::GenerationsApi | Scope::Scrubber, _) => {
-            Err(AuthError(
-                format!(
-                    "JWT scope '{:?}' is ineligible for Pageserver auth",
-                    claims.scope
-                )
-                .into(),
-            ))
-        }
+        (
+            Scope::Admin
+            | Scope::SafekeeperData
+            | Scope::GenerationsApi
+            | Scope::Infra
+            | Scope::Scrubber,
+            _,
+        ) => Err(AuthError(
+            format!(
+                "JWT scope '{:?}' is ineligible for Pageserver auth",
+                claims.scope
+            )
+            .into(),
+        )),
    }
 }
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -125,8 +125,7 @@ fn main() -> anyhow::Result<()> {

    // after setting up logging, log the effective IO engine choice and read path implementations
    info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine");
-    info!(?conf.virtual_file_direct_io, "starting with virtual_file Direct IO settings");
-    info!(?conf.io_buffer_alignment, "starting with setting for IO buffer alignment");
+    info!(?conf.virtual_file_io_mode, "starting with virtual_file IO mode");

    // The tenants directory contains all the pageserver local disk state.
    // Create if not exists and make sure all the contents are durable before proceeding.
@@ -168,11 +167,7 @@ fn main() -> anyhow::Result<()> {
    let scenario = failpoint_support::init();

    // Basic initialization of things that don't change after startup
-    virtual_file::init(
-        conf.max_file_descriptors,
-        conf.virtual_file_io_engine,
-        conf.io_buffer_alignment,
-    );
+    virtual_file::init(conf.max_file_descriptors, conf.virtual_file_io_engine);
    page_cache::init(conf.page_cache_size);

    start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -174,9 +174,7 @@ pub struct PageServerConf {
    pub l0_flush: crate::l0_flush::L0FlushConfig,

    /// Direct IO settings
-    pub virtual_file_direct_io: virtual_file::DirectIoMode,
-
-    pub io_buffer_alignment: usize,
+    pub virtual_file_io_mode: virtual_file::IoMode,
 }

 /// Token for authentication to safekeepers
@@ -325,11 +323,10 @@ impl PageServerConf {
            image_compression,
            ephemeral_bytes_per_memory_kb,
            l0_flush,
-            virtual_file_direct_io,
+            virtual_file_io_mode,
            concurrent_tenant_warmup,
            concurrent_tenant_size_logical_size_queries,
            virtual_file_io_engine,
-            io_buffer_alignment,
            tenant_config,
        } = config_toml;

@@ -368,8 +365,6 @@ impl PageServerConf {
            max_vectored_read_bytes,
            image_compression,
            ephemeral_bytes_per_memory_kb,
-            virtual_file_direct_io,
-            io_buffer_alignment,

            // ------------------------------------------------------------
            // fields that require additional validation or custom handling
@@ -408,6 +403,7 @@ impl PageServerConf {
            l0_flush: l0_flush
                .map(crate::l0_flush::L0FlushConfig::from)
                .unwrap_or_default(),
+            virtual_file_io_mode: virtual_file_io_mode.unwrap_or(virtual_file::IoMode::preferred()),
        };

        // ------------------------------------------------------------
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -17,6 +17,7 @@ use hyper::header;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use metrics::launch_timestamp::LaunchTimestamp;
+use pageserver_api::models::virtual_file::IoMode;
 use pageserver_api::models::AuxFilePolicy;
 use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
 use pageserver_api::models::IngestAuxFilesRequest;
@@ -703,6 +704,8 @@ async fn timeline_archival_config_handler(
    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;

+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
+
    let request_data: TimelineArchivalConfigRequest = json_request(&mut request).await?;
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
    let state = get_state(&request);
@@ -713,7 +716,7 @@ async fn timeline_archival_config_handler(
            .get_attached_tenant_shard(tenant_shard_id)?;

        tenant
-            .apply_timeline_archival_config(timeline_id, request_data.state)
+            .apply_timeline_archival_config(timeline_id, request_data.state, ctx)
            .await?;
        Ok::<_, ApiError>(())
    }
@@ -2379,17 +2382,13 @@ async fn put_io_engine_handler(
    json_response(StatusCode::OK, ())
 }

-async fn put_io_alignment_handler(
+async fn put_io_mode_handler(
    mut r: Request<Body>,
    _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
    check_permission(&r, None)?;
-    let align: usize = json_request(&mut r).await?;
-    crate::virtual_file::set_io_buffer_alignment(align).map_err(|align| {
-        ApiError::PreconditionFailed(
-            format!("Requested io alignment ({align}) is not a power of two").into(),
-        )
-    })?;
+    let mode: IoMode = json_request(&mut r).await?;
+    crate::virtual_file::set_io_mode(mode);
    json_response(StatusCode::OK, ())
 }

@@ -3080,9 +3079,7 @@ pub fn make_router(
            |r| api_handler(r, timeline_collect_keyspace),
        )
        .put("/v1/io_engine", |r| api_handler(r, put_io_engine_handler))
-        .put("/v1/io_alignment", |r| {
-            api_handler(r, put_io_alignment_handler)
-        })
+        .put("/v1/io_mode", |r| api_handler(r, put_io_mode_handler))
        .put(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/force_aux_policy_switch",
            |r| api_handler(r, force_aux_policy_switch_handler),
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -38,6 +38,7 @@ use std::future::Future;
 use std::sync::Weak;
 use std::time::SystemTime;
 use storage_broker::BrokerClientChannel;
+use timeline::offload::offload_timeline;
 use tokio::io::BufReader;
 use tokio::sync::watch;
 use tokio::task::JoinSet;
@@ -287,9 +288,13 @@ pub struct Tenant {

    /// During timeline creation, we first insert the TimelineId to the
    /// creating map, then `timelines`, then remove it from the creating map.
-    /// **Lock order**: if acquring both, acquire`timelines` before `timelines_creating`
+    /// **Lock order**: if acquiring both, acquire`timelines` before `timelines_creating`
    timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,

+    /// Possibly offloaded and archived timelines
+    /// **Lock order**: if acquiring both, acquire`timelines` before `timelines_offloaded`
+    timelines_offloaded: Mutex<HashMap<TimelineId, Arc<OffloadedTimeline>>>,
+
    // This mutex prevents creation of new timelines during GC.
    // Adding yet another mutex (in addition to `timelines`) is needed because holding
    // `timelines` mutex during all GC iteration
@@ -484,6 +489,65 @@ impl WalRedoManager {
    }
 }

+pub struct OffloadedTimeline {
+    pub tenant_shard_id: TenantShardId,
+    pub timeline_id: TimelineId,
+    pub ancestor_timeline_id: Option<TimelineId>,
+
+    // TODO: once we persist offloaded state, make this lazily constructed
+    pub remote_client: Arc<RemoteTimelineClient>,
+
+    /// Prevent two tasks from deleting the timeline at the same time. If held, the
+    /// timeline is being deleted. If 'true', the timeline has already been deleted.
+    pub delete_progress: Arc<tokio::sync::Mutex<DeleteTimelineFlow>>,
+}
+
+impl OffloadedTimeline {
+    fn from_timeline(timeline: &Timeline) -> Self {
+        Self {
+            tenant_shard_id: timeline.tenant_shard_id,
+            timeline_id: timeline.timeline_id,
+            ancestor_timeline_id: timeline.get_ancestor_timeline_id(),
+
+            remote_client: timeline.remote_client.clone(),
+            delete_progress: timeline.delete_progress.clone(),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub enum TimelineOrOffloaded {
+    Timeline(Arc<Timeline>),
+    Offloaded(Arc<OffloadedTimeline>),
+}
+
+impl TimelineOrOffloaded {
+    pub fn tenant_shard_id(&self) -> TenantShardId {
+        match self {
+            TimelineOrOffloaded::Timeline(timeline) => timeline.tenant_shard_id,
+            TimelineOrOffloaded::Offloaded(offloaded) => offloaded.tenant_shard_id,
+        }
+    }
+    pub fn timeline_id(&self) -> TimelineId {
+        match self {
+            TimelineOrOffloaded::Timeline(timeline) => timeline.timeline_id,
+            TimelineOrOffloaded::Offloaded(offloaded) => offloaded.timeline_id,
+        }
+    }
+    pub fn delete_progress(&self) -> &Arc<tokio::sync::Mutex<DeleteTimelineFlow>> {
+        match self {
+            TimelineOrOffloaded::Timeline(timeline) => &timeline.delete_progress,
+            TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.delete_progress,
+        }
+    }
+    pub fn remote_client(&self) -> &Arc<RemoteTimelineClient> {
+        match self {
+            TimelineOrOffloaded::Timeline(timeline) => &timeline.remote_client,
+            TimelineOrOffloaded::Offloaded(offloaded) => &offloaded.remote_client,
+        }
+    }
+}
+
 #[derive(Debug, thiserror::Error, PartialEq, Eq)]
 pub enum GetTimelineError {
    #[error("Timeline is shutting down")]
@@ -1406,52 +1470,192 @@ impl Tenant {
        }
    }

-    pub(crate) async fn apply_timeline_archival_config(
-        &self,
+    fn check_to_be_archived_has_no_unarchived_children(
        timeline_id: TimelineId,
-        state: TimelineArchivalState,
+        timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
+    ) -> Result<(), TimelineArchivalError> {
+        let children: Vec<TimelineId> = timelines
+            .iter()
+            .filter_map(|(id, entry)| {
+                if entry.get_ancestor_timeline_id() != Some(timeline_id) {
+                    return None;
+                }
+                if entry.is_archived() == Some(true) {
+                    return None;
+                }
+                Some(*id)
+            })
+            .collect();
+
+        if !children.is_empty() {
+            return Err(TimelineArchivalError::HasUnarchivedChildren(children));
+        }
+        Ok(())
+    }
+
+    fn check_ancestor_of_to_be_unarchived_is_not_archived(
+        ancestor_timeline_id: TimelineId,
+        timelines: &std::sync::MutexGuard<'_, HashMap<TimelineId, Arc<Timeline>>>,
+        offloaded_timelines: &std::sync::MutexGuard<
+            '_,
+            HashMap<TimelineId, Arc<OffloadedTimeline>>,
+        >,
+    ) -> Result<(), TimelineArchivalError> {
+        let has_archived_parent =
+            if let Some(ancestor_timeline) = timelines.get(&ancestor_timeline_id) {
+                ancestor_timeline.is_archived() == Some(true)
+            } else if offloaded_timelines.contains_key(&ancestor_timeline_id) {
+                true
+            } else {
+                error!("ancestor timeline {ancestor_timeline_id} not found");
+                if cfg!(debug_assertions) {
+                    panic!("ancestor timeline {ancestor_timeline_id} not found");
+                }
+                return Err(TimelineArchivalError::NotFound);
+            };
+        if has_archived_parent {
+            return Err(TimelineArchivalError::HasArchivedParent(
+                ancestor_timeline_id,
+            ));
+        }
+        Ok(())
+    }
+
+    fn check_to_be_unarchived_timeline_has_no_archived_parent(
+        timeline: &Arc<Timeline>,
+    ) -> Result<(), TimelineArchivalError> {
+        if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
+            if ancestor_timeline.is_archived() == Some(true) {
+                return Err(TimelineArchivalError::HasArchivedParent(
+                    ancestor_timeline.timeline_id,
+                ));
+            }
+        }
+        Ok(())
+    }
+
+    /// Loads the specified (offloaded) timeline from S3 and attaches it as a loaded timeline
+    async fn unoffload_timeline(
+        self: &Arc<Self>,
+        timeline_id: TimelineId,
+        ctx: RequestContext,
+    ) -> Result<Arc<Timeline>, TimelineArchivalError> {
+        let cancel = self.cancel.clone();
+        let timeline_preload = self
+            .load_timeline_metadata(timeline_id, self.remote_storage.clone(), cancel)
+            .await;
+
+        let index_part = match timeline_preload.index_part {
+            Ok(index_part) => {
+                debug!("remote index part exists for timeline {timeline_id}");
+                index_part
+            }
+            Err(DownloadError::NotFound) => {
+                error!(%timeline_id, "index_part not found on remote");
+                return Err(TimelineArchivalError::NotFound);
+            }
+            Err(e) => {
+                // Some (possibly ephemeral) error happened during index_part download.
+                warn!(%timeline_id, "Failed to load index_part from remote storage, failed creation? ({e})");
+                return Err(TimelineArchivalError::Other(
+                    anyhow::Error::new(e).context("downloading index_part from remote storage"),
+                ));
+            }
+        };
+        let index_part = match index_part {
+            MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
+            MaybeDeletedIndexPart::Deleted(_index_part) => {
+                info!("timeline is deleted according to index_part.json");
+                return Err(TimelineArchivalError::NotFound);
+            }
+        };
+        let remote_metadata = index_part.metadata.clone();
+        let timeline_resources = self.build_timeline_resources(timeline_id);
+        self.load_remote_timeline(
+            timeline_id,
+            index_part,
+            remote_metadata,
+            timeline_resources,
+            &ctx,
+        )
+        .await
+        .with_context(|| {
+            format!(
+                "failed to load remote timeline {} for tenant {}",
+                timeline_id, self.tenant_shard_id
+            )
+        })?;
+        let timelines = self.timelines.lock().unwrap();
+        if let Some(timeline) = timelines.get(&timeline_id) {
+            let mut offloaded_timelines = self.timelines_offloaded.lock().unwrap();
+            if offloaded_timelines.remove(&timeline_id).is_none() {
+                warn!("timeline already removed from offloaded timelines");
+            }
+            Ok(Arc::clone(timeline))
+        } else {
+            warn!("timeline not available directly after attach");
+            Err(TimelineArchivalError::Other(anyhow::anyhow!(
+                "timeline not available directly after attach"
+            )))
+        }
+    }
+
+    pub(crate) async fn apply_timeline_archival_config(
+        self: &Arc<Self>,
+        timeline_id: TimelineId,
+        new_state: TimelineArchivalState,
+        ctx: RequestContext,
    ) -> Result<(), TimelineArchivalError> {
        info!("setting timeline archival config");
-        let timeline = {
+        // First part: figure out what is needed to do, and do validation
+        let timeline_or_unarchive_offloaded = 'outer: {
            let timelines = self.timelines.lock().unwrap();

            let Some(timeline) = timelines.get(&timeline_id) else {
-                return Err(TimelineArchivalError::NotFound);
+                let offloaded_timelines = self.timelines_offloaded.lock().unwrap();
+                let Some(offloaded) = offloaded_timelines.get(&timeline_id) else {
+                    return Err(TimelineArchivalError::NotFound);
+                };
+                if new_state == TimelineArchivalState::Archived {
+                    // It's offloaded already, so nothing to do
+                    return Ok(());
+                }
+                if let Some(ancestor_timeline_id) = offloaded.ancestor_timeline_id {
+                    Self::check_ancestor_of_to_be_unarchived_is_not_archived(
+                        ancestor_timeline_id,
+                        &timelines,
+                        &offloaded_timelines,
+                    )?;
+                }
+                break 'outer None;
            };

-            if state == TimelineArchivalState::Unarchived {
-                if let Some(ancestor_timeline) = timeline.ancestor_timeline() {
-                    if ancestor_timeline.is_archived() == Some(true) {
-                        return Err(TimelineArchivalError::HasArchivedParent(
-                            ancestor_timeline.timeline_id,
-                        ));
-                    }
+            // Do some validation. We release the timelines lock below, so there is potential
+            // for race conditions: these checks are more present to prevent misunderstandings of
+            // the API's capabilities, instead of serving as the sole way to defend their invariants.
+            match new_state {
+                TimelineArchivalState::Unarchived => {
+                    Self::check_to_be_unarchived_timeline_has_no_archived_parent(timeline)?
+                }
+                TimelineArchivalState::Archived => {
+                    Self::check_to_be_archived_has_no_unarchived_children(timeline_id, &timelines)?
                }
            }
-
-            // Ensure that there are no non-archived child timelines
-            let children: Vec<TimelineId> = timelines
-                .iter()
-                .filter_map(|(id, entry)| {
-                    if entry.get_ancestor_timeline_id() != Some(timeline_id) {
-                        return None;
-                    }
-                    if entry.is_archived() == Some(true) {
-                        return None;
-                    }
-                    Some(*id)
-                })
-                .collect();
-
-            if !children.is_empty() && state == TimelineArchivalState::Archived {
-                return Err(TimelineArchivalError::HasUnarchivedChildren(children));
-            }
-            Arc::clone(timeline)
+            Some(Arc::clone(timeline))
        };

+        // Second part: unarchive timeline (if needed)
+        let timeline = if let Some(timeline) = timeline_or_unarchive_offloaded {
+            timeline
+        } else {
+            // Turn offloaded timeline into a non-offloaded one
+            self.unoffload_timeline(timeline_id, ctx).await?
+        };
+
+        // Third part: upload new timeline archival state and block until it is present in S3
        let upload_needed = timeline
            .remote_client
-            .schedule_index_upload_for_timeline_archival_state(state)?;
+            .schedule_index_upload_for_timeline_archival_state(new_state)?;

        if upload_needed {
            info!("Uploading new state");
@@ -1884,7 +2088,7 @@ impl Tenant {
    ///
    /// Returns whether we have pending compaction task.
    async fn compaction_iteration(
-        &self,
+        self: &Arc<Self>,
        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> Result<bool, timeline::CompactionError> {
@@ -1905,21 +2109,28 @@ impl Tenant {
        // while holding the lock. Then drop the lock and actually perform the
        // compactions.  We don't want to block everything else while the
        // compaction runs.
-        let timelines_to_compact = {
+        let timelines_to_compact_or_offload;
+        {
            let timelines = self.timelines.lock().unwrap();
-            let timelines_to_compact = timelines
+            timelines_to_compact_or_offload = timelines
                .iter()
                .filter_map(|(timeline_id, timeline)| {
-                    if timeline.is_active() {
-                        Some((*timeline_id, timeline.clone()))
-                    } else {
+                    let (is_active, can_offload) = (timeline.is_active(), timeline.can_offload());
+                    let has_no_unoffloaded_children = {
+                        !timelines
+                            .iter()
+                            .any(|(_id, tl)| tl.get_ancestor_timeline_id() == Some(*timeline_id))
+                    };
+                    let can_offload = can_offload && has_no_unoffloaded_children;
+                    if (is_active, can_offload) == (false, false) {
                        None
+                    } else {
+                        Some((*timeline_id, timeline.clone(), (is_active, can_offload)))
                    }
                })
                .collect::<Vec<_>>();
            drop(timelines);
-            timelines_to_compact
-        };
+        }

        // Before doing any I/O work, check our circuit breaker
        if self.compaction_circuit_breaker.lock().unwrap().is_broken() {
@@ -1929,20 +2140,34 @@ impl Tenant {

        let mut has_pending_task = false;

-        for (timeline_id, timeline) in &timelines_to_compact {
-            has_pending_task |= timeline
-                .compact(cancel, EnumSet::empty(), ctx)
-                .instrument(info_span!("compact_timeline", %timeline_id))
-                .await
-                .inspect_err(|e| match e {
-                    timeline::CompactionError::ShuttingDown => (),
-                    timeline::CompactionError::Other(e) => {
-                        self.compaction_circuit_breaker
-                            .lock()
-                            .unwrap()
-                            .fail(&CIRCUIT_BREAKERS_BROKEN, e);
-                    }
-                })?;
+        for (timeline_id, timeline, (can_compact, can_offload)) in &timelines_to_compact_or_offload
+        {
+            let pending_task_left = if *can_compact {
+                Some(
+                    timeline
+                        .compact(cancel, EnumSet::empty(), ctx)
+                        .instrument(info_span!("compact_timeline", %timeline_id))
+                        .await
+                        .inspect_err(|e| match e {
+                            timeline::CompactionError::ShuttingDown => (),
+                            timeline::CompactionError::Other(e) => {
+                                self.compaction_circuit_breaker
+                                    .lock()
+                                    .unwrap()
+                                    .fail(&CIRCUIT_BREAKERS_BROKEN, e);
+                            }
+                        })?,
+                )
+            } else {
+                None
+            };
+            has_pending_task |= pending_task_left.unwrap_or(false);
+            if pending_task_left == Some(false) && *can_offload {
+                offload_timeline(self, timeline)
+                    .instrument(info_span!("offload_timeline", %timeline_id))
+                    .await
+                    .map_err(timeline::CompactionError::Other)?;
+            }
        }

        self.compaction_circuit_breaker
@@ -2852,6 +3077,7 @@ impl Tenant {
            constructed_at: Instant::now(),
            timelines: Mutex::new(HashMap::new()),
            timelines_creating: Mutex::new(HashSet::new()),
+            timelines_offloaded: Mutex::new(HashMap::new()),
            gc_cs: tokio::sync::Mutex::new(()),
            walredo_mgr,
            remote_storage,
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -84,7 +84,7 @@ impl Drop for EphemeralFile {
    fn drop(&mut self) {
        // unlink the file
        // we are clear to do this, because we have entered a gate
-        let path = &self.buffered_writer.as_inner().as_inner().path;
+        let path = self.buffered_writer.as_inner().as_inner().path();
        let res = std::fs::remove_file(path);
        if let Err(e) = res {
            if e.kind() != std::io::ErrorKind::NotFound {
@@ -356,7 +356,7 @@ mod tests {
        }

        let file_contents =
-            std::fs::read(&file.buffered_writer.as_inner().as_inner().path).unwrap();
+            std::fs::read(file.buffered_writer.as_inner().as_inner().path()).unwrap();
        assert_eq!(file_contents, &content[0..cap]);

        let buffer_contents = file.buffered_writer.inspect_buffer();
@@ -392,7 +392,7 @@ mod tests {
            .buffered_writer
            .as_inner()
            .as_inner()
-            .path
+            .path()
            .metadata()
            .unwrap();
        assert_eq!(
--- a/pageserver/src/tenant/gc_block.rs
+++ b/pageserver/src/tenant/gc_block.rs
@@ -141,14 +141,14 @@ impl GcBlock {
        Ok(())
    }

-    pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
+    pub(crate) fn before_delete(&self, timeline_id: &super::TimelineId) {
        let unblocked = {
            let mut g = self.reasons.lock().unwrap();
            if g.is_empty() {
                return;
            }

-            g.remove(&timeline.timeline_id);
+            g.remove(timeline_id);

            BlockingReasons::clean_and_summarize(g).is_none()
        };
--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -950,6 +950,7 @@ impl<'a> TenantDownloader<'a> {
        let cancel = &self.secondary_state.cancel;
        let opts = DownloadOpts {
            etag: prev_etag.cloned(),
+            ..Default::default()
        };

        backoff::retry(
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -573,7 +573,7 @@ impl DeltaLayerWriterInner {
        ensure!(
            metadata.len() <= S3_UPLOAD_LIMIT,
            "Created delta layer file at {} of size {} above limit {S3_UPLOAD_LIMIT}!",
-            file.path,
+            file.path(),
            metadata.len()
        );

@@ -791,7 +791,7 @@ impl DeltaLayerInner {
        max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
-        let file = VirtualFile::open(path, ctx)
+        let file = VirtualFile::open_v2(path, ctx)
            .await
            .context("open layer file")?;

@@ -1022,7 +1022,7 @@ impl DeltaLayerInner {
                            blob_meta.key,
                            PageReconstructError::Other(anyhow!(
                                "Failed to read blobs from virtual file {}: {}",
-                                self.file.path,
+                                self.file.path(),
                                kind
                            )),
                        );
@@ -1048,7 +1048,7 @@ impl DeltaLayerInner {
                            meta.meta.key,
                            PageReconstructError::Other(anyhow!(e).context(format!(
                                "Failed to decompress blob from virtual file {}",
-                                self.file.path,
+                                self.file.path(),
                            ))),
                        );

@@ -1066,7 +1066,7 @@ impl DeltaLayerInner {
                            meta.meta.key,
                            PageReconstructError::Other(anyhow!(e).context(format!(
                                "Failed to deserialize blob from virtual file {}",
-                                self.file.path,
+                                self.file.path(),
                            ))),
                        );

@@ -1198,7 +1198,6 @@ impl DeltaLayerInner {
        let mut prev: Option<(Key, Lsn, BlobRef)> = None;

        let mut read_builder: Option<ChunkedVectoredReadBuilder> = None;
-        let align = virtual_file::get_io_buffer_alignment();

        let max_read_size = self
            .max_vectored_read_bytes
@@ -1247,7 +1246,6 @@ impl DeltaLayerInner {
                        offsets.end.pos(),
                        meta,
                        max_read_size,
-                        align,
                    ))
                }
            } else {
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -389,7 +389,7 @@ impl ImageLayerInner {
        max_vectored_read_bytes: Option<MaxVectoredReadBytes>,
        ctx: &RequestContext,
    ) -> anyhow::Result<Self> {
-        let file = VirtualFile::open(path, ctx)
+        let file = VirtualFile::open_v2(path, ctx)
            .await
            .context("open layer file")?;
        let file_id = page_cache::next_file_id();
@@ -626,7 +626,7 @@ impl ImageLayerInner {
                                    meta.meta.key,
                                    PageReconstructError::Other(anyhow!(e).context(format!(
                                        "Failed to decompress blob from virtual file {}",
-                                        self.file.path,
+                                        self.file.path(),
                                    ))),
                                );

@@ -647,7 +647,7 @@ impl ImageLayerInner {
                            blob_meta.key,
                            PageReconstructError::from(anyhow!(
                                "Failed to read blobs from virtual file {}: {}",
-                                self.file.path,
+                                self.file.path(),
                                kind
                            )),
                        );
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -7,6 +7,7 @@ pub(crate) mod handle;
 mod init;
 pub mod layer_manager;
 pub(crate) mod logical_size;
+pub mod offload;
 pub mod span;
 pub mod uninit;
 mod walreceiver;
@@ -1556,6 +1557,17 @@ impl Timeline {
        }
    }

+    /// Checks if the internal state of the timeline is consistent with it being able to be offloaded.
+    /// This is neccessary but not sufficient for offloading of the timeline as it might have
+    /// child timelines that are not offloaded yet.
+    pub(crate) fn can_offload(&self) -> bool {
+        if self.remote_client.is_archived() != Some(true) {
+            return false;
+        }
+
+        true
+    }
+
    /// Outermost timeline compaction operation; downloads needed layers. Returns whether we have pending
    /// compaction tasks.
    pub(crate) async fn compact(
@@ -1818,7 +1830,6 @@ impl Timeline {
        self.current_state() == TimelineState::Active
    }

-    #[allow(unused)]
    pub(crate) fn is_archived(&self) -> Option<bool> {
        self.remote_client.is_archived()
    }
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -15,7 +15,7 @@ use crate::{
    tenant::{
        metadata::TimelineMetadata,
        remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient},
-        CreateTimelineCause, DeleteTimelineError, Tenant,
+        CreateTimelineCause, DeleteTimelineError, Tenant, TimelineOrOffloaded,
    },
 };

@@ -24,12 +24,14 @@ use super::{Timeline, TimelineResources};
 /// Mark timeline as deleted in S3 so we won't pick it up next time
 /// during attach or pageserver restart.
 /// See comment in persist_index_part_with_deleted_flag.
-async fn set_deleted_in_remote_index(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
-    match timeline
-        .remote_client
+async fn set_deleted_in_remote_index(
+    timeline: &TimelineOrOffloaded,
+) -> Result<(), DeleteTimelineError> {
+    let res = timeline
+        .remote_client()
        .persist_index_part_with_deleted_flag()
-        .await
-    {
+        .await;
+    match res {
        // If we (now, or already) marked it successfully as deleted, we can proceed
        Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
        // Bail out otherwise
@@ -127,9 +129,9 @@ pub(super) async fn delete_local_timeline_directory(
 }

 /// Removes remote layers and an index file after them.
-async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<()> {
+async fn delete_remote_layers_and_index(timeline: &TimelineOrOffloaded) -> anyhow::Result<()> {
    timeline
-        .remote_client
+        .remote_client()
        .delete_all()
        .await
        .context("delete_all")
@@ -137,27 +139,41 @@ async fn delete_remote_layers_and_index(timeline: &Timeline) -> anyhow::Result<(

 /// It is important that this gets called when DeletionGuard is being held.
 /// For more context see comments in [`DeleteTimelineFlow::prepare`]
-async fn remove_timeline_from_tenant(
+async fn remove_maybe_offloaded_timeline_from_tenant(
    tenant: &Tenant,
-    timeline: &Timeline,
+    timeline: &TimelineOrOffloaded,
    _: &DeletionGuard, // using it as a witness
 ) -> anyhow::Result<()> {
    // Remove the timeline from the map.
+    // This observes the locking order between timelines and timelines_offloaded
    let mut timelines = tenant.timelines.lock().unwrap();
+    let mut timelines_offloaded = tenant.timelines_offloaded.lock().unwrap();
+    let offloaded_children_exist = timelines_offloaded
+        .iter()
+        .any(|(_, entry)| entry.ancestor_timeline_id == Some(timeline.timeline_id()));
    let children_exist = timelines
        .iter()
-        .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
-    // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
-    // We already deleted the layer files, so it's probably best to panic.
-    // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
-    if children_exist {
+        .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id()));
+    // XXX this can happen because of race conditions with branch creation.
+    // We already deleted the remote layer files, so it's probably best to panic.
+    if children_exist || offloaded_children_exist {
        panic!("Timeline grew children while we removed layer files");
    }

-    timelines
-        .remove(&timeline.timeline_id)
-        .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
+    match timeline {
+        TimelineOrOffloaded::Timeline(timeline) => {
+            timelines.remove(&timeline.timeline_id).expect(
+                "timeline that we were deleting was concurrently removed from 'timelines' map",
+            );
+        }
+        TimelineOrOffloaded::Offloaded(timeline) => {
+            timelines_offloaded
+                .remove(&timeline.timeline_id)
+                .expect("timeline that we were deleting was concurrently removed from 'timelines_offloaded' map");
+        }
+    }

+    drop(timelines_offloaded);
    drop(timelines);

    Ok(())
@@ -207,9 +223,11 @@ impl DeleteTimelineFlow {
        guard.mark_in_progress()?;

        // Now that the Timeline is in Stopping state, request all the related tasks to shut down.
-        timeline.shutdown(super::ShutdownMode::Hard).await;
+        if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
+            timeline.shutdown(super::ShutdownMode::Hard).await;
+        }

-        tenant.gc_block.before_delete(&timeline);
+        tenant.gc_block.before_delete(&timeline.timeline_id());

        fail::fail_point!("timeline-delete-before-index-deleted-at", |_| {
            Err(anyhow::anyhow!(
@@ -285,15 +303,16 @@ impl DeleteTimelineFlow {

        guard.mark_in_progress()?;

+        let timeline = TimelineOrOffloaded::Timeline(timeline);
        Self::schedule_background(guard, tenant.conf, tenant, timeline);

        Ok(())
    }

-    fn prepare(
+    pub(super) fn prepare(
        tenant: &Tenant,
        timeline_id: TimelineId,
-    ) -> Result<(Arc<Timeline>, DeletionGuard), DeleteTimelineError> {
+    ) -> Result<(TimelineOrOffloaded, DeletionGuard), DeleteTimelineError> {
        // Note the interaction between this guard and deletion guard.
        // Here we attempt to lock deletion guard when we're holding a lock on timelines.
        // This is important because when you take into account `remove_timeline_from_tenant`
@@ -307,8 +326,14 @@ impl DeleteTimelineFlow {
        let timelines = tenant.timelines.lock().unwrap();

        let timeline = match timelines.get(&timeline_id) {
-            Some(t) => t,
-            None => return Err(DeleteTimelineError::NotFound),
+            Some(t) => TimelineOrOffloaded::Timeline(Arc::clone(t)),
+            None => {
+                let offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
+                match offloaded_timelines.get(&timeline_id) {
+                    Some(t) => TimelineOrOffloaded::Offloaded(Arc::clone(t)),
+                    None => return Err(DeleteTimelineError::NotFound),
+                }
+            }
        };

        // Ensure that there are no child timelines **attached to that pageserver**,
@@ -334,30 +359,32 @@ impl DeleteTimelineFlow {
        // to remove the timeline from it.
        // Always if you have two locks that are taken in different order this can result in a deadlock.

-        let delete_progress = Arc::clone(&timeline.delete_progress);
+        let delete_progress = Arc::clone(timeline.delete_progress());
        let delete_lock_guard = match delete_progress.try_lock_owned() {
            Ok(guard) => DeletionGuard(guard),
            Err(_) => {
                // Unfortunately if lock fails arc is consumed.
                return Err(DeleteTimelineError::AlreadyInProgress(Arc::clone(
-                    &timeline.delete_progress,
+                    timeline.delete_progress(),
                )));
            }
        };

-        timeline.set_state(TimelineState::Stopping);
+        if let TimelineOrOffloaded::Timeline(timeline) = &timeline {
+            timeline.set_state(TimelineState::Stopping);
+        }

-        Ok((Arc::clone(timeline), delete_lock_guard))
+        Ok((timeline, delete_lock_guard))
    }

    fn schedule_background(
        guard: DeletionGuard,
        conf: &'static PageServerConf,
        tenant: Arc<Tenant>,
-        timeline: Arc<Timeline>,
+        timeline: TimelineOrOffloaded,
    ) {
-        let tenant_shard_id = timeline.tenant_shard_id;
-        let timeline_id = timeline.timeline_id;
+        let tenant_shard_id = timeline.tenant_shard_id();
+        let timeline_id = timeline.timeline_id();

        task_mgr::spawn(
            task_mgr::BACKGROUND_RUNTIME.handle(),
@@ -368,7 +395,9 @@ impl DeleteTimelineFlow {
            async move {
                if let Err(err) = Self::background(guard, conf, &tenant, &timeline).await {
                    error!("Error: {err:#}");
-                    timeline.set_broken(format!("{err:#}"))
+                    if let TimelineOrOffloaded::Timeline(timeline) = timeline {
+                        timeline.set_broken(format!("{err:#}"))
+                    }
                };
                Ok(())
            }
@@ -380,15 +409,19 @@ impl DeleteTimelineFlow {
        mut guard: DeletionGuard,
        conf: &PageServerConf,
        tenant: &Tenant,
-        timeline: &Timeline,
+        timeline: &TimelineOrOffloaded,
    ) -> Result<(), DeleteTimelineError> {
-        delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
+        // Offloaded timelines have no local state
+        // TODO: once we persist offloaded information, delete the timeline from there, too
+        if let TimelineOrOffloaded::Timeline(timeline) = timeline {
+            delete_local_timeline_directory(conf, tenant.tenant_shard_id, timeline).await?;
+        }

        delete_remote_layers_and_index(timeline).await?;

        pausable_failpoint!("in_progress_delete");

-        remove_timeline_from_tenant(tenant, timeline, &guard).await?;
+        remove_maybe_offloaded_timeline_from_tenant(tenant, timeline, &guard).await?;

        *guard = Self::Finished;

@@ -400,7 +433,7 @@ impl DeleteTimelineFlow {
    }
 }

-struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
+pub(super) struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);

 impl Deref for DeletionGuard {
    type Target = DeleteTimelineFlow;
--- a/pageserver/src/tenant/timeline/offload.rs
+++ b/pageserver/src/tenant/timeline/offload.rs
@@ -0,0 +1,69 @@
+use std::sync::Arc;
+
+use crate::tenant::{OffloadedTimeline, Tenant, TimelineOrOffloaded};
+
+use super::{
+    delete::{delete_local_timeline_directory, DeleteTimelineFlow, DeletionGuard},
+    Timeline,
+};
+
+pub(crate) async fn offload_timeline(
+    tenant: &Tenant,
+    timeline: &Arc<Timeline>,
+) -> anyhow::Result<()> {
+    tracing::info!("offloading archived timeline");
+    let (timeline, guard) = DeleteTimelineFlow::prepare(tenant, timeline.timeline_id)?;
+
+    let TimelineOrOffloaded::Timeline(timeline) = timeline else {
+        tracing::error!("timeline already offloaded, but given timeline object");
+        return Ok(());
+    };
+
+    // TODO extend guard mechanism above with method
+    // to make deletions possible while offloading is in progress
+
+    // TODO mark timeline as offloaded in S3
+
+    let conf = &tenant.conf;
+    delete_local_timeline_directory(conf, tenant.tenant_shard_id, &timeline).await?;
+
+    remove_timeline_from_tenant(tenant, &timeline, &guard).await?;
+
+    {
+        let mut offloaded_timelines = tenant.timelines_offloaded.lock().unwrap();
+        offloaded_timelines.insert(
+            timeline.timeline_id,
+            Arc::new(OffloadedTimeline::from_timeline(&timeline)),
+        );
+    }
+
+    Ok(())
+}
+
+/// It is important that this gets called when DeletionGuard is being held.
+/// For more context see comments in [`DeleteTimelineFlow::prepare`]
+async fn remove_timeline_from_tenant(
+    tenant: &Tenant,
+    timeline: &Timeline,
+    _: &DeletionGuard, // using it as a witness
+) -> anyhow::Result<()> {
+    // Remove the timeline from the map.
+    let mut timelines = tenant.timelines.lock().unwrap();
+    let children_exist = timelines
+        .iter()
+        .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline.timeline_id));
+    // XXX this can happen because `branch_timeline` doesn't check `TimelineState::Stopping`.
+    // We already deleted the layer files, so it's probably best to panic.
+    // (Ideally, above remove_dir_all is atomic so we don't see this timeline after a restart)
+    if children_exist {
+        panic!("Timeline grew children while we removed layer files");
+    }
+
+    timelines
+        .remove(&timeline.timeline_id)
+        .expect("timeline that we were deleting was concurrently removed from 'timelines' map");
+
+    drop(timelines);
+
+    Ok(())
+}
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -194,8 +194,6 @@ pub(crate) struct ChunkedVectoredReadBuilder {
    /// Start offset and metadata for each blob in this read
    blobs_at: VecMap<u64, BlobMeta>,
    max_read_size: Option<usize>,
-    /// Chunk size reads are coalesced into.
-    chunk_size: usize,
 }

 /// Computes x / d rounded up.
@@ -204,6 +202,7 @@ fn div_round_up(x: usize, d: usize) -> usize {
 }

 impl ChunkedVectoredReadBuilder {
+    const CHUNK_SIZE: usize = virtual_file::get_io_buffer_alignment();
    /// Start building a new vectored read.
    ///
    /// Note that by design, this does not check against reading more than `max_read_size` to
@@ -214,21 +213,19 @@ impl ChunkedVectoredReadBuilder {
        end_offset: u64,
        meta: BlobMeta,
        max_read_size: Option<usize>,
-        chunk_size: usize,
    ) -> Self {
        let mut blobs_at = VecMap::default();
        blobs_at
            .append(start_offset, meta)
            .expect("First insertion always succeeds");

-        let start_blk_no = start_offset as usize / chunk_size;
-        let end_blk_no = div_round_up(end_offset as usize, chunk_size);
+        let start_blk_no = start_offset as usize / Self::CHUNK_SIZE;
+        let end_blk_no = div_round_up(end_offset as usize, Self::CHUNK_SIZE);
        Self {
            start_blk_no,
            end_blk_no,
            blobs_at,
            max_read_size,
-            chunk_size,
        }
    }

@@ -237,18 +234,12 @@ impl ChunkedVectoredReadBuilder {
        end_offset: u64,
        meta: BlobMeta,
        max_read_size: usize,
-        align: usize,
    ) -> Self {
-        Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), align)
+        Self::new_impl(start_offset, end_offset, meta, Some(max_read_size))
    }

-    pub(crate) fn new_streaming(
-        start_offset: u64,
-        end_offset: u64,
-        meta: BlobMeta,
-        align: usize,
-    ) -> Self {
-        Self::new_impl(start_offset, end_offset, meta, None, align)
+    pub(crate) fn new_streaming(start_offset: u64, end_offset: u64, meta: BlobMeta) -> Self {
+        Self::new_impl(start_offset, end_offset, meta, None)
    }

    /// Attempts to extend the current read with a new blob if the new blob resides in the same or the immediate next chunk.
@@ -256,12 +247,12 @@ impl ChunkedVectoredReadBuilder {
    /// The resulting size also must be below the max read size.
    pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
        tracing::trace!(start, end, "trying to extend");
-        let start_blk_no = start as usize / self.chunk_size;
-        let end_blk_no = div_round_up(end as usize, self.chunk_size);
+        let start_blk_no = start as usize / Self::CHUNK_SIZE;
+        let end_blk_no = div_round_up(end as usize, Self::CHUNK_SIZE);

        let not_limited_by_max_read_size = {
            if let Some(max_read_size) = self.max_read_size {
-                let coalesced_size = (end_blk_no - self.start_blk_no) * self.chunk_size;
+                let coalesced_size = (end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE;
                coalesced_size <= max_read_size
            } else {
                true
@@ -292,12 +283,12 @@ impl ChunkedVectoredReadBuilder {
    }

    pub(crate) fn size(&self) -> usize {
-        (self.end_blk_no - self.start_blk_no) * self.chunk_size
+        (self.end_blk_no - self.start_blk_no) * Self::CHUNK_SIZE
    }

    pub(crate) fn build(self) -> VectoredRead {
-        let start = (self.start_blk_no * self.chunk_size) as u64;
-        let end = (self.end_blk_no * self.chunk_size) as u64;
+        let start = (self.start_blk_no * Self::CHUNK_SIZE) as u64;
+        let end = (self.end_blk_no * Self::CHUNK_SIZE) as u64;
        VectoredRead {
            start,
            end,
@@ -328,18 +319,14 @@ pub struct VectoredReadPlanner {
    prev: Option<(Key, Lsn, u64, BlobFlag)>,

    max_read_size: usize,
-
-    align: usize,
 }

 impl VectoredReadPlanner {
    pub fn new(max_read_size: usize) -> Self {
-        let align = virtual_file::get_io_buffer_alignment();
        Self {
            blobs: BTreeMap::new(),
            prev: None,
            max_read_size,
-            align,
        }
    }

@@ -418,7 +405,6 @@ impl VectoredReadPlanner {
                        end_offset,
                        BlobMeta { key, lsn },
                        self.max_read_size,
-                        self.align,
                    );

                    let prev_read_builder = current_read_builder.replace(next_read_builder);
@@ -472,13 +458,13 @@ impl<'a> VectoredBlobReader<'a> {
        );

        if cfg!(debug_assertions) {
-            let align = virtual_file::get_io_buffer_alignment() as u64;
+            const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;
            debug_assert_eq!(
-                read.start % align,
+                read.start % ALIGN,
                0,
                "Read start at {} does not satisfy the required io buffer alignment ({} bytes)",
                read.start,
-                align
+                ALIGN
            );
        }

@@ -553,22 +539,18 @@ pub struct StreamingVectoredReadPlanner {
    max_cnt: usize,
    /// Size of the current batch
    cnt: usize,
-
-    align: usize,
 }

 impl StreamingVectoredReadPlanner {
    pub fn new(max_read_size: u64, max_cnt: usize) -> Self {
        assert!(max_cnt > 0);
        assert!(max_read_size > 0);
-        let align = virtual_file::get_io_buffer_alignment();
        Self {
            read_builder: None,
            prev: None,
            max_cnt,
            max_read_size,
            cnt: 0,
-            align,
        }
    }

@@ -621,7 +603,6 @@ impl StreamingVectoredReadPlanner {
                        start_offset,
                        end_offset,
                        BlobMeta { key, lsn },
-                        self.align,
                    ))
                };
            }
@@ -656,9 +637,9 @@ mod tests {
    use super::*;

    fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) {
-        let align = virtual_file::get_io_buffer_alignment() as u64;
-        assert_eq!(read.start % align, 0);
-        assert_eq!(read.start / align, offset_range.first().unwrap().2 / align);
+        const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64;
+        assert_eq!(read.start % ALIGN, 0);
+        assert_eq!(read.start / ALIGN, offset_range.first().unwrap().2 / ALIGN);

        let expected_offsets_in_read: Vec<_> = offset_range.iter().map(|o| o.2).collect();

@@ -676,32 +657,27 @@ mod tests {
    fn planner_chunked_coalesce_all_test() {
        use crate::virtual_file;

-        let chunk_size = virtual_file::get_io_buffer_alignment() as u64;
+        const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;

-        // The test explicitly does not check chunk size < 512
-        if chunk_size < 512 {
-            return;
-        }
-
-        let max_read_size = chunk_size as usize * 8;
+        let max_read_size = CHUNK_SIZE as usize * 8;
        let key = Key::MIN;
        let lsn = Lsn(0);

        let blob_descriptions = [
-            (key, lsn, chunk_size / 8, BlobFlag::None), // Read 1 BEGIN
-            (key, lsn, chunk_size / 4, BlobFlag::Ignore), // Gap
-            (key, lsn, chunk_size / 2, BlobFlag::None),
-            (key, lsn, chunk_size - 2, BlobFlag::Ignore), // Gap
-            (key, lsn, chunk_size, BlobFlag::None),
-            (key, lsn, chunk_size * 2 - 1, BlobFlag::None),
-            (key, lsn, chunk_size * 2 + 1, BlobFlag::Ignore), // Gap
-            (key, lsn, chunk_size * 3 + 1, BlobFlag::None),
-            (key, lsn, chunk_size * 5 + 1, BlobFlag::None),
-            (key, lsn, chunk_size * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.
-            (key, lsn, chunk_size * 7 + 1, BlobFlag::None),
-            (key, lsn, chunk_size * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)
-            (key, lsn, chunk_size * 9, BlobFlag::Ignore), // ==== skipped a chunk
-            (key, lsn, chunk_size * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)
+            (key, lsn, CHUNK_SIZE / 8, BlobFlag::None), // Read 1 BEGIN
+            (key, lsn, CHUNK_SIZE / 4, BlobFlag::Ignore), // Gap
+            (key, lsn, CHUNK_SIZE / 2, BlobFlag::None),
+            (key, lsn, CHUNK_SIZE - 2, BlobFlag::Ignore), // Gap
+            (key, lsn, CHUNK_SIZE, BlobFlag::None),
+            (key, lsn, CHUNK_SIZE * 2 - 1, BlobFlag::None),
+            (key, lsn, CHUNK_SIZE * 2 + 1, BlobFlag::Ignore), // Gap
+            (key, lsn, CHUNK_SIZE * 3 + 1, BlobFlag::None),
+            (key, lsn, CHUNK_SIZE * 5 + 1, BlobFlag::None),
+            (key, lsn, CHUNK_SIZE * 6 + 1, BlobFlag::Ignore), // skipped chunk size, but not a chunk: should coalesce.
+            (key, lsn, CHUNK_SIZE * 7 + 1, BlobFlag::None),
+            (key, lsn, CHUNK_SIZE * 8, BlobFlag::None), // Read 2 BEGIN (b/c max_read_size)
+            (key, lsn, CHUNK_SIZE * 9, BlobFlag::Ignore), // ==== skipped a chunk
+            (key, lsn, CHUNK_SIZE * 10, BlobFlag::None), // Read 3 BEGIN (cannot coalesce)
        ];

        let ranges = [
@@ -780,19 +756,19 @@ mod tests {

    #[test]
    fn planner_replacement_test() {
-        let chunk_size = virtual_file::get_io_buffer_alignment() as u64;
-        let max_read_size = 128 * chunk_size as usize;
+        const CHUNK_SIZE: u64 = virtual_file::get_io_buffer_alignment() as u64;
+        let max_read_size = 128 * CHUNK_SIZE as usize;
        let first_key = Key::MIN;
        let second_key = first_key.next();
        let lsn = Lsn(0);

        let blob_descriptions = vec![
            (first_key, lsn, 0, BlobFlag::None),          // First in read 1
-            (first_key, lsn, chunk_size, BlobFlag::None), // Last in read 1
-            (second_key, lsn, 2 * chunk_size, BlobFlag::ReplaceAll),
-            (second_key, lsn, 3 * chunk_size, BlobFlag::None),
-            (second_key, lsn, 4 * chunk_size, BlobFlag::ReplaceAll), // First in read 2
-            (second_key, lsn, 5 * chunk_size, BlobFlag::None),       // Last in read 2
+            (first_key, lsn, CHUNK_SIZE, BlobFlag::None), // Last in read 1
+            (second_key, lsn, 2 * CHUNK_SIZE, BlobFlag::ReplaceAll),
+            (second_key, lsn, 3 * CHUNK_SIZE, BlobFlag::None),
+            (second_key, lsn, 4 * CHUNK_SIZE, BlobFlag::ReplaceAll), // First in read 2
+            (second_key, lsn, 5 * CHUNK_SIZE, BlobFlag::None),       // Last in read 2
        ];

        let ranges = [&blob_descriptions[0..2], &blob_descriptions[4..]];
@@ -802,7 +778,7 @@ mod tests {
            planner.handle(key, lsn, offset, flag);
        }

-        planner.handle_range_end(6 * chunk_size);
+        planner.handle_range_end(6 * CHUNK_SIZE);

        let reads = planner.finish();
        assert_eq!(reads.len(), 2);
@@ -947,7 +923,6 @@ mod tests {
        let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;
        let mut buf = BytesMut::with_capacity(reserved_bytes);

-        let align = virtual_file::get_io_buffer_alignment();
        let vectored_blob_reader = VectoredBlobReader::new(&file);
        let meta = BlobMeta {
            key: Key::MIN,
@@ -959,8 +934,7 @@ mod tests {
            if idx + 1 == offsets.len() {
                continue;
            }
-            let read_builder =
-                ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, align);
+            let read_builder = ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096);
            let read = read_builder.build();
            let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
            assert_eq!(result.blobs.len(), 1);
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -23,10 +23,12 @@ use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT;
 use pageserver_api::shard::TenantShardId;
 use std::fs::File;
 use std::io::{Error, ErrorKind, Seek, SeekFrom};
+#[cfg(target_os = "linux")]
+use std::os::unix::fs::OpenOptionsExt;
 use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice};

 use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
-use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
 use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard};
 use tokio::time::Instant;

@@ -38,7 +40,7 @@ pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult;
 mod metadata;
 mod open_options;
 use self::owned_buffers_io::write::OwnedAsyncWriter;
-pub(crate) use api::DirectIoMode;
+pub(crate) use api::IoMode;
 pub(crate) use io_engine::IoEngineKind;
 pub(crate) use metadata::Metadata;
 pub(crate) use open_options::*;
@@ -61,6 +63,171 @@ pub(crate) mod owned_buffers_io {
    }
 }

+#[derive(Debug)]
+pub struct VirtualFile {
+    inner: VirtualFileInner,
+    _mode: IoMode,
+}
+
+impl VirtualFile {
+    /// Open a file in read-only mode. Like File::open.
+    pub async fn open<P: AsRef<Utf8Path>>(
+        path: P,
+        ctx: &RequestContext,
+    ) -> Result<Self, std::io::Error> {
+        let inner = VirtualFileInner::open(path, ctx).await?;
+        Ok(VirtualFile {
+            inner,
+            _mode: IoMode::Buffered,
+        })
+    }
+
+    /// Open a file in read-only mode. Like File::open.
+    ///
+    /// `O_DIRECT` will be enabled base on `virtual_file_io_mode`.
+    pub async fn open_v2<P: AsRef<Utf8Path>>(
+        path: P,
+        ctx: &RequestContext,
+    ) -> Result<Self, std::io::Error> {
+        Self::open_with_options_v2(path.as_ref(), OpenOptions::new().read(true), ctx).await
+    }
+
+    pub async fn create<P: AsRef<Utf8Path>>(
+        path: P,
+        ctx: &RequestContext,
+    ) -> Result<Self, std::io::Error> {
+        let inner = VirtualFileInner::create(path, ctx).await?;
+        Ok(VirtualFile {
+            inner,
+            _mode: IoMode::Buffered,
+        })
+    }
+
+    pub async fn create_v2<P: AsRef<Utf8Path>>(
+        path: P,
+        ctx: &RequestContext,
+    ) -> Result<Self, std::io::Error> {
+        VirtualFile::open_with_options_v2(
+            path.as_ref(),
+            OpenOptions::new().write(true).create(true).truncate(true),
+            ctx,
+        )
+        .await
+    }
+
+    pub async fn open_with_options<P: AsRef<Utf8Path>>(
+        path: P,
+        open_options: &OpenOptions,
+        ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
+    ) -> Result<Self, std::io::Error> {
+        let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
+        Ok(VirtualFile {
+            inner,
+            _mode: IoMode::Buffered,
+        })
+    }
+
+    pub async fn open_with_options_v2<P: AsRef<Utf8Path>>(
+        path: P,
+        open_options: &OpenOptions,
+        ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
+    ) -> Result<Self, std::io::Error> {
+        let file = match get_io_mode() {
+            IoMode::Buffered => {
+                let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?;
+                VirtualFile {
+                    inner,
+                    _mode: IoMode::Buffered,
+                }
+            }
+            #[cfg(target_os = "linux")]
+            IoMode::Direct => {
+                let inner = VirtualFileInner::open_with_options(
+                    path,
+                    open_options.clone().custom_flags(nix::libc::O_DIRECT),
+                    ctx,
+                )
+                .await?;
+                VirtualFile {
+                    inner,
+                    _mode: IoMode::Direct,
+                }
+            }
+        };
+        Ok(file)
+    }
+
+    pub fn path(&self) -> &Utf8Path {
+        self.inner.path.as_path()
+    }
+
+    pub async fn crashsafe_overwrite<B: BoundedBuf<Buf = Buf> + Send, Buf: IoBuf + Send>(
+        final_path: Utf8PathBuf,
+        tmp_path: Utf8PathBuf,
+        content: B,
+    ) -> std::io::Result<()> {
+        VirtualFileInner::crashsafe_overwrite(final_path, tmp_path, content).await
+    }
+
+    pub async fn sync_all(&self) -> Result<(), Error> {
+        self.inner.sync_all().await
+    }
+
+    pub async fn sync_data(&self) -> Result<(), Error> {
+        self.inner.sync_data().await
+    }
+
+    pub async fn metadata(&self) -> Result<Metadata, Error> {
+        self.inner.metadata().await
+    }
+
+    pub fn remove(self) {
+        self.inner.remove();
+    }
+
+    pub async fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
+        self.inner.seek(pos).await
+    }
+
+    pub async fn read_exact_at<Buf>(
+        &self,
+        slice: Slice<Buf>,
+        offset: u64,
+        ctx: &RequestContext,
+    ) -> Result<Slice<Buf>, Error>
+    where
+        Buf: IoBufMut + Send,
+    {
+        self.inner.read_exact_at(slice, offset, ctx).await
+    }
+
+    pub async fn read_exact_at_page(
+        &self,
+        page: PageWriteGuard<'static>,
+        offset: u64,
+        ctx: &RequestContext,
+    ) -> Result<PageWriteGuard<'static>, Error> {
+        self.inner.read_exact_at_page(page, offset, ctx).await
+    }
+
+    pub async fn write_all_at<Buf: IoBuf + Send>(
+        &self,
+        buf: FullSlice<Buf>,
+        offset: u64,
+        ctx: &RequestContext,
+    ) -> (FullSlice<Buf>, Result<(), Error>) {
+        self.inner.write_all_at(buf, offset, ctx).await
+    }
+
+    pub async fn write_all<Buf: IoBuf + Send>(
+        &mut self,
+        buf: FullSlice<Buf>,
+        ctx: &RequestContext,
+    ) -> (FullSlice<Buf>, Result<usize, Error>) {
+        self.inner.write_all(buf, ctx).await
+    }
+}
+
 ///
 /// A virtual file descriptor. You can use this just like std::fs::File, but internally
 /// the underlying file is closed if the system is low on file descriptors,
@@ -77,7 +244,7 @@ pub(crate) mod owned_buffers_io {
 /// 'tag' field is used to detect whether the handle still is valid or not.
 ///
 #[derive(Debug)]
-pub struct VirtualFile {
+pub struct VirtualFileInner {
    /// Lazy handle to the global file descriptor cache. The slot that this points to
    /// might contain our File, or it may be empty, or it may contain a File that
    /// belongs to a different VirtualFile.
@@ -350,12 +517,12 @@ macro_rules! with_file {
    }};
 }

-impl VirtualFile {
+impl VirtualFileInner {
    /// Open a file in read-only mode. Like File::open.
    pub async fn open<P: AsRef<Utf8Path>>(
        path: P,
        ctx: &RequestContext,
-    ) -> Result<VirtualFile, std::io::Error> {
+    ) -> Result<VirtualFileInner, std::io::Error> {
        Self::open_with_options(path.as_ref(), OpenOptions::new().read(true), ctx).await
    }

@@ -364,7 +531,7 @@ impl VirtualFile {
    pub async fn create<P: AsRef<Utf8Path>>(
        path: P,
        ctx: &RequestContext,
-    ) -> Result<VirtualFile, std::io::Error> {
+    ) -> Result<VirtualFileInner, std::io::Error> {
        Self::open_with_options(
            path.as_ref(),
            OpenOptions::new().write(true).create(true).truncate(true),
@@ -382,7 +549,7 @@ impl VirtualFile {
        path: P,
        open_options: &OpenOptions,
        _ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */
-    ) -> Result<VirtualFile, std::io::Error> {
+    ) -> Result<VirtualFileInner, std::io::Error> {
        let path_ref = path.as_ref();
        let path_str = path_ref.to_string();
        let parts = path_str.split('/').collect::<Vec<&str>>();
@@ -423,7 +590,7 @@ impl VirtualFile {
        reopen_options.create_new(false);
        reopen_options.truncate(false);

-        let vfile = VirtualFile {
+        let vfile = VirtualFileInner {
            handle: RwLock::new(handle),
            pos: 0,
            path: path_ref.to_path_buf(),
@@ -1034,6 +1201,21 @@ impl tokio_epoll_uring::IoFd for FileGuard {

 #[cfg(test)]
 impl VirtualFile {
+    pub(crate) async fn read_blk(
+        &self,
+        blknum: u32,
+        ctx: &RequestContext,
+    ) -> Result<crate::tenant::block_io::BlockLease<'_>, std::io::Error> {
+        self.inner.read_blk(blknum, ctx).await
+    }
+
+    async fn read_to_end(&mut self, buf: &mut Vec<u8>, ctx: &RequestContext) -> Result<(), Error> {
+        self.inner.read_to_end(buf, ctx).await
+    }
+}
+
+#[cfg(test)]
+impl VirtualFileInner {
    pub(crate) async fn read_blk(
        &self,
        blknum: u32,
@@ -1067,7 +1249,7 @@ impl VirtualFile {
    }
 }

-impl Drop for VirtualFile {
+impl Drop for VirtualFileInner {
    /// If a VirtualFile is dropped, close the underlying file if it was open.
    fn drop(&mut self) {
        let handle = self.handle.get_mut();
@@ -1143,15 +1325,10 @@ impl OpenFiles {
 /// server startup.
 ///
 #[cfg(not(test))]
-pub fn init(num_slots: usize, engine: IoEngineKind, io_buffer_alignment: usize) {
+pub fn init(num_slots: usize, engine: IoEngineKind) {
    if OPEN_FILES.set(OpenFiles::new(num_slots)).is_err() {
        panic!("virtual_file::init called twice");
    }
-    if set_io_buffer_alignment(io_buffer_alignment).is_err() {
-        panic!(
-            "IO buffer alignment needs to be a power of two and greater than 512, got {io_buffer_alignment}"
-        );
-    }
    io_engine::init(engine);
    crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
 }
@@ -1175,47 +1352,20 @@ fn get_open_files() -> &'static OpenFiles {
    }
 }

-static IO_BUFFER_ALIGNMENT: AtomicUsize = AtomicUsize::new(DEFAULT_IO_BUFFER_ALIGNMENT);
-
-/// Returns true if the alignment is a power of two and is greater or equal to 512.
-fn is_valid_io_buffer_alignment(align: usize) -> bool {
-    align.is_power_of_two() && align >= 512
-}
-
-/// Sets IO buffer alignment requirement. Returns error if the alignment requirement is
-/// not a power of two or less than 512 bytes.
-#[allow(unused)]
-pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
-    if is_valid_io_buffer_alignment(align) {
-        IO_BUFFER_ALIGNMENT.store(align, std::sync::atomic::Ordering::Relaxed);
-        Ok(())
-    } else {
-        Err(align)
-    }
-}
-
 /// Gets the io buffer alignment.
-///
-/// This function should be used for getting the actual alignment value to use.
-pub(crate) fn get_io_buffer_alignment() -> usize {
-    let align = IO_BUFFER_ALIGNMENT.load(std::sync::atomic::Ordering::Relaxed);
-
-    if cfg!(test) {
-        let env_var_name = "NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT";
-        if let Some(test_align) = utils::env::var(env_var_name) {
-            if is_valid_io_buffer_alignment(test_align) {
-                test_align
-            } else {
-                panic!("IO buffer alignment needs to be a power of two and greater than 512, got {test_align}");
-            }
-        } else {
-            align
-        }
-    } else {
-        align
-    }
+pub(crate) const fn get_io_buffer_alignment() -> usize {
+    DEFAULT_IO_BUFFER_ALIGNMENT
 }

+static IO_MODE: AtomicU8 = AtomicU8::new(IoMode::preferred() as u8);
+
+pub(crate) fn set_io_mode(mode: IoMode) {
+    IO_MODE.store(mode as u8, std::sync::atomic::Ordering::Relaxed);
+}
+
+pub(crate) fn get_io_mode() -> IoMode {
+    IoMode::try_from(IO_MODE.load(Ordering::Relaxed)).unwrap()
+}
 #[cfg(test)]
 mod tests {
    use crate::context::DownloadBehavior;
@@ -1524,7 +1674,7 @@ mod tests {
        // Open the file many times.
        let mut files = Vec::new();
        for _ in 0..VIRTUAL_FILES {
-            let f = VirtualFile::open_with_options(
+            let f = VirtualFileInner::open_with_options(
                &test_file_path,
                OpenOptions::new().read(true),
                &ctx,
@@ -1576,7 +1726,7 @@ mod tests {
        let path = testdir.join("myfile");
        let tmp_path = testdir.join("myfile.tmp");

-        VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
+        VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
            .await
            .unwrap();
        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
@@ -1585,7 +1735,7 @@ mod tests {
        assert!(!tmp_path.exists());
        drop(file);

-        VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
+        VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"bar".to_vec())
            .await
            .unwrap();
        let mut file = MaybeVirtualFile::from(VirtualFile::open(&path, &ctx).await.unwrap());
@@ -1608,7 +1758,7 @@ mod tests {
        std::fs::write(&tmp_path, "some preexisting junk that should be removed").unwrap();
        assert!(tmp_path.exists());

-        VirtualFile::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
+        VirtualFileInner::crashsafe_overwrite(path.clone(), tmp_path.clone(), b"foo".to_vec())
            .await
            .unwrap();

--- a/pgxn/neon/control_plane_connector.c
+++ b/pgxn/neon/control_plane_connector.c
@@ -146,6 +146,8 @@ ConstructDeltaMessage()
 	if (RootTable.role_table)
 	{
 		JsonbValue	roles;
+		HASH_SEQ_STATUS status;
+		RoleEntry  *entry;

 		roles.type = jbvString;
 		roles.val.string.val = "roles";
@@ -153,9 +155,6 @@ ConstructDeltaMessage()
 		pushJsonbValue(&state, WJB_KEY, &roles);
 		pushJsonbValue(&state, WJB_BEGIN_ARRAY, NULL);

-		HASH_SEQ_STATUS status;
-		RoleEntry  *entry;
-
 		hash_seq_init(&status, RootTable.role_table);
 		while ((entry = hash_seq_search(&status)) != NULL)
 		{
@@ -190,10 +189,12 @@ ConstructDeltaMessage()
 		}
 		pushJsonbValue(&state, WJB_END_ARRAY, NULL);
 	}
-	JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
-	Jsonb	   *jsonb = JsonbValueToJsonb(result);
+	{
+		JsonbValue *result = pushJsonbValue(&state, WJB_END_OBJECT, NULL);
+		Jsonb	   *jsonb = JsonbValueToJsonb(result);

-	return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
+		return JsonbToCString(NULL, &jsonb->root, 0 /* estimated_len */ );
+	}
 }

 #define ERROR_SIZE 1024
@@ -272,32 +273,28 @@ SendDeltasToControlPlane()
 		curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ErrorWriteCallback);
 	}

-	char	   *message = ConstructDeltaMessage();
-	ErrorString str;
-
-	str.size = 0;
-
-	curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
-	curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
-
-	const int	num_retries = 5;
-	CURLcode	curl_status;
-
-	for (int i = 0; i < num_retries; i++)
-	{
-		if ((curl_status = curl_easy_perform(handle)) == 0)
-			break;
-		elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
-		pg_usleep(1000 * 1000);
-	}
-	if (curl_status != CURLE_OK)
-	{
-		elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
-	}
-	else
 	{
+		char	   *message = ConstructDeltaMessage();
+		ErrorString str;
+		const int	num_retries = 5;
+		CURLcode	curl_status;
 		long		response_code;

+		str.size = 0;
+
+		curl_easy_setopt(handle, CURLOPT_POSTFIELDS, message);
+		curl_easy_setopt(handle, CURLOPT_WRITEDATA, &str);
+
+		for (int i = 0; i < num_retries; i++)
+		{
+			if ((curl_status = curl_easy_perform(handle)) == 0)
+				break;
+			elog(LOG, "Curl request failed on attempt %d: %s", i, CurlErrorBuf);
+			pg_usleep(1000 * 1000);
+		}
+		if (curl_status != CURLE_OK)
+			elog(ERROR, "Failed to perform curl request: %s", CurlErrorBuf);
+
 		if (curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_UNKNOWN_OPTION)
 		{
 			if (response_code != 200)
@@ -376,10 +373,11 @@ MergeTable()

 	if (old_table->db_table)
 	{
-		InitDbTableIfNeeded();
 		DbEntry    *entry;
 		HASH_SEQ_STATUS status;

+		InitDbTableIfNeeded();
+
 		hash_seq_init(&status, old_table->db_table);
 		while ((entry = hash_seq_search(&status)) != NULL)
 		{
@@ -421,10 +419,11 @@ MergeTable()

 	if (old_table->role_table)
 	{
-		InitRoleTableIfNeeded();
 		RoleEntry  *entry;
 		HASH_SEQ_STATUS status;

+		InitRoleTableIfNeeded();
+
 		hash_seq_init(&status, old_table->role_table);
 		while ((entry = hash_seq_search(&status)) != NULL)
 		{
@@ -515,9 +514,12 @@ RoleIsNeonSuperuser(const char *role_name)
 static void
 HandleCreateDb(CreatedbStmt *stmt)
 {
-	InitDbTableIfNeeded();
 	DefElem    *downer = NULL;
 	ListCell   *option;
+	bool		found = false;
+	DbEntry    *entry;
+
+	InitDbTableIfNeeded();

 	foreach(option, stmt->options)
 	{
@@ -526,13 +528,11 @@ HandleCreateDb(CreatedbStmt *stmt)
 		if (strcmp(defel->defname, "owner") == 0)
 			downer = defel;
 	}
-	bool		found = false;
-	DbEntry    *entry = hash_search(
-									CurrentDdlTable->db_table,
-									stmt->dbname,
-									HASH_ENTER,
-									&found);

+	entry = hash_search(CurrentDdlTable->db_table,
+						stmt->dbname,
+						HASH_ENTER,
+						&found);
 	if (!found)
 		memset(entry->old_name, 0, sizeof(entry->old_name));

@@ -554,21 +554,24 @@ HandleCreateDb(CreatedbStmt *stmt)
 static void
 HandleAlterOwner(AlterOwnerStmt *stmt)
 {
+	const char *name;
+	bool		found = false;
+	DbEntry    *entry;
+	const char *new_owner;
+
 	if (stmt->objectType != OBJECT_DATABASE)
 		return;
 	InitDbTableIfNeeded();
-	const char *name = strVal(stmt->object);
-	bool		found = false;
-	DbEntry    *entry = hash_search(
-									CurrentDdlTable->db_table,
-									name,
-									HASH_ENTER,
-									&found);

+	name = strVal(stmt->object);
+	entry = hash_search(CurrentDdlTable->db_table,
+						name,
+						HASH_ENTER,
+						&found);
 	if (!found)
 		memset(entry->old_name, 0, sizeof(entry->old_name));
-	const char *new_owner = get_rolespec_name(stmt->newowner);

+	new_owner = get_rolespec_name(stmt->newowner);
 	if (RoleIsNeonSuperuser(new_owner))
 		elog(ERROR, "can't alter owner to neon_superuser");
 	entry->owner = get_role_oid(new_owner, false);
@@ -578,21 +581,23 @@ HandleAlterOwner(AlterOwnerStmt *stmt)
 static void
 HandleDbRename(RenameStmt *stmt)
 {
+	bool		found = false;
+	DbEntry    *entry;
+	DbEntry    *entry_for_new_name;
+
 	Assert(stmt->renameType == OBJECT_DATABASE);
 	InitDbTableIfNeeded();
-	bool		found = false;
-	DbEntry    *entry = hash_search(
-									CurrentDdlTable->db_table,
-									stmt->subname,
-									HASH_FIND,
-									&found);
-	DbEntry    *entry_for_new_name = hash_search(
-												 CurrentDdlTable->db_table,
-												 stmt->newname,
-												 HASH_ENTER,
-												 NULL);
+	entry = hash_search(CurrentDdlTable->db_table,
+						stmt->subname,
+						HASH_FIND,
+						&found);

+	entry_for_new_name = hash_search(CurrentDdlTable->db_table,
+									 stmt->newname,
+									 HASH_ENTER,
+									 NULL);
 	entry_for_new_name->type = Op_Set;
+
 	if (found)
 	{
 		if (entry->old_name[0] != '\0')
@@ -600,8 +605,7 @@ HandleDbRename(RenameStmt *stmt)
 		else
 			strlcpy(entry_for_new_name->old_name, entry->name, NAMEDATALEN);
 		entry_for_new_name->owner = entry->owner;
-		hash_search(
-					CurrentDdlTable->db_table,
+		hash_search(CurrentDdlTable->db_table,
 					stmt->subname,
 					HASH_REMOVE,
 					NULL);
@@ -616,14 +620,15 @@ HandleDbRename(RenameStmt *stmt)
 static void
 HandleDropDb(DropdbStmt *stmt)
 {
-	InitDbTableIfNeeded();
 	bool		found = false;
-	DbEntry    *entry = hash_search(
-									CurrentDdlTable->db_table,
-									stmt->dbname,
-									HASH_ENTER,
-									&found);
+	DbEntry    *entry;

+	InitDbTableIfNeeded();
+
+	entry = hash_search(CurrentDdlTable->db_table,
+						stmt->dbname,
+						HASH_ENTER,
+						&found);
 	entry->type = Op_Delete;
 	entry->owner = InvalidOid;
 	if (!found)
@@ -633,16 +638,14 @@ HandleDropDb(DropdbStmt *stmt)
 static void
 HandleCreateRole(CreateRoleStmt *stmt)
 {
-	InitRoleTableIfNeeded();
 	bool		found = false;
-	RoleEntry  *entry = hash_search(
-									CurrentDdlTable->role_table,
-									stmt->role,
-									HASH_ENTER,
-									&found);
-	DefElem    *dpass = NULL;
+	RoleEntry  *entry;
+	DefElem    *dpass;
 	ListCell   *option;

+	InitRoleTableIfNeeded();
+
+	dpass = NULL;
 	foreach(option, stmt->options)
 	{
 		DefElem    *defel = lfirst(option);
@@ -650,6 +653,11 @@ HandleCreateRole(CreateRoleStmt *stmt)
 		if (strcmp(defel->defname, "password") == 0)
 			dpass = defel;
 	}
+
+	entry = hash_search(CurrentDdlTable->role_table,
+						stmt->role,
+						HASH_ENTER,
+						&found);
 	if (!found)
 		memset(entry->old_name, 0, sizeof(entry->old_name));
 	if (dpass && dpass->arg)
@@ -662,14 +670,18 @@ HandleCreateRole(CreateRoleStmt *stmt)
 static void
 HandleAlterRole(AlterRoleStmt *stmt)
 {
-	InitRoleTableIfNeeded();
-	DefElem    *dpass = NULL;
-	ListCell   *option;
 	const char *role_name = stmt->role->rolename;
+	DefElem    *dpass;
+	ListCell   *option;
+	bool		found = false;
+	RoleEntry  *entry;
+
+	InitRoleTableIfNeeded();

 	if (RoleIsNeonSuperuser(role_name) && !superuser())
 		elog(ERROR, "can't ALTER neon_superuser");

+	dpass = NULL;
 	foreach(option, stmt->options)
 	{
 		DefElem    *defel = lfirst(option);
@@ -680,13 +692,11 @@ HandleAlterRole(AlterRoleStmt *stmt)
 	/* We only care about updates to the password */
 	if (!dpass)
 		return;
-	bool		found = false;
-	RoleEntry  *entry = hash_search(
-									CurrentDdlTable->role_table,
-									role_name,
-									HASH_ENTER,
-									&found);

+	entry = hash_search(CurrentDdlTable->role_table,
+						role_name,
+						HASH_ENTER,
+						&found);
 	if (!found)
 		memset(entry->old_name, 0, sizeof(entry->old_name));
 	if (dpass->arg)
@@ -699,20 +709,22 @@ HandleAlterRole(AlterRoleStmt *stmt)
 static void
 HandleRoleRename(RenameStmt *stmt)
 {
-	InitRoleTableIfNeeded();
-	Assert(stmt->renameType == OBJECT_ROLE);
 	bool		found = false;
-	RoleEntry  *entry = hash_search(
-									CurrentDdlTable->role_table,
-									stmt->subname,
-									HASH_FIND,
-									&found);
+	RoleEntry  *entry;
+	RoleEntry  *entry_for_new_name;

-	RoleEntry  *entry_for_new_name = hash_search(
-												 CurrentDdlTable->role_table,
-												 stmt->newname,
-												 HASH_ENTER,
-												 NULL);
+	Assert(stmt->renameType == OBJECT_ROLE);
+	InitRoleTableIfNeeded();
+
+	entry = hash_search(CurrentDdlTable->role_table,
+						stmt->subname,
+						HASH_FIND,
+						&found);
+
+	entry_for_new_name = hash_search(CurrentDdlTable->role_table,
+									 stmt->newname,
+									 HASH_ENTER,
+									 NULL);

 	entry_for_new_name->type = Op_Set;
 	if (found)
@@ -738,9 +750,10 @@ HandleRoleRename(RenameStmt *stmt)
 static void
 HandleDropRole(DropRoleStmt *stmt)
 {
-	InitRoleTableIfNeeded();
 	ListCell   *item;

+	InitRoleTableIfNeeded();
+
 	foreach(item, stmt->roles)
 	{
 		RoleSpec   *spec = lfirst(item);
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -170,12 +170,14 @@ lfc_disable(char const *op)

 		if (lfc_desc > 0)
 		{
+			int			rc;
+
 			/*
 			 * If the reason of error is ENOSPC, then truncation of file may
 			 * help to reclaim some space
 			 */
 			pgstat_report_wait_start(WAIT_EVENT_NEON_LFC_TRUNCATE);
-			int			rc = ftruncate(lfc_desc, 0);
+			rc = ftruncate(lfc_desc, 0);
 			pgstat_report_wait_end();

 			if (rc < 0)
@@ -616,7 +618,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno)
 	 */
 	if (entry->bitmap[chunk_offs >> 5] == 0)
 	{
-		bool		has_remaining_pages;
+		bool		has_remaining_pages = false;

 		for (int i = 0; i < CHUNK_BITMAP_SIZE; i++)
 		{
@@ -666,7 +668,6 @@ lfc_readv_select(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 	BufferTag	tag;
 	FileCacheEntry *entry;
 	ssize_t		rc;
-	bool		result = true;
 	uint32		hash;
 	uint64		generation;
 	uint32		entry_offset;
@@ -925,10 +926,10 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				/* We can reuse a hole that was left behind when the LFC was shrunk previously */
 				FileCacheEntry *hole = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->holes));
 				uint32		offset = hole->offset;
-				bool		found;
+				bool		hole_found;
 	
-				hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &found);
-				CriticalAssert(found);
+				hash_search_with_hash_value(lfc_hash, &hole->key, hole->hash, HASH_REMOVE, &hole_found);
+				CriticalAssert(hole_found);
 	
 				lfc_ctl->used += 1;
 				entry->offset = offset;	/* reuse the hole */
@@ -1004,7 +1005,7 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
 	Datum		result;
 	HeapTuple	tuple;
 	char const *key;
-	uint64		value;
+	uint64		value = 0;
 	Datum		values[NUM_NEON_GET_STATS_COLS];
 	bool		nulls[NUM_NEON_GET_STATS_COLS];

--- a/pgxn/neon/hll.c
+++ b/pgxn/neon/hll.c
@@ -116,8 +116,6 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
 {
 	uint8		count;
 	uint32		index;
-	size_t		i;
-	size_t		j;

 	TimestampTz	now = GetCurrentTimestamp();
 	/* Use the first "k" (registerWidth) bits as a zero based index */
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -89,7 +89,6 @@ typedef struct

 #if PG_VERSION_NUM >= 150000
 static shmem_request_hook_type prev_shmem_request_hook = NULL;
-static void walproposer_shmem_request(void);
 #endif
 static shmem_startup_hook_type prev_shmem_startup_hook;
 static PagestoreShmemState *pagestore_shared;
@@ -441,8 +440,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
 			return false;
 		}
 		shard->state = PS_Connecting_Startup;
-		/* fallthrough */
 	}
+	/* FALLTHROUGH */
 	case PS_Connecting_Startup:
 	{
 		char	   *pagestream_query;
@@ -453,8 +452,6 @@ pageserver_connect(shardno_t shard_no, int elevel)

 		do
 		{
-			WaitEvent	event;
-
 			switch (poll_result)
 			{
 			default: /* unknown/unused states are handled as a failed connection */
@@ -585,8 +582,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
 		}

 		shard->state = PS_Connecting_PageStream;
-		/* fallthrough */
 	}
+	/* FALLTHROUGH */
 	case PS_Connecting_PageStream:
 	{
 		neon_shard_log(shard_no, DEBUG5, "Connection state: Connecting_PageStream");
@@ -631,8 +628,8 @@ pageserver_connect(shardno_t shard_no, int elevel)
 		}

 		shard->state = PS_Connected;
-		/* fallthrough */
 	}
+	/* FALLTHROUGH */
 	case PS_Connected:
 		/*
 		 * We successfully connected. Future connections to this PageServer
--- a/pgxn/neon/neon_perf_counters.c
+++ b/pgxn/neon/neon_perf_counters.c
@@ -94,7 +94,6 @@ neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
 	metric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
 	uint64		bucket_accum;
 	int			i = 0;
-	Datum		getpage_wait_str;

 	metrics[i].name = "getpage_wait_seconds_count";
 	metrics[i].is_bucket = false;
@@ -224,7 +223,6 @@ neon_get_perf_counters(PG_FUNCTION_ARGS)
 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
 	Datum		values[3];
 	bool		nulls[3];
-	Datum		getpage_wait_str;
 	neon_per_backend_counters totals = {0};
 	metric_t   *metrics;

--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -7,6 +7,7 @@
 #define NEON_PGVERSIONCOMPAT_H

 #include "fmgr.h"
+#include "storage/buf_internals.h"

 #if PG_MAJORVERSION_NUM < 17
 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
@@ -20,11 +21,24 @@
 	NInfoGetRelNumber(a) == NInfoGetRelNumber(b) \
 )

-/* buftag population & RelFileNode/RelFileLocator rework */
+/* These macros were turned into static inline functions in v16 */
 #if PG_MAJORVERSION_NUM < 16
+static inline bool
+BufferTagsEqual(const BufferTag *tag1, const BufferTag *tag2)
+{
+	return BUFFERTAGS_EQUAL(*tag1, *tag2);
+}

-#define InitBufferTag(tag, rfn, fn, bn) INIT_BUFFERTAG(*tag, *rfn, fn, bn)
+static inline void
+InitBufferTag(BufferTag *tag, const RelFileNode *rnode,
+			  ForkNumber forkNum, BlockNumber blockNum)
+{
+	INIT_BUFFERTAG(*tag, *rnode, forkNum, blockNum);
+}
+#endif

+/* RelFileNode -> RelFileLocator rework */
+#if PG_MAJORVERSION_NUM < 16
 #define USE_RELFILENODE

 #define RELFILEINFO_HDR "storage/relfilenode.h"
@@ -73,8 +87,6 @@

 #define USE_RELFILELOCATOR

-#define BUFFERTAGS_EQUAL(a, b) BufferTagsEqual(&(a), &(b))
-
 #define RELFILEINFO_HDR "storage/relfilelocator.h"

 #define NRelFileInfo RelFileLocator
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -213,32 +213,6 @@ extern const f_smgr *smgr_neon(ProcNumber backend, NRelFileInfo rinfo);
 extern void smgr_init_neon(void);
 extern void readahead_buffer_resize(int newsize, void *extra);

-/* Neon storage manager functionality */
-
-extern void neon_init(void);
-extern void neon_open(SMgrRelation reln);
-extern void neon_close(SMgrRelation reln, ForkNumber forknum);
-extern void neon_create(SMgrRelation reln, ForkNumber forknum, bool isRedo);
-extern bool neon_exists(SMgrRelation reln, ForkNumber forknum);
-extern void neon_unlink(NRelFileInfoBackend rnode, ForkNumber forknum, bool isRedo);
-#if PG_MAJORVERSION_NUM < 16
-extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
-						BlockNumber blocknum, char *buffer, bool skipFsync);
-#else
-extern void neon_extend(SMgrRelation reln, ForkNumber forknum,
-						BlockNumber blocknum, const void *buffer, bool skipFsync);
-extern void neon_zeroextend(SMgrRelation reln, ForkNumber forknum,
-							BlockNumber blocknum, int nbuffers, bool skipFsync);
-#endif
-
-#if PG_MAJORVERSION_NUM >=17
-extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
-						  BlockNumber blocknum, int nblocks);
-#else
-extern bool neon_prefetch(SMgrRelation reln, ForkNumber forknum,
-						  BlockNumber blocknum);
-#endif
-
 /*
 * LSN values associated with each request to the pageserver
 */
@@ -278,13 +252,7 @@ extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum,
 extern PGDLLEXPORT void neon_read_at_lsn(NRelFileInfo rnode, ForkNumber forkNum, BlockNumber blkno,
 										 neon_request_lsns request_lsns, void *buffer);
 #endif
-extern void neon_writeback(SMgrRelation reln, ForkNumber forknum,
-						   BlockNumber blocknum, BlockNumber nblocks);
-extern BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
 extern int64 neon_dbsize(Oid dbNode);
-extern void neon_truncate(SMgrRelation reln, ForkNumber forknum,
-						  BlockNumber nblocks);
-extern void neon_immedsync(SMgrRelation reln, ForkNumber forknum);

 /* utils for neon relsize cache */
 extern void relsize_hash_init(void);
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -118,6 +118,8 @@ static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
 static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;

+static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum);
+
 /*
 * Prefetch implementation:
 *
@@ -215,7 +217,7 @@ typedef struct PrfHashEntry
 	sizeof(BufferTag) \
 )

-#define SH_EQUAL(tb, a, b)	(BUFFERTAGS_EQUAL((a)->buftag, (b)->buftag))
+#define SH_EQUAL(tb, a, b)	(BufferTagsEqual(&(a)->buftag, &(b)->buftag))
 #define SH_SCOPE			static inline
 #define SH_DEFINE
 #define SH_DECLARE
@@ -736,7 +738,7 @@ static void
 prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns)
 {
 	bool		found;
-	uint64		mySlotNo = slot->my_ring_index;
+	uint64		mySlotNo PG_USED_FOR_ASSERTS_ONLY = slot->my_ring_index;

 	NeonGetPageRequest request = {
 		.req.tag = T_NeonGetPageRequest,
@@ -853,7 +855,7 @@ Retry:
 			Assert(slot->status != PRFS_UNUSED);
 			Assert(MyPState->ring_last <= ring_index &&
 				   ring_index < MyPState->ring_unused);
-			Assert(BUFFERTAGS_EQUAL(slot->buftag, hashkey.buftag));
+			Assert(BufferTagsEqual(&slot->buftag, &hashkey.buftag));

 			/*
 			 * If the caller specified a request LSN to use, only accept
@@ -1463,7 +1465,6 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno,
 	BlockNumber	blknos[XLR_MAX_BLOCK_ID];
 	Page		pageptrs[XLR_MAX_BLOCK_ID];
 	int			nregistered = 0;
-	XLogRecPtr	result = 0;

 	for (int i = 0; i < nblocks; i++)
 	{
@@ -1776,7 +1777,7 @@ neon_wallog_page(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, co
 /*
 *	neon_init() -- Initialize private state
 */
-void
+static void
 neon_init(void)
 {
 	Size		prfs_size;
@@ -2166,7 +2167,7 @@ neon_prefetch_response_usable(neon_request_lsns *request_lsns,
 /*
 *	neon_exists() -- Does the physical file exist?
 */
-bool
+static bool
 neon_exists(SMgrRelation reln, ForkNumber forkNum)
 {
 	bool		exists;
@@ -2272,7 +2273,7 @@ neon_exists(SMgrRelation reln, ForkNumber forkNum)
 *
 * If isRedo is true, it's okay for the relation to exist already.
 */
-void
+static void
 neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 {
 	switch (reln->smgr_relpersistence)
@@ -2348,7 +2349,7 @@ neon_create(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
 * Note: any failure should be reported as WARNING not ERROR, because
 * we are usually not in a transaction anymore when this is called.
 */
-void
+static void
 neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
 {
 	/*
@@ -2372,7 +2373,7 @@ neon_unlink(NRelFileInfoBackend rinfo, ForkNumber forkNum, bool isRedo)
 *		EOF).  Note that we assume writing a block beyond current EOF
 *		causes intervening file space to become filled with zeroes.
 */
-void
+static void
 #if PG_MAJORVERSION_NUM < 16
 neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 			char *buffer, bool skipFsync)
@@ -2464,7 +2465,7 @@ neon_extend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
 }

 #if PG_MAJORVERSION_NUM >= 16
-void
+static void
 neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 				int nblocks, bool skipFsync)
 {
@@ -2560,7 +2561,7 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum,
 /*
 *  neon_open() -- Initialize newly-opened relation.
 */
-void
+static void
 neon_open(SMgrRelation reln)
 {
 	/*
@@ -2578,7 +2579,7 @@ neon_open(SMgrRelation reln)
 /*
 *	neon_close() -- Close the specified relation, if it isn't closed already.
 */
-void
+static void
 neon_close(SMgrRelation reln, ForkNumber forknum)
 {
 	/*
@@ -2593,13 +2594,12 @@ neon_close(SMgrRelation reln, ForkNumber forknum)
 /*
 *	neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
 */
-bool
+static bool
 neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			  int nblocks)
 {
 	uint64		ring_index PG_USED_FOR_ASSERTS_ONLY;
 	BufferTag	tag;
-	bool		io_initiated = false;

 	switch (reln->smgr_relpersistence)
 	{
@@ -2623,7 +2623,6 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	while (nblocks > 0)
 	{
 		int		iterblocks = Min(nblocks, PG_IOV_MAX);
-		int		seqlen = 0;
 		bits8		lfc_present[PG_IOV_MAX / 8];
 		memset(lfc_present, 0, sizeof(lfc_present));

@@ -2635,8 +2634,6 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			continue;
 		}

-		io_initiated = true;
-
 		tag.blockNum = blocknum;
 		
 		for (int i = 0; i < PG_IOV_MAX / 8; i++)
@@ -2659,7 +2656,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 /*
 *	neon_prefetch() -- Initiate asynchronous read of the specified block of a relation
 */
-bool
+static bool
 neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 {
 	uint64		ring_index PG_USED_FOR_ASSERTS_ONLY;
@@ -2703,7 +2700,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
 * This accepts a range of blocks because flushing several pages at once is
 * considerably more efficient than doing so individually.
 */
-void
+static void
 neon_writeback(SMgrRelation reln, ForkNumber forknum,
 			   BlockNumber blocknum, BlockNumber nblocks)
 {
@@ -2924,10 +2921,10 @@ neon_read_at_lsn(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 *	neon_read() -- Read the specified block from a relation.
 */
 #if PG_MAJORVERSION_NUM < 16
-void
+static void
 neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, char *buffer)
 #else
-void
+static void
 neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer)
 #endif
 {
@@ -3036,7 +3033,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 #endif /* PG_MAJORVERSION_NUM <= 16 */

 #if PG_MAJORVERSION_NUM >= 17
-void
+static void
 neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 		void **buffers, BlockNumber nblocks)
 {
@@ -3200,6 +3197,7 @@ hexdump_page(char *page)
 }
 #endif

+#if PG_MAJORVERSION_NUM < 17
 /*
 *	neon_write() -- Write the supplied block at the appropriate location.
 *
@@ -3207,7 +3205,7 @@ hexdump_page(char *page)
 *		relation (ie, those before the current EOF).  To extend a relation,
 *		use mdextend().
 */
-void
+static void
 #if PG_MAJORVERSION_NUM < 16
 neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync)
 #else
@@ -3273,11 +3271,12 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo
 		#endif
 #endif
 }
+#endif



 #if PG_MAJORVERSION_NUM >= 17
-void
+static void
 neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 			 const void **buffers, BlockNumber nblocks, bool skipFsync)
 {
@@ -3327,7 +3326,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 /*
 *	neon_nblocks() -- Get the number of blocks stored in a relation.
 */
-BlockNumber
+static BlockNumber
 neon_nblocks(SMgrRelation reln, ForkNumber forknum)
 {
 	NeonResponse *resp;
@@ -3464,7 +3463,7 @@ neon_dbsize(Oid dbNode)
 /*
 *	neon_truncate() -- Truncate relation to specified number of blocks.
 */
-void
+static void
 neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 {
 	XLogRecPtr	lsn;
@@ -3533,7 +3532,7 @@ neon_truncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks)
 * crash before the next checkpoint syncs the newly-inactive segment, that
 * segment may survive recovery, reintroducing unwanted data into the table.
 */
-void
+static void
 neon_immedsync(SMgrRelation reln, ForkNumber forknum)
 {
 	switch (reln->smgr_relpersistence)
@@ -3563,8 +3562,8 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum)
 }

 #if PG_MAJORVERSION_NUM >= 17
-void
-neon_regisersync(SMgrRelation reln, ForkNumber forknum)
+static void
+neon_registersync(SMgrRelation reln, ForkNumber forknum)
 {
 	switch (reln->smgr_relpersistence)
 	{
@@ -3748,6 +3747,8 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
 	SlruKind	kind;
 	int			n_blocks;
 	shardno_t	shard_no = 0; /* All SLRUs are at shard 0 */
+	NeonResponse *resp;
+	NeonGetSlruSegmentRequest request;

 	/*
 	 * Compute a request LSN to use, similar to neon_get_request_lsns() but the
@@ -3786,8 +3787,7 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf
 	else
 		return -1;

-	NeonResponse *resp;
-	NeonGetSlruSegmentRequest request = {
+	request = (NeonGetSlruSegmentRequest) {
 		.req.tag = T_NeonGetSlruSegmentRequest,
 		.req.lsn = request_lsn,
 		.req.not_modified_since = not_modified_since,
@@ -3894,7 +3894,7 @@ static const struct f_smgr neon_smgr =
 	.smgr_truncate = neon_truncate,
 	.smgr_immedsync = neon_immedsync,
 #if PG_MAJORVERSION_NUM >= 17
-	.smgr_registersync = neon_regisersync,
+	.smgr_registersync = neon_registersync,
 #endif
 	.smgr_start_unlogged_build = neon_start_unlogged_build,
 	.smgr_finish_unlogged_build_phase_1 = neon_finish_unlogged_build_phase_1,
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -252,8 +252,6 @@ WalProposerPoll(WalProposer *wp)
 		/* timeout expired: poll state */
 		if (rc == 0 || TimeToReconnect(wp, now) <= 0)
 		{
-			TimestampTz now;
-
 			/*
 			 * If no WAL was generated during timeout (and we have already
 			 * collected the quorum), then send empty keepalive message
@@ -269,8 +267,7 @@ WalProposerPoll(WalProposer *wp)
 			now = wp->api.get_current_timestamp(wp);
 			for (int i = 0; i < wp->n_safekeepers; i++)
 			{
-				Safekeeper *sk = &wp->safekeeper[i];
-
+				sk = &wp->safekeeper[i];
 				if (TimestampDifferenceExceeds(sk->latestMsgReceivedAt, now,
 											   wp->config->safekeeper_connection_timeout))
 				{
@@ -1080,7 +1077,7 @@ SendProposerElected(Safekeeper *sk)
 	ProposerElected msg;
 	TermHistory *th;
 	term_t		lastCommonTerm;
-	int			i;
+	int			idx;

 	/* Now that we are ready to send it's a good moment to create WAL reader */
 	wp->api.wal_reader_allocate(sk);
@@ -1099,15 +1096,15 @@ SendProposerElected(Safekeeper *sk)
 	/* We must start somewhere. */
 	Assert(wp->propTermHistory.n_entries >= 1);

-	for (i = 0; i < Min(wp->propTermHistory.n_entries, th->n_entries); i++)
+	for (idx = 0; idx < Min(wp->propTermHistory.n_entries, th->n_entries); idx++)
 	{
-		if (wp->propTermHistory.entries[i].term != th->entries[i].term)
+		if (wp->propTermHistory.entries[idx].term != th->entries[idx].term)
 			break;
 		/* term must begin everywhere at the same point */
-		Assert(wp->propTermHistory.entries[i].lsn == th->entries[i].lsn);
+		Assert(wp->propTermHistory.entries[idx].lsn == th->entries[idx].lsn);
 	}
-	i--;						/* step back to the last common term */
-	if (i < 0)
+	idx--;						/* step back to the last common term */
+	if (idx < 0)
 	{
 		/* safekeeper is empty or no common point, start from the beginning */
 		sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
@@ -1128,14 +1125,14 @@ SendProposerElected(Safekeeper *sk)
 		 * proposer, LSN it is currently writing, but then we just pick
 		 * safekeeper pos as it obviously can't be higher.
 		 */
-		if (wp->propTermHistory.entries[i].term == wp->propTerm)
+		if (wp->propTermHistory.entries[idx].term == wp->propTerm)
 		{
 			sk->startStreamingAt = sk->voteResponse.flushLsn;
 		}
 		else
 		{
-			XLogRecPtr	propEndLsn = wp->propTermHistory.entries[i + 1].lsn;
-			XLogRecPtr	skEndLsn = (i + 1 < th->n_entries ? th->entries[i + 1].lsn : sk->voteResponse.flushLsn);
+			XLogRecPtr	propEndLsn = wp->propTermHistory.entries[idx + 1].lsn;
+			XLogRecPtr	skEndLsn = (idx + 1 < th->n_entries ? th->entries[idx + 1].lsn : sk->voteResponse.flushLsn);

 			sk->startStreamingAt = Min(propEndLsn, skEndLsn);
 		}
@@ -1149,7 +1146,7 @@ SendProposerElected(Safekeeper *sk)
 	msg.termHistory = &wp->propTermHistory;
 	msg.timelineStartLsn = wp->timelineStartLsn;

-	lastCommonTerm = i >= 0 ? wp->propTermHistory.entries[i].term : 0;
+	lastCommonTerm = idx >= 0 ? wp->propTermHistory.entries[idx].term : 0;
 	wp_log(LOG,
 		   "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X",
 		   sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn));
@@ -1641,7 +1638,7 @@ UpdateDonorShmem(WalProposer *wp)
 * Process AppendResponse message from safekeeper.
 */
 static void
-HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
+HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk)
 {
 	XLogRecPtr	candidateTruncateLsn;
 	XLogRecPtr	newCommitLsn;
@@ -1660,7 +1657,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
 	 * and WAL is committed by the quorum. BroadcastAppendRequest() should be
 	 * called to notify safekeepers about the new commitLsn.
 	 */
-	wp->api.process_safekeeper_feedback(wp, sk);
+	wp->api.process_safekeeper_feedback(wp, fromsk);

 	/*
 	 * Try to advance truncateLsn -- the last record flushed to all
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -725,7 +725,7 @@ extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPt
 extern void WalProposerPoll(WalProposer *wp);
 extern void WalProposerFree(WalProposer *wp);

-extern WalproposerShmemState *GetWalpropShmemState();
+extern WalproposerShmemState *GetWalpropShmemState(void);

 /*
 * WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to
@@ -745,7 +745,7 @@ extern TimeLineID walprop_pg_get_timeline_id(void);
 * catch logging.
 */
 #ifdef WALPROPOSER_LIB
-extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...);
+extern void WalProposerLibLog(WalProposer *wp, int elevel, char *fmt,...) pg_attribute_printf(3, 4);
 #define wp_log(elevel, fmt, ...) WalProposerLibLog(wp, elevel, fmt, ## __VA_ARGS__)
 #else
 #define wp_log(elevel, fmt, ...) elog(elevel, WP_LOG_PREFIX fmt, ## __VA_ARGS__)
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -286,6 +286,9 @@ safekeepers_cmp(char *old, char *new)
 static void
 assign_neon_safekeepers(const char *newval, void *extra)
 {
+	char	   *newval_copy;
+	char	   *oldval;
+
 	if (!am_walproposer)
 		return;

@@ -295,8 +298,8 @@ assign_neon_safekeepers(const char *newval, void *extra)
 	}

 	/* Copy values because we will modify them in split_safekeepers_list() */
-	char *newval_copy = pstrdup(newval);
-	char *oldval = pstrdup(wal_acceptors_list);
+	newval_copy = pstrdup(newval);
+	oldval = pstrdup(wal_acceptors_list);

 	/* 
 	 * TODO: restarting through FATAL is stupid and introduces 1s delay before
@@ -538,7 +541,7 @@ nwp_shmem_startup_hook(void)
 }

 WalproposerShmemState *
-GetWalpropShmemState()
+GetWalpropShmemState(void)
 {
 	Assert(walprop_shared != NULL);
 	return walprop_shared;
--- a/pgxn/neon_rmgr/neon_rmgr_desc.c
+++ b/pgxn/neon_rmgr/neon_rmgr_desc.c
@@ -44,27 +44,6 @@ infobits_desc(StringInfo buf, uint8 infobits, const char *keyname)
 	appendStringInfoString(buf, "]");
 }

-static void
-truncate_flags_desc(StringInfo buf, uint8 flags)
-{
-	appendStringInfoString(buf, "flags: [");
-
-	if (flags & XLH_TRUNCATE_CASCADE)
-		appendStringInfoString(buf, "CASCADE, ");
-	if (flags & XLH_TRUNCATE_RESTART_SEQS)
-		appendStringInfoString(buf, "RESTART_SEQS, ");
-
-	if (buf->data[buf->len - 1] == ' ')
-	{
-		/* Truncate-away final unneeded ", "  */
-		Assert(buf->data[buf->len - 2] == ',');
-		buf->len -= 2;
-		buf->data[buf->len] = '\0';
-	}
-
-	appendStringInfoString(buf, "]");
-}
-
 void
 neon_rm_desc(StringInfo buf, XLogReaderState *record)
 {
--- a/pgxn/neon_walredo/walredoproc.c
+++ b/pgxn/neon_walredo/walredoproc.c
@@ -136,7 +136,7 @@ static bool redo_block_filter(XLogReaderState *record, uint8 block_id);
 static void GetPage(StringInfo input_message);
 static void Ping(StringInfo input_message);
 static ssize_t buffered_read(void *buf, size_t count);
-static void CreateFakeSharedMemoryAndSemaphores();
+static void CreateFakeSharedMemoryAndSemaphores(void);

 static BufferTag target_redo_tag;

@@ -170,6 +170,40 @@ close_range_syscall(unsigned int start_fd, unsigned int count, unsigned int flag
    return syscall(__NR_close_range, start_fd, count, flags);
 }

+
+static PgSeccompRule allowed_syscalls[] =
+{
+	/* Hard requirements */
+	PG_SCMP_ALLOW(exit_group),
+	PG_SCMP_ALLOW(pselect6),
+	PG_SCMP_ALLOW(read),
+	PG_SCMP_ALLOW(select),
+	PG_SCMP_ALLOW(write),
+
+	/* Memory allocation */
+	PG_SCMP_ALLOW(brk),
+#ifndef MALLOC_NO_MMAP
+	/* TODO: musl doesn't have mallopt */
+	PG_SCMP_ALLOW(mmap),
+	PG_SCMP_ALLOW(munmap),
+#endif
+	/*
+	 * getpid() is called on assertion failure, in ExceptionalCondition.
+	 * It's not really needed, but seems pointless to hide it either. The
+	 * system call unlikely to expose a kernel vulnerability, and the PID
+	 * is stored in MyProcPid anyway.
+	 */
+	PG_SCMP_ALLOW(getpid),
+
+	/* Enable those for a proper shutdown. */
+#if 0
+	   PG_SCMP_ALLOW(munmap),
+	   PG_SCMP_ALLOW(shmctl),
+	   PG_SCMP_ALLOW(shmdt),
+	   PG_SCMP_ALLOW(unlink),	/* shm_unlink */
+#endif
+};
+
 static void
 enter_seccomp_mode(void)
 {
@@ -183,44 +217,12 @@ enter_seccomp_mode(void)
 				(errcode(ERRCODE_SYSTEM_ERROR),
 				 errmsg("seccomp: could not close files >= fd 3")));

-	PgSeccompRule syscalls[] =
-	{
-		/* Hard requirements */
-		PG_SCMP_ALLOW(exit_group),
-		PG_SCMP_ALLOW(pselect6),
-		PG_SCMP_ALLOW(read),
-		PG_SCMP_ALLOW(select),
-		PG_SCMP_ALLOW(write),
-
-		/* Memory allocation */
-		PG_SCMP_ALLOW(brk),
-#ifndef MALLOC_NO_MMAP
-		/* TODO: musl doesn't have mallopt */
-		PG_SCMP_ALLOW(mmap),
-		PG_SCMP_ALLOW(munmap),
-#endif
-		/*
-		 * getpid() is called on assertion failure, in ExceptionalCondition.
-		 * It's not really needed, but seems pointless to hide it either. The
-		 * system call unlikely to expose a kernel vulnerability, and the PID
-		 * is stored in MyProcPid anyway.
-		 */
-		PG_SCMP_ALLOW(getpid),
-
-		/* Enable those for a proper shutdown.
-		PG_SCMP_ALLOW(munmap),
-		PG_SCMP_ALLOW(shmctl),
-		PG_SCMP_ALLOW(shmdt),
-		PG_SCMP_ALLOW(unlink), // shm_unlink
-	 */
-	};
-
 #ifdef MALLOC_NO_MMAP
 	/* Ask glibc not to use mmap() */
 	mallopt(M_MMAP_MAX, 0);
 #endif

-	seccomp_load_rules(syscalls, lengthof(syscalls));
+	seccomp_load_rules(allowed_syscalls, lengthof(allowed_syscalls));
 }
 #endif /* HAVE_LIBSECCOMP */

@@ -449,7 +451,7 @@ WalRedoMain(int argc, char *argv[])
 * half-initialized postgres.
 */
 static void
-CreateFakeSharedMemoryAndSemaphores()
+CreateFakeSharedMemoryAndSemaphores(void)
 {
 	PGShmemHeader *shim = NULL;
 	PGShmemHeader *hdr;
@@ -992,7 +994,7 @@ redo_block_filter(XLogReaderState *record, uint8 block_id)
 	 * If this block isn't one we are currently restoring, then return 'true'
 	 * so that this gets ignored
 	 */
-	return !BUFFERTAGS_EQUAL(target_tag, target_redo_tag);
+	return !BufferTagsEqual(&target_tag, &target_redo_tag);
 }

 /*
--- a/poetry.lock
+++ b/poetry.lock
@@ -2095,6 +2095,7 @@ files = [
    {file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"},
    {file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"},
+    {file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"},
@@ -2103,6 +2104,8 @@ files = [
    {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"},
    {file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"},
+    {file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"},
+    {file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"},
    {file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"},
    {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"},
    {file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"},
@@ -2584,6 +2587,7 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -2729,21 +2733,22 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]

 [[package]]
 name = "responses"
-version = "0.21.0"
+version = "0.25.3"
 description = "A utility library for mocking out the `requests` Python library."
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "responses-0.21.0-py3-none-any.whl", hash = "sha256:2dcc863ba63963c0c3d9ee3fa9507cbe36b7d7b0fccb4f0bdfd9e96c539b1487"},
-    {file = "responses-0.21.0.tar.gz", hash = "sha256:b82502eb5f09a0289d8e209e7bad71ef3978334f56d09b444253d5ad67bf5253"},
+    {file = "responses-0.25.3-py3-none-any.whl", hash = "sha256:521efcbc82081ab8daa588e08f7e8a64ce79b91c39f6e62199b19159bea7dbcb"},
+    {file = "responses-0.25.3.tar.gz", hash = "sha256:617b9247abd9ae28313d57a75880422d55ec63c29d33d629697590a034358dba"},
 ]

 [package.dependencies]
-requests = ">=2.0,<3.0"
-urllib3 = ">=1.25.10"
+pyyaml = "*"
+requests = ">=2.30.0,<3.0"
+urllib3 = ">=1.25.10,<3.0"

 [package.extras]
-tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-localserver", "types-mock", "types-requests"]
+tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "tomli", "tomli-w", "types-PyYAML", "types-requests"]

 [[package]]
 name = "rfc3339-validator"
@@ -3137,6 +3142,16 @@ files = [
    {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
    {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
    {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
+    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
+    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
    {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
--- a/pre-commit.py
+++ b/pre-commit.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import argparse
 import enum
 import os
 import subprocess
 import sys
-from typing import List


@enum.unique
@@ -55,12 +56,12 @@ def mypy() -> str:
    return "poetry run mypy"


-def get_commit_files() -> List[str]:
+def get_commit_files() -> list[str]:
    files = subprocess.check_output("git diff --cached --name-only --diff-filter=ACM".split())
    return files.decode().splitlines()


-def check(name: str, suffix: str, cmd: str, changed_files: List[str], no_color: bool = False):
+def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False):
    print(f"Checking: {name} ", end="")
    applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files))
    if not applicable_files:
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -39,7 +39,7 @@ http.workspace = true
 humantime.workspace = true
 humantime-serde.workspace = true
 hyper0.workspace = true
-hyper1 = { package = "hyper", version = "1.2", features = ["server"] }
+hyper = { workspace = true, features = ["server", "http1", "http2"] }
 hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
 http-body-util = { version = "0.1" }
 indexmap.workspace = true
@@ -77,7 +77,7 @@ subtle.workspace = true
 thiserror.workspace = true
 tikv-jemallocator.workspace = true
 tikv-jemalloc-ctl = { workspace = true, features = ["use_std"] }
-tokio-postgres.workspace = true
+tokio-postgres = { workspace = true, features = ["with-serde_json-1"] }
 tokio-postgres-rustls.workspace = true
 tokio-rustls.workspace = true
 tokio-util.workspace = true
@@ -101,7 +101,7 @@ jose-jwa = "0.1.2"
 jose-jwk = { version = "0.1.2", features = ["p256", "p384", "rsa"] }
 signature = "2"
 ecdsa = "0.16"
-p256 = "0.13"
+p256 = { version = "0.13", features = ["jwk"] }
 rsa = "0.9"

 workspace_hack.workspace = true
--- a/proxy/src/auth/backend/console_redirect.rs
+++ b/proxy/src/auth/backend/console_redirect.rs
@@ -1,24 +1,18 @@
 use crate::{
-    auth,
-    cache::Cached,
-    compute,
+    auth, compute,
    config::AuthenticationConfig,
    context::RequestMonitoring,
-    control_plane::{self, provider::NodeInfo, CachedNodeInfo},
+    control_plane::{self, provider::NodeInfo},
    error::{ReportableError, UserFacingError},
-    proxy::connect_compute::ComputeConnectBackend,
    stream::PqStream,
    waiters,
 };
-use async_trait::async_trait;
 use pq_proto::BeMessage as Be;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tokio_postgres::config::SslMode;
 use tracing::{info, info_span};

-use super::ComputeCredentialKeys;
-
 #[derive(Debug, Error)]
 pub(crate) enum WebAuthError {
    #[error(transparent)]
@@ -31,7 +25,6 @@ pub(crate) enum WebAuthError {
    Io(#[from] std::io::Error),
 }

-#[derive(Debug)]
 pub struct ConsoleRedirectBackend {
    console_uri: reqwest::Url,
 }
@@ -73,31 +66,17 @@ impl ConsoleRedirectBackend {
        Self { console_uri }
    }

+    pub(super) fn url(&self) -> &reqwest::Url {
+        &self.console_uri
+    }
+
    pub(crate) async fn authenticate(
        &self,
        ctx: &RequestMonitoring,
        auth_config: &'static AuthenticationConfig,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
-    ) -> auth::Result<ConsoleRedirectNodeInfo> {
-        authenticate(ctx, auth_config, &self.console_uri, client)
-            .await
-            .map(ConsoleRedirectNodeInfo)
-    }
-}
-
-pub struct ConsoleRedirectNodeInfo(pub(super) NodeInfo);
-
-#[async_trait]
-impl ComputeConnectBackend for ConsoleRedirectNodeInfo {
-    async fn wake_compute(
-        &self,
-        _ctx: &RequestMonitoring,
-    ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
-        Ok(Cached::new_uncached(self.0.clone()))
-    }
-
-    fn get_keys(&self) -> &ComputeCredentialKeys {
-        &ComputeCredentialKeys::None
+    ) -> auth::Result<NodeInfo> {
+        authenticate(ctx, auth_config, &self.console_uri, client).await
    }
 }

--- a/proxy/src/auth/backend/jwt.rs
+++ b/proxy/src/auth/backend/jwt.rs
@@ -17,6 +17,8 @@ use crate::{
    RoleName,
 };

+use super::ComputeCredentialKeys;
+
 // TODO(conrad): make these configurable.
 const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
 const MIN_RENEW: Duration = Duration::from_secs(30);
@@ -241,7 +243,7 @@ impl JwkCacheEntryLock {
        endpoint: EndpointId,
        role_name: &RoleName,
        fetch: &F,
-    ) -> Result<(), anyhow::Error> {
+    ) -> Result<ComputeCredentialKeys, anyhow::Error> {
        // JWT compact form is defined to be
        // <B64(Header)> || . || <B64(Payload)> || . || <B64(Signature)>
        // where Signature = alg(<B64(Header)> || . || <B64(Payload)>);
@@ -300,9 +302,9 @@ impl JwkCacheEntryLock {
            key => bail!("unsupported key type {key:?}"),
        };

-        let payload = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
+        let payloadb = base64::decode_config(payload, base64::URL_SAFE_NO_PAD)
            .context("Provided authentication token is not a valid JWT encoding")?;
-        let payload = serde_json::from_slice::<JwtPayload<'_>>(&payload)
+        let payload = serde_json::from_slice::<JwtPayload<'_>>(&payloadb)
            .context("Provided authentication token is not a valid JWT encoding")?;

        tracing::debug!(?payload, "JWT signature valid with claims");
@@ -327,7 +329,7 @@ impl JwkCacheEntryLock {
            );
        }

-        Ok(())
+        Ok(ComputeCredentialKeys::JwtPayload(payloadb))
    }
 }

@@ -339,7 +341,7 @@ impl JwkCache {
        role_name: &RoleName,
        fetch: &F,
        jwt: &str,
-    ) -> Result<(), anyhow::Error> {
+    ) -> Result<ComputeCredentialKeys, anyhow::Error> {
        // try with just a read lock first
        let key = (endpoint.clone(), role_name.clone());
        let entry = self.map.get(&key).as_deref().map(Arc::clone);
@@ -571,7 +573,7 @@ mod tests {
    use bytes::Bytes;
    use http::Response;
    use http_body_util::Full;
-    use hyper1::service::service_fn;
+    use hyper::service::service_fn;
    use hyper_util::rt::TokioIo;
    use rand::rngs::OsRng;
    use rsa::pkcs8::DecodePrivateKey;
@@ -736,7 +738,7 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
        });

        let listener = TcpListener::bind("0.0.0.0:0").await.unwrap();
-        let server = hyper1::server::conn::http1::Builder::new();
+        let server = hyper::server::conn::http1::Builder::new();
        let addr = listener.local_addr().unwrap();
        tokio::spawn(async move {
            loop {
--- a/proxy/src/auth/backend/mod.rs
+++ b/proxy/src/auth/backend/mod.rs
@@ -20,8 +20,9 @@ use crate::auth::credentials::check_peer_addr_is_in_list;
 use crate::auth::{validate_password_and_exchange, AuthError};
 use crate::cache::Cached;
 use crate::context::RequestMonitoring;
-use crate::control_plane::provider::ControlPlaneBackend;
-use crate::control_plane::AuthSecret;
+use crate::control_plane::errors::GetAuthInfoError;
+use crate::control_plane::provider::{CachedRoleSecret, ControlPlaneBackend};
+use crate::control_plane::{AuthSecret, NodeInfo};
 use crate::intern::EndpointIdInt;
 use crate::metrics::Metrics;
 use crate::proxy::connect_compute::ComputeConnectBackend;
@@ -31,22 +32,48 @@ use crate::stream::Stream;
 use crate::{
    auth::{self, ComputeUserInfoMaybeEndpoint},
    config::AuthenticationConfig,
-    control_plane::{self, provider::CachedNodeInfo, Api},
+    control_plane::{
+        self,
+        provider::{CachedAllowedIps, CachedNodeInfo},
+        Api,
+    },
    stream,
 };
 use crate::{scram, EndpointCacheKey, EndpointId, RoleName};

-/// The [crate::serverless] module can authenticate either using control-plane
-/// to get authentication state, or by using JWKs stored in the filesystem.
-pub enum ServerlessBackend<'a> {
-    /// Cloud API (V2).
-    ControlPlane(&'a ControlPlaneBackend),
-    /// Local proxy uses configured auth credentials and does not wake compute
-    Local(&'a LocalBackend),
+/// Alternative to [`std::borrow::Cow`] but doesn't need `T: ToOwned` as we don't need that functionality
+pub enum MaybeOwned<'a, T> {
+    Owned(T),
+    Borrowed(&'a T),
 }

-#[cfg(test)]
-use crate::control_plane::provider::{CachedAllowedIps, CachedRoleSecret};
+impl<T> std::ops::Deref for MaybeOwned<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            MaybeOwned::Owned(t) => t,
+            MaybeOwned::Borrowed(t) => t,
+        }
+    }
+}
+
+/// This type serves two purposes:
+///
+/// * When `T` is `()`, it's just a regular auth backend selector
+///   which we use in [`crate::config::ProxyConfig`].
+///
+/// * However, when we substitute `T` with [`ComputeUserInfoMaybeEndpoint`],
+///   this helps us provide the credentials only to those auth
+///   backends which require them for the authentication process.
+pub enum Backend<'a, T, D> {
+    /// Cloud API (V2).
+    ControlPlane(MaybeOwned<'a, ControlPlaneBackend>, T),
+    /// Authentication via a web browser.
+    ConsoleRedirect(MaybeOwned<'a, ConsoleRedirectBackend>, D),
+    /// Local proxy uses configured auth credentials and does not wake compute
+    Local(MaybeOwned<'a, LocalBackend>),
+}

 #[cfg(test)]
 pub(crate) trait TestBackend: Send + Sync + 'static {
@@ -64,20 +91,63 @@ impl Clone for Box<dyn TestBackend> {
    }
 }

-impl std::fmt::Display for ControlPlaneBackend {
+impl std::fmt::Display for Backend<'_, (), ()> {
    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
-            ControlPlaneBackend::Management(endpoint) => fmt
-                .debug_tuple("ControlPlane::Management")
-                .field(&endpoint.url())
+            Self::ControlPlane(api, ()) => match &**api {
+                ControlPlaneBackend::Management(endpoint) => fmt
+                    .debug_tuple("ControlPlane::Management")
+                    .field(&endpoint.url())
+                    .finish(),
+                #[cfg(any(test, feature = "testing"))]
+                ControlPlaneBackend::PostgresMock(endpoint) => fmt
+                    .debug_tuple("ControlPlane::PostgresMock")
+                    .field(&endpoint.url())
+                    .finish(),
+                #[cfg(test)]
+                ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
+            },
+            Self::ConsoleRedirect(backend, ()) => fmt
+                .debug_tuple("ConsoleRedirect")
+                .field(&backend.url().as_str())
                .finish(),
-            #[cfg(any(test, feature = "testing"))]
-            ControlPlaneBackend::PostgresMock(endpoint) => fmt
-                .debug_tuple("ControlPlane::PostgresMock")
-                .field(&endpoint.url())
-                .finish(),
-            #[cfg(test)]
-            ControlPlaneBackend::Test(_) => fmt.debug_tuple("ControlPlane::Test").finish(),
+            Self::Local(_) => fmt.debug_tuple("Local").finish(),
+        }
+    }
+}
+
+impl<T, D> Backend<'_, T, D> {
+    /// Very similar to [`std::option::Option::as_ref`].
+    /// This helps us pass structured config to async tasks.
+    pub(crate) fn as_ref(&self) -> Backend<'_, &T, &D> {
+        match self {
+            Self::ControlPlane(c, x) => Backend::ControlPlane(MaybeOwned::Borrowed(c), x),
+            Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(MaybeOwned::Borrowed(c), x),
+            Self::Local(l) => Backend::Local(MaybeOwned::Borrowed(l)),
+        }
+    }
+}
+
+impl<'a, T, D> Backend<'a, T, D> {
+    /// Very similar to [`std::option::Option::map`].
+    /// Maps [`Backend<T>`] to [`Backend<R>`] by applying
+    /// a function to a contained value.
+    pub(crate) fn map<R>(self, f: impl FnOnce(T) -> R) -> Backend<'a, R, D> {
+        match self {
+            Self::ControlPlane(c, x) => Backend::ControlPlane(c, f(x)),
+            Self::ConsoleRedirect(c, x) => Backend::ConsoleRedirect(c, x),
+            Self::Local(l) => Backend::Local(l),
+        }
+    }
+}
+impl<'a, T, D, E> Backend<'a, Result<T, E>, D> {
+    /// Very similar to [`std::option::Option::transpose`].
+    /// This is most useful for error handling.
+    pub(crate) fn transpose(self) -> Result<Backend<'a, T, D>, E> {
+        match self {
+            Self::ControlPlane(c, x) => x.map(|x| Backend::ControlPlane(c, x)),
+            Self::ConsoleRedirect(c, x) => Ok(Backend::ConsoleRedirect(c, x)),
+            Self::Local(l) => Ok(Backend::Local(l)),
        }
    }
 }
@@ -106,10 +176,12 @@ impl ComputeUserInfo {
    }
 }

+#[cfg_attr(test, derive(Debug))]
 pub(crate) enum ComputeCredentialKeys {
    #[cfg(any(test, feature = "testing"))]
    Password(Vec<u8>),
    AuthKeys(AuthKeys),
+    JwtPayload(Vec<u8>),
    None,
 }

@@ -342,79 +414,133 @@ async fn authenticate_with_secret(
    classic::authenticate(ctx, info, client, config, secret).await
 }

-impl ControlPlaneBackend {
+impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
+    /// Get username from the credentials.
+    pub(crate) fn get_user(&self) -> &str {
+        match self {
+            Self::ControlPlane(_, user_info) => &user_info.user,
+            Self::ConsoleRedirect(_, ()) => "web",
+            Self::Local(_) => "local",
+        }
+    }
+
+    /// Authenticate the client via the requested backend, possibly using credentials.
    #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
    pub(crate) async fn authenticate(
-        &self,
+        self,
        ctx: &RequestMonitoring,
-        user_info: ComputeUserInfoMaybeEndpoint,
        client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
        allow_cleartext: bool,
        config: &'static AuthenticationConfig,
        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    ) -> auth::Result<ControlPlaneComputeBackend> {
-        info!(
-            user = &*user_info.user,
-            project = user_info.endpoint(),
-            "performing authentication using the console"
-        );
+    ) -> auth::Result<Backend<'a, ComputeCredentials, NodeInfo>> {
+        let res = match self {
+            Self::ControlPlane(api, user_info) => {
+                info!(
+                    user = &*user_info.user,
+                    project = user_info.endpoint(),
+                    "performing authentication using the console"
+                );

-        let credentials = auth_quirks(
-            ctx,
-            self,
-            user_info,
-            client,
-            allow_cleartext,
-            config,
-            endpoint_rate_limiter,
-        )
-        .await?;
+                let credentials = auth_quirks(
+                    ctx,
+                    &*api,
+                    user_info,
+                    client,
+                    allow_cleartext,
+                    config,
+                    endpoint_rate_limiter,
+                )
+                .await?;
+                Backend::ControlPlane(api, credentials)
+            }
+            // NOTE: this auth backend doesn't use client credentials.
+            Self::ConsoleRedirect(backend, ()) => {
+                info!("performing web authentication");
+
+                let info = backend.authenticate(ctx, config, client).await?;
+
+                Backend::ConsoleRedirect(backend, info)
+            }
+            Self::Local(_) => {
+                return Err(auth::AuthError::bad_auth_method("invalid for local proxy"))
+            }
+        };

        info!("user successfully authenticated");
-        Ok(ControlPlaneComputeBackend {
-            api: self,
-            creds: credentials,
-        })
-    }
-
-    pub(crate) fn attach_to_credentials(
-        &self,
-        creds: ComputeCredentials,
-    ) -> ControlPlaneComputeBackend {
-        ControlPlaneComputeBackend { api: self, creds }
+        Ok(res)
    }
 }

-pub struct ControlPlaneComputeBackend<'a> {
-    api: &'a ControlPlaneBackend,
-    creds: ComputeCredentials,
+impl Backend<'_, ComputeUserInfo, &()> {
+    pub(crate) async fn get_role_secret(
+        &self,
+        ctx: &RequestMonitoring,
+    ) -> Result<CachedRoleSecret, GetAuthInfoError> {
+        match self {
+            Self::ControlPlane(api, user_info) => api.get_role_secret(ctx, user_info).await,
+            Self::ConsoleRedirect(_, ()) => Ok(Cached::new_uncached(None)),
+            Self::Local(_) => Ok(Cached::new_uncached(None)),
+        }
+    }
+
+    pub(crate) async fn get_allowed_ips_and_secret(
+        &self,
+        ctx: &RequestMonitoring,
+    ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), GetAuthInfoError> {
+        match self {
+            Self::ControlPlane(api, user_info) => {
+                api.get_allowed_ips_and_secret(ctx, user_info).await
+            }
+            Self::ConsoleRedirect(_, ()) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
+            Self::Local(_) => Ok((Cached::new_uncached(Arc::new(vec![])), None)),
+        }
+    }
 }

 #[async_trait::async_trait]
-impl ComputeConnectBackend for ControlPlaneComputeBackend<'_> {
+impl ComputeConnectBackend for Backend<'_, ComputeCredentials, NodeInfo> {
    async fn wake_compute(
        &self,
        ctx: &RequestMonitoring,
    ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
-        self.api.wake_compute(ctx, &self.creds.info).await
+        match self {
+            Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
+            Self::ConsoleRedirect(_, info) => Ok(Cached::new_uncached(info.clone())),
+            Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
+        }
    }

    fn get_keys(&self) -> &ComputeCredentialKeys {
-        &self.creds.keys
+        match self {
+            Self::ControlPlane(_, creds) => &creds.keys,
+            Self::ConsoleRedirect(_, _) => &ComputeCredentialKeys::None,
+            Self::Local(_) => &ComputeCredentialKeys::None,
+        }
    }
 }

 #[async_trait::async_trait]
-impl ComputeConnectBackend for LocalBackend {
+impl ComputeConnectBackend for Backend<'_, ComputeCredentials, &()> {
    async fn wake_compute(
        &self,
-        _ctx: &RequestMonitoring,
+        ctx: &RequestMonitoring,
    ) -> Result<CachedNodeInfo, control_plane::errors::WakeComputeError> {
-        Ok(Cached::new_uncached(self.node_info.clone()))
+        match self {
+            Self::ControlPlane(api, creds) => api.wake_compute(ctx, &creds.info).await,
+            Self::ConsoleRedirect(_, ()) => {
+                unreachable!("web auth flow doesn't support waking the compute")
+            }
+            Self::Local(local) => Ok(Cached::new_uncached(local.node_info.clone())),
+        }
    }

    fn get_keys(&self) -> &ComputeCredentialKeys {
-        &ComputeCredentialKeys::None
+        match self {
+            Self::ControlPlane(_, creds) => &creds.keys,
+            Self::ConsoleRedirect(_, ()) => &ComputeCredentialKeys::None,
+            Self::Local(_) => &ComputeCredentialKeys::None,
+        }
    }
 }

--- a/proxy/src/auth/mod.rs
+++ b/proxy/src/auth/mod.rs
@@ -1,7 +1,7 @@
 //! Client authentication mechanisms.

 pub mod backend;
-pub use backend::ServerlessBackend;
+pub use backend::Backend;

 mod credentials;
 pub(crate) use credentials::{
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -197,7 +197,7 @@ async fn main() -> anyhow::Result<()> {

    let task = serverless::task_main(
        config,
-        auth::ServerlessBackend::Local(auth_backend),
+        auth_backend,
        http_listener,
        shutdown.clone(),
        Arc::new(CancellationHandlerMain::new(
@@ -289,8 +289,12 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
 }

 /// auth::Backend is created at proxy startup, and lives forever.
-fn build_auth_backend(args: &LocalProxyCliArgs) -> anyhow::Result<&'static LocalBackend> {
-    let auth_backend = LocalBackend::new(args.compute);
+fn build_auth_backend(
+    args: &LocalProxyCliArgs,
+) -> anyhow::Result<&'static auth::Backend<'static, (), ()>> {
+    let auth_backend = proxy::auth::Backend::Local(proxy::auth::backend::MaybeOwned::Owned(
+        LocalBackend::new(args.compute),
+    ));

    Ok(Box::leak(Box::new(auth_backend)))
 }
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -11,6 +11,7 @@ use proxy::auth;
 use proxy::auth::backend::jwt::JwkCache;
 use proxy::auth::backend::AuthRateLimiter;
 use proxy::auth::backend::ConsoleRedirectBackend;
+use proxy::auth::backend::MaybeOwned;
 use proxy::cancellation::CancelMap;
 use proxy::cancellation::CancellationHandler;
 use proxy::config::remote_storage_from_toml;
@@ -21,7 +22,6 @@ use proxy::config::ProjectInfoCacheOptions;
 use proxy::config::ProxyProtocolV2;
 use proxy::context::parquet::ParquetUploadArgs;
 use proxy::control_plane;
-use proxy::control_plane::provider::ControlPlaneBackend;
 use proxy::http;
 use proxy::http::health_server::AppMetrics;
 use proxy::metrics::Metrics;
@@ -314,10 +314,7 @@ async fn main() -> anyhow::Result<()> {
    let config = build_config(&args)?;
    let auth_backend = build_auth_backend(&args)?;

-    match auth_backend {
-        Either::Left(auth_backend) => info!("Authentication backend: {auth_backend}"),
-        Either::Right(auth_backend) => info!("Authentication backend: {auth_backend:?}"),
-    };
+    info!("Authentication backend: {}", auth_backend);
    info!("Using region: {}", args.aws_region);

    let region_provider =
@@ -464,41 +461,26 @@ async fn main() -> anyhow::Result<()> {
    // client facing tasks. these will exit on error or on cancellation
    // cancellation returns Ok(())
    let mut client_tasks = JoinSet::new();
-    match auth_backend {
-        Either::Left(auth_backend) => {
-            if let Some(proxy_listener) = proxy_listener {
-                client_tasks.spawn(proxy::proxy::task_main(
-                    config,
-                    auth_backend,
-                    proxy_listener,
-                    cancellation_token.clone(),
-                    cancellation_handler.clone(),
-                    endpoint_rate_limiter.clone(),
-                ));
-            }
+    if let Some(proxy_listener) = proxy_listener {
+        client_tasks.spawn(proxy::proxy::task_main(
+            config,
+            auth_backend,
+            proxy_listener,
+            cancellation_token.clone(),
+            cancellation_handler.clone(),
+            endpoint_rate_limiter.clone(),
+        ));
+    }

-            if let Some(serverless_listener) = serverless_listener {
-                client_tasks.spawn(serverless::task_main(
-                    config,
-                    auth::ServerlessBackend::ControlPlane(auth_backend),
-                    serverless_listener,
-                    cancellation_token.clone(),
-                    cancellation_handler.clone(),
-                    endpoint_rate_limiter.clone(),
-                ));
-            }
-        }
-        Either::Right(auth_backend) => {
-            if let Some(proxy_listener) = proxy_listener {
-                client_tasks.spawn(proxy::console_redirect_proxy::task_main(
-                    config,
-                    auth_backend,
-                    proxy_listener,
-                    cancellation_token.clone(),
-                    cancellation_handler.clone(),
-                ));
-            }
-        }
+    if let Some(serverless_listener) = serverless_listener {
+        client_tasks.spawn(serverless::task_main(
+            config,
+            auth_backend,
+            serverless_listener,
+            cancellation_token.clone(),
+            cancellation_handler.clone(),
+            endpoint_rate_limiter.clone(),
+        ));
    }

    client_tasks.spawn(proxy::context::parquet::worker(
@@ -528,38 +510,40 @@ async fn main() -> anyhow::Result<()> {
        ));
    }

-    if let Either::Left(ControlPlaneBackend::Management(api)) = &auth_backend {
-        match (redis_notifications_client, regional_redis_client.clone()) {
-            (None, None) => {}
-            (client1, client2) => {
-                let cache = api.caches.project_info.clone();
-                if let Some(client) = client1 {
-                    maintenance_tasks.spawn(notifications::task_main(
-                        client,
-                        cache.clone(),
-                        cancel_map.clone(),
-                        args.region.clone(),
-                    ));
+    if let auth::Backend::ControlPlane(api, _) = auth_backend {
+        if let proxy::control_plane::provider::ControlPlaneBackend::Management(api) = &**api {
+            match (redis_notifications_client, regional_redis_client.clone()) {
+                (None, None) => {}
+                (client1, client2) => {
+                    let cache = api.caches.project_info.clone();
+                    if let Some(client) = client1 {
+                        maintenance_tasks.spawn(notifications::task_main(
+                            client,
+                            cache.clone(),
+                            cancel_map.clone(),
+                            args.region.clone(),
+                        ));
+                    }
+                    if let Some(client) = client2 {
+                        maintenance_tasks.spawn(notifications::task_main(
+                            client,
+                            cache.clone(),
+                            cancel_map.clone(),
+                            args.region.clone(),
+                        ));
+                    }
+                    maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
                }
-                if let Some(client) = client2 {
-                    maintenance_tasks.spawn(notifications::task_main(
-                        client,
-                        cache.clone(),
-                        cancel_map.clone(),
-                        args.region.clone(),
-                    ));
-                }
-                maintenance_tasks.spawn(async move { cache.clone().gc_worker().await });
            }
-        }
-        if let Some(regional_redis_client) = regional_redis_client {
-            let cache = api.caches.endpoints_cache.clone();
-            let con = regional_redis_client;
-            let span = tracing::info_span!("endpoints_cache");
-            maintenance_tasks.spawn(
-                async move { cache.do_read(con, cancellation_token.clone()).await }
-                    .instrument(span),
-            );
+            if let Some(regional_redis_client) = regional_redis_client {
+                let cache = api.caches.endpoints_cache.clone();
+                let con = regional_redis_client;
+                let span = tracing::info_span!("endpoints_cache");
+                maintenance_tasks.spawn(
+                    async move { cache.do_read(con, cancellation_token.clone()).await }
+                        .instrument(span),
+                );
+            }
        }
    }

@@ -679,7 +663,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        webauth_confirmation_timeout: args.webauth_confirmation_timeout,
    };

-    let config = ProxyConfig {
+    let config = Box::leak(Box::new(ProxyConfig {
        tls_config,
        metric_collection,
        allow_self_signed_compute: args.allow_self_signed_compute,
@@ -693,9 +677,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        connect_to_compute_retry_config: config::RetryConfig::parse(
            &args.connect_to_compute_retry,
        )?,
-    };
-
-    let config = Box::leak(Box::new(config));
+    }));

    tokio::spawn(config.connect_compute_locks.garbage_collect_worker());

@@ -705,8 +687,8 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
 /// auth::Backend is created at proxy startup, and lives forever.
 fn build_auth_backend(
    args: &ProxyCliArgs,
-) -> anyhow::Result<Either<&'static ControlPlaneBackend, &'static ConsoleRedirectBackend>> {
-    match &args.auth_backend {
+) -> anyhow::Result<&'static auth::Backend<'static, (), ()>> {
+    let auth_backend = match &args.auth_backend {
        AuthBackendType::Console => {
            let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
            let project_info_cache_config: ProjectInfoCacheOptions =
@@ -755,33 +737,25 @@ fn build_auth_backend(
                locks,
                wake_compute_endpoint_rate_limiter,
            );
-            let auth_backend = control_plane::provider::ControlPlaneBackend::Management(api);
+            let api = control_plane::provider::ControlPlaneBackend::Management(api);
+            auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
+        }

-            let config = Box::leak(Box::new(auth_backend));
-
-            Ok(Either::Left(config))
+        AuthBackendType::Web => {
+            let url = args.uri.parse()?;
+            auth::Backend::ConsoleRedirect(MaybeOwned::Owned(ConsoleRedirectBackend::new(url)), ())
        }

        #[cfg(feature = "testing")]
        AuthBackendType::Postgres => {
            let url = args.auth_endpoint.parse()?;
            let api = control_plane::provider::mock::Api::new(url, !args.is_private_access_proxy);
-            let auth_backend = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
-
-            let config = Box::leak(Box::new(auth_backend));
-
-            Ok(Either::Left(config))
+            let api = control_plane::provider::ControlPlaneBackend::PostgresMock(api);
+            auth::Backend::ControlPlane(MaybeOwned::Owned(api), ())
        }
+    };

-        AuthBackendType::Web => {
-            let url = args.uri.parse()?;
-            let backend = ConsoleRedirectBackend::new(url);
-
-            let config = Box::leak(Box::new(backend));
-
-            Ok(Either::Right(config))
-        }
-    }
+    Ok(Box::leak(Box::new(auth_backend)))
 }

 #[cfg(test)]
--- a/proxy/src/console_redirect_proxy.rs
+++ b/proxy/src/console_redirect_proxy.rs
@@ -1,161 +0,0 @@
-use crate::auth::backend::ConsoleRedirectBackend;
-use crate::config::ProxyConfig;
-use crate::metrics::Protocol;
-use crate::proxy::{prepare_client_connection, transition_connection, ClientRequestError};
-use crate::{
-    cancellation::CancellationHandlerMain,
-    context::RequestMonitoring,
-    metrics::{Metrics, NumClientConnectionsGuard},
-    proxy::handshake::{handshake, HandshakeData},
-};
-use futures::TryFutureExt;
-use std::net::IpAddr;
-use std::sync::Arc;
-use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
-use tokio_util::sync::CancellationToken;
-use tracing::{info, Instrument};
-
-use crate::proxy::{
-    connect_compute::{connect_to_compute, TcpMechanism},
-    passthrough::ProxyPassthrough,
-};
-
-pub async fn task_main(
-    config: &'static ProxyConfig,
-    backend: &'static ConsoleRedirectBackend,
-    listener: tokio::net::TcpListener,
-    cancellation_token: CancellationToken,
-    cancellation_handler: Arc<CancellationHandlerMain>,
-) -> anyhow::Result<()> {
-    scopeguard::defer! {
-        info!("proxy has shut down");
-    }
-
-    super::connection_loop(
-        config,
-        listener,
-        cancellation_token,
-        Protocol::Tcp,
-        C {
-            config,
-            backend,
-            cancellation_handler,
-        },
-    )
-    .await
-}
-
-#[derive(Clone)]
-struct C {
-    config: &'static ProxyConfig,
-    backend: &'static ConsoleRedirectBackend,
-    cancellation_handler: Arc<CancellationHandlerMain>,
-}
-
-impl super::ConnHandler for C {
-    async fn handle(
-        self,
-        session_id: uuid::Uuid,
-        peer_addr: IpAddr,
-        socket: crate::protocol2::ChainRW<tokio::net::TcpStream>,
-        conn_gauge: crate::metrics::NumClientConnectionsGuard<'static>,
-    ) {
-        let ctx = RequestMonitoring::new(session_id, peer_addr, Protocol::Tcp, &self.config.region);
-        let span = ctx.span();
-
-        let startup = Box::pin(
-            handle_client(
-                self.config,
-                self.backend,
-                &ctx,
-                self.cancellation_handler,
-                socket,
-                conn_gauge,
-            )
-            .instrument(span.clone()),
-        );
-
-        let res = startup.await;
-        transition_connection(ctx, res).await;
-    }
-}
-
-pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
-    config: &'static ProxyConfig,
-    backend: &'static ConsoleRedirectBackend,
-    ctx: &RequestMonitoring,
-    cancellation_handler: Arc<CancellationHandlerMain>,
-    stream: S,
-    conn_gauge: NumClientConnectionsGuard<'static>,
-) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
-    info!(
-        protocol = %ctx.protocol(),
-        "handling interactive connection from client"
-    );
-
-    let metrics = &Metrics::get().proxy;
-    let proto = ctx.protocol();
-    let request_gauge = metrics.connection_requests.guard(proto);
-
-    let tls = config.tls_config.as_ref();
-
-    let record_handshake_error = !ctx.has_private_peer_addr();
-    let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client);
-    let do_handshake = handshake(ctx, stream, tls, record_handshake_error);
-    let (mut stream, params) =
-        match tokio::time::timeout(config.handshake_timeout, do_handshake).await?? {
-            HandshakeData::Startup(stream, params) => (stream, params),
-            HandshakeData::Cancel(cancel_key_data) => {
-                return Ok(cancellation_handler
-                    .cancel_session(cancel_key_data, ctx.session_id())
-                    .await
-                    .map(|()| None)?)
-            }
-        };
-    drop(pause);
-
-    ctx.set_db_options(params.clone());
-
-    let user_info = match backend
-        .authenticate(ctx, &config.authentication_config, &mut stream)
-        .await
-    {
-        Ok(auth_result) => auth_result,
-        Err(e) => {
-            return stream.throw_error(e).await?;
-        }
-    };
-
-    let mut node = connect_to_compute(
-        ctx,
-        &TcpMechanism {
-            params: &params,
-            locks: &config.connect_compute_locks,
-        },
-        &user_info,
-        config.allow_self_signed_compute,
-        config.wake_compute_retry_config,
-        config.connect_to_compute_retry_config,
-    )
-    .or_else(|e| stream.throw_error(e))
-    .await?;
-
-    let session = cancellation_handler.get_session();
-    prepare_client_connection(&node, &session, &mut stream).await?;
-
-    // Before proxy passing, forward to compute whatever data is left in the
-    // PqStream input buffer. Normally there is none, but our serverless npm
-    // driver in pipeline mode sends startup, password and first query
-    // immediately after opening the connection.
-    let (stream, read_buf) = stream.into_inner();
-    node.stream.write_all(&read_buf).await?;
-
-    Ok(Some(ProxyPassthrough {
-        client: stream,
-        aux: node.aux.clone(),
-        compute: node,
-        _req: request_gauge,
-        _conn: conn_gauge,
-        _cancel: session,
-    }))
-}
--- a/proxy/src/control_plane/provider/mod.rs
+++ b/proxy/src/control_plane/provider/mod.rs
@@ -81,12 +81,12 @@ pub(crate) mod errors {
                    Reason::EndpointNotFound => ErrorKind::User,
                    Reason::BranchNotFound => ErrorKind::User,
                    Reason::RateLimitExceeded => ErrorKind::ServiceRateLimit,
-                    Reason::NonDefaultBranchComputeTimeExceeded => ErrorKind::User,
-                    Reason::ActiveTimeQuotaExceeded => ErrorKind::User,
-                    Reason::ComputeTimeQuotaExceeded => ErrorKind::User,
-                    Reason::WrittenDataQuotaExceeded => ErrorKind::User,
-                    Reason::DataTransferQuotaExceeded => ErrorKind::User,
-                    Reason::LogicalSizeQuotaExceeded => ErrorKind::User,
+                    Reason::NonDefaultBranchComputeTimeExceeded => ErrorKind::Quota,
+                    Reason::ActiveTimeQuotaExceeded => ErrorKind::Quota,
+                    Reason::ComputeTimeQuotaExceeded => ErrorKind::Quota,
+                    Reason::WrittenDataQuotaExceeded => ErrorKind::Quota,
+                    Reason::DataTransferQuotaExceeded => ErrorKind::Quota,
+                    Reason::LogicalSizeQuotaExceeded => ErrorKind::Quota,
                    Reason::ConcurrencyLimitReached => ErrorKind::ControlPlane,
                    Reason::LockAlreadyTaken => ErrorKind::ControlPlane,
                    Reason::RunningOperations => ErrorKind::ControlPlane,
@@ -103,7 +103,7 @@ pub(crate) mod errors {
                        } if error
                            .contains("compute time quota of non-primary branches is exceeded") =>
                        {
-                            crate::error::ErrorKind::User
+                            crate::error::ErrorKind::Quota
                        }
                        ControlPlaneError {
                            http_status_code: http::StatusCode::LOCKED,
@@ -112,7 +112,7 @@ pub(crate) mod errors {
                        } if error.contains("quota exceeded")
                            || error.contains("the limit for current plan reached") =>
                        {
-                            crate::error::ErrorKind::User
+                            crate::error::ErrorKind::Quota
                        }
                        ControlPlaneError {
                            http_status_code: http::StatusCode::TOO_MANY_REQUESTS,
@@ -309,7 +309,7 @@ impl NodeInfo {
            #[cfg(any(test, feature = "testing"))]
            ComputeCredentialKeys::Password(password) => self.config.password(password),
            ComputeCredentialKeys::AuthKeys(auth_keys) => self.config.auth_keys(*auth_keys),
-            ComputeCredentialKeys::None => &mut self.config,
+            ComputeCredentialKeys::JwtPayload(_) | ComputeCredentialKeys::None => &mut self.config,
        };
    }
 }
--- a/proxy/src/control_plane/provider/neon.rs
+++ b/proxy/src/control_plane/provider/neon.rs
@@ -22,7 +22,7 @@ use futures::TryFutureExt;
 use std::{sync::Arc, time::Duration};
 use tokio::time::Instant;
 use tokio_postgres::config::SslMode;
-use tracing::{debug, error, info, info_span, warn, Instrument};
+use tracing::{debug, info, info_span, warn, Instrument};

 const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");

@@ -456,7 +456,7 @@ async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
    });
    body.http_status_code = status;

-    error!("console responded with an error ({status}): {body:?}");
+    warn!("console responded with an error ({status}): {body:?}");
    Err(ApiError::ControlPlane(body))
 }

--- a/proxy/src/error.rs
+++ b/proxy/src/error.rs
@@ -49,6 +49,10 @@ pub enum ErrorKind {
    #[label(rename = "serviceratelimit")]
    ServiceRateLimit,

+    /// Proxy quota limit violation
+    #[label(rename = "quota")]
+    Quota,
+
    /// internal errors
    Service,

@@ -70,6 +74,7 @@ impl ErrorKind {
            ErrorKind::ClientDisconnect => "clientdisconnect",
            ErrorKind::RateLimit => "ratelimit",
            ErrorKind::ServiceRateLimit => "serviceratelimit",
+            ErrorKind::Quota => "quota",
            ErrorKind::Service => "service",
            ErrorKind::ControlPlane => "controlplane",
            ErrorKind::Postgres => "postgres",
--- a/proxy/src/http/health_server.rs
+++ b/proxy/src/http/health_server.rs
@@ -1,5 +1,5 @@
 use anyhow::{anyhow, bail};
-use hyper::{header::CONTENT_TYPE, Body, Request, Response, StatusCode};
+use hyper0::{header::CONTENT_TYPE, Body, Request, Response, StatusCode};
 use measured::{text::BufferedTextEncoder, MetricGroup};
 use metrics::NeonMetrics;
 use std::{
@@ -21,7 +21,7 @@ async fn status_handler(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(StatusCode::OK, "")
 }

-fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper::Body, ApiError> {
+fn make_router(metrics: AppMetrics) -> RouterBuilder<hyper0::Body, ApiError> {
    let state = Arc::new(Mutex::new(PrometheusHandler {
        encoder: BufferedTextEncoder::new(),
        metrics,
@@ -45,7 +45,7 @@ pub async fn task_main(

    let service = || RouterService::new(make_router(metrics).build()?);

-    hyper::Server::from_tcp(http_listener)?
+    hyper0::Server::from_tcp(http_listener)?
        .serve(service().map_err(|e| anyhow!(e))?)
        .await?;

--- a/proxy/src/http/mod.rs
+++ b/proxy/src/http/mod.rs
@@ -9,7 +9,7 @@ use std::time::Duration;
 use anyhow::bail;
 use bytes::Bytes;
 use http_body_util::BodyExt;
-use hyper1::body::Body;
+use hyper::body::Body;
 use serde::de::DeserializeOwned;

 pub(crate) use reqwest::{Request, Response};
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -82,25 +82,19 @@
    impl_trait_overcaptures,
 )]

-use std::{convert::Infallible, future::Future, net::IpAddr};
+use std::convert::Infallible;

 use anyhow::{bail, Context};
 use intern::{EndpointIdInt, EndpointIdTag, InternId};
-use protocol2::{get_client_conn_info, ChainRW};
-use proxy::run_until_cancelled;
-use tokio::{net::TcpStream, task::JoinError};
+use tokio::task::JoinError;
 use tokio_util::sync::CancellationToken;
-use tracing::{error, warn};
-use uuid::Uuid;
-
-extern crate hyper0 as hyper;
+use tracing::warn;

 pub mod auth;
 pub mod cache;
 pub mod cancellation;
 pub mod compute;
 pub mod config;
-pub mod console_redirect_proxy;
 pub mod context;
 pub mod control_plane;
 pub mod error;
@@ -279,81 +273,3 @@ impl EndpointId {
        ProjectId(self.0.clone())
    }
 }
-
-pub(crate) trait ConnHandler: Clone + Send + 'static {
-    fn handle(
-        self,
-        session_id: Uuid,
-        peer_addr: IpAddr,
-        stream: ChainRW<TcpStream>,
-        conn_gauge: metrics::NumClientConnectionsGuard<'static>,
-    ) -> impl Future<Output = ()> + Send;
-}
-
-/// Accept connections, parse the proxy-protocol v2 header and spawn a tracked connection task.
-pub(crate) async fn connection_loop<C>(
-    config: &'static config::ProxyConfig,
-    listener: tokio::net::TcpListener,
-    cancellation_token: CancellationToken,
-    protocol: metrics::Protocol,
-    conn_handler: C,
-) -> anyhow::Result<()>
-where
-    C: ConnHandler,
-{
-    // When set for the server socket, the keepalive setting
-    // will be inherited by all accepted client sockets.
-    socket2::SockRef::from(&listener).set_keepalive(true)?;
-
-    let connections = tokio_util::task::task_tracker::TaskTracker::new();
-
-    while let Some(accept_result) =
-        run_until_cancelled(listener.accept(), &cancellation_token).await
-    {
-        let (socket, peer_addr) = accept_result?;
-
-        let conn_gauge = metrics::Metrics::get()
-            .proxy
-            .client_connections
-            .guard(protocol);
-
-        let session_id = uuid::Uuid::new_v4();
-        let conn_handler = conn_handler.clone();
-
-        tracing::info!(protocol = protocol.as_str(), %session_id, "accepted new TCP connection");
-
-        connections.spawn(async move {
-            let (socket, peer_addr) = match get_client_conn_info(socket, config.proxy_protocol_v2).await {
-                Err(e) => {
-                    error!("per-client task finished with an error: {e:#}");
-                    return;
-                }
-                Ok((socket, Some(addr))) => (socket, addr),
-                Ok((socket, None)) => (socket, peer_addr.ip()),
-            };
-
-            match socket.inner.set_nodelay(true) {
-                Ok(()) => {}
-                Err(e) => {
-                    error!("per-client task finished with an error: failed to set socket option: {e:#}");
-                    return;
-                }
-            };
-
-            conn_handler.handle(
-                session_id,
-                peer_addr,
-                socket,
-                conn_gauge,
-            ).await;
-        });
-    }
-
-    connections.close();
-    drop(listener);
-
-    // Drain connections
-    connections.wait().await;
-
-    Ok(())
-}
--- a/proxy/src/protocol2.rs
+++ b/proxy/src/protocol2.rs
@@ -2,18 +2,15 @@

 use std::{
    io,
-    net::{IpAddr, SocketAddr},
+    net::SocketAddr,
    pin::Pin,
    task::{Context, Poll},
 };

-use anyhow::bail;
 use bytes::BytesMut;
 use pin_project_lite::pin_project;
 use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};

-use crate::config::ProxyProtocolV2;
-
 pin_project! {
    /// A chained [`AsyncRead`] with [`AsyncWrite`] passthrough
    pub(crate) struct ChainRW<T> {
@@ -63,23 +60,7 @@ const HEADER: [u8; 12] = [
    0x0D, 0x0A, 0x0D, 0x0A, 0x00, 0x0D, 0x0A, 0x51, 0x55, 0x49, 0x54, 0x0A,
 ];

-pub(crate) async fn get_client_conn_info<T: AsyncRead + Unpin>(
-    socket: T,
-    proxy_protocol_v2: ProxyProtocolV2,
-) -> anyhow::Result<(ChainRW<T>, Option<IpAddr>)> {
-    match read_proxy_protocol(socket).await? {
-        (_socket, None) if proxy_protocol_v2 == ProxyProtocolV2::Required => {
-            bail!("missing required proxy protocol header");
-        }
-        (_socket, Some(_)) if proxy_protocol_v2 == ProxyProtocolV2::Rejected => {
-            bail!("proxy protocol header not supported");
-        }
-        (socket, Some(addr)) => Ok((socket, Some(addr.ip()))),
-        (socket, None) => Ok((socket, None)),
-    }
-}
-
-async fn read_proxy_protocol<T: AsyncRead + Unpin>(
+pub(crate) async fn read_proxy_protocol<T: AsyncRead + Unpin>(
    mut read: T,
 ) -> std::io::Result<(ChainRW<T>, Option<SocketAddr>)> {
    let mut buf = BytesMut::with_capacity(128);
--- a/proxy/src/proxy/mod.rs
+++ b/proxy/src/proxy/mod.rs
@@ -10,16 +10,16 @@ pub(crate) mod wake_compute;
 pub use copy_bidirectional::copy_bidirectional_client_compute;
 pub use copy_bidirectional::ErrorSource;

-use crate::control_plane::provider::ControlPlaneBackend;
-use crate::metrics::Protocol;
+use crate::config::ProxyProtocolV2;
 use crate::{
    auth,
-    cancellation::{self, CancellationHandlerMain},
+    cancellation::{self, CancellationHandlerMain, CancellationHandlerMainInternal},
    compute,
    config::{ProxyConfig, TlsConfig},
    context::RequestMonitoring,
    error::ReportableError,
    metrics::{Metrics, NumClientConnectionsGuard},
+    protocol2::read_proxy_protocol,
    proxy::handshake::{handshake, HandshakeData},
    rate_limiter::EndpointRateLimiter,
    stream::{PqStream, Stream},
@@ -31,12 +31,11 @@ use once_cell::sync::OnceCell;
 use pq_proto::{BeMessage as Be, StartupMessageParams};
 use regex::Regex;
 use smol_str::{format_smolstr, SmolStr};
-use std::net::IpAddr;
 use std::sync::Arc;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
 use tokio_util::sync::CancellationToken;
-use tracing::{error, info, Instrument};
+use tracing::{error, info, warn, Instrument};

 use self::{
    connect_compute::{connect_to_compute, TcpMechanism},
@@ -62,7 +61,7 @@ pub async fn run_until_cancelled<F: std::future::Future>(

 pub async fn task_main(
    config: &'static ProxyConfig,
-    auth_backend: &'static ControlPlaneBackend,
+    auth_backend: &'static auth::Backend<'static, (), ()>,
    listener: tokio::net::TcpListener,
    cancellation_token: CancellationToken,
    cancellation_handler: Arc<CancellationHandlerMain>,
@@ -72,91 +71,110 @@ pub async fn task_main(
        info!("proxy has shut down");
    }

-    super::connection_loop(
-        config,
-        listener,
-        cancellation_token,
-        Protocol::Tcp,
-        C {
-            config,
-            auth_backend,
-            cancellation_handler,
-            endpoint_rate_limiter,
-        },
-    )
-    .await
-}
+    // When set for the server socket, the keepalive setting
+    // will be inherited by all accepted client sockets.
+    socket2::SockRef::from(&listener).set_keepalive(true)?;

-#[derive(Clone)]
-struct C {
-    config: &'static ProxyConfig,
-    auth_backend: &'static ControlPlaneBackend,
-    cancellation_handler: Arc<CancellationHandlerMain>,
-    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-}
+    let connections = tokio_util::task::task_tracker::TaskTracker::new();

-impl super::ConnHandler for C {
-    async fn handle(
-        self,
-        session_id: uuid::Uuid,
-        peer_addr: IpAddr,
-        socket: crate::protocol2::ChainRW<tokio::net::TcpStream>,
-        conn_gauge: crate::metrics::NumClientConnectionsGuard<'static>,
-    ) {
-        let ctx = RequestMonitoring::new(
-            session_id,
-            peer_addr,
-            crate::metrics::Protocol::Tcp,
-            &self.config.region,
-        );
-        let span = ctx.span();
+    while let Some(accept_result) =
+        run_until_cancelled(listener.accept(), &cancellation_token).await
+    {
+        let (socket, peer_addr) = accept_result?;

-        let startup = Box::pin(
-            handle_client(
-                self.config,
-                self.auth_backend,
-                &ctx,
-                self.cancellation_handler,
-                socket,
-                ClientMode::Tcp,
-                self.endpoint_rate_limiter,
-                conn_gauge,
-            )
-            .instrument(span.clone()),
-        );
+        let conn_gauge = Metrics::get()
+            .proxy
+            .client_connections
+            .guard(crate::metrics::Protocol::Tcp);

-        let res = startup.await;
-        transition_connection(ctx, res).await;
-    }
-}
+        let session_id = uuid::Uuid::new_v4();
+        let cancellation_handler = Arc::clone(&cancellation_handler);

-pub(crate) async fn transition_connection<S: AsyncRead + AsyncWrite + Unpin>(
-    ctx: RequestMonitoring,
-    res: Result<Option<ProxyPassthrough<S>>, ClientRequestError>,
-) {
-    let span = ctx.span();
-    match res {
-        Err(e) => {
-            ctx.set_error_kind(e.get_error_kind());
-            error!(parent: &span, "per-client task finished with an error: {e:#}");
-        }
-        Ok(None) => {
-            ctx.set_success();
-        }
-        Ok(Some(p)) => {
-            ctx.set_success();
-            ctx.log_connect();
-            match p.proxy_pass().instrument(span.clone()).await {
-                Ok(()) => {}
-                Err(ErrorSource::Client(e)) => {
-                    error!(parent: &span, "per-client task finished with an IO error from the client: {e:#}");
+        tracing::info!(protocol = "tcp", %session_id, "accepted new TCP connection");
+        let endpoint_rate_limiter2 = endpoint_rate_limiter.clone();
+
+        connections.spawn(async move {
+            let (socket, peer_addr) = match read_proxy_protocol(socket).await {
+                Err(e) => {
+                    warn!("per-client task finished with an error: {e:#}");
+                    return;
                }
-                Err(ErrorSource::Compute(e)) => {
-                    error!(parent: &span, "per-client task finished with an IO error from the compute: {e:#}");
+                Ok((_socket, None)) if config.proxy_protocol_v2 == ProxyProtocolV2::Required => {
+                    warn!("missing required proxy protocol header");
+                    return;
+                }
+                Ok((_socket, Some(_))) if config.proxy_protocol_v2 == ProxyProtocolV2::Rejected => {
+                    warn!("proxy protocol header not supported");
+                    return;
+                }
+                Ok((socket, Some(addr))) => (socket, addr.ip()),
+                Ok((socket, None)) => (socket, peer_addr.ip()),
+            };
+
+            match socket.inner.set_nodelay(true) {
+                Ok(()) => {}
+                Err(e) => {
+                    error!("per-client task finished with an error: failed to set socket option: {e:#}");
+                    return;
+                }
+            };
+
+            let ctx = RequestMonitoring::new(
+                session_id,
+                peer_addr,
+                crate::metrics::Protocol::Tcp,
+                &config.region,
+            );
+            let span = ctx.span();
+
+            let startup = Box::pin(
+                handle_client(
+                    config,
+                    auth_backend,
+                    &ctx,
+                    cancellation_handler,
+                    socket,
+                    ClientMode::Tcp,
+                    endpoint_rate_limiter2,
+                    conn_gauge,
+                )
+                .instrument(span.clone()),
+            );
+            let res = startup.await;
+
+            match res {
+                Err(e) => {
+                    // todo: log and push to ctx the error kind
+                    ctx.set_error_kind(e.get_error_kind());
+                    warn!(parent: &span, "per-client task finished with an error: {e:#}");
+                }
+                Ok(None) => {
+                    ctx.set_success();
+                }
+                Ok(Some(p)) => {
+                    ctx.set_success();
+                    ctx.log_connect();
+                    match p.proxy_pass().instrument(span.clone()).await {
+                        Ok(()) => {}
+                        Err(ErrorSource::Client(e)) => {
+                            warn!(parent: &span, "per-client task finished with an IO error from the client: {e:#}");
+                        }
+                        Err(ErrorSource::Compute(e)) => {
+                            error!(parent: &span, "per-client task finished with an IO error from the compute: {e:#}");
+                        }
+                    }
                }
            }
-        }
+        });
    }
+
+    connections.close();
+    drop(listener);
+
+    // Drain connections
+    connections.wait().await;
+
+    Ok(())
 }

 pub(crate) enum ClientMode {
@@ -230,14 +248,14 @@ impl ReportableError for ClientRequestError {
 #[allow(clippy::too_many_arguments)]
 pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    config: &'static ProxyConfig,
-    auth_backend: &'static ControlPlaneBackend,
+    auth_backend: &'static auth::Backend<'static, (), ()>,
    ctx: &RequestMonitoring,
    cancellation_handler: Arc<CancellationHandlerMain>,
    stream: S,
    mode: ClientMode,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    conn_gauge: NumClientConnectionsGuard<'static>,
-) -> Result<Option<ProxyPassthrough<S>>, ClientRequestError> {
+) -> Result<Option<ProxyPassthrough<CancellationHandlerMainInternal, S>>, ClientRequestError> {
    info!(
        protocol = %ctx.protocol(),
        "handling interactive connection from client"
@@ -271,17 +289,20 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
    let common_names = tls.map(|tls| &tls.common_names);

    // Extract credentials which we're going to use for auth.
-    let result = auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names);
+    let result = auth_backend
+        .as_ref()
+        .map(|()| auth::ComputeUserInfoMaybeEndpoint::parse(ctx, &params, hostname, common_names))
+        .transpose();
+
    let user_info = match result {
        Ok(user_info) => user_info,
        Err(e) => stream.throw_error(e).await?,
    };

-    let user = user_info.user.clone();
-    let user_info = match auth_backend
+    let user = user_info.get_user().to_owned();
+    let user_info = match user_info
        .authenticate(
            ctx,
-            user_info,
            &mut stream,
            mode.allow_cleartext(),
            &config.authentication_config,
@@ -335,7 +356,7 @@ pub(crate) async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

 /// Finish client connection initialization: confirm auth success, send params, etc.
 #[tracing::instrument(skip_all)]
-pub(crate) async fn prepare_client_connection<P>(
+async fn prepare_client_connection<P>(
    node: &compute::PostgresConnection,
    session: &cancellation::Session<P>,
    stream: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
--- a/proxy/src/proxy/passthrough.rs
+++ b/proxy/src/proxy/passthrough.rs
@@ -1,5 +1,5 @@
 use crate::{
-    cancellation::{self, CancellationHandlerMainInternal},
+    cancellation,
    compute::PostgresConnection,
    control_plane::messages::MetricsAuxInfo,
    metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard},
@@ -57,21 +57,21 @@ pub(crate) async fn proxy_pass(
    Ok(())
 }

-pub(crate) struct ProxyPassthrough<S> {
+pub(crate) struct ProxyPassthrough<P, S> {
    pub(crate) client: Stream<S>,
    pub(crate) compute: PostgresConnection,
    pub(crate) aux: MetricsAuxInfo,

    pub(crate) _req: NumConnectionRequestsGuard<'static>,
    pub(crate) _conn: NumClientConnectionsGuard<'static>,
-    pub(crate) _cancel: cancellation::Session<CancellationHandlerMainInternal>,
+    pub(crate) _cancel: cancellation::Session<P>,
 }

-impl<S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<S> {
+impl<P, S: AsyncRead + AsyncWrite + Unpin> ProxyPassthrough<P, S> {
    pub(crate) async fn proxy_pass(self) -> Result<(), ErrorSource> {
        let res = proxy_pass(self.client, self.compute.stream, self.aux).await;
        if let Err(err) = self.compute.cancel_closure.try_cancel_query().await {
-            tracing::error!(?err, "could not cancel the query in the database");
+            tracing::warn!(?err, "could not cancel the query in the database");
        }
        res
    }
--- a/proxy/src/proxy/tests/mod.rs
+++ b/proxy/src/proxy/tests/mod.rs
@@ -8,20 +8,18 @@ use super::connect_compute::ConnectMechanism;
 use super::retry::CouldRetry;
 use super::*;
 use crate::auth::backend::{
-    ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, TestBackend,
+    ComputeCredentialKeys, ComputeCredentials, ComputeUserInfo, MaybeOwned, TestBackend,
 };
-use crate::config::{CertResolver, ProxyProtocolV2, RetryConfig};
+use crate::config::{CertResolver, RetryConfig};
 use crate::control_plane::messages::{ControlPlaneError, Details, MetricsAuxInfo, Status};
 use crate::control_plane::provider::{
    CachedAllowedIps, CachedRoleSecret, ControlPlaneBackend, NodeInfoCache,
 };
 use crate::control_plane::{self, CachedNodeInfo, NodeInfo};
 use crate::error::ErrorKind;
-use crate::protocol2::get_client_conn_info;
 use crate::{sasl, scram, BranchId, EndpointId, ProjectId};
 use anyhow::{bail, Context};
 use async_trait::async_trait;
-use auth::backend::ControlPlaneComputeBackend;
 use http::StatusCode;
 use retry::{retry_after, ShouldRetryWakeCompute};
 use rstest::rstest;
@@ -178,7 +176,7 @@ async fn dummy_proxy(
    tls: Option<TlsConfig>,
    auth: impl TestAuth + Send,
 ) -> anyhow::Result<()> {
-    let (client, _) = get_client_conn_info(client, ProxyProtocolV2::Supported).await?;
+    let (client, _) = read_proxy_protocol(client).await?;
    let mut stream =
        match handshake(&RequestMonitoring::test(), client, tls.as_ref(), false).await? {
            HandshakeData::Startup(stream, _) => stream,
@@ -554,19 +552,19 @@ fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn

 fn helper_create_connect_info(
    mechanism: &TestConnectMechanism,
-) -> ControlPlaneComputeBackend<'static> {
-    let api = Box::leak(Box::new(ControlPlaneBackend::Test(Box::new(
-        mechanism.clone(),
-    ))));
-
-    api.attach_to_credentials(ComputeCredentials {
-        info: ComputeUserInfo {
-            endpoint: "endpoint".into(),
-            user: "user".into(),
-            options: NeonOptions::parse_options_raw(""),
+) -> auth::Backend<'static, ComputeCredentials, &()> {
+    let user_info = auth::Backend::ControlPlane(
+        MaybeOwned::Owned(ControlPlaneBackend::Test(Box::new(mechanism.clone()))),
+        ComputeCredentials {
+            info: ComputeUserInfo {
+                endpoint: "endpoint".into(),
+                user: "user".into(),
+                options: NeonOptions::parse_options_raw(""),
+            },
+            keys: ComputeCredentialKeys::Password("password".into()),
        },
-        keys: ComputeCredentialKeys::Password("password".into()),
-    })
+    );
+    user_info
 }

 #[tokio::test]
--- a/proxy/src/proxy/wake_compute.rs
+++ b/proxy/src/proxy/wake_compute.rs
@@ -7,7 +7,7 @@ use crate::metrics::{
    WakeupFailureKind,
 };
 use crate::proxy::retry::{retry_after, should_retry};
-use hyper1::StatusCode;
+use hyper::StatusCode;
 use tracing::{error, info, warn};

 use super::connect_compute::ComputeConnectBackend;
--- a/proxy/src/redis/connection_with_credentials_provider.rs
+++ b/proxy/src/redis/connection_with_credentials_provider.rs
@@ -6,7 +6,7 @@ use redis::{
    ConnectionInfo, IntoConnectionInfo, RedisConnectionInfo, RedisResult,
 };
 use tokio::task::JoinHandle;
-use tracing::{debug, error, info};
+use tracing::{debug, error, info, warn};

 use super::elasticache::CredentialsProvider;

@@ -89,7 +89,7 @@ impl ConnectionWithCredentialsProvider {
                    return Ok(());
                }
                Err(e) => {
-                    error!("Error during PING: {e:?}");
+                    warn!("Error during PING: {e:?}");
                }
            }
        } else {
@@ -121,7 +121,7 @@ impl ConnectionWithCredentialsProvider {
                info!("Connection succesfully established");
            }
            Err(e) => {
-                error!("Connection is broken. Error during PING: {e:?}");
+                warn!("Connection is broken. Error during PING: {e:?}");
            }
        }
        self.con = Some(con);
--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -146,7 +146,7 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                {
                    Ok(()) => {}
                    Err(e) => {
-                        tracing::error!("failed to cancel session: {e}");
+                        tracing::warn!("failed to cancel session: {e}");
                    }
                }
            }
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -3,12 +3,14 @@ use std::{io, sync::Arc, time::Duration};
 use async_trait::async_trait;
 use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer};
 use tokio::net::{lookup_host, TcpStream};
-use tracing::{field::display, info};
+use tokio_postgres::types::ToSql;
+use tracing::{debug, field::display, info};

 use crate::{
    auth::{
+        self,
        backend::{local::StaticAuthRules, ComputeCredentials, ComputeUserInfo},
-        check_peer_addr_is_in_list, AuthError, ServerlessBackend,
+        check_peer_addr_is_in_list, AuthError,
    },
    compute,
    config::ProxyConfig,
@@ -17,7 +19,7 @@ use crate::{
        errors::{GetAuthInfoError, WakeComputeError},
        locks::ApiLocks,
        provider::ApiLockError,
-        Api, CachedNodeInfo,
+        CachedNodeInfo,
    },
    error::{ErrorKind, ReportableError, UserFacingError},
    intern::EndpointIdInt,
@@ -26,19 +28,21 @@ use crate::{
        retry::{CouldRetry, ShouldRetryWakeCompute},
    },
    rate_limiter::EndpointRateLimiter,
-    Host,
+    EndpointId, Host,
 };

 use super::{
    conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool},
    http_conn_pool::{self, poll_http2_client},
+    local_conn_pool::{self, LocalClient, LocalConnPool},
 };

 pub(crate) struct PoolingBackend {
    pub(crate) http_conn_pool: Arc<super::http_conn_pool::GlobalConnPool>,
+    pub(crate) local_pool: Arc<LocalConnPool<tokio_postgres::Client>>,
    pub(crate) pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
    pub(crate) config: &'static ProxyConfig,
-    pub(crate) auth_backend: ServerlessBackend<'static>,
+    pub(crate) auth_backend: &'static crate::auth::Backend<'static, (), ()>,
    pub(crate) endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 }

@@ -49,16 +53,9 @@ impl PoolingBackend {
        user_info: &ComputeUserInfo,
        password: &[u8],
    ) -> Result<ComputeCredentials, AuthError> {
-        let cplane = match &self.auth_backend {
-            ServerlessBackend::ControlPlane(cplane) => cplane,
-            ServerlessBackend::Local(_local) => {
-                return Err(AuthError::bad_auth_method(
-                    "password authentication not supported by local_proxy",
-                ))
-            }
-        };
-
-        let (allowed_ips, maybe_secret) = cplane.get_allowed_ips_and_secret(ctx, user_info).await?;
+        let user_info = user_info.clone();
+        let backend = self.auth_backend.as_ref().map(|()| user_info.clone());
+        let (allowed_ips, maybe_secret) = backend.get_allowed_ips_and_secret(ctx).await?;
        if self.config.authentication_config.ip_allowlist_check_enabled
            && !check_peer_addr_is_in_list(&ctx.peer_addr(), &allowed_ips)
        {
@@ -72,7 +69,7 @@ impl PoolingBackend {
        }
        let cached_secret = match maybe_secret {
            Some(secret) => secret,
-            None => cplane.get_role_secret(ctx, user_info).await?,
+            None => backend.get_role_secret(ctx).await?,
        };

        let secret = match cached_secret.value.clone() {
@@ -107,7 +104,7 @@ impl PoolingBackend {
            }
        };
        res.map(|key| ComputeCredentials {
-            info: user_info.clone(),
+            info: user_info,
            keys: key,
        })
    }
@@ -117,9 +114,9 @@ impl PoolingBackend {
        ctx: &RequestMonitoring,
        user_info: &ComputeUserInfo,
        jwt: String,
-    ) -> Result<(), AuthError> {
+    ) -> Result<ComputeCredentials, AuthError> {
        match &self.auth_backend {
-            ServerlessBackend::ControlPlane(console) => {
+            crate::auth::Backend::ControlPlane(console, ()) => {
                self.config
                    .authentication_config
                    .jwks_cache
@@ -133,10 +130,17 @@ impl PoolingBackend {
                    .await
                    .map_err(|e| AuthError::auth_failed(e.to_string()))?;

-                Ok(())
+                Ok(ComputeCredentials {
+                    info: user_info.clone(),
+                    keys: crate::auth::backend::ComputeCredentialKeys::None,
+                })
            }
-            ServerlessBackend::Local(_) => {
-                self.config
+            crate::auth::Backend::ConsoleRedirect(_, ()) => Err(AuthError::auth_failed(
+                "JWT login over web auth proxy is not supported",
+            )),
+            crate::auth::Backend::Local(_) => {
+                let keys = self
+                    .config
                    .authentication_config
                    .jwks_cache
                    .check_jwt(
@@ -149,8 +153,10 @@ impl PoolingBackend {
                    .await
                    .map_err(|e| AuthError::auth_failed(e.to_string()))?;

-                // todo: rewrite JWT signature with key shared somehow between local proxy and postgres
-                Ok(())
+                Ok(ComputeCredentials {
+                    info: user_info.clone(),
+                    keys,
+                })
            }
        }
    }
@@ -180,41 +186,21 @@ impl PoolingBackend {
        let conn_id = uuid::Uuid::new_v4();
        tracing::Span::current().record("conn_id", display(conn_id));
        info!(%conn_id, "pool: opening a new connection '{conn_info}'");
-
-        match &self.auth_backend {
-            ServerlessBackend::ControlPlane(cplane) => {
-                crate::proxy::connect_compute::connect_to_compute(
-                    ctx,
-                    &TokioMechanism {
-                        conn_id,
-                        conn_info,
-                        pool: self.pool.clone(),
-                        locks: &self.config.connect_compute_locks,
-                    },
-                    &cplane.attach_to_credentials(keys),
-                    false, // do not allow self signed compute for http flow
-                    self.config.wake_compute_retry_config,
-                    self.config.connect_to_compute_retry_config,
-                )
-                .await
-            }
-            ServerlessBackend::Local(local_proxy) => {
-                crate::proxy::connect_compute::connect_to_compute(
-                    ctx,
-                    &TokioMechanism {
-                        conn_id,
-                        conn_info,
-                        pool: self.pool.clone(),
-                        locks: &self.config.connect_compute_locks,
-                    },
-                    &**local_proxy,
-                    false, // do not allow self signed compute for http flow
-                    self.config.wake_compute_retry_config,
-                    self.config.connect_to_compute_retry_config,
-                )
-                .await
-            }
-        }
+        let backend = self.auth_backend.as_ref().map(|()| keys);
+        crate::proxy::connect_compute::connect_to_compute(
+            ctx,
+            &TokioMechanism {
+                conn_id,
+                conn_info,
+                pool: self.pool.clone(),
+                locks: &self.config.connect_compute_locks,
+            },
+            &backend,
+            false, // do not allow self signed compute for http flow
+            self.config.wake_compute_retry_config,
+            self.config.connect_to_compute_retry_config,
+        )
+        .await
    }

    // Wake up the destination if needed
@@ -224,13 +210,6 @@ impl PoolingBackend {
        ctx: &RequestMonitoring,
        conn_info: ConnInfo,
    ) -> Result<http_conn_pool::Client, HttpConnError> {
-        let cplane = match &self.auth_backend {
-            ServerlessBackend::Local(_) => {
-                panic!("connect to local_proxy should not be called if we are already local_proxy")
-            }
-            ServerlessBackend::ControlPlane(cplane) => cplane,
-        };
-
        info!("pool: looking for an existing connection");
        if let Some(client) = self.http_conn_pool.get(ctx, &conn_info) {
            return Ok(client);
@@ -239,9 +218,12 @@ impl PoolingBackend {
        let conn_id = uuid::Uuid::new_v4();
        tracing::Span::current().record("conn_id", display(conn_id));
        info!(%conn_id, "pool: opening a new connection '{conn_info}'");
-
-        let backend = cplane.attach_to_credentials(ComputeCredentials {
-            info: conn_info.user_info.clone(),
+        let backend = self.auth_backend.as_ref().map(|()| ComputeCredentials {
+            info: ComputeUserInfo {
+                user: conn_info.user_info.user.clone(),
+                endpoint: EndpointId::from(format!("{}-local-proxy", conn_info.user_info.endpoint)),
+                options: conn_info.user_info.options.clone(),
+            },
            keys: crate::auth::backend::ComputeCredentialKeys::None,
        });
        crate::proxy::connect_compute::connect_to_compute(
@@ -259,6 +241,77 @@ impl PoolingBackend {
        )
        .await
    }
+
+    /// Connect to postgres over localhost.
+    ///
+    /// We expect postgres to be started here, so we won't do any retries.
+    ///
+    /// # Panics
+    ///
+    /// Panics if called with a non-local_proxy backend.
+    #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)]
+    pub(crate) async fn connect_to_local_postgres(
+        &self,
+        ctx: &RequestMonitoring,
+        conn_info: ConnInfo,
+    ) -> Result<LocalClient<tokio_postgres::Client>, HttpConnError> {
+        if let Some(client) = self.local_pool.get(ctx, &conn_info)? {
+            return Ok(client);
+        }
+
+        let conn_id = uuid::Uuid::new_v4();
+        tracing::Span::current().record("conn_id", display(conn_id));
+        info!(%conn_id, "local_pool: opening a new connection '{conn_info}'");
+
+        let mut node_info = match &self.auth_backend {
+            auth::Backend::ControlPlane(_, ()) | auth::Backend::ConsoleRedirect(_, ()) => {
+                unreachable!("only local_proxy can connect to local postgres")
+            }
+            auth::Backend::Local(local) => local.node_info.clone(),
+        };
+
+        let config = node_info
+            .config
+            .user(&conn_info.user_info.user)
+            .dbname(&conn_info.dbname);
+
+        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
+        let (client, connection) = config.connect(tokio_postgres::NoTls).await?;
+        drop(pause);
+
+        tracing::Span::current().record("pid", tracing::field::display(client.get_process_id()));
+
+        let handle = local_conn_pool::poll_client(
+            self.local_pool.clone(),
+            ctx,
+            conn_info,
+            client,
+            connection,
+            conn_id,
+            node_info.aux.clone(),
+        );
+
+        let kid = handle.get_client().get_process_id() as i64;
+        let jwk = p256::PublicKey::from(handle.key().verifying_key()).to_jwk();
+
+        debug!(kid, ?jwk, "setting up backend session state");
+
+        // initiates the auth session
+        handle
+            .get_client()
+            .query(
+                "select auth.init($1, $2);",
+                &[
+                    &kid as &(dyn ToSql + Sync),
+                    &tokio_postgres::types::Json(jwk),
+                ],
+            )
+            .await?;
+
+        info!(?kid, "backend session state init");
+
+        Ok(handle)
+    }
 }

 #[derive(Debug, thiserror::Error)]
@@ -269,6 +322,8 @@ pub(crate) enum HttpConnError {
    PostgresConnectionError(#[from] tokio_postgres::Error),
    #[error("could not connection to local-proxy in compute")]
    LocalProxyConnectionError(#[from] LocalProxyConnError),
+    #[error("could not parse JWT payload")]
+    JwtPayloadError(serde_json::Error),

    #[error("could not get auth info")]
    GetAuthInfo(#[from] GetAuthInfoError),
@@ -285,7 +340,7 @@ pub(crate) enum LocalProxyConnError {
    #[error("error with connection to local-proxy")]
    Io(#[source] std::io::Error),
    #[error("could not establish h2 connection")]
-    H2(#[from] hyper1::Error),
+    H2(#[from] hyper::Error),
 }

 impl ReportableError for HttpConnError {
@@ -294,6 +349,7 @@ impl ReportableError for HttpConnError {
            HttpConnError::ConnectionClosedAbruptly(_) => ErrorKind::Compute,
            HttpConnError::PostgresConnectionError(p) => p.get_error_kind(),
            HttpConnError::LocalProxyConnectionError(_) => ErrorKind::Compute,
+            HttpConnError::JwtPayloadError(_) => ErrorKind::User,
            HttpConnError::GetAuthInfo(a) => a.get_error_kind(),
            HttpConnError::AuthError(a) => a.get_error_kind(),
            HttpConnError::WakeCompute(w) => w.get_error_kind(),
@@ -308,6 +364,7 @@ impl UserFacingError for HttpConnError {
            HttpConnError::ConnectionClosedAbruptly(_) => self.to_string(),
            HttpConnError::PostgresConnectionError(p) => p.to_string(),
            HttpConnError::LocalProxyConnectionError(p) => p.to_string(),
+            HttpConnError::JwtPayloadError(p) => p.to_string(),
            HttpConnError::GetAuthInfo(c) => c.to_string_client(),
            HttpConnError::AuthError(c) => c.to_string_client(),
            HttpConnError::WakeCompute(c) => c.to_string_client(),
@@ -324,6 +381,7 @@ impl CouldRetry for HttpConnError {
            HttpConnError::PostgresConnectionError(e) => e.could_retry(),
            HttpConnError::LocalProxyConnectionError(e) => e.could_retry(),
            HttpConnError::ConnectionClosedAbruptly(_) => false,
+            HttpConnError::JwtPayloadError(_) => false,
            HttpConnError::GetAuthInfo(_) => false,
            HttpConnError::AuthError(_) => false,
            HttpConnError::WakeCompute(_) => false,
@@ -450,8 +508,12 @@ impl ConnectMechanism for HyperMechanism {

        let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);

-        // let port = node_info.config.get_ports().first().unwrap_or_else(10432);
-        let res = connect_http2(&host, 10432, timeout).await;
+        let port = *node_info.config.get_ports().first().ok_or_else(|| {
+            HttpConnError::WakeCompute(WakeComputeError::BadComputeAddress(
+                "local-proxy port missing on compute address".into(),
+            ))
+        })?;
+        let res = connect_http2(&host, port, timeout).await;
        drop(pause);
        let (client, connection) = permit.release_result(res)?;

@@ -509,7 +571,7 @@ async fn connect_http2(
        };
    };

-    let (client, connection) = hyper1::client::conn::http2::Builder::new(TokioExecutor::new())
+    let (client, connection) = hyper::client::conn::http2::Builder::new(TokioExecutor::new())
        .timer(TokioTimer::new())
        .keep_alive_interval(Duration::from_secs(20))
        .keep_alive_while_idle(true)
--- a/proxy/src/serverless/http_conn_pool.rs
+++ b/proxy/src/serverless/http_conn_pool.rs
@@ -1,5 +1,5 @@
 use dashmap::DashMap;
-use hyper1::client::conn::http2;
+use hyper::client::conn::http2;
 use hyper_util::rt::{TokioExecutor, TokioIo};
 use parking_lot::RwLock;
 use rand::Rng;
@@ -18,9 +18,9 @@ use tracing::{info, info_span, Instrument};

 use super::conn_pool::ConnInfo;

-pub(crate) type Send = http2::SendRequest<hyper1::body::Incoming>;
+pub(crate) type Send = http2::SendRequest<hyper::body::Incoming>;
 pub(crate) type Connect =
-    http2::Connection<TokioIo<TcpStream>, hyper1::body::Incoming, TokioExecutor>;
+    http2::Connection<TokioIo<TcpStream>, hyper::body::Incoming, TokioExecutor>;

 #[derive(Clone)]
 struct ConnPoolEntry {
--- a/proxy/src/serverless/http_util.rs
+++ b/proxy/src/serverless/http_util.rs
@@ -11,7 +11,7 @@ use serde::Serialize;
 use utils::http::error::ApiError;

 /// Like [`ApiError::into_response`]
-pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper1::Error>> {
+pub(crate) fn api_error_into_response(this: ApiError) -> Response<BoxBody<Bytes, hyper::Error>> {
    match this {
        ApiError::BadRequest(err) => HttpErrorBody::response_from_msg_and_status(
            format!("{err:#?}"), // use debug printing so that we give the cause
@@ -67,12 +67,12 @@ impl HttpErrorBody {
    fn response_from_msg_and_status(
        msg: String,
        status: StatusCode,
-    ) -> Response<BoxBody<Bytes, hyper1::Error>> {
+    ) -> Response<BoxBody<Bytes, hyper::Error>> {
        HttpErrorBody { msg }.to_response(status)
    }

    /// Same as [`utils::http::error::HttpErrorBody::to_response`]
-    fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper1::Error>> {
+    fn to_response(&self, status: StatusCode) -> Response<BoxBody<Bytes, hyper::Error>> {
        Response::builder()
            .status(status)
            .header(http::header::CONTENT_TYPE, "application/json")
@@ -90,7 +90,7 @@ impl HttpErrorBody {
 pub(crate) fn json_response<T: Serialize>(
    status: StatusCode,
    data: T,
-) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
    let json = serde_json::to_string(&data)
        .context("Failed to serialize JSON response")
        .map_err(ApiError::InternalServerError)?;
--- a/proxy/src/serverless/json.rs
+++ b/proxy/src/serverless/json.rs
@@ -1,30 +1,40 @@
+use itertools::Itertools;
+use serde_json::value::RawValue;
 use serde_json::Map;
 use serde_json::Value;
 use tokio_postgres::types::Kind;
 use tokio_postgres::types::Type;
 use tokio_postgres::Row;
+use typed_json::json;
+
+use super::json_raw_value::LazyValue;

 //
 // Convert json non-string types to strings, so that they can be passed to Postgres
 // as parameters.
 //
-pub(crate) fn json_to_pg_text(json: Vec<Value>) -> Vec<Option<String>> {
-    json.iter().map(json_value_to_pg_text).collect()
+pub(crate) fn json_to_pg_text(
+    json: &[&RawValue],
+) -> Result<Vec<Option<String>>, serde_json::Error> {
+    json.iter().copied().map(json_value_to_pg_text).try_collect()
 }

-fn json_value_to_pg_text(value: &Value) -> Option<String> {
-    match value {
+fn json_value_to_pg_text(value: &RawValue) -> Result<Option<String>, serde_json::Error> {
+    let lazy_value = serde_json::from_str(value.get())?;
+    match lazy_value {
        // special care for nulls
-        Value::Null => None,
+        LazyValue::Null => Ok(None),

        // convert to text with escaping
-        v @ (Value::Bool(_) | Value::Number(_) | Value::Object(_)) => Some(v.to_string()),
+        LazyValue::Bool | LazyValue::Number | LazyValue::Object => {
+            Ok(Some(value.get().to_string()))
+        }

        // avoid escaping here, as we pass this as a parameter
-        Value::String(s) => Some(s.to_string()),
+        LazyValue::String(s) => Ok(Some(s.into_owned())),

        // special care for arrays
-        Value::Array(_) => json_array_to_pg_array(value),
+        LazyValue::Array(arr) => Ok(Some(json_array_to_pg_array(arr)?)),
    }
 }

@@ -36,27 +46,42 @@ fn json_value_to_pg_text(value: &Value) -> Option<String> {
 //
 // Example of the same escaping in node-postgres: packages/pg/lib/utils.js
 //
-fn json_array_to_pg_array(value: &Value) -> Option<String> {
-    match value {
+fn json_array_to_pg_array(arr: Vec<&RawValue>) -> Result<String, serde_json::Error> {
+    let mut output = String::new();
+    let mut first = true;
+
+    output.push('{');
+
+    for value in arr {
+        if !first {
+            output.push(',');
+        }
+        first = false;
+
+        let value = json_array_to_pg_array_inner(value)?;
+        output.push_str(value.as_deref().unwrap_or("NULL"));
+    }
+
+    output.push('}');
+
+    Ok(output)
+}
+
+fn json_array_to_pg_array_inner(value: &RawValue) -> Result<Option<String>, serde_json::Error> {
+    let lazy_value = serde_json::from_str(value.get())?;
+    match lazy_value {
        // special care for nulls
-        Value::Null => None,
+        LazyValue::Null => Ok(None),

        // convert to text with escaping
        // here string needs to be escaped, as it is part of the array
-        v @ (Value::Bool(_) | Value::Number(_) | Value::String(_)) => Some(v.to_string()),
-        v @ Value::Object(_) => json_array_to_pg_array(&Value::String(v.to_string())),
+        LazyValue::Bool | LazyValue::Number | LazyValue::String(_) => {
+            Ok(Some(value.get().to_string()))
+        }
+        LazyValue::Object => Ok(Some(json!(value.get().to_string()).to_string())),

        // recurse into array
-        Value::Array(arr) => {
-            let vals = arr
-                .iter()
-                .map(json_array_to_pg_array)
-                .map(|v| v.unwrap_or_else(|| "NULL".to_string()))
-                .collect::<Vec<_>>()
-                .join(",");
-
-            Some(format!("{{{vals}}}"))
-        }
+        LazyValue::Array(arr) => Ok(Some(json_array_to_pg_array(arr)?)),
    }
 }

@@ -259,25 +284,31 @@ mod tests {
    use super::*;
    use serde_json::json;

+    fn json_to_pg_text_test(json: Vec<serde_json::Value>) -> Vec<Option<String>> {
+        let json = serde_json::Value::Array(json).to_string();
+        let json: Vec<&RawValue> = serde_json::from_str(&json).unwrap();
+        json_to_pg_text(&json).unwrap()
+    }
+
    #[test]
    fn test_atomic_types_to_pg_params() {
        let json = vec![Value::Bool(true), Value::Bool(false)];
-        let pg_params = json_to_pg_text(json);
+        let pg_params = json_to_pg_text_test(json);
        assert_eq!(
            pg_params,
            vec![Some("true".to_owned()), Some("false".to_owned())]
        );

        let json = vec![Value::Number(serde_json::Number::from(42))];
-        let pg_params = json_to_pg_text(json);
+        let pg_params = json_to_pg_text_test(json);
        assert_eq!(pg_params, vec![Some("42".to_owned())]);

        let json = vec![Value::String("foo\"".to_string())];
-        let pg_params = json_to_pg_text(json);
+        let pg_params = json_to_pg_text_test(json);
        assert_eq!(pg_params, vec![Some("foo\"".to_owned())]);

        let json = vec![Value::Null];
-        let pg_params = json_to_pg_text(json);
+        let pg_params = json_to_pg_text_test(json);
        assert_eq!(pg_params, vec![None]);
    }

@@ -286,7 +317,7 @@ mod tests {
        // atoms and escaping
        let json = "[true, false, null, \"NULL\", 42, \"foo\", \"bar\\\"-\\\\\"]";
        let json: Value = serde_json::from_str(json).unwrap();
-        let pg_params = json_to_pg_text(vec![json]);
+        let pg_params = json_to_pg_text_test(vec![json]);
        assert_eq!(
            pg_params,
            vec![Some(
@@ -297,7 +328,7 @@ mod tests {
        // nested arrays
        let json = "[[true, false], [null, 42], [\"foo\", \"bar\\\"-\\\\\"]]";
        let json: Value = serde_json::from_str(json).unwrap();
-        let pg_params = json_to_pg_text(vec![json]);
+        let pg_params = json_to_pg_text_test(vec![json]);
        assert_eq!(
            pg_params,
            vec![Some(
@@ -307,7 +338,7 @@ mod tests {
        // array of objects
        let json = r#"[{"foo": 1},{"bar": 2}]"#;
        let json: Value = serde_json::from_str(json).unwrap();
-        let pg_params = json_to_pg_text(vec![json]);
+        let pg_params = json_to_pg_text_test(vec![json]);
        assert_eq!(
            pg_params,
            vec![Some(r#"{"{\"foo\":1}","{\"bar\":2}"}"#.to_owned())]
--- a/proxy/src/serverless/json_raw_value.rs
+++ b/proxy/src/serverless/json_raw_value.rs
@@ -0,0 +1,193 @@
+//! [`serde_json::Value`] but uses RawValue internally
+//!
+//! This code forks from the serde_json code, but replaces internal Value with RawValue where possible.
+//!
+//! Taken from <https://github.com/serde-rs/json/blob/faab2e8d2fcf781a3f77f329df836ffb3aaacfba/src/value/de.rs>
+//! Licensed from serde-rs under MIT or APACHE-2.0, with modifications by Conrad Ludgate
+
+use core::fmt;
+use std::borrow::Cow;
+
+use serde::{
+    de::{IgnoredAny, MapAccess, SeqAccess, Visitor},
+    Deserialize,
+};
+use serde_json::value::RawValue;
+
+pub enum LazyValue<'de> {
+    Null,
+    Bool,
+    Number,
+    String(Cow<'de, str>),
+    Array(Vec<&'de RawValue>),
+    Object,
+}
+
+impl<'de> Deserialize<'de> for LazyValue<'de> {
+    #[inline]
+    fn deserialize<D>(deserializer: D) -> Result<LazyValue<'de>, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        struct ValueVisitor;
+
+        impl<'de> Visitor<'de> for ValueVisitor {
+            type Value = LazyValue<'de>;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                formatter.write_str("any valid JSON value")
+            }
+
+            #[inline]
+            fn visit_bool<E>(self, _value: bool) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::Bool)
+            }
+
+            #[inline]
+            fn visit_i64<E>(self, _value: i64) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::Number)
+            }
+
+            #[inline]
+            fn visit_u64<E>(self, _value: u64) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::Number)
+            }
+
+            #[inline]
+            fn visit_f64<E>(self, _value: f64) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::Number)
+            }
+
+            #[inline]
+            fn visit_str<E>(self, value: &str) -> Result<LazyValue<'de>, E>
+            where
+                E: serde::de::Error,
+            {
+                self.visit_string(String::from(value))
+            }
+
+            #[inline]
+            fn visit_borrowed_str<E>(self, value: &'de str) -> Result<LazyValue<'de>, E>
+            where
+                E: serde::de::Error,
+            {
+                Ok(LazyValue::String(Cow::Borrowed(value)))
+            }
+
+            #[inline]
+            fn visit_string<E>(self, value: String) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::String(Cow::Owned(value)))
+            }
+
+            #[inline]
+            fn visit_none<E>(self) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::Null)
+            }
+
+            #[inline]
+            fn visit_some<D>(self, deserializer: D) -> Result<LazyValue<'de>, D::Error>
+            where
+                D: serde::Deserializer<'de>,
+            {
+                Deserialize::deserialize(deserializer)
+            }
+
+            #[inline]
+            fn visit_unit<E>(self) -> Result<LazyValue<'de>, E> {
+                Ok(LazyValue::Null)
+            }
+
+            #[inline]
+            fn visit_seq<V>(self, mut visitor: V) -> Result<LazyValue<'de>, V::Error>
+            where
+                V: SeqAccess<'de>,
+            {
+                let mut vec = Vec::new();
+
+                while let Some(elem) = visitor.next_element()? {
+                    vec.push(elem);
+                }
+
+                Ok(LazyValue::Array(vec))
+            }
+
+            fn visit_map<V>(self, mut visitor: V) -> Result<LazyValue<'de>, V::Error>
+            where
+                V: MapAccess<'de>,
+            {
+                while visitor.next_entry::<IgnoredAny, IgnoredAny>()?.is_some() {}
+                Ok(LazyValue::Object)
+            }
+        }
+
+        deserializer.deserialize_any(ValueVisitor)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::borrow::Cow;
+
+    use typed_json::json;
+
+    use super::LazyValue;
+
+    #[test]
+    fn object() {
+        let json = json! {{
+            "foo": {
+                "bar": 1
+            },
+            "baz": [2, 3],
+        }}
+        .to_string();
+
+        let lazy: LazyValue = serde_json::from_str(&json).unwrap();
+
+        let LazyValue::Object = lazy else {
+            panic!("expected object")
+        };
+    }
+
+    #[test]
+    fn array() {
+        let json = json! {[
+            {
+                "bar": 1
+            },
+            [2, 3],
+        ]}
+        .to_string();
+
+        let lazy: LazyValue = serde_json::from_str(&json).unwrap();
+
+        let LazyValue::Array(array) = lazy else {
+            panic!("expected array")
+        };
+        assert_eq!(array.len(), 2);
+
+        assert_eq!(array[0].get(), r#"{"bar":1}"#);
+        assert_eq!(array[1].get(), r#"[2,3]"#);
+    }
+
+    #[test]
+    fn string() {
+        let json = json! { "hello world" }.to_string();
+
+        let lazy: LazyValue = serde_json::from_str(&json).unwrap();
+
+        let LazyValue::String(Cow::Borrowed(string)) = lazy else {
+            panic!("expected borrowed string")
+        };
+        assert_eq!(string, "hello world");
+
+        let json = json! { "hello \n world" }.to_string();
+
+        let lazy: LazyValue = serde_json::from_str(&json).unwrap();
+
+        let LazyValue::String(Cow::Owned(string)) = lazy else {
+            panic!("expected owned string")
+        };
+        assert_eq!(string, "hello \n world");
+    }
+}
--- a/proxy/src/serverless/local_conn_pool.rs
+++ b/proxy/src/serverless/local_conn_pool.rs
@@ -0,0 +1,544 @@
+use futures::{future::poll_fn, Future};
+use jose_jwk::jose_b64::base64ct::{Base64UrlUnpadded, Encoding};
+use p256::ecdsa::{Signature, SigningKey};
+use parking_lot::RwLock;
+use rand::rngs::OsRng;
+use serde_json::Value;
+use signature::Signer;
+use std::task::{ready, Poll};
+use std::{collections::HashMap, pin::pin, sync::Arc, sync::Weak, time::Duration};
+use tokio::time::Instant;
+use tokio_postgres::tls::NoTlsStream;
+use tokio_postgres::types::ToSql;
+use tokio_postgres::{AsyncMessage, ReadyForQueryStatus, Socket};
+use tokio_util::sync::CancellationToken;
+use typed_json::json;
+
+use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo};
+use crate::metrics::Metrics;
+use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS};
+use crate::{context::RequestMonitoring, DbName, RoleName};
+
+use tracing::{debug, error, warn, Span};
+use tracing::{info, info_span, Instrument};
+
+use super::backend::HttpConnError;
+use super::conn_pool::{ClientInnerExt, ConnInfo};
+
+struct ConnPoolEntry<C: ClientInnerExt> {
+    conn: ClientInner<C>,
+    _last_access: std::time::Instant,
+}
+
+// /// key id for the pg_session_jwt state
+// static PG_SESSION_JWT_KID: AtomicU64 = AtomicU64::new(1);
+
+// Per-endpoint connection pool, (dbname, username) -> DbUserConnPool
+// Number of open connections is limited by the `max_conns_per_endpoint`.
+pub(crate) struct EndpointConnPool<C: ClientInnerExt> {
+    pools: HashMap<(DbName, RoleName), DbUserConnPool<C>>,
+    total_conns: usize,
+    max_conns: usize,
+    global_pool_size_max_conns: usize,
+}
+
+impl<C: ClientInnerExt> EndpointConnPool<C> {
+    fn get_conn_entry(&mut self, db_user: (DbName, RoleName)) -> Option<ConnPoolEntry<C>> {
+        let Self {
+            pools, total_conns, ..
+        } = self;
+        pools
+            .get_mut(&db_user)
+            .and_then(|pool_entries| pool_entries.get_conn_entry(total_conns))
+    }
+
+    fn remove_client(&mut self, db_user: (DbName, RoleName), conn_id: uuid::Uuid) -> bool {
+        let Self {
+            pools, total_conns, ..
+        } = self;
+        if let Some(pool) = pools.get_mut(&db_user) {
+            let old_len = pool.conns.len();
+            pool.conns.retain(|conn| conn.conn.conn_id != conn_id);
+            let new_len = pool.conns.len();
+            let removed = old_len - new_len;
+            if removed > 0 {
+                Metrics::get()
+                    .proxy
+                    .http_pool_opened_connections
+                    .get_metric()
+                    .dec_by(removed as i64);
+            }
+            *total_conns -= removed;
+            removed > 0
+        } else {
+            false
+        }
+    }
+
+    fn put(pool: &RwLock<Self>, conn_info: &ConnInfo, client: ClientInner<C>) {
+        let conn_id = client.conn_id;
+
+        if client.is_closed() {
+            info!(%conn_id, "local_pool: throwing away connection '{conn_info}' because connection is closed");
+            return;
+        }
+        let global_max_conn = pool.read().global_pool_size_max_conns;
+        if pool.read().total_conns >= global_max_conn {
+            info!(%conn_id, "local_pool: throwing away connection '{conn_info}' because pool is full");
+            return;
+        }
+
+        // return connection to the pool
+        let mut returned = false;
+        let mut per_db_size = 0;
+        let total_conns = {
+            let mut pool = pool.write();
+
+            if pool.total_conns < pool.max_conns {
+                let pool_entries = pool.pools.entry(conn_info.db_and_user()).or_default();
+                pool_entries.conns.push(ConnPoolEntry {
+                    conn: client,
+                    _last_access: std::time::Instant::now(),
+                });
+
+                returned = true;
+                per_db_size = pool_entries.conns.len();
+
+                pool.total_conns += 1;
+                Metrics::get()
+                    .proxy
+                    .http_pool_opened_connections
+                    .get_metric()
+                    .inc();
+            }
+
+            pool.total_conns
+        };
+
+        // do logging outside of the mutex
+        if returned {
+            info!(%conn_id, "local_pool: returning connection '{conn_info}' back to the pool, total_conns={total_conns}, for this (db, user)={per_db_size}");
+        } else {
+            info!(%conn_id, "local_pool: throwing away connection '{conn_info}' because pool is full, total_conns={total_conns}");
+        }
+    }
+}
+
+impl<C: ClientInnerExt> Drop for EndpointConnPool<C> {
+    fn drop(&mut self) {
+        if self.total_conns > 0 {
+            Metrics::get()
+                .proxy
+                .http_pool_opened_connections
+                .get_metric()
+                .dec_by(self.total_conns as i64);
+        }
+    }
+}
+
+pub(crate) struct DbUserConnPool<C: ClientInnerExt> {
+    conns: Vec<ConnPoolEntry<C>>,
+}
+
+impl<C: ClientInnerExt> Default for DbUserConnPool<C> {
+    fn default() -> Self {
+        Self { conns: Vec::new() }
+    }
+}
+
+impl<C: ClientInnerExt> DbUserConnPool<C> {
+    fn clear_closed_clients(&mut self, conns: &mut usize) -> usize {
+        let old_len = self.conns.len();
+
+        self.conns.retain(|conn| !conn.conn.is_closed());
+
+        let new_len = self.conns.len();
+        let removed = old_len - new_len;
+        *conns -= removed;
+        removed
+    }
+
+    fn get_conn_entry(&mut self, conns: &mut usize) -> Option<ConnPoolEntry<C>> {
+        let mut removed = self.clear_closed_clients(conns);
+        let conn = self.conns.pop();
+        if conn.is_some() {
+            *conns -= 1;
+            removed += 1;
+        }
+        Metrics::get()
+            .proxy
+            .http_pool_opened_connections
+            .get_metric()
+            .dec_by(removed as i64);
+        conn
+    }
+}
+
+pub(crate) struct LocalConnPool<C: ClientInnerExt> {
+    global_pool: RwLock<EndpointConnPool<C>>,
+
+    config: &'static crate::config::HttpConfig,
+}
+
+impl<C: ClientInnerExt> LocalConnPool<C> {
+    pub(crate) fn new(config: &'static crate::config::HttpConfig) -> Arc<Self> {
+        Arc::new(Self {
+            global_pool: RwLock::new(EndpointConnPool {
+                pools: HashMap::new(),
+                total_conns: 0,
+                max_conns: config.pool_options.max_conns_per_endpoint,
+                global_pool_size_max_conns: config.pool_options.max_total_conns,
+            }),
+            config,
+        })
+    }
+
+    pub(crate) fn get_idle_timeout(&self) -> Duration {
+        self.config.pool_options.idle_timeout
+    }
+
+    // pub(crate) fn shutdown(&self) {
+    //     let mut pool = self.global_pool.write();
+    //     pool.pools.clear();
+    //     pool.total_conns = 0;
+    // }
+
+    pub(crate) fn get(
+        self: &Arc<Self>,
+        ctx: &RequestMonitoring,
+        conn_info: &ConnInfo,
+    ) -> Result<Option<LocalClient<C>>, HttpConnError> {
+        let mut client: Option<ClientInner<C>> = None;
+        if let Some(entry) = self
+            .global_pool
+            .write()
+            .get_conn_entry(conn_info.db_and_user())
+        {
+            client = Some(entry.conn);
+        }
+
+        // ok return cached connection if found and establish a new one otherwise
+        if let Some(client) = client {
+            if client.is_closed() {
+                info!("local_pool: cached connection '{conn_info}' is closed, opening a new one");
+                return Ok(None);
+            }
+            tracing::Span::current().record("conn_id", tracing::field::display(client.conn_id));
+            tracing::Span::current().record(
+                "pid",
+                tracing::field::display(client.inner.get_process_id()),
+            );
+            info!(
+                cold_start_info = ColdStartInfo::HttpPoolHit.as_str(),
+                "local_pool: reusing connection '{conn_info}'"
+            );
+            client.session.send(ctx.session_id())?;
+            ctx.set_cold_start_info(ColdStartInfo::HttpPoolHit);
+            ctx.success();
+            return Ok(Some(LocalClient::new(
+                client,
+                conn_info.clone(),
+                Arc::downgrade(self),
+            )));
+        }
+        Ok(None)
+    }
+}
+
+pub(crate) fn poll_client(
+    global_pool: Arc<LocalConnPool<tokio_postgres::Client>>,
+    ctx: &RequestMonitoring,
+    conn_info: ConnInfo,
+    client: tokio_postgres::Client,
+    mut connection: tokio_postgres::Connection<Socket, NoTlsStream>,
+    conn_id: uuid::Uuid,
+    aux: MetricsAuxInfo,
+) -> LocalClient<tokio_postgres::Client> {
+    let conn_gauge = Metrics::get().proxy.db_connections.guard(ctx.protocol());
+    let mut session_id = ctx.session_id();
+    let (tx, mut rx) = tokio::sync::watch::channel(session_id);
+
+    let span = info_span!(parent: None, "connection", %conn_id);
+    let cold_start_info = ctx.cold_start_info();
+    span.in_scope(|| {
+        info!(cold_start_info = cold_start_info.as_str(), %conn_info, %session_id, "new connection");
+    });
+    let pool = Arc::downgrade(&global_pool);
+    let pool_clone = pool.clone();
+
+    let db_user = conn_info.db_and_user();
+    let idle = global_pool.get_idle_timeout();
+    let cancel = CancellationToken::new();
+    let cancelled = cancel.clone().cancelled_owned();
+
+    tokio::spawn(
+    async move {
+        let _conn_gauge = conn_gauge;
+        let mut idle_timeout = pin!(tokio::time::sleep(idle));
+        let mut cancelled = pin!(cancelled);
+
+        poll_fn(move |cx| {
+            if cancelled.as_mut().poll(cx).is_ready() {
+                info!("connection dropped");
+                return Poll::Ready(())
+            }
+
+            match rx.has_changed() {
+                Ok(true) => {
+                    session_id = *rx.borrow_and_update();
+                    info!(%session_id, "changed session");
+                    idle_timeout.as_mut().reset(Instant::now() + idle);
+                }
+                Err(_) => {
+                    info!("connection dropped");
+                    return Poll::Ready(())
+                }
+                _ => {}
+            }
+
+            // 5 minute idle connection timeout
+            if idle_timeout.as_mut().poll(cx).is_ready() {
+                idle_timeout.as_mut().reset(Instant::now() + idle);
+                info!("connection idle");
+                if let Some(pool) = pool.clone().upgrade() {
+                    // remove client from pool - should close the connection if it's idle.
+                    // does nothing if the client is currently checked-out and in-use
+                    if pool.global_pool.write().remove_client(db_user.clone(), conn_id) {
+                        info!("idle connection removed");
+                    }
+                }
+            }
+
+            loop {
+                let message = ready!(connection.poll_message(cx));
+
+                match message {
+                    Some(Ok(AsyncMessage::Notice(notice))) => {
+                        info!(%session_id, "notice: {}", notice);
+                    }
+                    Some(Ok(AsyncMessage::Notification(notif))) => {
+                        warn!(%session_id, pid = notif.process_id(), channel = notif.channel(), "notification received");
+                    }
+                    Some(Ok(_)) => {
+                        warn!(%session_id, "unknown message");
+                    }
+                    Some(Err(e)) => {
+                        error!(%session_id, "connection error: {}", e);
+                        break
+                    }
+                    None => {
+                        info!("connection closed");
+                        break
+                    }
+                }
+            }
+
+            // remove from connection pool
+            if let Some(pool) = pool.clone().upgrade() {
+                if pool.global_pool.write().remove_client(db_user.clone(), conn_id) {
+                    info!("closed connection removed");
+                }
+            }
+
+            Poll::Ready(())
+        }).await;
+
+    }
+    .instrument(span));
+
+    let key = SigningKey::random(&mut OsRng);
+
+    let inner = ClientInner {
+        inner: client,
+        session: tx,
+        cancel,
+        aux,
+        conn_id,
+        key,
+        jti: 0,
+    };
+    LocalClient::new(inner, conn_info, pool_clone)
+}
+
+struct ClientInner<C: ClientInnerExt> {
+    inner: C,
+    session: tokio::sync::watch::Sender<uuid::Uuid>,
+    cancel: CancellationToken,
+    aux: MetricsAuxInfo,
+    conn_id: uuid::Uuid,
+
+    // needed for pg_session_jwt state
+    key: SigningKey,
+    jti: u64,
+}
+
+impl<C: ClientInnerExt> Drop for ClientInner<C> {
+    fn drop(&mut self) {
+        // on client drop, tell the conn to shut down
+        self.cancel.cancel();
+    }
+}
+
+impl<C: ClientInnerExt> ClientInner<C> {
+    pub(crate) fn is_closed(&self) -> bool {
+        self.inner.is_closed()
+    }
+}
+
+impl<C: ClientInnerExt> LocalClient<C> {
+    pub(crate) fn metrics(&self) -> Arc<MetricCounter> {
+        let aux = &self.inner.as_ref().unwrap().aux;
+        USAGE_METRICS.register(Ids {
+            endpoint_id: aux.endpoint_id,
+            branch_id: aux.branch_id,
+        })
+    }
+}
+
+pub(crate) struct LocalClient<C: ClientInnerExt> {
+    span: Span,
+    inner: Option<ClientInner<C>>,
+    conn_info: ConnInfo,
+    pool: Weak<LocalConnPool<C>>,
+}
+
+pub(crate) struct Discard<'a, C: ClientInnerExt> {
+    conn_info: &'a ConnInfo,
+    pool: &'a mut Weak<LocalConnPool<C>>,
+}
+
+impl<C: ClientInnerExt> LocalClient<C> {
+    pub(self) fn new(
+        inner: ClientInner<C>,
+        conn_info: ConnInfo,
+        pool: Weak<LocalConnPool<C>>,
+    ) -> Self {
+        Self {
+            inner: Some(inner),
+            span: Span::current(),
+            conn_info,
+            pool,
+        }
+    }
+    pub(crate) fn inner(&mut self) -> (&mut C, Discard<'_, C>) {
+        let Self {
+            inner,
+            pool,
+            conn_info,
+            span: _,
+        } = self;
+        let inner = inner.as_mut().expect("client inner should not be removed");
+        (&mut inner.inner, Discard { conn_info, pool })
+    }
+    pub(crate) fn key(&self) -> &SigningKey {
+        let inner = &self
+            .inner
+            .as_ref()
+            .expect("client inner should not be removed");
+        &inner.key
+    }
+}
+
+impl LocalClient<tokio_postgres::Client> {
+    pub(crate) async fn set_jwt_session(&mut self, payload: &[u8]) -> Result<(), HttpConnError> {
+        let inner = self
+            .inner
+            .as_mut()
+            .expect("client inner should not be removed");
+        inner.jti += 1;
+
+        let kid = inner.inner.get_process_id();
+        let header = json!({"kid":kid}).to_string();
+
+        let mut payload = serde_json::from_slice::<serde_json::Map<String, Value>>(payload)
+            .map_err(HttpConnError::JwtPayloadError)?;
+        payload.insert("jti".to_string(), Value::Number(inner.jti.into()));
+        let payload = Value::Object(payload).to_string();
+
+        debug!(
+            kid,
+            jti = inner.jti,
+            ?header,
+            ?payload,
+            "signing new ephemeral JWT"
+        );
+
+        let token = sign_jwt(&inner.key, header, payload);
+
+        // initiates the auth session
+        inner.inner.simple_query("discard all").await?;
+        inner
+            .inner
+            .query(
+                "select auth.jwt_session_init($1)",
+                &[&token as &(dyn ToSql + Sync)],
+            )
+            .await?;
+
+        info!(kid, jti = inner.jti, "user session state init");
+
+        Ok(())
+    }
+}
+
+fn sign_jwt(sk: &SigningKey, header: String, payload: String) -> String {
+    let header = Base64UrlUnpadded::encode_string(header.as_bytes());
+    let payload = Base64UrlUnpadded::encode_string(payload.as_bytes());
+
+    let message = format!("{header}.{payload}");
+    let sig: Signature = sk.sign(message.as_bytes());
+    let base64_sig = Base64UrlUnpadded::encode_string(&sig.to_bytes());
+    format!("{message}.{base64_sig}")
+}
+
+impl<C: ClientInnerExt> Discard<'_, C> {
+    pub(crate) fn check_idle(&mut self, status: ReadyForQueryStatus) {
+        let conn_info = &self.conn_info;
+        if status != ReadyForQueryStatus::Idle && std::mem::take(self.pool).strong_count() > 0 {
+            info!(
+                "local_pool: throwing away connection '{conn_info}' because connection is not idle"
+            );
+        }
+    }
+    pub(crate) fn discard(&mut self) {
+        let conn_info = &self.conn_info;
+        if std::mem::take(self.pool).strong_count() > 0 {
+            info!("local_pool: throwing away connection '{conn_info}' because connection is potentially in a broken state");
+        }
+    }
+}
+
+impl<C: ClientInnerExt> LocalClient<C> {
+    pub fn get_client(&self) -> &C {
+        &self
+            .inner
+            .as_ref()
+            .expect("client inner should not be removed")
+            .inner
+    }
+
+    fn do_drop(&mut self) -> Option<impl FnOnce()> {
+        let conn_info = self.conn_info.clone();
+        let client = self
+            .inner
+            .take()
+            .expect("client inner should not be removed");
+        if let Some(conn_pool) = std::mem::take(&mut self.pool).upgrade() {
+            let current_span = self.span.clone();
+            // return connection to the pool
+            return Some(move || {
+                let _span = current_span.enter();
+                EndpointConnPool::put(&conn_pool.global_pool, &conn_info, client);
+            });
+        }
+        None
+    }
+}
+
+impl<C: ClientInnerExt> Drop for LocalClient<C> {
+    fn drop(&mut self) {
+        if let Some(drop) = self.do_drop() {
+            tokio::task::spawn_blocking(drop);
+        }
+    }
+}
--- a/proxy/src/serverless/mod.rs
+++ b/proxy/src/serverless/mod.rs
@@ -8,6 +8,8 @@ mod conn_pool;
 mod http_conn_pool;
 mod http_util;
 mod json;
+mod json_raw_value;
+mod local_conn_pool;
 mod sql_over_http;
 mod websocket;

@@ -16,12 +18,13 @@ use atomic_take::AtomicTake;
 use bytes::Bytes;
 pub use conn_pool::GlobalConnPoolOptions;

+use anyhow::Context;
 use futures::future::{select, Either};
 use futures::TryFutureExt;
 use http::{Method, Response, StatusCode};
 use http_body_util::combinators::BoxBody;
 use http_body_util::{BodyExt, Empty};
-use hyper1::body::Incoming;
+use hyper::body::Incoming;
 use hyper_util::rt::TokioExecutor;
 use hyper_util::server::conn::auto::Builder;
 use rand::rngs::StdRng;
@@ -31,29 +34,29 @@ use tokio::time::timeout;
 use tokio_rustls::TlsAcceptor;
 use tokio_util::task::TaskTracker;

-use crate::auth::ServerlessBackend;
 use crate::cancellation::CancellationHandlerMain;
 use crate::config::ProxyConfig;
 use crate::context::RequestMonitoring;
-use crate::metrics::{Metrics, Protocol};
-use crate::protocol2::ChainRW;
+use crate::metrics::Metrics;
+use crate::protocol2::{read_proxy_protocol, ChainRW};
+use crate::proxy::run_until_cancelled;
 use crate::rate_limiter::EndpointRateLimiter;
 use crate::serverless::backend::PoolingBackend;
 use crate::serverless::http_util::{api_error_into_response, json_response};

-use std::net::IpAddr;
+use std::net::{IpAddr, SocketAddr};
 use std::pin::{pin, Pin};
 use std::sync::Arc;
 use tokio::net::{TcpListener, TcpStream};
 use tokio_util::sync::CancellationToken;
-use tracing::{error, info, instrument, warn, Instrument};
+use tracing::{info, warn, Instrument};
 use utils::http::error::ApiError;

 pub(crate) const SERVERLESS_DRIVER_SNI: &str = "api";

 pub async fn task_main(
    config: &'static ProxyConfig,
-    auth_backend: ServerlessBackend<'static>,
+    auth_backend: &'static crate::auth::Backend<'static, (), ()>,
    ws_listener: TcpListener,
    cancellation_token: CancellationToken,
    cancellation_handler: Arc<CancellationHandlerMain>,
@@ -63,6 +66,7 @@ pub async fn task_main(
        info!("websocket server has shut down");
    }

+    let local_pool = local_conn_pool::LocalConnPool::new(&config.http_config);
    let conn_pool = conn_pool::GlobalConnPool::new(&config.http_config);
    {
        let conn_pool = Arc::clone(&conn_pool);
@@ -105,6 +109,7 @@ pub async fn task_main(

    let backend = Arc::new(PoolingBackend {
        http_conn_pool: Arc::clone(&http_conn_pool),
+        local_pool,
        pool: Arc::clone(&conn_pool),
        config,
        auth_backend,
@@ -123,100 +128,81 @@ pub async fn task_main(
        }
    };

-    let requests = TaskTracker::new();
-    requests.close(); // allows `requests.wait to complete`
+    let connections = tokio_util::task::task_tracker::TaskTracker::new();
+    connections.close(); // allows `connections.wait to complete`

-    crate::connection_loop(
-        config,
-        ws_listener,
-        cancellation_token.clone(),
-        Protocol::Http,
-        C {
-            config,
-            backend,
-            cancellation_handler,
-            endpoint_rate_limiter,
-            tls_acceptor,
-            requests: requests.clone(),
-            cancellation_token,
-        },
-    )
-    .await?;
+    while let Some(res) = run_until_cancelled(ws_listener.accept(), &cancellation_token).await {
+        let (conn, peer_addr) = res.context("could not accept TCP stream")?;
+        if let Err(e) = conn.set_nodelay(true) {
+            tracing::error!("could not set nodelay: {e}");
+            continue;
+        }
+        let conn_id = uuid::Uuid::new_v4();
+        let http_conn_span = tracing::info_span!("http_conn", ?conn_id);

-    requests.wait().await;
-
-    Ok(())
-}
-
-#[derive(Clone)]
-struct C {
-    config: &'static ProxyConfig,
-    backend: Arc<PoolingBackend>,
-    cancellation_handler: Arc<CancellationHandlerMain>,
-    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-    tls_acceptor: Arc<dyn MaybeTlsAcceptor>,
-    requests: TaskTracker,
-    cancellation_token: CancellationToken,
-}
-
-impl super::ConnHandler for C {
-    #[instrument(name = "http_conn", skip_all, fields(conn_id))]
-    async fn handle(
-        self,
-        conn_id: uuid::Uuid,
-        peer_addr: IpAddr,
-        stream: ChainRW<TcpStream>,
-        conn_gauge: crate::metrics::NumClientConnectionsGuard<'static>,
-    ) {
-        // try and close an old HTTP connection.
-        // picked at random
        let n_connections = Metrics::get()
            .proxy
            .client_connections
            .sample(crate::metrics::Protocol::Http);
-        tracing::trace!(?n_connections, threshold = ?self.config.http_config.client_conn_threshold, "check");
-        if n_connections > self.config.http_config.client_conn_threshold {
+        tracing::trace!(?n_connections, threshold = ?config.http_config.client_conn_threshold, "check");
+        if n_connections > config.http_config.client_conn_threshold {
            tracing::trace!("attempting to cancel a random connection");
-            if let Some(token) = self.config.http_config.cancel_set.take() {
+            if let Some(token) = config.http_config.cancel_set.take() {
                tracing::debug!("cancelling a random connection");
                token.cancel();
            }
        }

-        let conn_token = self.cancellation_token.child_token();
-        let _cancel_guard = self
-            .config
-            .http_config
-            .cancel_set
-            .insert(conn_id, conn_token.clone());
+        let conn_token = cancellation_token.child_token();
+        let tls_acceptor = tls_acceptor.clone();
+        let backend = backend.clone();
+        let connections2 = connections.clone();
+        let cancellation_handler = cancellation_handler.clone();
+        let endpoint_rate_limiter = endpoint_rate_limiter.clone();
+        connections.spawn(
+            async move {
+                let conn_token2 = conn_token.clone();
+                let _cancel_guard = config.http_config.cancel_set.insert(conn_id, conn_token2);

-        let startup_result = Box::pin(connection_startup(
-            self.config,
-            self.tls_acceptor,
-            conn_id,
-            stream,
-            peer_addr,
-        ))
-        .await;
-        let Some((conn, peer_addr)) = startup_result else {
-            return;
-        };
+                let session_id = uuid::Uuid::new_v4();

-        Box::pin(connection_handler(
-            self.config,
-            self.backend,
-            self.requests,
-            self.cancellation_handler,
-            self.endpoint_rate_limiter,
-            conn_token,
-            conn,
-            peer_addr,
-            conn_id,
-        ))
-        .await;
+                let _gauge = Metrics::get()
+                    .proxy
+                    .client_connections
+                    .guard(crate::metrics::Protocol::Http);

-        drop(conn_gauge);
+                let startup_result = Box::pin(connection_startup(
+                    config,
+                    tls_acceptor,
+                    session_id,
+                    conn,
+                    peer_addr,
+                ))
+                .await;
+                let Some((conn, peer_addr)) = startup_result else {
+                    return;
+                };
+
+                Box::pin(connection_handler(
+                    config,
+                    backend,
+                    connections2,
+                    cancellation_handler,
+                    endpoint_rate_limiter,
+                    conn_token,
+                    conn,
+                    peer_addr,
+                    session_id,
+                ))
+                .await;
+            }
+            .instrument(http_conn_span),
+        );
    }
+
+    connections.wait().await;
+
+    Ok(())
 }

 pub(crate) trait AsyncReadWrite: AsyncRead + AsyncWrite + Send + 'static {}
@@ -244,14 +230,26 @@ impl MaybeTlsAcceptor for NoTls {
    }
 }

-/// Handles the TLS startup handshake.
+/// Handles the TCP startup lifecycle.
+/// 1. Parses PROXY protocol V2
+/// 2. Handles TLS handshake
 async fn connection_startup(
    config: &ProxyConfig,
    tls_acceptor: Arc<dyn MaybeTlsAcceptor>,
    session_id: uuid::Uuid,
-    conn: ChainRW<TcpStream>,
-    peer_addr: IpAddr,
+    conn: TcpStream,
+    peer_addr: SocketAddr,
 ) -> Option<(AsyncRW, IpAddr)> {
+    // handle PROXY protocol
+    let (conn, peer) = match read_proxy_protocol(conn).await {
+        Ok(c) => c,
+        Err(e) => {
+            tracing::warn!(?session_id, %peer_addr, "failed to accept TCP connection: invalid PROXY protocol V2 header: {e:#}");
+            return None;
+        }
+    };
+
+    let peer_addr = peer.unwrap_or(peer_addr).ip();
    let has_private_peer_addr = match peer_addr {
        IpAddr::V4(ip) => ip.is_private(),
        IpAddr::V6(_) => false,
@@ -310,7 +308,7 @@ async fn connection_handler(
    let server = Builder::new(TokioExecutor::new());
    let conn = server.serve_connection_with_upgrades(
        hyper_util::rt::TokioIo::new(conn),
-        hyper1::service::service_fn(move |req: hyper1::Request<Incoming>| {
+        hyper::service::service_fn(move |req: hyper::Request<Incoming>| {
            // First HTTP request shares the same session ID
            let session_id = session_id.take().unwrap_or_else(uuid::Uuid::new_v4);

@@ -363,7 +361,7 @@ async fn connection_handler(

 #[allow(clippy::too_many_arguments)]
 async fn request_handler(
-    mut request: hyper1::Request<Incoming>,
+    mut request: hyper::Request<Incoming>,
    config: &'static ProxyConfig,
    backend: Arc<PoolingBackend>,
    ws_connections: TaskTracker,
@@ -373,7 +371,7 @@ async fn request_handler(
    // used to cancel in-flight HTTP requests. not used to cancel websockets
    http_cancellation_token: CancellationToken,
    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
-) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
    let host = request
        .headers()
        .get("host")
@@ -385,10 +383,6 @@ async fn request_handler(
    if config.http_config.accept_websockets
        && framed_websockets::upgrade::is_upgrade_request(&request)
    {
-        let ServerlessBackend::ControlPlane(auth_backend) = backend.auth_backend else {
-            return json_response(StatusCode::BAD_REQUEST, "query is not supported");
-        };
-
        let ctx = RequestMonitoring::new(
            session_id,
            peer_addr,
@@ -406,7 +400,7 @@ async fn request_handler(
            async move {
                if let Err(e) = websocket::serve_websocket(
                    config,
-                    auth_backend,
+                    backend.auth_backend,
                    ctx,
                    websocket,
                    cancellation_handler,
@@ -415,7 +409,7 @@ async fn request_handler(
                )
                .await
                {
-                    error!("error in websocket connection: {e:#}");
+                    warn!("error in websocket connection: {e:#}");
                }
            }
            .instrument(span),
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -1,3 +1,4 @@
+use std::borrow::Cow;
 use std::pin::pin;
 use std::sync::Arc;

@@ -12,17 +13,17 @@ use http::Method;
 use http_body_util::combinators::BoxBody;
 use http_body_util::BodyExt;
 use http_body_util::Full;
-use hyper1::body::Body;
-use hyper1::body::Incoming;
-use hyper1::header;
-use hyper1::http::HeaderName;
-use hyper1::http::HeaderValue;
-use hyper1::Response;
-use hyper1::StatusCode;
-use hyper1::{HeaderMap, Request};
+use hyper::body::Body;
+use hyper::body::Incoming;
+use hyper::header;
+use hyper::http::HeaderName;
+use hyper::http::HeaderValue;
+use hyper::Response;
+use hyper::StatusCode;
+use hyper::{HeaderMap, Request};
 use pq_proto::StartupMessageParamsBuilder;
 use serde::Serialize;
-use serde_json::Value;
+use serde_json::value::RawValue;
 use tokio::time;
 use tokio_postgres::error::DbError;
 use tokio_postgres::error::ErrorPosition;
@@ -40,7 +41,7 @@ use url::Url;
 use urlencoding;
 use utils::http::error::ApiError;

-use crate::auth::backend::ComputeCredentials;
+use crate::auth::backend::ComputeCredentialKeys;
 use crate::auth::backend::ComputeUserInfo;
 use crate::auth::endpoint_sni;
 use crate::auth::ComputeUserInfoParseError;
@@ -57,41 +58,47 @@ use crate::metrics::Metrics;
 use crate::proxy::run_until_cancelled;
 use crate::proxy::NeonOptions;
 use crate::serverless::backend::HttpConnError;
+use crate::usage_metrics::MetricCounter;
 use crate::usage_metrics::MetricCounterRecorder;
 use crate::DbName;
 use crate::RoleName;

 use super::backend::LocalProxyConnError;
 use super::backend::PoolingBackend;
+use super::conn_pool;
 use super::conn_pool::AuthData;
-use super::conn_pool::Client;
 use super::conn_pool::ConnInfo;
 use super::conn_pool::ConnInfoWithAuth;
 use super::http_util::json_response;
 use super::json::json_to_pg_text;
 use super::json::pg_text_row_to_json;
 use super::json::JsonConversionError;
+use super::local_conn_pool;

 #[derive(serde::Deserialize)]
 #[serde(rename_all = "camelCase")]
-struct QueryData {
-    query: String,
-    #[serde(deserialize_with = "bytes_to_pg_text")]
-    params: Vec<Option<String>>,
+#[serde(bound = "'de: 'a")]
+struct QueryData<'a> {
+    #[serde(borrow)]
+    query: Cow<'a, str>,
+
+    #[serde(borrow)]
+    params: Vec<&'a RawValue>,
+
    #[serde(default)]
    array_mode: Option<bool>,
 }

 #[derive(serde::Deserialize)]
-struct BatchQueryData {
-    queries: Vec<QueryData>,
+#[serde(rename_all = "camelCase")]
+#[serde(bound = "'de: 'a")]
+struct BatchQueryData<'a> {
+    queries: Vec<QueryData<'a>>,
 }

-#[derive(serde::Deserialize)]
-#[serde(untagged)]
-enum Payload {
-    Single(QueryData),
-    Batch(BatchQueryData),
+enum Payload<'a> {
+    Batch(BatchQueryData<'a>),
+    Single(QueryData<'a>),
 }

 static CONN_STRING: HeaderName = HeaderName::from_static("neon-connection-string");
@@ -104,13 +111,18 @@ static TXN_DEFERRABLE: HeaderName = HeaderName::from_static("neon-batch-deferrab

 static HEADER_VALUE_TRUE: HeaderValue = HeaderValue::from_static("true");

-fn bytes_to_pg_text<'de, D>(deserializer: D) -> Result<Vec<Option<String>>, D::Error>
-where
-    D: serde::de::Deserializer<'de>,
-{
-    // TODO: consider avoiding the allocation here.
-    let json: Vec<Value> = serde::de::Deserialize::deserialize(deserializer)?;
-    Ok(json_to_pg_text(json))
+fn parse_pg_params(params: &[&RawValue]) -> Result<Vec<Option<String>>, ReadPayloadError> {
+    json_to_pg_text(params).map_err(ReadPayloadError::Parse)
+}
+
+fn parse_payload(body: &[u8]) -> Result<Payload<'_>, ReadPayloadError> {
+    // RawValue doesn't work via untagged enums
+    // so instead we try parse each individually
+    if let Ok(batch) = serde_json::from_slice(body) {
+        Ok(Payload::Batch(batch))
+    } else {
+        Ok(Payload::Single(serde_json::from_slice(body)?))
+    }
 }

 #[derive(Debug, thiserror::Error)]
@@ -273,7 +285,7 @@ pub(crate) async fn handle(
    request: Request<Incoming>,
    backend: Arc<PoolingBackend>,
    cancel: CancellationToken,
-) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, ApiError> {
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, ApiError> {
    let result = handle_inner(cancel, config, &ctx, request, backend).await;

    let mut response = match result {
@@ -436,7 +448,7 @@ impl UserFacingError for SqlOverHttpError {
 #[derive(Debug, thiserror::Error)]
 pub(crate) enum ReadPayloadError {
    #[error("could not read the HTTP request body: {0}")]
-    Read(#[from] hyper1::Error),
+    Read(#[from] hyper::Error),
    #[error("could not parse the HTTP request body: {0}")]
    Parse(#[from] serde_json::Error),
 }
@@ -477,7 +489,7 @@ struct HttpHeaders {
 }

 impl HttpHeaders {
-    fn try_parse(headers: &hyper1::http::HeaderMap) -> Result<Self, SqlOverHttpError> {
+    fn try_parse(headers: &hyper::http::HeaderMap) -> Result<Self, SqlOverHttpError> {
        // Determine the output options. Default behaviour is 'false'. Anything that is not
        // strictly 'true' assumed to be false.
        let raw_output = headers.get(&RAW_TEXT_OUTPUT) == Some(&HEADER_VALUE_TRUE);
@@ -530,7 +542,7 @@ async fn handle_inner(
    ctx: &RequestMonitoring,
    request: Request<Incoming>,
    backend: Arc<PoolingBackend>,
-) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {
    let _requeset_gauge = Metrics::get()
        .proxy
        .connection_requests
@@ -578,7 +590,7 @@ async fn handle_db_inner(
    conn_info: ConnInfo,
    auth: AuthData,
    backend: Arc<PoolingBackend>,
-) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {
    //
    // Determine the destination and connection params
    //
@@ -613,14 +625,15 @@ async fn handle_db_inner(
        async {
            let body = request.into_body().collect().await?.to_bytes();
            info!(length = body.len(), "request payload read");
-            let payload: Payload = serde_json::from_slice(&body)?;
-            Ok::<Payload, ReadPayloadError>(payload) // Adjust error type accordingly
+            Ok::<Bytes, ReadPayloadError>(body)
        }
        .map_err(SqlOverHttpError::from),
    );

    let authenticate_and_connect = Box::pin(
        async {
+            let is_local_proxy = matches!(backend.auth_backend, crate::auth::Backend::Local(_));
+
            let keys = match auth {
                AuthData::Password(pw) => {
                    backend
@@ -630,18 +643,24 @@ async fn handle_db_inner(
                AuthData::Jwt(jwt) => {
                    backend
                        .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)
-                        .await?;
-
-                    ComputeCredentials {
-                        info: conn_info.user_info.clone(),
-                        keys: crate::auth::backend::ComputeCredentialKeys::None,
-                    }
+                        .await?
+                }
+            };
+
+            let client = match keys.keys {
+                ComputeCredentialKeys::JwtPayload(payload) if is_local_proxy => {
+                    let mut client = backend.connect_to_local_postgres(ctx, conn_info).await?;
+                    client.set_jwt_session(&payload).await?;
+                    Client::Local(client)
+                }
+                _ => {
+                    let client = backend
+                        .connect_to_compute(ctx, conn_info, keys, !allow_pool)
+                        .await?;
+                    Client::Remote(client)
                }
            };

-            let client = backend
-                .connect_to_compute(ctx, conn_info, keys, !allow_pool)
-                .await?;
            // not strictly necessary to mark success here,
            // but it's just insurance for if we forget it somewhere else
            ctx.success();
@@ -650,7 +669,7 @@ async fn handle_db_inner(
        .map_err(SqlOverHttpError::from),
    );

-    let (payload, mut client) = match run_until_cancelled(
+    let (body, mut client) = match run_until_cancelled(
        // Run both operations in parallel
        try_join(
            pin!(fetch_and_process_request),
@@ -664,6 +683,8 @@ async fn handle_db_inner(
        None => return Err(SqlOverHttpError::Cancelled(SqlOverHttpCancel::Connect)),
    };

+    let payload = parse_payload(&body)?;
+
    let mut response = Response::builder()
        .status(StatusCode::OK)
        .header(header::CONTENT_TYPE, "application/json");
@@ -734,7 +755,7 @@ async fn handle_auth_broker_inner(
    conn_info: ConnInfo,
    jwt: String,
    backend: Arc<PoolingBackend>,
-) -> Result<Response<BoxBody<Bytes, hyper1::Error>>, SqlOverHttpError> {
+) -> Result<Response<BoxBody<Bytes, hyper::Error>>, SqlOverHttpError> {
    backend
        .authenticate_with_jwt(ctx, &conn_info.user_info, jwt)
        .await
@@ -771,12 +792,12 @@ async fn handle_auth_broker_inner(
        .map(|b| b.boxed()))
 }

-impl QueryData {
+impl QueryData<'_> {
    async fn process(
        self,
        config: &'static HttpConfig,
        cancel: CancellationToken,
-        client: &mut Client<tokio_postgres::Client>,
+        client: &mut Client,
        parsed_headers: HttpHeaders,
    ) -> Result<String, SqlOverHttpError> {
        let (inner, mut discard) = client.inner();
@@ -805,7 +826,7 @@ impl QueryData {
            Either::Right((_cancelled, query)) => {
                tracing::info!("cancelling query");
                if let Err(err) = cancel_token.cancel_query(NoTls).await {
-                    tracing::error!(?err, "could not cancel query");
+                    tracing::warn!(?err, "could not cancel query");
                }
                // wait for the query cancellation
                match time::timeout(time::Duration::from_millis(100), query).await {
@@ -845,12 +866,12 @@ impl QueryData {
    }
 }

-impl BatchQueryData {
+impl BatchQueryData<'_> {
    async fn process(
        self,
        config: &'static HttpConfig,
        cancel: CancellationToken,
-        client: &mut Client<tokio_postgres::Client>,
+        client: &mut Client,
        parsed_headers: HttpHeaders,
    ) -> Result<String, SqlOverHttpError> {
        info!("starting transaction");
@@ -894,7 +915,7 @@ impl BatchQueryData {
            }
            Err(SqlOverHttpError::Cancelled(_)) => {
                if let Err(err) = cancel_token.cancel_query(NoTls).await {
-                    tracing::error!(?err, "could not cancel query");
+                    tracing::warn!(?err, "could not cancel query");
                }
                // TODO: after cancelling, wait to see if we can get a status. maybe the connection is still safe.
                discard.discard();
@@ -921,7 +942,7 @@ async fn query_batch(
    config: &'static HttpConfig,
    cancel: CancellationToken,
    transaction: &Transaction<'_>,
-    queries: BatchQueryData,
+    queries: BatchQueryData<'_>,
    parsed_headers: HttpHeaders,
 ) -> Result<String, SqlOverHttpError> {
    let mut results = Vec::with_capacity(queries.queries.len());
@@ -959,12 +980,12 @@ async fn query_batch(
 async fn query_to_json<T: GenericClient>(
    config: &'static HttpConfig,
    client: &T,
-    data: QueryData,
+    data: QueryData<'_>,
    current_size: &mut usize,
    parsed_headers: HttpHeaders,
 ) -> Result<(ReadyForQueryStatus, impl Serialize), SqlOverHttpError> {
    info!("executing query");
-    let query_params = data.params;
+    let query_params = parse_pg_params(&data.params)?;
    let mut row_stream = std::pin::pin!(client.query_raw_txt(&data.query, query_params).await?);
    info!("finished executing query");

@@ -1043,3 +1064,88 @@ async fn query_to_json<T: GenericClient>(

    Ok((ready, results))
 }
+
+enum Client {
+    Remote(conn_pool::Client<tokio_postgres::Client>),
+    Local(local_conn_pool::LocalClient<tokio_postgres::Client>),
+}
+
+enum Discard<'a> {
+    Remote(conn_pool::Discard<'a, tokio_postgres::Client>),
+    Local(local_conn_pool::Discard<'a, tokio_postgres::Client>),
+}
+
+impl Client {
+    fn metrics(&self) -> Arc<MetricCounter> {
+        match self {
+            Client::Remote(client) => client.metrics(),
+            Client::Local(local_client) => local_client.metrics(),
+        }
+    }
+
+    fn inner(&mut self) -> (&mut tokio_postgres::Client, Discard<'_>) {
+        match self {
+            Client::Remote(client) => {
+                let (c, d) = client.inner();
+                (c, Discard::Remote(d))
+            }
+            Client::Local(local_client) => {
+                let (c, d) = local_client.inner();
+                (c, Discard::Local(d))
+            }
+        }
+    }
+}
+
+impl Discard<'_> {
+    fn check_idle(&mut self, status: ReadyForQueryStatus) {
+        match self {
+            Discard::Remote(discard) => discard.check_idle(status),
+            Discard::Local(discard) => discard.check_idle(status),
+        }
+    }
+    fn discard(&mut self) {
+        match self {
+            Discard::Remote(discard) => discard.discard(),
+            Discard::Local(discard) => discard.discard(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use typed_json::json;
+
+    use super::parse_payload;
+    use super::Payload;
+
+    #[test]
+    fn raw_single_payload() {
+        let body = json! {
+            {"query":"select $1","params":["1"]}
+        }
+        .to_string();
+
+        let Payload::Single(query) = parse_payload(body.as_bytes()).unwrap() else {
+            panic!("expected single")
+        };
+        assert_eq!(&*query.query, "select $1");
+        assert_eq!(query.params[0].get(), "\"1\"");
+    }
+
+    #[test]
+    fn raw_batch_payload() {
+        let body = json! {{
+            "queries": [
+                {"query":"select $1","params":["1"]},
+                {"query":"select $1","params":["2"]},
+            ]
+        }}
+        .to_string();
+
+        let Payload::Batch(query) = parse_payload(body.as_bytes()).unwrap() else {
+            panic!("expected batch")
+        };
+        assert_eq!(query.queries.len(), 2);
+    }
+}
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -1,4 +1,3 @@
-use crate::control_plane::provider::ControlPlaneBackend;
 use crate::proxy::ErrorSource;
 use crate::{
    cancellation::CancellationHandlerMain,
@@ -13,7 +12,7 @@ use anyhow::Context as _;
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use framed_websockets::{Frame, OpCode, WebSocketServer};
 use futures::{Sink, Stream};
-use hyper1::upgrade::OnUpgrade;
+use hyper::upgrade::OnUpgrade;
 use hyper_util::rt::TokioIo;
 use pin_project_lite::pin_project;

@@ -130,7 +129,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AsyncBufRead for WebSocketRw<S> {

 pub(crate) async fn serve_websocket(
    config: &'static ProxyConfig,
-    auth_backend: &'static ControlPlaneBackend,
+    auth_backend: &'static crate::auth::Backend<'static, (), ()>,
    ctx: RequestMonitoring,
    websocket: OnUpgrade,
    cancellation_handler: Arc<CancellationHandlerMain>,
--- a/proxy/src/usage_metrics.rs
+++ b/proxy/src/usage_metrics.rs
@@ -27,7 +27,7 @@ use std::{
 };
 use tokio::io::AsyncWriteExt;
 use tokio_util::sync::CancellationToken;
-use tracing::{error, info, instrument, trace};
+use tracing::{error, info, instrument, trace, warn};
 use utils::backoff;
 use uuid::{NoContext, Timestamp};

@@ -346,7 +346,7 @@ async fn collect_metrics_iteration(
            error!("metrics endpoint refused the sent metrics: {:?}", res);
            for metric in chunk.events.iter().filter(|e| e.value > (1u64 << 40)) {
                // Report if the metric value is suspiciously large
-                error!("potentially abnormal metric value: {:?}", metric);
+                warn!("potentially abnormal metric value: {:?}", metric);
            }
        }
    }
@@ -485,49 +485,51 @@ async fn upload_events_chunk(

 #[cfg(test)]
 mod tests {
-    use std::{
-        net::TcpListener,
-        sync::{Arc, Mutex},
-    };
+    use super::*;

+    use crate::{http, BranchId, EndpointId};
    use anyhow::Error;
    use chrono::Utc;
    use consumption_metrics::{Event, EventChunk};
-    use hyper::{
-        service::{make_service_fn, service_fn},
-        Body, Response,
-    };
+    use http_body_util::BodyExt;
+    use hyper::{body::Incoming, server::conn::http1, service::service_fn, Request, Response};
+    use hyper_util::rt::TokioIo;
+    use std::sync::{Arc, Mutex};
+    use tokio::net::TcpListener;
    use url::Url;

-    use super::*;
-    use crate::{http, BranchId, EndpointId};
-
    #[tokio::test]
    async fn metrics() {
-        let listener = TcpListener::bind("0.0.0.0:0").unwrap();
+        type Report = EventChunk<'static, Event<Ids, String>>;
+        let reports: Arc<Mutex<Vec<Report>>> = Arc::default();

-        let reports = Arc::new(Mutex::new(vec![]));
-        let reports2 = reports.clone();
-
-        let server = hyper::server::Server::from_tcp(listener)
-            .unwrap()
-            .serve(make_service_fn(move |_| {
-                let reports = reports.clone();
-                async move {
-                    Ok::<_, Error>(service_fn(move |req| {
+        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+        let addr = listener.local_addr().unwrap();
+        tokio::spawn({
+            let reports = reports.clone();
+            async move {
+                loop {
+                    if let Ok((stream, _addr)) = listener.accept().await {
                        let reports = reports.clone();
-                        async move {
-                            let bytes = hyper::body::to_bytes(req.into_body()).await?;
-                            let events: EventChunk<'static, Event<Ids, String>> =
-                                serde_json::from_slice(&bytes)?;
-                            reports.lock().unwrap().push(events);
-                            Ok::<_, Error>(Response::new(Body::from(vec![])))
-                        }
-                    }))
+                        http1::Builder::new()
+                            .serve_connection(
+                                TokioIo::new(stream),
+                                service_fn(move |req: Request<Incoming>| {
+                                    let reports = reports.clone();
+                                    async move {
+                                        let bytes = req.into_body().collect().await?.to_bytes();
+                                        let events = serde_json::from_slice(&bytes)?;
+                                        reports.lock().unwrap().push(events);
+                                        Ok::<_, Error>(Response::new(String::new()))
+                                    }
+                                }),
+                            )
+                            .await
+                            .unwrap();
+                    }
                }
-            }));
-        let addr = server.local_addr();
-        tokio::spawn(server);
+            }
+        });

        let metrics = Metrics::default();
        let client = http::new_client();
@@ -536,7 +538,7 @@ mod tests {

        // no counters have been registered
        collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
-        let r = std::mem::take(&mut *reports2.lock().unwrap());
+        let r = std::mem::take(&mut *reports.lock().unwrap());
        assert!(r.is_empty());

        // register a new counter
@@ -548,7 +550,7 @@ mod tests {

        // the counter should be observed despite 0 egress
        collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
-        let r = std::mem::take(&mut *reports2.lock().unwrap());
+        let r = std::mem::take(&mut *reports.lock().unwrap());
        assert_eq!(r.len(), 1);
        assert_eq!(r[0].events.len(), 1);
        assert_eq!(r[0].events[0].value, 0);
@@ -558,7 +560,7 @@ mod tests {

        // egress should be observered
        collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
-        let r = std::mem::take(&mut *reports2.lock().unwrap());
+        let r = std::mem::take(&mut *reports.lock().unwrap());
        assert_eq!(r.len(), 1);
        assert_eq!(r[0].events.len(), 1);
        assert_eq!(r[0].events[0].value, 1);
@@ -568,7 +570,7 @@ mod tests {

        // we do not observe the counter
        collect_metrics_iteration(&metrics.endpoints, &client, &endpoint, "foo", now, now).await;
-        let r = std::mem::take(&mut *reports2.lock().unwrap());
+        let r = std::mem::take(&mut *reports.lock().unwrap());
        assert!(r.is_empty());

        // counter is unregistered
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -97,5 +97,8 @@ select = [
    "I", # isort
    "W", # pycodestyle
    "B", # bugbear
-    "UP032", # f-string
+    "UP", # pyupgrade
 ]
+
+[tool.ruff.lint.pyupgrade]
+keep-runtime-typing = true # Remove this stanza when we require Python 3.10
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -23,6 +23,7 @@ crc32c.workspace = true
 fail.workspace = true
 hex.workspace = true
 humantime.workspace = true
+http.workspace = true
 hyper0.workspace = true
 futures.workspace = true
 once_cell.workspace = true
--- a/safekeeper/src/auth.rs
+++ b/safekeeper/src/auth.rs
@@ -15,15 +15,20 @@ pub fn check_permission(claims: &Claims, tenant_id: Option<TenantId>) -> Result<
            }
            Ok(())
        }
-        (Scope::Admin | Scope::PageServerApi | Scope::GenerationsApi | Scope::Scrubber, _) => {
-            Err(AuthError(
-                format!(
-                    "JWT scope '{:?}' is ineligible for Safekeeper auth",
-                    claims.scope
-                )
-                .into(),
-            ))
-        }
+        (
+            Scope::Admin
+            | Scope::PageServerApi
+            | Scope::GenerationsApi
+            | Scope::Infra
+            | Scope::Scrubber,
+            _,
+        ) => Err(AuthError(
+            format!(
+                "JWT scope '{:?}' is ineligible for Safekeeper auth",
+                claims.scope
+            )
+            .into(),
+        )),
        (Scope::SafekeeperData, _) => Ok(()),
    }
 }
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -12,8 +12,8 @@ use metrics::{
    core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts},
    proto::MetricFamily,
    register_histogram_vec, register_int_counter, register_int_counter_pair,
-    register_int_counter_pair_vec, register_int_counter_vec, Gauge, HistogramVec, IntCounter,
-    IntCounterPair, IntCounterPairVec, IntCounterVec, IntGaugeVec,
+    register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, Gauge,
+    HistogramVec, IntCounter, IntCounterPair, IntCounterPairVec, IntCounterVec, IntGaugeVec,
 };
 use once_cell::sync::Lazy;

@@ -231,6 +231,14 @@ pub(crate) static EVICTION_EVENTS_COMPLETED: Lazy<IntCounterVec> = Lazy::new(||
    .expect("Failed to register metric")
 });

+pub static NUM_EVICTED_TIMELINES: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!(
+        "safekeeper_evicted_timelines",
+        "Number of currently evicted timelines"
+    )
+    .expect("Failed to register metric")
+});
+
 pub const LABEL_UNKNOWN: &str = "unknown";

 /// Labels for traffic metrics.
--- a/Show More
+++ b/Show More