diff --git a/.github/workflows/_benchmarking_preparation.yml b/.github/workflows/_benchmarking_preparation.yml
index a52e43b4da..d60f97320b 100644
--- a/.github/workflows/_benchmarking_preparation.yml
+++ b/.github/workflows/_benchmarking_preparation.yml
@@ -3,19 +3,23 @@ name: Prepare benchmarking databases by restoring dumps
 on:
   workflow_call:
     # no inputs needed
-    
+
 defaults:
   run:
     shell: bash -euxo pipefail {0}
 
 jobs:
   setup-databases:
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
     strategy:
       fail-fast: false
       matrix:
-        platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ] 
+        platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ]
         database: [ clickbench, tpch, userexample ]
-  
+
     env:
       LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib
       PLATFORM: ${{ matrix.platform }}
@@ -23,7 +27,10 @@ jobs:
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
@@ -32,13 +39,13 @@ jobs:
       run: |
         case "${PLATFORM}" in
           neon)
-            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }} 
+            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
             ;;
           aws-rds-postgres)
-            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }} 
+            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
             ;;
           aws-aurora-serverless-v2-postgres)
-            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }} 
+            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }}
             ;;
           *)
             echo >&2 "Unknown PLATFORM=${PLATFORM}"
@@ -46,10 +53,17 @@ jobs:
             ;;
         esac
 
-        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT  
+        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
     - uses: actions/checkout@v4
 
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:
@@ -57,23 +71,23 @@ jobs:
         path: /tmp/neon/
         prefix: latest
 
-    # we create a table that has one row for each database that we want to restore with the status whether the restore is done    
+    # we create a table that has one row for each database that we want to restore with the status whether the restore is done
     - name: Create benchmark_restore_status table if it does not exist
       env:
         BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
         DATABASE_NAME: ${{ matrix.database }}
-      # to avoid a race condition of multiple jobs trying to create the table at the same time, 
+      # to avoid a race condition of multiple jobs trying to create the table at the same time,
       # we use an advisory lock
       run: |
         ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
-        SELECT pg_advisory_lock(4711);  
+        SELECT pg_advisory_lock(4711);
         CREATE TABLE IF NOT EXISTS benchmark_restore_status (
         databasename text primary key,
         restore_done boolean
         );
         SELECT pg_advisory_unlock(4711);
         "
-    
+
     - name: Check if restore is already done
       id: check-restore-done
       env:
@@ -107,7 +121,7 @@ jobs:
         DATABASE_NAME: ${{ matrix.database }}
       run: |
         mkdir -p /tmp/dumps
-        aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/ 
+        aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/
 
     - name: Replace database name in connection string
       if: steps.check-restore-done.outputs.skip != 'true'
@@ -126,17 +140,17 @@ jobs:
         else
           new_connstr="${base_connstr}/${DATABASE_NAME}"
         fi
-        echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT  
+        echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT
 
     - name: Restore dump
       if: steps.check-restore-done.outputs.skip != 'true'
       env:
         DATABASE_NAME: ${{ matrix.database }}
         DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}
-        # the following works only with larger computes: 
+        # the following works only with larger computes:
         # PGOPTIONS: "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
         # we add the || true because:
-        # the dumps were created with Neon and contain neon extensions that are not 
+        # the dumps were created with Neon and contain neon extensions that are not
         # available in RDS, so we will always report an error, but we can ignore it
       run: |
         ${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \
diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml
index 67152b6991..5fc6aa247a 100644
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -236,9 +236,7 @@ jobs:
 
           # run pageserver tests with different settings
           for io_engine in std-fs tokio-epoll-uring ; do
-            for io_buffer_alignment in 0 1 512 ; do
-              NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT=$io_buffer_alignment ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'
-            done
+            NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'
           done
 
           # Run separate tests for real S3
@@ -257,7 +255,15 @@ jobs:
           ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
 
       - name: Install postgres binaries
-        run: cp -a pg_install /tmp/neon/pg_install
+        run: |
+          # Use tar to copy files matching the pattern, preserving the paths in the destionation
+          tar c \
+            pg_install/v* \
+            pg_install/build/*/src/test/regress/*.so \
+            pg_install/build/*/src/test/regress/pg_regress \
+            pg_install/build/*/src/test/isolation/isolationtester \
+            pg_install/build/*/src/test/isolation/pg_isolation_regress \
+            | tar  x -C /tmp/neon
 
       - name: Upload Neon artifact
         uses: ./.github/actions/upload
diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index a4a597acde..32806b89ab 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -12,7 +12,6 @@ on:
     #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
     #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
     - cron:   '0 3 * * *' # run once a day, timezone is utc
-
   workflow_dispatch: # adds ability to run this manually
     inputs:
       region_id:
@@ -59,7 +58,7 @@ jobs:
     permissions:
       contents: write
       statuses: write
-      id-token: write # Required for OIDC authentication in azure runners
+      id-token: write # aws-actions/configure-aws-credentials
     strategy:
       fail-fast: false
       matrix:
@@ -68,12 +67,10 @@ jobs:
             PLATFORM: "neon-staging"
             region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
             RUNNER: [ self-hosted, us-east-2, x64 ]
-            IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
           - DEFAULT_PG_VERSION: 16
             PLATFORM: "azure-staging"
             region_id: 'azure-eastus2'
             RUNNER: [ self-hosted, eastus2, x64 ]
-            IMAGE: neondatabase/build-tools:pinned
     env:
       TEST_PG_BENCH_DURATIONS_MATRIX: "300"
       TEST_PG_BENCH_SCALES_MATRIX: "10,100"
@@ -86,7 +83,10 @@ jobs:
 
     runs-on: ${{ matrix.RUNNER }}
     container:
-      image: ${{ matrix.IMAGE }}
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
@@ -164,6 +164,10 @@ jobs:
 
   replication-tests:
     if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
     env:
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
       DEFAULT_PG_VERSION: 16
@@ -174,12 +178,21 @@ jobs:
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
     - uses: actions/checkout@v4
 
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
 
     - name: Download Neon artifact
       uses: ./.github/actions/download
@@ -267,7 +280,7 @@ jobs:
         region_id_default=${{ env.DEFAULT_REGION_ID }}
         runner_default='["self-hosted", "us-east-2", "x64"]'
         runner_azure='["self-hosted", "eastus2", "x64"]'
-        image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned"
+        image_default="neondatabase/build-tools:pinned"
         matrix='{
           "pg_version" : [
             16
@@ -344,7 +357,7 @@ jobs:
     permissions:
       contents: write
       statuses: write
-      id-token: write # Required for OIDC authentication in azure runners
+      id-token: write # aws-actions/configure-aws-credentials
 
     strategy:
       fail-fast: false
@@ -371,7 +384,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
 
-    - name: Configure AWS credentials # necessary on Azure runners
+    - name: Configure AWS credentials
       uses: aws-actions/configure-aws-credentials@v4
       with:
         aws-region: eu-central-1
@@ -492,17 +505,15 @@ jobs:
     permissions:
       contents: write
       statuses: write
-      id-token: write # Required for OIDC authentication in azure runners
+      id-token: write # aws-actions/configure-aws-credentials
     strategy:
       fail-fast: false
       matrix:
         include:
           - PLATFORM: "neonvm-captest-pgvector"
             RUNNER: [ self-hosted, us-east-2, x64 ]
-            IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
           - PLATFORM: "azure-captest-pgvector"
             RUNNER: [ self-hosted, eastus2, x64 ]
-            IMAGE: neondatabase/build-tools:pinned
 
     env:
       TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
@@ -511,13 +522,16 @@ jobs:
       DEFAULT_PG_VERSION: 16
       TEST_OUTPUT: /tmp/test_output
       BUILD_TYPE: remote
-      LD_LIBRARY_PATH: /home/nonroot/pg/usr/lib/x86_64-linux-gnu
+
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
       PLATFORM: ${{ matrix.PLATFORM }}
 
     runs-on: ${{ matrix.RUNNER }}
     container:
-      image: ${{ matrix.IMAGE }}
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
@@ -527,17 +541,26 @@ jobs:
     # instead of using Neon artifacts containing pgbench
     - name: Install postgresql-16 where pytest expects it
       run: |
+        # Just to make it easier to test things locally on macOS (with arm64)
+        arch=$(uname -m | sed 's/x86_64/amd64/g' | sed 's/aarch64/arm64/g')
+
         cd /home/nonroot
-        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb
-        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb
-        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb 
-        dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg
-        dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg
-        dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg
+        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-17/libpq5_17.0-1.pgdg110+1_${arch}.deb"
+        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb"
+        wget -q "https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110+2_${arch}.deb"
+        dpkg -x libpq5_17.0-1.pgdg110+1_${arch}.deb pg
+        dpkg -x postgresql-16_16.4-1.pgdg110+2_${arch}.deb pg
+        dpkg -x postgresql-client-16_16.4-1.pgdg110+2_${arch}.deb pg
+
         mkdir -p /tmp/neon/pg_install/v16/bin
-        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench  
-        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql  
-        ln -s /home/nonroot/pg/usr/lib/x86_64-linux-gnu /tmp/neon/pg_install/v16/lib 
+        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
+        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql    /tmp/neon/pg_install/v16/bin/psql
+        ln -s /home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu     /tmp/neon/pg_install/v16/lib
+
+        LD_LIBRARY_PATH="/home/nonroot/pg/usr/lib/$(uname -m)-linux-gnu:${LD_LIBRARY_PATH:-}"
+        export LD_LIBRARY_PATH
+        echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" >> ${GITHUB_ENV}
+
         /tmp/neon/pg_install/v16/bin/pgbench --version
         /tmp/neon/pg_install/v16/bin/psql --version
 
@@ -559,7 +582,7 @@ jobs:
 
         echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
 
-    - name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3
+    - name: Configure AWS credentials
       uses: aws-actions/configure-aws-credentials@v4
       with:
         aws-region: eu-central-1
@@ -620,6 +643,10 @@ jobs:
     # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
     # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
     if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
     needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ]
 
     strategy:
@@ -638,12 +665,22 @@ jobs:
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
     - uses: actions/checkout@v4
 
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:
@@ -714,6 +751,10 @@ jobs:
     #
     # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
     if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
     needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ]
 
     strategy:
@@ -731,12 +772,22 @@ jobs:
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
     - uses: actions/checkout@v4
 
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:
@@ -806,6 +857,10 @@ jobs:
 
   user-examples-compare:
     if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
     needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ]
 
     strategy:
@@ -822,12 +877,22 @@ jobs:
 
     runs-on: [ self-hosted, us-east-2, x64 ]
     container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
+      image: neondatabase/build-tools:pinned
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
       options: --init
 
     steps:
     - uses: actions/checkout@v4
 
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
     - name: Download Neon artifact
       uses: ./.github/actions/download
       with:
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 81a9fd99ae..ba5d139553 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -1190,10 +1190,9 @@ jobs:
 
               files_to_promote+=("s3://${BUCKET}/${s3_key}")
 
-              # TODO Add v17
-              for pg_version in v14 v15 v16; do
+              for pg_version in v14 v15 v16 v17; do
                 # We run less tests for debug builds, so we don't need to promote them
-                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v16" ] ; }; then
+                if [ "${build_type}" == "debug" ] && { [ "${arch}" == "ARM64" ] || [ "${pg_version}" != "v17" ] ; }; then
                   continue
                 fi
 
diff --git a/Cargo.lock b/Cargo.lock
index d0702e09d4..2bd828367c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1321,6 +1321,7 @@ dependencies = [
  "clap",
  "comfy-table",
  "compute_api",
+ "futures",
  "humantime",
  "humantime-serde",
  "hyper 0.14.30",
@@ -4296,6 +4297,7 @@ dependencies = [
  "camino-tempfile",
  "chrono",
  "clap",
+ "compute_api",
  "consumption_metrics",
  "dashmap",
  "ecdsa 0.16.9",
diff --git a/Dockerfile.build-tools b/Dockerfile.build-tools
index c4209c7a12..d8bcacf228 100644
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -13,6 +13,9 @@ RUN useradd -ms /bin/bash nonroot -b /home
 SHELL ["/bin/bash", "-c"]
 
 # System deps
+#
+# 'gdb' is included so that we get backtraces of core dumps produced in
+# regression tests
 RUN set -e \
     && apt update \
     && apt install -y \
@@ -24,6 +27,7 @@ RUN set -e \
         cmake \
         curl \
         flex \
+        gdb \
         git \
         gnupg \
         gzip \
diff --git a/compute/etc/neon_collector.yml b/compute/etc/neon_collector.yml
index 29be0958dd..acb17d3cc0 100644
--- a/compute/etc/neon_collector.yml
+++ b/compute/etc/neon_collector.yml
@@ -195,7 +195,7 @@ metrics:
       -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
       -- temporary snapshot files are renamed to the actual snapshot files after they are
       -- completely built. We only WAL-log the completely built snapshot files.
-      (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
+      (SELECT COUNT(*) FROM pg_ls_dir('pg_logical/snapshots') AS name WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
 
 # In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
 # It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
@@ -244,4 +244,3 @@ metrics:
     SELECT slot_name,
            CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
     FROM pg_replication_slots;
-
diff --git a/compute/vm-image-spec.yaml b/compute/vm-image-spec.yaml
index 0af44745e5..50fcd62e4f 100644
--- a/compute/vm-image-spec.yaml
+++ b/compute/vm-image-spec.yaml
@@ -11,6 +11,10 @@ commands:
     user: root
     sysvInitAction: sysinit
     shell: 'chmod 711 /neonvm/bin/resize-swap'
+  - name: chmod-set-disk-quota
+    user: root
+    sysvInitAction: sysinit
+    shell: 'chmod 711 /neonvm/bin/set-disk-quota'
   - name: pgbouncer
     user: postgres
     sysvInitAction: respawn
@@ -30,11 +34,12 @@ commands:
 shutdownHook: |
   su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
 files:
-  - filename: compute_ctl-resize-swap
+  - filename: compute_ctl-sudoers
     content: |
       # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
-      # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
-      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
+      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
+      # regardless of hostname (ALL)
+      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota
   - filename: cgconfig.conf
     content: |
       # Configuration for cgroups in VM compute nodes
@@ -100,7 +105,7 @@ merge: |
       && apt install --no-install-recommends -y \
              sudo \
       && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-  COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
+  COPY compute_ctl-sudoers /etc/sudoers.d/compute_ctl-sudoers
 
   COPY cgconfig.conf /etc/cgconfig.conf
 
diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs
index 9499a7186e..b10638c454 100644
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -44,6 +44,7 @@ use std::{thread, time::Duration};
 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Arg;
+use compute_tools::disk_quota::set_disk_quota;
 use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
 use signal_hook::consts::{SIGQUIT, SIGTERM};
 use signal_hook::{consts::SIGINT, iterator::Signals};
@@ -151,6 +152,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
     let spec_json = matches.get_one::<String>("spec");
     let spec_path = matches.get_one::<String>("spec-path");
     let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
+    let set_disk_quota_for_fs = matches.get_one::<String>("set-disk-quota-for-fs");
 
     Ok(ProcessCliResult {
         connstr,
@@ -161,6 +163,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
         spec_json,
         spec_path,
         resize_swap_on_bind,
+        set_disk_quota_for_fs,
     })
 }
 
@@ -173,6 +176,7 @@ struct ProcessCliResult<'clap> {
     spec_json: Option<&'clap String>,
     spec_path: Option<&'clap String>,
     resize_swap_on_bind: bool,
+    set_disk_quota_for_fs: Option<&'clap String>,
 }
 
 fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
@@ -293,6 +297,7 @@ fn wait_spec(
         pgbin,
         ext_remote_storage,
         resize_swap_on_bind,
+        set_disk_quota_for_fs,
         http_port,
         ..
     }: ProcessCliResult,
@@ -373,6 +378,7 @@ fn wait_spec(
         compute,
         http_port,
         resize_swap_on_bind,
+        set_disk_quota_for_fs: set_disk_quota_for_fs.cloned(),
     })
 }
 
@@ -381,6 +387,7 @@ struct WaitSpecResult {
     // passed through from ProcessCliResult
     http_port: u16,
     resize_swap_on_bind: bool,
+    set_disk_quota_for_fs: Option<String>,
 }
 
 fn start_postgres(
@@ -390,6 +397,7 @@ fn start_postgres(
         compute,
         http_port,
         resize_swap_on_bind,
+        set_disk_quota_for_fs,
     }: WaitSpecResult,
 ) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
     // We got all we need, update the state.
@@ -403,6 +411,7 @@ fn start_postgres(
     );
     // before we release the mutex, fetch the swap size (if any) for later.
     let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
+    let disk_quota_bytes = state.pspec.as_ref().unwrap().spec.disk_quota_bytes;
     drop(state);
 
     // Launch remaining service threads
@@ -422,8 +431,8 @@ fn start_postgres(
         // OOM-killed during startup because swap wasn't available yet.
         match resize_swap(size_bytes) {
             Ok(()) => {
-                let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%size_bytes, %size_gib, "resized swap");
+                let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
+                info!(%size_bytes, %size_mib, "resized swap");
             }
             Err(err) => {
                 let err = err.context("failed to resize swap");
@@ -432,10 +441,29 @@ fn start_postgres(
                 // Mark compute startup as failed; don't try to start postgres, and report this
                 // error to the control plane when it next asks.
                 prestartup_failed = true;
-                let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{err:?}"));
-                state.status = ComputeStatus::Failed;
-                compute.state_changed.notify_all();
+                compute.set_failed_status(err);
+                delay_exit = true;
+            }
+        }
+    }
+
+    // Set disk quota if the compute spec says so
+    if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
+        (disk_quota_bytes, set_disk_quota_for_fs)
+    {
+        match set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) {
+            Ok(()) => {
+                let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
+                info!(%disk_quota_bytes, %size_mib, "set disk quota");
+            }
+            Err(err) => {
+                let err = err.context("failed to set disk quota");
+                error!("{err:#}");
+
+                // Mark compute startup as failed; don't try to start postgres, and report this
+                // error to the control plane when it next asks.
+                prestartup_failed = true;
+                compute.set_failed_status(err);
                 delay_exit = true;
             }
         }
@@ -450,16 +478,7 @@ fn start_postgres(
             Ok(pg) => Some(pg),
             Err(err) => {
                 error!("could not start the compute node: {:#}", err);
-                let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{:?}", err));
-                state.status = ComputeStatus::Failed;
-                // Notify others that Postgres failed to start. In case of configuring the
-                // empty compute, it's likely that API handler is still waiting for compute
-                // state change. With this we will notify it that compute is in Failed state,
-                // so control plane will know about it earlier and record proper error instead
-                // of timeout.
-                compute.state_changed.notify_all();
-                drop(state); // unlock
+                compute.set_failed_status(err);
                 delay_exit = true;
                 None
             }
@@ -750,6 +769,11 @@ fn cli() -> clap::Command {
                 .long("resize-swap-on-bind")
                 .action(clap::ArgAction::SetTrue),
         )
+        .arg(
+            Arg::new("set-disk-quota-for-fs")
+                .long("set-disk-quota-for-fs")
+                .value_name("SET_DISK_QUOTA_FOR_FS")
+        )
 }
 
 /// When compute_ctl is killed, send also termination signal to sync-safekeepers
diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs
index 1f47bb58a3..147eb2a161 100644
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -10,6 +10,7 @@ use std::sync::atomic::AtomicU32;
 use std::sync::atomic::Ordering;
 use std::sync::{Condvar, Mutex, RwLock};
 use std::thread;
+use std::time::Duration;
 use std::time::Instant;
 
 use anyhow::{Context, Result};
@@ -305,6 +306,13 @@ impl ComputeNode {
         self.state_changed.notify_all();
     }
 
+    pub fn set_failed_status(&self, err: anyhow::Error) {
+        let mut state = self.state.lock().unwrap();
+        state.error = Some(format!("{err:?}"));
+        state.status = ComputeStatus::Failed;
+        self.state_changed.notify_all();
+    }
+
     pub fn get_status(&self) -> ComputeStatus {
         self.state.lock().unwrap().status
     }
@@ -710,7 +718,7 @@ impl ComputeNode {
         info!("running initdb");
         let initdb_bin = Path::new(&self.pgbin).parent().unwrap().join("initdb");
         Command::new(initdb_bin)
-            .args(["-D", pgdata])
+            .args(["--pgdata", pgdata])
             .output()
             .expect("cannot start initdb process");
 
@@ -1123,6 +1131,9 @@ impl ComputeNode {
     //
     // Use that as a default location and pattern, except macos where core dumps are written
     // to /cores/ directory by default.
+    //
+    // With default Linux settings, the core dump file is called just "core", so check for
+    // that too.
     pub fn check_for_core_dumps(&self) -> Result<()> {
         let core_dump_dir = match std::env::consts::OS {
             "macos" => Path::new("/cores/"),
@@ -1134,8 +1145,17 @@ impl ComputeNode {
         let files = fs::read_dir(core_dump_dir)?;
         let cores = files.filter_map(|entry| {
             let entry = entry.ok()?;
-            let _ = entry.file_name().to_str()?.strip_prefix("core.")?;
-            Some(entry.path())
+
+            let is_core_dump = match entry.file_name().to_str()? {
+                n if n.starts_with("core.") => true,
+                "core" => true,
+                _ => false,
+            };
+            if is_core_dump {
+                Some(entry.path())
+            } else {
+                None
+            }
         });
 
         // Print backtrace for each core dump
@@ -1386,6 +1406,36 @@ LIMIT 100",
         }
         Ok(remote_ext_metrics)
     }
+
+    /// Waits until current thread receives a state changed notification and
+    /// the pageserver connection strings has changed.
+    ///
+    /// The operation will time out after a specified duration.
+    pub fn wait_timeout_while_pageserver_connstr_unchanged(&self, duration: Duration) {
+        let state = self.state.lock().unwrap();
+        let old_pageserver_connstr = state
+            .pspec
+            .as_ref()
+            .expect("spec must be set")
+            .pageserver_connstr
+            .clone();
+        let mut unchanged = true;
+        let _ = self
+            .state_changed
+            .wait_timeout_while(state, duration, |s| {
+                let pageserver_connstr = &s
+                    .pspec
+                    .as_ref()
+                    .expect("spec must be set")
+                    .pageserver_connstr;
+                unchanged = pageserver_connstr == &old_pageserver_connstr;
+                unchanged
+            })
+            .unwrap();
+        if !unchanged {
+            info!("Pageserver config changed");
+        }
+    }
 }
 
 pub fn forward_termination_signal() {
diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs
index 274a221ac7..7bd0e4938d 100644
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -11,9 +11,17 @@ use crate::compute::ComputeNode;
 fn configurator_main_loop(compute: &Arc<ComputeNode>) {
     info!("waiting for reconfiguration requests");
     loop {
-        let state = compute.state.lock().unwrap();
-        let mut state = compute.state_changed.wait(state).unwrap();
+        let mut state = compute.state.lock().unwrap();
 
+        // We have to re-check the status after re-acquiring the lock because it could be that
+        // the status has changed while we were waiting for the lock, and we might not need to
+        // wait on the condition variable. Otherwise, we might end up in some soft-/deadlock, i.e.
+        // we are waiting for a condition variable that will never be signaled.
+        if state.status != ComputeStatus::ConfigurationPending {
+            state = compute.state_changed.wait(state).unwrap();
+        }
+
+        // Re-check the status after waking up
         if state.status == ComputeStatus::ConfigurationPending {
             info!("got configuration request");
             state.status = ComputeStatus::Configuration;
diff --git a/compute_tools/src/disk_quota.rs b/compute_tools/src/disk_quota.rs
new file mode 100644
index 0000000000..e838c5b9fd
--- /dev/null
+++ b/compute_tools/src/disk_quota.rs
@@ -0,0 +1,25 @@
+use anyhow::Context;
+
+pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";
+
+/// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
+/// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
+pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
+    let size_kb = size_bytes / 1024;
+    // run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
+    let child_result = std::process::Command::new("/usr/bin/sudo")
+        .arg(DISK_QUOTA_BIN)
+        .arg(size_kb.to_string())
+        .arg(fs_mountpoint)
+        .spawn();
+
+    child_result
+        .context("spawn() failed")
+        .and_then(|mut child| child.wait().context("wait() failed"))
+        .and_then(|status| match status.success() {
+            true => Ok(()),
+            false => Err(anyhow::anyhow!("process exited with {status}")),
+        })
+        // wrap any prior error with the overall context that we couldn't run the command
+        .with_context(|| format!("could not run `/usr/bin/sudo {DISK_QUOTA_BIN}`"))
+}
diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs
index c402d63305..c5b4ca632c 100644
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -10,6 +10,7 @@ pub mod http;
 pub mod logger;
 pub mod catalog;
 pub mod compute;
+pub mod disk_quota;
 pub mod extension_server;
 pub mod lsn_lease;
 mod migration;
diff --git a/compute_tools/src/lsn_lease.rs b/compute_tools/src/lsn_lease.rs
index 7e5917c55f..3061d387a5 100644
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -57,10 +57,10 @@ fn lsn_lease_bg_task(
             .max(valid_duration / 2);
 
         info!(
-            "Succeeded, sleeping for {} seconds",
+            "Request succeeded, sleeping for {} seconds",
             sleep_duration.as_secs()
         );
-        thread::sleep(sleep_duration);
+        compute.wait_timeout_while_pageserver_connstr_unchanged(sleep_duration);
     }
 }
 
@@ -89,10 +89,7 @@ fn acquire_lsn_lease_with_retry(
                 .map(|connstr| {
                     let mut config = postgres::Config::from_str(connstr).expect("Invalid connstr");
                     if let Some(storage_auth_token) = &spec.storage_auth_token {
-                        info!("Got storage auth token from spec file");
                         config.password(storage_auth_token.clone());
-                    } else {
-                        info!("Storage auth token not set");
                     }
                     config
                 })
@@ -108,9 +105,11 @@ fn acquire_lsn_lease_with_retry(
                 bail!("Permanent error: lease could not be obtained, LSN is behind the GC cutoff");
             }
             Err(e) => {
-                warn!("Failed to acquire lsn lease: {e} (attempt {attempts}");
+                warn!("Failed to acquire lsn lease: {e} (attempt {attempts})");
 
-                thread::sleep(Duration::from_millis(retry_period_ms as u64));
+                compute.wait_timeout_while_pageserver_connstr_unchanged(Duration::from_millis(
+                    retry_period_ms as u64,
+                ));
                 retry_period_ms *= 1.5;
                 retry_period_ms = retry_period_ms.min(MAX_RETRY_PERIOD_MS);
             }
diff --git a/control_plane/Cargo.toml b/control_plane/Cargo.toml
index df87c181bf..355eca0fe5 100644
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -9,6 +9,7 @@ anyhow.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
+futures.workspace = true
 humantime.workspace = true
 nix.workspace = true
 once_cell.workspace = true
diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs
index 92f609761a..624936620d 100644
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -6,7 +6,7 @@
 //! rely on `neon_local` to set up the environment for each test.
 //!
 use anyhow::{anyhow, bail, Context, Result};
-use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
+use clap::Parser;
 use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
 use control_plane::local_env::{
@@ -56,10 +56,627 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);
 
-const DEFAULT_PG_VERSION: &str = "16";
+const DEFAULT_PG_VERSION: u32 = 16;
 
 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
 
+#[derive(clap::Parser)]
+#[command(version = GIT_VERSION, about, name = "Neon CLI")]
+struct Cli {
+    #[command(subcommand)]
+    command: NeonLocalCmd,
+}
+
+#[derive(clap::Subcommand)]
+enum NeonLocalCmd {
+    Init(InitCmdArgs),
+
+    #[command(subcommand)]
+    Tenant(TenantCmd),
+    #[command(subcommand)]
+    Timeline(TimelineCmd),
+    #[command(subcommand)]
+    Pageserver(PageserverCmd),
+    #[command(subcommand)]
+    #[clap(alias = "storage_controller")]
+    StorageController(StorageControllerCmd),
+    #[command(subcommand)]
+    #[clap(alias = "storage_broker")]
+    StorageBroker(StorageBrokerCmd),
+    #[command(subcommand)]
+    Safekeeper(SafekeeperCmd),
+    #[command(subcommand)]
+    Endpoint(EndpointCmd),
+    #[command(subcommand)]
+    Mappings(MappingsCmd),
+
+    Start(StartCmdArgs),
+    Stop(StopCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Initialize a new Neon repository, preparing configs for services to start with")]
+struct InitCmdArgs {
+    #[clap(long, help("How many pageservers to create (default 1)"))]
+    num_pageservers: Option<u16>,
+
+    #[clap(long)]
+    config: Option<PathBuf>,
+
+    #[clap(long, help("Force initialization even if the repository is not empty"))]
+    #[arg(value_parser)]
+    #[clap(default_value = "must-not-exist")]
+    force: InitForceMode,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Start pageserver and safekeepers")]
+struct StartCmdArgs {
+    #[clap(long = "start-timeout", default_value = "10s")]
+    timeout: humantime::Duration,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Stop pageserver and safekeepers")]
+struct StopCmdArgs {
+    #[arg(value_enum)]
+    #[clap(long, default_value_t = StopMode::Fast)]
+    mode: StopMode,
+}
+
+#[derive(Clone, Copy, clap::ValueEnum)]
+enum StopMode {
+    Fast,
+    Immediate,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage tenants")]
+enum TenantCmd {
+    List,
+    Create(TenantCreateCmdArgs),
+    SetDefault(TenantSetDefaultCmdArgs),
+    Config(TenantConfigCmdArgs),
+    Import(TenantImportCmdArgs),
+}
+
+#[derive(clap::Args)]
+struct TenantCreateCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(
+        long,
+        help = "Use a specific timeline id when creating a tenant and its initial timeline"
+    )]
+    timeline_id: Option<TimelineId>,
+
+    #[clap(short = 'c')]
+    config: Vec<String>,
+
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
+    #[clap(long, help = "Postgres version to use for the initial timeline")]
+    pg_version: u32,
+
+    #[clap(
+        long,
+        help = "Use this tenant in future CLI commands where tenant_id is needed, but not specified"
+    )]
+    set_default: bool,
+
+    #[clap(long, help = "Number of shards in the new tenant")]
+    #[arg(default_value_t = 0)]
+    shard_count: u8,
+    #[clap(long, help = "Sharding stripe size in pages")]
+    shard_stripe_size: Option<u32>,
+
+    #[clap(long, help = "Placement policy shards in this tenant")]
+    #[arg(value_parser = parse_placement_policy)]
+    placement_policy: Option<PlacementPolicy>,
+}
+
+fn parse_placement_policy(s: &str) -> anyhow::Result<PlacementPolicy> {
+    Ok(serde_json::from_str::<PlacementPolicy>(s)?)
+}
+
+#[derive(clap::Args)]
+#[clap(
+    about = "Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"
+)]
+struct TenantSetDefaultCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: TenantId,
+}
+
+#[derive(clap::Args)]
+struct TenantConfigCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(short = 'c')]
+    config: Vec<String>,
+}
+
+#[derive(clap::Args)]
+#[clap(
+    about = "Import a tenant that is present in remote storage, and create branches for its timelines"
+)]
+struct TenantImportCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: TenantId,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage timelines")]
+enum TimelineCmd {
+    List(TimelineListCmdArgs),
+    Branch(TimelineBranchCmdArgs),
+    Create(TimelineCreateCmdArgs),
+    Import(TimelineImportCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "List all timelines available to this pageserver")]
+struct TimelineListCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_shard_id: Option<TenantShardId>,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Create a new timeline, branching off from another timeline")]
+struct TimelineBranchCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(long, help = "New timeline's ID")]
+    timeline_id: Option<TimelineId>,
+
+    #[clap(long, help = "Human-readable alias for the new timeline")]
+    branch_name: String,
+
+    #[clap(
+        long,
+        help = "Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name."
+    )]
+    ancestor_branch_name: Option<String>,
+
+    #[clap(
+        long,
+        help = "When using another timeline as base, use a specific Lsn in it instead of the latest one"
+    )]
+    ancestor_start_lsn: Option<Lsn>,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Create a new blank timeline")]
+struct TimelineCreateCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(long, help = "New timeline's ID")]
+    timeline_id: Option<TimelineId>,
+
+    #[clap(long, help = "Human-readable alias for the new timeline")]
+    branch_name: String,
+
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
+    #[clap(long, help = "Postgres version")]
+    pg_version: u32,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Import timeline from a basebackup directory")]
+struct TimelineImportCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(long, help = "New timeline's ID")]
+    timeline_id: TimelineId,
+
+    #[clap(long, help = "Human-readable alias for the new timeline")]
+    branch_name: String,
+
+    #[clap(long, help = "Basebackup tarfile to import")]
+    base_tarfile: PathBuf,
+
+    #[clap(long, help = "Lsn the basebackup starts at")]
+    base_lsn: Lsn,
+
+    #[clap(long, help = "Wal to add after base")]
+    wal_tarfile: Option<PathBuf>,
+
+    #[clap(long, help = "Lsn the basebackup ends at")]
+    end_lsn: Option<Lsn>,
+
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
+    #[clap(long, help = "Postgres version of the backup being imported")]
+    pg_version: u32,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage pageservers")]
+enum PageserverCmd {
+    Status(PageserverStatusCmdArgs),
+    Start(PageserverStartCmdArgs),
+    Stop(PageserverStopCmdArgs),
+    Restart(PageserverRestartCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Show status of a local pageserver")]
+struct PageserverStatusCmdArgs {
+    #[clap(long = "id", help = "pageserver id")]
+    pageserver_id: Option<NodeId>,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Start local pageserver")]
+struct PageserverStartCmdArgs {
+    #[clap(long = "id", help = "pageserver id")]
+    pageserver_id: Option<NodeId>,
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Stop local pageserver")]
+struct PageserverStopCmdArgs {
+    #[clap(long = "id", help = "pageserver id")]
+    pageserver_id: Option<NodeId>,
+
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
+    #[arg(value_enum, default_value = "fast")]
+    stop_mode: StopMode,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Restart local pageserver")]
+struct PageserverRestartCmdArgs {
+    #[clap(long = "id", help = "pageserver id")]
+    pageserver_id: Option<NodeId>,
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage storage controller")]
+enum StorageControllerCmd {
+    Start(StorageControllerStartCmdArgs),
+    Stop(StorageControllerStopCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Start storage controller")]
+struct StorageControllerStartCmdArgs {
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+
+    #[clap(
+        long,
+        help = "Identifier used to distinguish storage controller instances"
+    )]
+    #[arg(default_value_t = 1)]
+    instance_id: u8,
+
+    #[clap(
+        long,
+        help = "Base port for the storage controller instance idenfified by instance-id (defaults to pageserver cplane api)"
+    )]
+    base_port: Option<u16>,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Stop storage controller")]
+struct StorageControllerStopCmdArgs {
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
+    #[arg(value_enum, default_value = "fast")]
+    stop_mode: StopMode,
+
+    #[clap(
+        long,
+        help = "Identifier used to distinguish storage controller instances"
+    )]
+    #[arg(default_value_t = 1)]
+    instance_id: u8,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage storage broker")]
+enum StorageBrokerCmd {
+    Start(StorageBrokerStartCmdArgs),
+    Stop(StorageBrokerStopCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Start broker")]
+struct StorageBrokerStartCmdArgs {
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "stop broker")]
+struct StorageBrokerStopCmdArgs {
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
+    #[arg(value_enum, default_value = "fast")]
+    stop_mode: StopMode,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage safekeepers")]
+enum SafekeeperCmd {
+    Start(SafekeeperStartCmdArgs),
+    Stop(SafekeeperStopCmdArgs),
+    Restart(SafekeeperRestartCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Start local safekeeper")]
+struct SafekeeperStartCmdArgs {
+    #[clap(help = "safekeeper id")]
+    #[arg(default_value_t = NodeId(1))]
+    id: NodeId,
+
+    #[clap(
+        short = 'e',
+        long = "safekeeper-extra-opt",
+        help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
+    )]
+    extra_opt: Vec<String>,
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Stop local safekeeper")]
+struct SafekeeperStopCmdArgs {
+    #[clap(help = "safekeeper id")]
+    #[arg(default_value_t = NodeId(1))]
+    id: NodeId,
+
+    #[arg(value_enum, default_value = "fast")]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
+    stop_mode: StopMode,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Restart local safekeeper")]
+struct SafekeeperRestartCmdArgs {
+    #[clap(help = "safekeeper id")]
+    #[arg(default_value_t = NodeId(1))]
+    id: NodeId,
+
+    #[arg(value_enum, default_value = "fast")]
+    #[clap(
+        short = 'm',
+        help = "If 'immediate', don't flush repository data at shutdown"
+    )]
+    stop_mode: StopMode,
+
+    #[clap(
+        short = 'e',
+        long = "safekeeper-extra-opt",
+        help = "Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo"
+    )]
+    extra_opt: Vec<String>,
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage Postgres instances")]
+enum EndpointCmd {
+    List(EndpointListCmdArgs),
+    Create(EndpointCreateCmdArgs),
+    Start(EndpointStartCmdArgs),
+    Reconfigure(EndpointReconfigureCmdArgs),
+    Stop(EndpointStopCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "List endpoints")]
+struct EndpointListCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_shard_id: Option<TenantShardId>,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Create a compute endpoint")]
+struct EndpointCreateCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(help = "Postgres endpoint id")]
+    endpoint_id: Option<String>,
+    #[clap(long, help = "Name of the branch the endpoint will run on")]
+    branch_name: Option<String>,
+    #[clap(
+        long,
+        help = "Specify Lsn on the timeline to start from. By default, end of the timeline would be used"
+    )]
+    lsn: Option<Lsn>,
+    #[clap(long)]
+    pg_port: Option<u16>,
+    #[clap(long)]
+    http_port: Option<u16>,
+    #[clap(long = "pageserver-id")]
+    endpoint_pageserver_id: Option<NodeId>,
+
+    #[clap(
+        long,
+        help = "Don't do basebackup, create endpoint directory with only config files",
+        action = clap::ArgAction::Set,
+        default_value_t = false
+    )]
+    config_only: bool,
+
+    #[arg(default_value_t = DEFAULT_PG_VERSION)]
+    #[clap(long, help = "Postgres version")]
+    pg_version: u32,
+
+    #[clap(
+        long,
+        help = "If set, the node will be a hot replica on the specified timeline",
+        action = clap::ArgAction::Set,
+        default_value_t = false
+    )]
+    hot_standby: bool,
+
+    #[clap(long, help = "If set, will set up the catalog for neon_superuser")]
+    update_catalog: bool,
+
+    #[clap(
+        long,
+        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
+    )]
+    allow_multiple: bool,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Start postgres. If the endpoint doesn't exist yet, it is created.")]
+struct EndpointStartCmdArgs {
+    #[clap(help = "Postgres endpoint id")]
+    endpoint_id: String,
+    #[clap(long = "pageserver-id")]
+    endpoint_pageserver_id: Option<NodeId>,
+
+    #[clap(long)]
+    safekeepers: Option<String>,
+
+    #[clap(
+        long,
+        help = "Configure the remote extensions storage proxy gateway to request for extensions."
+    )]
+    remote_ext_config: Option<String>,
+
+    #[clap(
+        long,
+        help = "If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`"
+    )]
+    create_test_user: bool,
+
+    #[clap(
+        long,
+        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
+    )]
+    allow_multiple: bool,
+
+    #[clap(short = 't', long, help = "timeout until we fail the command")]
+    #[arg(default_value = "10s")]
+    start_timeout: humantime::Duration,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Reconfigure an endpoint")]
+struct EndpointReconfigureCmdArgs {
+    #[clap(
+        long = "tenant-id",
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: Option<TenantId>,
+
+    #[clap(help = "Postgres endpoint id")]
+    endpoint_id: String,
+    #[clap(long = "pageserver-id")]
+    endpoint_pageserver_id: Option<NodeId>,
+
+    #[clap(long)]
+    safekeepers: Option<String>,
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Stop an endpoint")]
+struct EndpointStopCmdArgs {
+    #[clap(help = "Postgres endpoint id")]
+    endpoint_id: String,
+
+    #[clap(
+        long,
+        help = "Also delete data directory (now optional, should be default in future)"
+    )]
+    destroy: bool,
+
+    #[clap(long, help = "Postgres shutdown mode, passed to \"pg_ctl -m <mode>\"")]
+    #[arg(value_parser(["smart", "fast", "immediate"]))]
+    #[arg(default_value = "fast")]
+    mode: String,
+}
+
+#[derive(clap::Subcommand)]
+#[clap(about = "Manage neon_local branch name mappings")]
+enum MappingsCmd {
+    Map(MappingsMapCmdArgs),
+}
+
+#[derive(clap::Args)]
+#[clap(about = "Create new mapping which cannot exist already")]
+struct MappingsMapCmdArgs {
+    #[clap(
+        long,
+        help = "Tenant id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    tenant_id: TenantId,
+    #[clap(
+        long,
+        help = "Timeline id. Represented as a hexadecimal string 32 symbols length"
+    )]
+    timeline_id: TimelineId,
+    #[clap(long, help = "Branch name to give to the timeline")]
+    branch_name: String,
+}
+
 ///
 /// Timelines tree element used as a value in the HashMap.
 ///
@@ -80,19 +697,13 @@ struct TimelineTreeEl {
 //   * Providing CLI api to the pageserver
 //   * TODO: export/import to/from usual postgres
 fn main() -> Result<()> {
-    let matches = cli().get_matches();
-
-    let (sub_name, sub_args) = match matches.subcommand() {
-        Some(subcommand_data) => subcommand_data,
-        None => bail!("no subcommand provided"),
-    };
+    let cli = Cli::parse();
 
     // Check for 'neon init' command first.
-    let subcommand_result = if sub_name == "init" {
-        handle_init(sub_args).map(|env| Some(Cow::Owned(env)))
+    let subcommand_result = if let NeonLocalCmd::Init(args) = cli.command {
+        handle_init(&args).map(|env| Some(Cow::Owned(env)))
     } else {
         // all other commands need an existing config
-
         let env = LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
         let original_env = env.clone();
         let env = Box::leak(Box::new(env));
@@ -101,19 +712,20 @@ fn main() -> Result<()> {
             .build()
             .unwrap();
 
-        let subcommand_result = match sub_name {
-            "tenant" => rt.block_on(handle_tenant(sub_args, env)),
-            "timeline" => rt.block_on(handle_timeline(sub_args, env)),
-            "start" => rt.block_on(handle_start_all(env, get_start_timeout(sub_args))),
-            "stop" => rt.block_on(handle_stop_all(sub_args, env)),
-            "pageserver" => rt.block_on(handle_pageserver(sub_args, env)),
-            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, env)),
-            "storage_broker" => rt.block_on(handle_storage_broker(sub_args, env)),
-            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, env)),
-            "endpoint" => rt.block_on(handle_endpoint(sub_args, env)),
-            "mappings" => handle_mappings(sub_args, env),
-            "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
-            _ => bail!("unexpected subcommand {sub_name}"),
+        let subcommand_result = match cli.command {
+            NeonLocalCmd::Init(_) => unreachable!("init was handled earlier already"),
+            NeonLocalCmd::Start(args) => rt.block_on(handle_start_all(&args, env)),
+            NeonLocalCmd::Stop(args) => rt.block_on(handle_stop_all(&args, env)),
+            NeonLocalCmd::Tenant(subcmd) => rt.block_on(handle_tenant(&subcmd, env)),
+            NeonLocalCmd::Timeline(subcmd) => rt.block_on(handle_timeline(&subcmd, env)),
+            NeonLocalCmd::Pageserver(subcmd) => rt.block_on(handle_pageserver(&subcmd, env)),
+            NeonLocalCmd::StorageController(subcmd) => {
+                rt.block_on(handle_storage_controller(&subcmd, env))
+            }
+            NeonLocalCmd::StorageBroker(subcmd) => rt.block_on(handle_storage_broker(&subcmd, env)),
+            NeonLocalCmd::Safekeeper(subcmd) => rt.block_on(handle_safekeeper(&subcmd, env)),
+            NeonLocalCmd::Endpoint(subcmd) => rt.block_on(handle_endpoint(&subcmd, env)),
+            NeonLocalCmd::Mappings(subcmd) => handle_mappings(&subcmd, env),
         };
 
         if &original_env != env {
@@ -263,10 +875,13 @@ async fn get_timeline_infos(
         .collect())
 }
 
-// Helper function to parse --tenant_id option, or get the default from config file
-fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<TenantId> {
-    if let Some(tenant_id_from_arguments) = parse_tenant_id(sub_match).transpose() {
-        tenant_id_from_arguments
+/// Helper function to get tenant id from an optional --tenant_id option or from the config file
+fn get_tenant_id(
+    tenant_id_arg: Option<TenantId>,
+    env: &local_env::LocalEnv,
+) -> anyhow::Result<TenantId> {
+    if let Some(tenant_id_from_arguments) = tenant_id_arg {
+        Ok(tenant_id_from_arguments)
     } else if let Some(default_id) = env.default_tenant_id {
         Ok(default_id)
     } else {
@@ -274,13 +889,14 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
     }
 }
 
-// Helper function to parse --tenant_id option, for commands that accept a shard suffix
+/// Helper function to get tenant-shard ID from an optional --tenant_id option or from the config file,
+/// for commands that accept a shard suffix
 fn get_tenant_shard_id(
-    sub_match: &ArgMatches,
+    tenant_shard_id_arg: Option<TenantShardId>,
     env: &local_env::LocalEnv,
 ) -> anyhow::Result<TenantShardId> {
-    if let Some(tenant_id_from_arguments) = parse_tenant_shard_id(sub_match).transpose() {
-        tenant_id_from_arguments
+    if let Some(tenant_id_from_arguments) = tenant_shard_id_arg {
+        Ok(tenant_id_from_arguments)
     } else if let Some(default_id) = env.default_tenant_id {
         Ok(TenantShardId::unsharded(default_id))
     } else {
@@ -288,41 +904,11 @@ fn get_tenant_shard_id(
     }
 }
 
-fn parse_tenant_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantId>> {
-    sub_match
-        .get_one::<String>("tenant-id")
-        .map(|tenant_id| TenantId::from_str(tenant_id))
-        .transpose()
-        .context("Failed to parse tenant id from the argument string")
-}
-
-fn parse_tenant_shard_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TenantShardId>> {
-    sub_match
-        .get_one::<String>("tenant-id")
-        .map(|id_str| TenantShardId::from_str(id_str))
-        .transpose()
-        .context("Failed to parse tenant shard id from the argument string")
-}
-
-fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId>> {
-    sub_match
-        .get_one::<String>("timeline-id")
-        .map(|timeline_id| TimelineId::from_str(timeline_id))
-        .transpose()
-        .context("Failed to parse timeline id from the argument string")
-}
-
-fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
-    let num_pageservers = init_match.get_one::<u16>("num-pageservers");
-
-    let force = init_match.get_one("force").expect("we set a default value");
-
+fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
     // Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
-    let init_conf: NeonLocalInitConf = if let Some(config_path) =
-        init_match.get_one::<PathBuf>("config")
-    {
+    let init_conf: NeonLocalInitConf = if let Some(config_path) = &args.config {
         // User (likely the Python test suite) provided a description of the environment.
-        if num_pageservers.is_some() {
+        if args.num_pageservers.is_some() {
             bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
         }
         // load and parse the file
@@ -346,7 +932,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
                 http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
                 ..Default::default()
             }],
-            pageservers: (0..num_pageservers.copied().unwrap_or(1))
+            pageservers: (0..args.num_pageservers.unwrap_or(1))
                 .map(|i| {
                     let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
                     let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
@@ -369,7 +955,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
         }
     };
 
-    LocalEnv::init(init_conf, force)
+    LocalEnv::init(init_conf, &args.force)
         .context("materialize initial neon_local environment on disk")?;
     Ok(LocalEnv::load_config(&local_env::base_path())
         .expect("freshly written config should be loadable"))
@@ -387,19 +973,16 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
     PageServerNode::from_env(env, ps_conf)
 }
 
-async fn handle_tenant(
-    tenant_match: &ArgMatches,
-    env: &mut local_env::LocalEnv,
-) -> anyhow::Result<()> {
+async fn handle_tenant(subcmd: &TenantCmd, env: &mut local_env::LocalEnv) -> anyhow::Result<()> {
     let pageserver = get_default_pageserver(env);
-    match tenant_match.subcommand() {
-        Some(("list", _)) => {
+    match subcmd {
+        TenantCmd::List => {
             for t in pageserver.tenant_list().await? {
                 println!("{} {:?}", t.id, t.state);
             }
         }
-        Some(("import", import_match)) => {
-            let tenant_id = parse_tenant_id(import_match)?.unwrap_or_else(TenantId::generate);
+        TenantCmd::Import(args) => {
+            let tenant_id = args.tenant_id;
 
             let storage_controller = StorageController::from_env(env);
             let create_response = storage_controller.tenant_import(tenant_id).await?;
@@ -446,31 +1029,14 @@ async fn handle_tenant(
                 env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;
             }
         }
-        Some(("create", create_match)) => {
-            let tenant_conf: HashMap<_, _> = create_match
-                .get_many::<String>("config")
-                .map(|vals: clap::parser::ValuesRef<'_, String>| {
-                    vals.flat_map(|c| c.split_once(':')).collect()
-                })
-                .unwrap_or_default();
-
-            let shard_count: u8 = create_match
-                .get_one::<u8>("shard-count")
-                .cloned()
-                .unwrap_or(0);
-
-            let shard_stripe_size: Option<u32> =
-                create_match.get_one::<u32>("shard-stripe-size").cloned();
-
-            let placement_policy = match create_match.get_one::<String>("placement-policy") {
-                Some(s) if !s.is_empty() => serde_json::from_str::<PlacementPolicy>(s)?,
-                _ => PlacementPolicy::Attached(0),
-            };
+        TenantCmd::Create(args) => {
+            let tenant_conf: HashMap<_, _> =
+                args.config.iter().flat_map(|c| c.split_once(':')).collect();
 
             let tenant_conf = PageServerNode::parse_config(tenant_conf)?;
 
             // If tenant ID was not specified, generate one
-            let tenant_id = parse_tenant_id(create_match)?.unwrap_or_else(TenantId::generate);
+            let tenant_id = args.tenant_id.unwrap_or_else(TenantId::generate);
 
             // We must register the tenant with the storage controller, so
             // that when the pageserver restarts, it will be re-attached.
@@ -478,29 +1044,26 @@ async fn handle_tenant(
             storage_controller
                 .tenant_create(TenantCreateRequest {
                     // Note that ::unsharded here isn't actually because the tenant is unsharded, its because the
-                    // storage controller expecfs a shard-naive tenant_id in this attribute, and the TenantCreateRequest
-                    // type is used both in storage controller (for creating tenants) and in pageserver (for creating shards)
+                    // storage controller expects a shard-naive tenant_id in this attribute, and the TenantCreateRequest
+                    // type is used both in the storage controller (for creating tenants) and in the pageserver (for
+                    // creating shards)
                     new_tenant_id: TenantShardId::unsharded(tenant_id),
                     generation: None,
                     shard_parameters: ShardParameters {
-                        count: ShardCount::new(shard_count),
-                        stripe_size: shard_stripe_size
+                        count: ShardCount::new(args.shard_count),
+                        stripe_size: args
+                            .shard_stripe_size
                             .map(ShardStripeSize)
                             .unwrap_or(ShardParameters::DEFAULT_STRIPE_SIZE),
                     },
-                    placement_policy: Some(placement_policy),
+                    placement_policy: args.placement_policy.clone(),
                     config: tenant_conf,
                 })
                 .await?;
             println!("tenant {tenant_id} successfully created on the pageserver");
 
             // Create an initial timeline for the new tenant
-            let new_timeline_id =
-                parse_timeline_id(create_match)?.unwrap_or(TimelineId::generate());
-            let pg_version = create_match
-                .get_one::<u32>("pg-version")
-                .copied()
-                .context("Failed to parse postgres version from the argument string")?;
+            let new_timeline_id = args.timeline_id.unwrap_or(TimelineId::generate());
 
             // FIXME: passing None for ancestor_start_lsn is not kosher in a sharded world: we can't have
             // different shards picking different start lsns.  Maybe we have to teach storage controller
@@ -513,7 +1076,7 @@ async fn handle_tenant(
                         ancestor_timeline_id: None,
                         ancestor_start_lsn: None,
                         existing_initdb_timeline_id: None,
-                        pg_version: Some(pg_version),
+                        pg_version: Some(args.pg_version),
                     },
                 )
                 .await?;
@@ -526,23 +1089,19 @@ async fn handle_tenant(
 
             println!("Created an initial timeline '{new_timeline_id}' for tenant: {tenant_id}",);
 
-            if create_match.get_flag("set-default") {
+            if args.set_default {
                 println!("Setting tenant {tenant_id} as a default one");
                 env.default_tenant_id = Some(tenant_id);
             }
         }
-        Some(("set-default", set_default_match)) => {
-            let tenant_id =
-                parse_tenant_id(set_default_match)?.context("No tenant id specified")?;
-            println!("Setting tenant {tenant_id} as a default one");
-            env.default_tenant_id = Some(tenant_id);
+        TenantCmd::SetDefault(args) => {
+            println!("Setting tenant {} as a default one", args.tenant_id);
+            env.default_tenant_id = Some(args.tenant_id);
         }
-        Some(("config", create_match)) => {
-            let tenant_id = get_tenant_id(create_match, env)?;
-            let tenant_conf: HashMap<_, _> = create_match
-                .get_many::<String>("config")
-                .map(|vals| vals.flat_map(|c| c.split_once(':')).collect())
-                .unwrap_or_default();
+        TenantCmd::Config(args) => {
+            let tenant_id = get_tenant_id(args.tenant_id, env)?;
+            let tenant_conf: HashMap<_, _> =
+                args.config.iter().flat_map(|c| c.split_once(':')).collect();
 
             pageserver
                 .tenant_config(tenant_id, tenant_conf)
@@ -550,36 +1109,25 @@ async fn handle_tenant(
                 .with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
             println!("tenant {tenant_id} successfully configured on the pageserver");
         }
-
-        Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{}'", sub_name),
-        None => bail!("no tenant subcommand provided"),
     }
     Ok(())
 }
 
-async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
+async fn handle_timeline(cmd: &TimelineCmd, env: &mut local_env::LocalEnv) -> Result<()> {
     let pageserver = get_default_pageserver(env);
 
-    match timeline_match.subcommand() {
-        Some(("list", list_match)) => {
+    match cmd {
+        TimelineCmd::List(args) => {
             // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
             // where shard 0 is attached, and query there.
-            let tenant_shard_id = get_tenant_shard_id(list_match, env)?;
+            let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?;
             let timelines = pageserver.timeline_list(&tenant_shard_id).await?;
             print_timelines_tree(timelines, env.timeline_name_mappings())?;
         }
-        Some(("create", create_match)) => {
-            let tenant_id = get_tenant_id(create_match, env)?;
-            let new_branch_name = create_match
-                .get_one::<String>("branch-name")
-                .ok_or_else(|| anyhow!("No branch name provided"))?;
-
-            let pg_version = create_match
-                .get_one::<u32>("pg-version")
-                .copied()
-                .context("Failed to parse postgres version from the argument string")?;
-
-            let new_timeline_id_opt = parse_timeline_id(create_match)?;
+        TimelineCmd::Create(args) => {
+            let tenant_id = get_tenant_id(args.tenant_id, env)?;
+            let new_branch_name = &args.branch_name;
+            let new_timeline_id_opt = args.timeline_id;
             let new_timeline_id = new_timeline_id_opt.unwrap_or(TimelineId::generate());
 
             let storage_controller = StorageController::from_env(env);
@@ -588,7 +1136,7 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 ancestor_timeline_id: None,
                 existing_initdb_timeline_id: None,
                 ancestor_start_lsn: None,
-                pg_version: Some(pg_version),
+                pg_version: Some(args.pg_version),
             };
             let timeline_info = storage_controller
                 .tenant_timeline_create(tenant_id, create_req)
@@ -602,67 +1150,42 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 timeline_info.timeline_id
             );
         }
-        Some(("import", import_match)) => {
-            let tenant_id = get_tenant_id(import_match, env)?;
-            let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
-            let branch_name = import_match
-                .get_one::<String>("branch-name")
-                .ok_or_else(|| anyhow!("No branch name provided"))?;
+        TimelineCmd::Import(args) => {
+            let tenant_id = get_tenant_id(args.tenant_id, env)?;
+            let timeline_id = args.timeline_id;
+            let branch_name = &args.branch_name;
 
             // Parse base inputs
-            let base_tarfile = import_match
-                .get_one::<PathBuf>("base-tarfile")
-                .ok_or_else(|| anyhow!("No base-tarfile provided"))?
-                .to_owned();
-            let base_lsn = Lsn::from_str(
-                import_match
-                    .get_one::<String>("base-lsn")
-                    .ok_or_else(|| anyhow!("No base-lsn provided"))?,
-            )?;
-            let base = (base_lsn, base_tarfile);
+            let base = (args.base_lsn, args.base_tarfile.clone());
 
             // Parse pg_wal inputs
-            let wal_tarfile = import_match.get_one::<PathBuf>("wal-tarfile").cloned();
-            let end_lsn = import_match
-                .get_one::<String>("end-lsn")
-                .map(|s| Lsn::from_str(s).unwrap());
+            let wal_tarfile = args.wal_tarfile.clone();
+            let end_lsn = args.end_lsn;
             // TODO validate both or none are provided
             let pg_wal = end_lsn.zip(wal_tarfile);
 
-            let pg_version = import_match
-                .get_one::<u32>("pg-version")
-                .copied()
-                .context("Failed to parse postgres version from the argument string")?;
-
             println!("Importing timeline into pageserver ...");
             pageserver
-                .timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
+                .timeline_import(tenant_id, timeline_id, base, pg_wal, args.pg_version)
                 .await?;
             env.register_branch_mapping(branch_name.to_string(), tenant_id, timeline_id)?;
             println!("Done");
         }
-        Some(("branch", branch_match)) => {
-            let tenant_id = get_tenant_id(branch_match, env)?;
-            let new_timeline_id =
-                parse_timeline_id(branch_match)?.unwrap_or(TimelineId::generate());
-            let new_branch_name = branch_match
-                .get_one::<String>("branch-name")
-                .ok_or_else(|| anyhow!("No branch name provided"))?;
-            let ancestor_branch_name = branch_match
-                .get_one::<String>("ancestor-branch-name")
-                .map(|s| s.as_str())
-                .unwrap_or(DEFAULT_BRANCH_NAME);
+        TimelineCmd::Branch(args) => {
+            let tenant_id = get_tenant_id(args.tenant_id, env)?;
+            let new_timeline_id = args.timeline_id.unwrap_or(TimelineId::generate());
+            let new_branch_name = &args.branch_name;
+            let ancestor_branch_name = args
+                .ancestor_branch_name
+                .clone()
+                .unwrap_or(DEFAULT_BRANCH_NAME.to_owned());
             let ancestor_timeline_id = env
-                .get_branch_timeline_id(ancestor_branch_name, tenant_id)
+                .get_branch_timeline_id(&ancestor_branch_name, tenant_id)
                 .ok_or_else(|| {
                     anyhow!("Found no timeline id for branch name '{ancestor_branch_name}'")
                 })?;
 
-            let start_lsn = branch_match
-                .get_one::<String>("ancestor-start-lsn")
-                .map(|lsn_str| Lsn::from_str(lsn_str))
-                .transpose()
-                .context("Failed to parse ancestor start Lsn from the request")?;
+            let start_lsn = args.ancestor_start_lsn;
             let storage_controller = StorageController::from_env(env);
             let create_req = TimelineCreateRequest {
                 new_timeline_id,
@@ -684,25 +1207,19 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                 timeline_info.timeline_id
             );
         }
-        Some((sub_name, _)) => bail!("Unexpected tenant subcommand '{sub_name}'"),
-        None => bail!("no tenant subcommand provided"),
     }
 
     Ok(())
 }
 
-async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let (sub_name, sub_args) = match ep_match.subcommand() {
-        Some(ep_subcommand_data) => ep_subcommand_data,
-        None => bail!("no endpoint subcommand provided"),
-    };
+async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Result<()> {
     let mut cplane = ComputeControlPlane::load(env.clone())?;
 
-    match sub_name {
-        "list" => {
+    match subcmd {
+        EndpointCmd::List(args) => {
             // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
             // where shard 0 is attached, and query there.
-            let tenant_shard_id = get_tenant_shard_id(sub_args, env)?;
+            let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?;
             let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
                 .await
                 .unwrap_or_else(|e| {
@@ -766,52 +1283,29 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
 
             println!("{table}");
         }
-        "create" => {
-            let tenant_id = get_tenant_id(sub_args, env)?;
-            let branch_name = sub_args
-                .get_one::<String>("branch-name")
-                .map(|s| s.as_str())
-                .unwrap_or(DEFAULT_BRANCH_NAME);
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .map(String::to_string)
+        EndpointCmd::Create(args) => {
+            let tenant_id = get_tenant_id(args.tenant_id, env)?;
+            let branch_name = args
+                .branch_name
+                .clone()
+                .unwrap_or(DEFAULT_BRANCH_NAME.to_owned());
+            let endpoint_id = args
+                .endpoint_id
+                .clone()
                 .unwrap_or_else(|| format!("ep-{branch_name}"));
-            let update_catalog = sub_args
-                .get_one::<bool>("update-catalog")
-                .cloned()
-                .unwrap_or_default();
 
-            let lsn = sub_args
-                .get_one::<String>("lsn")
-                .map(|lsn_str| Lsn::from_str(lsn_str))
-                .transpose()
-                .context("Failed to parse Lsn from the request")?;
             let timeline_id = env
-                .get_branch_timeline_id(branch_name, tenant_id)
+                .get_branch_timeline_id(&branch_name, tenant_id)
                 .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;
 
-            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
-            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
-            let pg_version = sub_args
-                .get_one::<u32>("pg-version")
-                .copied()
-                .context("Failed to parse postgres version from the argument string")?;
-
-            let hot_standby = sub_args
-                .get_one::<bool>("hot-standby")
-                .copied()
-                .unwrap_or(false);
-
-            let allow_multiple = sub_args.get_flag("allow-multiple");
-
-            let mode = match (lsn, hot_standby) {
+            let mode = match (args.lsn, args.hot_standby) {
                 (Some(lsn), false) => ComputeMode::Static(lsn),
                 (None, true) => ComputeMode::Replica,
                 (None, false) => ComputeMode::Primary,
                 (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
             };
 
-            match (mode, hot_standby) {
+            match (mode, args.hot_standby) {
                 (ComputeMode::Static(_), true) => {
                     bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
                 }
@@ -821,7 +1315,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 _ => {}
             }
 
-            if !allow_multiple {
+            if !args.allow_multiple {
                 cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
             }
 
@@ -829,34 +1323,21 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 &endpoint_id,
                 tenant_id,
                 timeline_id,
-                pg_port,
-                http_port,
-                pg_version,
+                args.pg_port,
+                args.http_port,
+                args.pg_version,
                 mode,
-                !update_catalog,
+                !args.update_catalog,
             )?;
         }
-        "start" => {
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
-
-            let pageserver_id =
-                if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
-                    Some(NodeId(
-                        id_str.parse().context("while parsing pageserver id")?,
-                    ))
-                } else {
-                    None
-                };
-
-            let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
-
-            let allow_multiple = sub_args.get_flag("allow-multiple");
+        EndpointCmd::Start(args) => {
+            let endpoint_id = &args.endpoint_id;
+            let pageserver_id = args.endpoint_pageserver_id;
+            let remote_ext_config = &args.remote_ext_config;
 
             // If --safekeepers argument is given, use only the listed
             // safekeeper nodes; otherwise all from the env.
-            let safekeepers = if let Some(safekeepers) = parse_safekeepers(sub_args)? {
+            let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? {
                 safekeepers
             } else {
                 env.safekeepers.iter().map(|sk| sk.id).collect()
@@ -867,12 +1348,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 .get(endpoint_id.as_str())
                 .ok_or_else(|| anyhow::anyhow!("endpoint {endpoint_id} not found"))?;
 
-            let create_test_user = sub_args
-                .get_one::<bool>("create-test-user")
-                .cloned()
-                .unwrap_or_default();
-
-            if !allow_multiple {
+            if !args.allow_multiple {
                 cplane.check_conflicting_endpoints(
                     endpoint.mode,
                     endpoint.tenant_id,
@@ -894,17 +1370,27 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                 // to pass these on to postgres.
                 let storage_controller = StorageController::from_env(env);
                 let locate_result = storage_controller.tenant_locate(endpoint.tenant_id).await?;
-                let pageservers = locate_result
-                    .shards
-                    .into_iter()
-                    .map(|shard| {
-                        (
+                let pageservers = futures::future::try_join_all(
+                    locate_result.shards.into_iter().map(|shard| async move {
+                        if let ComputeMode::Static(lsn) = endpoint.mode {
+                            // Initialize LSN leases for static computes.
+                            let conf = env.get_pageserver_conf(shard.node_id).unwrap();
+                            let pageserver = PageServerNode::from_env(env, conf);
+
+                            pageserver
+                                .http_client
+                                .timeline_init_lsn_lease(shard.shard_id, endpoint.timeline_id, lsn)
+                                .await?;
+                        }
+
+                        anyhow::Ok((
                             Host::parse(&shard.listen_pg_addr)
                                 .expect("Storage controller reported bad hostname"),
                             shard.listen_pg_port,
-                        )
-                    })
-                    .collect::<Vec<_>>();
+                        ))
+                    }),
+                )
+                .await?;
                 let stripe_size = locate_result.shard_params.stripe_size;
 
                 (pageservers, stripe_size)
@@ -926,72 +1412,61 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                     &auth_token,
                     safekeepers,
                     pageservers,
-                    remote_ext_config,
+                    remote_ext_config.as_ref(),
                     stripe_size.0 as usize,
-                    create_test_user,
+                    args.create_test_user,
                 )
                 .await?;
         }
-        "reconfigure" => {
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .ok_or_else(|| anyhow!("No endpoint ID provided to reconfigure"))?;
+        EndpointCmd::Reconfigure(args) => {
+            let endpoint_id = &args.endpoint_id;
             let endpoint = cplane
                 .endpoints
                 .get(endpoint_id.as_str())
                 .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-            let pageservers =
-                if let Some(id_str) = sub_args.get_one::<String>("endpoint-pageserver-id") {
-                    let ps_id = NodeId(id_str.parse().context("while parsing pageserver id")?);
-                    let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
-                    vec![(
-                        pageserver.pg_connection_config.host().clone(),
-                        pageserver.pg_connection_config.port(),
-                    )]
-                } else {
-                    let storage_controller = StorageController::from_env(env);
-                    storage_controller
-                        .tenant_locate(endpoint.tenant_id)
-                        .await?
-                        .shards
-                        .into_iter()
-                        .map(|shard| {
-                            (
-                                Host::parse(&shard.listen_pg_addr)
-                                    .expect("Storage controller reported malformed host"),
-                                shard.listen_pg_port,
-                            )
-                        })
-                        .collect::<Vec<_>>()
-                };
+            let pageservers = if let Some(ps_id) = args.endpoint_pageserver_id {
+                let pageserver = PageServerNode::from_env(env, env.get_pageserver_conf(ps_id)?);
+                vec![(
+                    pageserver.pg_connection_config.host().clone(),
+                    pageserver.pg_connection_config.port(),
+                )]
+            } else {
+                let storage_controller = StorageController::from_env(env);
+                storage_controller
+                    .tenant_locate(endpoint.tenant_id)
+                    .await?
+                    .shards
+                    .into_iter()
+                    .map(|shard| {
+                        (
+                            Host::parse(&shard.listen_pg_addr)
+                                .expect("Storage controller reported malformed host"),
+                            shard.listen_pg_port,
+                        )
+                    })
+                    .collect::<Vec<_>>()
+            };
             // If --safekeepers argument is given, use only the listed
             // safekeeper nodes; otherwise all from the env.
-            let safekeepers = parse_safekeepers(sub_args)?;
+            let safekeepers = parse_safekeepers(&args.safekeepers)?;
             endpoint.reconfigure(pageservers, None, safekeepers).await?;
         }
-        "stop" => {
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?;
-            let destroy = sub_args.get_flag("destroy");
-            let mode = sub_args.get_one::<String>("mode").expect("has a default");
-
+        EndpointCmd::Stop(args) => {
+            let endpoint_id = &args.endpoint_id;
             let endpoint = cplane
                 .endpoints
-                .get(endpoint_id.as_str())
+                .get(endpoint_id)
                 .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-            endpoint.stop(mode, destroy)?;
+            endpoint.stop(&args.mode, args.destroy)?;
         }
-
-        _ => bail!("Unexpected endpoint subcommand '{sub_name}'"),
     }
 
     Ok(())
 }
 
 /// Parse --safekeepers as list of safekeeper ids.
-fn parse_safekeepers(sub_args: &ArgMatches) -> Result<Option<Vec<NodeId>>> {
-    if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
+fn parse_safekeepers(safekeepers_str: &Option<String>) -> Result<Option<Vec<NodeId>>> {
+    if let Some(safekeepers_str) = safekeepers_str {
         let mut safekeepers: Vec<NodeId> = Vec::new();
         for sk_id in safekeepers_str.split(',').map(str::trim) {
             let sk_id = NodeId(
@@ -1006,44 +1481,25 @@ fn parse_safekeepers(sub_args: &ArgMatches) -> Result<Option<Vec<NodeId>>> {
     }
 }
 
-fn handle_mappings(sub_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
-    let (sub_name, sub_args) = match sub_match.subcommand() {
-        Some(ep_subcommand_data) => ep_subcommand_data,
-        None => bail!("no mappings subcommand provided"),
-    };
-
-    match sub_name {
-        "map" => {
-            let branch_name = sub_args
-                .get_one::<String>("branch-name")
-                .expect("branch-name argument missing");
-
-            let tenant_id = sub_args
-                .get_one::<String>("tenant-id")
-                .map(|x| TenantId::from_str(x))
-                .expect("tenant-id argument missing")
-                .expect("malformed tenant-id arg");
-
-            let timeline_id = sub_args
-                .get_one::<String>("timeline-id")
-                .map(|x| TimelineId::from_str(x))
-                .expect("timeline-id argument missing")
-                .expect("malformed timeline-id arg");
-
-            env.register_branch_mapping(branch_name.to_owned(), tenant_id, timeline_id)?;
+fn handle_mappings(subcmd: &MappingsCmd, env: &mut local_env::LocalEnv) -> Result<()> {
+    match subcmd {
+        MappingsCmd::Map(args) => {
+            env.register_branch_mapping(
+                args.branch_name.to_owned(),
+                args.tenant_id,
+                args.timeline_id,
+            )?;
 
             Ok(())
         }
-        other => unimplemented!("mappings subcommand {other}"),
     }
 }
 
-fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageServerNode> {
-    let node_id = if let Some(id_str) = args.get_one::<String>("pageserver-id") {
-        NodeId(id_str.parse().context("while parsing pageserver id")?)
-    } else {
-        DEFAULT_PAGESERVER_ID
-    };
+fn get_pageserver(
+    env: &local_env::LocalEnv,
+    pageserver_id_arg: Option<NodeId>,
+) -> Result<PageServerNode> {
+    let node_id = pageserver_id_arg.unwrap_or(DEFAULT_PAGESERVER_ID);
 
     Ok(PageServerNode::from_env(
         env,
@@ -1051,48 +1507,11 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
     ))
 }
 
-fn get_start_timeout(args: &ArgMatches) -> &Duration {
-    let humantime_duration = args
-        .get_one::<humantime::Duration>("start-timeout")
-        .expect("invalid value for start-timeout");
-    humantime_duration.as_ref()
-}
-
-fn storage_controller_start_args(args: &ArgMatches) -> NeonStorageControllerStartArgs {
-    let maybe_instance_id = args.get_one::<u8>("instance-id");
-
-    let base_port = args.get_one::<u16>("base-port");
-
-    if maybe_instance_id.is_some() && base_port.is_none() {
-        panic!("storage-controller start specificied instance-id but did not provide base-port");
-    }
-
-    let start_timeout = args
-        .get_one::<humantime::Duration>("start-timeout")
-        .expect("invalid value for start-timeout");
-
-    NeonStorageControllerStartArgs {
-        instance_id: maybe_instance_id.copied().unwrap_or(1),
-        base_port: base_port.copied(),
-        start_timeout: *start_timeout,
-    }
-}
-
-fn storage_controller_stop_args(args: &ArgMatches) -> NeonStorageControllerStopArgs {
-    let maybe_instance_id = args.get_one::<u8>("instance-id");
-    let immediate = args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
-
-    NeonStorageControllerStopArgs {
-        instance_id: maybe_instance_id.copied().unwrap_or(1),
-        immediate,
-    }
-}
-
-async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    match sub_match.subcommand() {
-        Some(("start", subcommand_args)) => {
-            if let Err(e) = get_pageserver(env, subcommand_args)?
-                .start(get_start_timeout(subcommand_args))
+async fn handle_pageserver(subcmd: &PageserverCmd, env: &local_env::LocalEnv) -> Result<()> {
+    match subcmd {
+        PageserverCmd::Start(args) => {
+            if let Err(e) = get_pageserver(env, args.pageserver_id)?
+                .start(&args.start_timeout)
                 .await
             {
                 eprintln!("pageserver start failed: {e}");
@@ -1100,34 +1519,36 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
             }
         }
 
-        Some(("stop", subcommand_args)) => {
-            let immediate = subcommand_args
-                .get_one::<String>("stop-mode")
-                .map(|s| s.as_str())
-                == Some("immediate");
-
-            if let Err(e) = get_pageserver(env, subcommand_args)?.stop(immediate) {
+        PageserverCmd::Stop(args) => {
+            let immediate = match args.stop_mode {
+                StopMode::Fast => false,
+                StopMode::Immediate => true,
+            };
+            if let Err(e) = get_pageserver(env, args.pageserver_id)?.stop(immediate) {
                 eprintln!("pageserver stop failed: {}", e);
                 exit(1);
             }
         }
 
-        Some(("restart", subcommand_args)) => {
-            let pageserver = get_pageserver(env, subcommand_args)?;
+        PageserverCmd::Restart(args) => {
+            let pageserver = get_pageserver(env, args.pageserver_id)?;
             //TODO what shutdown strategy should we use here?
             if let Err(e) = pageserver.stop(false) {
                 eprintln!("pageserver stop failed: {}", e);
                 exit(1);
             }
 
-            if let Err(e) = pageserver.start(get_start_timeout(sub_match)).await {
+            if let Err(e) = pageserver.start(&args.start_timeout).await {
                 eprintln!("pageserver start failed: {e}");
                 exit(1);
             }
         }
 
-        Some(("status", subcommand_args)) => {
-            match get_pageserver(env, subcommand_args)?.check_status().await {
+        PageserverCmd::Status(args) => {
+            match get_pageserver(env, args.pageserver_id)?
+                .check_status()
+                .await
+            {
                 Ok(_) => println!("Page server is up and running"),
                 Err(err) => {
                     eprintln!("Page server is not available: {}", err);
@@ -1135,34 +1556,42 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
                 }
             }
         }
-
-        Some((sub_name, _)) => bail!("Unexpected pageserver subcommand '{}'", sub_name),
-        None => bail!("no pageserver subcommand provided"),
     }
     Ok(())
 }
 
 async fn handle_storage_controller(
-    sub_match: &ArgMatches,
+    subcmd: &StorageControllerCmd,
     env: &local_env::LocalEnv,
 ) -> Result<()> {
     let svc = StorageController::from_env(env);
-    match sub_match.subcommand() {
-        Some(("start", start_match)) => {
-            if let Err(e) = svc.start(storage_controller_start_args(start_match)).await {
+    match subcmd {
+        StorageControllerCmd::Start(args) => {
+            let start_args = NeonStorageControllerStartArgs {
+                instance_id: args.instance_id,
+                base_port: args.base_port,
+                start_timeout: args.start_timeout,
+            };
+
+            if let Err(e) = svc.start(start_args).await {
                 eprintln!("start failed: {e}");
                 exit(1);
             }
         }
 
-        Some(("stop", stop_match)) => {
-            if let Err(e) = svc.stop(storage_controller_stop_args(stop_match)).await {
+        StorageControllerCmd::Stop(args) => {
+            let stop_args = NeonStorageControllerStopArgs {
+                instance_id: args.instance_id,
+                immediate: match args.stop_mode {
+                    StopMode::Fast => false,
+                    StopMode::Immediate => true,
+                },
+            };
+            if let Err(e) = svc.stop(stop_args).await {
                 eprintln!("stop failed: {}", e);
                 exit(1);
             }
         }
-        Some((sub_name, _)) => bail!("Unexpected storage_controller subcommand '{}'", sub_name),
-        None => bail!("no storage_controller subcommand provided"),
     }
     Ok(())
 }
@@ -1175,111 +1604,77 @@ fn get_safekeeper(env: &local_env::LocalEnv, id: NodeId) -> Result<SafekeeperNod
     }
 }
 
-// Get list of options to append to safekeeper command invocation.
-fn safekeeper_extra_opts(init_match: &ArgMatches) -> Vec<String> {
-    init_match
-        .get_many::<String>("safekeeper-extra-opt")
-        .into_iter()
-        .flatten()
-        .map(|s| s.to_owned())
-        .collect()
-}
+async fn handle_safekeeper(subcmd: &SafekeeperCmd, env: &local_env::LocalEnv) -> Result<()> {
+    match subcmd {
+        SafekeeperCmd::Start(args) => {
+            let safekeeper = get_safekeeper(env, args.id)?;
 
-async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let (sub_name, sub_args) = match sub_match.subcommand() {
-        Some(safekeeper_command_data) => safekeeper_command_data,
-        None => bail!("no safekeeper subcommand provided"),
-    };
-
-    // All the commands take an optional safekeeper name argument
-    let sk_id = if let Some(id_str) = sub_args.get_one::<String>("id") {
-        NodeId(id_str.parse().context("while parsing safekeeper id")?)
-    } else {
-        DEFAULT_SAFEKEEPER_ID
-    };
-    let safekeeper = get_safekeeper(env, sk_id)?;
-
-    match sub_name {
-        "start" => {
-            let extra_opts = safekeeper_extra_opts(sub_args);
-
-            if let Err(e) = safekeeper
-                .start(extra_opts, get_start_timeout(sub_args))
-                .await
-            {
+            if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {
                 eprintln!("safekeeper start failed: {}", e);
                 exit(1);
             }
         }
 
-        "stop" => {
-            let immediate =
-                sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
-
+        SafekeeperCmd::Stop(args) => {
+            let safekeeper = get_safekeeper(env, args.id)?;
+            let immediate = match args.stop_mode {
+                StopMode::Fast => false,
+                StopMode::Immediate => true,
+            };
             if let Err(e) = safekeeper.stop(immediate) {
                 eprintln!("safekeeper stop failed: {}", e);
                 exit(1);
             }
         }
 
-        "restart" => {
-            let immediate =
-                sub_args.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
+        SafekeeperCmd::Restart(args) => {
+            let safekeeper = get_safekeeper(env, args.id)?;
+            let immediate = match args.stop_mode {
+                StopMode::Fast => false,
+                StopMode::Immediate => true,
+            };
 
             if let Err(e) = safekeeper.stop(immediate) {
                 eprintln!("safekeeper stop failed: {}", e);
                 exit(1);
             }
 
-            let extra_opts = safekeeper_extra_opts(sub_args);
-            if let Err(e) = safekeeper
-                .start(extra_opts, get_start_timeout(sub_args))
-                .await
-            {
+            if let Err(e) = safekeeper.start(&args.extra_opt, &args.start_timeout).await {
                 eprintln!("safekeeper start failed: {}", e);
                 exit(1);
             }
         }
-
-        _ => {
-            bail!("Unexpected safekeeper subcommand '{}'", sub_name)
-        }
     }
     Ok(())
 }
 
-async fn handle_storage_broker(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let (sub_name, sub_args) = match sub_match.subcommand() {
-        Some(broker_command_data) => broker_command_data,
-        None => bail!("no broker subcommand provided"),
-    };
-
-    match sub_name {
-        "start" => {
-            if let Err(e) = broker::start_broker_process(env, get_start_timeout(sub_args)).await {
+async fn handle_storage_broker(subcmd: &StorageBrokerCmd, env: &local_env::LocalEnv) -> Result<()> {
+    match subcmd {
+        StorageBrokerCmd::Start(args) => {
+            if let Err(e) = broker::start_broker_process(env, &args.start_timeout).await {
                 eprintln!("broker start failed: {e}");
                 exit(1);
             }
         }
 
-        "stop" => {
+        StorageBrokerCmd::Stop(_args) => {
+            // FIXME: stop_mode unused
             if let Err(e) = broker::stop_broker_process(env) {
                 eprintln!("broker stop failed: {e}");
                 exit(1);
             }
         }
-
-        _ => bail!("Unexpected broker subcommand '{}'", sub_name),
     }
     Ok(())
 }
 
 async fn handle_start_all(
+    args: &StartCmdArgs,
     env: &'static local_env::LocalEnv,
-    retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
-    let Err(errors) = handle_start_all_impl(env, *retry_timeout).await else {
-        neon_start_status_check(env, retry_timeout)
+    // FIXME: this was called "retry_timeout", is it right?
+    let Err(errors) = handle_start_all_impl(env, args.timeout).await else {
+        neon_start_status_check(env, args.timeout.as_ref())
             .await
             .context("status check after successful startup of all services")?;
         return Ok(());
@@ -1304,7 +1699,7 @@ async fn handle_start_all(
 /// Otherwise, returns the list of errors that occurred during startup.
 async fn handle_start_all_impl(
     env: &'static local_env::LocalEnv,
-    retry_timeout: Duration,
+    retry_timeout: humantime::Duration,
 ) -> Result<(), Vec<anyhow::Error>> {
     // Endpoints are not started automatically
 
@@ -1324,7 +1719,7 @@ async fn handle_start_all_impl(
                 let storage_controller = StorageController::from_env(env);
                 storage_controller
                     .start(NeonStorageControllerStartArgs::with_default_instance_id(
-                        retry_timeout.into(),
+                        retry_timeout,
                     ))
                     .await
                     .map_err(|e| e.context("start storage_controller"))
@@ -1345,7 +1740,7 @@ async fn handle_start_all_impl(
             js.spawn(async move {
                 let safekeeper = SafekeeperNode::from_env(env, node);
                 safekeeper
-                    .start(vec![], &retry_timeout)
+                    .start(&[], &retry_timeout)
                     .await
                     .map_err(|e| e.context(format!("start safekeeper {}", safekeeper.id)))
             });
@@ -1425,9 +1820,11 @@ async fn neon_start_status_check(
     anyhow::bail!("\nNeon passed status check")
 }
 
-async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let immediate =
-        sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
+async fn handle_stop_all(args: &StopCmdArgs, env: &local_env::LocalEnv) -> Result<()> {
+    let immediate = match args.mode {
+        StopMode::Fast => false,
+        StopMode::Immediate => true,
+    };
 
     try_stop_all(env, immediate).await;
 
@@ -1485,400 +1882,3 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
         }
     }
 }
-
-fn cli() -> Command {
-    let timeout_arg = Arg::new("start-timeout")
-        .long("start-timeout")
-        .short('t')
-        .global(true)
-        .help("timeout until we fail the command, e.g. 30s")
-        .value_parser(value_parser!(humantime::Duration))
-        .default_value("10s")
-        .required(false);
-
-    let branch_name_arg = Arg::new("branch-name")
-        .long("branch-name")
-        .help("Name of the branch to be created or used as an alias for other services")
-        .required(false);
-
-    let endpoint_id_arg = Arg::new("endpoint_id")
-        .help("Postgres endpoint id")
-        .required(false);
-
-    let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);
-
-    // --id, when using a pageserver command
-    let pageserver_id_arg = Arg::new("pageserver-id")
-        .long("id")
-        .global(true)
-        .help("pageserver id")
-        .required(false);
-    // --pageserver-id when using a non-pageserver command
-    let endpoint_pageserver_id_arg = Arg::new("endpoint-pageserver-id")
-        .long("pageserver-id")
-        .required(false);
-
-    let safekeeper_extra_opt_arg = Arg::new("safekeeper-extra-opt")
-        .short('e')
-        .long("safekeeper-extra-opt")
-        .num_args(1)
-        .action(ArgAction::Append)
-        .help("Additional safekeeper invocation options, e.g. -e=--http-auth-public-key-path=foo")
-        .required(false);
-
-    let tenant_id_arg = Arg::new("tenant-id")
-        .long("tenant-id")
-        .help("Tenant id. Represented as a hexadecimal string 32 symbols length")
-        .required(false);
-
-    let timeline_id_arg = Arg::new("timeline-id")
-        .long("timeline-id")
-        .help("Timeline id. Represented as a hexadecimal string 32 symbols length")
-        .required(false);
-
-    let pg_version_arg = Arg::new("pg-version")
-        .long("pg-version")
-        .help("Postgres version to use for the initial tenant")
-        .required(false)
-        .value_parser(value_parser!(u32))
-        .default_value(DEFAULT_PG_VERSION);
-
-    let pg_port_arg = Arg::new("pg-port")
-        .long("pg-port")
-        .required(false)
-        .value_parser(value_parser!(u16))
-        .value_name("pg-port");
-
-    let http_port_arg = Arg::new("http-port")
-        .long("http-port")
-        .required(false)
-        .value_parser(value_parser!(u16))
-        .value_name("http-port");
-
-    let safekeepers_arg = Arg::new("safekeepers")
-        .long("safekeepers")
-        .required(false)
-        .value_name("safekeepers");
-
-    let stop_mode_arg = Arg::new("stop-mode")
-        .short('m')
-        .value_parser(["fast", "immediate"])
-        .default_value("fast")
-        .help("If 'immediate', don't flush repository data at shutdown")
-        .required(false)
-        .value_name("stop-mode");
-
-    let remote_ext_config_args = Arg::new("remote-ext-config")
-        .long("remote-ext-config")
-        .num_args(1)
-        .help("Configure the remote extensions storage proxy gateway to request for extensions.")
-        .required(false);
-
-    let lsn_arg = Arg::new("lsn")
-        .long("lsn")
-        .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
-        .required(false);
-
-    let hot_standby_arg = Arg::new("hot-standby")
-        .value_parser(value_parser!(bool))
-        .long("hot-standby")
-        .help("If set, the node will be a hot replica on the specified timeline")
-        .required(false);
-
-    let force_arg = Arg::new("force")
-        .value_parser(value_parser!(InitForceMode))
-        .long("force")
-        .default_value(
-            InitForceMode::MustNotExist
-                .to_possible_value()
-                .unwrap()
-                .get_name()
-                .to_owned(),
-        )
-        .help("Force initialization even if the repository is not empty")
-        .required(false);
-
-    let num_pageservers_arg = Arg::new("num-pageservers")
-        .value_parser(value_parser!(u16))
-        .long("num-pageservers")
-        .help("How many pageservers to create (default 1)");
-
-    let update_catalog = Arg::new("update-catalog")
-        .value_parser(value_parser!(bool))
-        .long("update-catalog")
-        .help("If set, will set up the catalog for neon_superuser")
-        .required(false);
-
-    let create_test_user = Arg::new("create-test-user")
-        .value_parser(value_parser!(bool))
-        .long("create-test-user")
-        .help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
-        .required(false);
-
-    let allow_multiple = Arg::new("allow-multiple")
-        .help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
-        .long("allow-multiple")
-        .action(ArgAction::SetTrue)
-        .required(false);
-
-    let instance_id = Arg::new("instance-id")
-        .long("instance-id")
-        .help("Identifier used to distinguish storage controller instances (default 1)")
-        .value_parser(value_parser!(u8))
-        .required(false);
-
-    let base_port = Arg::new("base-port")
-        .long("base-port")
-        .help("Base port for the storage controller instance idenfified by instance-id (defaults to pagserver cplane api)")
-        .value_parser(value_parser!(u16))
-        .required(false);
-
-    Command::new("Neon CLI")
-        .arg_required_else_help(true)
-        .version(GIT_VERSION)
-        .subcommand(
-            Command::new("init")
-                .about("Initialize a new Neon repository, preparing configs for services to start with")
-                .arg(num_pageservers_arg.clone())
-                .arg(
-                    Arg::new("config")
-                        .long("config")
-                        .required(false)
-                        .value_parser(value_parser!(PathBuf))
-                        .value_name("config")
-                )
-                .arg(force_arg)
-        )
-        .subcommand(
-            Command::new("timeline")
-            .about("Manage timelines")
-            .arg_required_else_help(true)
-            .subcommand(Command::new("list")
-                .about("List all timelines, available to this pageserver")
-                .arg(tenant_id_arg.clone()))
-            .subcommand(Command::new("branch")
-                .about("Create a new timeline, using another timeline as a base, copying its data")
-                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
-                .arg(branch_name_arg.clone())
-                .arg(Arg::new("ancestor-branch-name").long("ancestor-branch-name")
-                    .help("Use last Lsn of another timeline (and its data) as base when creating the new timeline. The timeline gets resolved by its branch name.").required(false))
-                .arg(Arg::new("ancestor-start-lsn").long("ancestor-start-lsn")
-                    .help("When using another timeline as base, use a specific Lsn in it instead of the latest one").required(false)))
-            .subcommand(Command::new("create")
-                .about("Create a new blank timeline")
-                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
-                .arg(branch_name_arg.clone())
-                .arg(pg_version_arg.clone())
-            )
-            .subcommand(Command::new("import")
-                .about("Import timeline from basebackup directory")
-                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone())
-                .arg(branch_name_arg.clone())
-                .arg(Arg::new("base-tarfile")
-                    .long("base-tarfile")
-                    .value_parser(value_parser!(PathBuf))
-                    .help("Basebackup tarfile to import")
-                )
-                .arg(Arg::new("base-lsn").long("base-lsn")
-                    .help("Lsn the basebackup starts at"))
-                .arg(Arg::new("wal-tarfile")
-                    .long("wal-tarfile")
-                    .value_parser(value_parser!(PathBuf))
-                    .help("Wal to add after base")
-                )
-                .arg(Arg::new("end-lsn").long("end-lsn")
-                    .help("Lsn the basebackup ends at"))
-                .arg(pg_version_arg.clone())
-            )
-        ).subcommand(
-            Command::new("tenant")
-            .arg_required_else_help(true)
-            .about("Manage tenants")
-            .subcommand(Command::new("list"))
-            .subcommand(Command::new("create")
-                .arg(tenant_id_arg.clone())
-                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
-                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
-                .arg(pg_version_arg.clone())
-                .arg(Arg::new("set-default").long("set-default").action(ArgAction::SetTrue).required(false)
-                    .help("Use this tenant in future CLI commands where tenant_id is needed, but not specified"))
-                .arg(Arg::new("shard-count").value_parser(value_parser!(u8)).long("shard-count").action(ArgAction::Set).help("Number of shards in the new tenant (default 1)"))
-                .arg(Arg::new("shard-stripe-size").value_parser(value_parser!(u32)).long("shard-stripe-size").action(ArgAction::Set).help("Sharding stripe size in pages"))
-                .arg(Arg::new("placement-policy").value_parser(value_parser!(String)).long("placement-policy").action(ArgAction::Set).help("Placement policy shards in this tenant"))
-                )
-            .subcommand(Command::new("set-default").arg(tenant_id_arg.clone().required(true))
-                .about("Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"))
-            .subcommand(Command::new("config")
-                .arg(tenant_id_arg.clone())
-                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
-            .subcommand(Command::new("import").arg(tenant_id_arg.clone().required(true))
-                .about("Import a tenant that is present in remote storage, and create branches for its timelines"))
-        )
-        .subcommand(
-            Command::new("pageserver")
-                .arg_required_else_help(true)
-                .about("Manage pageserver")
-                .arg(pageserver_id_arg)
-                .subcommand(Command::new("status"))
-                .subcommand(Command::new("start")
-                    .about("Start local pageserver")
-                    .arg(timeout_arg.clone())
-                )
-                .subcommand(Command::new("stop")
-                    .about("Stop local pageserver")
-                    .arg(stop_mode_arg.clone())
-                )
-                .subcommand(Command::new("restart")
-                    .about("Restart local pageserver")
-                    .arg(timeout_arg.clone())
-                )
-        )
-        .subcommand(
-            Command::new("storage_controller")
-                .arg_required_else_help(true)
-                .about("Manage storage_controller")
-                .subcommand(Command::new("start").about("Start storage controller")
-                            .arg(timeout_arg.clone())
-                            .arg(instance_id.clone())
-                            .arg(base_port))
-                .subcommand(Command::new("stop").about("Stop storage controller")
-                            .arg(stop_mode_arg.clone())
-                            .arg(instance_id))
-        )
-        .subcommand(
-            Command::new("storage_broker")
-                .arg_required_else_help(true)
-                .about("Manage broker")
-                .subcommand(Command::new("start")
-                            .about("Start broker")
-                            .arg(timeout_arg.clone())
-                )
-                .subcommand(Command::new("stop")
-                            .about("Stop broker")
-                            .arg(stop_mode_arg.clone())
-                )
-        )
-        .subcommand(
-            Command::new("safekeeper")
-                .arg_required_else_help(true)
-                .about("Manage safekeepers")
-                .subcommand(Command::new("start")
-                            .about("Start local safekeeper")
-                            .arg(safekeeper_id_arg.clone())
-                            .arg(safekeeper_extra_opt_arg.clone())
-                            .arg(timeout_arg.clone())
-                )
-                .subcommand(Command::new("stop")
-                            .about("Stop local safekeeper")
-                            .arg(safekeeper_id_arg.clone())
-                            .arg(stop_mode_arg.clone())
-                )
-                .subcommand(Command::new("restart")
-                            .about("Restart local safekeeper")
-                            .arg(safekeeper_id_arg)
-                            .arg(stop_mode_arg.clone())
-                            .arg(safekeeper_extra_opt_arg)
-                            .arg(timeout_arg.clone())
-                )
-        )
-        .subcommand(
-            Command::new("endpoint")
-                .arg_required_else_help(true)
-                .about("Manage postgres instances")
-                .subcommand(Command::new("list").arg(tenant_id_arg.clone()))
-                .subcommand(Command::new("create")
-                    .about("Create a compute endpoint")
-                    .arg(endpoint_id_arg.clone())
-                    .arg(branch_name_arg.clone())
-                    .arg(tenant_id_arg.clone())
-                    .arg(lsn_arg.clone())
-                    .arg(pg_port_arg.clone())
-                    .arg(http_port_arg.clone())
-                    .arg(endpoint_pageserver_id_arg.clone())
-                    .arg(
-                        Arg::new("config-only")
-                            .help("Don't do basebackup, create endpoint directory with only config files")
-                            .long("config-only")
-                            .required(false))
-                    .arg(pg_version_arg.clone())
-                    .arg(hot_standby_arg.clone())
-                    .arg(update_catalog)
-                    .arg(allow_multiple.clone())
-                )
-                .subcommand(Command::new("start")
-                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
-                    .arg(endpoint_id_arg.clone())
-                    .arg(endpoint_pageserver_id_arg.clone())
-                    .arg(safekeepers_arg.clone())
-                    .arg(remote_ext_config_args)
-                    .arg(create_test_user)
-                    .arg(allow_multiple.clone())
-                    .arg(timeout_arg.clone())
-                )
-                .subcommand(Command::new("reconfigure")
-                            .about("Reconfigure the endpoint")
-                            .arg(endpoint_pageserver_id_arg)
-                            .arg(safekeepers_arg)
-                            .arg(endpoint_id_arg.clone())
-                            .arg(tenant_id_arg.clone())
-                )
-                .subcommand(
-                    Command::new("stop")
-                    .arg(endpoint_id_arg)
-                    .arg(
-                        Arg::new("destroy")
-                            .help("Also delete data directory (now optional, should be default in future)")
-                            .long("destroy")
-                            .action(ArgAction::SetTrue)
-                            .required(false)
-                    )
-                    .arg(
-                        Arg::new("mode")
-                            .help("Postgres shutdown mode, passed to \"pg_ctl -m <mode>\"")
-                            .long("mode")
-                            .action(ArgAction::Set)
-                            .required(false)
-                            .value_parser(["smart", "fast", "immediate"])
-                            .default_value("fast")
-                    )
-                )
-
-        )
-        .subcommand(
-            Command::new("mappings")
-                .arg_required_else_help(true)
-                .about("Manage neon_local branch name mappings")
-                .subcommand(
-                    Command::new("map")
-                        .about("Create new mapping which cannot exist already")
-                        .arg(branch_name_arg.clone())
-                        .arg(tenant_id_arg.clone())
-                        .arg(timeline_id_arg.clone())
-                )
-        )
-        // Obsolete old name for 'endpoint'. We now just print an error if it's used.
-        .subcommand(
-            Command::new("pg")
-                .hide(true)
-                .arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false))
-                .trailing_var_arg(true)
-        )
-        .subcommand(
-            Command::new("start")
-                .about("Start page server and safekeepers")
-                .arg(timeout_arg.clone())
-        )
-        .subcommand(
-            Command::new("stop")
-                .about("Stop page server and safekeepers")
-                .arg(stop_mode_arg)
-        )
-}
-
-#[test]
-fn verify_cli() {
-    cli().debug_assert();
-}
diff --git a/control_plane/src/branch_mappings.rs b/control_plane/src/branch_mappings.rs
new file mode 100644
index 0000000000..e89313df39
--- /dev/null
+++ b/control_plane/src/branch_mappings.rs
@@ -0,0 +1,94 @@
+//! Branch mappings for convenience
+
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+
+use anyhow::{bail, Context};
+use serde::{Deserialize, Serialize};
+
+use utils::id::{TenantId, TenantTimelineId, TimelineId};
+
+/// Keep human-readable aliases in memory (and persist them to config XXX), to hide tenant/timeline hex strings from the user.
+#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
+#[serde(default, deny_unknown_fields)]
+pub struct BranchMappings {
+    /// Default tenant ID to use with the 'neon_local' command line utility, when
+    /// --tenant_id is not explicitly specified. This comes from the branches.
+    pub default_tenant_id: Option<TenantId>,
+
+    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
+    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
+    // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
+    pub mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
+}
+
+impl BranchMappings {
+    pub fn register_branch_mapping(
+        &mut self,
+        branch_name: String,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> anyhow::Result<()> {
+        let existing_values = self.mappings.entry(branch_name.clone()).or_default();
+
+        let existing_ids = existing_values
+            .iter()
+            .find(|(existing_tenant_id, _)| existing_tenant_id == &tenant_id);
+
+        if let Some((_, old_timeline_id)) = existing_ids {
+            if old_timeline_id == &timeline_id {
+                Ok(())
+            } else {
+                bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
+            }
+        } else {
+            existing_values.push((tenant_id, timeline_id));
+            Ok(())
+        }
+    }
+
+    pub fn get_branch_timeline_id(
+        &self,
+        branch_name: &str,
+        tenant_id: TenantId,
+    ) -> Option<TimelineId> {
+        // If it looks like a timeline ID, return it as it is
+        if let Ok(timeline_id) = branch_name.parse::<TimelineId>() {
+            return Some(timeline_id);
+        }
+
+        self.mappings
+            .get(branch_name)?
+            .iter()
+            .find(|(mapped_tenant_id, _)| mapped_tenant_id == &tenant_id)
+            .map(|&(_, timeline_id)| timeline_id)
+            .map(TimelineId::from)
+    }
+
+    pub fn timeline_name_mappings(&self) -> HashMap<TenantTimelineId, String> {
+        self.mappings
+            .iter()
+            .flat_map(|(name, tenant_timelines)| {
+                tenant_timelines.iter().map(|&(tenant_id, timeline_id)| {
+                    (TenantTimelineId::new(tenant_id, timeline_id), name.clone())
+                })
+            })
+            .collect()
+    }
+
+    pub fn persist(&self, path: &Path) -> anyhow::Result<()> {
+        let content = &toml::to_string_pretty(self)?;
+        fs::write(path, content).with_context(|| {
+            format!(
+                "Failed to write branch information into path '{}'",
+                path.display()
+            )
+        })
+    }
+
+    pub fn load(path: &Path) -> anyhow::Result<BranchMappings> {
+        let branches_file_contents = fs::read_to_string(path)?;
+        Ok(toml::from_str(branches_file_contents.as_str())?)
+    }
+}
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index 7554a03a68..18f396b886 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -561,6 +561,7 @@ impl Endpoint {
             operation_uuid: None,
             features: self.features.clone(),
             swap_size_bytes: None,
+            disk_quota_bytes: None,
             cluster: Cluster {
                 cluster_id: None, // project ID: not used
                 name: None,       // project name: not used
diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs
index 573f1688d5..7a019bce88 100644
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -113,7 +113,7 @@ impl SafekeeperNode {
 
     pub async fn start(
         &self,
-        extra_opts: Vec<String>,
+        extra_opts: &[String],
         retry_timeout: &Duration,
     ) -> anyhow::Result<()> {
         print!(
@@ -196,7 +196,7 @@ impl SafekeeperNode {
             ]);
         }
 
-        args.extend(extra_opts);
+        args.extend_from_slice(extra_opts);
 
         background_process::start_process(
             &format!("safekeeper-{id}"),
diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs
index 0c0e67dff0..36e5e04c86 100644
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -347,7 +347,7 @@ impl StorageController {
 
             if !tokio::fs::try_exists(&pg_data_path).await? {
                 let initdb_args = [
-                    "-D",
+                    "--pgdata",
                     pg_data_path.as_ref(),
                     "--username",
                     &username(),
diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs
index 883c624f71..83515a00a0 100644
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -50,6 +50,16 @@ pub struct ComputeSpec {
     #[serde(default)]
     pub swap_size_bytes: Option<u64>,
 
+    /// If compute_ctl was passed `--set-disk-quota-for-fs`, a value of `Some(_)` instructs
+    /// compute_ctl to run `/neonvm/bin/set-disk-quota` with the given size and fs, when the
+    /// spec is first received.
+    ///
+    /// Both this field and `--set-disk-quota-for-fs` are required, so that the control plane's
+    /// spec generation doesn't need to be aware of the actual compute it's running on, while
+    /// guaranteeing gradual rollout of disk quota.
+    #[serde(default)]
+    pub disk_quota_bytes: Option<u64>,
+
     /// Expected cluster state at the end of transition process.
     pub cluster: Cluster,
     pub delta_operations: Option<Vec<DeltaOp>>,
@@ -268,6 +278,22 @@ pub struct GenericOption {
 /// declare a `trait` on it.
 pub type GenericOptions = Option<Vec<GenericOption>>;
 
+/// Configured the local-proxy application with the relevant JWKS and roles it should
+/// use for authorizing connect requests using JWT.
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct LocalProxySpec {
+    pub jwks: Vec<JwksSettings>,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize)]
+pub struct JwksSettings {
+    pub id: String,
+    pub role_names: Vec<String>,
+    pub jwks_url: String,
+    pub provider_name: String,
+    pub jwt_audience: Option<String>,
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs
index e274d24585..085540e7b9 100644
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -984,6 +984,7 @@ pub fn short_error(e: &QueryError) -> String {
 }
 
 fn log_query_error(query: &str, e: &QueryError) {
+    // If you want to change the log level of a specific error, also re-categorize it in `BasebackupQueryTimeOngoingRecording`.
     match e {
         QueryError::Disconnected(ConnectionError::Io(io_error)) => {
             if is_expected_io_error(io_error) {
diff --git a/libs/postgres_ffi/wal_craft/src/lib.rs b/libs/postgres_ffi/wal_craft/src/lib.rs
index 5c0abda522..9524a5149b 100644
--- a/libs/postgres_ffi/wal_craft/src/lib.rs
+++ b/libs/postgres_ffi/wal_craft/src/lib.rs
@@ -93,9 +93,9 @@ impl Conf {
         );
         let output = self
             .new_pg_command("initdb")?
-            .arg("-D")
+            .arg("--pgdata")
             .arg(&self.datadir)
-            .args(["-U", "postgres", "--no-instructions", "--no-sync"])
+            .args(["--username", "postgres", "--no-instructions", "--no-sync"])
             .output()?;
         debug!("initdb output: {:?}", output);
         ensure!(
diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs
index 2d95ac42e6..592f1ded0d 100644
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -736,4 +736,22 @@ impl Client {
             .await
             .map_err(Error::ReceiveBody)
     }
+
+    pub async fn timeline_init_lsn_lease(
+        &self,
+        tenant_shard_id: TenantShardId,
+        timeline_id: TimelineId,
+        lsn: Lsn,
+    ) -> Result<LsnLease> {
+        let uri = format!(
+            "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/lsn_lease",
+            self.mgmt_api_endpoint,
+        );
+
+        self.request(Method::POST, &uri, LsnLeaseRequest { lsn })
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
 }
diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index d15a0e47a4..e9e52acee6 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -15,7 +15,7 @@ use clap::{Arg, ArgAction, Command};
 
 use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
 use pageserver::config::PageserverIdentity;
-use pageserver::control_plane_client::ControlPlaneClient;
+use pageserver::controller_upcall_client::ControllerUpcallClient;
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
 use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME};
@@ -396,7 +396,7 @@ fn start_pageserver(
     // Set up deletion queue
     let (deletion_queue, deletion_workers) = DeletionQueue::new(
         remote_storage.clone(),
-        ControlPlaneClient::new(conf, &shutdown_pageserver),
+        ControllerUpcallClient::new(conf, &shutdown_pageserver),
         conf,
     );
     if let Some(deletion_workers) = deletion_workers {
diff --git a/pageserver/src/control_plane_client.rs b/pageserver/src/controller_upcall_client.rs
similarity index 80%
rename from pageserver/src/control_plane_client.rs
rename to pageserver/src/controller_upcall_client.rs
index d0a967b920..73fc6dc3ab 100644
--- a/pageserver/src/control_plane_client.rs
+++ b/pageserver/src/controller_upcall_client.rs
@@ -17,9 +17,12 @@ use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};
 use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
 use pageserver_api::config::NodeMetadata;
 
-/// The Pageserver's client for using the control plane API: this is a small subset
-/// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
-pub struct ControlPlaneClient {
+/// The Pageserver's client for using the storage controller upcall API: this is a small API
+/// for dealing with generations (see docs/rfcs/025-generation-numbers.md).
+///
+/// The server presenting this API may either be the storage controller or some other
+/// service (such as the Neon control plane) providing a store of generation numbers.
+pub struct ControllerUpcallClient {
     http_client: reqwest::Client,
     base_url: Url,
     node_id: NodeId,
@@ -45,7 +48,7 @@ pub trait ControlPlaneGenerationsApi {
     ) -> impl Future<Output = Result<HashMap<TenantShardId, bool>, RetryForeverError>> + Send;
 }
 
-impl ControlPlaneClient {
+impl ControllerUpcallClient {
     /// A None return value indicates that the input `conf` object does not have control
     /// plane API enabled.
     pub fn new(conf: &'static PageServerConf, cancel: &CancellationToken) -> Option<Self> {
@@ -114,7 +117,7 @@ impl ControlPlaneClient {
     }
 }
 
-impl ControlPlaneGenerationsApi for ControlPlaneClient {
+impl ControlPlaneGenerationsApi for ControllerUpcallClient {
     /// Block until we get a successful response, or error out if we are shut down
     async fn re_attach(
         &self,
@@ -216,29 +219,38 @@ impl ControlPlaneGenerationsApi for ControlPlaneClient {
             .join("validate")
             .expect("Failed to build validate path");
 
-        let request = ValidateRequest {
-            tenants: tenants
-                .into_iter()
-                .map(|(id, gen)| ValidateRequestTenant {
-                    id,
-                    gen: gen
-                        .into()
-                        .expect("Generation should always be valid for a Tenant doing deletions"),
-                })
-                .collect(),
-        };
+        // When sending validate requests, break them up into chunks so that we
+        // avoid possible edge cases of generating any HTTP requests that
+        // require database I/O across many thousands of tenants.
+        let mut result: HashMap<TenantShardId, bool> = HashMap::with_capacity(tenants.len());
+        for tenant_chunk in (tenants).chunks(128) {
+            let request = ValidateRequest {
+                tenants: tenant_chunk
+                    .iter()
+                    .map(|(id, generation)| ValidateRequestTenant {
+                        id: *id,
+                        gen: (*generation).into().expect(
+                            "Generation should always be valid for a Tenant doing deletions",
+                        ),
+                    })
+                    .collect(),
+            };
 
-        failpoint_support::sleep_millis_async!("control-plane-client-validate-sleep", &self.cancel);
-        if self.cancel.is_cancelled() {
-            return Err(RetryForeverError::ShuttingDown);
+            failpoint_support::sleep_millis_async!(
+                "control-plane-client-validate-sleep",
+                &self.cancel
+            );
+            if self.cancel.is_cancelled() {
+                return Err(RetryForeverError::ShuttingDown);
+            }
+
+            let response: ValidateResponse =
+                self.retry_http_forever(&re_attach_path, request).await?;
+            for rt in response.tenants {
+                result.insert(rt.id, rt.valid);
+            }
         }
 
-        let response: ValidateResponse = self.retry_http_forever(&re_attach_path, request).await?;
-
-        Ok(response
-            .tenants
-            .into_iter()
-            .map(|rt| (rt.id, rt.valid))
-            .collect())
+        Ok(result.into_iter().collect())
     }
 }
diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs
index 22f7d5b824..73bdc90213 100644
--- a/pageserver/src/deletion_queue.rs
+++ b/pageserver/src/deletion_queue.rs
@@ -6,7 +6,7 @@ use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::Duration;
 
-use crate::control_plane_client::ControlPlaneGenerationsApi;
+use crate::controller_upcall_client::ControlPlaneGenerationsApi;
 use crate::metrics;
 use crate::tenant::remote_timeline_client::remote_layer_path;
 use crate::tenant::remote_timeline_client::remote_timeline_path;
@@ -622,7 +622,7 @@ impl DeletionQueue {
     /// If remote_storage is None, then the returned workers will also be None.
     pub fn new<C>(
         remote_storage: GenericRemoteStorage,
-        control_plane_client: Option<C>,
+        controller_upcall_client: Option<C>,
         conf: &'static PageServerConf,
     ) -> (Self, Option<DeletionQueueWorkers<C>>)
     where
@@ -662,7 +662,7 @@ impl DeletionQueue {
                     conf,
                     backend_rx,
                     executor_tx,
-                    control_plane_client,
+                    controller_upcall_client,
                     lsn_table.clone(),
                     cancel.clone(),
                 ),
@@ -704,7 +704,7 @@ mod test {
     use tokio::task::JoinHandle;
 
     use crate::{
-        control_plane_client::RetryForeverError,
+        controller_upcall_client::RetryForeverError,
         repository::Key,
         tenant::{harness::TenantHarness, storage_layer::DeltaLayerName},
     };
diff --git a/pageserver/src/deletion_queue/validator.rs b/pageserver/src/deletion_queue/validator.rs
index d215fd2b7d..1d55581ebd 100644
--- a/pageserver/src/deletion_queue/validator.rs
+++ b/pageserver/src/deletion_queue/validator.rs
@@ -25,8 +25,8 @@ use tracing::info;
 use tracing::warn;
 
 use crate::config::PageServerConf;
-use crate::control_plane_client::ControlPlaneGenerationsApi;
-use crate::control_plane_client::RetryForeverError;
+use crate::controller_upcall_client::ControlPlaneGenerationsApi;
+use crate::controller_upcall_client::RetryForeverError;
 use crate::metrics;
 use crate::virtual_file::MaybeFatalIo;
 
@@ -61,7 +61,7 @@ where
     tx: tokio::sync::mpsc::Sender<DeleterMessage>,
 
     // Client for calling into control plane API for validation of deletes
-    control_plane_client: Option<C>,
+    controller_upcall_client: Option<C>,
 
     // DeletionLists which are waiting generation validation.  Not safe to
     // execute until [`validate`] has processed them.
@@ -94,7 +94,7 @@ where
         conf: &'static PageServerConf,
         rx: tokio::sync::mpsc::Receiver<ValidatorQueueMessage>,
         tx: tokio::sync::mpsc::Sender<DeleterMessage>,
-        control_plane_client: Option<C>,
+        controller_upcall_client: Option<C>,
         lsn_table: Arc<std::sync::RwLock<VisibleLsnUpdates>>,
         cancel: CancellationToken,
     ) -> Self {
@@ -102,7 +102,7 @@ where
             conf,
             rx,
             tx,
-            control_plane_client,
+            controller_upcall_client,
             lsn_table,
             pending_lists: Vec::new(),
             validated_lists: Vec::new(),
@@ -145,8 +145,8 @@ where
             return Ok(());
         }
 
-        let tenants_valid = if let Some(control_plane_client) = &self.control_plane_client {
-            match control_plane_client
+        let tenants_valid = if let Some(controller_upcall_client) = &self.controller_upcall_client {
+            match controller_upcall_client
                 .validate(tenant_generations.iter().map(|(k, v)| (*k, *v)).collect())
                 .await
             {
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 6a10d4fb1c..6f0402e7b0 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -589,6 +589,10 @@ async fn timeline_create_handler(
                 StatusCode::SERVICE_UNAVAILABLE,
                 HttpErrorBody::from_msg(e.to_string()),
             ),
+            Err(e @ tenant::CreateTimelineError::AncestorArchived) => json_response(
+                StatusCode::NOT_ACCEPTABLE,
+                HttpErrorBody::from_msg(e.to_string()),
+            ),
             Err(tenant::CreateTimelineError::ShuttingDown) => json_response(
                 StatusCode::SERVICE_UNAVAILABLE,
                 HttpErrorBody::from_msg("tenant shutting down".to_string()),
@@ -820,7 +824,7 @@ async fn get_lsn_by_timestamp_handler(
 
     let lease = if with_lease {
         timeline
-            .make_lsn_lease(lsn, timeline.get_lsn_lease_length_for_ts(), &ctx)
+            .init_lsn_lease(lsn, timeline.get_lsn_lease_length_for_ts(), &ctx)
             .inspect_err(|_| {
                 warn!("fail to grant a lease to {}", lsn);
             })
@@ -1688,9 +1692,18 @@ async fn lsn_lease_handler(
     let timeline =
         active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
             .await?;
-    let result = timeline
-        .make_lsn_lease(lsn, timeline.get_lsn_lease_length(), &ctx)
-        .map_err(|e| ApiError::InternalServerError(e.context("lsn lease http handler")))?;
+
+    let result = async {
+        timeline
+            .init_lsn_lease(lsn, timeline.get_lsn_lease_length(), &ctx)
+            .map_err(|e| {
+                ApiError::InternalServerError(
+                    e.context(format!("invalid lsn lease request at {lsn}")),
+                )
+            })
+    }
+    .instrument(info_span!("init_lsn_lease", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
+    .await?;
 
     json_response(StatusCode::OK, result)
 }
diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs
index 7a9cf495c7..08abfbd647 100644
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -6,7 +6,7 @@ pub mod basebackup;
 pub mod config;
 pub mod consumption_metrics;
 pub mod context;
-pub mod control_plane_client;
+pub mod controller_upcall_client;
 pub mod deletion_queue;
 pub mod disk_usage_eviction_task;
 pub mod http;
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index 366bd82903..b76efa5b48 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -8,6 +8,8 @@ use metrics::{
 };
 use once_cell::sync::Lazy;
 use pageserver_api::shard::TenantShardId;
+use postgres_backend::{is_expected_io_error, QueryError};
+use pq_proto::framed::ConnectionError;
 use strum::{EnumCount, VariantNames};
 use strum_macros::{IntoStaticStr, VariantNames};
 use tracing::warn;
@@ -1508,6 +1510,7 @@ static COMPUTE_STARTUP_BUCKETS: Lazy<[f64; 28]> = Lazy::new(|| {
 pub(crate) struct BasebackupQueryTime {
     ok: Histogram,
     error: Histogram,
+    client_error: Histogram,
 }
 
 pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|| {
@@ -1521,6 +1524,7 @@ pub(crate) static BASEBACKUP_QUERY_TIME: Lazy<BasebackupQueryTime> = Lazy::new(|
     BasebackupQueryTime {
         ok: vec.get_metric_with_label_values(&["ok"]).unwrap(),
         error: vec.get_metric_with_label_values(&["error"]).unwrap(),
+        client_error: vec.get_metric_with_label_values(&["client_error"]).unwrap(),
     }
 });
 
@@ -1557,7 +1561,7 @@ impl BasebackupQueryTime {
 }
 
 impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
-    pub(crate) fn observe<T, E>(self, res: &Result<T, E>) {
+    pub(crate) fn observe<T>(self, res: &Result<T, QueryError>) {
         let elapsed = self.start.elapsed();
         let ex_throttled = self
             .ctx
@@ -1576,10 +1580,15 @@ impl<'a, 'c> BasebackupQueryTimeOngoingRecording<'a, 'c> {
                 elapsed
             }
         };
-        let metric = if res.is_ok() {
-            &self.parent.ok
-        } else {
-            &self.parent.error
+        // If you want to change categorize of a specific error, also change it in `log_query_error`.
+        let metric = match res {
+            Ok(_) => &self.parent.ok,
+            Err(QueryError::Disconnected(ConnectionError::Io(io_error)))
+                if is_expected_io_error(io_error) =>
+            {
+                &self.parent.client_error
+            }
+            Err(_) => &self.parent.error,
         };
         metric.observe(ex_throttled.as_secs_f64());
     }
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index 9261b7481d..8fa6b9a7f0 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -273,10 +273,20 @@ async fn page_service_conn_main(
                 info!("Postgres client disconnected ({io_error})");
                 Ok(())
             } else {
-                Err(io_error).context("Postgres connection error")
+                let tenant_id = conn_handler.timeline_handles.tenant_id();
+                Err(io_error).context(format!(
+                    "Postgres connection error for tenant_id={:?} client at peer_addr={}",
+                    tenant_id, peer_addr
+                ))
             }
         }
-        other => other.context("Postgres query error"),
+        other => {
+            let tenant_id = conn_handler.timeline_handles.tenant_id();
+            other.context(format!(
+                "Postgres query error for tenant_id={:?} client peer_addr={}",
+                tenant_id, peer_addr
+            ))
+        }
     }
 }
 
@@ -340,6 +350,10 @@ impl TimelineHandles {
                 }
             })
     }
+
+    fn tenant_id(&self) -> Option<TenantId> {
+        self.wrapper.tenant_id.get().copied()
+    }
 }
 
 pub(crate) struct TenantManagerWrapper {
@@ -819,7 +833,7 @@ impl PageServerHandler {
         set_tracing_field_shard_id(&timeline);
 
         let lease = timeline
-            .make_lsn_lease(lsn, timeline.get_lsn_lease_length(), ctx)
+            .renew_lsn_lease(lsn, timeline.get_lsn_lease_length(), ctx)
             .inspect_err(|e| {
                 warn!("{e}");
             })
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 53cbaea621..db88303f7b 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -21,6 +21,7 @@ use futures::stream::FuturesUnordered;
 use futures::StreamExt;
 use pageserver_api::models;
 use pageserver_api::models::AuxFilePolicy;
+use pageserver_api::models::LsnLease;
 use pageserver_api::models::TimelineArchivalState;
 use pageserver_api::models::TimelineState;
 use pageserver_api::models::TopTenantShardItem;
@@ -182,27 +183,54 @@ pub struct TenantSharedResources {
 pub(super) struct AttachedTenantConf {
     tenant_conf: TenantConfOpt,
     location: AttachedLocationConfig,
+    /// The deadline before which we are blocked from GC so that
+    /// leases have a chance to be renewed.
+    lsn_lease_deadline: Option<tokio::time::Instant>,
 }
 
 impl AttachedTenantConf {
     fn new(tenant_conf: TenantConfOpt, location: AttachedLocationConfig) -> Self {
+        // Sets a deadline before which we cannot proceed to GC due to lsn lease.
+        //
+        // We do this as the leases mapping are not persisted to disk. By delaying GC by lease
+        // length, we guarantee that all the leases we granted before will have a chance to renew
+        // when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
+        let lsn_lease_deadline = if location.attach_mode == AttachmentMode::Single {
+            Some(
+                tokio::time::Instant::now()
+                    + tenant_conf
+                        .lsn_lease_length
+                        .unwrap_or(LsnLease::DEFAULT_LENGTH),
+            )
+        } else {
+            // We don't use `lsn_lease_deadline` to delay GC in AttachedMulti and AttachedStale
+            // because we don't do GC in these modes.
+            None
+        };
+
         Self {
             tenant_conf,
             location,
+            lsn_lease_deadline,
         }
     }
 
     fn try_from(location_conf: LocationConf) -> anyhow::Result<Self> {
         match &location_conf.mode {
-            LocationMode::Attached(attach_conf) => Ok(Self {
-                tenant_conf: location_conf.tenant_conf,
-                location: *attach_conf,
-            }),
+            LocationMode::Attached(attach_conf) => {
+                Ok(Self::new(location_conf.tenant_conf, *attach_conf))
+            }
             LocationMode::Secondary(_) => {
                 anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode")
             }
         }
     }
+
+    fn is_gc_blocked_by_lsn_lease_deadline(&self) -> bool {
+        self.lsn_lease_deadline
+            .map(|d| tokio::time::Instant::now() < d)
+            .unwrap_or(false)
+    }
 }
 struct TimelinePreload {
     timeline_id: TimelineId,
@@ -563,6 +591,8 @@ pub enum CreateTimelineError {
     AncestorLsn(anyhow::Error),
     #[error("ancestor timeline is not active")]
     AncestorNotActive,
+    #[error("ancestor timeline is archived")]
+    AncestorArchived,
     #[error("tenant shutting down")]
     ShuttingDown,
     #[error(transparent)]
@@ -1698,6 +1728,11 @@ impl Tenant {
                     return Err(CreateTimelineError::AncestorNotActive);
                 }
 
+                if ancestor_timeline.is_archived() == Some(true) {
+                    info!("tried to branch archived timeline");
+                    return Err(CreateTimelineError::AncestorArchived);
+                }
+
                 if let Some(lsn) = ancestor_start_lsn.as_mut() {
                     *lsn = lsn.align();
 
@@ -1815,6 +1850,11 @@ impl Tenant {
                 info!("Skipping GC in location state {:?}", conf.location);
                 return Ok(GcResult::default());
             }
+
+            if conf.is_gc_blocked_by_lsn_lease_deadline() {
+                info!("Skipping GC because lsn lease deadline is not reached");
+                return Ok(GcResult::default());
+            }
         }
 
         let _guard = match self.gc_block.start().await {
@@ -2623,6 +2663,8 @@ impl Tenant {
             Arc::new(AttachedTenantConf {
                 tenant_conf: new_tenant_conf.clone(),
                 location: inner.location,
+                // Attached location is not changed, no need to update lsn lease deadline.
+                lsn_lease_deadline: inner.lsn_lease_deadline,
             })
         });
 
@@ -3880,9 +3922,9 @@ async fn run_initdb(
     let _permit = INIT_DB_SEMAPHORE.acquire().await;
 
     let initdb_command = tokio::process::Command::new(&initdb_bin_path)
-        .args(["-D", initdb_target_dir.as_ref()])
-        .args(["-U", &conf.superuser])
-        .args(["-E", "utf8"])
+        .args(["--pgdata", initdb_target_dir.as_ref()])
+        .args(["--username", &conf.superuser])
+        .args(["--encoding", "utf8"])
         .arg("--no-instructions")
         .arg("--no-sync")
         .env_clear()
@@ -4454,13 +4496,17 @@ mod tests {
         tline.freeze_and_flush().await.map_err(|e| e.into())
     }
 
-    #[tokio::test]
+    #[tokio::test(start_paused = true)]
     async fn test_prohibit_branch_creation_on_garbage_collected_data() -> anyhow::Result<()> {
         let (tenant, ctx) =
             TenantHarness::create("test_prohibit_branch_creation_on_garbage_collected_data")
                 .await?
                 .load()
                 .await;
+        // Advance to the lsn lease deadline so that GC is not blocked by
+        // initial transition into AttachedSingle.
+        tokio::time::advance(tenant.get_lsn_lease_length()).await;
+        tokio::time::resume();
         let tline = tenant
             .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
             .await?;
@@ -7237,9 +7283,17 @@ mod tests {
         Ok(())
     }
 
-    #[tokio::test]
+    #[tokio::test(start_paused = true)]
     async fn test_lsn_lease() -> anyhow::Result<()> {
-        let (tenant, ctx) = TenantHarness::create("test_lsn_lease").await?.load().await;
+        let (tenant, ctx) = TenantHarness::create("test_lsn_lease")
+            .await
+            .unwrap()
+            .load()
+            .await;
+        // Advance to the lsn lease deadline so that GC is not blocked by
+        // initial transition into AttachedSingle.
+        tokio::time::advance(tenant.get_lsn_lease_length()).await;
+        tokio::time::resume();
         let key = Key::from_hex("010000000033333333444444445500000000").unwrap();
 
         let end_lsn = Lsn(0x100);
@@ -7267,24 +7321,33 @@ mod tests {
 
         let leased_lsns = [0x30, 0x50, 0x70];
         let mut leases = Vec::new();
-        let _: anyhow::Result<_> = leased_lsns.iter().try_for_each(|n| {
-            leases.push(timeline.make_lsn_lease(Lsn(*n), timeline.get_lsn_lease_length(), &ctx)?);
-            Ok(())
+        leased_lsns.iter().for_each(|n| {
+            leases.push(
+                timeline
+                    .init_lsn_lease(Lsn(*n), timeline.get_lsn_lease_length(), &ctx)
+                    .expect("lease request should succeed"),
+            );
         });
 
-        // Renewing with shorter lease should not change the lease.
-        let updated_lease_0 =
-            timeline.make_lsn_lease(Lsn(leased_lsns[0]), Duration::from_secs(0), &ctx)?;
-        assert_eq!(updated_lease_0.valid_until, leases[0].valid_until);
+        let updated_lease_0 = timeline
+            .renew_lsn_lease(Lsn(leased_lsns[0]), Duration::from_secs(0), &ctx)
+            .expect("lease renewal should succeed");
+        assert_eq!(
+            updated_lease_0.valid_until, leases[0].valid_until,
+            " Renewing with shorter lease should not change the lease."
+        );
 
-        // Renewing with a long lease should renew lease with later expiration time.
-        let updated_lease_1 = timeline.make_lsn_lease(
-            Lsn(leased_lsns[1]),
-            timeline.get_lsn_lease_length() * 2,
-            &ctx,
-        )?;
-
-        assert!(updated_lease_1.valid_until > leases[1].valid_until);
+        let updated_lease_1 = timeline
+            .renew_lsn_lease(
+                Lsn(leased_lsns[1]),
+                timeline.get_lsn_lease_length() * 2,
+                &ctx,
+            )
+            .expect("lease renewal should succeed");
+        assert!(
+            updated_lease_1.valid_until > leases[1].valid_until,
+            "Renewing with a long lease should renew lease with later expiration time."
+        );
 
         // Force set disk consistent lsn so we can get the cutoff at `end_lsn`.
         info!(
@@ -7301,7 +7364,8 @@ mod tests {
                 &CancellationToken::new(),
                 &ctx,
             )
-            .await?;
+            .await
+            .unwrap();
 
         // Keeping everything <= Lsn(0x80) b/c leases:
         // 0/10: initdb layer
@@ -7315,13 +7379,16 @@ mod tests {
         // Make lease on a already GC-ed LSN.
         // 0/80 does not have a valid lease + is below latest_gc_cutoff
         assert!(Lsn(0x80) < *timeline.get_latest_gc_cutoff_lsn());
-        let res = timeline.make_lsn_lease(Lsn(0x80), timeline.get_lsn_lease_length(), &ctx);
-        assert!(res.is_err());
+        timeline
+            .init_lsn_lease(Lsn(0x80), timeline.get_lsn_lease_length(), &ctx)
+            .expect_err("lease request on GC-ed LSN should fail");
 
         // Should still be able to renew a currently valid lease
         // Assumption: original lease to is still valid for 0/50.
-        let _ =
-            timeline.make_lsn_lease(Lsn(leased_lsns[1]), timeline.get_lsn_lease_length(), &ctx)?;
+        // (use `Timeline::init_lsn_lease` for testing so it always does validation)
+        timeline
+            .init_lsn_lease(Lsn(leased_lsns[1]), timeline.get_lsn_lease_length(), &ctx)
+            .expect("lease renewal with validation should succeed");
 
         Ok(())
     }
diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs
index 8eaa8a001c..1e8fa8d1d6 100644
--- a/pageserver/src/tenant/checks.rs
+++ b/pageserver/src/tenant/checks.rs
@@ -5,6 +5,7 @@ use itertools::Itertools;
 use super::storage_layer::LayerName;
 
 /// Checks whether a layer map is valid (i.e., is a valid result of the current compaction algorithm if nothing goes wrong).
+///
 /// The function checks if we can split the LSN range of a delta layer only at the LSNs of the delta layers. For example,
 ///
 /// ```plain
diff --git a/pageserver/src/tenant/gc_block.rs b/pageserver/src/tenant/gc_block.rs
index 1271d25b76..f7a7836a12 100644
--- a/pageserver/src/tenant/gc_block.rs
+++ b/pageserver/src/tenant/gc_block.rs
@@ -1,29 +1,12 @@
-use std::{collections::HashMap, time::Duration};
+use std::collections::HashMap;
 
-use super::remote_timeline_client::index::GcBlockingReason;
-use tokio::time::Instant;
 use utils::id::TimelineId;
 
-type TimelinesBlocked = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
+use super::remote_timeline_client::index::GcBlockingReason;
 
-#[derive(Default)]
-struct Storage {
-    timelines_blocked: TimelinesBlocked,
-    /// The deadline before which we are blocked from GC so that
-    /// leases have a chance to be renewed.
-    lsn_lease_deadline: Option<Instant>,
-}
+type Storage = HashMap<TimelineId, enumset::EnumSet<GcBlockingReason>>;
 
-impl Storage {
-    fn is_blocked_by_lsn_lease_deadline(&self) -> bool {
-        self.lsn_lease_deadline
-            .map(|d| Instant::now() < d)
-            .unwrap_or(false)
-    }
-}
-
-/// GcBlock provides persistent (per-timeline) gc blocking and facilitates transient time based gc
-/// blocking.
+/// GcBlock provides persistent (per-timeline) gc blocking.
 #[derive(Default)]
 pub(crate) struct GcBlock {
     /// The timelines which have current reasons to block gc.
@@ -66,17 +49,6 @@ impl GcBlock {
         }
     }
 
-    /// Sets a deadline before which we cannot proceed to GC due to lsn lease.
-    ///
-    /// We do this as the leases mapping are not persisted to disk. By delaying GC by lease
-    /// length, we guarantee that all the leases we granted before will have a chance to renew
-    /// when we run GC for the first time after restart / transition from AttachedMulti to AttachedSingle.
-    pub(super) fn set_lsn_lease_deadline(&self, lsn_lease_length: Duration) {
-        let deadline = Instant::now() + lsn_lease_length;
-        let mut g = self.reasons.lock().unwrap();
-        g.lsn_lease_deadline = Some(deadline);
-    }
-
     /// Describe the current gc blocking reasons.
     ///
     /// TODO: make this json serializable.
@@ -102,7 +74,7 @@ impl GcBlock {
     ) -> anyhow::Result<bool> {
         let (added, uploaded) = {
             let mut g = self.reasons.lock().unwrap();
-            let set = g.timelines_blocked.entry(timeline.timeline_id).or_default();
+            let set = g.entry(timeline.timeline_id).or_default();
             let added = set.insert(reason);
 
             // LOCK ORDER: intentionally hold the lock, see self.reasons.
@@ -133,7 +105,7 @@ impl GcBlock {
 
         let (remaining_blocks, uploaded) = {
             let mut g = self.reasons.lock().unwrap();
-            match g.timelines_blocked.entry(timeline.timeline_id) {
+            match g.entry(timeline.timeline_id) {
                 Entry::Occupied(mut oe) => {
                     let set = oe.get_mut();
                     set.remove(reason);
@@ -147,7 +119,7 @@ impl GcBlock {
                 }
             }
 
-            let remaining_blocks = g.timelines_blocked.len();
+            let remaining_blocks = g.len();
 
             // LOCK ORDER: intentionally hold the lock while scheduling; see self.reasons
             let uploaded = timeline
@@ -172,11 +144,11 @@ impl GcBlock {
     pub(crate) fn before_delete(&self, timeline: &super::Timeline) {
         let unblocked = {
             let mut g = self.reasons.lock().unwrap();
-            if g.timelines_blocked.is_empty() {
+            if g.is_empty() {
                 return;
             }
 
-            g.timelines_blocked.remove(&timeline.timeline_id);
+            g.remove(&timeline.timeline_id);
 
             BlockingReasons::clean_and_summarize(g).is_none()
         };
@@ -187,11 +159,10 @@ impl GcBlock {
     }
 
     /// Initialize with the non-deleted timelines of this tenant.
-    pub(crate) fn set_scanned(&self, scanned: TimelinesBlocked) {
+    pub(crate) fn set_scanned(&self, scanned: Storage) {
         let mut g = self.reasons.lock().unwrap();
-        assert!(g.timelines_blocked.is_empty());
-        g.timelines_blocked
-            .extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
+        assert!(g.is_empty());
+        g.extend(scanned.into_iter().filter(|(_, v)| !v.is_empty()));
 
         if let Some(reasons) = BlockingReasons::clean_and_summarize(g) {
             tracing::info!(summary=?reasons, "initialized with gc blocked");
@@ -205,7 +176,6 @@ pub(super) struct Guard<'a> {
 
 #[derive(Debug)]
 pub(crate) struct BlockingReasons {
-    tenant_blocked_by_lsn_lease_deadline: bool,
     timelines: usize,
     reasons: enumset::EnumSet<GcBlockingReason>,
 }
@@ -214,8 +184,8 @@ impl std::fmt::Display for BlockingReasons {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(
             f,
-            "tenant_blocked_by_lsn_lease_deadline: {}, {} timelines block for {:?}",
-            self.tenant_blocked_by_lsn_lease_deadline, self.timelines, self.reasons
+            "{} timelines block for {:?}",
+            self.timelines, self.reasons
         )
     }
 }
@@ -223,15 +193,13 @@ impl std::fmt::Display for BlockingReasons {
 impl BlockingReasons {
     fn clean_and_summarize(mut g: std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
         let mut reasons = enumset::EnumSet::empty();
-        g.timelines_blocked.retain(|_key, value| {
+        g.retain(|_key, value| {
             reasons = reasons.union(*value);
             !value.is_empty()
         });
-        let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
-        if !g.timelines_blocked.is_empty() || blocked_by_lsn_lease_deadline {
+        if !g.is_empty() {
             Some(BlockingReasons {
-                tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
-                timelines: g.timelines_blocked.len(),
+                timelines: g.len(),
                 reasons,
             })
         } else {
@@ -240,17 +208,14 @@ impl BlockingReasons {
     }
 
     fn summarize(g: &std::sync::MutexGuard<'_, Storage>) -> Option<Self> {
-        let blocked_by_lsn_lease_deadline = g.is_blocked_by_lsn_lease_deadline();
-        if g.timelines_blocked.is_empty() && !blocked_by_lsn_lease_deadline {
+        if g.is_empty() {
             None
         } else {
             let reasons = g
-                .timelines_blocked
                 .values()
                 .fold(enumset::EnumSet::empty(), |acc, next| acc.union(*next));
             Some(BlockingReasons {
-                tenant_blocked_by_lsn_lease_deadline: blocked_by_lsn_lease_deadline,
-                timelines: g.timelines_blocked.len(),
+                timelines: g.len(),
                 reasons,
             })
         }
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 1e7c1e10a5..c7212e89ba 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -30,8 +30,8 @@ use utils::{backoff, completion, crashsafe};
 
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
-use crate::control_plane_client::{
-    ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
+use crate::controller_upcall_client::{
+    ControlPlaneGenerationsApi, ControllerUpcallClient, RetryForeverError,
 };
 use crate::deletion_queue::DeletionQueueClient;
 use crate::http::routes::ACTIVE_TENANT_TIMEOUT;
@@ -122,7 +122,7 @@ pub(crate) enum ShardSelector {
     Known(ShardIndex),
 }
 
-/// A convenience for use with the re_attach ControlPlaneClient function: rather
+/// A convenience for use with the re_attach ControllerUpcallClient function: rather
 /// than the serializable struct, we build this enum that encapsulates
 /// the invariant that attached tenants always have generations.
 ///
@@ -219,7 +219,11 @@ async fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<U
         + TEMP_FILE_SUFFIX;
     let tmp_path = path_with_suffix_extension(&path, &rand_suffix);
     fs::rename(path.as_ref(), &tmp_path).await?;
-    fs::File::open(parent).await?.sync_all().await?;
+    fs::File::open(parent)
+        .await?
+        .sync_all()
+        .await
+        .maybe_fatal_err("safe_rename_tenant_dir")?;
     Ok(tmp_path)
 }
 
@@ -341,7 +345,7 @@ async fn init_load_generations(
             "Emergency mode!  Tenants will be attached unsafely using their last known generation"
         );
         emergency_generations(tenant_confs)
-    } else if let Some(client) = ControlPlaneClient::new(conf, cancel) {
+    } else if let Some(client) = ControllerUpcallClient::new(conf, cancel) {
         info!("Calling control plane API to re-attach tenants");
         // If we are configured to use the control plane API, then it is the source of truth for what tenants to load.
         match client.re_attach(conf).await {
@@ -949,12 +953,6 @@ impl TenantManager {
                 (LocationMode::Attached(attach_conf), Some(TenantSlot::Attached(tenant))) => {
                     match attach_conf.generation.cmp(&tenant.generation) {
                         Ordering::Equal => {
-                            if attach_conf.attach_mode == AttachmentMode::Single {
-                                tenant
-                                    .gc_block
-                                    .set_lsn_lease_deadline(tenant.get_lsn_lease_length());
-                            }
-
                             // A transition from Attached to Attached in the same generation, we may
                             // take our fast path and just provide the updated configuration
                             // to the tenant.
diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs
index 9fbe2f0da5..97506b7e9a 100644
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -178,6 +178,7 @@ async fn download_object<'a>(
                 destination_file
                     .flush()
                     .await
+                    .maybe_fatal_err("download_object sync_all")
                     .with_context(|| format!("flush source file at {dst_path}"))
                     .map_err(DownloadError::Other)?;
 
@@ -185,6 +186,7 @@ async fn download_object<'a>(
                 destination_file
                     .sync_all()
                     .await
+                    .maybe_fatal_err("download_object sync_all")
                     .with_context(|| format!("failed to fsync source file at {dst_path}"))
                     .map_err(DownloadError::Other)?;
 
@@ -232,6 +234,7 @@ async fn download_object<'a>(
                 destination_file
                     .sync_all()
                     .await
+                    .maybe_fatal_err("download_object sync_all")
                     .with_context(|| format!("failed to fsync source file at {dst_path}"))
                     .map_err(DownloadError::Other)?;
 
diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs
index 2b212cfed5..6f9eda85f5 100644
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -40,11 +40,11 @@ use crate::tenant::storage_layer::layer::S3_UPLOAD_LIMIT;
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::vectored_blob_io::{
     BlobFlag, BufView, StreamingVectoredReadPlanner, VectoredBlobReader, VectoredRead,
-    VectoredReadCoalesceMode, VectoredReadPlanner,
+    VectoredReadPlanner,
 };
 use crate::tenant::PageReconstructError;
 use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt};
-use crate::virtual_file::{self, VirtualFile};
+use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
 use crate::{walrecord, TEMP_FILE_SUFFIX};
 use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
 use anyhow::{anyhow, bail, ensure, Context, Result};
@@ -589,7 +589,9 @@ impl DeltaLayerWriterInner {
         );
 
         // fsync the file
-        file.sync_all().await?;
+        file.sync_all()
+            .await
+            .maybe_fatal_err("delta_layer sync_all")?;
 
         trace!("created delta layer {}", self.path);
 
@@ -1133,7 +1135,7 @@ impl DeltaLayerInner {
         ctx: &RequestContext,
     ) -> anyhow::Result<usize> {
         use crate::tenant::vectored_blob_io::{
-            BlobMeta, VectoredReadBuilder, VectoredReadExtended,
+            BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended,
         };
         use futures::stream::TryStreamExt;
 
@@ -1183,8 +1185,8 @@ impl DeltaLayerInner {
 
         let mut prev: Option<(Key, Lsn, BlobRef)> = None;
 
-        let mut read_builder: Option<VectoredReadBuilder> = None;
-        let read_mode = VectoredReadCoalesceMode::get();
+        let mut read_builder: Option<ChunkedVectoredReadBuilder> = None;
+        let align = virtual_file::get_io_buffer_alignment();
 
         let max_read_size = self
             .max_vectored_read_bytes
@@ -1228,12 +1230,12 @@ impl DeltaLayerInner {
                 {
                     None
                 } else {
-                    read_builder.replace(VectoredReadBuilder::new(
+                    read_builder.replace(ChunkedVectoredReadBuilder::new(
                         offsets.start.pos(),
                         offsets.end.pos(),
                         meta,
                         max_read_size,
-                        read_mode,
+                        align,
                     ))
                 }
             } else {
diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs
index 940d169db0..3dcd7bc962 100644
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -41,7 +41,7 @@ use crate::tenant::vectored_blob_io::{
 };
 use crate::tenant::PageReconstructError;
 use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt;
-use crate::virtual_file::{self, VirtualFile};
+use crate::virtual_file::{self, MaybeFatalIo, VirtualFile};
 use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
 use anyhow::{anyhow, bail, ensure, Context, Result};
 use bytes::{Bytes, BytesMut};
@@ -889,7 +889,9 @@ impl ImageLayerWriterInner {
         // set inner.file here. The first read will have to re-open it.
 
         // fsync the file
-        file.sync_all().await?;
+        file.sync_all()
+            .await
+            .maybe_fatal_err("image_layer sync_all")?;
 
         trace!("created image layer {}", self.path);
 
diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index 3f0f8a21c8..547739e773 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -330,7 +330,6 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
             RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
 
         let mut first = true;
-        tenant.gc_block.set_lsn_lease_deadline(tenant.get_lsn_lease_length());
         loop {
             tokio::select! {
                 _ = cancel.cancelled() => {
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index d301ba23ea..2113a1d726 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -66,6 +66,7 @@ use std::{
 use crate::{
     aux_file::AuxFileSizeEstimator,
     tenant::{
+        config::AttachmentMode,
         layer_map::{LayerMap, SearchResult},
         metadata::TimelineMetadata,
         storage_layer::{inmemory_layer::IndexEntry, PersistentLayerDesc},
@@ -1324,16 +1325,38 @@ impl Timeline {
         Ok(())
     }
 
-    /// Obtains a temporary lease blocking garbage collection for the given LSN.
-    ///
-    /// This function will error if the requesting LSN is less than the `latest_gc_cutoff_lsn` and there is also
-    /// no existing lease to renew. If there is an existing lease in the map, the lease will be renewed only if
-    /// the request extends the lease. The returned lease is therefore the maximum between the existing lease and
-    /// the requesting lease.
-    pub(crate) fn make_lsn_lease(
+    /// Initializes an LSN lease. The function will return an error if the requested LSN is less than the `latest_gc_cutoff_lsn`.
+    pub(crate) fn init_lsn_lease(
         &self,
         lsn: Lsn,
         length: Duration,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<LsnLease> {
+        self.make_lsn_lease(lsn, length, true, ctx)
+    }
+
+    /// Renews a lease at a particular LSN. The requested LSN is not validated against the `latest_gc_cutoff_lsn` when we are in the grace period.
+    pub(crate) fn renew_lsn_lease(
+        &self,
+        lsn: Lsn,
+        length: Duration,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<LsnLease> {
+        self.make_lsn_lease(lsn, length, false, ctx)
+    }
+
+    /// Obtains a temporary lease blocking garbage collection for the given LSN.
+    ///
+    /// If we are in `AttachedSingle` mode and is not blocked by the lsn lease deadline, this function will error
+    /// if the requesting LSN is less than the `latest_gc_cutoff_lsn` and there is no existing request present.
+    ///
+    /// If there is an existing lease in the map, the lease will be renewed only if the request extends the lease.
+    /// The returned lease is therefore the maximum between the existing lease and the requesting lease.
+    fn make_lsn_lease(
+        &self,
+        lsn: Lsn,
+        length: Duration,
+        init: bool,
         _ctx: &RequestContext,
     ) -> anyhow::Result<LsnLease> {
         let lease = {
@@ -1347,8 +1370,8 @@ impl Timeline {
 
             let entry = gc_info.leases.entry(lsn);
 
-            let lease = {
-                if let Entry::Occupied(mut occupied) = entry {
+            match entry {
+                Entry::Occupied(mut occupied) => {
                     let existing_lease = occupied.get_mut();
                     if valid_until > existing_lease.valid_until {
                         existing_lease.valid_until = valid_until;
@@ -1360,20 +1383,28 @@ impl Timeline {
                     }
 
                     existing_lease.clone()
-                } else {
-                    // Reject already GC-ed LSN (lsn < latest_gc_cutoff)
-                    let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
-                    if lsn < *latest_gc_cutoff_lsn {
-                        bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
+                }
+                Entry::Vacant(vacant) => {
+                    // Reject already GC-ed LSN (lsn < latest_gc_cutoff) if we are in AttachedSingle and
+                    // not blocked by the lsn lease deadline.
+                    let validate = {
+                        let conf = self.tenant_conf.load();
+                        conf.location.attach_mode == AttachmentMode::Single
+                            && !conf.is_gc_blocked_by_lsn_lease_deadline()
+                    };
+
+                    if init || validate {
+                        let latest_gc_cutoff_lsn = self.get_latest_gc_cutoff_lsn();
+                        if lsn < *latest_gc_cutoff_lsn {
+                            bail!("tried to request a page version that was garbage collected. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn);
+                        }
                     }
 
                     let dt: DateTime<Utc> = valid_until.into();
                     info!("lease created, valid until {}", dt);
-                    entry.or_insert(LsnLease { valid_until }).clone()
+                    vacant.insert(LsnLease { valid_until }).clone()
                 }
-            };
-
-            lease
+            }
         };
 
         Ok(lease)
@@ -1950,8 +1981,6 @@ impl Timeline {
             .unwrap_or(self.conf.default_tenant_conf.lsn_lease_length)
     }
 
-    // TODO(yuchen): remove unused flag after implementing https://github.com/neondatabase/neon/issues/8072
-    #[allow(unused)]
     pub(crate) fn get_lsn_lease_length_for_ts(&self) -> Duration {
         let tenant_conf = self.tenant_conf.load();
         tenant_conf
@@ -3601,7 +3630,7 @@ impl Timeline {
                     ctx,
                 )
                 .await
-                .map_err(|e| FlushLayerError::from_anyhow(self, e))?;
+                .map_err(|e| FlushLayerError::from_anyhow(self, e.into()))?;
 
             if self.cancel.is_cancelled() {
                 return Err(FlushLayerError::Cancelled);
@@ -3840,16 +3869,20 @@ impl Timeline {
         partition_size: u64,
         flags: EnumSet<CompactFlags>,
         ctx: &RequestContext,
-    ) -> anyhow::Result<((KeyPartitioning, SparseKeyPartitioning), Lsn)> {
+    ) -> Result<((KeyPartitioning, SparseKeyPartitioning), Lsn), CompactionError> {
         let Ok(mut partitioning_guard) = self.partitioning.try_lock() else {
             // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.
             // The other is the initdb optimization in flush_frozen_layer, used by `boostrap_timeline`, which runs before `.activate()`
             // and hence before the compaction task starts.
-            anyhow::bail!("repartition() called concurrently, this should not happen");
+            return Err(CompactionError::Other(anyhow!(
+                "repartition() called concurrently, this should not happen"
+            )));
         };
         let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard;
         if lsn < *partition_lsn {
-            anyhow::bail!("repartition() called with LSN going backwards, this should not happen");
+            return Err(CompactionError::Other(anyhow!(
+                "repartition() called with LSN going backwards, this should not happen"
+            )));
         }
 
         let distance = lsn.0 - partition_lsn.0;
@@ -4451,6 +4484,12 @@ pub(crate) enum CompactionError {
     Other(anyhow::Error),
 }
 
+impl CompactionError {
+    pub fn is_cancelled(&self) -> bool {
+        matches!(self, CompactionError::ShuttingDown)
+    }
+}
+
 impl From<CollectKeySpaceError> for CompactionError {
     fn from(err: CollectKeySpaceError) -> Self {
         match err {
diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs
index 6edc28a11b..3de386a2d5 100644
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -390,7 +390,7 @@ impl Timeline {
                 // error but continue.
                 //
                 // Suppress error when it's due to cancellation
-                if !self.cancel.is_cancelled() {
+                if !self.cancel.is_cancelled() && !err.is_cancelled() {
                     tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
                 }
                 (1, false)
diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs
index aa37a45898..1faa6bab99 100644
--- a/pageserver/src/tenant/vectored_blob_io.rs
+++ b/pageserver/src/tenant/vectored_blob_io.rs
@@ -185,171 +185,7 @@ pub(crate) enum VectoredReadExtended {
     No,
 }
 
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum VectoredReadCoalesceMode {
-    /// Only coalesce exactly adjacent reads.
-    AdjacentOnly,
-    /// In addition to adjacent reads, also consider reads whose corresponding
-    /// `end` and `start` offsets reside at the same chunk.
-    Chunked(usize),
-}
-
-impl VectoredReadCoalesceMode {
-    /// [`AdjacentVectoredReadBuilder`] is used if alignment requirement is 0,
-    /// whereas [`ChunkedVectoredReadBuilder`] is used for alignment requirement 1 and higher.
-    pub(crate) fn get() -> Self {
-        let align = virtual_file::get_io_buffer_alignment_raw();
-        if align == 0 {
-            VectoredReadCoalesceMode::AdjacentOnly
-        } else {
-            VectoredReadCoalesceMode::Chunked(align)
-        }
-    }
-}
-
-pub(crate) enum VectoredReadBuilder {
-    Adjacent(AdjacentVectoredReadBuilder),
-    Chunked(ChunkedVectoredReadBuilder),
-}
-
-impl VectoredReadBuilder {
-    fn new_impl(
-        start_offset: u64,
-        end_offset: u64,
-        meta: BlobMeta,
-        max_read_size: Option<usize>,
-        mode: VectoredReadCoalesceMode,
-    ) -> Self {
-        match mode {
-            VectoredReadCoalesceMode::AdjacentOnly => Self::Adjacent(
-                AdjacentVectoredReadBuilder::new(start_offset, end_offset, meta, max_read_size),
-            ),
-            VectoredReadCoalesceMode::Chunked(chunk_size) => {
-                Self::Chunked(ChunkedVectoredReadBuilder::new(
-                    start_offset,
-                    end_offset,
-                    meta,
-                    max_read_size,
-                    chunk_size,
-                ))
-            }
-        }
-    }
-
-    pub(crate) fn new(
-        start_offset: u64,
-        end_offset: u64,
-        meta: BlobMeta,
-        max_read_size: usize,
-        mode: VectoredReadCoalesceMode,
-    ) -> Self {
-        Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), mode)
-    }
-
-    pub(crate) fn new_streaming(
-        start_offset: u64,
-        end_offset: u64,
-        meta: BlobMeta,
-        mode: VectoredReadCoalesceMode,
-    ) -> Self {
-        Self::new_impl(start_offset, end_offset, meta, None, mode)
-    }
-
-    pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
-        match self {
-            VectoredReadBuilder::Adjacent(builder) => builder.extend(start, end, meta),
-            VectoredReadBuilder::Chunked(builder) => builder.extend(start, end, meta),
-        }
-    }
-
-    pub(crate) fn build(self) -> VectoredRead {
-        match self {
-            VectoredReadBuilder::Adjacent(builder) => builder.build(),
-            VectoredReadBuilder::Chunked(builder) => builder.build(),
-        }
-    }
-
-    pub(crate) fn size(&self) -> usize {
-        match self {
-            VectoredReadBuilder::Adjacent(builder) => builder.size(),
-            VectoredReadBuilder::Chunked(builder) => builder.size(),
-        }
-    }
-}
-
-pub(crate) struct AdjacentVectoredReadBuilder {
-    /// Start offset of the read.
-    start: u64,
-    // End offset of the read.
-    end: u64,
-    /// Start offset and metadata for each blob in this read
-    blobs_at: VecMap<u64, BlobMeta>,
-    max_read_size: Option<usize>,
-}
-
-impl AdjacentVectoredReadBuilder {
-    /// Start building a new vectored read.
-    ///
-    /// Note that by design, this does not check against reading more than `max_read_size` to
-    /// support reading larger blobs than the configuration value. The builder will be single use
-    /// however after that.
-    pub(crate) fn new(
-        start_offset: u64,
-        end_offset: u64,
-        meta: BlobMeta,
-        max_read_size: Option<usize>,
-    ) -> Self {
-        let mut blobs_at = VecMap::default();
-        blobs_at
-            .append(start_offset, meta)
-            .expect("First insertion always succeeds");
-
-        Self {
-            start: start_offset,
-            end: end_offset,
-            blobs_at,
-            max_read_size,
-        }
-    }
-    /// Attempt to extend the current read with a new blob if the start
-    /// offset matches with the current end of the vectored read
-    /// and the resuting size is below the max read size
-    pub(crate) fn extend(&mut self, start: u64, end: u64, meta: BlobMeta) -> VectoredReadExtended {
-        tracing::trace!(start, end, "trying to extend");
-        let size = (end - start) as usize;
-        let not_limited_by_max_read_size = {
-            if let Some(max_read_size) = self.max_read_size {
-                self.size() + size <= max_read_size
-            } else {
-                true
-            }
-        };
-
-        if self.end == start && not_limited_by_max_read_size {
-            self.end = end;
-            self.blobs_at
-                .append(start, meta)
-                .expect("LSNs are ordered within vectored reads");
-
-            return VectoredReadExtended::Yes;
-        }
-
-        VectoredReadExtended::No
-    }
-
-    pub(crate) fn size(&self) -> usize {
-        (self.end - self.start) as usize
-    }
-
-    pub(crate) fn build(self) -> VectoredRead {
-        VectoredRead {
-            start: self.start,
-            end: self.end,
-            blobs_at: self.blobs_at,
-        }
-    }
-}
-
+/// A vectored read builder that tries to coalesce all reads that fits in a chunk.
 pub(crate) struct ChunkedVectoredReadBuilder {
     /// Start block number
     start_blk_no: usize,
@@ -373,7 +209,7 @@ impl ChunkedVectoredReadBuilder {
     /// Note that by design, this does not check against reading more than `max_read_size` to
     /// support reading larger blobs than the configuration value. The builder will be single use
     /// however after that.
-    pub(crate) fn new(
+    fn new_impl(
         start_offset: u64,
         end_offset: u64,
         meta: BlobMeta,
@@ -396,6 +232,25 @@ impl ChunkedVectoredReadBuilder {
         }
     }
 
+    pub(crate) fn new(
+        start_offset: u64,
+        end_offset: u64,
+        meta: BlobMeta,
+        max_read_size: usize,
+        align: usize,
+    ) -> Self {
+        Self::new_impl(start_offset, end_offset, meta, Some(max_read_size), align)
+    }
+
+    pub(crate) fn new_streaming(
+        start_offset: u64,
+        end_offset: u64,
+        meta: BlobMeta,
+        align: usize,
+    ) -> Self {
+        Self::new_impl(start_offset, end_offset, meta, None, align)
+    }
+
     /// Attempts to extend the current read with a new blob if the new blob resides in the same or the immediate next chunk.
     ///
     /// The resulting size also must be below the max read size.
@@ -474,17 +329,17 @@ pub struct VectoredReadPlanner {
 
     max_read_size: usize,
 
-    mode: VectoredReadCoalesceMode,
+    align: usize,
 }
 
 impl VectoredReadPlanner {
     pub fn new(max_read_size: usize) -> Self {
-        let mode = VectoredReadCoalesceMode::get();
+        let align = virtual_file::get_io_buffer_alignment();
         Self {
             blobs: BTreeMap::new(),
             prev: None,
             max_read_size,
-            mode,
+            align,
         }
     }
 
@@ -545,7 +400,7 @@ impl VectoredReadPlanner {
     }
 
     pub fn finish(self) -> Vec<VectoredRead> {
-        let mut current_read_builder: Option<VectoredReadBuilder> = None;
+        let mut current_read_builder: Option<ChunkedVectoredReadBuilder> = None;
         let mut reads = Vec::new();
 
         for (key, blobs_for_key) in self.blobs {
@@ -558,12 +413,12 @@ impl VectoredReadPlanner {
                 };
 
                 if extended == VectoredReadExtended::No {
-                    let next_read_builder = VectoredReadBuilder::new(
+                    let next_read_builder = ChunkedVectoredReadBuilder::new(
                         start_offset,
                         end_offset,
                         BlobMeta { key, lsn },
                         self.max_read_size,
-                        self.mode,
+                        self.align,
                     );
 
                     let prev_read_builder = current_read_builder.replace(next_read_builder);
@@ -688,7 +543,7 @@ impl<'a> VectoredBlobReader<'a> {
 /// `handle` gets called and when the current key would just exceed the read_size and
 /// max_cnt constraints.
 pub struct StreamingVectoredReadPlanner {
-    read_builder: Option<VectoredReadBuilder>,
+    read_builder: Option<ChunkedVectoredReadBuilder>,
     // Arguments for previous blob passed into [`StreamingVectoredReadPlanner::handle`]
     prev: Option<(Key, Lsn, u64)>,
     /// Max read size per batch. This is not a strict limit. If there are [0, 100) and [100, 200), while the `max_read_size` is 150,
@@ -699,21 +554,21 @@ pub struct StreamingVectoredReadPlanner {
     /// Size of the current batch
     cnt: usize,
 
-    mode: VectoredReadCoalesceMode,
+    align: usize,
 }
 
 impl StreamingVectoredReadPlanner {
     pub fn new(max_read_size: u64, max_cnt: usize) -> Self {
         assert!(max_cnt > 0);
         assert!(max_read_size > 0);
-        let mode = VectoredReadCoalesceMode::get();
+        let align = virtual_file::get_io_buffer_alignment();
         Self {
             read_builder: None,
             prev: None,
             max_cnt,
             max_read_size,
             cnt: 0,
-            mode,
+            align,
         }
     }
 
@@ -762,11 +617,11 @@ impl StreamingVectoredReadPlanner {
             }
             None => {
                 self.read_builder = {
-                    Some(VectoredReadBuilder::new_streaming(
+                    Some(ChunkedVectoredReadBuilder::new_streaming(
                         start_offset,
                         end_offset,
                         BlobMeta { key, lsn },
-                        self.mode,
+                        self.align,
                     ))
                 };
             }
@@ -1092,7 +947,7 @@ mod tests {
         let reserved_bytes = blobs.iter().map(|bl| bl.len()).max().unwrap() * 2 + 16;
         let mut buf = BytesMut::with_capacity(reserved_bytes);
 
-        let mode = VectoredReadCoalesceMode::get();
+        let align = virtual_file::get_io_buffer_alignment();
         let vectored_blob_reader = VectoredBlobReader::new(&file);
         let meta = BlobMeta {
             key: Key::MIN,
@@ -1104,7 +959,8 @@ mod tests {
             if idx + 1 == offsets.len() {
                 continue;
             }
-            let read_builder = VectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, mode);
+            let read_builder =
+                ChunkedVectoredReadBuilder::new(*offset, *end, meta, 16 * 4096, align);
             let read = read_builder.build();
             let result = vectored_blob_reader.read_blobs(&read, buf, &ctx).await?;
             assert_eq!(result.blobs.len(), 1);
diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs
index 57856eea80..5b7b279888 100644
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -466,6 +466,7 @@ impl VirtualFile {
                 &[]
             };
             utils::crashsafe::overwrite(&final_path, &tmp_path, content)
+                .maybe_fatal_err("crashsafe_overwrite")
         })
         .await
         .expect("blocking task is never aborted")
@@ -475,7 +476,7 @@ impl VirtualFile {
     pub async fn sync_all(&self) -> Result<(), Error> {
         with_file!(self, StorageIoOperation::Fsync, |file_guard| {
             let (_file_guard, res) = io_engine::get().sync_all(file_guard).await;
-            res
+            res.maybe_fatal_err("sync_all")
         })
     }
 
@@ -483,7 +484,7 @@ impl VirtualFile {
     pub async fn sync_data(&self) -> Result<(), Error> {
         with_file!(self, StorageIoOperation::Fsync, |file_guard| {
             let (_file_guard, res) = io_engine::get().sync_data(file_guard).await;
-            res
+            res.maybe_fatal_err("sync_data")
         })
     }
 
@@ -1147,7 +1148,9 @@ pub fn init(num_slots: usize, engine: IoEngineKind, io_buffer_alignment: usize)
         panic!("virtual_file::init called twice");
     }
     if set_io_buffer_alignment(io_buffer_alignment).is_err() {
-        panic!("IO buffer alignment ({io_buffer_alignment}) is not a power of two");
+        panic!(
+            "IO buffer alignment needs to be a power of two and greater than 512, got {io_buffer_alignment}"
+        );
     }
     io_engine::init(engine);
     crate::metrics::virtual_file_descriptor_cache::SIZE_MAX.set(num_slots as u64);
@@ -1174,14 +1177,16 @@ fn get_open_files() -> &'static OpenFiles {
 
 static IO_BUFFER_ALIGNMENT: AtomicUsize = AtomicUsize::new(DEFAULT_IO_BUFFER_ALIGNMENT);
 
-/// Returns true if `x` is zero or a power of two.
-fn is_zero_or_power_of_two(x: usize) -> bool {
-    (x == 0) || ((x & (x - 1)) == 0)
+/// Returns true if the alignment is a power of two and is greater or equal to 512.
+fn is_valid_io_buffer_alignment(align: usize) -> bool {
+    align.is_power_of_two() && align >= 512
 }
 
+/// Sets IO buffer alignment requirement. Returns error if the alignment requirement is
+/// not a power of two or less than 512 bytes.
 #[allow(unused)]
 pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
-    if is_zero_or_power_of_two(align) {
+    if is_valid_io_buffer_alignment(align) {
         IO_BUFFER_ALIGNMENT.store(align, std::sync::atomic::Ordering::Relaxed);
         Ok(())
     } else {
@@ -1189,19 +1194,19 @@ pub(crate) fn set_io_buffer_alignment(align: usize) -> Result<(), usize> {
     }
 }
 
-/// Gets the io buffer alignment requirement. Returns 0 if there is no requirement specified.
+/// Gets the io buffer alignment.
 ///
-/// This function should be used to check the raw config value.
-pub(crate) fn get_io_buffer_alignment_raw() -> usize {
+/// This function should be used for getting the actual alignment value to use.
+pub(crate) fn get_io_buffer_alignment() -> usize {
     let align = IO_BUFFER_ALIGNMENT.load(std::sync::atomic::Ordering::Relaxed);
 
     if cfg!(test) {
         let env_var_name = "NEON_PAGESERVER_UNIT_TEST_IO_BUFFER_ALIGNMENT";
         if let Some(test_align) = utils::env::var(env_var_name) {
-            if is_zero_or_power_of_two(test_align) {
+            if is_valid_io_buffer_alignment(test_align) {
                 test_align
             } else {
-                panic!("IO buffer alignment ({test_align}) is not a power of two");
+                panic!("IO buffer alignment needs to be a power of two and greater than 512, got {test_align}");
             }
         } else {
             align
@@ -1211,14 +1216,6 @@ pub(crate) fn get_io_buffer_alignment_raw() -> usize {
     }
 }
 
-/// Gets the io buffer alignment requirement. Returns 1 if the alignment config is set to zero.
-///
-/// This function should be used for getting the actual alignment value to use.
-pub(crate) fn get_io_buffer_alignment() -> usize {
-    let align = get_io_buffer_alignment_raw();
-    align.max(1)
-}
-
 #[cfg(test)]
 mod tests {
     use crate::context::DownloadBehavior;
diff --git a/pgxn/neon/Makefile b/pgxn/neon/Makefile
index ddc8155ff3..f1229b2d73 100644
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -25,7 +25,18 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl
 
 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
+DATA = \
+	neon--1.0.sql \
+	neon--1.0--1.1.sql \
+	neon--1.1--1.2.sql \
+	neon--1.2--1.3.sql \
+	neon--1.3--1.4.sql \
+	neon--1.4--1.5.sql \
+	neon--1.5--1.4.sql \
+	neon--1.4--1.3.sql \
+	neon--1.3--1.2.sql \
+	neon--1.2--1.1.sql \
+	neon--1.1--1.0.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"
 
 EXTRA_CLEAN = \
diff --git a/pgxn/neon/walproposer_pg.c b/pgxn/neon/walproposer_pg.c
index 4d0d06e6de..bb65a11c7d 100644
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -1473,11 +1473,33 @@ walprop_pg_wal_read(Safekeeper *sk, char *buf, XLogRecPtr startptr, Size count,
 {
 	NeonWALReadResult res;
 
-	res = NeonWALRead(sk->xlogreader,
-					  buf,
-					  startptr,
-					  count,
-					  walprop_pg_get_timeline_id());
+#if PG_MAJORVERSION_NUM >= 17
+	if (!sk->wp->config->syncSafekeepers)
+	{
+		Size	rbytes;
+		rbytes = WALReadFromBuffers(buf, startptr, count,
+									walprop_pg_get_timeline_id());
+
+		startptr += rbytes;
+		count -= rbytes;
+	}
+#endif
+
+	if (count == 0)
+	{
+		res = NEON_WALREAD_SUCCESS;
+	}
+	else
+	{
+		Assert(count > 0);
+
+		/* Now read the remaining WAL from the WAL file */
+		res = NeonWALRead(sk->xlogreader,
+						  buf,
+						  startptr,
+						  count,
+						  walprop_pg_get_timeline_id());
+	}
 
 	if (res == NEON_WALREAD_SUCCESS)
 	{
diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml
index 501ce050e0..04e0f9d4f5 100644
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -24,6 +24,7 @@ bytes = { workspace = true, features = ["serde"] }
 camino.workspace = true
 chrono.workspace = true
 clap.workspace = true
+compute_api.workspace = true
 consumption_metrics.workspace = true
 dashmap.workspace = true
 env_logger.workspace = true
diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs
index 4e9f4591ad..5dbfa5cc09 100644
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -80,6 +80,14 @@ pub(crate) trait TestBackend: Send + Sync + 'static {
     fn get_allowed_ips_and_secret(
         &self,
     ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), console::errors::GetAuthInfoError>;
+    fn dyn_clone(&self) -> Box<dyn TestBackend>;
+}
+
+#[cfg(test)]
+impl Clone for Box<dyn TestBackend> {
+    fn clone(&self) -> Self {
+        TestBackend::dyn_clone(&**self)
+    }
 }
 
 impl std::fmt::Display for Backend<'_, (), ()> {
@@ -585,6 +593,14 @@ mod tests {
             ))
         }
 
+        async fn get_endpoint_jwks(
+            &self,
+            _ctx: &RequestMonitoring,
+            _endpoint: crate::EndpointId,
+        ) -> anyhow::Result<Vec<super::jwt::AuthRule>> {
+            unimplemented!()
+        }
+
         async fn wake_compute(
             &self,
             _ctx: &RequestMonitoring,
diff --git a/proxy/src/auth/backend/jwt.rs b/proxy/src/auth/backend/jwt.rs
index 94e5999a5f..ab848551a9 100644
--- a/proxy/src/auth/backend/jwt.rs
+++ b/proxy/src/auth/backend/jwt.rs
@@ -12,7 +12,10 @@ use serde::{Deserialize, Deserializer};
 use signature::Verifier;
 use tokio::time::Instant;
 
-use crate::{context::RequestMonitoring, http::parse_json_body_with_limit, EndpointId, RoleName};
+use crate::{
+    context::RequestMonitoring, http::parse_json_body_with_limit, intern::RoleNameInt, EndpointId,
+    RoleName,
+};
 
 // TODO(conrad): make these configurable.
 const CLOCK_SKEW_LEEWAY: Duration = Duration::from_secs(30);
@@ -27,7 +30,6 @@ pub(crate) trait FetchAuthRules: Clone + Send + Sync + 'static {
         &self,
         ctx: &RequestMonitoring,
         endpoint: EndpointId,
-        role_name: RoleName,
     ) -> impl Future<Output = anyhow::Result<Vec<AuthRule>>> + Send;
 }
 
@@ -35,10 +37,11 @@ pub(crate) struct AuthRule {
     pub(crate) id: String,
     pub(crate) jwks_url: url::Url,
     pub(crate) audience: Option<String>,
+    pub(crate) role_names: Vec<RoleNameInt>,
 }
 
 #[derive(Default)]
-pub(crate) struct JwkCache {
+pub struct JwkCache {
     client: reqwest::Client,
 
     map: DashMap<(EndpointId, RoleName), Arc<JwkCacheEntryLock>>,
@@ -54,18 +57,28 @@ pub(crate) struct JwkCacheEntry {
 }
 
 impl JwkCacheEntry {
-    fn find_jwk_and_audience(&self, key_id: &str) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
-        self.key_sets.values().find_map(|key_set| {
-            key_set
-                .find_key(key_id)
-                .map(|jwk| (jwk, key_set.audience.as_deref()))
-        })
+    fn find_jwk_and_audience(
+        &self,
+        key_id: &str,
+        role_name: &RoleName,
+    ) -> Option<(&jose_jwk::Jwk, Option<&str>)> {
+        self.key_sets
+            .values()
+            // make sure our requested role has access to the key set
+            .filter(|key_set| key_set.role_names.iter().any(|role| **role == **role_name))
+            // try and find the requested key-id in the key set
+            .find_map(|key_set| {
+                key_set
+                    .find_key(key_id)
+                    .map(|jwk| (jwk, key_set.audience.as_deref()))
+            })
     }
 }
 
 struct KeySet {
     jwks: jose_jwk::JwkSet,
     audience: Option<String>,
+    role_names: Vec<RoleNameInt>,
 }
 
 impl KeySet {
@@ -106,7 +119,6 @@ impl JwkCacheEntryLock {
         ctx: &RequestMonitoring,
         client: &reqwest::Client,
         endpoint: EndpointId,
-        role_name: RoleName,
         auth_rules: &F,
     ) -> anyhow::Result<Arc<JwkCacheEntry>> {
         // double check that no one beat us to updating the cache.
@@ -119,11 +131,10 @@ impl JwkCacheEntryLock {
             }
         }
 
-        let rules = auth_rules
-            .fetch_auth_rules(ctx, endpoint, role_name)
-            .await?;
+        let rules = auth_rules.fetch_auth_rules(ctx, endpoint).await?;
         let mut key_sets =
             ahash::HashMap::with_capacity_and_hasher(rules.len(), ahash::RandomState::new());
+
         // TODO(conrad): run concurrently
         // TODO(conrad): strip the JWKs urls (should be checked by cplane as well - cloud#16284)
         for rule in rules {
@@ -151,6 +162,7 @@ impl JwkCacheEntryLock {
                                 KeySet {
                                     jwks,
                                     audience: rule.audience,
+                                    role_names: rule.role_names,
                                 },
                             );
                         }
@@ -173,7 +185,6 @@ impl JwkCacheEntryLock {
         ctx: &RequestMonitoring,
         client: &reqwest::Client,
         endpoint: EndpointId,
-        role_name: RoleName,
         fetch: &F,
     ) -> Result<Arc<JwkCacheEntry>, anyhow::Error> {
         let now = Instant::now();
@@ -183,9 +194,7 @@ impl JwkCacheEntryLock {
         let Some(cached) = guard else {
             let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
             let permit = self.acquire_permit().await;
-            return self
-                .renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
-                .await;
+            return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
         };
 
         let last_update = now.duration_since(cached.last_retrieved);
@@ -196,9 +205,7 @@ impl JwkCacheEntryLock {
             let permit = self.acquire_permit().await;
 
             // it's been too long since we checked the keys. wait for them to update.
-            return self
-                .renew_jwks(permit, ctx, client, endpoint, role_name, fetch)
-                .await;
+            return self.renew_jwks(permit, ctx, client, endpoint, fetch).await;
         }
 
         // every 5 minutes we should spawn a job to eagerly update the token.
@@ -212,7 +219,7 @@ impl JwkCacheEntryLock {
                 let ctx = ctx.clone();
                 tokio::spawn(async move {
                     if let Err(e) = entry
-                        .renew_jwks(permit, &ctx, &client, endpoint, role_name, &fetch)
+                        .renew_jwks(permit, &ctx, &client, endpoint, &fetch)
                         .await
                     {
                         tracing::warn!(error=?e, "could not fetch JWKs in background job");
@@ -232,7 +239,7 @@ impl JwkCacheEntryLock {
         jwt: &str,
         client: &reqwest::Client,
         endpoint: EndpointId,
-        role_name: RoleName,
+        role_name: &RoleName,
         fetch: &F,
     ) -> Result<(), anyhow::Error> {
         // JWT compact form is defined to be
@@ -254,30 +261,26 @@ impl JwkCacheEntryLock {
         let sig = base64::decode_config(signature, base64::URL_SAFE_NO_PAD)
             .context("Provided authentication token is not a valid JWT encoding")?;
 
-        ensure!(header.typ == "JWT");
+        ensure!(
+            header.typ == "JWT",
+            "Provided authentication token is not a valid JWT encoding"
+        );
         let kid = header.key_id.context("missing key id")?;
 
         let mut guard = self
-            .get_or_update_jwk_cache(ctx, client, endpoint.clone(), role_name.clone(), fetch)
+            .get_or_update_jwk_cache(ctx, client, endpoint.clone(), fetch)
             .await?;
 
         // get the key from the JWKs if possible. If not, wait for the keys to update.
         let (jwk, expected_audience) = loop {
-            match guard.find_jwk_and_audience(kid) {
+            match guard.find_jwk_and_audience(kid, role_name) {
                 Some(jwk) => break jwk,
                 None if guard.last_retrieved.elapsed() > MIN_RENEW => {
                     let _paused = ctx.latency_timer_pause(crate::metrics::Waiting::Compute);
 
                     let permit = self.acquire_permit().await;
                     guard = self
-                        .renew_jwks(
-                            permit,
-                            ctx,
-                            client,
-                            endpoint.clone(),
-                            role_name.clone(),
-                            fetch,
-                        )
+                        .renew_jwks(permit, ctx, client, endpoint.clone(), fetch)
                         .await?;
                 }
                 _ => {
@@ -320,11 +323,14 @@ impl JwkCacheEntryLock {
         let now = SystemTime::now();
 
         if let Some(exp) = payload.expiration {
-            ensure!(now < exp + CLOCK_SKEW_LEEWAY);
+            ensure!(now < exp + CLOCK_SKEW_LEEWAY, "JWT token has expired");
         }
 
         if let Some(nbf) = payload.not_before {
-            ensure!(nbf < now + CLOCK_SKEW_LEEWAY);
+            ensure!(
+                nbf < now + CLOCK_SKEW_LEEWAY,
+                "JWT token is not yet ready to use"
+            );
         }
 
         Ok(())
@@ -336,7 +342,7 @@ impl JwkCache {
         &self,
         ctx: &RequestMonitoring,
         endpoint: EndpointId,
-        role_name: RoleName,
+        role_name: &RoleName,
         fetch: &F,
         jwt: &str,
     ) -> Result<(), anyhow::Error> {
@@ -572,7 +578,7 @@ mod tests {
         format!("{header}.{body}")
     }
 
-    fn new_ec_jwt(kid: String, key: p256::SecretKey) -> String {
+    fn new_ec_jwt(kid: String, key: &p256::SecretKey) -> String {
         use p256::ecdsa::{Signature, SigningKey};
 
         let payload = build_jwt_payload(kid, jose_jwa::Signing::Es256);
@@ -660,11 +666,6 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
         let (ec1, jwk3) = new_ec_jwk("3".into());
         let (ec2, jwk4) = new_ec_jwk("4".into());
 
-        let jwt1 = new_rsa_jwt("1".into(), rs1);
-        let jwt2 = new_rsa_jwt("2".into(), rs2);
-        let jwt3 = new_ec_jwt("3".into(), ec1);
-        let jwt4 = new_ec_jwt("4".into(), ec2);
-
         let foo_jwks = jose_jwk::JwkSet {
             keys: vec![jwk1, jwk3],
         };
@@ -706,47 +707,98 @@ X0n5X2/pBLJzxZc62ccvZYVnctBiFs6HbSnxpuMQCfkt/BcR/ttIepBQQIW86wHL
         let client = reqwest::Client::new();
 
         #[derive(Clone)]
-        struct Fetch(SocketAddr);
+        struct Fetch(SocketAddr, Vec<RoleNameInt>);
 
         impl FetchAuthRules for Fetch {
             async fn fetch_auth_rules(
                 &self,
                 _ctx: &RequestMonitoring,
                 _endpoint: EndpointId,
-                _role_name: RoleName,
             ) -> anyhow::Result<Vec<AuthRule>> {
                 Ok(vec![
                     AuthRule {
                         id: "foo".to_owned(),
                         jwks_url: format!("http://{}/foo", self.0).parse().unwrap(),
                         audience: None,
+                        role_names: self.1.clone(),
                     },
                     AuthRule {
                         id: "bar".to_owned(),
                         jwks_url: format!("http://{}/bar", self.0).parse().unwrap(),
                         audience: None,
+                        role_names: self.1.clone(),
                     },
                 ])
             }
         }
 
-        let role_name = RoleName::from("user");
+        let role_name1 = RoleName::from("anonymous");
+        let role_name2 = RoleName::from("authenticated");
+
+        let fetch = Fetch(
+            addr,
+            vec![
+                RoleNameInt::from(&role_name1),
+                RoleNameInt::from(&role_name2),
+            ],
+        );
+
         let endpoint = EndpointId::from("ep");
 
         let jwk_cache = Arc::new(JwkCacheEntryLock::default());
 
-        for token in [jwt1, jwt2, jwt3, jwt4] {
-            jwk_cache
-                .check_jwt(
-                    &RequestMonitoring::test(),
-                    &token,
-                    &client,
-                    endpoint.clone(),
-                    role_name.clone(),
-                    &Fetch(addr),
-                )
-                .await
-                .unwrap();
+        let jwt1 = new_rsa_jwt("1".into(), rs1);
+        let jwt2 = new_rsa_jwt("2".into(), rs2);
+        let jwt3 = new_ec_jwt("3".into(), &ec1);
+        let jwt4 = new_ec_jwt("4".into(), &ec2);
+
+        // had the wrong kid, therefore will have the wrong ecdsa signature
+        let bad_jwt = new_ec_jwt("3".into(), &ec2);
+        // this role_name is not accepted
+        let bad_role_name = RoleName::from("cloud_admin");
+
+        let err = jwk_cache
+            .check_jwt(
+                &RequestMonitoring::test(),
+                &bad_jwt,
+                &client,
+                endpoint.clone(),
+                &role_name1,
+                &fetch,
+            )
+            .await
+            .unwrap_err();
+        assert!(err.to_string().contains("signature error"));
+
+        let err = jwk_cache
+            .check_jwt(
+                &RequestMonitoring::test(),
+                &jwt1,
+                &client,
+                endpoint.clone(),
+                &bad_role_name,
+                &fetch,
+            )
+            .await
+            .unwrap_err();
+        assert!(err.to_string().contains("jwk not found"));
+
+        let tokens = [jwt1, jwt2, jwt3, jwt4];
+        let role_names = [role_name1, role_name2];
+        for role in &role_names {
+            for token in &tokens {
+                jwk_cache
+                    .check_jwt(
+                        &RequestMonitoring::test(),
+                        token,
+                        &client,
+                        endpoint.clone(),
+                        role,
+                        &fetch,
+                    )
+                    .await
+                    .unwrap();
+            }
         }
     }
 }
diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs
index 2ff2ca00f0..2ab53f2c6a 100644
--- a/proxy/src/auth/backend/local.rs
+++ b/proxy/src/auth/backend/local.rs
@@ -1,4 +1,4 @@
-use std::{collections::HashMap, net::SocketAddr};
+use std::net::SocketAddr;
 
 use anyhow::Context;
 use arc_swap::ArcSwapOption;
@@ -10,8 +10,8 @@ use crate::{
         NodeInfo,
     },
     context::RequestMonitoring,
-    intern::{BranchIdInt, BranchIdTag, EndpointIdTag, InternId, ProjectIdInt, ProjectIdTag},
-    EndpointId, RoleName,
+    intern::{BranchIdTag, EndpointIdTag, InternId, ProjectIdTag},
+    EndpointId,
 };
 
 use super::jwt::{AuthRule, FetchAuthRules, JwkCache};
@@ -48,26 +48,17 @@ impl LocalBackend {
 #[derive(Clone, Copy)]
 pub(crate) struct StaticAuthRules;
 
-pub static JWKS_ROLE_MAP: ArcSwapOption<JwksRoleSettings> = ArcSwapOption::const_empty();
-
-#[derive(Debug, Clone)]
-pub struct JwksRoleSettings {
-    pub roles: HashMap<RoleName, EndpointJwksResponse>,
-    pub project_id: ProjectIdInt,
-    pub branch_id: BranchIdInt,
-}
+pub static JWKS_ROLE_MAP: ArcSwapOption<EndpointJwksResponse> = ArcSwapOption::const_empty();
 
 impl FetchAuthRules for StaticAuthRules {
     async fn fetch_auth_rules(
         &self,
         _ctx: &RequestMonitoring,
         _endpoint: EndpointId,
-        role_name: RoleName,
     ) -> anyhow::Result<Vec<AuthRule>> {
         let mappings = JWKS_ROLE_MAP.load();
         let role_mappings = mappings
             .as_deref()
-            .and_then(|m| m.roles.get(&role_name))
             .context("JWKs settings for this role were not configured")?;
         let mut rules = vec![];
         for setting in &role_mappings.jwks {
@@ -75,6 +66,7 @@ impl FetchAuthRules for StaticAuthRules {
                 id: setting.id.clone(),
                 jwks_url: setting.jwks_url.clone(),
                 audience: setting.jwt_audience.clone(),
+                role_names: setting.role_names.clone(),
             });
         }
 
diff --git a/proxy/src/bin/local_proxy.rs b/proxy/src/bin/local_proxy.rs
index 94365ddf05..1b3f465686 100644
--- a/proxy/src/bin/local_proxy.rs
+++ b/proxy/src/bin/local_proxy.rs
@@ -1,34 +1,35 @@
-use std::{
-    net::SocketAddr,
-    path::{Path, PathBuf},
-    pin::pin,
-    sync::Arc,
-    time::Duration,
-};
+use std::{net::SocketAddr, pin::pin, str::FromStr, sync::Arc, time::Duration};
 
-use anyhow::{bail, ensure};
+use anyhow::{bail, ensure, Context};
+use camino::{Utf8Path, Utf8PathBuf};
+use compute_api::spec::LocalProxySpec;
 use dashmap::DashMap;
-use futures::{future::Either, FutureExt};
+use futures::future::Either;
 use proxy::{
-    auth::backend::local::{JwksRoleSettings, LocalBackend, JWKS_ROLE_MAP},
+    auth::backend::local::{LocalBackend, JWKS_ROLE_MAP},
     cancellation::CancellationHandlerMain,
     config::{self, AuthenticationConfig, HttpConfig, ProxyConfig, RetryConfig},
-    console::{locks::ApiLocks, messages::JwksRoleMapping},
+    console::{
+        locks::ApiLocks,
+        messages::{EndpointJwksResponse, JwksSettings},
+    },
     http::health_server::AppMetrics,
+    intern::RoleNameInt,
     metrics::{Metrics, ThreadPoolMetrics},
     rate_limiter::{BucketRateLimiter, EndpointRateLimiter, LeakyBucketConfig, RateBucketInfo},
     scram::threadpool::ThreadPool,
     serverless::{self, cancel_set::CancelSet, GlobalConnPoolOptions},
+    RoleName,
 };
 
 project_git_version!(GIT_VERSION);
 project_build_tag!(BUILD_TAG);
 
 use clap::Parser;
-use tokio::{net::TcpListener, task::JoinSet};
+use tokio::{net::TcpListener, sync::Notify, task::JoinSet};
 use tokio_util::sync::CancellationToken;
 use tracing::{error, info, warn};
-use utils::{project_build_tag, project_git_version, sentry_init::init_sentry};
+use utils::{pid_file, project_build_tag, project_git_version, sentry_init::init_sentry};
 
 #[global_allocator]
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
@@ -72,9 +73,12 @@ struct LocalProxyCliArgs {
     /// Address of the postgres server
     #[clap(long, default_value = "127.0.0.1:5432")]
     compute: SocketAddr,
-    /// File address of the local proxy config file
+    /// Path of the local proxy config file
     #[clap(long, default_value = "./localproxy.json")]
-    config_path: PathBuf,
+    config_path: Utf8PathBuf,
+    /// Path of the local proxy PID file
+    #[clap(long, default_value = "./localproxy.pid")]
+    pid_path: Utf8PathBuf,
 }
 
 #[derive(clap::Args, Clone, Copy, Debug)]
@@ -126,6 +130,24 @@ async fn main() -> anyhow::Result<()> {
     let args = LocalProxyCliArgs::parse();
     let config = build_config(&args)?;
 
+    // before we bind to any ports, write the process ID to a file
+    // so that compute-ctl can find our process later
+    // in order to trigger the appropriate SIGHUP on config change.
+    //
+    // This also claims a "lock" that makes sure only one instance
+    // of local-proxy runs at a time.
+    let _process_guard = loop {
+        match pid_file::claim_for_current_process(&args.pid_path) {
+            Ok(guard) => break guard,
+            Err(e) => {
+                // compute-ctl might have tried to read the pid-file to let us
+                // know about some config change. We should try again.
+                error!(path=?args.pid_path, "could not claim PID file guard: {e:?}");
+                tokio::time::sleep(Duration::from_secs(1)).await;
+            }
+        }
+    };
+
     let metrics_listener = TcpListener::bind(args.metrics).await?.into_std()?;
     let http_listener = TcpListener::bind(args.http).await?;
     let shutdown = CancellationToken::new();
@@ -139,12 +161,30 @@ async fn main() -> anyhow::Result<()> {
         16,
     ));
 
-    refresh_config(args.config_path.clone()).await;
+    // write the process ID to a file so that compute-ctl can find our process later
+    // in order to trigger the appropriate SIGHUP on config change.
+    let pid = std::process::id();
+    info!("process running in PID {pid}");
+    std::fs::write(args.pid_path, format!("{pid}\n")).context("writing PID to file")?;
 
     let mut maintenance_tasks = JoinSet::new();
-    maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), move || {
-        refresh_config(args.config_path.clone()).map(Ok)
+
+    let refresh_config_notify = Arc::new(Notify::new());
+    maintenance_tasks.spawn(proxy::handle_signals(shutdown.clone(), {
+        let refresh_config_notify = Arc::clone(&refresh_config_notify);
+        move || {
+            refresh_config_notify.notify_one();
+        }
     }));
+
+    // trigger the first config load **after** setting up the signal hook
+    // to avoid the race condition where:
+    // 1. No config file registered when local-proxy starts up
+    // 2. The config file is written but the signal hook is not yet received
+    // 3. local-proxy completes startup but has no config loaded, despite there being a registerd config.
+    refresh_config_notify.notify_one();
+    tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify));
+
     maintenance_tasks.spawn(proxy::http::health_server::task_main(
         metrics_listener,
         AppMetrics {
@@ -245,81 +285,84 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig
     })))
 }
 
-async fn refresh_config(path: PathBuf) {
-    match refresh_config_inner(&path).await {
-        Ok(()) => {}
-        Err(e) => {
-            error!(error=?e, ?path, "could not read config file");
+async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc<Notify>) {
+    loop {
+        rx.notified().await;
+
+        match refresh_config_inner(&path).await {
+            Ok(()) => {}
+            Err(e) => {
+                error!(error=?e, ?path, "could not read config file");
+            }
         }
     }
 }
 
-async fn refresh_config_inner(path: &Path) -> anyhow::Result<()> {
+async fn refresh_config_inner(path: &Utf8Path) -> anyhow::Result<()> {
     let bytes = tokio::fs::read(&path).await?;
-    let mut data: JwksRoleMapping = serde_json::from_slice(&bytes)?;
+    let data: LocalProxySpec = serde_json::from_slice(&bytes)?;
 
-    let mut settings = None;
+    let mut jwks_set = vec![];
 
-    for mapping in data.roles.values_mut() {
-        for jwks in &mut mapping.jwks {
-            ensure!(
-                jwks.jwks_url.has_authority()
-                    && (jwks.jwks_url.scheme() == "http" || jwks.jwks_url.scheme() == "https"),
-                "Invalid JWKS url. Must be HTTP",
-            );
+    for jwks in data.jwks {
+        let mut jwks_url = url::Url::from_str(&jwks.jwks_url).context("parsing JWKS url")?;
 
-            ensure!(
-                jwks.jwks_url
-                    .host()
-                    .is_some_and(|h| h != url::Host::Domain("")),
-                "Invalid JWKS url. No domain listed",
-            );
+        ensure!(
+            jwks_url.has_authority()
+                && (jwks_url.scheme() == "http" || jwks_url.scheme() == "https"),
+            "Invalid JWKS url. Must be HTTP",
+        );
 
-            // clear username, password and ports
-            jwks.jwks_url.set_username("").expect(
+        ensure!(
+            jwks_url.host().is_some_and(|h| h != url::Host::Domain("")),
+            "Invalid JWKS url. No domain listed",
+        );
+
+        // clear username, password and ports
+        jwks_url
+            .set_username("")
+            .expect("url can be a base and has a valid host and is not a file. should not error");
+        jwks_url
+            .set_password(None)
+            .expect("url can be a base and has a valid host and is not a file. should not error");
+        // local testing is hard if we need to have a specific restricted port
+        if cfg!(not(feature = "testing")) {
+            jwks_url.set_port(None).expect(
                 "url can be a base and has a valid host and is not a file. should not error",
             );
-            jwks.jwks_url.set_password(None).expect(
-                "url can be a base and has a valid host and is not a file. should not error",
-            );
-            // local testing is hard if we need to have a specific restricted port
-            if cfg!(not(feature = "testing")) {
-                jwks.jwks_url.set_port(None).expect(
-                    "url can be a base and has a valid host and is not a file. should not error",
-                );
-            }
-
-            // clear query params
-            jwks.jwks_url.set_fragment(None);
-            jwks.jwks_url.query_pairs_mut().clear().finish();
-
-            if jwks.jwks_url.scheme() != "https" {
-                // local testing is hard if we need to set up https support.
-                if cfg!(not(feature = "testing")) {
-                    jwks.jwks_url
-                        .set_scheme("https")
-                        .expect("should not error to set the scheme to https if it was http");
-                } else {
-                    warn!(scheme = jwks.jwks_url.scheme(), "JWKS url is not HTTPS");
-                }
-            }
-
-            let (pr, br) = settings.get_or_insert((jwks.project_id, jwks.branch_id));
-            ensure!(
-                *pr == jwks.project_id,
-                "inconsistent project IDs configured"
-            );
-            ensure!(*br == jwks.branch_id, "inconsistent branch IDs configured");
         }
+
+        // clear query params
+        jwks_url.set_fragment(None);
+        jwks_url.query_pairs_mut().clear().finish();
+
+        if jwks_url.scheme() != "https" {
+            // local testing is hard if we need to set up https support.
+            if cfg!(not(feature = "testing")) {
+                jwks_url
+                    .set_scheme("https")
+                    .expect("should not error to set the scheme to https if it was http");
+            } else {
+                warn!(scheme = jwks_url.scheme(), "JWKS url is not HTTPS");
+            }
+        }
+
+        jwks_set.push(JwksSettings {
+            id: jwks.id,
+            jwks_url,
+            provider_name: jwks.provider_name,
+            jwt_audience: jwks.jwt_audience,
+            role_names: jwks
+                .role_names
+                .into_iter()
+                .map(RoleName::from)
+                .map(|s| RoleNameInt::from(&s))
+                .collect(),
+        })
     }
 
-    if let Some((project_id, branch_id)) = settings {
-        JWKS_ROLE_MAP.store(Some(Arc::new(JwksRoleSettings {
-            roles: data.roles,
-            project_id,
-            branch_id,
-        })));
-    }
+    info!("successfully loaded new config");
+    JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set })));
 
     Ok(())
 }
diff --git a/proxy/src/bin/pg_sni_router.rs b/proxy/src/bin/pg_sni_router.rs
index 20d2d3df9a..53f1586abe 100644
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -133,9 +133,7 @@ async fn main() -> anyhow::Result<()> {
         proxy_listener,
         cancellation_token.clone(),
     ));
-    let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || async {
-        Ok(())
-    }));
+    let signals_task = tokio::spawn(proxy::handle_signals(cancellation_token, || {}));
 
     // the signal task cant ever succeed.
     // the main task can error, or can succeed on cancellation.
diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index 2ac66ffe8c..141005788d 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -461,10 +461,7 @@ async fn main() -> anyhow::Result<()> {
 
     // maintenance tasks. these never return unless there's an error
     let mut maintenance_tasks = JoinSet::new();
-    maintenance_tasks.spawn(proxy::handle_signals(
-        cancellation_token.clone(),
-        || async { Ok(()) },
-    ));
+    maintenance_tasks.spawn(proxy::handle_signals(cancellation_token.clone(), || {}));
     maintenance_tasks.spawn(http::health_server::task_main(
         http_listener,
         AppMetrics {
diff --git a/proxy/src/console/messages.rs b/proxy/src/console/messages.rs
index 85683acb82..1696e229ce 100644
--- a/proxy/src/console/messages.rs
+++ b/proxy/src/console/messages.rs
@@ -1,13 +1,11 @@
 use measured::FixedCardinalityLabel;
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
 use std::fmt::{self, Display};
 
 use crate::auth::IpPattern;
 
-use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt};
+use crate::intern::{BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt};
 use crate::proxy::retry::CouldRetry;
-use crate::RoleName;
 
 /// Generic error response with human-readable description.
 /// Note that we can't always present it to user as is.
@@ -348,11 +346,6 @@ impl ColdStartInfo {
     }
 }
 
-#[derive(Debug, Deserialize, Clone)]
-pub struct JwksRoleMapping {
-    pub roles: HashMap<RoleName, EndpointJwksResponse>,
-}
-
 #[derive(Debug, Deserialize, Clone)]
 pub struct EndpointJwksResponse {
     pub jwks: Vec<JwksSettings>,
@@ -361,11 +354,10 @@ pub struct EndpointJwksResponse {
 #[derive(Debug, Deserialize, Clone)]
 pub struct JwksSettings {
     pub id: String,
-    pub project_id: ProjectIdInt,
-    pub branch_id: BranchIdInt,
     pub jwks_url: url::Url,
     pub provider_name: String,
     pub jwt_audience: Option<String>,
+    pub role_names: Vec<RoleNameInt>,
 }
 
 #[cfg(test)]
diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs
index 16e8da605b..95097f2de9 100644
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -5,7 +5,10 @@ pub mod neon;
 use super::messages::{ConsoleError, MetricsAuxInfo};
 use crate::{
     auth::{
-        backend::{ComputeCredentialKeys, ComputeUserInfo},
+        backend::{
+            jwt::{AuthRule, FetchAuthRules},
+            ComputeCredentialKeys, ComputeUserInfo,
+        },
         IpPattern,
     },
     cache::{endpoints::EndpointsCache, project_info::ProjectInfoCacheImpl, Cached, TimedLru},
@@ -16,7 +19,7 @@ use crate::{
     intern::ProjectIdInt,
     metrics::ApiLockMetrics,
     rate_limiter::{DynamicLimiter, Outcome, RateLimiterConfig, Token},
-    scram, EndpointCacheKey,
+    scram, EndpointCacheKey, EndpointId,
 };
 use dashmap::DashMap;
 use std::{hash::Hash, sync::Arc, time::Duration};
@@ -334,6 +337,12 @@ pub(crate) trait Api {
         user_info: &ComputeUserInfo,
     ) -> Result<(CachedAllowedIps, Option<CachedRoleSecret>), errors::GetAuthInfoError>;
 
+    async fn get_endpoint_jwks(
+        &self,
+        ctx: &RequestMonitoring,
+        endpoint: EndpointId,
+    ) -> anyhow::Result<Vec<AuthRule>>;
+
     /// Wake up the compute node and return the corresponding connection info.
     async fn wake_compute(
         &self,
@@ -343,6 +352,7 @@ pub(crate) trait Api {
 }
 
 #[non_exhaustive]
+#[derive(Clone)]
 pub enum ConsoleBackend {
     /// Current Cloud API (V2).
     Console(neon::Api),
@@ -386,6 +396,20 @@ impl Api for ConsoleBackend {
         }
     }
 
+    async fn get_endpoint_jwks(
+        &self,
+        ctx: &RequestMonitoring,
+        endpoint: EndpointId,
+    ) -> anyhow::Result<Vec<AuthRule>> {
+        match self {
+            Self::Console(api) => api.get_endpoint_jwks(ctx, endpoint).await,
+            #[cfg(any(test, feature = "testing"))]
+            Self::Postgres(api) => api.get_endpoint_jwks(ctx, endpoint).await,
+            #[cfg(test)]
+            Self::Test(_api) => Ok(vec![]),
+        }
+    }
+
     async fn wake_compute(
         &self,
         ctx: &RequestMonitoring,
@@ -552,3 +576,13 @@ impl WakeComputePermit {
         res
     }
 }
+
+impl FetchAuthRules for ConsoleBackend {
+    async fn fetch_auth_rules(
+        &self,
+        ctx: &RequestMonitoring,
+        endpoint: EndpointId,
+    ) -> anyhow::Result<Vec<AuthRule>> {
+        self.get_endpoint_jwks(ctx, endpoint).await
+    }
+}
diff --git a/proxy/src/console/provider/mock.rs b/proxy/src/console/provider/mock.rs
index 1b77418de6..b548a0203a 100644
--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -4,7 +4,9 @@ use super::{
     errors::{ApiError, GetAuthInfoError, WakeComputeError},
     AuthInfo, AuthSecret, CachedNodeInfo, NodeInfo,
 };
-use crate::context::RequestMonitoring;
+use crate::{
+    auth::backend::jwt::AuthRule, context::RequestMonitoring, intern::RoleNameInt, RoleName,
+};
 use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
 use crate::{auth::IpPattern, cache::Cached};
 use crate::{
@@ -118,6 +120,39 @@ impl Api {
         })
     }
 
+    async fn do_get_endpoint_jwks(&self, endpoint: EndpointId) -> anyhow::Result<Vec<AuthRule>> {
+        let (client, connection) =
+            tokio_postgres::connect(self.endpoint.as_str(), tokio_postgres::NoTls).await?;
+
+        let connection = tokio::spawn(connection);
+
+        let res = client.query(
+                "select id, jwks_url, audience, role_names from neon_control_plane.endpoint_jwks where endpoint_id = $1",
+                &[&endpoint.as_str()],
+            )
+            .await?;
+
+        let mut rows = vec![];
+        for row in res {
+            rows.push(AuthRule {
+                id: row.get("id"),
+                jwks_url: url::Url::parse(row.get("jwks_url"))?,
+                audience: row.get("audience"),
+                role_names: row
+                    .get::<_, Vec<String>>("role_names")
+                    .into_iter()
+                    .map(RoleName::from)
+                    .map(|s| RoleNameInt::from(&s))
+                    .collect(),
+            });
+        }
+
+        drop(client);
+        connection.await??;
+
+        Ok(rows)
+    }
+
     async fn do_wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
         let mut config = compute::ConnCfg::new();
         config
@@ -185,6 +220,14 @@ impl super::Api for Api {
         ))
     }
 
+    async fn get_endpoint_jwks(
+        &self,
+        _ctx: &RequestMonitoring,
+        endpoint: EndpointId,
+    ) -> anyhow::Result<Vec<AuthRule>> {
+        self.do_get_endpoint_jwks(endpoint).await
+    }
+
     #[tracing::instrument(skip_all)]
     async fn wake_compute(
         &self,
diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs
index b004bf4ecf..2d527f378c 100644
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -7,27 +7,33 @@ use super::{
     NodeInfo,
 };
 use crate::{
-    auth::backend::ComputeUserInfo,
+    auth::backend::{jwt::AuthRule, ComputeUserInfo},
     compute,
-    console::messages::{ColdStartInfo, Reason},
+    console::messages::{ColdStartInfo, EndpointJwksResponse, Reason},
     http,
     metrics::{CacheOutcome, Metrics},
     rate_limiter::WakeComputeRateLimiter,
-    scram, EndpointCacheKey,
+    scram, EndpointCacheKey, EndpointId,
 };
 use crate::{cache::Cached, context::RequestMonitoring};
+use ::http::{header::AUTHORIZATION, HeaderName};
+use anyhow::bail;
 use futures::TryFutureExt;
 use std::{sync::Arc, time::Duration};
 use tokio::time::Instant;
 use tokio_postgres::config::SslMode;
 use tracing::{debug, error, info, info_span, warn, Instrument};
 
+const X_REQUEST_ID: HeaderName = HeaderName::from_static("x-request-id");
+
+#[derive(Clone)]
 pub struct Api {
     endpoint: http::Endpoint,
     pub caches: &'static ApiCaches,
     pub(crate) locks: &'static ApiLocks<EndpointCacheKey>,
     pub(crate) wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
-    jwt: String,
+    // put in a shared ref so we don't copy secrets all over in memory
+    jwt: Arc<str>,
 }
 
 impl Api {
@@ -38,7 +44,9 @@ impl Api {
         locks: &'static ApiLocks<EndpointCacheKey>,
         wake_compute_endpoint_rate_limiter: Arc<WakeComputeRateLimiter>,
     ) -> Self {
-        let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN").unwrap_or_default();
+        let jwt = std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN")
+            .unwrap_or_default()
+            .into();
         Self {
             endpoint,
             caches,
@@ -71,9 +79,9 @@ impl Api {
         async {
             let request = self
                 .endpoint
-                .get("proxy_get_role_secret")
-                .header("X-Request-ID", &request_id)
-                .header("Authorization", format!("Bearer {}", &self.jwt))
+                .get_path("proxy_get_role_secret")
+                .header(X_REQUEST_ID, &request_id)
+                .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
                 .query(&[("session_id", ctx.session_id())])
                 .query(&[
                     ("application_name", application_name.as_str()),
@@ -125,6 +133,61 @@ impl Api {
         .await
     }
 
+    async fn do_get_endpoint_jwks(
+        &self,
+        ctx: &RequestMonitoring,
+        endpoint: EndpointId,
+    ) -> anyhow::Result<Vec<AuthRule>> {
+        if !self
+            .caches
+            .endpoints_cache
+            .is_valid(ctx, &endpoint.normalize())
+            .await
+        {
+            bail!("endpoint not found");
+        }
+        let request_id = ctx.session_id().to_string();
+        async {
+            let request = self
+                .endpoint
+                .get_with_url(|url| {
+                    url.path_segments_mut()
+                        .push("endpoints")
+                        .push(endpoint.as_str())
+                        .push("jwks");
+                })
+                .header(X_REQUEST_ID, &request_id)
+                .header(AUTHORIZATION, format!("Bearer {}", &self.jwt))
+                .query(&[("session_id", ctx.session_id())])
+                .build()?;
+
+            info!(url = request.url().as_str(), "sending http request");
+            let start = Instant::now();
+            let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Cplane);
+            let response = self.endpoint.execute(request).await?;
+            drop(pause);
+            info!(duration = ?start.elapsed(), "received http response");
+
+            let body = parse_body::<EndpointJwksResponse>(response).await?;
+
+            let rules = body
+                .jwks
+                .into_iter()
+                .map(|jwks| AuthRule {
+                    id: jwks.id,
+                    jwks_url: jwks.jwks_url,
+                    audience: jwks.jwt_audience,
+                    role_names: jwks.role_names,
+                })
+                .collect();
+
+            Ok(rules)
+        }
+        .map_err(crate::error::log_error)
+        .instrument(info_span!("http", id = request_id))
+        .await
+    }
+
     async fn do_wake_compute(
         &self,
         ctx: &RequestMonitoring,
@@ -135,7 +198,7 @@ impl Api {
         async {
             let mut request_builder = self
                 .endpoint
-                .get("proxy_wake_compute")
+                .get_path("proxy_wake_compute")
                 .header("X-Request-ID", &request_id)
                 .header("Authorization", format!("Bearer {}", &self.jwt))
                 .query(&[("session_id", ctx.session_id())])
@@ -262,6 +325,15 @@ impl super::Api for Api {
         ))
     }
 
+    #[tracing::instrument(skip_all)]
+    async fn get_endpoint_jwks(
+        &self,
+        ctx: &RequestMonitoring,
+        endpoint: EndpointId,
+    ) -> anyhow::Result<Vec<AuthRule>> {
+        self.do_get_endpoint_jwks(ctx, endpoint).await
+    }
+
     #[tracing::instrument(skip_all)]
     async fn wake_compute(
         &self,
diff --git a/proxy/src/http.rs b/proxy/src/http.rs
index c77d95f47d..14720b5c6b 100644
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -86,9 +86,17 @@ impl Endpoint {
 
     /// Return a [builder](RequestBuilder) for a `GET` request,
     /// appending a single `path` segment to the base endpoint URL.
-    pub(crate) fn get(&self, path: &str) -> RequestBuilder {
+    pub(crate) fn get_path(&self, path: &str) -> RequestBuilder {
+        self.get_with_url(|u| {
+            u.path_segments_mut().push(path);
+        })
+    }
+
+    /// Return a [builder](RequestBuilder) for a `GET` request,
+    /// accepting a closure to modify the url path segments for more complex paths queries.
+    pub(crate) fn get_with_url(&self, f: impl for<'a> FnOnce(&'a mut ApiUrl)) -> RequestBuilder {
         let mut url = self.endpoint.clone();
-        url.path_segments_mut().push(path);
+        f(&mut url);
         self.client.get(url.into_inner())
     }
 
@@ -144,7 +152,7 @@ mod tests {
 
         // Validate that this pattern makes sense.
         let req = endpoint
-            .get("frobnicate")
+            .get_path("frobnicate")
             .query(&[
                 ("foo", Some("10")), // should be just `foo=10`
                 ("bar", None),       // shouldn't be passed at all
@@ -162,7 +170,7 @@ mod tests {
         let endpoint = Endpoint::new(url, Client::new());
 
         let req = endpoint
-            .get("frobnicate")
+            .get_path("frobnicate")
             .query(&[("session_id", uuid::Uuid::nil())])
             .build()?;
 
diff --git a/proxy/src/intern.rs b/proxy/src/intern.rs
index e5144cfe2e..108420d7d7 100644
--- a/proxy/src/intern.rs
+++ b/proxy/src/intern.rs
@@ -130,14 +130,14 @@ impl<Id: InternId> Default for StringInterner<Id> {
 }
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-pub(crate) struct RoleNameTag;
+pub struct RoleNameTag;
 impl InternId for RoleNameTag {
     fn get_interner() -> &'static StringInterner<Self> {
         static ROLE_NAMES: OnceLock<StringInterner<RoleNameTag>> = OnceLock::new();
         ROLE_NAMES.get_or_init(Default::default)
     }
 }
-pub(crate) type RoleNameInt = InternedString<RoleNameTag>;
+pub type RoleNameInt = InternedString<RoleNameTag>;
 impl From<&RoleName> for RoleNameInt {
     fn from(value: &RoleName) -> Self {
         RoleNameTag::get_interner().get_or_intern(value)
diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs
index 0070839aa8..ea0a9beced 100644
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -82,7 +82,7 @@
     impl_trait_overcaptures,
 )]
 
-use std::{convert::Infallible, future::Future};
+use std::convert::Infallible;
 
 use anyhow::{bail, Context};
 use intern::{EndpointIdInt, EndpointIdTag, InternId};
@@ -117,13 +117,12 @@ pub mod usage_metrics;
 pub mod waiters;
 
 /// Handle unix signals appropriately.
-pub async fn handle_signals<F, Fut>(
+pub async fn handle_signals<F>(
     token: CancellationToken,
     mut refresh_config: F,
 ) -> anyhow::Result<Infallible>
 where
-    F: FnMut() -> Fut,
-    Fut: Future<Output = anyhow::Result<()>>,
+    F: FnMut(),
 {
     use tokio::signal::unix::{signal, SignalKind};
 
@@ -136,7 +135,7 @@ where
             // Hangup is commonly used for config reload.
             _ = hangup.recv() => {
                 warn!("received SIGHUP");
-                refresh_config().await?;
+                refresh_config();
             }
             // Shut down the whole application.
             _ = interrupt.recv() => {
diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs
index 752d982726..058ec06e02 100644
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -525,6 +525,10 @@ impl TestBackend for TestConnectMechanism {
     {
         unimplemented!("not used in tests")
     }
+
+    fn dyn_clone(&self) -> Box<dyn TestBackend> {
+        Box::new(self.clone())
+    }
 }
 
 fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeInfo {
diff --git a/proxy/src/scram/threadpool.rs b/proxy/src/scram/threadpool.rs
index 2702aeebfe..c027a0cd20 100644
--- a/proxy/src/scram/threadpool.rs
+++ b/proxy/src/scram/threadpool.rs
@@ -43,6 +43,13 @@ impl ThreadPool {
     pub fn new(n_workers: u8) -> Arc<Self> {
         // rayon would be nice here, but yielding in rayon does not work well afaict.
 
+        if n_workers == 0 {
+            return Arc::new(Self {
+                runtime: None,
+                metrics: Arc::new(ThreadPoolMetrics::new(n_workers as usize)),
+            });
+        }
+
         Arc::new_cyclic(|pool| {
             let pool = pool.clone();
             let worker_id = AtomicUsize::new(0);
diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs
index aa236907db..607eb0caf6 100644
--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -119,7 +119,7 @@ impl PoolingBackend {
                     .check_jwt(
                         ctx,
                         user_info.endpoint.clone(),
-                        user_info.user.clone(),
+                        &user_info.user,
                         &StaticAuthRules,
                         jwt,
                     )
diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs
index 5270934f5e..1e5f963a4f 100644
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -374,14 +374,16 @@ type JoinTaskRes = Result<anyhow::Result<()>, JoinError>;
 
 async fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
     // fsync the datadir to make sure we have a consistent state on disk.
-    let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
-    let started = Instant::now();
-    utils::crashsafe::syncfs(dfd)?;
-    let elapsed = started.elapsed();
-    info!(
-        elapsed_ms = elapsed.as_millis(),
-        "syncfs data directory done"
-    );
+    if !conf.no_sync {
+        let dfd = File::open(&conf.workdir).context("open datadir for syncfs")?;
+        let started = Instant::now();
+        utils::crashsafe::syncfs(dfd)?;
+        let elapsed = started.elapsed();
+        info!(
+            elapsed_ms = elapsed.as_millis(),
+            "syncfs data directory done"
+        );
+    }
 
     info!("starting safekeeper WAL service on {}", conf.listen_pg_addr);
     let pg_listener = tcp_listener::bind(conf.listen_pg_addr.clone()).map_err(|e| {
diff --git a/safekeeper/tests/random_test.rs b/safekeeper/tests/random_test.rs
index 7bdee35cd7..1a932ef699 100644
--- a/safekeeper/tests/random_test.rs
+++ b/safekeeper/tests/random_test.rs
@@ -9,7 +9,7 @@ use crate::walproposer_sim::{
 
 pub mod walproposer_sim;
 
-// Generates 2000 random seeds and runs a schedule for each of them.
+// Generates 500 random seeds and runs a schedule for each of them.
 // If you see this test fail, please report the last seed to the
 // @safekeeper team.
 #[test]
@@ -17,7 +17,7 @@ fn test_random_schedules() -> anyhow::Result<()> {
     let clock = init_logger();
     let mut config = TestConfig::new(Some(clock));
 
-    for _ in 0..2000 {
+    for _ in 0..500 {
         let seed: u64 = rand::thread_rng().gen();
         config.network = generate_network_opts(seed);
 
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 201eb1087d..70a038c960 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -642,9 +642,6 @@ class NeonEnvBuilder:
         patch_script = ""
         for ps in self.env.pageservers:
             patch_script += f"UPDATE nodes SET listen_http_port={ps.service_port.http}, listen_pg_port={ps.service_port.pg}  WHERE node_id = '{ps.id}';"
-            # This is a temporary to get the backward compat test happy
-            # since the compat snapshot was generated with an older version of neon local
-            patch_script += f"UPDATE nodes SET availability_zone_id='{ps.az_id}'  WHERE node_id = '{ps.id}' AND availability_zone_id IS NULL;"
         patch_script_path.write_text(patch_script)
 
         # Update the config with info about tenants and timelines
@@ -3314,7 +3311,7 @@ class VanillaPostgres(PgProtocol):
         self.pg_bin = pg_bin
         self.running = False
         if init:
-            self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)])
+            self.pg_bin.run_capture(["initdb", "--pgdata", str(pgdatadir)])
         self.configure([f"port = {port}\n"])
 
     def enable_tls(self):
diff --git a/test_runner/fixtures/safekeeper/http.py b/test_runner/fixtures/safekeeper/http.py
index 96c84d1616..7f170eeea3 100644
--- a/test_runner/fixtures/safekeeper/http.py
+++ b/test_runner/fixtures/safekeeper/http.py
@@ -8,6 +8,7 @@ import requests
 from fixtures.common_types import Lsn, TenantId, TenantTimelineId, TimelineId
 from fixtures.log_helper import log
 from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
+from fixtures.utils import wait_until
 
 
 # Walreceiver as returned by sk's timeline status endpoint.
@@ -161,6 +162,16 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter):
             walreceivers=walreceivers,
         )
 
+    # Get timeline_start_lsn, waiting until it's nonzero. It is a way to ensure
+    # that the timeline is fully initialized at the safekeeper.
+    def get_non_zero_timeline_start_lsn(self, tenant_id: TenantId, timeline_id: TimelineId) -> Lsn:
+        def timeline_start_lsn_non_zero() -> Lsn:
+            s = self.timeline_status(tenant_id, timeline_id).timeline_start_lsn
+            assert s > Lsn(0)
+            return s
+
+        return wait_until(30, 1, timeline_start_lsn_non_zero)
+
     def get_commit_lsn(self, tenant_id: TenantId, timeline_id: TimelineId) -> Lsn:
         return self.timeline_status(tenant_id, timeline_id).commit_lsn
 
diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py
index fb5c1d3115..b559be5f18 100644
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -21,7 +21,7 @@ from fixtures.pageserver.http import PageserverApiException
 from fixtures.pageserver.utils import (
     timeline_delete_wait_completed,
 )
-from fixtures.pg_version import PgVersion, skip_on_postgres
+from fixtures.pg_version import PgVersion
 from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage
 from fixtures.workload import Workload
 
@@ -156,9 +156,6 @@ ingest_lag_log_line = ".*ingesting record with timestamp lagging more than wait_
 @check_ondisk_data_compatibility_if_enabled
 @pytest.mark.xdist_group("compatibility")
 @pytest.mark.order(after="test_create_snapshot")
-@skip_on_postgres(
-    PgVersion.V17, "There are no snapshots yet"
-)  # TODO: revert this once we have snapshots
 def test_backward_compatibility(
     neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
@@ -206,9 +203,6 @@ def test_backward_compatibility(
 @check_ondisk_data_compatibility_if_enabled
 @pytest.mark.xdist_group("compatibility")
 @pytest.mark.order(after="test_create_snapshot")
-@skip_on_postgres(
-    PgVersion.V17, "There are no snapshots yet"
-)  # TODO: revert this once we have snapshots
 def test_forward_compatibility(
     neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
diff --git a/test_runner/regress/test_readonly_node.py b/test_runner/regress/test_readonly_node.py
index 5e8b8d38f7..b08fcc0da1 100644
--- a/test_runner/regress/test_readonly_node.py
+++ b/test_runner/regress/test_readonly_node.py
@@ -27,7 +27,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
     env.pageserver.allowed_errors.extend(
         [
             ".*basebackup .* failed: invalid basebackup lsn.*",
-            ".*page_service.*handle_make_lsn_lease.*.*tried to request a page version that was garbage collected",
+            ".*/lsn_lease.*invalid lsn lease request.*",
         ]
     )
 
@@ -108,7 +108,7 @@ def test_readonly_node(neon_simple_env: NeonEnv):
     assert cur.fetchone() == (1,)
 
     # Create node at pre-initdb lsn
-    with pytest.raises(Exception, match="invalid basebackup lsn"):
+    with pytest.raises(Exception, match="invalid lsn lease request"):
         # compute node startup with invalid LSN should fail
         env.endpoints.create_start(
             branch_name="main",
@@ -167,6 +167,23 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
             )
         return last_flush_lsn
 
+    def trigger_gc_and_select(env: NeonEnv, ep_static: Endpoint):
+        """
+        Trigger GC manually on all pageservers. Then run an `SELECT` query.
+        """
+        for shard, ps in tenant_get_shards(env, env.initial_tenant):
+            client = ps.http_client()
+            gc_result = client.timeline_gc(shard, env.initial_timeline, 0)
+            log.info(f"{gc_result=}")
+
+            assert (
+                gc_result["layers_removed"] == 0
+            ), "No layers should be removed, old layers are guarded by leases."
+
+        with ep_static.cursor() as cur:
+            cur.execute("SELECT count(*) FROM t0")
+            assert cur.fetchone() == (ROW_COUNT,)
+
     # Insert some records on main branch
     with env.endpoints.create_start("main") as ep_main:
         with ep_main.cursor() as cur:
@@ -193,25 +210,31 @@ def test_readonly_node_gc(neon_env_builder: NeonEnvBuilder):
 
             generate_updates_on_main(env, ep_main, i, end=100)
 
-            # Trigger GC
-            for shard, ps in tenant_get_shards(env, env.initial_tenant):
-                client = ps.http_client()
-                gc_result = client.timeline_gc(shard, env.initial_timeline, 0)
-                log.info(f"{gc_result=}")
+            trigger_gc_and_select(env, ep_static)
 
-                assert (
-                    gc_result["layers_removed"] == 0
-                ), "No layers should be removed, old layers are guarded by leases."
+            # Trigger Pageserver restarts
+            for ps in env.pageservers:
+                ps.stop()
+                # Static compute should have at least one lease request failure due to connection.
+                time.sleep(LSN_LEASE_LENGTH / 2)
+                ps.start()
 
-            with ep_static.cursor() as cur:
-                cur.execute("SELECT count(*) FROM t0")
-                assert cur.fetchone() == (ROW_COUNT,)
+            trigger_gc_and_select(env, ep_static)
+
+            # Reconfigure pageservers
+            env.pageservers[0].stop()
+            env.storage_controller.node_configure(
+                env.pageservers[0].id, {"availability": "Offline"}
+            )
+            env.storage_controller.reconcile_until_idle()
+
+            trigger_gc_and_select(env, ep_static)
 
         # Do some update so we can increment latest_gc_cutoff
         generate_updates_on_main(env, ep_main, i, end=100)
 
     # Wait for the existing lease to expire.
-    time.sleep(LSN_LEASE_LENGTH)
+    time.sleep(LSN_LEASE_LENGTH + 1)
     # Now trigger GC again, layers should be removed.
     for shard, ps in tenant_get_shards(env, env.initial_tenant):
         client = ps.http_client()
diff --git a/test_runner/regress/test_timeline_gc_blocking.py b/test_runner/regress/test_timeline_gc_blocking.py
index 765c72cf2a..ddfe9b911f 100644
--- a/test_runner/regress/test_timeline_gc_blocking.py
+++ b/test_runner/regress/test_timeline_gc_blocking.py
@@ -45,10 +45,7 @@ def test_gc_blocking_by_timeline(neon_env_builder: NeonEnvBuilder, sharded: bool
     tenant_after = http.tenant_status(env.initial_tenant)
     assert tenant_before != tenant_after
     gc_blocking = tenant_after["gc_blocking"]
-    assert (
-        gc_blocking
-        == "BlockingReasons { tenant_blocked_by_lsn_lease_deadline: false, timelines: 1, reasons: EnumSet(Manual) }"
-    )
+    assert gc_blocking == "BlockingReasons { timelines: 1, reasons: EnumSet(Manual) }"
 
     wait_for_another_gc_round()
     pss.assert_log_contains(gc_skipped_line)
diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py
index 8ee548bdb0..c75235a04b 100644
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -772,7 +772,7 @@ class ProposerPostgres(PgProtocol):
     def initdb(self):
         """Run initdb"""
 
-        args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()]
+        args = ["initdb", "--username", "cloud_admin", "--pgdata", self.pg_data_dir_path()]
         self.pg_bin.run(args)
 
     def start(self):
@@ -2084,8 +2084,13 @@ def test_timeline_copy(neon_env_builder: NeonEnvBuilder, insert_rows: int):
 
     endpoint.safe_psql("create table t(key int, value text)")
 
-    timeline_status = env.safekeepers[0].http_client().timeline_status(tenant_id, timeline_id)
-    timeline_start_lsn = timeline_status.timeline_start_lsn
+    # Note: currently timelines on sks are created by compute and commit of
+    # transaction above is finished when 2/3 sks received it, so there is a
+    # small chance that timeline on this sk is not created/initialized yet,
+    # hence the usage of waiting function to prevent flakiness.
+    timeline_start_lsn = (
+        env.safekeepers[0].http_client().get_non_zero_timeline_start_lsn(tenant_id, timeline_id)
+    )
     log.info(f"Timeline start LSN: {timeline_start_lsn}")
 
     current_percent = 0.0