refactor thread local accesses

more ephasis on performance
cleanup code a little
2026-07-17 02:50:38 +00:00 · 2025-07-21 12:24:55 +01:00 · 2025-07-21 11:53:56 +01:00 · 2025-07-21 10:09:10 +01:00 · 2025-07-20 19:37:37 +01:00 · 2025-07-20 17:08:50 +01:00
349 changed files with 21851 additions and 7906 deletions
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -30,9 +30,11 @@ workspace-members = [
    "vm_monitor",
    # All of these exist in libs and are not usually built independently.
    # Putting workspace hack there adds a bottleneck for cargo builds.
+    "alloc-metrics",
    "compute_api",
    "consumption_metrics",
    "desim",
+    "json",
    "metrics",
    "pageserver_api",
    "postgres_backend",
--- a/.dockerignore
+++ b/.dockerignore
@@ -27,4 +27,4 @@
 !storage_controller/
 !vendor/postgres-*/
 !workspace_hack/
-!build_tools/patches
+!build-tools/patches
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -7,6 +7,7 @@ self-hosted-runner:
    - small-metal
    - small-arm64
    - unit-perf
+    - unit-perf-aws-arm
    - us-east-2
 config-variables:
  - AWS_ECR_REGION
@@ -30,6 +31,7 @@ config-variables:
  - NEON_PROD_AWS_ACCOUNT_ID
  - PGREGRESS_PG16_PROJECT_ID
  - PGREGRESS_PG17_PROJECT_ID
+  - PREWARM_PGBENCH_SIZE
  - REMOTE_STORAGE_AZURE_CONTAINER
  - REMOTE_STORAGE_AZURE_REGION
  - SLACK_CICD_CHANNEL_ID
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -176,7 +176,13 @@ runs:
        fi

        if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
-          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+          # We don't use code coverage for regression tests (the step is disabled),
+          # so there's no need to collect it.
+          # Ref https://github.com/neondatabase/neon/issues/4540
+          # cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
+          cov_prefix=()
+          # Explicitly set LLVM_PROFILE_FILE to /dev/null to avoid writing *.profraw files
+          export LLVM_PROFILE_FILE=/dev/null
        else
          cov_prefix=()
        fi
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -150,7 +150,7 @@ jobs:
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
@@ -162,7 +162,7 @@ jobs:
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

      - name: Cache postgres v16 build
        id: cache_pg_16
@@ -174,7 +174,7 @@ jobs:
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v16
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

      - name: Cache postgres v17 build
        id: cache_pg_17
@@ -186,7 +186,7 @@ jobs:
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v17
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools/Dockerfile') }}

      - name: Build all
        # Note: the Makefile picks up BUILD_TYPE and CARGO_PROFILE from the env variables
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -219,6 +219,7 @@ jobs:
          --ignore test_runner/performance/test_cumulative_statistics_persistence.py
          --ignore test_runner/performance/test_perf_many_relations.py
          --ignore test_runner/performance/test_perf_oltp_large_tenant.py
+          --ignore test_runner/performance/test_lfc_prewarm.py
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -410,6 +411,77 @@ jobs:
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

+  prewarm-test:
+    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
+    env:
+      PGBENCH_SIZE: ${{ vars.PREWARM_PGBENCH_SIZE }}
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      DEFAULT_PG_VERSION: 17
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
+      PLATFORM: "neon-staging"
+
+    runs-on: [ self-hosted, us-east-2, x64 ]
+    container:
+      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+      options: --init
+
+    steps:
+    - name: Harden the runner (Audit all outbound calls)
+      uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+      with:
+        egress-policy: audit
+
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
+    - name: Download Neon artifact
+      uses: ./.github/actions/download
+      with:
+        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        path: /tmp/neon/
+        prefix: latest
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+    - name: Run prewarm benchmark
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance/test_lfc_prewarm.py
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 5400
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
+
+    - name: Create Allure report
+      id: create-allure-report
+      if: ${{ !cancelled() }}
+      uses: ./.github/actions/allure-report-generate
+      with:
+        store-test-results-into-db: true
+        aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
+
  generate-matrices:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
--- a/.github/workflows/build-build-tools-image.yml
+++ b/.github/workflows/build-build-tools-image.yml
@@ -72,7 +72,7 @@ jobs:
          ARCHS: ${{ inputs.archs || '["x64","arm64"]' }}
          DEBIANS: ${{ inputs.debians || '["bullseye","bookworm"]' }}
          IMAGE_TAG: |
-            ${{ hashFiles('build-tools.Dockerfile',
+            ${{ hashFiles('build-tools/Dockerfile',
                          '.github/workflows/build-build-tools-image.yml') }}
        run: |
          echo "archs=${ARCHS}"           | tee -a ${GITHUB_OUTPUT}
@@ -144,7 +144,7 @@ jobs:

      - uses: docker/build-push-action@471d1dc4e07e5cdedd4c2171150001c434f0b7a4 # v6.15.0
        with:
-          file: build-tools.Dockerfile
+          file: build-tools/Dockerfile
          context: .
          provenance: false
          push: true
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -32,162 +32,14 @@ permissions:
  contents: read

 jobs:
-  build-pgxn:
-    if: |
-      inputs.pg_versions != '[]' || inputs.rebuild_everything ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
-    timeout-minutes: 30
-    runs-on: macos-15
-    strategy:
-      matrix:
-        postgres-version: ${{ inputs.rebuild_everything && fromJSON('["v14", "v15", "v16", "v17"]') || fromJSON(inputs.pg_versions) }}
-    env:
-      # Use release build only, to have less debug info around
-      # Hence keeping target/ (and general cache size) smaller
-      BUILD_TYPE: release
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout main repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Set pg ${{ matrix.postgres-version }} for caching
-        id: pg_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-${{ matrix.postgres-version }}) | tee -a "${GITHUB_OUTPUT}"
-
-      - name: Cache postgres ${{ matrix.postgres-version }} build
-        id: cache_pg
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
-        with:
-          path: pg_install/${{ matrix.postgres-version }}
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ matrix.postgres-version }}-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Checkout submodule vendor/postgres-${{ matrix.postgres-version }}
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          git submodule init vendor/postgres-${{ matrix.postgres-version }}
-          git submodule update --depth 1 --recursive
-
-      - name: Install build dependencies
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          brew install flex bison openssl protobuf icu4c
-
-      - name: Set extra env for macOS
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
-
-      - name: Build Postgres ${{ matrix.postgres-version }}
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          make postgres-${{ matrix.postgres-version }} -j$(sysctl -n hw.ncpu)
-
-      - name: Build Neon Pg Ext ${{ matrix.postgres-version }}
-        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: |
-          make "neon-pg-ext-${{ matrix.postgres-version }}" -j$(sysctl -n hw.ncpu)
-
-      - name: Upload "pg_install/${{ matrix.postgres-version }}" artifact
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-        with:
-          name: pg_install--${{ matrix.postgres-version }}
-          path: pg_install/${{ matrix.postgres-version }}
-          # The artifact is supposed to be used by the next job in the same workflow,
-          # so there’s no need to store it for too long.
-          retention-days: 1
-
-  build-walproposer-lib:
-    if: |
-      contains(inputs.pg_versions, 'v17') || inputs.rebuild_everything ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')  ||
-      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
-      github.ref_name == 'main'
-    timeout-minutes: 30
-    runs-on: macos-15
-    needs: [build-pgxn]
-    env:
-      # Use release build only, to have less debug info around
-      # Hence keeping target/ (and general cache size) smaller
-      BUILD_TYPE: release
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
-        with:
-          egress-policy: audit
-
-      - name: Checkout main repo
-        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Set pg v17 for caching
-        id: pg_rev
-        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) | tee -a "${GITHUB_OUTPUT}"
-
-      - name: Download "pg_install/v17" artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          name: pg_install--v17
-          path: pg_install/v17
-
-      # `actions/download-artifact` doesn't preserve permissions:
-      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
-      - name: Make pg_install/v*/bin/* executable
-        run: |
-          chmod +x pg_install/v*/bin/*
-
-      - name: Cache walproposer-lib
-        id: cache_walproposer_lib
-        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
-        with:
-          path: build/walproposer-lib
-          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-walproposer_lib-v17-${{ steps.pg_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
-
-      - name: Checkout submodule vendor/postgres-v17
-        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
-        run: |
-          git submodule init vendor/postgres-v17
-          git submodule update --depth 1 --recursive
-
-      - name: Install build dependencies
-        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
-        run: |
-          brew install flex bison openssl protobuf icu4c
-
-      - name: Set extra env for macOS
-        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
-        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
-
-      - name: Build walproposer-lib (only for v17)
-        if: steps.cache_walproposer_lib.outputs.cache-hit != 'true'
-        run:
-          make walproposer-lib -j$(sysctl -n hw.ncpu) PG_INSTALL_CACHED=1
-
-      - name: Upload "build/walproposer-lib" artifact
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
-        with:
-          name: build--walproposer-lib
-          path: build/walproposer-lib
-          # The artifact is supposed to be used by the next job in the same workflow,
-          # so there’s no need to store it for too long.
-          retention-days: 1
-
-  cargo-build:
+  make-all:
    if: |
      inputs.pg_versions != '[]' || inputs.rebuild_rust_code || inputs.rebuild_everything ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos') ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
-    timeout-minutes: 30
+    timeout-minutes: 60
    runs-on: macos-15
-    needs: [build-pgxn, build-walproposer-lib]
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
@@ -203,41 +55,53 @@ jobs:
        with:
          submodules: true

-      - name: Download "pg_install/v14" artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          name: pg_install--v14
-          path: pg_install/v14
-
-      - name: Download "pg_install/v15" artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          name: pg_install--v15
-          path: pg_install/v15
-
-      - name: Download "pg_install/v16" artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          name: pg_install--v16
-          path: pg_install/v16
-
-      - name: Download "pg_install/v17" artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          name: pg_install--v17
-          path: pg_install/v17
-
-      - name: Download "build/walproposer-lib" artifact
-        uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
-        with:
-          name: build--walproposer-lib
-          path: build/walproposer-lib
-
-      # `actions/download-artifact` doesn't preserve permissions:
-      # https://github.com/actions/download-artifact?tab=readme-ov-file#permission-loss
-      - name: Make pg_install/v*/bin/* executable
+      - name: Install build dependencies
        run: |
-          chmod +x pg_install/v*/bin/*
+          brew install flex bison openssl protobuf icu4c
+
+      - name: Set extra env for macOS
+        run: |
+          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
+          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
+
+      - name: Restore "pg_install/" cache
+        id: cache_pg
+        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
+        with:
+          path: pg_install
+          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-install-v14-${{ hashFiles('Makefile', 'postgres.mk', 'vendor/revisions.json') }}
+
+      - name: Checkout vendor/postgres submodules
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          git submodule init
+          git submodule update --depth 1 --recursive
+
+      - name: Build Postgres
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          make postgres -j$(sysctl -n hw.ncpu)
+
+      # This isn't strictly necessary, but it makes the cached and non-cached builds more similar,
+      # When pg_install is restored from cache, there is no 'build/' directory. By removing it
+      # in a non-cached build too, we enforce that the rest of the steps don't depend on it,
+      # so that we notice any build caching bugs earlier.
+      - name: Remove build artifacts
+        if: steps.cache_pg.outputs.cache-hit != 'true'
+        run: |
+          rm -rf build
+
+      # Explicitly update the rust toolchain before running 'make'. The parallel make build can
+      # invoke 'cargo build' more than once in parallel, for different crates.  That's OK, 'cargo'
+      # does its own locking to prevent concurrent builds from stepping on each other's
+      # toes. However, it will first try to update the toolchain, and that step is not locked the
+      # same way. To avoid two toolchain updates running in parallel and stepping on each other's
+      # toes, ensure that the toolchain is up-to-date beforehand.
+      - name: Update rust toolchain
+        run: |
+          rustup --version &&
+          rustup update &&
+          rustup show

      - name: Cache cargo deps
        uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3
@@ -249,17 +113,12 @@ jobs:
            target
          key: v1-${{ runner.os }}-${{ runner.arch }}-cargo-${{ hashFiles('./Cargo.lock') }}-${{ hashFiles('./rust-toolchain.toml') }}-rust

-      - name: Install build dependencies
-        run: |
-          brew install flex bison openssl protobuf icu4c
-
-      - name: Set extra env for macOS
-        run: |
-          echo 'LDFLAGS=-L/usr/local/opt/openssl@3/lib' >> $GITHUB_ENV
-          echo 'CPPFLAGS=-I/usr/local/opt/openssl@3/include' >> $GITHUB_ENV
-
-      - name: Run cargo build
-        run: cargo build --all --release -j$(sysctl -n hw.ncpu)
+      # Build the neon-specific postgres extensions, and all the Rust bits.
+      #
+      # Pass PG_INSTALL_CACHED=1 because PostgreSQL was already built and cached
+      # separately.
+      - name: Build all
+        run: PG_INSTALL_CACHED=1 BUILD_TYPE=release make -j$(sysctl -n hw.ncpu) all

      - name: Check that no warnings are produced
        run: ./run_clippy.sh
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -87,6 +87,29 @@ jobs:
    uses: ./.github/workflows/build-build-tools-image.yml
    secrets: inherit

+  lint-yamls:
+    needs: [ meta, check-permissions, build-build-tools-image ]
+    # We do need to run this in `.*-rc-pr` because of hotfixes.
+    if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+    runs-on: [ self-hosted, small ]
+    container:
+      image: ${{ needs.build-build-tools-image.outputs.image }}
+      credentials:
+        username: ${{ github.actor }}
+        password: ${{ secrets.GITHUB_TOKEN }}
+      options: --init
+
+    steps:
+      - name: Harden the runner (Audit all outbound calls)
+        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
+        with:
+          egress-policy: audit
+
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      - run: make -C compute manifest-schema-validation
+      - run: make lint-openapi-spec
+
  check-codestyle-python:
    needs: [ meta, check-permissions, build-build-tools-image ]
    # No need to run on `main` because we this in the merge queue. We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -199,28 +222,6 @@ jobs:
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
    secrets: inherit

-  validate-compute-manifest:
-    runs-on: ubuntu-22.04
-    needs: [ meta, check-permissions ]
-    # We do need to run this in `.*-rc-pr` because of hotfixes.
-    if: ${{ contains(fromJSON('["pr", "push-main", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
-    steps:
-      - name: Harden the runner (Audit all outbound calls)
-        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
-        with:
-          egress-policy: audit
-
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-
-      - name: Set up Node.js
-        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
-        with:
-          node-version: '24'
-
-      - name: Validate manifest against schema
-        run: |
-          make -C compute manifest-schema-validation
-
  build-and-test-locally:
    needs: [ meta, build-build-tools-image ]
    # We do need to run this in `.*-rc-pr` because of hotfixes.
@@ -306,14 +307,14 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [ self-hosted, unit-perf ]
+    runs-on: [ self-hosted, unit-perf-aws-arm ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      # for changed limits, see comments on `options:` earlier in this file
-      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
+      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864 --ulimit nofile=65536:65536 --security-opt seccomp=unconfined
    strategy:
      fail-fast: false
      matrix:
@@ -986,6 +987,7 @@ jobs:
      - name: Verify docker-compose example and test extensions
        timeout-minutes: 60
        env:
+          PARALLEL_COMPUTES: 3
          TAG: >-
            ${{
              needs.meta.outputs.run-kind == 'compute-rc-pr'
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -1,4 +1,4 @@
-name: Periodic pagebench performance test on unit-perf hetzner runner
+name: Periodic pagebench performance test on unit-perf-aws-arm runners

 on:
  schedule:
@@ -40,7 +40,7 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [ self-hosted, unit-perf ]
+    runs-on: [ self-hosted, unit-perf-aws-arm ]
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
      credentials:
--- a/.github/workflows/proxy-benchmark.yml
+++ b/.github/workflows/proxy-benchmark.yml
@@ -1,4 +1,4 @@
-name: Periodic proxy performance test on unit-perf hetzner runner
+name: Periodic proxy performance test on unit-perf-aws-arm runners

 on:
  push: # TODO: remove after testing
@@ -32,7 +32,7 @@ jobs:
      statuses: write
      contents: write
      pull-requests: write
-    runs-on: [self-hosted, unit-perf]
+    runs-on: [self-hosted, unit-perf-aws-arm]
    timeout-minutes: 60  # 1h timeout
    container:
      image: ghcr.io/neondatabase/build-tools:pinned-bookworm
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ neon.iml
 /.neon
 /integration_tests/.neon
 compaction-suite-results.*
+docker-compose/docker-compose-parallel.yml

 # Coverage
 *.profraw
@@ -25,11 +26,9 @@ compaction-suite-results.*
 *.o
 *.so
 *.Po
-*.pid

 # pgindent typedef lists
 *.list

-# various files for local testing
-/proxy/.subzero
-local_proxy.json
+# Node
+**/node_modules/
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,16 +1,16 @@
 [submodule "vendor/postgres-v14"]
 	path = vendor/postgres-v14
-	url = https://github.com/neondatabase/postgres.git
+	url = ../postgres.git
 	branch = REL_14_STABLE_neon
 [submodule "vendor/postgres-v15"]
 	path = vendor/postgres-v15
-	url = https://github.com/neondatabase/postgres.git
+	url = ../postgres.git
 	branch = REL_15_STABLE_neon
 [submodule "vendor/postgres-v16"]
 	path = vendor/postgres-v16
-	url = https://github.com/neondatabase/postgres.git
+	url = ../postgres.git
 	branch = REL_16_STABLE_neon
 [submodule "vendor/postgres-v17"]
 	path = vendor/postgres-v17
-	url = https://github.com/neondatabase/postgres.git
+	url = ../postgres.git
 	branch = REL_17_STABLE_neon
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,6 +8,7 @@ members = [
    "pageserver/compaction",
    "pageserver/ctl",
    "pageserver/client",
+    "pageserver/client_grpc",
    "pageserver/pagebench",
    "pageserver/page_api",
    "proxy",
@@ -42,10 +43,12 @@ members = [
    "libs/walproposer",
    "libs/wal_decoder",
    "libs/postgres_initdb",
+    "libs/proxy/json",
    "libs/proxy/postgres-protocol2",
    "libs/proxy/postgres-types2",
    "libs/proxy/tokio-postgres2",
    "endpoint_storage",
+    "pgxn/neon/communicator",
 ]

 [workspace.package]
@@ -127,6 +130,7 @@ jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] }
 jsonwebtoken = "9"
 lasso = "0.7"
 libc = "0.2"
+lock_api = "0.4.13"
 md5 = "0.7.0"
 measured = { version = "0.0.22", features=["lasso"] }
 measured-process = { version = "0.0.22" }
@@ -162,7 +166,7 @@ reqwest-middleware = "0.4"
 reqwest-retry = "0.7"
 routerify = "3"
 rpds = "0.13"
-rustc-hash = "1.1.0"
+rustc-hash = "2.1.1"
 rustls = { version = "0.23.16", default-features = false }
 rustls-pemfile = "2"
 rustls-pki-types = "1.11"
@@ -191,6 +195,7 @@ sync_wrapper = "0.1.2"
 tar = "0.4"
 test-context = "0.3"
 thiserror = "1.0"
+thread_local = "1.1.9"
 tikv-jemallocator = { version = "0.6", features = ["profiling", "stats", "unprefixed_malloc_on_supported_platforms"] }
 tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
 tokio = { version = "1.43.1", features = ["macros"] }
@@ -198,7 +203,7 @@ tokio-epoll-uring = { git = "https://github.com/neondatabase/tokio-epoll-uring.g
 tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.12.0"
 tokio-rustls = { version = "0.26.0", default-features = false, features = ["tls12", "ring"]}
-tokio-stream = "0.1"
+tokio-stream = { version = "0.1", features = ["sync"] }
 tokio-tar = "0.3"
 tokio-util = { version = "0.7.10", features = ["io", "io-util", "rt"] }
 toml = "0.8"
@@ -249,15 +254,18 @@ azure_storage = { git = "https://github.com/neondatabase/azure-sdk-for-rust.git"
 azure_storage_blobs = { git = "https://github.com/neondatabase/azure-sdk-for-rust.git", branch = "neon", default-features = false, features = ["enable_reqwest_rustls"] }

 ## Local libraries
+alloc-metrics = { version = "0.1", path = "./libs/alloc-metrics/" }
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 desim = { version = "0.1", path = "./libs/desim" }
 endpoint_storage = { version = "0.0.1", path = "./endpoint_storage/" }
 http-utils = { version = "0.1", path = "./libs/http-utils/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
+neon-shmem = { version = "0.1", path = "./libs/neon-shmem/" }
 pageserver = { path = "./pageserver" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
 pageserver_client = { path = "./pageserver/client" }
+pageserver_client_grpc = { path = "./pageserver/client_grpc" }
 pageserver_compaction = { version = "0.1", path = "./pageserver/compaction/" }
 pageserver_page_api = { path = "./pageserver/page_api" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
@@ -284,6 +292,7 @@ walproposer = { version = "0.1", path = "./libs/walproposer/" }
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
+cbindgen = "0.29.0"
 criterion = "0.5.1"
 rcgen = "0.13"
 rstest = "0.18"
@@ -295,6 +304,9 @@ tonic-build = "0.13.1"
 # Needed to get `tokio-postgres-rustls` to depend on our fork.
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch = "neon" }

+# Needed to fix a bug in alloc-metrics
+thread_local = { git = "https://github.com/conradludgate/thread_local-rs", branch = "no-tls-destructor-get" }
+
 ################# Binary contents sections

 [profile.release]
--- a/53
+++ b/53
@@ -30,7 +30,18 @@ ARG BASE_IMAGE_SHA=debian:${DEBIAN_FLAVOR}
 ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bookworm-slim/debian@$BOOKWORM_SLIM_SHA}
 ARG BASE_IMAGE_SHA=${BASE_IMAGE_SHA/debian:bullseye-slim/debian@$BULLSEYE_SLIM_SHA}

-# Build Postgres
+# Naive way:
+#
+# 1. COPY . .
+# 1. make neon-pg-ext
+# 2. cargo build <storage binaries>
+#
+# But to enable docker to cache intermediate layers, we perform a few preparatory steps:
+#
+# - Build all postgres versions, depending on just the contents of vendor/
+# - Use cargo chef to build all rust dependencies
+
+# 1. Build all postgres versions
 FROM $REPOSITORY/$IMAGE:$TAG AS pg-build
 WORKDIR /home/nonroot

@@ -38,17 +49,15 @@ COPY --chown=nonroot vendor/postgres-v14 vendor/postgres-v14
 COPY --chown=nonroot vendor/postgres-v15 vendor/postgres-v15
 COPY --chown=nonroot vendor/postgres-v16 vendor/postgres-v16
 COPY --chown=nonroot vendor/postgres-v17 vendor/postgres-v17
-COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot postgres.mk postgres.mk
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh

 ENV BUILD_TYPE=release
 RUN set -e \
-    && mold -run make -j $(nproc) -s neon-pg-ext \
-    && tar -C pg_install -czf /home/nonroot/postgres_install.tar.gz .
+    && mold -run make -j $(nproc) -s postgres

-# Prepare cargo-chef recipe
+# 2. Prepare cargo-chef recipe
 FROM $REPOSITORY/$IMAGE:$TAG AS plan
 WORKDIR /home/nonroot

@@ -56,23 +65,22 @@ COPY --chown=nonroot . .

 RUN cargo chef prepare --recipe-path recipe.json

-# Build neon binaries
+# Main build image
 FROM $REPOSITORY/$IMAGE:$TAG AS build
 WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
-
-COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
-COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
-COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
-COPY --from=pg-build /home/nonroot/pg_install/v17/include/postgresql/server pg_install/v17/include/postgresql/server
-COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
-
 ARG ADDITIONAL_RUSTFLAGS=""

+# 3. Build cargo dependencies. Note that this step doesn't depend on anything else than
+# `recipe.json`, so the layer can be reused as long as none of the dependencies change.
+COPY --from=plan     /home/nonroot/recipe.json                              recipe.json
 RUN set -e \
    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment -Cforce-frame-pointers=yes ${ADDITIONAL_RUSTFLAGS}" cargo chef cook --locked --release --recipe-path recipe.json

+# Perform the main build. We reuse the Postgres build artifacts from the intermediate 'pg-build'
+# layer, and the cargo dependencies built in the previous step.
+COPY --chown=nonroot --from=pg-build /home/nonroot/pg_install/ pg_install
 COPY --chown=nonroot . .

 RUN set -e \
@@ -87,10 +95,10 @@ RUN set -e \
      --bin endpoint_storage \
      --bin neon_local \
      --bin storage_scrubber \
-      --locked --release
+      --locked --release \
+    && mold -run make -j $(nproc) -s neon-pg-ext

-# Build final image
-#
+# Assemble the final image
 FROM $BASE_IMAGE_SHA
 WORKDIR /data

@@ -130,12 +138,15 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/endpoint_storage    /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber    /usr/local/bin
+COPY --from=build /home/nonroot/pg_install/v14 /usr/local/v14/
+COPY --from=build /home/nonroot/pg_install/v15 /usr/local/v15/
+COPY --from=build /home/nonroot/pg_install/v16 /usr/local/v16/
+COPY --from=build /home/nonroot/pg_install/v17 /usr/local/v17/

-COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
-COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
-COPY --from=pg-build /home/nonroot/pg_install/v16 /usr/local/v16/
-COPY --from=pg-build /home/nonroot/pg_install/v17 /usr/local/v17/
-COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
+# Deprecated: Old deployment scripts use this tarball which contains all the Postgres binaries.
+# That's obsolete, since all the same files are also present under /usr/local/v*. But to keep the
+# old scripts working for now, create the tarball.
+RUN tar -C /usr/local -cvzf /data/postgres_install.tar.gz v14 v15 v16 v17

 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
 # Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
--- a/35
+++ b/35
@@ -2,7 +2,7 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

 # Where to install Postgres, default is ./pg_install, maybe useful for package
 # managers.
-POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
+POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install

 # Supported PostgreSQL versions
 POSTGRES_VERSIONS = v17 v16 v15 v14
@@ -14,7 +14,7 @@ POSTGRES_VERSIONS = v17 v16 v15 v14
 # it is derived from BUILD_TYPE.

 # All intermediate build artifacts are stored here.
-BUILD_DIR := build
+BUILD_DIR := $(ROOT_PROJECT_DIR)/build

 ICU_PREFIX_DIR := /usr/local/icu

@@ -30,11 +30,18 @@ ifeq ($(BUILD_TYPE),release)
 	PG_CFLAGS += -O2 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	CARGO_PROFILE ?= --profile=release
+	# NEON_CARGO_ARTIFACT_TARGET_DIR is the directory where `cargo build` places
+	# the final build artifacts. There is unfortunately no easy way of changing
+	# it to a fully predictable path, nor to extract the path with a simple
+	# command. See https://github.com/rust-lang/cargo/issues/9661 and
+	# https://github.com/rust-lang/cargo/issues/6790.
+	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
 	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	CARGO_PROFILE ?= --profile=dev
+	NEON_CARGO_ARTIFACT_TARGET_DIR = $(ROOT_PROJECT_DIR)/target/debug
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif
@@ -102,7 +109,7 @@ all: neon postgres-install neon-pg-ext

 ### Neon Rust bits
 #
-# The 'postgres_ffi' depends on the Postgres headers.
+# The 'postgres_ffi' crate depends on the Postgres headers.
 .PHONY: neon
 neon: postgres-headers-install walproposer-lib cargo-target-dir
 	+@echo "Compiling Neon"
@@ -115,10 +122,13 @@ cargo-target-dir:
 	test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG

 .PHONY: neon-pg-ext-%
-neon-pg-ext-%: postgres-install-%
+neon-pg-ext-%: postgres-install-% cargo-target-dir
 	+@echo "Compiling neon-specific Postgres extensions for $*"
 	mkdir -p $(BUILD_DIR)/pgxn-$*
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config COPT='$(COPT)' \
+	$(MAKE) PG_CONFIG="$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config" COPT='$(COPT)' \
+		NEON_CARGO_ARTIFACT_TARGET_DIR="$(NEON_CARGO_ARTIFACT_TARGET_DIR)" \
+		CARGO_BUILD_FLAGS="$(CARGO_BUILD_FLAGS)" \
+		CARGO_PROFILE="$(CARGO_PROFILE)" \
 		-C $(BUILD_DIR)/pgxn-$*\
 		-f $(ROOT_PROJECT_DIR)/pgxn/Makefile  install

@@ -202,7 +212,7 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 		FIND_TYPEDEF=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/find_typedef \
 		INDENT=$(BUILD_DIR)/v17/src/tools/pg_bsd_indent/pg_bsd_indent \
 		PGINDENT_SCRIPT=$(ROOT_PROJECT_DIR)/vendor/postgres-v17/src/tools/pgindent/pgindent \
-		-C $(BUILD_DIR)/neon-v17 \
+		-C $(BUILD_DIR)/pgxn-v17/neon \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile pgindent


@@ -210,6 +220,19 @@ neon-pgindent: postgres-v17-pg-bsd-indent neon-pg-ext-v17
 setup-pre-commit-hook:
 	ln -s -f $(ROOT_PROJECT_DIR)/pre-commit.py .git/hooks/pre-commit

+build-tools/node_modules: build-tools/package.json
+	cd build-tools && $(if $(CI),npm ci,npm install)
+	touch build-tools/node_modules
+
+.PHONY: lint-openapi-spec
+lint-openapi-spec: build-tools/node_modules
+	# operation-2xx-response: pageserver timeline delete returns 404 on success
+	find . -iname "openapi_spec.y*ml" -exec\
+		npx --prefix=build-tools/ redocly\
+			--skip-rule=operation-operationId --skip-rule=operation-summary --extends=minimal\
+			--skip-rule=no-server-example.com --skip-rule=operation-2xx-response\
+			lint {} \+
+
 # Targets for building PostgreSQL are defined in postgres.mk.
 #
 # But if the caller has indicated that PostgreSQL is already
--- a/build-tools/Dockerfile
+++ b/build-tools/Dockerfile
@@ -35,7 +35,7 @@ RUN echo 'Acquire::Retries "5";' > /etc/apt/apt.conf.d/80-retries && \
    echo -e "retry_connrefused=on\ntimeout=15\ntries=5\nretry-on-host-error=on\n" > /root/.wgetrc && \
    echo -e "--retry-connrefused\n--connect-timeout 15\n--retry 5\n--max-time 300\n" > /root/.curlrc

-COPY build_tools/patches/pgcopydbv017.patch /pgcopydbv017.patch
+COPY build-tools/patches/pgcopydbv017.patch /pgcopydbv017.patch

 RUN if [ "${DEBIAN_VERSION}" = "bookworm" ]; then \
        set -e && \
@@ -188,6 +188,12 @@ RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
    && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

+# Install node
+ENV NODE_VERSION=24
+RUN curl -fsSL https://deb.nodesource.com/setup_${NODE_VERSION}.x | bash - \
+    && apt install -y nodejs \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
 # Install docker
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION} stable" > /etc/apt/sources.list.d/docker.list \
@@ -311,14 +317,14 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools rustfmt clippy && \
-    cargo install rustfilt            --version ${RUSTFILT_VERSION} --locked && \
-    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} --locked && \
-    cargo install cargo-deny          --version ${CARGO_DENY_VERSION} --locked && \
-    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} --locked && \
-    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} --locked && \
-    cargo install cargo-chef          --version ${CARGO_CHEF_VERSION} --locked && \
-    cargo install diesel_cli          --version ${CARGO_DIESEL_CLI_VERSION} --locked \
-                                      --features postgres-bundled --no-default-features && \
+    cargo install rustfilt      --locked --version ${RUSTFILT_VERSION} && \
+    cargo install cargo-hakari  --locked --version ${CARGO_HAKARI_VERSION} && \
+    cargo install cargo-deny    --locked --version ${CARGO_DENY_VERSION} && \
+    cargo install cargo-hack    --locked --version ${CARGO_HACK_VERSION} && \
+    cargo install cargo-nextest --locked --version ${CARGO_NEXTEST_VERSION} && \
+    cargo install cargo-chef    --locked --version ${CARGO_CHEF_VERSION} && \
+    cargo install diesel_cli    --locked --version ${CARGO_DIESEL_CLI_VERSION} \
+                                --features postgres-bundled --no-default-features && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git

--- a/build-tools/package-lock.json
+++ b/build-tools/package-lock.json
--- a/build-tools/package.json
+++ b/build-tools/package.json
@@ -0,0 +1,8 @@
+{
+  "name": "build-tools",
+  "private": true,
+  "devDependencies": {
+    "@redocly/cli": "1.34.4",
+    "@sourcemeta/jsonschema": "10.0.0"
+  }
+}
--- a/build-tools/patches/pgcopydbv017.patch
+++ b/build-tools/patches/pgcopydbv017.patch
--- a/clippy.toml
+++ b/clippy.toml
@@ -1,9 +1,12 @@
 disallowed-methods = [
    "tokio::task::block_in_place",
+
    # Allow this for now, to deny it later once we stop using Handle::block_on completely
    # "tokio::runtime::Handle::block_on",
-    # use tokio_epoll_uring_ext instead
-    "tokio_epoll_uring::thread_local_system",
+
+    # tokio-epoll-uring:
+    # - allow-invalid because the method doesn't exist on macOS
+    { path = "tokio_epoll_uring::thread_local_system", replacement = "tokio_epoll_uring_ext module inside pageserver crate", allow-invalid = true }
 ]

 disallowed-macros = [
--- a/compute/Makefile
+++ b/compute/Makefile
@@ -50,9 +50,9 @@ jsonnetfmt-format:
 	jsonnetfmt --in-place $(jsonnet_files)

 .PHONY: manifest-schema-validation
-manifest-schema-validation: node_modules
-	node_modules/.bin/jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml
+manifest-schema-validation: ../build-tools/node_modules
+	npx --prefix=../build-tools/ jsonschema validate -d https://json-schema.org/draft/2020-12/schema manifest.schema.json manifest.yaml

-node_modules: package.json
-	npm install
-	touch node_modules
+../build-tools/node_modules: ../build-tools/package.json
+	cd ../build-tools && $(if $(CI),npm ci,npm install)
+	touch ../build-tools/node_modules
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -9,7 +9,7 @@
 #
 # build-tools:   This contains Rust compiler toolchain and other tools needed at compile
 #                time. This is also used for the storage builds. This image is defined in
-#                build-tools.Dockerfile.
+#                build-tools/Dockerfile.
 #
 # build-deps:    Contains C compiler, other build tools, and compile-time dependencies
 #                needed to compile PostgreSQL and most extensions. (Some extensions need
@@ -115,7 +115,7 @@ ARG EXTENSIONS=all
 FROM $BASE_IMAGE_SHA AS build-deps
 ARG DEBIAN_VERSION

-# Keep in sync with build-tools.Dockerfile
+# Keep in sync with build-tools/Dockerfile
 ENV PROTOC_VERSION=25.1

 # Use strict mode for bash to catch errors early
@@ -170,7 +170,29 @@ RUN case $DEBIAN_VERSION in \
 FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION:?} postgres
+COPY compute/patches/postgres_fdw.patch .
+COPY compute/patches/pg_stat_statements_pg14-16.patch .
+COPY compute/patches/pg_stat_statements_pg17.patch .
 RUN cd postgres && \
+    # Apply patches to some contrib extensions
+    # For example, we need to grant EXECUTE on pg_stat_statements_reset() to {privileged_role_name}.
+    # In vanilla Postgres this function is limited to Postgres role superuser.
+    # In Neon we have {privileged_role_name} role that is not a superuser but replaces superuser in some cases.
+    # We could add the additional grant statements to the Postgres repository but it would be hard to maintain,
+    # whenever we need to pick up a new Postgres version and we want to limit the changes in our Postgres fork,
+    # so we do it here.
+    case "${PG_VERSION}" in \
+    "v14" | "v15" | "v16") \
+    patch -p1 < /pg_stat_statements_pg14-16.patch; \
+    ;; \
+    "v17") \
+    patch -p1 < /pg_stat_statements_pg17.patch; \
+    ;; \
+    *) \
+    # To do not forget to migrate patches to the next major version
+    echo "No contrib patches for this PostgreSQL version" && exit 1;; \
+    esac && \
+    patch -p1 < /postgres_fdw.patch && \
    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
    --with-icu --with-libxml --with-libxslt --with-lz4" && \
    if [ "${PG_VERSION:?}" != "v14" ]; then \
@@ -184,8 +206,6 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/dblink.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgres_fdw.control && \
-    file=/usr/local/pgsql/share/extension/postgres_fdw--1.0.sql && [ -e $file ] && \
-    echo 'GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO neon_superuser;' >> $file && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
@@ -195,34 +215,7 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control && \
-    # We need to grant EXECUTE on pg_stat_statements_reset() to neon_superuser.
-    # In vanilla postgres this function is limited to Postgres role superuser.
-    # In neon we have neon_superuser role that is not a superuser but replaces superuser in some cases.
-    # We could add the additional grant statements to the postgres repository but it would be hard to maintain,
-    # whenever we need to pick up a new postgres version and we want to limit the changes in our postgres fork,
-    # so we do it here.
-    for file in /usr/local/pgsql/share/extension/pg_stat_statements--*.sql; do \
-        filename=$(basename "$file"); \
-        # Note that there are no downgrade scripts for pg_stat_statements, so we \
-        # don't have to modify any downgrade paths or (much) older versions: we only \
-        # have to make sure every creation of the pg_stat_statements_reset function \
-        # also adds execute permissions to the neon_superuser.
-        case $filename in \
-          pg_stat_statements--1.4.sql) \
-            # pg_stat_statements_reset is first created with 1.4
-            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO neon_superuser;' >> $file; \
-            ;; \
-          pg_stat_statements--1.6--1.7.sql) \
-            # Then with the 1.6-1.7 migration it is re-created with a new signature, thus add the permissions back
-            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO neon_superuser;' >> $file; \
-            ;; \
-          pg_stat_statements--1.10--1.11.sql) \
-            # Then with the 1.10-1.11 migration it is re-created with a new signature again, thus add the permissions back
-            echo 'GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO neon_superuser;' >> $file; \
-            ;; \
-        esac; \
-    done;
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control

 # Set PATH for all the subsequent build steps
 ENV PATH="/usr/local/pgsql/bin:$PATH"
@@ -1524,7 +1517,7 @@ WORKDIR /ext-src
 COPY compute/patches/pg_duckdb_v031.patch .
 COPY compute/patches/duckdb_v120.patch .
 # pg_duckdb build requires source dir to be a git repo to get submodules
-# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only:
+# allow {privileged_role_name} to execute some functions that in pg_duckdb are available to superuser only:
 # - extension management function duckdb.install_extension()
 # - access to duckdb.extensions table and its sequence
 RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
@@ -1636,11 +1629,14 @@ RUN make install USE_PGXS=1 -j $(getconf _NPROCESSORS_ONLN)
 # compile neon extensions
 #
 #########################################################################################
-FROM pg-build AS neon-ext-build
+FROM pg-build-with-cargo AS neon-ext-build
 ARG PG_VERSION

-COPY pgxn/ pgxn/
-RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute
+USER root
+COPY . .
+
+RUN make -j $(getconf _NPROCESSORS_ONLN) -C pgxn -s install-compute \
+      BUILD_TYPE=release CARGO_BUILD_FLAGS="--locked --release" NEON_CARGO_ARTIFACT_TARGET_DIR="$(pwd)/target/release"

 #########################################################################################
 #
@@ -1787,7 +1783,7 @@ RUN set -e \
 #########################################################################################
 FROM build-deps AS exporters
 ARG TARGETARCH
-# Keep sql_exporter version same as in build-tools.Dockerfile and
+# Keep sql_exporter version same as in build-tools/Dockerfile and
 # test_runner/regress/test_compute_metrics.py
 # See comment on the top of the file regading `echo`, `-e` and `\n`
 RUN if [ "$TARGETARCH" = "amd64" ]; then\
@@ -1912,10 +1908,10 @@ RUN cd /ext-src/pg_repack-src && patch -p1 </ext-src/pg_repack.patch && rm -f /e

 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
 RUN echo /usr/local/pgsql/lib > /etc/ld.so.conf.d/00-neon.conf && /sbin/ldconfig
-RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq \
+RUN apt-get update && apt-get install -y libtap-parser-sourcehandler-pgtap-perl jq parallel \
   && apt clean && rm -rf /ext-src/*.tar.gz /ext-src/*.patch /var/lib/apt/lists/*
 ENV PATH=/usr/local/pgsql/bin:$PATH
-ENV PGHOST=compute
+ENV PGHOST=compute1
 ENV PGPORT=55433
 ENV PGUSER=cloud_admin
 ENV PGDATABASE=postgres
--- a/compute/package.json
+++ b/compute/package.json
@@ -1,7 +0,0 @@
-{
-  "name": "neon-compute",
-  "private": true,
-  "dependencies": {
-    "@sourcemeta/jsonschema": "9.3.4"
-  }
-} 
--- a/compute/patches/anon_v2.patch
+++ b/compute/patches/anon_v2.patch
@@ -1,22 +1,26 @@
 diff --git a/sql/anon.sql b/sql/anon.sql
-index 0cdc769..b450327 100644
+index 0cdc769..5eab1d6 100644
 --- a/sql/anon.sql
 +++ b/sql/anon.sql
-@@ -1141,3 +1141,15 @@ $$
+@@ -1141,3 +1141,19 @@ $$
 -- TODO : https://en.wikipedia.org/wiki/L-diversity
 
 -- TODO : https://en.wikipedia.org/wiki/T-closeness
 +
 +-- NEON Patches
 +
-+GRANT ALL ON SCHEMA anon to neon_superuser;
-+GRANT ALL ON ALL TABLES IN SCHEMA anon TO neon_superuser;
-+
 +DO $$
+DECLARE
+  privileged_role_name text;
 +BEGIN
-+    IF current_setting('server_version_num')::int >= 150000 THEN
-+        GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO neon_superuser;
-+    END IF;
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT ALL ON SCHEMA anon to %I', privileged_role_name);
+  EXECUTE format('GRANT ALL ON ALL TABLES IN SCHEMA anon TO %I', privileged_role_name);
+
+  IF current_setting('server_version_num')::int >= 150000 THEN
+    EXECUTE format('GRANT SET ON PARAMETER anon.transparent_dynamic_masking TO %I', privileged_role_name);
+  END IF;
 +END $$;
 diff --git a/sql/init.sql b/sql/init.sql
 index 7da6553..9b6164b 100644
--- a/compute/patches/pg_duckdb_v031.patch
+++ b/compute/patches/pg_duckdb_v031.patch
@@ -21,13 +21,21 @@ index 3235cc8..6b892bc 100644
 include Makefile.global
 
 diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
-index d777d76..af60106 100644
+index d777d76..3b54396 100644
 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql
 +++ b/sql/pg_duckdb--0.2.0--0.3.0.sql
-@@ -1056,3 +1056,6 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
+@@ -1056,3 +1056,14 @@ GRANT ALL ON FUNCTION duckdb.cache(TEXT, TEXT) TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_info() TO PUBLIC;
 GRANT ALL ON FUNCTION duckdb.cache_delete(TEXT) TO PUBLIC;
 GRANT ALL ON PROCEDURE duckdb.recycle_ddb() TO PUBLIC;
-+GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO neon_superuser;
-+GRANT ALL ON TABLE duckdb.extensions TO neon_superuser;
-+GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO neon_superuser;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT ALL ON FUNCTION duckdb.install_extension(TEXT) TO %I', privileged_role_name);
+  EXECUTE format('GRANT ALL ON TABLE duckdb.extensions TO %I', privileged_role_name);
+  EXECUTE format('GRANT ALL ON SEQUENCE duckdb.extensions_table_seq TO %I', privileged_role_name);
+END $$;
--- a/compute/patches/pg_stat_statements_pg14-16.patch
+++ b/compute/patches/pg_stat_statements_pg14-16.patch
@@ -0,0 +1,34 @@
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+index 58cdf600fce..8be57a996f6 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
+ 
+ -- Don't want this to be available to non-superusers.
+ REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
+END $$;
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+index 6fc3fed4c93..256345a8f79 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
+ 
+ -- Don't want this to be available to non-superusers.
+ REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
+END $$;
--- a/compute/patches/pg_stat_statements_pg17.patch
+++ b/compute/patches/pg_stat_statements_pg17.patch
@@ -0,0 +1,52 @@
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+index 0bb2c397711..32764db1d8b 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.10--1.11.sql
+@@ -80,3 +80,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
+ 
+ -- Don't want this to be available to non-superusers.
+ REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) FROM PUBLIC;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint, boolean) TO %I', privileged_role_name);
+END $$;
+\ No newline at end of file
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+index 58cdf600fce..8be57a996f6 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.4.sql
+@@ -46,3 +46,12 @@ GRANT SELECT ON pg_stat_statements TO PUBLIC;
+ 
+ -- Don't want this to be available to non-superusers.
+ REVOKE ALL ON FUNCTION pg_stat_statements_reset() FROM PUBLIC;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO %I', privileged_role_name);
+END $$;
+diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+index 6fc3fed4c93..256345a8f79 100644
+--- a/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.6--1.7.sql
+@@ -20,3 +20,12 @@ LANGUAGE C STRICT PARALLEL SAFE;
+ 
+ -- Don't want this to be available to non-superusers.
+ REVOKE ALL ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) FROM PUBLIC;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT EXECUTE ON FUNCTION pg_stat_statements_reset(Oid, Oid, bigint) TO %I', privileged_role_name);
+END $$;
--- a/compute/patches/postgres_fdw.patch
+++ b/compute/patches/postgres_fdw.patch
@@ -0,0 +1,17 @@
+diff --git a/contrib/postgres_fdw/postgres_fdw--1.0.sql b/contrib/postgres_fdw/postgres_fdw--1.0.sql
+index a0f0fc1bf45..ee077f2eea6 100644
+--- a/contrib/postgres_fdw/postgres_fdw--1.0.sql
+++ b/contrib/postgres_fdw/postgres_fdw--1.0.sql
+@@ -16,3 +16,12 @@ LANGUAGE C STRICT;
+ CREATE FOREIGN DATA WRAPPER postgres_fdw
+   HANDLER postgres_fdw_handler
+   VALIDATOR postgres_fdw_validator;
+
+DO $$
+DECLARE
+  privileged_role_name text;
+BEGIN
+  privileged_role_name := current_setting('neon.privileged_role_name');
+
+  EXECUTE format('GRANT USAGE ON FOREIGN DATA WRAPPER postgres_fdw TO %I', privileged_role_name);
+END $$;
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -66,7 +66,7 @@ url.workspace = true
 uuid.workspace = true
 walkdir.workspace = true
 x509-cert.workspace = true
-
+postgres-types.workspace = true
 postgres_versioninfo.workspace = true
 postgres_initdb.workspace = true
 compute_api.workspace = true
--- a/compute_tools/README.md
+++ b/compute_tools/README.md
@@ -46,11 +46,14 @@ stateDiagram-v2
  Configuration --> Failed : Failed to configure the compute
  Configuration --> Running : Compute has been configured
  Empty --> Init : Compute spec is immediately available
-  Empty --> TerminationPending : Requested termination
+  Empty --> TerminationPendingFast : Requested termination
+  Empty --> TerminationPendingImmediate : Requested termination
  Init --> Failed : Failed to start Postgres
  Init --> Running : Started Postgres
-  Running --> TerminationPending : Requested termination
-  TerminationPending --> Terminated : Terminated compute
+  Running --> TerminationPendingFast : Requested termination
+  Running --> TerminationPendingImmediate : Requested termination
+  TerminationPendingFast --> Terminated compute with 30s delay for cplane to inspect status
+  TerminationPendingImmediate --> Terminated : Terminated compute immediately
  Failed --> [*] : Compute exited
  Terminated --> [*] : Compute exited
 ```
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -87,6 +87,14 @@ struct Cli {
    #[arg(short = 'C', long, value_name = "DATABASE_URL")]
    pub connstr: String,

+    #[arg(
+        long,
+        default_value = "neon_superuser",
+        value_name = "PRIVILEGED_ROLE_NAME",
+        value_parser = Self::parse_privileged_role_name
+    )]
+    pub privileged_role_name: String,
+
    #[cfg(target_os = "linux")]
    #[arg(long, default_value = "neon-postgres")]
    pub cgroup: String,
@@ -149,6 +157,21 @@ impl Cli {

        Ok(url)
    }
+
+    /// For simplicity, we do not escape `privileged_role_name` anywhere in the code.
+    /// Since it's a system role, which we fully control, that's fine. Still, let's
+    /// validate it to avoid any surprises.
+    fn parse_privileged_role_name(value: &str) -> Result<String> {
+        use regex::Regex;
+
+        let pattern = Regex::new(r"^[a-z_]+$").unwrap();
+
+        if !pattern.is_match(value) {
+            bail!("--privileged-role-name can only contain lowercase letters and underscores")
+        }
+
+        Ok(value.to_string())
+    }
 }

 fn main() -> Result<()> {
@@ -178,6 +201,7 @@ fn main() -> Result<()> {
        ComputeNodeParams {
            compute_id: cli.compute_id,
            connstr,
+            privileged_role_name: cli.privileged_role_name.clone(),
            pgdata: cli.pgdata.clone(),
            pgbin: cli.pgbin.clone(),
            pgversion: get_pg_version_string(&cli.pgbin),
@@ -327,4 +351,49 @@ mod test {
        ])
        .expect_err("URL parameters are not allowed");
    }
+
+    #[test]
+    fn verify_privileged_role_name() {
+        // Valid name
+        let cli = Cli::parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "my_superuser",
+        ]);
+        assert_eq!(cli.privileged_role_name, "my_superuser");
+
+        // Invalid names
+        Cli::try_parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "NeonSuperuser",
+        ])
+        .expect_err("uppercase letters are not allowed");
+
+        Cli::try_parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "$'neon_superuser",
+        ])
+        .expect_err("special characters are not allowed");
+
+        Cli::try_parse_from([
+            "compute_ctl",
+            "--pgdata=test",
+            "--connstr=test",
+            "--compute-id=test",
+            "--privileged-role-name",
+            "",
+        ])
+        .expect_err("empty name is not allowed");
+    }
 }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -3,7 +3,7 @@ use chrono::{DateTime, Utc};
 use compute_api::privilege::Privilege;
 use compute_api::responses::{
    ComputeConfig, ComputeCtlConfig, ComputeMetrics, ComputeStatus, LfcOffloadState,
-    LfcPrewarmState, TlsConfig,
+    LfcPrewarmState, PromoteState, TlsConfig,
 };
 use compute_api::spec::{
    ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PageserverProtocol, PgIdent,
@@ -29,7 +29,7 @@ use std::sync::atomic::{AtomicU32, AtomicU64, Ordering};
 use std::sync::{Arc, Condvar, Mutex, RwLock};
 use std::time::{Duration, Instant};
 use std::{env, fs};
-use tokio::spawn;
+use tokio::{spawn, sync::watch, task::JoinHandle, time};
 use tracing::{Instrument, debug, error, info, instrument, warn};
 use url::Url;
 use utils::id::{TenantId, TimelineId};
@@ -74,12 +74,20 @@ const DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL: u64 = 3600;

 /// Static configuration params that don't change after startup. These mostly
 /// come from the CLI args, or are derived from them.
+#[derive(Clone, Debug)]
 pub struct ComputeNodeParams {
    /// The ID of the compute
    pub compute_id: String,
-    // Url type maintains proper escaping
+
+    /// Url type maintains proper escaping
    pub connstr: url::Url,

+    /// The name of the 'weak' superuser role, which we give to the users.
+    /// It follows the allow list approach, i.e., we take a standard role
+    /// and grant it extra permissions with explicit GRANTs here and there,
+    /// and core patches.
+    pub privileged_role_name: String,
+
    pub resize_swap_on_bind: bool,
    pub set_disk_quota_for_fs: Option<String>,

@@ -107,6 +115,8 @@ pub struct ComputeNodeParams {
    pub installed_extensions_collection_interval: Arc<AtomicU64>,
 }

+type TaskHandle = Mutex<Option<JoinHandle<()>>>;
+
 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
    pub params: ComputeNodeParams,
@@ -129,7 +139,8 @@ pub struct ComputeNode {
    pub compute_ctl_config: ComputeCtlConfig,

    /// Handle to the extension stats collection task
-    extension_stats_task: Mutex<Option<tokio::task::JoinHandle<()>>>,
+    extension_stats_task: TaskHandle,
+    lfc_offload_task: TaskHandle,
 }

 // store some metrics about download size that might impact startup time
@@ -171,6 +182,7 @@ pub struct ComputeState {
    /// WAL flush LSN that is set after terminating Postgres and syncing safekeepers if
    /// mode == ComputeMode::Primary. None otherwise
    pub terminate_flush_lsn: Option<Lsn>,
+    pub promote_state: Option<watch::Receiver<PromoteState>>,

    pub metrics: ComputeMetrics,
 }
@@ -188,6 +200,7 @@ impl ComputeState {
            lfc_prewarm_state: LfcPrewarmState::default(),
            lfc_offload_state: LfcOffloadState::default(),
            terminate_flush_lsn: None,
+            promote_state: None,
        }
    }

@@ -368,7 +381,7 @@ fn maybe_cgexec(cmd: &str) -> Command {

 struct PostgresHandle {
    postgres: std::process::Child,
-    log_collector: tokio::task::JoinHandle<Result<()>>,
+    log_collector: JoinHandle<Result<()>>,
 }

 impl PostgresHandle {
@@ -382,7 +395,7 @@ struct StartVmMonitorResult {
    #[cfg(target_os = "linux")]
    token: tokio_util::sync::CancellationToken,
    #[cfg(target_os = "linux")]
-    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
+    vm_monitor: Option<JoinHandle<Result<()>>>,
 }

 impl ComputeNode {
@@ -433,6 +446,7 @@ impl ComputeNode {
            ext_download_progress: RwLock::new(HashMap::new()),
            compute_ctl_config: config.compute_ctl_config,
            extension_stats_task: Mutex::new(None),
+            lfc_offload_task: Mutex::new(None),
        })
    }

@@ -520,8 +534,8 @@ impl ComputeNode {
            None
        };

-        // Terminate the extension stats collection task
        this.terminate_extension_stats_task();
+        this.terminate_lfc_offload_task();

        // Terminate the vm_monitor so it releases the file watcher on
        // /sys/fs/cgroup/neon-postgres.
@@ -851,12 +865,15 @@ impl ComputeNode {
        // Log metrics so that we can search for slow operations in logs
        info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished");

-        // Spawn the extension stats background task
        self.spawn_extension_stats_task();

        if pspec.spec.autoprewarm {
+            info!("autoprewarming on startup as requested");
            self.prewarm_lfc(None);
        }
+        if let Some(seconds) = pspec.spec.offload_lfc_interval_seconds {
+            self.spawn_lfc_offload_task(Duration::from_secs(seconds.into()));
+        };
        Ok(())
    }

@@ -947,14 +964,20 @@ impl ComputeNode {
            None
        };

-        let mut delay_exit = false;
        let mut state = self.state.lock().unwrap();
        state.terminate_flush_lsn = lsn;
-        if let ComputeStatus::TerminationPending { mode } = state.status {
+
+        let delay_exit = state.status == ComputeStatus::TerminationPendingFast;
+        if state.status == ComputeStatus::TerminationPendingFast
+            || state.status == ComputeStatus::TerminationPendingImmediate
+        {
+            info!(
+                "Changing compute status from {} to {}",
+                state.status,
+                ComputeStatus::Terminated
+            );
            state.status = ComputeStatus::Terminated;
            self.state_changed.notify_all();
-            // we were asked to terminate gracefully, don't exit to avoid restart
-            delay_exit = mode == compute_api::responses::TerminateMode::Fast
        }
        drop(state);

@@ -1025,6 +1048,8 @@ impl ComputeNode {
            PageserverProtocol::Grpc => self.try_get_basebackup_grpc(spec, lsn)?,
        };

+        self.fix_zenith_signal_neon_signal()?;
+
        let mut state = self.state.lock().unwrap();
        state.metrics.pageserver_connect_micros =
            connected.duration_since(started).as_micros() as u64;
@@ -1034,6 +1059,27 @@ impl ComputeNode {
        Ok(())
    }

+    /// Move the Zenith signal file to Neon signal file location.
+    /// This makes Compute compatible with older PageServers that don't yet
+    /// know about the Zenith->Neon rename.
+    fn fix_zenith_signal_neon_signal(&self) -> Result<()> {
+        let datadir = Path::new(&self.params.pgdata);
+
+        let neonsig = datadir.join("neon.signal");
+
+        if neonsig.is_file() {
+            return Ok(());
+        }
+
+        let zenithsig = datadir.join("zenith.signal");
+
+        if zenithsig.is_file() {
+            fs::copy(zenithsig, neonsig)?;
+        }
+
+        Ok(())
+    }
+
    /// Fetches a basebackup via gRPC. The connstring must use grpc://. Returns the timestamp when
    /// the connection was established, and the (compressed) size of the basebackup.
    fn try_get_basebackup_grpc(&self, spec: &ParsedSpec, lsn: Lsn) -> Result<(Instant, usize)> {
@@ -1049,7 +1095,7 @@ impl ComputeNode {
        };

        let (reader, connected) = tokio::runtime::Handle::current().block_on(async move {
-            let mut client = page_api::Client::new(
+            let mut client = page_api::Client::connect(
                shard0_connstr,
                spec.tenant_id,
                spec.timeline_id,
@@ -1248,9 +1294,7 @@ impl ComputeNode {

        // In case of error, log and fail the check, but don't crash.
        // We're playing it safe because these errors could be transient
-        // and we don't yet retry. Also being careful here allows us to
-        // be backwards compatible with safekeepers that don't have the
-        // TIMELINE_STATUS API yet.
+        // and we don't yet retry.
        if responses.len() < quorum {
            error!(
                "failed sync safekeepers check {:?} {:?} {:?}",
@@ -1353,6 +1397,7 @@ impl ComputeNode {
        self.create_pgdata()?;
        config::write_postgres_conf(
            pgdata_path,
+            &self.params,
            &pspec.spec,
            self.params.internal_http_port,
            tls_config,
@@ -1701,6 +1746,7 @@ impl ComputeNode {
        }

        // Run migrations separately to not hold up cold starts
+        let params = self.params.clone();
        tokio::spawn(async move {
            let mut conf = conf.as_ref().clone();
            conf.application_name("compute_ctl:migrations");
@@ -1712,7 +1758,7 @@ impl ComputeNode {
                            eprintln!("connection error: {e}");
                        }
                    });
-                    if let Err(e) = handle_migrations(&mut client).await {
+                    if let Err(e) = handle_migrations(params, &mut client).await {
                        error!("Failed to run migrations: {}", e);
                    }
                }
@@ -1791,11 +1837,14 @@ impl ComputeNode {
        let pgdata_path = Path::new(&self.params.pgdata);
        config::write_postgres_conf(
            pgdata_path,
+            &self.params,
            &spec,
            self.params.internal_http_port,
            tls_config,
        )?;

+        self.pg_reload_conf()?;
+
        if !spec.skip_pg_catalog_updates {
            let max_concurrent_connections = spec.reconfigure_concurrency;
            // Temporarily reset max_cluster_size in config
@@ -1815,10 +1864,9 @@ impl ComputeNode {

                Ok(())
            })?;
+            self.pg_reload_conf()?;
        }

-        self.pg_reload_conf()?;
-
        let unknown_op = "unknown".to_string();
        let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
        info!(
@@ -1891,7 +1939,8 @@ impl ComputeNode {

                            // exit loop
                            ComputeStatus::Failed
-                            | ComputeStatus::TerminationPending { .. }
+                            | ComputeStatus::TerminationPendingFast
+                            | ComputeStatus::TerminationPendingImmediate
                            | ComputeStatus::Terminated => break 'cert_update,

                            // wait
@@ -2357,10 +2406,7 @@ LIMIT 100",
    }

    pub fn spawn_extension_stats_task(&self) {
-        // Cancel any existing task
-        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
-            handle.abort();
-        }
+        self.terminate_extension_stats_task();

        let conf = self.tokio_conn_conf.clone();
        let atomic_interval = self.params.installed_extensions_collection_interval.clone();
@@ -2396,8 +2442,47 @@ LIMIT 100",
    }

    fn terminate_extension_stats_task(&self) {
-        if let Some(handle) = self.extension_stats_task.lock().unwrap().take() {
-            handle.abort();
+        if let Some(h) = self.extension_stats_task.lock().unwrap().take() {
+            h.abort()
+        }
+    }
+
+    pub fn spawn_lfc_offload_task(self: &Arc<Self>, interval: Duration) {
+        self.terminate_lfc_offload_task();
+        let secs = interval.as_secs();
+        let this = self.clone();
+
+        info!("spawning LFC offload worker with {secs}s interval");
+        let handle = spawn(async move {
+            let mut interval = time::interval(interval);
+            interval.tick().await; // returns immediately
+            loop {
+                interval.tick().await;
+
+                let prewarm_state = this.state.lock().unwrap().lfc_prewarm_state.clone();
+                // Do not offload LFC state if we are currently prewarming or any issue occurred.
+                // If we'd do that, we might override the LFC state in endpoint storage with some
+                // incomplete state. Imagine a situation:
+                // 1. Endpoint started with `autoprewarm: true`
+                // 2. While prewarming is not completed, we upload the new incomplete state
+                // 3. Compute gets interrupted and restarts
+                // 4. We start again and try to prewarm with the state from 2. instead of the previous complete state
+                if matches!(
+                    prewarm_state,
+                    LfcPrewarmState::Completed
+                        | LfcPrewarmState::NotPrewarmed
+                        | LfcPrewarmState::Skipped
+                ) {
+                    this.offload_lfc_async().await;
+                }
+            }
+        });
+        *self.lfc_offload_task.lock().unwrap() = Some(handle);
+    }
+
+    fn terminate_lfc_offload_task(&self) {
+        if let Some(h) = self.lfc_offload_task.lock().unwrap().take() {
+            h.abort()
        }
    }

@@ -2406,19 +2491,11 @@ LIMIT 100",
        // If the value is -1, we never suspend so set the value to default collection.
        // If the value is 0, it means default, we will just continue to use the default.
        if spec.suspend_timeout_seconds == -1 || spec.suspend_timeout_seconds == 0 {
-            info!(
-                "[NEON_EXT_INT_UPD] Spec Timeout: {}, New Timeout: {}",
-                spec.suspend_timeout_seconds, DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL
-            );
            self.params.installed_extensions_collection_interval.store(
                DEFAULT_INSTALLED_EXTENSIONS_COLLECTION_INTERVAL,
                std::sync::atomic::Ordering::SeqCst,
            );
        } else {
-            info!(
-                "[NEON_EXT_INT_UPD] Spec Timeout: {}",
-                spec.suspend_timeout_seconds
-            );
            self.params.installed_extensions_collection_interval.store(
                spec.suspend_timeout_seconds as u64,
                std::sync::atomic::Ordering::SeqCst,
@@ -2436,7 +2513,7 @@ pub async fn installed_extensions(conf: tokio_postgres::Config) -> Result<()> {
                serde_json::to_string(&extensions).expect("failed to serialize extensions list")
            );
        }
-        Err(err) => error!("could not get installed extensions: {err:?}"),
+        Err(err) => error!("could not get installed extensions: {err}"),
    }
    Ok(())
 }
--- a/compute_tools/src/compute_prewarm.rs
+++ b/compute_tools/src/compute_prewarm.rs
@@ -5,6 +5,7 @@ use compute_api::responses::LfcOffloadState;
 use compute_api::responses::LfcPrewarmState;
 use http::StatusCode;
 use reqwest::Client;
+use std::mem::replace;
 use std::sync::Arc;
 use tokio::{io::AsyncReadExt, spawn};
 use tracing::{error, info};
@@ -69,7 +70,7 @@ impl ComputeNode {
            }
        };
        let row = match client
-            .query_one("select * from get_prewarm_info()", &[])
+            .query_one("select * from neon.get_prewarm_info()", &[])
            .await
        {
            Ok(row) => row,
@@ -88,28 +89,37 @@ impl ComputeNode {
        self.state.lock().unwrap().lfc_offload_state.clone()
    }

-    /// Returns false if there is a prewarm request ongoing, true otherwise
+    /// If there is a prewarm request ongoing, return `false`, `true` otherwise.
    pub fn prewarm_lfc(self: &Arc<Self>, from_endpoint: Option<String>) -> bool {
-        crate::metrics::LFC_PREWARM_REQUESTS.inc();
        {
            let state = &mut self.state.lock().unwrap().lfc_prewarm_state;
-            if let LfcPrewarmState::Prewarming =
-                std::mem::replace(state, LfcPrewarmState::Prewarming)
-            {
+            if let LfcPrewarmState::Prewarming = replace(state, LfcPrewarmState::Prewarming) {
                return false;
            }
        }
+        crate::metrics::LFC_PREWARMS.inc();

        let cloned = self.clone();
        spawn(async move {
-            let Err(err) = cloned.prewarm_impl(from_endpoint).await else {
-                cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Completed;
-                return;
-            };
-            error!(%err);
-            cloned.state.lock().unwrap().lfc_prewarm_state = LfcPrewarmState::Failed {
-                error: err.to_string(),
+            let state = match cloned.prewarm_impl(from_endpoint).await {
+                Ok(true) => LfcPrewarmState::Completed,
+                Ok(false) => {
+                    info!(
+                        "skipping LFC prewarm because LFC state is not found in endpoint storage"
+                    );
+                    LfcPrewarmState::Skipped
+                }
+                Err(err) => {
+                    crate::metrics::LFC_PREWARM_ERRORS.inc();
+                    error!(%err, "could not prewarm LFC");
+
+                    LfcPrewarmState::Failed {
+                        error: err.to_string(),
+                    }
+                }
            };
+
+            cloned.state.lock().unwrap().lfc_prewarm_state = state;
        });
        true
    }
@@ -120,15 +130,21 @@ impl ComputeNode {
        EndpointStoragePair::from_spec_and_endpoint(state.pspec.as_ref().unwrap(), from_endpoint)
    }

-    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<()> {
+    /// Request LFC state from endpoint storage and load corresponding pages into Postgres.
+    /// Returns a result with `false` if the LFC state is not found in endpoint storage.
+    async fn prewarm_impl(&self, from_endpoint: Option<String>) -> Result<bool> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(from_endpoint)?;
-        info!(%url, "requesting LFC state from endpoint storage");

+        info!(%url, "requesting LFC state from endpoint storage");
        let request = Client::new().get(&url).bearer_auth(token);
        let res = request.send().await.context("querying endpoint storage")?;
        let status = res.status();
-        if status != StatusCode::OK {
-            bail!("{status} querying endpoint storage")
+        match status {
+            StatusCode::OK => (),
+            StatusCode::NOT_FOUND => {
+                return Ok(false);
+            }
+            _ => bail!("{status} querying endpoint storage"),
        }

        let mut uncompressed = Vec::new();
@@ -141,52 +157,67 @@ impl ComputeNode {
            .await
            .context("decoding LFC state")?;
        let uncompressed_len = uncompressed.len();
-        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into postgres");
+
+        info!(%url, "downloaded LFC state, uncompressed size {uncompressed_len}, loading into Postgres");

        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?
-            .query_one("select prewarm_local_cache($1)", &[&uncompressed])
+            .query_one("select neon.prewarm_local_cache($1)", &[&uncompressed])
            .await
            .context("loading LFC state into postgres")
-            .map(|_| ())
+            .map(|_| ())?;
+
+        Ok(true)
    }

-    /// Returns false if there is an offload request ongoing, true otherwise
+    /// If offload request is ongoing, return false, true otherwise
    pub fn offload_lfc(self: &Arc<Self>) -> bool {
-        crate::metrics::LFC_OFFLOAD_REQUESTS.inc();
        {
            let state = &mut self.state.lock().unwrap().lfc_offload_state;
-            if let LfcOffloadState::Offloading =
-                std::mem::replace(state, LfcOffloadState::Offloading)
-            {
+            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
                return false;
            }
        }
-
        let cloned = self.clone();
-        spawn(async move {
-            let Err(err) = cloned.offload_lfc_impl().await else {
-                cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
-                return;
-            };
-            error!(%err);
-            cloned.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
-                error: err.to_string(),
-            };
-        });
+        spawn(async move { cloned.offload_lfc_with_state_update().await });
        true
    }

+    pub async fn offload_lfc_async(self: &Arc<Self>) {
+        {
+            let state = &mut self.state.lock().unwrap().lfc_offload_state;
+            if replace(state, LfcOffloadState::Offloading) == LfcOffloadState::Offloading {
+                return;
+            }
+        }
+        self.offload_lfc_with_state_update().await
+    }
+
+    async fn offload_lfc_with_state_update(&self) {
+        crate::metrics::LFC_OFFLOADS.inc();
+
+        let Err(err) = self.offload_lfc_impl().await else {
+            self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Completed;
+            return;
+        };
+
+        crate::metrics::LFC_OFFLOAD_ERRORS.inc();
+        error!(%err, "could not offload LFC state to endpoint storage");
+        self.state.lock().unwrap().lfc_offload_state = LfcOffloadState::Failed {
+            error: err.to_string(),
+        };
+    }
+
    async fn offload_lfc_impl(&self) -> Result<()> {
        let EndpointStoragePair { url, token } = self.endpoint_storage_pair(None)?;
-        info!(%url, "requesting LFC state from postgres");
+        info!(%url, "requesting LFC state from Postgres");

        let mut compressed = Vec::new();
        ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
            .await
            .context("connecting to postgres")?
-            .query_one("select get_local_cache_state()", &[])
+            .query_one("select neon.get_local_cache_state()", &[])
            .await
            .context("querying LFC state")?
            .try_get::<usize, &[u8]>(0)
@@ -195,13 +226,17 @@ impl ComputeNode {
            .read_to_end(&mut compressed)
            .await
            .context("compressing LFC state")?;
+
        let compressed_len = compressed.len();
        info!(%url, "downloaded LFC state, compressed size {compressed_len}, writing to endpoint storage");

        let request = Client::new().put(url).bearer_auth(token).body(compressed);
        match request.send().await {
            Ok(res) if res.status() == StatusCode::OK => Ok(()),
-            Ok(res) => bail!("Error writing to endpoint storage: {}", res.status()),
+            Ok(res) => bail!(
+                "Request to endpoint storage failed with status: {}",
+                res.status()
+            ),
            Err(err) => Err(err).context("writing to endpoint storage"),
        }
    }
--- a/compute_tools/src/compute_promote.rs
+++ b/compute_tools/src/compute_promote.rs
@@ -0,0 +1,132 @@
+use crate::compute::ComputeNode;
+use anyhow::{Context, Result, bail};
+use compute_api::{
+    responses::{LfcPrewarmState, PromoteState, SafekeepersLsn},
+    spec::ComputeMode,
+};
+use std::{sync::Arc, time::Duration};
+use tokio::time::sleep;
+use utils::lsn::Lsn;
+
+impl ComputeNode {
+    /// Returns only when promote fails or succeeds. If a network error occurs
+    /// and http client disconnects, this does not stop promotion, and subsequent
+    /// calls block until promote finishes.
+    /// Called by control plane on secondary after primary endpoint is terminated
+    pub async fn promote(self: &Arc<Self>, safekeepers_lsn: SafekeepersLsn) -> PromoteState {
+        let cloned = self.clone();
+        let start_promotion = || {
+            let (tx, rx) = tokio::sync::watch::channel(PromoteState::NotPromoted);
+            tokio::spawn(async move {
+                tx.send(match cloned.promote_impl(safekeepers_lsn).await {
+                    Ok(_) => PromoteState::Completed,
+                    Err(err) => {
+                        tracing::error!(%err, "promoting");
+                        PromoteState::Failed {
+                            error: err.to_string(),
+                        }
+                    }
+                })
+            });
+            rx
+        };
+
+        let mut task;
+        // self.state is unlocked after block ends so we lock it in promote_impl
+        // and task.changed() is reached
+        {
+            task = self
+                .state
+                .lock()
+                .unwrap()
+                .promote_state
+                .get_or_insert_with(start_promotion)
+                .clone()
+        }
+        task.changed().await.expect("promote sender dropped");
+        task.borrow().clone()
+    }
+
+    // Why do we have to supply safekeepers?
+    // For secondary we use primary_connection_conninfo so safekeepers field is empty
+    async fn promote_impl(&self, safekeepers_lsn: SafekeepersLsn) -> Result<()> {
+        {
+            let state = self.state.lock().unwrap();
+            let mode = &state.pspec.as_ref().unwrap().spec.mode;
+            if *mode != ComputeMode::Replica {
+                bail!("{} is not replica", mode.to_type_str());
+            }
+
+            // we don't need to query Postgres so not self.lfc_prewarm_state()
+            match &state.lfc_prewarm_state {
+                LfcPrewarmState::NotPrewarmed | LfcPrewarmState::Prewarming => {
+                    bail!("prewarm not requested or pending")
+                }
+                LfcPrewarmState::Failed { error } => {
+                    tracing::warn!(%error, "replica prewarm failed")
+                }
+                _ => {}
+            }
+        }
+
+        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
+            .await
+            .context("connecting to postgres")?;
+
+        let primary_lsn = safekeepers_lsn.wal_flush_lsn;
+        let mut last_wal_replay_lsn: Lsn = Lsn::INVALID;
+        const RETRIES: i32 = 20;
+        for i in 0..=RETRIES {
+            let row = client
+                .query_one("SELECT pg_last_wal_replay_lsn()", &[])
+                .await
+                .context("getting last replay lsn")?;
+            let lsn: u64 = row.get::<usize, postgres_types::PgLsn>(0).into();
+            last_wal_replay_lsn = lsn.into();
+            if last_wal_replay_lsn >= primary_lsn {
+                break;
+            }
+            tracing::info!("Try {i}, replica lsn {last_wal_replay_lsn}, primary lsn {primary_lsn}");
+            sleep(Duration::from_secs(1)).await;
+        }
+        if last_wal_replay_lsn < primary_lsn {
+            bail!("didn't catch up with primary in {RETRIES} retries");
+        }
+
+        // using $1 doesn't work with ALTER SYSTEM SET
+        let safekeepers_sql = format!(
+            "ALTER SYSTEM SET neon.safekeepers='{}'",
+            safekeepers_lsn.safekeepers
+        );
+        client
+            .query(&safekeepers_sql, &[])
+            .await
+            .context("setting safekeepers")?;
+        client
+            .query("SELECT pg_reload_conf()", &[])
+            .await
+            .context("reloading postgres config")?;
+        let row = client
+            .query_one("SELECT * FROM pg_promote()", &[])
+            .await
+            .context("pg_promote")?;
+        if !row.get::<usize, bool>(0) {
+            bail!("pg_promote() returned false");
+        }
+
+        let client = ComputeNode::get_maintenance_client(&self.tokio_conn_conf)
+            .await
+            .context("connecting to postgres")?;
+        let row = client
+            .query_one("SHOW transaction_read_only", &[])
+            .await
+            .context("getting transaction_read_only")?;
+        if row.get::<usize, &str>(0) == "on" {
+            bail!("replica in read only mode after promotion");
+        }
+
+        let mut state = self.state.lock().unwrap();
+        state.pspec.as_mut().unwrap().spec.mode = ComputeMode::Primary;
+        Ok(())
+    }
+}
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -9,6 +9,7 @@ use std::path::Path;
 use compute_api::responses::TlsConfig;
 use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption};

+use crate::compute::ComputeNodeParams;
 use crate::pg_helpers::{
    GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value,
 };
@@ -41,6 +42,7 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 /// Create or completely rewrite configuration file specified by `path`
 pub fn write_postgres_conf(
    pgdata_path: &Path,
+    params: &ComputeNodeParams,
    spec: &ComputeSpec,
    extension_server_port: u16,
    tls_config: &Option<TlsConfig>,
@@ -54,14 +56,15 @@ pub fn write_postgres_conf(
        writeln!(file, "{conf}")?;
    }

+    // Stripe size GUC should be defined prior to connection string
+    if let Some(stripe_size) = spec.shard_stripe_size {
+        writeln!(file, "neon.stripe_size={stripe_size}")?;
+    }
    // Add options for connecting to storage
    writeln!(file, "# Neon storage settings")?;
    if let Some(s) = &spec.pageserver_connstring {
        writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
    }
-    if let Some(stripe_size) = spec.shard_stripe_size {
-        writeln!(file, "neon.stripe_size={stripe_size}")?;
-    }
    if !spec.safekeeper_connstrings.is_empty() {
        let mut neon_safekeepers_value = String::new();
        tracing::info!(
@@ -161,6 +164,12 @@ pub fn write_postgres_conf(
        }
    }

+    writeln!(
+        file,
+        "neon.privileged_role_name={}",
+        escape_conf_value(params.privileged_role_name.as_str())
+    )?;
+
    // If there are any extra options in the 'settings' field, append those
    if spec.cluster.settings.is_some() {
        writeln!(file, "# Managed by compute_ctl: begin")?;
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -83,6 +83,87 @@ paths:
              schema:
                $ref: "#/components/schemas/DbsAndRoles"

+  /promote:
+    post:
+      tags:
+        - Promotion
+      summary: Promote secondary replica to primary
+      description: ""
+      operationId: promoteReplica
+      requestBody:
+        description: Promote requests data
+        required: true
+        content:
+          application/json:
+            schema:
+                $ref: "#/components/schemas/SafekeepersLsn"
+      responses:
+        200:
+          description: Promote succeeded or wasn't started
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PromoteState"
+        500:
+          description: Promote failed
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/PromoteState"
+
+  /lfc/prewarm:
+    post:
+      summary: Request LFC Prewarm
+      parameters:
+        - name: from_endpoint
+          in: query
+          schema:
+            type: string
+      description: ""
+      operationId: lfcPrewarm
+      responses:
+        202:
+          description: LFC prewarm started
+        429:
+          description: LFC prewarm ongoing
+    get:
+      tags:
+        - Prewarm
+      summary: Get LFC prewarm state
+      description: ""
+      operationId: getLfcPrewarmState
+      responses:
+        200:
+          description: Prewarm state
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/LfcPrewarmState"
+
+  /lfc/offload:
+    post:
+      summary: Request LFC offload
+      description: ""
+      operationId: lfcOffload
+      responses:
+        202:
+          description: LFC offload started
+        429:
+          description: LFC offload ongoing
+    get:
+      tags:
+        - Prewarm
+      summary: Get LFC offloading state
+      description: ""
+      operationId: getLfcOffloadState
+      responses:
+        200:
+          description: Offload state
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/LfcOffloadState"
+
  /database_schema:
    get:
      tags:
@@ -290,9 +371,28 @@ paths:
      summary: Terminate Postgres and wait for it to exit
      description: ""
      operationId: terminate
+      parameters:
+        - name: mode
+          in: query
+          description: "Terminate mode: fast (wait 30s before returning) and immediate"
+          required: false
+          schema:
+            type: string
+            enum: ["fast", "immediate"]
+            default: fast
      responses:
        200:
          description: Result
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/TerminateResponse"
+        201:
+          description: Result if compute is already terminated
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/TerminateResponse"
        412:
          description: "wrong state"
          content:
@@ -335,15 +435,6 @@ components:
        total_startup_ms:
          type: integer

-    Info:
-      type: object
-      description: Information about VM/Pod.
-      required:
-        - num_cpus
-      properties:
-        num_cpus:
-          type: integer
-
    DbsAndRoles:
      type: object
      description: Databases and Roles
@@ -458,11 +549,14 @@ components:
      type: string
      enum:
        - empty
-        - init
-        - failed
-        - running
        - configuration_pending
+        - init
+        - running
        - configuration
+        - failed
+        - termination_pending_fast
+        - termination_pending_immediate
+        - terminated
      example: running

    ExtensionInstallRequest:
@@ -497,25 +591,69 @@ components:
          type: string
          example: "1.0.0"

-    InstalledExtensions:
+    SafekeepersLsn:
      type: object
+      required:
+        - safekeepers
+        - wal_flush_lsn
      properties:
-        extensions:
-          description: Contains list of installed extensions.
-          type: array
-          items:
-            type: object
-            properties:
-              extname:
-                type: string
-              version:
-                type: string
-                items:
-                  type: string
-              n_databases:
-                type: integer
-              owned_by_superuser:
-                type: integer
+        safekeepers:
+          description: Primary replica safekeepers
+          type: string
+        wal_flush_lsn:
+          description: Primary last WAL flush LSN
+          type: string
+
+    LfcPrewarmState:
+      type: object
+      required:
+        - status
+        - total
+        - prewarmed
+        - skipped
+      properties:
+        status:
+          description: LFC prewarm status
+          enum: [not_prewarmed, prewarming, completed, failed, skipped]
+          type: string
+        error:
+          description: LFC prewarm error, if any
+          type: string
+        total:
+          description: Total pages processed
+          type: integer
+        prewarmed:
+          description: Total pages prewarmed
+          type: integer
+        skipped:
+          description: Pages processed but not prewarmed
+          type: integer
+
+    LfcOffloadState:
+      type: object
+      required:
+        - status
+      properties:
+        status:
+          description: LFC offload status
+          enum: [not_offloaded, offloading, completed, failed]
+          type: string
+        error:
+          description: LFC offload error, if any
+          type: string
+
+    PromoteState:
+      type: object
+      required:
+        - status
+      properties:
+        status:
+          description: Promote result
+          enum: [not_promoted, completed, failed]
+          type: string
+        error:
+          description: Promote error, if any
+          type: string

    SetRoleGrantsRequest:
      type: object
@@ -544,6 +682,17 @@ components:
          description: Role name.
          example: "neon"

+    TerminateResponse:
+      type: object
+      required:
+        - lsn
+      properties:
+        lsn:
+          type: string
+          nullable: true
+          description: "last WAL flush LSN"
+          example: "0/028F10D8"
+
    SetRoleGrantsResponse:
      type: object
      required:
--- a/compute_tools/src/http/routes/mod.rs
+++ b/compute_tools/src/http/routes/mod.rs
@@ -14,6 +14,7 @@ pub(in crate::http) mod insights;
 pub(in crate::http) mod lfc;
 pub(in crate::http) mod metrics;
 pub(in crate::http) mod metrics_json;
+pub(in crate::http) mod promote;
 pub(in crate::http) mod status;
 pub(in crate::http) mod terminate;

--- a/compute_tools/src/http/routes/promote.rs
+++ b/compute_tools/src/http/routes/promote.rs
@@ -0,0 +1,14 @@
+use crate::http::JsonResponse;
+use axum::Form;
+use http::StatusCode;
+
+pub(in crate::http) async fn promote(
+    compute: axum::extract::State<std::sync::Arc<crate::compute::ComputeNode>>,
+    Form(safekeepers_lsn): Form<compute_api::responses::SafekeepersLsn>,
+) -> axum::response::Response {
+    let state = compute.promote(safekeepers_lsn).await;
+    if let compute_api::responses::PromoteState::Failed { error } = state {
+        return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, error);
+    }
+    JsonResponse::success(StatusCode::OK, state)
+}
--- a/compute_tools/src/http/routes/terminate.rs
+++ b/compute_tools/src/http/routes/terminate.rs
@@ -3,7 +3,7 @@ use crate::http::JsonResponse;
 use axum::extract::State;
 use axum::response::Response;
 use axum_extra::extract::OptionalQuery;
-use compute_api::responses::{ComputeStatus, TerminateResponse};
+use compute_api::responses::{ComputeStatus, TerminateMode, TerminateResponse};
 use http::StatusCode;
 use serde::Deserialize;
 use std::sync::Arc;
@@ -12,7 +12,7 @@ use tracing::info;

 #[derive(Deserialize, Default)]
 pub struct TerminateQuery {
-    mode: compute_api::responses::TerminateMode,
+    mode: TerminateMode,
 }

 /// Terminate the compute.
@@ -24,16 +24,16 @@ pub(in crate::http) async fn terminate(
    {
        let mut state = compute.state.lock().unwrap();
        if state.status == ComputeStatus::Terminated {
-            return JsonResponse::success(StatusCode::CREATED, state.terminate_flush_lsn);
+            let response = TerminateResponse {
+                lsn: state.terminate_flush_lsn,
+            };
+            return JsonResponse::success(StatusCode::CREATED, response);
        }

        if !matches!(state.status, ComputeStatus::Empty | ComputeStatus::Running) {
            return JsonResponse::invalid_status(state.status);
        }
-        state.set_status(
-            ComputeStatus::TerminationPending { mode },
-            &compute.state_changed,
-        );
+        state.set_status(mode.into(), &compute.state_changed);
    }

    forward_termination_signal(false);
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -23,7 +23,7 @@ use super::{
    middleware::authorize::Authorize,
    routes::{
        check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions,
-        grants, insights, lfc, metrics, metrics_json, status, terminate,
+        grants, insights, lfc, metrics, metrics_json, promote, status, terminate,
    },
 };
 use crate::compute::ComputeNode;
@@ -87,6 +87,7 @@ impl From<&Server> for Router<Arc<ComputeNode>> {
                let authenticated_router = Router::<Arc<ComputeNode>>::new()
                    .route("/lfc/prewarm", get(lfc::prewarm_state).post(lfc::prewarm))
                    .route("/lfc/offload", get(lfc::offload_state).post(lfc::offload))
+                    .route("/promote", post(promote::promote))
                    .route("/check_writability", post(check_writability::is_writable))
                    .route("/configure", post(configure::configure))
                    .route("/database_schema", get(database_schema::get_schema_dump))
--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -2,6 +2,7 @@ use std::collections::HashMap;

 use anyhow::Result;
 use compute_api::responses::{InstalledExtension, InstalledExtensions};
+use tokio_postgres::error::Error as PostgresError;
 use tokio_postgres::{Client, Config, NoTls};

 use crate::metrics::INSTALLED_EXTENSIONS;
@@ -10,7 +11,7 @@ use crate::metrics::INSTALLED_EXTENSIONS;
 /// and to make database listing query here more explicit.
 ///
 /// Limit the number of databases to 500 to avoid excessive load.
-async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
+async fn list_dbs(client: &mut Client) -> Result<Vec<String>, PostgresError> {
    // `pg_database.datconnlimit = -2` means that the database is in the
    // invalid state
    let databases = client
@@ -37,7 +38,9 @@ async fn list_dbs(client: &mut Client) -> Result<Vec<String>> {
 /// Same extension can be installed in multiple databases with different versions,
 /// so we report a separate metric (number of databases where it is installed)
 /// for each extension version.
-pub async fn get_installed_extensions(mut conf: Config) -> Result<InstalledExtensions> {
+pub async fn get_installed_extensions(
+    mut conf: Config,
+) -> Result<InstalledExtensions, PostgresError> {
    conf.application_name("compute_ctl:get_installed_extensions");
    let databases: Vec<String> = {
        let (mut client, connection) = conf.connect(NoTls).await?;
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -12,6 +12,7 @@ pub mod logger;
 pub mod catalog;
 pub mod compute;
 pub mod compute_prewarm;
+pub mod compute_promote;
 pub mod disk_quota;
 pub mod extension_server;
 pub mod installed_extensions;
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -192,7 +192,7 @@ fn acquire_lsn_lease_grpc(
    lsn: Lsn,
 ) -> Result<Option<SystemTime>> {
    tokio::runtime::Handle::current().block_on(async move {
-        let mut client = page_api::Client::new(
+        let mut client = page_api::Client::connect(
            connstring.to_string(),
            tenant_shard_id.tenant_id,
            timeline_id,
--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -97,20 +97,34 @@ pub(crate) static PG_TOTAL_DOWNTIME_MS: Lazy<GenericCounter<AtomicU64>> = Lazy::
    .expect("failed to define a metric")
 });

-/// Needed as neon.file_cache_prewarm_batch == 0 doesn't mean we never tried to prewarm.
-/// On the other hand, LFC_PREWARMED_PAGES is excessive as we can GET /lfc/prewarm
-pub(crate) static LFC_PREWARM_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static LFC_PREWARMS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "compute_ctl_lfc_prewarm_requests_total",
-        "Total number of LFC prewarm requests made by compute_ctl",
+        "compute_ctl_lfc_prewarms_total",
+        "Total number of LFC prewarms requested by compute_ctl or autoprewarm option",
    )
    .expect("failed to define a metric")
 });

-pub(crate) static LFC_OFFLOAD_REQUESTS: Lazy<IntCounter> = Lazy::new(|| {
+pub(crate) static LFC_PREWARM_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!(
-        "compute_ctl_lfc_offload_requests_total",
-        "Total number of LFC offload requests made by compute_ctl",
+        "compute_ctl_lfc_prewarm_errors_total",
+        "Total number of LFC prewarm errors",
+    )
+    .expect("failed to define a metric")
+});
+
+pub(crate) static LFC_OFFLOADS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "compute_ctl_lfc_offloads_total",
+        "Total number of LFC offloads requested by compute_ctl or lfc_offload_period_seconds option",
+    )
+    .expect("failed to define a metric")
+});
+
+pub(crate) static LFC_OFFLOAD_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "compute_ctl_lfc_offload_errors_total",
+        "Total number of LFC offload errors",
    )
    .expect("failed to define a metric")
 });
@@ -124,7 +138,9 @@ pub fn collect() -> Vec<MetricFamily> {
    metrics.extend(AUDIT_LOG_DIR_SIZE.collect());
    metrics.extend(PG_CURR_DOWNTIME_MS.collect());
    metrics.extend(PG_TOTAL_DOWNTIME_MS.collect());
-    metrics.extend(LFC_PREWARM_REQUESTS.collect());
-    metrics.extend(LFC_OFFLOAD_REQUESTS.collect());
+    metrics.extend(LFC_PREWARMS.collect());
+    metrics.extend(LFC_PREWARM_ERRORS.collect());
+    metrics.extend(LFC_OFFLOADS.collect());
+    metrics.extend(LFC_OFFLOAD_ERRORS.collect());
    metrics
 }
--- a/compute_tools/src/migrations/0001-add_bypass_rls_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0001-add_bypass_rls_to_privileged_role.sql
@@ -0,0 +1 @@
+ALTER ROLE {privileged_role_name} BYPASSRLS;
--- a/compute_tools/src/migrations/0001-neon_superuser_bypass_rls.sql
+++ b/compute_tools/src/migrations/0001-neon_superuser_bypass_rls.sql
@@ -1 +0,0 @@
-ALTER ROLE neon_superuser BYPASSRLS;
--- a/compute_tools/src/migrations/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/0002-alter_roles.sql
@@ -1,8 +1,21 @@
+-- On December 8th, 2023, an engineering escalation (INC-110) was opened after
+-- it was found that BYPASSRLS was being applied to all roles.
+--
+-- PR that introduced the issue: https://github.com/neondatabase/neon/pull/5657
+-- Subsequent commit on main: https://github.com/neondatabase/neon/commit/ad99fa5f0393e2679e5323df653c508ffa0ac072
+--
+-- NOBYPASSRLS and INHERIT are the defaults for a Postgres role, but because it
+-- isn't easy to know if a Postgres cluster is affected by the issue, we need to
+-- keep the migration around for a long time, if not indefinitely, so any
+-- cluster can be fixed.
+--
+-- Branching is the gift that keeps on giving...
+
 DO $$
 DECLARE
    role_name text;
 BEGIN
-    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
+    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, '{privileged_role_name}', 'member')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
@@ -10,7 +23,7 @@ BEGIN

    FOR role_name IN SELECT rolname FROM pg_roles
        WHERE
-            NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
+            NOT pg_has_role(rolname, '{privileged_role_name}', 'member') AND NOT starts_with(rolname, 'pg_')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
--- a/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_privileged_role.sql
@@ -1,6 +1,6 @@
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
-        EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
+        EXECUTE 'GRANT pg_create_subscription TO {privileged_role_name}';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/0004-grant_pg_monitor_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0004-grant_pg_monitor_to_neon_superuser.sql
@@ -1 +0,0 @@
-GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/0004-grant_pg_monitor_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0004-grant_pg_monitor_to_privileged_role.sql
@@ -0,0 +1 @@
+GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/0005-grant_all_on_tables_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0005-grant_all_on_tables_to_privileged_role.sql
@@ -1,4 +1,4 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
--       interacted with by neon_superuser without permission issues.
+--       interacted with by {privileged_role_name} without permission issues.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name};
--- a/compute_tools/src/migrations/0006-grant_all_on_sequences_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0006-grant_all_on_sequences_to_privileged_role.sql
@@ -1,4 +1,4 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
--       interacted with by neon_superuser without permission issues.
+--       interacted with by {privileged_role_name} without permission issues.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name};
--- a/compute_tools/src/migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
@@ -1,3 +1,3 @@
 -- SKIP: Moved inline to the handle_grants() functions.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO {privileged_role_name} WITH GRANT OPTION;
--- a/compute_tools/src/migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
@@ -1,3 +1,3 @@
 -- SKIP: Moved inline to the handle_grants() functions.

-ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
+ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO {privileged_role_name} WITH GRANT OPTION;
--- a/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
@@ -1,7 +1,7 @@
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
-       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
-       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
+       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO {privileged_role_name}';
+       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO {privileged_role_name}';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql
@@ -1 +0,0 @@
-GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;
--- a/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
@@ -0,0 +1 @@
+GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO {privileged_role_name};
--- a/compute_tools/src/migrations/0012-grant_pg_signal_backend_to_privileged_role.sql
+++ b/compute_tools/src/migrations/0012-grant_pg_signal_backend_to_privileged_role.sql
@@ -0,0 +1 @@
+GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0001-add_bypass_rls_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0003-grant_pg_create_subscription_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0004-grant_pg_monitor_to_privileged_role.sql
@@ -7,13 +7,17 @@ BEGIN
        INTO monitor
        FROM pg_auth_members
        WHERE roleid = 'pg_monitor'::regrole
-            AND member = 'pg_monitor'::regrole;
+            AND member = 'neon_superuser'::regrole;

-    IF NOT monitor.member THEN
+    IF monitor IS NULL THEN
+        RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_monitor';
+    END IF;
+
+    IF monitor.admin IS NULL OR NOT monitor.member THEN
        RAISE EXCEPTION 'neon_superuser is not a member of pg_monitor';
    END IF;

-    IF NOT monitor.admin THEN
+    IF monitor.admin IS NULL OR NOT monitor.admin THEN
        RAISE EXCEPTION 'neon_superuser cannot grant pg_monitor';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0005-grant_all_on_tables_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0006-grant_all_on_sequences_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql
--- a/compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql
+++ b/compute_tools/src/migrations/tests/0012-grant_pg_signal_backend_to_privileged_role.sql
@@ -0,0 +1,23 @@
+DO $$
+DECLARE
+    signal_backend record;
+BEGIN
+    SELECT pg_has_role('neon_superuser', 'pg_signal_backend', 'member') AS member,
+            admin_option AS admin
+        INTO signal_backend
+        FROM pg_auth_members
+        WHERE roleid = 'pg_signal_backend'::regrole
+            AND member = 'neon_superuser'::regrole;
+
+    IF signal_backend IS NULL THEN
+        RAISE EXCEPTION 'no entry in pg_auth_members for neon_superuser and pg_signal_backend';
+    END IF;
+
+    IF signal_backend.member IS NULL OR NOT signal_backend.member THEN
+        RAISE EXCEPTION 'neon_superuser is not a member of pg_signal_backend';
+    END IF;
+
+    IF signal_backend.admin IS NULL OR NOT signal_backend.admin THEN
+        RAISE EXCEPTION 'neon_superuser cannot grant pg_signal_backend';
+    END IF;
+END $$;
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -84,7 +84,8 @@ impl ComputeMonitor {
        if matches!(
            compute_status,
            ComputeStatus::Terminated
-                | ComputeStatus::TerminationPending { .. }
+                | ComputeStatus::TerminationPendingFast
+                | ComputeStatus::TerminationPendingImmediate
                | ComputeStatus::Failed
        ) {
            info!(
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -9,6 +9,7 @@ use reqwest::StatusCode;
 use tokio_postgres::Client;
 use tracing::{error, info, instrument};

+use crate::compute::ComputeNodeParams;
 use crate::config;
 use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
 use crate::migration::MigrationRunner;
@@ -169,7 +170,7 @@ pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
 }

 #[instrument(skip_all)]
-pub async fn handle_migrations(client: &mut Client) -> Result<()> {
+pub async fn handle_migrations(params: ComputeNodeParams, client: &mut Client) -> Result<()> {
    info!("handle migrations");

    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
@@ -178,24 +179,58 @@ pub async fn handle_migrations(client: &mut Client) -> Result<()> {

    // Add new migrations in numerical order.
    let migrations = [
-        include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
-        include_str!("./migrations/0002-alter_roles.sql"),
-        include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
-        include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
-        include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
-        include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
-        include_str!(
-            "./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
+        &format!(
+            include_str!("./migrations/0001-add_bypass_rls_to_privileged_role.sql"),
+            privileged_role_name = params.privileged_role_name
        ),
-        include_str!(
-            "./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
+        &format!(
+            include_str!("./migrations/0002-alter_roles.sql"),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!("./migrations/0003-grant_pg_create_subscription_to_privileged_role.sql"),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!("./migrations/0004-grant_pg_monitor_to_privileged_role.sql"),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!("./migrations/0005-grant_all_on_tables_to_privileged_role.sql"),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!("./migrations/0006-grant_all_on_sequences_to_privileged_role.sql"),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!(
+                "./migrations/0007-grant_all_on_tables_with_grant_option_to_privileged_role.sql"
+            ),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!(
+                "./migrations/0008-grant_all_on_sequences_with_grant_option_to_privileged_role.sql"
+            ),
+            privileged_role_name = params.privileged_role_name
        ),
        include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
-        include_str!(
-            "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
+        &format!(
+            include_str!(
+                "./migrations/0010-grant_snapshot_synchronization_funcs_to_privileged_role.sql"
+            ),
+            privileged_role_name = params.privileged_role_name
        ),
-        include_str!(
-            "./migrations/0011-grant_pg_show_replication_origin_status_to_neon_superuser.sql"
+        &format!(
+            include_str!(
+                "./migrations/0011-grant_pg_show_replication_origin_status_to_privileged_role.sql"
+            ),
+            privileged_role_name = params.privileged_role_name
+        ),
+        &format!(
+            include_str!("./migrations/0012-grant_pg_signal_backend_to_privileged_role.sql"),
+            privileged_role_name = params.privileged_role_name
        ),
    ];

--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -13,14 +13,14 @@ use tokio_postgres::Client;
 use tokio_postgres::error::SqlState;
 use tracing::{Instrument, debug, error, info, info_span, instrument, warn};

-use crate::compute::{ComputeNode, ComputeState};
+use crate::compute::{ComputeNode, ComputeNodeParams, ComputeState};
 use crate::pg_helpers::{
    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async,
    get_existing_roles_async,
 };
 use crate::spec_apply::ApplySpecPhase::{
-    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser,
-    CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon,
+    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension,
+    CreatePgauditlogtofileExtension, CreatePrivilegedRole, CreateSchemaNeon,
    DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
    RunInEachDatabase,
@@ -49,6 +49,7 @@ impl ComputeNode {
            // Proceed with post-startup configuration. Note, that order of operations is important.
            let client = Self::get_maintenance_client(&conf).await?;
            let spec = spec.clone();
+            let params = Arc::new(self.params.clone());

            let databases = get_existing_dbs_async(&client).await?;
            let roles = get_existing_roles_async(&client)
@@ -157,6 +158,7 @@ impl ComputeNode {

                    let conf = Arc::new(conf);
                    let fut = Self::apply_spec_sql_db(
+                        params.clone(),
                        spec.clone(),
                        conf,
                        ctx.clone(),
@@ -185,7 +187,7 @@ impl ComputeNode {
            }

            for phase in [
-                CreateNeonSuperuser,
+                CreatePrivilegedRole,
                DropInvalidDatabases,
                RenameRoles,
                CreateAndAlterRoles,
@@ -195,6 +197,7 @@ impl ComputeNode {
            ] {
                info!("Applying phase {:?}", &phase);
                apply_operations(
+                    params.clone(),
                    spec.clone(),
                    ctx.clone(),
                    jwks_roles.clone(),
@@ -243,6 +246,7 @@ impl ComputeNode {
                    }

                    let fut = Self::apply_spec_sql_db(
+                        params.clone(),
                        spec.clone(),
                        conf,
                        ctx.clone(),
@@ -293,6 +297,7 @@ impl ComputeNode {
            for phase in phases {
                debug!("Applying phase {:?}", &phase);
                apply_operations(
+                    params.clone(),
                    spec.clone(),
                    ctx.clone(),
                    jwks_roles.clone(),
@@ -313,7 +318,9 @@ impl ComputeNode {
    /// May opt to not connect to databases that don't have any scheduled
    /// operations.  The function is concurrency-controlled with the provided
    /// semaphore.  The caller has to make sure the semaphore isn't exhausted.
+    #[allow(clippy::too_many_arguments)] // TODO: needs bigger refactoring
    async fn apply_spec_sql_db(
+        params: Arc<ComputeNodeParams>,
        spec: Arc<ComputeSpec>,
        conf: Arc<tokio_postgres::Config>,
        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
@@ -328,6 +335,7 @@ impl ComputeNode {

        for subphase in subphases {
            apply_operations(
+                params.clone(),
                spec.clone(),
                ctx.clone(),
                jwks_roles.clone(),
@@ -467,7 +475,7 @@ pub enum PerDatabasePhase {

 #[derive(Clone, Debug)]
 pub enum ApplySpecPhase {
-    CreateNeonSuperuser,
+    CreatePrivilegedRole,
    DropInvalidDatabases,
    RenameRoles,
    CreateAndAlterRoles,
@@ -510,6 +518,7 @@ pub struct MutableApplyContext {
 /// - No timeouts have (yet) been implemented.
 /// - The caller is responsible for limiting and/or applying concurrency.
 pub async fn apply_operations<'a, Fut, F>(
+    params: Arc<ComputeNodeParams>,
    spec: Arc<ComputeSpec>,
    ctx: Arc<RwLock<MutableApplyContext>>,
    jwks_roles: Arc<HashSet<String>>,
@@ -527,7 +536,7 @@ where
        debug!("Processing phase {:?}", &apply_spec_phase);
        let ctx = ctx;

-        let mut ops = get_operations(&spec, &ctx, &jwks_roles, &apply_spec_phase)
+        let mut ops = get_operations(&params, &spec, &ctx, &jwks_roles, &apply_spec_phase)
            .await?
            .peekable();

@@ -588,14 +597,18 @@ where
 /// sort/merge/batch execution, but for now this is a nice way to improve
 /// batching behavior of the commands.
 async fn get_operations<'a>(
+    params: &'a ComputeNodeParams,
    spec: &'a ComputeSpec,
    ctx: &'a RwLock<MutableApplyContext>,
    jwks_roles: &'a HashSet<String>,
    apply_spec_phase: &'a ApplySpecPhase,
 ) -> Result<Box<dyn Iterator<Item = Operation> + 'a + Send>> {
    match apply_spec_phase {
-        ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation {
-            query: include_str!("sql/create_neon_superuser.sql").to_string(),
+        ApplySpecPhase::CreatePrivilegedRole => Ok(Box::new(once(Operation {
+            query: format!(
+                include_str!("sql/create_privileged_role.sql"),
+                privileged_role_name = params.privileged_role_name
+            ),
            comment: None,
        }))),
        ApplySpecPhase::DropInvalidDatabases => {
@@ -697,8 +710,9 @@ async fn get_operations<'a>(
                        None => {
                            let query = if !jwks_roles.contains(role.name.as_str()) {
                                format!(
-                                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser {}",
+                                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE {} {}",
                                    role.name.pg_quote(),
+                                    params.privileged_role_name,
                                    role.to_pg_options(),
                                )
                            } else {
@@ -849,8 +863,9 @@ async fn get_operations<'a>(
                                // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database
                                // (see https://www.postgresql.org/docs/current/ddl-priv.html)
                                query: format!(
-                                    "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
-                                    db.name.pg_quote()
+                                    "GRANT ALL PRIVILEGES ON DATABASE {} TO {}",
+                                    db.name.pg_quote(),
+                                    params.privileged_role_name
                                ),
                                comment: None,
                            },
--- a/compute_tools/src/sql/create_neon_superuser.sql
+++ b/compute_tools/src/sql/create_neon_superuser.sql
@@ -1,8 +0,0 @@
-DO $$
-    BEGIN
-        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
-        THEN
-            CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
-        END IF;
-    END
-$$;
--- a/compute_tools/src/sql/create_privileged_role.sql
+++ b/compute_tools/src/sql/create_privileged_role.sql
@@ -0,0 +1,8 @@
+DO $$
+    BEGIN
+        IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = '{privileged_role_name}')
+        THEN
+            CREATE ROLE {privileged_role_name} CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data;
+        END IF;
+    END
+$$;
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -31,6 +31,7 @@ mod pg_helpers_tests {
 wal_level = logical
 hot_standby = on
 autoprewarm = off
+offload_lfc_interval_seconds = 20
 neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
 wal_log_hints = on
 log_connections = on
--- a/control_plane/README.md
+++ b/control_plane/README.md
@@ -8,10 +8,10 @@ code changes locally, but not suitable for running production systems.

 ## Example: Start with Postgres 16

-To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 3 of the start-up commands.
+To create and start a local development environment with Postgres 16, you will need to provide `--pg-version` flag to 2 of the start-up commands.

 ```shell
-cargo neon init --pg-version 16
+cargo neon init
 cargo neon start
 cargo neon tenant create --set-default --pg-version 16
 cargo neon endpoint create main --pg-version 16
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -631,6 +631,10 @@ struct EndpointCreateCmdArgs {
        help = "Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests."
    )]
    allow_multiple: bool,
+
+    /// Only allow changing it on creation
+    #[clap(long, help = "Name of the privileged role for the endpoint")]
+    privileged_role_name: Option<String>,
 }

 #[derive(clap::Args)]
@@ -675,6 +679,16 @@ struct EndpointStartCmdArgs {
    #[arg(default_value = "90s")]
    start_timeout: Duration,

+    #[clap(
+        long,
+        help = "Download LFC cache from endpoint storage on endpoint startup",
+        default_value = "false"
+    )]
+    autoprewarm: bool,
+
+    #[clap(long, help = "Upload LFC cache to endpoint storage periodically")]
+    offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
+
    #[clap(
        long,
        help = "Run in development mode, skipping VM-specific operations like process termination",
@@ -1470,6 +1484,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                args.grpc,
                !args.update_catalog,
                false,
+                args.privileged_role_name.clone(),
            )?;
        }
        EndpointCmd::Start(args) => {
@@ -1585,22 +1600,24 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            let endpoint_storage_token = env.generate_auth_token(&claims)?;
            let endpoint_storage_addr = env.endpoint_storage.listen_addr.to_string();

+            let args = control_plane::endpoint::EndpointStartArgs {
+                auth_token,
+                endpoint_storage_token,
+                endpoint_storage_addr,
+                safekeepers_generation,
+                safekeepers,
+                pageservers,
+                remote_ext_base_url: remote_ext_base_url.clone(),
+                shard_stripe_size: stripe_size.0 as usize,
+                create_test_user: args.create_test_user,
+                start_timeout: args.start_timeout,
+                autoprewarm: args.autoprewarm,
+                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
+                dev: args.dev,
+            };
+
            println!("Starting existing endpoint {endpoint_id}...");
-            endpoint
-                .start(
-                    &auth_token,
-                    endpoint_storage_token,
-                    endpoint_storage_addr,
-                    safekeepers_generation,
-                    safekeepers,
-                    pageservers,
-                    remote_ext_base_url.as_ref(),
-                    stripe_size.0 as usize,
-                    args.create_test_user,
-                    args.start_timeout,
-                    args.dev,
-                )
-                .await?;
+            endpoint.start(args).await?;
        }
        EndpointCmd::Reconfigure(args) => {
            let endpoint_id = &args.endpoint_id;
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -36,7 +36,7 @@ impl StorageBroker {
    pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
        let broker = &self.env.broker;

-        print!("Starting neon broker at {}", broker.client_url());
+        println!("Starting neon broker at {}", broker.client_url());

        let mut args = Vec::new();

--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -32,7 +32,8 @@
 //!     config.json                 - passed to `compute_ctl`
 //!     pgdata/
 //!         postgresql.conf       - copy of postgresql.conf created by `compute_ctl`
-//!         zenith.signal
+//!         neon.signal
+//!         zenith.signal         - copy of neon.signal, for backward compatibility
 //!         <other PostgreSQL files>
 //! ```
 //!
@@ -98,6 +99,7 @@ pub struct EndpointConf {
    features: Vec<ComputeFeature>,
    cluster: Option<Cluster>,
    compute_ctl_config: ComputeCtlConfig,
+    privileged_role_name: Option<String>,
 }

 //
@@ -198,6 +200,7 @@ impl ComputeControlPlane {
        grpc: bool,
        skip_pg_catalog_updates: bool,
        drop_subscriptions_before_start: bool,
+        privileged_role_name: Option<String>,
    ) -> Result<Arc<Endpoint>> {
        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
        let external_http_port = external_http_port.unwrap_or_else(|| self.get_port() + 1);
@@ -235,6 +238,7 @@ impl ComputeControlPlane {
            features: vec![],
            cluster: None,
            compute_ctl_config: compute_ctl_config.clone(),
+            privileged_role_name: privileged_role_name.clone(),
        });

        ep.create_endpoint_dir()?;
@@ -256,6 +260,7 @@ impl ComputeControlPlane {
                features: vec![],
                cluster: None,
                compute_ctl_config,
+                privileged_role_name,
            })?,
        )?;
        std::fs::write(
@@ -331,6 +336,9 @@ pub struct Endpoint {

    /// The compute_ctl config for the endpoint's compute.
    compute_ctl_config: ComputeCtlConfig,
+
+    /// The name of the privileged role for the endpoint.
+    privileged_role_name: Option<String>,
 }

 #[derive(PartialEq, Eq)]
@@ -373,6 +381,22 @@ impl std::fmt::Display for EndpointTerminateMode {
    }
 }

+pub struct EndpointStartArgs {
+    pub auth_token: Option<String>,
+    pub endpoint_storage_token: String,
+    pub endpoint_storage_addr: String,
+    pub safekeepers_generation: Option<SafekeeperGeneration>,
+    pub safekeepers: Vec<NodeId>,
+    pub pageservers: Vec<(PageserverProtocol, Host, u16)>,
+    pub remote_ext_base_url: Option<String>,
+    pub shard_stripe_size: usize,
+    pub create_test_user: bool,
+    pub start_timeout: Duration,
+    pub autoprewarm: bool,
+    pub offload_lfc_interval_seconds: Option<std::num::NonZeroU64>,
+    pub dev: bool,
+}
+
 impl Endpoint {
    fn from_dir_entry(entry: std::fs::DirEntry, env: &LocalEnv) -> Result<Endpoint> {
        if !entry.file_type()?.is_dir() {
@@ -415,6 +439,7 @@ impl Endpoint {
            features: conf.features,
            cluster: conf.cluster,
            compute_ctl_config: conf.compute_ctl_config,
+            privileged_role_name: conf.privileged_role_name,
        })
    }

@@ -447,7 +472,7 @@ impl Endpoint {
        conf.append("max_connections", "100");
        conf.append("wal_level", "logical");
        // wal_sender_timeout is the maximum time to wait for WAL replication.
-        // It also defines how often the walreciever will send a feedback message to the wal sender.
+        // It also defines how often the walreceiver will send a feedback message to the wal sender.
        conf.append("wal_sender_timeout", "5s");
        conf.append("listen_addresses", &self.pg_address.ip().to_string());
        conf.append("port", &self.pg_address.port().to_string());
@@ -677,21 +702,7 @@ impl Endpoint {
        })
    }

-    #[allow(clippy::too_many_arguments)]
-    pub async fn start(
-        &self,
-        auth_token: &Option<String>,
-        endpoint_storage_token: String,
-        endpoint_storage_addr: String,
-        safekeepers_generation: Option<SafekeeperGeneration>,
-        safekeepers: Vec<NodeId>,
-        pageservers: Vec<(PageserverProtocol, Host, u16)>,
-        remote_ext_base_url: Option<&String>,
-        shard_stripe_size: usize,
-        create_test_user: bool,
-        start_timeout: Duration,
-        dev: bool,
-    ) -> Result<()> {
+    pub async fn start(&self, args: EndpointStartArgs) -> Result<()> {
        if self.status() == EndpointStatus::Running {
            anyhow::bail!("The endpoint is already running");
        }
@@ -704,10 +715,10 @@ impl Endpoint {
            std::fs::remove_dir_all(self.pgdata())?;
        }

-        let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
+        let pageserver_connstring = Self::build_pageserver_connstr(&args.pageservers);
        assert!(!pageserver_connstring.is_empty());

-        let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
+        let safekeeper_connstrings = self.build_safekeepers_connstrs(args.safekeepers)?;

        // check for file remote_extensions_spec.json
        // if it is present, read it and pass to compute_ctl
@@ -735,7 +746,7 @@ impl Endpoint {
                    cluster_id: None, // project ID: not used
                    name: None,       // project name: not used
                    state: None,
-                    roles: if create_test_user {
+                    roles: if args.create_test_user {
                        vec![Role {
                            name: PgIdent::from_str("test").unwrap(),
                            encrypted_password: None,
@@ -744,7 +755,7 @@ impl Endpoint {
                    } else {
                        Vec::new()
                    },
-                    databases: if create_test_user {
+                    databases: if args.create_test_user {
                        vec![Database {
                            name: PgIdent::from_str("neondb").unwrap(),
                            owner: PgIdent::from_str("test").unwrap(),
@@ -766,20 +777,21 @@ impl Endpoint {
                endpoint_id: Some(self.endpoint_id.clone()),
                mode: self.mode,
                pageserver_connstring: Some(pageserver_connstring),
-                safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
+                safekeepers_generation: args.safekeepers_generation.map(|g| g.into_inner()),
                safekeeper_connstrings,
-                storage_auth_token: auth_token.clone(),
+                storage_auth_token: args.auth_token.clone(),
                remote_extensions,
                pgbouncer_settings: None,
-                shard_stripe_size: Some(shard_stripe_size),
+                shard_stripe_size: Some(args.shard_stripe_size),
                local_proxy_config: None,
                reconfigure_concurrency: self.reconfigure_concurrency,
                drop_subscriptions_before_start: self.drop_subscriptions_before_start,
                audit_log_level: ComputeAudit::Disabled,
                logs_export_host: None::<String>,
-                endpoint_storage_addr: Some(endpoint_storage_addr),
-                endpoint_storage_token: Some(endpoint_storage_token),
-                autoprewarm: false,
+                endpoint_storage_addr: Some(args.endpoint_storage_addr),
+                endpoint_storage_token: Some(args.endpoint_storage_token),
+                autoprewarm: args.autoprewarm,
+                offload_lfc_interval_seconds: args.offload_lfc_interval_seconds,
                suspend_timeout_seconds: -1, // Only used in neon_local.
            };

@@ -791,7 +803,7 @@ impl Endpoint {
                debug!("spec.cluster {:?}", spec.cluster);

                // fill missing fields again
-                if create_test_user {
+                if args.create_test_user {
                    spec.cluster.roles.push(Role {
                        name: PgIdent::from_str("test").unwrap(),
                        encrypted_password: None,
@@ -826,7 +838,7 @@ impl Endpoint {
        // Launch compute_ctl
        let conn_str = self.connstr("cloud_admin", "postgres");
        println!("Starting postgres node at '{conn_str}'");
-        if create_test_user {
+        if args.create_test_user {
            let conn_str = self.connstr("test", "neondb");
            println!("Also at '{conn_str}'");
        }
@@ -858,14 +870,18 @@ impl Endpoint {
        .stderr(logfile.try_clone()?)
        .stdout(logfile);

-        if let Some(remote_ext_base_url) = remote_ext_base_url {
-            cmd.args(["--remote-ext-base-url", remote_ext_base_url]);
+        if let Some(remote_ext_base_url) = args.remote_ext_base_url {
+            cmd.args(["--remote-ext-base-url", &remote_ext_base_url]);
        }

-        if dev {
+        if args.dev {
            cmd.arg("--dev");
        }

+        if let Some(privileged_role_name) = self.privileged_role_name.clone() {
+            cmd.args(["--privileged-role-name", &privileged_role_name]);
+        }
+
        let child = cmd.spawn()?;
        // set up a scopeguard to kill & wait for the child in case we panic or bail below
        let child = scopeguard::guard(child, |mut child| {
@@ -894,10 +910,11 @@ impl Endpoint {
                Ok(state) => {
                    match state.status {
                        ComputeStatus::Init => {
-                            if Instant::now().duration_since(start_at) > start_timeout {
+                            let timeout = args.start_timeout;
+                            if Instant::now().duration_since(start_at) > timeout {
                                bail!(
                                    "compute startup timed out {:?}; still in Init state",
-                                    start_timeout
+                                    timeout
                                );
                            }
                            // keep retrying
@@ -918,16 +935,18 @@ impl Endpoint {
                        ComputeStatus::Empty
                        | ComputeStatus::ConfigurationPending
                        | ComputeStatus::Configuration
-                        | ComputeStatus::TerminationPending { .. }
+                        | ComputeStatus::TerminationPendingFast
+                        | ComputeStatus::TerminationPendingImmediate
                        | ComputeStatus::Terminated => {
                            bail!("unexpected compute status: {:?}", state.status)
                        }
                    }
                }
                Err(e) => {
-                    if Instant::now().duration_since(start_at) > start_timeout {
+                    if Instant::now().duration_since(start_at) > args.start_timeout {
                        return Err(e).context(format!(
-                            "timed out {start_timeout:?} waiting to connect to compute_ctl HTTP",
+                            "timed out {:?} waiting to connect to compute_ctl HTTP",
+                            args.start_timeout
                        ));
                    }
                }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -217,6 +217,9 @@ pub struct NeonStorageControllerConf {
    pub posthog_config: Option<PostHogConfig>,

    pub kick_secondary_downloads: Option<bool>,
+
+    #[serde(with = "humantime_serde")]
+    pub shard_split_request_timeout: Option<Duration>,
 }

 impl NeonStorageControllerConf {
@@ -250,6 +253,7 @@ impl Default for NeonStorageControllerConf {
            timeline_safekeeper_count: None,
            posthog_config: None,
            kick_secondary_downloads: None,
+            shard_split_request_timeout: None,
        }
    }
 }
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -303,7 +303,7 @@ impl PageServerNode {
    async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
        // TODO: using a thread here because start_process() is not async but we need to call check_status()
        let datadir = self.repo_path();
-        print!(
+        println!(
            "Starting pageserver node {} at '{}' in {:?}, retrying for {:?}",
            self.conf.id,
            self.pg_connection_config.raw_address(),
@@ -452,6 +452,12 @@ impl PageServerNode {
                .map(|x| x.parse::<usize>())
                .transpose()
                .context("Failed to parse 'image_creation_threshold' as non zero integer")?,
+            // HADRON
+            image_layer_force_creation_period: settings
+                .remove("image_layer_force_creation_period")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'image_layer_force_creation_period' as duration")?,
            image_layer_creation_check_threshold: settings
                .remove("image_layer_creation_check_threshold")
                .map(|x| x.parse::<u8>())
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -127,7 +127,7 @@ impl SafekeeperNode {
        extra_opts: &[String],
        retry_timeout: &Duration,
    ) -> anyhow::Result<()> {
-        print!(
+        println!(
            "Starting safekeeper at '{}' in '{}', retrying for {:?}",
            self.pg_connection_config.raw_address(),
            self.datadir_path().display(),
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -648,6 +648,13 @@ impl StorageController {
            args.push(format!("--timeline-safekeeper-count={sk_cnt}"));
        }

+        if let Some(duration) = self.config.shard_split_request_timeout {
+            args.push(format!(
+                "--shard-split-request-timeout={}",
+                humantime::Duration::from(duration)
+            ));
+        }
+
        let mut envs = vec![
            ("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
            ("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
@@ -660,7 +667,7 @@ impl StorageController {
            ));
        }

-        println!("Starting storage controller");
+        println!("Starting storage controller at {scheme}://{host}:{listen_port}");

        background_process::start_process(
            COMMAND,
--- a/control_plane/storcon_cli/Cargo.toml
+++ b/control_plane/storcon_cli/Cargo.toml
@@ -14,6 +14,7 @@ humantime.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 reqwest.workspace = true
+safekeeper_api.workspace=true
 serde_json = { workspace = true, features = ["raw_value"] }
 storage_controller_client.workspace = true
 tokio.workspace = true
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -11,7 +11,7 @@ use pageserver_api::controller_api::{
    PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest,
    ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse,
    SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
-    TenantShardMigrateRequest, TenantShardMigrateResponse,
+    TenantShardMigrateRequest, TenantShardMigrateResponse, TimelineSafekeeperMigrateRequest,
 };
 use pageserver_api::models::{
    EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig,
@@ -21,6 +21,7 @@ use pageserver_api::models::{
 use pageserver_api::shard::{ShardStripeSize, TenantShardId};
 use pageserver_client::mgmt_api::{self};
 use reqwest::{Certificate, Method, StatusCode, Url};
+use safekeeper_api::models::TimelineLocateResponse;
 use storage_controller_client::control_api::Client;
 use utils::id::{NodeId, TenantId, TimelineId};

@@ -65,12 +66,33 @@ enum Command {
        #[arg(long)]
        scheduling: Option<NodeSchedulingPolicy>,
    },
-    // Set a node status as deleted.
+    /// Exists for backup usage and will be removed in future.
+    /// Use [`Command::NodeStartDelete`] instead, if possible.
    NodeDelete {
        #[arg(long)]
        node_id: NodeId,
    },
+    /// Start deletion of the specified pageserver.
+    NodeStartDelete {
+        #[arg(long)]
+        node_id: NodeId,
+        /// When `force` is true, skip waiting for shards to prewarm during migration.
+        /// This can significantly speed up node deletion since prewarming all shards
+        /// can take considerable time, but may result in slower initial access to
+        /// migrated shards until they warm up naturally.
+        #[arg(long)]
+        force: bool,
+    },
+    /// Cancel deletion of the specified pageserver and wait for `timeout`
+    /// for the operation to be canceled. May be retried.
+    NodeCancelDelete {
+        #[arg(long)]
+        node_id: NodeId,
+        #[arg(long)]
+        timeout: humantime::Duration,
+    },
    /// Delete a tombstone of node from the storage controller.
+    /// This is used when we want to allow the node to be re-registered.
    NodeDeleteTombstone {
        #[arg(long)]
        node_id: NodeId,
@@ -264,6 +286,23 @@ enum Command {
        #[arg(long)]
        concurrency: Option<usize>,
    },
+    /// Locate safekeepers for a timeline from the storcon DB.
+    TimelineLocate {
+        #[arg(long)]
+        tenant_id: TenantId,
+        #[arg(long)]
+        timeline_id: TimelineId,
+    },
+    /// Migrate a timeline to a new set of safekeepers
+    TimelineSafekeeperMigrate {
+        #[arg(long)]
+        tenant_id: TenantId,
+        #[arg(long)]
+        timeline_id: TimelineId,
+        /// Example: --new-sk-set 1,2,3
+        #[arg(long, required = true, value_delimiter = ',')]
+        new_sk_set: Vec<NodeId>,
+    },
 }

 #[derive(Parser)]
@@ -443,6 +482,7 @@ async fn main() -> anyhow::Result<()> {
                        listen_http_port,
                        listen_https_port,
                        availability_zone_id: AvailabilityZone(availability_zone_id),
+                        node_ip_addr: None,
                    }),
                )
                .await?;
@@ -912,10 +952,44 @@ async fn main() -> anyhow::Result<()> {
                .await?;
        }
        Command::NodeDelete { node_id } => {
+            eprintln!("Warning: This command is obsolete and will be removed in a future version");
+            eprintln!("Use `NodeStartDelete` instead, if possible");
            storcon_client
                .dispatch::<(), ()>(Method::DELETE, format!("control/v1/node/{node_id}"), None)
                .await?;
        }
+        Command::NodeStartDelete { node_id, force } => {
+            let query = if force {
+                format!("control/v1/node/{node_id}/delete?force=true")
+            } else {
+                format!("control/v1/node/{node_id}/delete")
+            };
+            storcon_client
+                .dispatch::<(), ()>(Method::PUT, query, None)
+                .await?;
+            println!("Delete started for {node_id}");
+        }
+        Command::NodeCancelDelete { node_id, timeout } => {
+            storcon_client
+                .dispatch::<(), ()>(
+                    Method::DELETE,
+                    format!("control/v1/node/{node_id}/delete"),
+                    None,
+                )
+                .await?;
+
+            println!("Waiting for node {node_id} to quiesce on scheduling policy ...");
+
+            let final_policy =
+                wait_for_scheduling_policy(storcon_client, node_id, *timeout, |sched| {
+                    !matches!(sched, NodeSchedulingPolicy::Deleting)
+                })
+                .await?;
+
+            println!(
+                "Delete was cancelled for node {node_id}. Schedulling policy is now {final_policy:?}"
+            );
+        }
        Command::NodeDeleteTombstone { node_id } => {
            storcon_client
                .dispatch::<(), ()>(
@@ -1276,7 +1350,7 @@ async fn main() -> anyhow::Result<()> {
            concurrency,
        } => {
            let mut path = format!(
-                "/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
+                "v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers",
            );

            if let Some(c) = concurrency {
@@ -1287,6 +1361,41 @@ async fn main() -> anyhow::Result<()> {
                .dispatch::<(), ()>(Method::POST, path, None)
                .await?;
        }
+        Command::TimelineLocate {
+            tenant_id,
+            timeline_id,
+        } => {
+            let path = format!("debug/v1/tenant/{tenant_id}/timeline/{timeline_id}/locate");
+
+            let resp = storcon_client
+                .dispatch::<(), TimelineLocateResponse>(Method::GET, path, None)
+                .await?;
+
+            let sk_set = resp.sk_set.iter().map(|id| id.0 as i64).collect::<Vec<_>>();
+            let new_sk_set = resp
+                .new_sk_set
+                .as_ref()
+                .map(|ids| ids.iter().map(|id| id.0 as i64).collect::<Vec<_>>());
+
+            println!("generation = {}", resp.generation);
+            println!("sk_set = {sk_set:?}");
+            println!("new_sk_set = {new_sk_set:?}");
+        }
+        Command::TimelineSafekeeperMigrate {
+            tenant_id,
+            timeline_id,
+            new_sk_set,
+        } => {
+            let path = format!("v1/tenant/{tenant_id}/timeline/{timeline_id}/safekeeper_migrate");
+
+            storcon_client
+                .dispatch::<_, ()>(
+                    Method::POST,
+                    path,
+                    Some(TimelineSafekeeperMigrateRequest { new_sk_set }),
+                )
+                .await?;
+        }
    }

    Ok(())
--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -54,14 +54,16 @@ else
    printf '%s\n' "${result}" | jq .
  fi

-  echo "Check if a timeline present"
-  PARAMS=(
-       -X GET
-       -H "Content-Type: application/json"
-       "http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
-  )
-  timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
-  if [[ -z "${timeline_id}" || "${timeline_id}" = null ]]; then
+  if [[ "${RUN_PARALLEL:-false}" != "true" ]]; then
+    echo "Check if a timeline present"
+    PARAMS=(
+         -X GET
+         -H "Content-Type: application/json"
+        "http://pageserver:9898/v1/tenant/${tenant_id}/timeline"
+    )
+    timeline_id=$(curl "${PARAMS[@]}" | jq -r .[0].timeline_id)
+  fi
+  if [[ -z "${timeline_id:-}" || "${timeline_id:-}" = null ]]; then
    generate_id timeline_id
    PARAMS=(
        -sbf
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -142,7 +142,7 @@ services:
      - "storage_broker"
      - "--listen-addr=0.0.0.0:50051"

-  compute:
+  compute1:
    restart: always
    build:
      context: ./compute_wrapper/
@@ -152,6 +152,7 @@ services:
        - TAG=${COMPUTE_TAG:-${TAG:-latest}}
        - http_proxy=${http_proxy:-}
        - https_proxy=${https_proxy:-}
+    image: built-compute
    environment:
      - PG_VERSION=${PG_VERSION:-16}
      - TENANT_ID=${TENANT_ID:-}
@@ -166,6 +167,11 @@ services:
      - 3080:3080 # http endpoints
    entrypoint:
      - "/shell/compute.sh"
+    # Ad an alias for compute1 for compatibility
+    networks:
+      default:
+        aliases:
+            - compute
    depends_on:
      - safekeeper1
      - safekeeper2
@@ -174,15 +180,20 @@ services:

  compute_is_ready:
    image: postgres:latest
+    environment:
+      - PARALLEL_COMPUTES=1
    entrypoint:
-      - "/bin/bash"
+      - "/bin/sh"
      - "-c"
    command:
-      - "until pg_isready -h compute -p 55433 -U cloud_admin ; do
-            echo 'Waiting to start compute...' && sleep 1;
-         done"
+      - "for i in $(seq 1 $${PARALLEL_COMPUTES}); do
+           until pg_isready -h compute$$i -p 55433 -U cloud_admin ; do
+             sleep 1;
+           done;
+         done;
+         echo All computes are started"
    depends_on:
-      - compute
+      - compute1

  neon-test-extensions:
    profiles: ["test-extensions"]
@@ -196,4 +207,4 @@ services:
    command:
      - sleep 3600
    depends_on:
-      - compute
+      - compute1
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash

 # A basic test to ensure Docker images are built correctly.
 # Build a wrapper around the compute, start all services and runs a simple SQL query.
@@ -13,9 +13,36 @@
 #
 set -eux -o pipefail

+cd "$(dirname "${0}")"
 export COMPOSE_FILE='docker-compose.yml'
 export COMPOSE_PROFILES=test-extensions
-cd "$(dirname "${0}")"
+export PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
+READY_MESSAGE="All computes are started"
+COMPUTES=()
+for i in $(seq 1 "${PARALLEL_COMPUTES}"); do
+  COMPUTES+=("compute${i}")
+done
+CURRENT_TMPDIR=$(mktemp -d)
+trap 'rm -rf ${CURRENT_TMPDIR} docker-compose-parallel.yml' EXIT
+if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
+  export COMPOSE_FILE=docker-compose-parallel.yml
+  cp docker-compose.yml docker-compose-parallel.yml
+  # Replace the environment variable PARALLEL_COMPUTES with the actual value
+  yq eval -i ".services.compute_is_ready.environment |=  map(select(. | test(\"^PARALLEL_COMPUTES=\") | not)) + [\"PARALLEL_COMPUTES=${PARALLEL_COMPUTES}\"]" ${COMPOSE_FILE}
+  for i in $(seq 2 "${PARALLEL_COMPUTES}"); do
+    # Duplicate compute1 as compute${i} for parallel execution
+    yq eval -i ".services.compute${i} = .services.compute1" ${COMPOSE_FILE}
+    # We don't need these sections, so delete them
+    yq eval -i "(del .services.compute${i}.build) | (del .services.compute${i}.ports) | (del .services.compute${i}.networks)" ${COMPOSE_FILE}
+    # Let the compute 1 be the only dependence
+    yq eval -i ".services.compute${i}.depends_on = [\"compute1\"]" ${COMPOSE_FILE}
+    # Set RUN_PARALLEL=true for compute2. They will generate tenant_id and timeline_id to avoid using the same as other computes
+    yq eval -i ".services.compute${i}.environment += [\"RUN_PARALLEL=true\"]" ${COMPOSE_FILE}
+    # Remove TENANT_ID and TIMELINE_ID from the environment variables of the generated computes
+    # They will create new TENANT_ID and TIMELINE_ID anyway.
+    yq eval -i ".services.compute${i}.environment |= map(select(. | (test(\"^TENANT_ID=\") or test(\"^TIMELINE_ID=\")) | not))" ${COMPOSE_FILE}
+  done
+fi
 PSQL_OPTION="-h localhost -U cloud_admin -p 55433 -d postgres"

 function cleanup() {
@@ -27,11 +54,11 @@ function cleanup() {

 for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
    pg_version=${pg_version/v/}
-    echo "clean up containers if exists"
+    echo "clean up containers if exist"
    cleanup
    PG_TEST_VERSION=$((pg_version < 16 ? 16 : pg_version))
-    PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull --build -d
-
+    PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose build compute1
+    PG_VERSION=${pg_version} PG_TEST_VERSION=${PG_TEST_VERSION} docker compose up --quiet-pull -d
    echo "wait until the compute is ready. timeout after 60s. "
    cnt=0
    while sleep 3; do
@@ -41,45 +68,50 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
            echo "timeout before the compute is ready."
            exit 1
        fi
-        if docker compose logs "compute_is_ready" | grep -q "accepting connections"; then
+        if docker compose logs compute_is_ready | grep -q "${READY_MESSAGE}"; then
            echo "OK. The compute is ready to connect."
            echo "execute simple queries."
-            docker compose exec compute /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
+            for compute in "${COMPUTES[@]}"; do
+              docker compose exec "${compute}" /bin/bash -c "psql ${PSQL_OPTION} -c 'SELECT 1'"
+            done
            break
        fi
    done

    if [[ ${pg_version} -ge 16 ]]; then
-        # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
-        # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
-        echo Adding dummy config
-        docker compose exec compute touch /var/db/postgres/compute/compute_ctl_temp_override.conf
-        # Prepare for the PostGIS test
-        docker compose exec compute mkdir -p /tmp/pgis_reg/pgis_reg_tmp
-        TMPDIR=$(mktemp -d)
-        docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${TMPDIR}"
-        docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${TMPDIR}"
-        docker compose exec compute mkdir -p /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
-        docker compose cp "${TMPDIR}/test" compute:/ext-src/postgis-src/raster/test
-        docker compose cp "${TMPDIR}/00-regress-install" compute:/ext-src/postgis-src/regress
-        rm -rf "${TMPDIR}"
-        # The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
-        TMPDIR=$(mktemp -d)
-        docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${TMPDIR}/data"
-        docker compose cp "${TMPDIR}/data" compute:/ext-src/pg_hint_plan-src/
-        rm -rf "${TMPDIR}"
-        # The following block does the same for the contrib/file_fdw test
-        TMPDIR=$(mktemp -d)
-        docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${TMPDIR}/data"
-        docker compose cp "${TMPDIR}/data" compute:/postgres/contrib/file_fdw/data
-        rm -rf "${TMPDIR}"
+        mkdir "${CURRENT_TMPDIR}"/{pg_hint_plan-src,file_fdw,postgis-src}
+        docker compose cp neon-test-extensions:/ext-src/postgis-src/raster/test "${CURRENT_TMPDIR}/postgis-src/test"
+        docker compose cp neon-test-extensions:/ext-src/postgis-src/regress/00-regress-install "${CURRENT_TMPDIR}/postgis-src/00-regress-install"
+        docker compose cp neon-test-extensions:/ext-src/pg_hint_plan-src/data "${CURRENT_TMPDIR}/pg_hint_plan-src/data"
+        docker compose cp neon-test-extensions:/postgres/contrib/file_fdw/data "${CURRENT_TMPDIR}/file_fdw/data"
+
+        for compute in "${COMPUTES[@]}"; do
+          # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
+          # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
+          echo Adding dummy config on "${compute}"
+          docker compose exec "${compute}" touch /var/db/postgres/compute/compute_ctl_temp_override.conf
+          # Prepare for the PostGIS test
+          docker compose exec "${compute}" mkdir -p /tmp/pgis_reg/pgis_reg_tmp /ext-src/postgis-src/raster /ext-src/postgis-src/regress /ext-src/postgis-src/regress/00-regress-install
+          docker compose cp "${CURRENT_TMPDIR}/postgis-src/test" "${compute}":/ext-src/postgis-src/raster/test
+          docker compose cp "${CURRENT_TMPDIR}/postgis-src/00-regress-install" "${compute}":/ext-src/postgis-src/regress
+          # The following block copies the files for the pg_hintplan test to the compute node for the extension test in an isolated docker-compose environment
+          docker compose cp "${CURRENT_TMPDIR}/pg_hint_plan-src/data" "${compute}":/ext-src/pg_hint_plan-src/
+          # The following block does the same for the contrib/file_fdw test
+          docker compose cp "${CURRENT_TMPDIR}/file_fdw/data" "${compute}":/postgres/contrib/file_fdw/data
+        done
        # Apply patches
        docker compose exec -T neon-test-extensions bash -c "(cd /postgres && patch -p1)" <"../compute/patches/contrib_pg${pg_version}.patch"
        # We are running tests now
        rm -f testout.txt testout_contrib.txt
+        # We want to run the longest tests first to better utilize parallelization and reduce overall test time.
+        # Tests listed in the RUN_FIRST variable will be run before others.
+        # If parallelization is not used, this environment variable will be ignored.
+
        docker compose exec -e USE_PGXS=1 -e SKIP=timescaledb-src,rdkit-src,pg_jsonschema-src,kq_imcx-src,wal2json_2_5-src,rag_jina_reranker_v1_tiny_en-src,rag_bge_small_en_v15-src \
+        -e RUN_FIRST=hll-src,postgis-src,pgtap-src -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
        neon-test-extensions /run-tests.sh /ext-src | tee testout.txt && EXT_SUCCESS=1 || EXT_SUCCESS=0
        docker compose exec -e SKIP=start-scripts,postgres_fdw,ltree_plpython,jsonb_plpython,jsonb_plperl,hstore_plpython,hstore_plperl,dblink,bool_plperl \
+        -e PARALLEL_COMPUTES="${PARALLEL_COMPUTES}" \
        neon-test-extensions /run-tests.sh /postgres/contrib | tee testout_contrib.txt && CONTRIB_SUCCESS=1 || CONTRIB_SUCCESS=0
        if [[ ${EXT_SUCCESS} -eq 0 || ${CONTRIB_SUCCESS} -eq 0 ]]; then
            CONTRIB_FAILED=
--- a/docker-compose/run-tests.sh
+++ b/docker-compose/run-tests.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -x

 if [[ -v BENCHMARK_CONNSTR ]]; then
@@ -26,8 +26,9 @@ if [[ -v BENCHMARK_CONNSTR ]]; then
  fi
 fi
 REGULAR_USER=false
-while getopts r arg; do
-  case $arg in
+PARALLEL_COMPUTES=${PARALLEL_COMPUTES:-1}
+while getopts pr arg; do
+  case ${arg} in
  r)
    REGULAR_USER=true
    shift $((OPTIND-1))
@@ -41,26 +42,49 @@ extdir=${1}

 cd "${extdir}" || exit 2
 FAILED=
-LIST=$( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
-for d in ${LIST}; do
-    [ -d "${d}" ] || continue
-    if ! psql -w -c "select 1" >/dev/null; then
-      FAILED="${d} ${FAILED}"
-      break
-    fi
-    if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
-       "${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
-       continue
-    fi
+export FAILED_FILE=/tmp/failed
+rm -f ${FAILED_FILE}
+mapfile -t LIST < <( (echo -e "${SKIP//","/"\n"}"; ls) | sort | uniq -u)
+if [[ ${PARALLEL_COMPUTES} -gt 1 ]]; then
+  # Avoid errors if RUN_FIRST is not defined
+  RUN_FIRST=${RUN_FIRST:-}
+  # Move entries listed in the RUN_FIRST variable to the beginning
+  ORDERED_LIST=$(printf "%s\n" "${LIST[@]}" | grep -x -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"); printf "%s\n" "${LIST[@]}" | grep -vx -Ff <(echo -e "${RUN_FIRST//,/$'\n'}"))
+  parallel -j"${PARALLEL_COMPUTES}" "[[ -d {} ]] || exit 0
+                export PGHOST=compute{%}
+                if ! psql -c 'select 1'>/dev/null; then
+                  exit 1
+                fi
+                echo Running on \${PGHOST}
+                if [[ -f ${extdir}/{}/neon-test.sh ]]; then
+                  echo Running from script
+                  ${extdir}/{}/neon-test.sh || echo {} >> ${FAILED_FILE};
+                else
+                  echo Running using make;
+                  USE_PGXS=1 make -C {} installcheck || echo {} >> ${FAILED_FILE};
+                fi" ::: ${ORDERED_LIST}
+  [[ ! -f ${FAILED_FILE} ]] && exit 0
+else
+  for d in "${LIST[@]}"; do
+      [ -d "${d}" ] || continue
+      if ! psql -w -c "select 1" >/dev/null; then
+        FAILED="${d} ${FAILED}"
+        break
+      fi
+      if [[ ${REGULAR_USER} = true ]] && [ -f "${d}"/regular-test.sh ]; then
+        "${d}/regular-test.sh" || FAILED="${d} ${FAILED}"
+        continue
+      fi

-    if [ -f "${d}/neon-test.sh" ]; then
-       "${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
-    else
-       USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
-    fi
-done
-[ -z "${FAILED}" ] && exit 0
-for d in ${FAILED}; do
+      if [ -f "${d}/neon-test.sh" ]; then
+        "${d}/neon-test.sh" || FAILED="${d} ${FAILED}"
+      else
+        USE_PGXS=1 make -C "${d}" installcheck || FAILED="${d} ${FAILED}"
+      fi
+  done
+  [[ -z ${FAILED} ]]  && exit 0
+fi
+for d in ${FAILED} $([[ ! -f ${FAILED_FILE} ]] || cat ${FAILED_FILE}); do
  cat "$(find $d -name regression.diffs)"
 done
 for postgis_diff in /tmp/pgis_reg/*_diff; do
@@ -68,4 +92,5 @@ for postgis_diff in /tmp/pgis_reg/*_diff; do
  cat "${postgis_diff}"
 done
 echo "${FAILED}"
+cat ${FAILED_FILE}
 exit 1
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -eux -o pipefail
 cd "$(dirname "${0}")"
 # Takes a variable name as argument. The result is stored in that variable.
@@ -60,8 +60,8 @@ function check_timeline() {
 # Restarts the compute node with the required compute tag and timeline.
 # Accepts the tag for the compute node and the timeline as parameters.
 function restart_compute() {
-  docker compose down compute compute_is_ready
-  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
+  docker compose down compute1 compute_is_ready
+  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute1 compute_is_ready
  wait_for_ready
  check_timeline ${2}
 }
--- a/docs/core_changes.md
+++ b/docs/core_changes.md
@@ -129,9 +129,10 @@ segment to bootstrap the WAL writing, but it doesn't contain the checkpoint reco
 changes in xlog.c, to allow starting the compute node without reading the last checkpoint record
 from WAL.

-This includes code to read the `zenith.signal` file, which tells the startup code the LSN to start
-at. When the `zenith.signal` file is present, the startup uses that LSN instead of the last
-checkpoint's LSN. The system is known to be consistent at that LSN, without any WAL redo.
+This includes code to read the `neon.signal` (also `zenith.signal`) file, which tells the startup 
+code the LSN to start at. When the `neon.signal` file is present, the startup uses that LSN
+instead of the last checkpoint's LSN. The system is known to be consistent at that LSN, without 
+any WAL redo.


 ### How to get rid of the patch
--- a/docs/pageserver-services.md
+++ b/docs/pageserver-services.md
@@ -75,7 +75,7 @@ CLI examples:
 * AWS S3  : `env AWS_ACCESS_KEY_ID='SOMEKEYAAAAASADSAH*#' AWS_SECRET_ACCESS_KEY='SOMEsEcReTsd292v' ${PAGESERVER_BIN} -c "remote_storage={bucket_name='some-sample-bucket',bucket_region='eu-north-1', prefix_in_bucket='/test_prefix/'}"`

 For Amazon AWS S3, a key id and secret access key could be located in `~/.aws/credentials` if awscli was ever configured to work with the desired bucket, on the AWS Settings page for a certain user. Also note, that the bucket names does not contain any protocols when used on AWS.
-For local S3 installations, refer to the their documentation for name format and credentials.
+For local S3 installations, refer to their documentation for name format and credentials.

 Similar to other pageserver settings, toml config file can be used to configure either of the storages as backup targets.
 Required sections are:
--- a/docs/rfcs/035-safekeeper-dynamic-membership-change.md
+++ b/docs/rfcs/035-safekeeper-dynamic-membership-change.md
@@ -20,7 +20,7 @@ In our case consensus leader is compute (walproposer), and we don't want to wake
 up all computes for the change. Neither we want to fully reimplement the leader
 logic second time outside compute. Because of that the proposed algorithm relies
 for issuing configurations on the external fault tolerant (distributed) strongly
-consisent storage with simple API: CAS (compare-and-swap) on the single key.
+consistent storage with simple API: CAS (compare-and-swap) on the single key.
 Properly configured postgres suits this.

 In the system consensus is implemented at the timeline level, so algorithm below
@@ -34,7 +34,7 @@ A configuration is

 ```
 struct Configuration {
-    generation: Generation, // a number uniquely identifying configuration
+    generation: SafekeeperGeneration, // a number uniquely identifying configuration
    sk_set: Vec<NodeId>, // current safekeeper set
    new_sk_set: Optional<Vec<NodeId>>,
 }
@@ -81,11 +81,11 @@ configuration generation in them is less than its current one. Namely, it
 refuses to vote, to truncate WAL in `handle_elected` and to accept WAL. In
 response it sends its current configuration generation to let walproposer know.

-Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/configuration`
-accepting `Configuration`. Safekeeper switches to the given conf it is higher than its
+Safekeeper gets `PUT /v1/tenants/{tenant_id}/timelines/{timeline_id}/membership`
+accepting `Configuration`. Safekeeper switches to the given conf if it is higher than its
 current one and ignores it otherwise. In any case it replies with
 ```
-struct ConfigurationSwitchResponse {
+struct TimelineMembershipSwitchResponse {
    conf: Configuration,
    term: Term,
    last_log_term: Term,
@@ -108,7 +108,7 @@ establishes this configuration as its own and moves to voting.
 It should stop talking to safekeepers not listed in the configuration at this
 point, though it is not unsafe to continue doing so.

-To be elected it must receive votes from both majorites if `new_sk_set` is present.
+To be elected it must receive votes from both majorities if `new_sk_set` is present.
 Similarly, to commit WAL it must receive flush acknowledge from both majorities.

 If walproposer hears from safekeeper configuration higher than his own (i.e.
@@ -130,7 +130,7 @@ storage are reachable.
 1) Fetch current timeline configuration from the configuration storage.
 2) If it is already joint one and `new_set` is different from `desired_set`
   refuse to change. However, assign join conf to (in memory) var
-   `join_conf` and proceed to step 4 to finish the ongoing change.
+   `joint_conf` and proceed to step 4 to finish the ongoing change.
 3) Else, create joint `joint_conf: Configuration`: increment current conf number
   `n` and put `desired_set` to `new_sk_set`. Persist it in the configuration
   storage by doing CAS on the current generation: change happens only if
@@ -161,11 +161,11 @@ storage are reachable.
   because `pull_timeline` already includes it and plus additionally would be
   broadcast by compute. More importantly, we may proceed to the next step
   only when `<last_log_term, flush_lsn>` on the majority of the new set reached
-   `sync_position`. Similarly, on the happy path no waiting is not needed because
+   `sync_position`. Similarly, on the happy path no waiting is needed because
   `pull_timeline` already includes it. However, we should double
    check to be safe. For example, timeline could have been created earlier e.g.
    manually or after try-to-migrate, abort, try-to-migrate-again sequence.
-7) Create `new_conf: Configuration` incrementing `join_conf` generation and having new
+7) Create `new_conf: Configuration` incrementing `joint_conf` generation and having new
   safekeeper set as `sk_set` and None `new_sk_set`. Write it to configuration
   storage under one more CAS.
 8) Call `PUT` `configuration` on safekeepers from the new set,
@@ -178,12 +178,12 @@ spec of it.

 Description above focuses on safety. To make the flow practical and live, here a few more
 considerations.
-1) It makes sense to ping new set to ensure it we are migrating to live node(s) before
+1) It makes sense to ping new set to ensure we are migrating to live node(s) before
  step 3.
 2) If e.g. accidentally wrong new sk set has been specified, before CAS in step `6` is completed
   it is safe to rollback to the old conf with one more CAS.
 3) On step 4 timeline might be already created on members of the new set for various reasons;
-   the simplest is the procedure restart. There are more complicated scenarious like mentioned
+   the simplest is the procedure restart. There are more complicated scenarios like mentioned
   in step 5. Deleting and re-doing `pull_timeline` is generally unsafe without involving
   generations, so seems simpler to treat existing timeline as success. However, this also
   has a disadvantage: you might imagine an surpassingly unlikely schedule where condition in
@@ -192,7 +192,7 @@ considerations.
 4) In the end timeline should be locally deleted on the safekeeper(s) which are
   in the old set but not in the new one, unless they are unreachable. To be
   safe this also should be done under generation number (deletion proceeds only if
-   current configuration is <= than one in request and safekeeper is not memeber of it).
+   current configuration is <= than one in request and safekeeper is not member of it).
 5) If current conf fetched on step 1 is already not joint and members equal to `desired_set`,
   jump to step 7, using it as `new_conf`.

@@ -261,14 +261,14 @@ Timeline (branch) creation in cplane should call storage_controller POST
 Response should be augmented with `safekeepers_generation` and `safekeepers`
 fields like described in `/notify-safekeepers` above. Initially (currently)
 these fields may be absent; in this case cplane chooses safekeepers on its own
-like it currently does. The call should be retried until succeeds.
+like it currently does. The call should be retried until it succeeds.

 Timeline deletion and tenant deletion in cplane should call appropriate
 storage_controller endpoints like it currently does for sharded tenants. The
 calls should be retried until they succeed.

-When compute receives safekeepers list from control plane it needs to know the
-generation to checked whether it should be updated (note that compute may get
+When compute receives safekeeper list from control plane it needs to know the
+generation to check whether it should be updated (note that compute may get
 safekeeper list from either cplane or safekeepers). Currently `neon.safekeepers`
 GUC is just a comma separates list of `host:port`. Let's prefix it with
 `g#<generation>:` to this end, so it will look like
@@ -305,8 +305,8 @@ enum MigrationRequest {
 ```

 `FinishPending` requests to run the procedure to ensure state is clean: current
-configuration is not joint and majority of safekeepers are aware of it, but do
-not attempt to migrate anywhere. If current configuration fetched on step 1 is
+configuration is not joint and the majority of safekeepers are aware of it, but do
+not attempt to migrate anywhere. If the current configuration fetched on step 1 is
 not joint it jumps to step 7. It should be run at startup for all timelines (but
 similarly, in the first version it is ok to trigger it manually).

@@ -315,7 +315,7 @@ similarly, in the first version it is ok to trigger it manually).
 `safekeepers` table mirroring current `nodes` should be added, except that for
 `scheduling_policy`: it is enough to have at least in the beginning only 3
 fields: 1) `active` 2) `paused` (initially means only not assign new tlis there
-3) `decomissioned` (node is removed).
+3) `decommissioned` (node is removed).

 `timelines` table:
 ```
@@ -326,9 +326,10 @@ table! {
        tenant_id -> Varchar,
        start_lsn -> pg_lsn,
        generation -> Int4,
-        sk_set -> Array<Int4>, // list of safekeeper ids
+        sk_set -> Array<Int8>, // list of safekeeper ids
        new_sk_set -> Nullable<Array<Int8>>, // list of safekeeper ids, null if not joint conf
        cplane_notified_generation -> Int4,
+        sk_set_notified_generation -> Int4, // the generation a quorum of sk_set knows about
        deleted_at -> Nullable<Timestamptz>,
    }
 }
@@ -338,13 +339,23 @@ table! {
 might also want to add ancestor_timeline_id to preserve the hierarchy, but for
 this RFC it is not needed.

+`cplane_notified_generation` and `sk_set_notified_generation` fields are used to
+track the last stage of the algorithm, when we need to notify safekeeper set and cplane
+with the final configuration after it's already committed to DB.
+
+The timeline is up-to-date (no migration in progress) if `new_sk_set` is null and
+`*_notified_generation` fields are up to date with `generation`. 
+
+It's possible to replace `*_notified_generation` with one boolean field `migration_completed`,
+but for better observability it's nice to have them separately.
+
 #### API

 Node management is similar to pageserver:
-1) POST `/control/v1/safekeepers` inserts safekeeper.
-2) GET `/control/v1/safekeepers` lists safekeepers.
-3) GET `/control/v1/safekeepers/:node_id` gets safekeeper.
-4) PUT `/control/v1/safekepers/:node_id/status` changes status to e.g.
+1) POST `/control/v1/safekeeper` inserts safekeeper.
+2) GET `/control/v1/safekeeper` lists safekeepers.
+3) GET `/control/v1/safekeeper/:node_id` gets safekeeper.
+4) PUT `/control/v1/safekeper/:node_id/scheduling_policy` changes status to e.g.
   `offline` or `decomissioned`. Initially it is simpler not to schedule any
    migrations here.

@@ -368,8 +379,8 @@ Migration API: the first version is the simplest and the most imperative:
 all timelines from one safekeeper to another. It accepts json
 ```
 {
-    "src_sk": u32,
-    "dst_sk": u32,
+    "src_sk": NodeId,
+    "dst_sk": NodeId,
    "limit": Optional<u32>,
 }
 ```
@@ -379,12 +390,15 @@ Returns list of scheduled requests.
 2) PUT `/control/v1/tenant/:tenant_id/timeline/:timeline_id/safekeeper_migrate` schedules `MigrationRequest`
   to move single timeline to given set of safekeepers:
 ```
-{
-    "desired_set": Vec<u32>,
+struct TimelineSafekeeperMigrateRequest {
+    "new_sk_set": Vec<NodeId>,
 }
 ```

-Returns scheduled request.
+In the first version the handler migrates the timeline to `new_sk_set` synchronously.
+Should be retried until success.
+
+In the future we might change it to asynchronous API and return scheduled request.

 Similar call should be added for the tenant.

@@ -434,6 +448,9 @@ table! {
 }
 ```

+We load all pending ops from the table on startup into the memory.
+The table is needed only to preserve the state between restarts.
+
 `op_type` can be `include` (seed from peers and ensure generation is up to
 date), `exclude` (remove locally) and `delete`. Field is actually not strictly
 needed as it can be computed from current configuration, but gives more explicit
@@ -474,7 +491,7 @@ actions must be idempotent. Now, a tricky point here is timeline start LSN. For
 the initial (tenant creation) call cplane doesn't know it. However, setting
 start_lsn on safekeepers during creation is a good thing -- it provides a
 guarantee that walproposer can always find a common point in WAL histories of
-safekeeper and its own, and so absense of it would be a clear sign of
+safekeeper and its own, and so absence of it would be a clear sign of
 corruption. The following sequence works:
 1) Create timeline (or observe that it exists) on pageserver,
   figuring out last_record_lsn in response.
@@ -497,11 +514,9 @@ corruption. The following sequence works:
   retries the call until 200 response.

   There is a small question how request handler (timeline creation in this
-   case) would interact with per sk reconciler. As always I prefer to do the
-   simplest possible thing and here it seems to be just waking it up so it
-   re-reads the db for work to do. Passing work in memory is faster, but
-   that shouldn't matter, and path to scan db for work will exist anyway, 
-   simpler to reuse it.
+   case) would interact with per sk reconciler. In the current implementation
+   we first persist the request in the DB, and then send an in-memory request
+   to each safekeeper reconciler to process it.

 For pg version / wal segment size: while we may persist them in `timelines`
 table, it is not necessary as initial creation at step 3 can take them from
@@ -509,30 +524,40 @@ pageserver or cplane creation call and later pull_timeline will carry them
 around.

 Timeline migration.
-1) CAS to the db to create joint conf, and in the same transaction create
-   `safekeeper_timeline_pending_ops` `include` entries to initialize new members
-   as well as deliver this conf to current ones; poke per sk reconcilers to work
-   on it. Also any conf change should also poke cplane notifier task(s).
-2) Once it becomes possible per alg description above, get out of joint conf
-   with another CAS. Task should get wakeups from per sk reconcilers because 
-   conf switch is required for advancement; however retries should be sleep
-   based as well as LSN advancement might be needed, though in happy path 
-   it isn't. To see whether further transition is possible on wakup migration
-   executor polls safekeepers per the algorithm. CAS creating new conf with only
-   new members should again insert entries to `safekeeper_timeline_pending_ops`
-   to switch them there, as well as `exclude` rows to remove timeline from 
-   old members.
+1) CAS to the db to create joint conf. Since this moment the migration is considered to be 
+   "in progress". We can detect all "in-progress" migrations looking into the database.
+2) Do steps 4-6 from the algorithm, including `pull_timeline` onto `new_sk_set`, update membership
+   configuration on all safekeepers, notify cplane, etc. All operations are idempotent,
+   so we don't need to persist anything in the database at this stage. If any errors occur,
+   it's safe to retry or abort the migration.
+3) Once it becomes possible per alg description above, get out of joint conf
+   with another CAS. Also should insert `exclude` entries into `safekeeper_timeline_pending_ops`
+   in the same DB transaction. Adding `exclude` entries atomically is nesessary because after
+   CAS we don't have the list of excluded safekeepers in the `timelines` table anymore, but we
+   need to have them persisted somewhere in case the migration is interrupted right after the CAS.
+4) Finish the migration. The final membership configuration is committed to the DB at this stage.
+   So, the migration can not be aborted anymore. But it can still be retried if the migration fails
+   past stage 3. To finish the migration we need to send the new membership configuration to
+   a new quorum of safekeepers, notify cplane with the new safekeeper list and schedule the `exclude`
+   requests to in-memory queue for safekeeper reconciler. If the algrorithm is retried, it's
+   possible that we have already committed `exclude` requests to DB, but didn't send them to
+   the in-memory queue. In this case we need to read them from `safekeeper_timeline_pending_ops`
+   because it's the only place where they are persistent. The fields `sk_set_notified_generation`
+   and `cplane_notified_generation` are updated after each step. The migration is considered
+   fully completed when they match the `generation` field.
+
+In practice, we can report "success" after stage 3 and do the "finish" step in per-timeline
+reconciler (if we implement it). But it's wise to at least try to finish them synchronously,
+so the timeline is always in a "good state" and doesn't require an old quorum to commit
+WAL after the migration reported "success".

 Timeline deletion: just set `deleted_at` on the timeline row and insert
 `safekeeper_timeline_pending_ops` entries in the same xact, the rest is done by
 per sk reconcilers.

-When node is removed (set to `decomissioned`), `safekeeper_timeline_pending_ops`
+When node is removed (set to `decommissioned`), `safekeeper_timeline_pending_ops`
 for it must be cleared in the same transaction.

-One more task pool should infinitely retry notifying control plane about changed
-safekeeper sets (trying making `cplane_notified_generation` equal `generation`).
-
 #### Dealing with multiple instances of storage_controller

 Operations described above executed concurrently might create some errors but do
@@ -541,7 +566,7 @@ of storage_controller it is fine to have it temporarily, e.g. during redeploy.

 To harden against some controller instance creating some work in
 `safekeeper_timeline_pending_ops` and then disappearing without anyone pickup up
-the job per sk reconcilers apart from explicit wakups should scan for work
+the job per sk reconcilers apart from explicit wakeups should scan for work
 periodically. It is possible to remove that though if all db updates are
 protected with leadership token/term -- then such scans are needed only after
 leadership is acquired.
@@ -563,7 +588,7 @@ There should be following layers of tests:
   safekeeper communication and pull_timeline need to be mocked and main switch
   procedure wrapped to as a node (thread) in simulation tests, using these
   mocks. Test would inject migrations like it currently injects
-   safekeeper/walproposer restars. Main assert is the same -- committed WAL must
+   safekeeper/walproposer restarts. Main assert is the same -- committed WAL must
   not be lost.

 3) Since simulation testing injects at relatively high level points (not
@@ -613,7 +638,7 @@ Let's have the following implementation bits for gradual rollout:
  `notify-safekeepers`.

 Then the rollout for a region would be:
- Current situation: safekeepers are choosen by control_plane.
+- Current situation: safekeepers are chosen by control_plane.
 - We manually migrate some timelines, test moving them around.
 - Then we enable `--set-safekeepers` so that all new timelines
  are on storage controller.
--- a/endpoint_storage/src/app.rs
+++ b/endpoint_storage/src/app.rs
@@ -13,6 +13,8 @@ use utils::backoff::retry;
 pub fn app(state: Arc<Storage>) -> Router<()> {
    use axum::routing::{delete as _delete, get as _get};
    let delete_prefix = _delete(delete_prefix);
+    // NB: On any changes do not forget to update the OpenAPI spec
+    // in /endpoint_storage/src/openapi_spec.yml.
    Router::new()
        .route(
            "/{tenant_id}/{timeline_id}/{endpoint_id}/{*path}",
--- a/endpoint_storage/src/openapi_spec.yml
+++ b/endpoint_storage/src/openapi_spec.yml
@@ -0,0 +1,146 @@
+openapi: "3.0.2"
+info:
+  title: Endpoint Storage API
+  description: Endpoint Storage API
+  version: "1.0"
+  license:
+    name: "Apache"
+    url: https://github.com/neondatabase/neon/blob/main/LICENSE
+servers:
+  - url: ""
+paths:
+  /status:
+    description: Healthcheck endpoint
+    get:
+      description: Healthcheck
+      security: []
+      responses:
+        "200":
+          description: OK
+
+  /{tenant_id}/{timeline_id}/{endpoint_id}/{key}:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: endpoint_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: key
+        in: path
+        required: true
+        schema:
+          type: string
+    get:
+      description: Get file from blob storage
+      responses:
+        "200":
+          description: "File stream from blob storage"
+          content:
+            application/octet-stream:
+              schema:
+                type: string
+                format: binary
+        "400":
+          description: File was not found
+        "403":
+          description: JWT does not authorize request to this route
+    put:
+      description: Insert file into blob storage. If file exists, override it
+      requestBody:
+        content:
+          application/octet-stream:
+            schema:
+              type: string
+              format: binary
+      responses:
+        "200":
+          description: File was inserted successfully
+        "403":
+          description: JWT does not authorize request to this route
+    delete:
+      description: Delete file from blob storage
+      responses:
+        "200":
+          description: File was successfully deleted or not found
+        "403":
+          description: JWT does not authorize request to this route
+
+  /{tenant_id}/{timeline_id}/{endpoint_id}:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: endpoint_id
+        in: path
+        required: true
+        schema:
+          type: string
+    delete:
+      description: Delete endpoint data from blob storage
+      responses:
+        "200":
+          description: Endpoint data was deleted
+        "403":
+          description: JWT does not authorize request to this route
+
+  /{tenant_id}/{timeline_id}:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+      - name: timeline_id
+        in: path
+        required: true
+        schema:
+          type: string
+    delete:
+      description: Delete timeline data from blob storage
+      responses:
+        "200":
+          description: Timeline data was deleted
+        "403":
+          description: JWT does not authorize request to this route
+
+  /{tenant_id}:
+    parameters:
+      - name: tenant_id
+        in: path
+        required: true
+        schema:
+          type: string
+    delete:
+      description: Delete tenant data from blob storage
+      responses:
+        "200":
+          description: Tenant data was deleted
+        "403":
+          description: JWT does not authorize request to this route
+
+components:
+  securitySchemes:
+    JWT:
+      type: http
+      scheme: bearer
+      bearerFormat: JWT
+
+security:
+  - JWT: []
--- a/libs/alloc-metrics/Cargo.toml
+++ b/libs/alloc-metrics/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "alloc-metrics"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+metrics.workspace = true
+measured.workspace = true
+thread_local.workspace = true
+
+[dev-dependencies]
+criterion.workspace = true
+tikv-jemallocator.workspace = true
+
+[[bench]]
+harness = false
+name = "alloc"
--- a/libs/alloc-metrics/benches/alloc.rs
+++ b/libs/alloc-metrics/benches/alloc.rs
@@ -0,0 +1,110 @@
+use std::alloc::{GlobalAlloc, Layout, System, handle_alloc_error};
+
+use alloc_metrics::TrackedAllocator;
+use criterion::{
+    AxisScale, BenchmarkGroup, BenchmarkId, Criterion, PlotConfiguration, measurement::Measurement,
+};
+use measured::FixedCardinalityLabel;
+use tikv_jemallocator::Jemalloc;
+
+fn main() {
+    let mut c = Criterion::default().configure_from_args();
+    bench(&mut c);
+    c.final_summary();
+}
+
+#[rustfmt::skip]
+fn bench(c: &mut Criterion) {
+    bench_alloc(c.benchmark_group("alloc/system"),  &System, &ALLOC_SYSTEM);
+    bench_alloc(c.benchmark_group("alloc/jemalloc"), &Jemalloc, &ALLOC_JEMALLOC);
+
+    bench_dealloc(c.benchmark_group("dealloc/system"), &System, &ALLOC_SYSTEM);
+    bench_dealloc(c.benchmark_group("dealloc/jemalloc"), &Jemalloc, &ALLOC_JEMALLOC);
+}
+
+#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]
+#[label(singleton = "memory_context")]
+pub enum MemoryContext {
+    Root,
+    Test,
+}
+
+static ALLOC_SYSTEM: TrackedAllocator<System, MemoryContext> =
+    unsafe { TrackedAllocator::new(System, MemoryContext::Root) };
+static ALLOC_JEMALLOC: TrackedAllocator<Jemalloc, MemoryContext> =
+    unsafe { TrackedAllocator::new(Jemalloc, MemoryContext::Root) };
+
+const KB: u64 = 1024;
+const SIZES: [u64; 6] = [64, 256, KB, 4 * KB, 16 * KB, KB * KB];
+
+fn bench_alloc<A: GlobalAlloc>(
+    mut g: BenchmarkGroup<'_, impl Measurement>,
+    alloc1: &'static A,
+    alloc2: &'static TrackedAllocator<A, MemoryContext>,
+) {
+    g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+    for size in SIZES {
+        let layout = Layout::from_size_align(size as usize, 8).unwrap();
+
+        g.throughput(criterion::Throughput::Bytes(size));
+        g.bench_with_input(BenchmarkId::new("default", size), &layout, |b, &layout| {
+            let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64);
+            b.iter_batched(|| {}, |()| Alloc::new(alloc1, layout), bs);
+        });
+        g.bench_with_input(BenchmarkId::new("tracked", size), &layout, |b, &layout| {
+            let _scope = alloc2.scope(MemoryContext::Test);
+
+            let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64);
+            b.iter_batched(|| {}, |()| Alloc::new(alloc2, layout), bs);
+        });
+    }
+}
+
+fn bench_dealloc<A: GlobalAlloc>(
+    mut g: BenchmarkGroup<'_, impl Measurement>,
+    alloc1: &'static A,
+    alloc2: &'static TrackedAllocator<A, MemoryContext>,
+) {
+    g.plot_config(PlotConfiguration::default().summary_scale(AxisScale::Logarithmic));
+    for size in SIZES {
+        let layout = Layout::from_size_align(size as usize, 8).unwrap();
+
+        g.throughput(criterion::Throughput::Bytes(size));
+        g.bench_with_input(BenchmarkId::new("default", size), &layout, |b, &layout| {
+            let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64);
+            b.iter_batched(|| Alloc::new(alloc1, layout), drop, bs);
+        });
+        g.bench_with_input(BenchmarkId::new("tracked", size), &layout, |b, &layout| {
+            let _scope = alloc2.scope(MemoryContext::Test);
+
+            let bs = criterion::BatchSize::NumBatches(10 + size.ilog2() as u64);
+            b.iter_batched(|| Alloc::new(alloc2, layout), drop, bs);
+        });
+    }
+}
+
+struct Alloc<'a, A: GlobalAlloc> {
+    alloc: &'a A,
+    ptr: *mut u8,
+    layout: Layout,
+}
+
+impl<'a, A: GlobalAlloc> Alloc<'a, A> {
+    fn new(alloc: &'a A, layout: Layout) -> Self {
+        let ptr = unsafe { alloc.alloc(layout) };
+        if ptr.is_null() {
+            handle_alloc_error(layout);
+        }
+
+        // actually make the page resident.
+        unsafe { ptr.cast::<u8>().write(1) };
+
+        Self { alloc, ptr, layout }
+    }
+}
+
+impl<'a, A: GlobalAlloc> Drop for Alloc<'a, A> {
+    fn drop(&mut self) {
+        unsafe { self.alloc.dealloc(self.ptr, self.layout) };
+    }
+}
--- a/libs/alloc-metrics/src/counters.rs
+++ b/libs/alloc-metrics/src/counters.rs
@@ -0,0 +1,48 @@
+use std::marker::PhantomData;
+
+use measured::{
+    FixedCardinalityLabel, LabelGroup, label::StaticLabelSet, metric::MetricFamilyEncoding,
+};
+use metrics::{CounterPairAssoc, Dec, Inc, MeasuredCounterPairState};
+
+use crate::metric_vec::DenseMetricVec;
+
+pub struct DenseCounterPairVec<
+    A: CounterPairAssoc<LabelGroupSet = StaticLabelSet<L>>,
+    L: FixedCardinalityLabel + LabelGroup,
+> {
+    pub vec: DenseMetricVec<MeasuredCounterPairState, L>,
+    pub _marker: PhantomData<A>,
+}
+
+impl<A: CounterPairAssoc<LabelGroupSet = StaticLabelSet<L>>, L: FixedCardinalityLabel + LabelGroup>
+    DenseCounterPairVec<A, L>
+{
+    pub fn new() -> Self {
+        Self {
+            vec: DenseMetricVec::new(),
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<T, A, L> ::measured::metric::group::MetricGroup<T> for DenseCounterPairVec<A, L>
+where
+    T: ::measured::metric::group::Encoding,
+    ::measured::metric::counter::CounterState: ::measured::metric::MetricEncoding<T>,
+    A: CounterPairAssoc<LabelGroupSet = StaticLabelSet<L>>,
+    L: FixedCardinalityLabel + LabelGroup,
+{
+    fn collect_group_into(&self, enc: &mut T) -> Result<(), T::Err> {
+        // write decrement first to avoid a race condition where inc - dec < 0
+        T::write_help(enc, A::DEC_NAME, A::DEC_HELP)?;
+        self.vec
+            .collect_family_into(A::DEC_NAME, &mut Dec(&mut *enc))?;
+
+        T::write_help(enc, A::INC_NAME, A::INC_HELP)?;
+        self.vec
+            .collect_family_into(A::INC_NAME, &mut Inc(&mut *enc))?;
+
+        Ok(())
+    }
+}
--- a/libs/alloc-metrics/src/lib.rs
+++ b/libs/alloc-metrics/src/lib.rs
@@ -0,0 +1,441 @@
+//! Tagged allocator measurements.
+
+mod counters;
+mod metric_vec;
+
+use std::{
+    alloc::{GlobalAlloc, Layout},
+    cell::Cell,
+    marker::PhantomData,
+    sync::{
+        OnceLock,
+        atomic::{AtomicU64, Ordering::Relaxed},
+    },
+};
+
+use measured::{
+    FixedCardinalityLabel, LabelGroup, MetricGroup,
+    label::StaticLabelSet,
+    metric::{MetricEncoding, counter::CounterState, group::Encoding, name::MetricName},
+};
+use metrics::{CounterPairAssoc, MeasuredCounterPairState};
+use thread_local::ThreadLocal;
+
+type AllocCounter<T> = counters::DenseCounterPairVec<AllocPair<T>, T>;
+
+pub struct TrackedAllocator<A, T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup> {
+    inner: A,
+
+    /// potentially high-content fallback if the thread was not registered.
+    default_counters: MeasuredCounterPairState,
+    /// Default tag to use if this thread is not registered.
+    default_tag: T,
+
+    thread: OnceLock<RegisteredThread<T>>,
+
+    /// where thread alloc data is eventually saved to, even if threads are shutdown.
+    global: OnceLock<AllocCounter<T>>,
+}
+
+impl<A, T> TrackedAllocator<A, T>
+where
+    T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup,
+{
+    /// # Safety
+    ///
+    /// [`FixedCardinalityLabel`] must be implemented correctly, fully dense, and must not panic.
+    pub const unsafe fn new(alloc: A, default: T) -> Self {
+        TrackedAllocator {
+            inner: alloc,
+            default_tag: default,
+            default_counters: MeasuredCounterPairState {
+                inc: CounterState {
+                    count: AtomicU64::new(0),
+                },
+                dec: CounterState {
+                    count: AtomicU64::new(0),
+                },
+            },
+            thread: OnceLock::new(),
+            global: OnceLock::new(),
+        }
+    }
+
+    /// Allocations
+    pub fn register_thread(&'static self) {
+        self.register_thread_inner();
+    }
+
+    pub fn scope(&'static self, tag: T) -> AllocScope<'static, T> {
+        let cell = self.register_thread_inner();
+        let last = cell.replace(tag);
+        AllocScope { cell, last }
+    }
+
+    fn register_thread_inner(&'static self) -> &'static Cell<T> {
+        let thread = self.thread.get_or_init(|| RegisteredThread {
+            scope: ThreadLocal::new(),
+            state: ThreadLocal::new(),
+        });
+
+        thread.state.get_or(|| ThreadState {
+            counters: AllocCounter::new(),
+            global: self.global.get_or_init(AllocCounter::new),
+        });
+
+        thread.scope.get_or(|| Cell::new(self.default_tag))
+    }
+}
+
+macro_rules! alloc {
+    ($alloc_fn:ident) => {
+        unsafe fn $alloc_fn(&self, layout: Layout) -> *mut u8 {
+            let Ok((tagged_layout, tag_offset)) = layout.extend(Layout::new::<T>()) else {
+                return std::ptr::null_mut();
+            };
+            let tagged_layout = tagged_layout.pad_to_align();
+
+            // Safety: The layout is not zero-sized.
+            let ptr = unsafe { self.inner.$alloc_fn(tagged_layout) };
+
+            // allocation failed.
+            if ptr.is_null() {
+                return ptr;
+            }
+
+            // We are being very careful here to not allocate or panic.
+            let thread = self.thread.get().map(|s| (s.scope.get(), s.state.get()));
+            let tag = thread.and_then(|t| t.0).map_or(self.default_tag, Cell::get);
+
+            // Allocation successful. Write our tag
+            // Safety: tag_offset is inbounds of the ptr
+            unsafe { ptr.add(tag_offset).cast::<T>().write(tag) }
+
+            let counters = thread.and_then(|t| t.1).map(|s| &s.counters);
+            let metric = if let Some(counters) = counters {
+                counters.vec.get_metric(tag)
+            } else {
+                // if tag is not default, then the thread state would have been registered, therefore tag must be default.
+                &self.default_counters
+            };
+
+            metric.inc.count.fetch_add(layout.size() as u64, Relaxed);
+
+            ptr
+        }
+    };
+}
+
+// We will tag our allocation by adding `T` to the end of the layout.
+// This is ok only as long as it does not overflow. If it does, we will
+// just fail the allocation by returning null.
+//
+// Safety: we will not unwind during alloc, and we will ensure layouts are handled correctly.
+unsafe impl<A, T> GlobalAlloc for TrackedAllocator<A, T>
+where
+    A: GlobalAlloc,
+    T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup,
+{
+    alloc!(alloc);
+    alloc!(alloc_zeroed);
+
+    unsafe fn realloc(&self, ptr: *mut u8, layout: Layout, new_size: usize) -> *mut u8 {
+        // SAFETY: the caller must ensure that the `new_size` does not overflow.
+        // `layout.align()` comes from a `Layout` and is thus guaranteed to be valid.
+        let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, layout.align()) };
+
+        let Ok((new_tagged_layout, new_tag_offset)) = new_layout.extend(Layout::new::<T>()) else {
+            return std::ptr::null_mut();
+        };
+        let new_tagged_layout = new_tagged_layout.pad_to_align();
+
+        let Ok((tagged_layout, tag_offset)) = layout.extend(Layout::new::<T>()) else {
+            // Safety: This layout clearly did not match what was originally allocated,
+            // otherwise alloc() would have caught this error and returned null.
+            unsafe { std::hint::unreachable_unchecked() }
+        };
+        let tagged_layout = tagged_layout.pad_to_align();
+
+        // get the tag set during alloc
+        // Safety: tag_offset is inbounds of the ptr
+        let tag = unsafe { ptr.add(tag_offset).cast::<T>().read() };
+
+        // Safety: layout sizes are correct
+        let new_ptr = unsafe {
+            self.inner
+                .realloc(ptr, tagged_layout, new_tagged_layout.size())
+        };
+
+        // allocation failed.
+        if new_ptr.is_null() {
+            return new_ptr;
+        }
+
+        // We are being very careful here to not allocate or panic.
+        let thread = self.thread.get().map(|s| (s.scope.get(), s.state.get()));
+        let new_tag = thread.and_then(|t| t.0).map_or(self.default_tag, Cell::get);
+
+        // Allocation successful. Write our tag
+        // Safety: new_tag_offset is inbounds of the ptr
+        unsafe { new_ptr.add(new_tag_offset).cast::<T>().write(new_tag) }
+
+        let counters = thread.and_then(|t| t.1).map(|s| &s.counters);
+        let counters = counters.or_else(|| self.global.get());
+        let (new_metric, old_metric) = if let Some(counters) = counters {
+            let new_metric = counters.vec.get_metric(new_tag);
+            let old_metric = counters.vec.get_metric(tag);
+
+            (new_metric, old_metric)
+        } else {
+            // no tag was registered at all, therefore both tags must be default.
+            (&self.default_counters, &self.default_counters)
+        };
+
+        let (inc, dec) = if tag.encode() != new_tag.encode() {
+            (new_layout.size() as u64, layout.size() as u64)
+        } else if new_layout.size() > layout.size() {
+            ((new_layout.size() - layout.size()) as u64, 0)
+        } else {
+            (0, (layout.size() - new_layout.size()) as u64)
+        };
+
+        new_metric.inc.count.fetch_add(inc, Relaxed);
+        old_metric.dec.count.fetch_add(dec, Relaxed);
+
+        new_ptr
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        let Ok((tagged_layout, tag_offset)) = layout.extend(Layout::new::<T>()) else {
+            // Safety: This layout clearly did not match what was originally allocated,
+            // otherwise alloc() would have caught this error and returned null.
+            unsafe { std::hint::unreachable_unchecked() }
+        };
+        let tagged_layout = tagged_layout.pad_to_align();
+
+        // get the tag set during alloc
+        // Safety: tag_offset is inbounds of the ptr
+        let tag = unsafe { ptr.add(tag_offset).cast::<T>().read() };
+
+        // Safety: caller upholds contract for us
+        unsafe { self.inner.dealloc(ptr, tagged_layout) }
+
+        // We are being very careful here to not allocate or panic.
+        let thread = self.thread.get().map(|s| (s.scope.get(), s.state.get()));
+        let counters = thread.and_then(|t| t.1).map(|s| &s.counters);
+        let counters = counters.or_else(|| self.global.get());
+
+        let metric = if let Some(counters) = counters {
+            counters.vec.get_metric(tag)
+        } else {
+            // if tag is not default, then global would have been registered, therefore tag must be default.
+            &self.default_counters
+        };
+
+        metric.dec.count.fetch_add(layout.size() as u64, Relaxed);
+    }
+}
+
+pub struct AllocScope<'a, T: FixedCardinalityLabel> {
+    cell: &'a Cell<T>,
+    last: T,
+}
+
+impl<'a, T: FixedCardinalityLabel> Drop for AllocScope<'a, T> {
+    fn drop(&mut self) {
+        self.cell.set(self.last);
+    }
+}
+
+struct AllocPair<T>(PhantomData<T>);
+
+impl<T: FixedCardinalityLabel + LabelGroup> CounterPairAssoc for AllocPair<T> {
+    const INC_NAME: &'static MetricName = MetricName::from_str("allocated_bytes");
+    const DEC_NAME: &'static MetricName = MetricName::from_str("deallocated_bytes");
+
+    const INC_HELP: &'static str = "total number of bytes allocated";
+    const DEC_HELP: &'static str = "total number of bytes deallocated";
+
+    type LabelGroupSet = StaticLabelSet<T>;
+}
+
+struct RegisteredThread<T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup> {
+    /// Current memory context for this thread.
+    scope: ThreadLocal<Cell<T>>,
+    /// per thread state containing low contention counters for faster allocations.
+    state: ThreadLocal<ThreadState<T>>,
+}
+
+struct ThreadState<T: 'static + FixedCardinalityLabel + LabelGroup> {
+    counters: AllocCounter<T>,
+    global: &'static AllocCounter<T>,
+}
+
+// Ensure the counters are measured on thread destruction.
+impl<T: 'static + FixedCardinalityLabel + LabelGroup> Drop for ThreadState<T> {
+    fn drop(&mut self) {
+        // iterate over all labels
+        for tag in (0..T::cardinality()).map(T::decode) {
+            // load and reset the counts in the thread-local counters.
+            let m = self.counters.vec.get_metric_mut(tag);
+            let inc = *m.inc.count.get_mut();
+            let dec = *m.dec.count.get_mut();
+
+            // add the counts into the global counters.
+            let m = self.global.vec.get_metric(tag);
+            m.inc.count.fetch_add(inc, Relaxed);
+            m.dec.count.fetch_add(dec, Relaxed);
+        }
+    }
+}
+
+impl<A, T, Enc> MetricGroup<Enc> for TrackedAllocator<A, T>
+where
+    T: 'static + Send + Sync + FixedCardinalityLabel + LabelGroup,
+    Enc: Encoding,
+    CounterState: MetricEncoding<Enc>,
+{
+    fn collect_group_into(&self, enc: &mut Enc) -> Result<(), Enc::Err> {
+        let global = self.global.get_or_init(AllocCounter::new);
+
+        // iterate over all counter threads
+        for s in self.thread.get().into_iter().flat_map(|s| s.state.iter()) {
+            // iterate over all labels
+            for tag in (0..T::cardinality()).map(T::decode) {
+                sample(global, s.counters.vec.get_metric(tag), tag);
+            }
+        }
+
+        sample(global, &self.default_counters, self.default_tag);
+
+        global.collect_group_into(enc)
+    }
+}
+
+fn sample<T: FixedCardinalityLabel + LabelGroup>(
+    global: &AllocCounter<T>,
+    local: &MeasuredCounterPairState,
+    tag: T,
+) {
+    // load and reset the counts in the thread-local counters.
+    let inc = local.inc.count.swap(0, Relaxed);
+    let dec = local.dec.count.swap(0, Relaxed);
+
+    // add the counts into the global counters.
+    let m = global.vec.get_metric(tag);
+    m.inc.count.fetch_add(inc, Relaxed);
+    m.dec.count.fetch_add(dec, Relaxed);
+}
+
+#[cfg(test)]
+mod tests {
+    use std::alloc::{GlobalAlloc, Layout, System};
+
+    use measured::{FixedCardinalityLabel, MetricGroup, text::BufferedTextEncoder};
+
+    use crate::TrackedAllocator;
+
+    #[derive(FixedCardinalityLabel, Clone, Copy, Debug)]
+    #[label(singleton = "memory_context")]
+    pub enum MemoryContext {
+        Root,
+        Test,
+    }
+
+    #[test]
+    fn alloc() {
+        // Safety: `MemoryContext` upholds the safety requirements.
+        static GLOBAL: TrackedAllocator<System, MemoryContext> =
+            unsafe { TrackedAllocator::new(System, MemoryContext::Root) };
+
+        GLOBAL.register_thread();
+
+        let _test = GLOBAL.scope(MemoryContext::Test);
+
+        let ptr = unsafe { GLOBAL.alloc(Layout::for_value(&[0_i32])) };
+        let ptr = unsafe { GLOBAL.realloc(ptr, Layout::for_value(&[0_i32]), 8) };
+
+        drop(_test);
+
+        let ptr = unsafe { GLOBAL.realloc(ptr, Layout::for_value(&[0_i32, 1_i32]), 4) };
+        unsafe { GLOBAL.dealloc(ptr, Layout::for_value(&[0_i32])) };
+
+        let mut text = BufferedTextEncoder::new();
+        GLOBAL.collect_group_into(&mut text).unwrap();
+        let text = String::from_utf8(text.finish().into()).unwrap();
+        assert_eq!(
+            text,
+            r#"# HELP deallocated_bytes total number of bytes deallocated
+# TYPE deallocated_bytes counter
+deallocated_bytes{memory_context="root"} 4
+deallocated_bytes{memory_context="test"} 8
+
+# HELP allocated_bytes total number of bytes allocated
+# TYPE allocated_bytes counter
+allocated_bytes{memory_context="root"} 4
+allocated_bytes{memory_context="test"} 8
+"#
+        );
+    }
+
+    #[test]
+    fn unregistered_thread() {
+        // Safety: `MemoryContext` upholds the safety requirements.
+        static GLOBAL: TrackedAllocator<System, MemoryContext> =
+            unsafe { TrackedAllocator::new(System, MemoryContext::Root) };
+
+        GLOBAL.register_thread();
+
+        // unregistered thread
+        std::thread::spawn(|| {
+            let ptr = unsafe { GLOBAL.alloc(Layout::for_value(&[0_i32])) };
+            unsafe { GLOBAL.dealloc(ptr, Layout::for_value(&[0_i32])) };
+        })
+        .join()
+        .unwrap();
+
+        let mut text = BufferedTextEncoder::new();
+        GLOBAL.collect_group_into(&mut text).unwrap();
+        let text = String::from_utf8(text.finish().into()).unwrap();
+        assert_eq!(
+            text,
+            r#"# HELP deallocated_bytes total number of bytes deallocated
+# TYPE deallocated_bytes counter
+deallocated_bytes{memory_context="root"} 4
+deallocated_bytes{memory_context="test"} 0
+
+# HELP allocated_bytes total number of bytes allocated
+# TYPE allocated_bytes counter
+allocated_bytes{memory_context="root"} 4
+allocated_bytes{memory_context="test"} 0
+"#
+        );
+    }
+
+    #[test]
+    fn fully_unregistered() {
+        // Safety: `MemoryContext` upholds the safety requirements.
+        static GLOBAL: TrackedAllocator<System, MemoryContext> =
+            unsafe { TrackedAllocator::new(System, MemoryContext::Root) };
+
+        let ptr = unsafe { GLOBAL.alloc(Layout::for_value(&[0_i32])) };
+        unsafe { GLOBAL.dealloc(ptr, Layout::for_value(&[0_i32])) };
+
+        let mut text = BufferedTextEncoder::new();
+        GLOBAL.collect_group_into(&mut text).unwrap();
+        let text = String::from_utf8(text.finish().into()).unwrap();
+        assert_eq!(
+            text,
+            r#"# HELP deallocated_bytes total number of bytes deallocated
+# TYPE deallocated_bytes counter
+deallocated_bytes{memory_context="root"} 4
+deallocated_bytes{memory_context="test"} 0
+
+# HELP allocated_bytes total number of bytes allocated
+# TYPE allocated_bytes counter
+allocated_bytes{memory_context="root"} 4
+allocated_bytes{memory_context="test"} 0
+"#
+        );
+    }
+}
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`ALTER ROLE {privileged_role_name} BYPASSRLS;`
				`@@ -1 +0,0 @@`
				`GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;`
				`@@ -0,0 +1 @@`
				`GRANT pg_monitor TO {privileged_role_name} WITH ADMIN OPTION;`
				`@@ -1 +0,0 @@`
				`GRANT EXECUTE ON FUNCTION pg_show_replication_origin_status TO neon_superuser;`
				`@@ -0,0 +1 @@`
				`GRANT pg_signal_backend TO {privileged_role_name} WITH ADMIN OPTION;`