Notes on cutover

rename
docs: add hot secondaries rfc
2026-05-17 05:00:38 +00:00 · 2025-03-14 19:19:28 +00:00 · 2025-03-14 15:33:23 +00:00 · 2025-03-13 17:22:56 +00:00 · 2025-03-04 08:18:19 +00:00 · 2025-03-03 22:04:59 +00:00
568 changed files with 15232 additions and 9154 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -14,6 +14,7 @@
 !compute/
 !compute_tools/
 !control_plane/
+!docker-compose/ext-src
 !libs/
 !pageserver/
 !pgxn/
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -32,3 +32,4 @@ config-variables:
  - NEON_DEV_AWS_ACCOUNT_ID
  - NEON_PROD_AWS_ACCOUNT_ID
  - AWS_ECR_REGION
+  - BENCHMARK_LARGE_OLTP_PROJECTID
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -38,9 +38,11 @@ runs:
    #
    - name: Set variables
      shell: bash -euxo pipefail {0}
+      env:
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+        BUCKET: neon-github-public-dev
      run: |
-        PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
-        if [ "${PR_NUMBER}" != "null" ]; then
+        if [ -n "${PR_NUMBER}" ]; then
          BRANCH_OR_PR=pr-${PR_NUMBER}
        elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \
             [ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then
@@ -59,8 +61,6 @@ runs:
        echo "LOCK_FILE=${LOCK_FILE}"       >> $GITHUB_ENV
        echo "WORKDIR=${WORKDIR}"           >> $GITHUB_ENV
        echo "BUCKET=${BUCKET}"             >> $GITHUB_ENV
-      env:
-        BUCKET: neon-github-public-dev

    # TODO: We can replace with a special docker image with Java and Allure pre-installed
    - uses: actions/setup-java@v4
@@ -80,8 +80,8 @@ runs:
          rm -f ${ALLURE_ZIP}
        fi
      env:
-        ALLURE_VERSION: 2.27.0
-        ALLURE_ZIP_SHA256: b071858fb2fa542c65d8f152c5c40d26267b2dfb74df1f1608a589ecca38e777
+        ALLURE_VERSION: 2.32.2
+        ALLURE_ZIP_SHA256: 3f28885e2118f6317c92f667eaddcc6491400af1fb9773c1f3797a5fa5174953

    - uses: aws-actions/configure-aws-credentials@v4
      if: ${{ !cancelled() }}
--- a/.github/actions/allure-report-store/action.yml
+++ b/.github/actions/allure-report-store/action.yml
@@ -18,9 +18,11 @@ runs:
  steps:
    - name: Set variables
      shell: bash -euxo pipefail {0}
+      env:
+        PR_NUMBER: ${{ github.event.pull_request.number }}
+        REPORT_DIR: ${{ inputs.report-dir }}
      run: |
-        PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
-        if [ "${PR_NUMBER}" != "null" ]; then
+        if [ -n "${PR_NUMBER}" ]; then
          BRANCH_OR_PR=pr-${PR_NUMBER}
        elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ] || \
             [ "${GITHUB_REF_NAME}" = "release-proxy" ] || [ "${GITHUB_REF_NAME}" = "release-compute" ]; then
@@ -32,8 +34,6 @@ runs:

        echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
        echo "REPORT_DIR=${REPORT_DIR}"     >> $GITHUB_ENV
-      env:
-        REPORT_DIR: ${{ inputs.report-dir }}

    - uses: aws-actions/configure-aws-credentials@v4
      if: ${{ !cancelled() }}
--- a/.github/actions/neon-branch-create/action.yml
+++ b/.github/actions/neon-branch-create/action.yml
@@ -84,7 +84,13 @@ runs:
          --header "Authorization: Bearer ${API_KEY}"
          )

-        role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name')
+        role_name=$(echo "$roles" | jq --raw-output '
+          (.roles | map(select(.protected == false))) as $roles |
+          if any($roles[]; .name == "neondb_owner")
+          then "neondb_owner"
+          else $roles[0].name
+          end
+        ')
        echo "role_name=${role_name}" >> $GITHUB_OUTPUT
      env:
        API_HOST: ${{ inputs.api_host }}
@@ -107,13 +113,13 @@ runs:
            )

          if [ -z "${reset_password}" ]; then
-            sleep 1
+            sleep $i
            continue
          fi

          password=$(echo $reset_password | jq --raw-output '.role.password')
          if [ "${password}" == "null" ]; then
-            sleep 1
+            sleep $i # increasing backoff
            continue
          fi

--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -44,6 +44,11 @@ inputs:
    description: 'Postgres version to use for tests'
    required: false
    default: 'v16'
+  sanitizers:
+    description: 'enabled or disabled'
+    required: false
+    default: 'disabled'
+    type: string
  benchmark_durations:
    description: 'benchmark durations JSON'
    required: false
@@ -59,7 +64,7 @@ runs:
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
+        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
        path: /tmp/neon
        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}

@@ -112,6 +117,7 @@ runs:
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
        RERUN_FAILED: ${{ inputs.rerun_failed }}
        PG_VERSION: ${{ inputs.pg_version }}
+        SANITIZERS: ${{ inputs.sanitizers }}
      shell: bash -euxo pipefail {0}
      run: |
        # PLATFORM will be embedded in the perf test report
@@ -236,5 +242,5 @@ runs:
      uses: ./.github/actions/allure-report-store
      with:
        report-dir: /tmp/test_output/allure/results
-        unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}
+        unique-key: ${{ inputs.build_type }}-${{ inputs.pg_version }}-${{ runner.arch }}
        aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }}
--- a/.github/scripts/generate_image_maps.py
+++ b/.github/scripts/generate_image_maps.py
@@ -27,6 +27,7 @@ components = {
 registries = {
    "dev": [
        "docker.io/neondatabase",
+        "ghcr.io/neondatabase",
        f"{dev_aws}.dkr.ecr.{aws_region}.amazonaws.com",
        f"{dev_acr}.azurecr.io/neondatabase",
    ],
--- a/.github/scripts/previous-releases.jq
+++ b/.github/scripts/previous-releases.jq
@@ -0,0 +1,25 @@
+# Input: JSON array from https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases,
+# with tags named `release-<N>` for storage, `release-compute-<N>` for compute and `release-proxy-<N>` for proxy.
+# Extract only the `tag_name` field from each release object
+[ .[].tag_name ]
+
+# Parse each tag name with a regex; `capture` emits nothing for tags that do not match, so those are dropped
+| reduce map(
+    capture("^(?<full>release(-(?<component>proxy|compute))?-(?<version>\\d+))$")
+    | {
+        component: (.component // "storage"),  # Default to "storage" if no component is specified
+        version: (.version | tonumber),        # Convert the version number to an integer
+        full: .full                            # Store the full tag name for final output
+      }
+  )[] as $entry  # Loop over the transformed list
+
+# Keep, per component, the entry with the highest version number seen so far (starting from an empty object)
+({};
+ .[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end))
+
+# Turn the {component: entry} object into an array of "component=full-tag" strings
+| to_entries
+| map("\(.key)=\(.value.full)")
+
+# Emit each "component=full-tag" string as a separate output
+| .[]
--- a/.github/scripts/push_with_image_map.py
+++ b/.github/scripts/push_with_image_map.py
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -280,7 +280,7 @@ jobs:
      - name: Upload Neon artifact
        uses: ./.github/actions/upload
        with:
-          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
+          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
          path: /tmp/neon
          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

@@ -337,7 +337,7 @@ jobs:
      - name: Pytest regression tests
        continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
        uses: ./.github/actions/run-python-test-set
-        timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 60 || 180 }}
+        timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }}
        with:
          build_type: ${{ inputs.build-type }}
          test_selection: regress
@@ -347,6 +347,7 @@ jobs:
          real_s3_region: eu-central-1
          rerun_failed: true
          pg_version: ${{ matrix.pg_version }}
+          sanitizers: ${{ inputs.sanitizers }}
          aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
          # Attempt to stop tests gracefully to generate test reports
@@ -359,7 +360,6 @@ jobs:
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task
          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}
-          SANITIZERS: ${{ inputs.sanitizers }}

      # Temporary disable this step until we figure out why it's so flaky
      # Ref https://github.com/neondatabase/neon/issues/4540
--- a/.github/workflows/_meta.yml
+++ b/.github/workflows/_meta.yml
@@ -0,0 +1,103 @@
+name: Generate run metadata
+on:
+  workflow_call:
+    inputs:
+      github-event-name:  # event name forwarded by the calling workflow
+        type: string
+        required: true
+    outputs:
+      build-tag:
+        description: "Tag for the current workflow run"
+        value: ${{ jobs.tags.outputs.build-tag }}
+      previous-storage-release:
+        description: "Tag of the last storage release"
+        value: ${{ jobs.tags.outputs.storage }}
+      previous-proxy-release:
+        description: "Tag of the last proxy release"
+        value: ${{ jobs.tags.outputs.proxy }}
+      previous-compute-release:
+        description: "Tag of the last compute release"
+        value: ${{ jobs.tags.outputs.compute }}
+      run-kind:
+        description: "The kind of run we're currently in. Will be one of `pr`, `push-main`, `storage-rc-pr`, `storage-release`, `proxy-rc-pr`, `proxy-release`, `compute-rc-pr`, `compute-release` or `unknown`"
+        value: ${{ jobs.tags.outputs.run-kind }}
+
+permissions: {}
+
+jobs:
+  tags:
+    runs-on: ubuntu-22.04
+    outputs:
+      build-tag: ${{ steps.build-tag.outputs.tag }}
+      compute: ${{ steps.previous-releases.outputs.compute }}
+      proxy: ${{ steps.previous-releases.outputs.proxy }}
+      storage: ${{ steps.previous-releases.outputs.storage }}
+      run-kind: ${{ steps.run-kind.outputs.run-kind }}
+    permissions:
+      contents: read
+    steps:
+      # Need `fetch-depth: 0` to count the number of commits in the branch
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Get run kind
+        id: run-kind
+        env:  # RUN_KIND is the label of the first matching (event, ref) pair below, else 'unknown'
+          RUN_KIND: >-
+            ${{
+              false
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'main')            && 'push-main'
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'release')         && 'storage-release'
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'release-compute') && 'compute-release'
+              || (inputs.github-event-name == 'push'         && github.ref_name == 'release-proxy')   && 'proxy-release'
+              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release')         && 'storage-rc-pr'
+              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr'
+              || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy')   && 'proxy-rc-pr'
+              || (inputs.github-event-name == 'pull_request')                                         && 'pr'
+              || 'unknown'
+            }}
+        run: |
+          echo "run-kind=$RUN_KIND" | tee -a "$GITHUB_OUTPUT"
+
+      - name: Get build tag
+        id: build-tag
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}
+          CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+          RUN_KIND: ${{ steps.run-kind.outputs.run-kind }}
+        run: |
+          case "$RUN_KIND" in
+          push-main)  # tag is the number of commits reachable from HEAD
+            echo "tag=$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          storage-release)
+            echo "tag=release-$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          proxy-release)
+            echo "tag=release-proxy-$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          compute-release)
+            echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr)  # tag is the ID of the 'Build and Test' run for this branch and commit
+            BUILD_AND_TEST_RUN_ID=$(gh run list -b "$CURRENT_BRANCH" -c "$CURRENT_SHA" -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
+            echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a "$GITHUB_OUTPUT"
+            ;;
+          *)
+            echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!"
+            exit 1
+          esac
+
+      - name: Get the previous release-tags
+        id: previous-releases
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh api --paginate \
+            -H "Accept: application/vnd.github+json" \
+            -H "X-GitHub-Api-Version: 2022-11-28" \
+            "/repos/${GITHUB_REPOSITORY}/releases" \
+          | jq -f .github/scripts/previous-releases.jq -r \
+          | tee -a "${GITHUB_OUTPUT}"
--- a/.github/workflows/_push-to-container-registry.yml
+++ b/.github/workflows/_push-to-container-registry.yml
@@ -11,8 +11,12 @@ on:
        description: AWS region to log in to. Required when pushing to ECR.
        required: false
        type: string
-      aws-account-ids:
-        description: Comma separated AWS account IDs to log in to for pushing to ECR. Required when pushing to ECR.
+      aws-account-id:
+        description: AWS account ID to log in to for pushing to ECR. Required when pushing to ECR.
+        required: false
+        type: string
+      aws-role-to-assume:
+        description: AWS role to assume for pushing to ECR. Required when pushing to ECR.
        required: false
        type: string
      azure-client-id:
@@ -31,16 +35,6 @@ on:
        description: ACR registry name. Required when pushing to ACR.
        required: false
        type: string
-    secrets:
-      docker-hub-username:
-        description: Docker Hub username. Required when pushing to Docker Hub.
-        required: false
-      docker-hub-password:
-        description: Docker Hub password. Required when pushing to Docker Hub.
-        required: false
-      aws-role-to-assume:
-        description: AWS role to assume. Required when pushing to ECR.
-        required: false

 permissions: {}

@@ -53,10 +47,11 @@ jobs:
    runs-on: ubuntu-22.04
    permissions:
      id-token: write  # Required for aws/azure login
+      packages: write  # required for pushing to GHCR
    steps:
      - uses: actions/checkout@v4
        with:
-          sparse-checkout: scripts/push_with_image_map.py
+          sparse-checkout: .github/scripts/push_with_image_map.py
          sparse-checkout-cone-mode: false

      - name: Print image-map
@@ -67,14 +62,14 @@ jobs:
        uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-region: "${{ inputs.aws-region }}"
-          role-to-assume: "${{ secrets.aws-role-to-assume }}"
+          role-to-assume: "arn:aws:iam::${{ inputs.aws-account-id }}:role/${{ inputs.aws-role-to-assume }}"
          role-duration-seconds: 3600

      - name: Login to ECR
        if: contains(inputs.image-map, 'amazonaws.com/')
        uses: aws-actions/amazon-ecr-login@v2
        with:
-          registries: "${{ inputs.aws-account-ids }}"
+          registries: "${{ inputs.aws-account-id }}"

      - name: Configure Azure credentials
        if: contains(inputs.image-map, 'azurecr.io/')
@@ -89,13 +84,21 @@ jobs:
        run: |
          az acr login --name=${{ inputs.acr-registry-name }}

+      - name: Login to GHCR
+        if: contains(inputs.image-map, 'ghcr.io/')
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.repository_owner }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
-          username: ${{ secrets.docker-hub-username }}
-          password: ${{ secrets.docker-hub-password }}
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

      - name: Copy docker images to target registries
-        run: python scripts/push_with_image_map.py
+        run: python3 .github/scripts/push_with_image_map.py
        env:
          IMAGE_MAP: ${{ inputs.image-map }}
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -140,6 +140,7 @@ jobs:
          --ignore test_runner/performance/test_logical_replication.py
          --ignore test_runner/performance/test_physical_replication.py
          --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py
+          --ignore test_runner/performance/test_cumulative_statistics_persistence.py
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -171,6 +172,61 @@ jobs:
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

+  cumstats-test:
+    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
+    env:
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      DEFAULT_PG_VERSION: 17
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
+      PLATFORM: "neon-staging"
+
+    runs-on: [ self-hosted, us-east-2, x64 ]
+    container:
+      image: neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+      options: --init
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Configure AWS credentials
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours
+
+    - name: Download Neon artifact
+      uses: ./.github/actions/download
+      with:
+        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        path: /tmp/neon/
+        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+    
+    - name: Verify that cumulative statistics are preserved
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance/test_cumulative_statistics_persistence.py
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 3600
+        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
+
  replication-tests:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    permissions:
@@ -398,6 +454,9 @@ jobs:
    runs-on: ${{ matrix.runner }}
    container:
      image: ${{ matrix.image }}
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init

    # Increase timeout to 8h, default timeout is 6h
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -65,38 +65,11 @@ jobs:
          token: ${{ secrets.GITHUB_TOKEN }}
          filters: .github/file-filters.yaml

-  tag:
+  meta:
    needs: [ check-permissions ]
-    runs-on: [ self-hosted, small ]
-    container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned
-    outputs:
-      build-tag: ${{steps.build-tag.outputs.tag}}
-
-    steps:
-      # Need `fetch-depth: 0` to count the number of commits in the branch
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Get build tag
-        run: |
-          echo run:$GITHUB_RUN_ID
-          echo ref:$GITHUB_REF_NAME
-          echo rev:$(git rev-list --count HEAD)
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
-            echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
-            echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'"
-            echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT
-          fi
-        shell: bash
-        id: build-tag
+    uses: ./.github/workflows/_meta.yml
+    with:
+      github-event-name: ${{ github.event_name }}

  build-build-tools-image:
    needs: [ check-permissions ]
@@ -199,7 +172,7 @@ jobs:
    secrets: inherit

  build-and-test-locally:
-    needs: [ tag, build-build-tools-image ]
+    needs: [ meta, build-build-tools-image ]
    strategy:
      fail-fast: false
      matrix:
@@ -213,7 +186,7 @@ jobs:
    with:
      arch: ${{ matrix.arch }}
      build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm
-      build-tag: ${{ needs.tag.outputs.build-tag }}
+      build-tag: ${{ needs.meta.outputs.build-tag }}
      build-type: ${{ matrix.build-type }}
      # Run tests on all Postgres versions in release builds and only on the latest version in debug builds.
      # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled.
@@ -497,13 +470,24 @@ jobs:
            })

  trigger-e2e-tests:
-    if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }}
-    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ]
+    # Depends on jobs that can get skipped
+    if: >-
+      ${{
+        (
+          !github.event.pull_request.draft
+          || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft')
+          || contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind)
+        ) && !failure() && !cancelled()
+      }}
+    needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ]
    uses: ./.github/workflows/trigger-e2e-tests.yml
+    with:
+      github-event-name: ${{ github.event_name }}
    secrets: inherit

  neon-image-arch:
-    needs: [ check-permissions, build-build-tools-image, tag ]
+    needs: [ check-permissions, build-build-tools-image, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
    strategy:
      matrix:
        arch: [ x64, arm64 ]
@@ -539,7 +523,7 @@ jobs:
          build-args: |
            ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }}
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
+            BUILD_TAG=${{ needs.meta.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm
            DEBIAN_VERSION=bookworm
          provenance: false
@@ -549,10 +533,11 @@ jobs:
          cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }}
          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }}
          tags: |
-            neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-${{ matrix.arch }}
+            neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }}

  neon-image:
-    needs: [ neon-image-arch, tag ]
+    needs: [ neon-image-arch, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
@@ -567,13 +552,14 @@ jobs:

      - name: Create multi-arch image
        run: |
-          docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm \
-                                             neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \
-                                             neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64
+          docker buildx imagetools create -t neondatabase/neon:${{ needs.meta.outputs.build-tag }} \
+                                          -t neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \
+                                             neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \
+                                             neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64

  compute-node-image-arch:
-    needs: [ check-permissions, build-build-tools-image, tag ]
+    needs: [ check-permissions, build-build-tools-image, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
@@ -631,7 +617,7 @@ jobs:
          build-args: |
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
            PG_VERSION=${{ matrix.version.pg }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
+            BUILD_TAG=${{ needs.meta.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
            DEBIAN_VERSION=${{ matrix.version.debian }}
          provenance: false
@@ -641,7 +627,7 @@ jobs:
          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }}
          tags: |
-            neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}
+            neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }}

      - name: Build neon extensions test image
        if: matrix.version.pg >= 'v16'
@@ -651,7 +637,7 @@ jobs:
          build-args: |
            GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
            PG_VERSION=${{ matrix.version.pg }}
-            BUILD_TAG=${{ needs.tag.outputs.build-tag }}
+            BUILD_TAG=${{ needs.meta.outputs.build-tag }}
            TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }}
            DEBIAN_VERSION=${{ matrix.version.debian }}
          provenance: false
@@ -661,10 +647,11 @@ jobs:
          target: extension-tests
          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }}
          tags: |
-            neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}
+            neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }}

  compute-node-image:
-    needs: [ compute-node-image-arch, tag ]
+    needs: [ compute-node-image-arch, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
@@ -692,27 +679,28 @@ jobs:

      - name: Create multi-arch compute-node image
        run: |
-          docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
-                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
-                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
+          docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+                                          -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
+                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
+                                             neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64

      - name: Create multi-arch neon-test-extensions image
        if: matrix.version.pg >= 'v16'
        run: |
-          docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-                                          -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \
-                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
-                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64
+          docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+                                          -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \
+                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \
+                                             neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64

-  vm-compute-node-image:
-    needs: [ check-permissions, tag, compute-node-image ]
-    runs-on: [ self-hosted, large ]
+  vm-compute-node-image-arch:
+    needs: [ check-permissions, meta, compute-node-image ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
    strategy:
      fail-fast: false
      matrix:
+        arch: [ amd64, arm64 ]
        version:
-          # see the comment for `compute-node-image-arch` job
          - pg: v14
            debian: bullseye
          - pg: v15
@@ -722,14 +710,14 @@ jobs:
          - pg: v17
            debian: bookworm
    env:
-      VM_BUILDER_VERSION: v0.37.1
+      VM_BUILDER_VERSION: v0.42.2

    steps:
      - uses: actions/checkout@v4

      - name: Downloading vm-builder
        run: |
-          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
+          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-${{ matrix.arch }} -o vm-builder
          chmod +x vm-builder

      - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193
@@ -742,22 +730,50 @@ jobs:
      # it won't have the proper authentication (written at v0.6.0)
      - name: Pulling compute-node image
        run: |
-          docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
+          docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}

      - name: Build vm image
        run: |
          ./vm-builder \
            -size=2G \
            -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \
-            -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \
-            -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
+            -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+            -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \
+            -target-arch=linux/${{ matrix.arch }}

      - name: Pushing vm-compute-node image
        run: |
-          docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}
+          docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }}
+
+  vm-compute-node-image:
+    needs: [ vm-compute-node-image-arch, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
+    runs-on: ubuntu-22.04  # nothing is built here; the job only stitches the per-arch tags into one multi-arch tag
+    strategy:
+      matrix:
+        version:
+          # see the comment for `compute-node-image-arch` job
+          - pg: v14
+          - pg: v15
+          - pg: v16
+          - pg: v17
+    steps:
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+
+      - name: Create multi-arch vm-compute-node image
+        run: |
+          docker buildx imagetools create -t neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \
+                                             neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \
+                                             neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64
+

  test-images:
-    needs: [ check-permissions, tag, neon-image, compute-node-image ]
+    needs: [ check-permissions, meta, neon-image, compute-node-image ]
+    # Depends on jobs that can get skipped
+    if: "!failure() && !cancelled()"
    strategy:
      fail-fast: false
      matrix:
@@ -775,17 +791,6 @@ jobs:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}

-      - name: Get the last compute release tag
-        id: get-last-compute-release-tag
-        env:
-          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-        run: |
-          tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\
-            -H "Accept: application/vnd.github+json" \
-            -H "X-GitHub-Api-Version: 2022-11-28" \
-            "/repos/${{ github.repository }}/releases")
-          echo tag=${tag} >> ${GITHUB_OUTPUT}
-
      # `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library.
      # Pick pageserver as currently the only binary with extra "version" features printed in the string to verify.
      # Regular pageserver version string looks like
@@ -795,8 +800,9 @@ jobs:
      # Ensure that we don't have bad versions.
      - name: Verify image versions
        shell: bash # ensure no set -e for better error messages
+        if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
        run: |
-          pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.tag.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")
+          pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version")

          echo "Pageserver version string: $pageserver_version"

@@ -813,7 +819,24 @@ jobs:
      - name: Verify docker-compose example and test extensions
        timeout-minutes: 20
        env:
-          TAG: ${{needs.tag.outputs.build-tag}}
+          TAG: >-
+            ${{
+              contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
+              && needs.meta.outputs.previous-storage-release
+              || needs.meta.outputs.build-tag
+            }}
+          COMPUTE_TAG: >-
+            ${{
+              contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
+              && needs.meta.outputs.previous-compute-release
+              || needs.meta.outputs.build-tag
+            }}
+          TEST_EXTENSIONS_TAG: >-
+            ${{
+              contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
+              && 'latest'
+              || needs.meta.outputs.build-tag
+            }}
          TEST_VERSION_ONLY: ${{ matrix.pg_version }}
        run: ./docker-compose/docker_compose_test.sh

@@ -825,10 +848,17 @@ jobs:

      - name: Test extension upgrade
        timeout-minutes: 20
-        if: ${{ needs.tag.outputs.build-tag == github.run_id }}
+        if: ${{ contains(fromJSON('["pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
        env:
-          NEWTAG: ${{ needs.tag.outputs.build-tag }}
-          OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          TAG: >-
+            ${{
+              false
+              || needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag
+              || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release
+            }}
+          TEST_EXTENSIONS_TAG: ${{ needs.meta.outputs.previous-compute-release }}
+          NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }}
+          OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }}
        run: ./docker-compose/test_extensions_upgrade.sh

      - name: Print logs and clean up
@@ -838,7 +868,7 @@ jobs:
          docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down

  generate-image-maps:
-    needs: [ tag ]
+    needs: [ meta ]
    runs-on: ubuntu-22.04
    outputs:
      neon-dev: ${{ steps.generate.outputs.neon-dev }}
@@ -848,14 +878,14 @@ jobs:
    steps:
      - uses: actions/checkout@v4
        with:
-          sparse-checkout: scripts/generate_image_maps.py
+          sparse-checkout: .github/scripts/generate_image_maps.py
          sparse-checkout-cone-mode: false

      - name: Generate Image Maps
        id: generate
-        run: python scripts/generate_image_maps.py
+        run: python3 .github/scripts/generate_image_maps.py
        env:
-          BUILD_TAG: "${{ needs.tag.outputs.build-tag }}"
+          BUILD_TAG: "${{ needs.meta.outputs.build-tag }}"
          BRANCH: "${{ github.ref_name }}"
          DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}"
          PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}"
@@ -864,88 +894,95 @@ jobs:
          AWS_REGION: "${{ vars.AWS_ECR_REGION }}"

  push-neon-image-dev:
-    needs: [ generate-image-maps, neon-image ]
+    needs: [ meta, generate-image-maps, neon-image ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }}
    uses: ./.github/workflows/_push-to-container-registry.yml
+    permissions:
+      id-token: write  # Required for aws/azure login
+      packages: write  # required for pushing to GHCR
    with:
      image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}'
      aws-region: ${{ vars.AWS_ECR_REGION }}
-      aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      aws-role-to-assume: "gha-oidc-neon-admin"
      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
-    secrets:
-      aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
-      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+    secrets: inherit

  push-compute-image-dev:
-    needs: [ generate-image-maps, vm-compute-node-image ]
+    needs: [ meta, generate-image-maps, vm-compute-node-image ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    uses: ./.github/workflows/_push-to-container-registry.yml
+    permissions:
+      id-token: write  # Required for aws/azure login
+      packages: write  # required for pushing to GHCR
    with:
      image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}'
      aws-region: ${{ vars.AWS_ECR_REGION }}
-      aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      aws-role-to-assume: "gha-oidc-neon-admin"
      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
-    secrets:
-      aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
-      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+    secrets: inherit

  push-neon-image-prod:
-    if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ generate-image-maps, neon-image, test-images ]
+    needs: [ meta, generate-image-maps, neon-image, test-images ]
+    # Depends on jobs that can get skipped
+    if: ${{ !failure() && !cancelled() && contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }}
    uses: ./.github/workflows/_push-to-container-registry.yml
+    permissions:
+      id-token: write  # Required for aws/azure login
+      packages: write  # required for pushing to GHCR
    with:
      image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}'
      aws-region: ${{ vars.AWS_ECR_REGION }}
-      aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
+      aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
+      aws-role-to-assume: "gha-oidc-neon-admin"
      azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
      azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
      acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
-    secrets:
-      aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}"
-      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+    secrets: inherit

  push-compute-image-prod:
-    if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
-    needs: [ generate-image-maps, vm-compute-node-image, test-images ]
+    needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ]
+    # Depends on jobs that can get skipped
+    if: ${{ !failure() && !cancelled() && needs.meta.outputs.run-kind == 'compute-release' }}
    uses: ./.github/workflows/_push-to-container-registry.yml
+    permissions:
+      id-token: write  # Required for aws/azure login
+      packages: write  # required for pushing to GHCR
    with:
      image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}'
      aws-region: ${{ vars.AWS_ECR_REGION }}
-      aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
+      aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}"
+      aws-role-to-assume: "gha-oidc-neon-admin"
      azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }}
      azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }}
      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
      acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }}
-    secrets:
-      aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}"
-      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+    secrets: inherit

  # This is a bit of a special case so we're not using a generated image map.
  add-latest-tag-to-neon-extensions-test-image:
    if: github.ref_name == 'main'
-    needs: [ tag, compute-node-image ]
+    needs: [ meta, compute-node-image ]
    uses: ./.github/workflows/_push-to-container-registry.yml
    with:
      image-map: |
        {
-          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
-          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
+          "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"],
+          "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"]
        }
-    secrets:
-      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+    secrets: inherit

  trigger-custom-extensions-build-and-wait:
-    needs: [ check-permissions, tag ]
+    needs: [ check-permissions, meta ]
+    if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }}
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
@@ -980,7 +1017,7 @@ jobs:
                \"ci_job_name\": \"build-and-upload-extensions\",
                \"commit_hash\": \"$COMMIT_SHA\",
                \"remote_repo\": \"${{ github.repository }}\",
-                \"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\",
+                \"compute_image_tag\": \"${{ needs.meta.outputs.build-tag }}\",
                \"remote_branch_name\": \"${{ github.ref_name }}\"
              }
            }"
@@ -1024,9 +1061,9 @@ jobs:
          exit 1

  deploy:
-    needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
-    # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
-    if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') && !failure() && !cancelled()
+    needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ]
+    # `!failure() && !cancelled()` is required because this job depends on jobs that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
+    if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
@@ -1037,108 +1074,103 @@ jobs:
      - uses: actions/checkout@v4

      - name: Create git tag and GitHub release
-        if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute'
+        if: ${{ contains(fromJSON('["storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) }}
        uses: actions/github-script@v7
+        env:
+          TAG: "${{ needs.meta.outputs.build-tag }}"
+          BRANCH: "${{ github.ref_name }}"
+          PREVIOUS_RELEASE: >-
+            ${{
+              false
+              || needs.meta.outputs.run-kind == 'storage-release' && needs.meta.outputs.previous-storage-release
+              || needs.meta.outputs.run-kind == 'proxy-release' && needs.meta.outputs.previous-proxy-release
+              || needs.meta.outputs.run-kind == 'compute-release' && needs.meta.outputs.previous-compute-release
+              || 'unknown'
+            }}
        with:
          retries: 5
          script: |
-            const tag = "${{ needs.tag.outputs.build-tag }}";
-            const branch = "${{ github.ref_name }}";
+            const { TAG, BRANCH, PREVIOUS_RELEASE } = process.env

            try {
              const existingRef = await github.rest.git.getRef({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                ref: `tags/${tag}`,
+                ref: `tags/${TAG}`,
              });

              if (existingRef.data.object.sha !== context.sha) {
-                throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
+                throw new Error(`Tag ${TAG} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`);
              }

-              console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`);
+              console.log(`Tag ${TAG} already exists and points to ${context.sha} as expected.`);
            } catch (error) {
              if (error.status !== 404) {
                throw error;
              }

-              console.log(`Tag ${tag} does not exist. Creating it...`);
+              console.log(`Tag ${TAG} does not exist. Creating it...`);
              await github.rest.git.createRef({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                ref: `refs/tags/${tag}`,
+                ref: `refs/tags/${TAG}`,
                sha: context.sha,
              });
-              console.log(`Tag ${tag} created successfully.`);
+              console.log(`Tag ${TAG} created successfully.`);
            }

            try {
              const existingRelease = await github.rest.repos.getReleaseByTag({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                tag: tag,
+                tag: TAG,
              });

-              console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`);
+              console.log(`Release for tag ${TAG} already exists (ID: ${existingRelease.data.id}).`);
            } catch (error) {
              if (error.status !== 404) {
                throw error;
              }

-              console.log(`Release for tag ${tag} does not exist. Creating it...`);
+              console.log(`Release for tag ${TAG} does not exist. Creating it...`);

              // Find the PR number using the commit SHA
              const pullRequests = await github.rest.pulls.list({
                owner: context.repo.owner,
                repo: context.repo.repo,
                state: 'closed',
-                base: branch,
+                base: BRANCH,
              });

              const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha);
              const prNumber = pr ? pr.number : null;

-              // Find the previous release on the branch
-              const releases = await github.rest.repos.listReleases({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                per_page: 100,
-              });
-
-              const branchReleases = releases.data
-                .filter((release) => {
-                  const regex = new RegExp(`^${branch}-\\d+$`);
-                  return regex.test(release.tag_name) && !release.draft && !release.prerelease;
-                })
-                .sort((a, b) => new Date(b.created_at) - new Date(a.created_at));
-
-              const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null;
-
              const releaseNotes = [
                prNumber
                  ? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.`
                  : 'Release PR not found.',
-                previousTag
-                  ? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.`
-                  : `No previous release found on branch ${branch}.`,
+                `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${PREVIOUS_RELEASE}...${TAG}.`
              ].join('\n\n');

              await github.rest.repos.createRelease({
                owner: context.repo.owner,
                repo: context.repo.repo,
-                tag_name: tag,
+                tag_name: TAG,
                body: releaseNotes,
              });
-              console.log(`Release for tag ${tag} created successfully.`);
+              console.log(`Release for tag ${TAG} created successfully.`);
            }

      - name: Trigger deploy workflow
        env:
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
+          RUN_KIND: ${{ needs.meta.outputs.run-kind }}
        run: |
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
+          case ${RUN_KIND} in
+          push-main)
+            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.meta.outputs.build-tag}} -f deployPreprodRegion=false
+            ;;
+          storage-release)
            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
              -f deployPgSniRouter=false \
              -f deployProxy=false \
@@ -1146,7 +1178,7 @@ jobs:
              -f deployStorageBroker=true \
              -f deployStorageController=true \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}} \
+              -f dockerTag=${{needs.meta.outputs.build-tag}} \
              -f deployPreprodRegion=true

            gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \
@@ -1154,8 +1186,9 @@ jobs:
              -f deployStorageBroker=true \
              -f deployStorageController=true \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}}
-          elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
+              -f dockerTag=${{needs.meta.outputs.build-tag}}
+            ;;
+          proxy-release)
            gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \
              -f deployPgSniRouter=true \
              -f deployProxy=true \
@@ -1163,7 +1196,7 @@ jobs:
              -f deployStorageBroker=false \
              -f deployStorageController=false \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}} \
+              -f dockerTag=${{needs.meta.outputs.build-tag}} \
              -f deployPreprodRegion=true

            gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \
@@ -1173,13 +1206,16 @@ jobs:
              -f deployProxyScram=true \
              -f deployProxyAuthBroker=true \
              -f branch=main \
-              -f dockerTag=${{needs.tag.outputs.build-tag}}
-          elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
-            gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.tag.outputs.build-tag}}
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main', 'release', 'release-proxy' or 'release-compute'"
+              -f dockerTag=${{needs.meta.outputs.build-tag}}
+            ;;
+          compute-release)
+            gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.meta.outputs.build-tag}}
+            ;;
+          *)
+            echo "RUN_KIND (value '${RUN_KIND}') is not set to either 'push-main', 'storage-release', 'proxy-release' or 'compute-release'"
            exit 1
-          fi
+            ;;
+          esac

  notify-storage-release-deploy-failure:
    needs: [ deploy ]
@@ -1204,7 +1240,7 @@ jobs:
      id-token: write # aws-actions/configure-aws-credentials
      statuses: write
      contents: read
-    # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod`
+    # `!failure() && !cancelled()` is required because this job transitively depends on jobs that can be skipped: `push-neon-image-prod` and `push-compute-image-prod`
    if: github.ref_name == 'release' && !failure() && !cancelled()

    runs-on: ubuntu-22.04
@@ -1294,7 +1330,8 @@ jobs:

  pin-build-tools-image:
    needs: [ build-build-tools-image, test-images, build-and-test-locally ]
-    if: github.ref_name == 'main'
+    # `!failure() && !cancelled()` is required because the job (transitively) depends on jobs that can be skipped
+    if: github.ref_name == 'main' && !failure() && !cancelled()
    uses: ./.github/workflows/pin-build-tools-image.yml
    with:
      from-tag: ${{ needs.build-build-tools-image.outputs.image-tag }}
@@ -1313,6 +1350,7 @@ jobs:
    # Format `needs` differently to make the list more readable.
    # Usually we do `needs: [...]`
    needs:
+      - meta
      - build-and-test-locally
      - check-codestyle-python
      - check-codestyle-rust
@@ -1336,7 +1374,7 @@ jobs:
          || needs.check-codestyle-python.result == 'skipped'
          || needs.check-codestyle-rust.result == 'skipped'
          || needs.files-changed.result == 'skipped'
-          || needs.push-compute-image-dev.result == 'skipped'
-          || needs.push-neon-image-dev.result == 'skipped'
+          || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
+          || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind))
          || needs.test-images.result == 'skipped'
-          || needs.trigger-custom-extensions-build-and-wait.result == 'skipped'
+          || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind))
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -38,6 +38,9 @@ jobs:
    runs-on: us-east-2
    container:
      image: neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init

    steps:
--- a/.github/workflows/force-test-extensions-upgrade.yml
+++ b/.github/workflows/force-test-extensions-upgrade.yml
@@ -52,8 +52,9 @@ jobs:
      - name: Test extension upgrade
        timeout-minutes: 20
        env:
-          NEWTAG: latest
-          OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          NEW_COMPUTE_TAG: latest
+          OLD_COMPUTE_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
+          TEST_EXTENSIONS_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }}
          PG_VERSION: ${{ matrix.pg-version }}
          FORCE_ALL_UPGRADE_TESTS: true
        run: ./docker-compose/test_extensions_upgrade.sh
--- a/.github/workflows/large_oltp_benchmark.yml
+++ b/.github/workflows/large_oltp_benchmark.yml
@@ -0,0 +1,157 @@
+name: large oltp benchmark
+
+# Runs pgbench with custom OLTP scripts against the large-tenant benchmark project, once on a
+# freshly created branch (new_branch) and once via a pre-configured connection string (reuse_branch).
+
+on:
+  # uncomment to run on push for debugging your PR
+  # push:
+  #   branches: [ bodobolero/synthetic_oltp_workload ]
+
+  schedule:
+    # * is a special character in YAML so you have to quote this string
+    #          ┌───────────── minute (0 - 59)
+    #          │ ┌───────────── hour (0 - 23)
+    #          │ │  ┌───────────── day of the month (1 - 31)
+    #          │ │  │ ┌───────────── month (1 - 12 or JAN-DEC)
+    #          │ │  │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
+    - cron:   '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks
+  workflow_dispatch: # adds ability to run this manually
+
+defaults:
+  run:
+    shell: bash -euxo pipefail {0}
+
+concurrency:
+  # Allow only one workflow globally because we need dedicated resources which only exist once
+  group: large-oltp-bench-workflow
+  cancel-in-progress: true
+
+jobs:
+  oltp:
+    strategy:
+      fail-fast: false # allow other variants to continue even if one fails
+      matrix:
+        include:
+          - target: new_branch
+            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+          - target: reuse_branch
+            custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4
+      max-parallel: 1 # run the targets sequentially so that their results are comparable
+    permissions:
+      contents: write
+      statuses: write
+      id-token: write # aws-actions/configure-aws-credentials
+    env:
+      TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h
+      TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }}
+      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
+      PG_VERSION: 16 # fixed by the pre-existing benchmark project
+      TEST_OUTPUT: /tmp/test_output
+      BUILD_TYPE: remote
+      SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }}
+      PLATFORM: ${{ matrix.target }}
+
+    runs-on: [ self-hosted, us-east-2, x64 ]
+    container:
+      image: neondatabase/build-tools:pinned-bookworm
+      credentials:
+        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
+        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+      options: --init
+
+    # Increase timeout to 8h, default timeout is 6h
+    timeout-minutes: 480
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - name: Configure AWS credentials # necessary to download artefacts
+      uses: aws-actions/configure-aws-credentials@v4
+      with:
+        aws-region: eu-central-1
+        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+        role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role
+
+    - name: Download Neon artifact
+      uses: ./.github/actions/download
+      with:
+        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        path: /tmp/neon/
+        prefix: latest
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+    - name: Create Neon Branch for large tenant
+      if: ${{ matrix.target == 'new_branch' }}
+      id: create-neon-branch-oltp-target
+      uses: ./.github/actions/neon-branch-create
+      with:
+          project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
+          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+
+    - name: Set up Connection String
+      id: set-up-connstr
+      # Pass expression values through the environment instead of interpolating them into
+      # the script, so characters such as `&` or `$` in a connection string cannot be
+      # interpreted by the shell.
+      env:
+        TARGET: ${{ matrix.target }}
+        NEW_BRANCH_CONNSTR: ${{ steps.create-neon-branch-oltp-target.outputs.dsn }}
+        REUSE_BRANCH_CONNSTR: ${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }}
+      run: |
+          case "${TARGET}" in
+              new_branch)
+              CONNSTR="${NEW_BRANCH_CONNSTR}"
+              ;;
+              reuse_branch)
+              CONNSTR="${REUSE_BRANCH_CONNSTR}"
+              ;;
+              *)
+              echo >&2 "Unknown target=${TARGET}"
+              exit 1
+              ;;
+          esac
+
+          echo "connstr=${CONNSTR}" >> "$GITHUB_OUTPUT"
+
+    - name: Benchmark pgbench with custom-scripts
+      uses: ./.github/actions/run-python-test-set
+      with:
+        build_type: ${{ env.BUILD_TYPE }}
+        test_selection: performance
+        run_in_parallel: false
+        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
+        extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant
+        pg_version: ${{ env.PG_VERSION }}
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
+        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+
+    - name: Delete Neon Branch for large tenant
+      if: ${{ always() && matrix.target == 'new_branch' }}
+      uses: ./.github/actions/neon-branch-delete
+      with:
+        project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }}
+        branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }}
+        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
+
+    - name: Create Allure report
+      id: create-allure-report
+      if: ${{ !cancelled() }}
+      uses: ./.github/actions/allure-report-generate
+      with:
+        aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
+
+    - name: Post to a Slack channel
+      if: ${{ github.event.schedule && failure() }}
+      uses: slackapi/slack-github-action@v1
+      with:
+        channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
+        slack-message: |
+          Periodic large oltp perf testing: ${{ job.status }}
+          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
+          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
+      env:
+        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -71,7 +71,7 @@ jobs:
    uses: ./.github/workflows/build-macos.yml
    with:
      pg_versions: ${{ needs.files-changed.outputs.postgres_changes }}
-      rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }}
+      rebuild_rust_code: ${{ fromJson(needs.files-changed.outputs.rebuild_rust_code) }}
      rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }}

  gather-rust-build-stats:
--- a/.github/workflows/pin-build-tools-image.yml
+++ b/.github/workflows/pin-build-tools-image.yml
@@ -65,6 +65,7 @@ jobs:

    permissions:
      id-token: write  # Required for aws/azure login
+      packages: write  # required for pushing to GHCR

    uses: ./.github/workflows/_push-to-container-registry.yml
    with:
@@ -72,12 +73,15 @@ jobs:
        {
          "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [
            "docker.io/neondatabase/build-tools:pinned-bullseye",
+            "ghcr.io/neondatabase/build-tools:pinned-bullseye",
            "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye",
            "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye"
          ],
          "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [
            "docker.io/neondatabase/build-tools:pinned-bookworm",
            "docker.io/neondatabase/build-tools:pinned",
+            "ghcr.io/neondatabase/build-tools:pinned-bookworm",
+            "ghcr.io/neondatabase/build-tools:pinned",
            "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm",
            "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned",
            "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bookworm",
@@ -85,12 +89,10 @@ jobs:
          ]
        }
      aws-region: ${{ vars.AWS_ECR_REGION }}
-      aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}"
+      aws-role-to-assume: "gha-oidc-neon-admin"
      azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }}
      azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }}
      azure-tenant-id: ${{ vars.AZURE_TENANT_ID }}
      acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }}
-    secrets:
-      aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}"
-      docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
-      docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
+    secrets: inherit
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -5,6 +5,10 @@ on:
    types:
      - ready_for_review
  workflow_call:
+    inputs:
+      github-event-name:
+        type: string
+        required: true

 defaults:
  run:
@@ -19,7 +23,7 @@ jobs:
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
    uses: ./.github/workflows/check-permissions.yml
    with:
-      github-event-name: ${{ github.event_name }}
+      github-event-name: ${{ inputs.github-event-name || github.event_name }}

  cancel-previous-e2e-tests:
    needs: [ check-permissions ]
@@ -35,46 +39,29 @@ jobs:
            run cancel-previous-in-concurrency-group.yml \
              --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"

-  tag:
-    needs: [ check-permissions ]
-    runs-on: ubuntu-22.04
-    outputs:
-      build-tag: ${{ steps.build-tag.outputs.tag }}
-
-    steps:
-      # Need `fetch-depth: 0` to count the number of commits in the branch
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Get build tag
-        env:
-          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-          CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }}
-          CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-        run: |
-          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then
-            echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then
-            echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT
-          else
-            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
-            BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId')
-            echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT
-          fi
-        id: build-tag
+  meta:
+    uses: ./.github/workflows/_meta.yml
+    with:
+      github-event-name: ${{ inputs.github-event-name || github.event_name }}

  trigger-e2e-tests:
-    needs: [ tag ]
+    needs: [ meta ]
    runs-on: ubuntu-22.04
    env:
      EVENT_ACTION: ${{ github.event.action }}
      GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
-      TAG: ${{ needs.tag.outputs.build-tag }}
+      TAG: >-
+        ${{
+          contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)
+          && needs.meta.outputs.previous-storage-release
+          || needs.meta.outputs.build-tag
+        }}
+      COMPUTE_TAG: >-
+        ${{
+          contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)
+          && needs.meta.outputs.previous-compute-release
+          || needs.meta.outputs.build-tag
+        }}
    steps:
      - name: Wait for `push-{neon,compute}-image-dev` job to finish
        # It's important to have a timeout here, the script in the step can run infinitely
@@ -157,6 +144,6 @@ jobs:
              --raw-field "commit_hash=$COMMIT_SHA" \
              --raw-field "remote_repo=${GITHUB_REPOSITORY}" \
              --raw-field "storage_image_tag=${TAG}" \
-              --raw-field "compute_image_tag=${TAG}" \
+              --raw-field "compute_image_tag=${COMPUTE_TAG}" \
              --raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
              --raw-field "e2e-platforms=${E2E_PLATFORMS}"
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -984,9 +984,9 @@ dependencies = [

 [[package]]
 name = "bindgen"
-version = "0.70.1"
+version = "0.71.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f"
+checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3"
 dependencies = [
 "bitflags 2.8.0",
 "cexpr",
@@ -997,7 +997,7 @@ dependencies = [
 "proc-macro2",
 "quote",
 "regex",
- "rustc-hash",
+ "rustc-hash 2.1.1",
 "shlex",
 "syn 2.0.90",
 ]
@@ -1342,7 +1342,9 @@ dependencies = [
 "tokio-util",
 "tower 0.5.2",
 "tower-http",
+ "tower-otel",
 "tracing",
+ "tracing-opentelemetry",
 "tracing-subscriber",
 "tracing-utils",
 "url",
@@ -1546,6 +1548,17 @@ dependencies = [
 "itertools 0.10.5",
 ]

+[[package]]
+name = "cron"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5877d3fbf742507b66bc2a1945106bd30dd8504019d596901ddd012a4dd01740"
+dependencies = [
+ "chrono",
+ "once_cell",
+ "winnow",
+]
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.8"
@@ -1874,6 +1887,12 @@ dependencies = [
 "syn 2.0.90",
 ]

+[[package]]
+name = "difflib"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8"
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -2391,9 +2410,9 @@ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"

 [[package]]
 name = "futures-timer"
-version = "3.0.2"
+version = "3.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"

 [[package]]
 name = "futures-util"
@@ -2496,6 +2515,27 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

+[[package]]
+name = "governor"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "842dc78579ce01e6a1576ad896edc92fca002dd60c9c3746b7fc2bec6fb429d0"
+dependencies = [
+ "cfg-if",
+ "dashmap 6.1.0",
+ "futures-sink",
+ "futures-timer",
+ "futures-util",
+ "no-std-compat",
+ "nonzero_ext",
+ "parking_lot 0.12.1",
+ "portable-atomic",
+ "quanta",
+ "rand 0.8.5",
+ "smallvec",
+ "spinning_top",
+]
+
 [[package]]
 name = "group"
 version = "0.12.1"
@@ -3331,6 +3371,17 @@ dependencies = [
 "wasm-bindgen",
 ]

+[[package]]
+name = "json-structural-diff"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e878e36a8a44c158505c2c818abdc1350413ad83dcb774a0459f6a7ef2b65cbf"
+dependencies = [
+ "difflib",
+ "regex",
+ "serde_json",
+]
+
 [[package]]
 name = "jsonwebtoken"
 version = "9.2.0"
@@ -3507,7 +3558,7 @@ dependencies = [
 "measured-derive",
 "memchr",
 "parking_lot 0.12.1",
- "rustc-hash",
+ "rustc-hash 1.1.0",
 "ryu",
 ]

@@ -3695,6 +3746,12 @@ dependencies = [
 "memoffset 0.9.0",
 ]

+[[package]]
+name = "no-std-compat"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c"
+
 [[package]]
 name = "nom"
 version = "7.1.3"
@@ -3705,6 +3762,12 @@ dependencies = [
 "minimal-lexical",
 ]

+[[package]]
+name = "nonzero_ext"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21"
+
 [[package]]
 name = "notify"
 version = "8.0.0"
@@ -4155,7 +4218,6 @@ dependencies = [
 "pageserver_client",
 "pageserver_compaction",
 "pin-project-lite",
- "postgres",
 "postgres-protocol",
 "postgres-types",
 "postgres_backend",
@@ -4242,7 +4304,6 @@ dependencies = [
 "futures",
 "http-utils",
 "pageserver_api",
- "postgres",
 "reqwest",
 "serde",
 "thiserror 1.0.69",
@@ -4458,18 +4519,18 @@ dependencies = [

 [[package]]
 name = "pin-project"
-version = "1.1.0"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead"
+checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d"
 dependencies = [
 "pin-project-internal",
 ]

 [[package]]
 name = "pin-project-internal"
-version = "1.1.0"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
+checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -4563,6 +4624,12 @@ dependencies = [
 "never-say-never",
 ]

+[[package]]
+name = "portable-atomic"
+version = "1.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
+
 [[package]]
 name = "postgres"
 version = "0.19.7"
@@ -4657,7 +4724,6 @@ dependencies = [
 "anyhow",
 "itertools 0.10.5",
 "once_cell",
- "postgres",
 "tokio-postgres",
 "url",
 ]
@@ -4985,7 +5051,7 @@ dependencies = [
 "reqwest-tracing",
 "rsa",
 "rstest",
- "rustc-hash",
+ "rustc-hash 1.1.0",
 "rustls 0.23.18",
 "rustls-native-certs 0.8.0",
 "rustls-pemfile 2.1.1",
@@ -5025,6 +5091,21 @@ dependencies = [
 "zerocopy",
 ]

+[[package]]
+name = "quanta"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
+dependencies = [
+ "crossbeam-utils",
+ "libc",
+ "once_cell",
+ "raw-cpuid",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+ "web-sys",
+ "winapi",
+]
+
 [[package]]
 name = "quick-xml"
 version = "0.26.0"
@@ -5155,6 +5236,15 @@ dependencies = [
 "num-traits",
 ]

+[[package]]
+name = "raw-cpuid"
+version = "11.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e"
+dependencies = [
+ "bitflags 2.8.0",
+]
+
 [[package]]
 name = "rayon"
 version = "1.7.0"
@@ -5603,6 +5693,12 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"

+[[package]]
+name = "rustc-hash"
+version = "2.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.0"
@@ -5799,7 +5895,6 @@ dependencies = [
 "once_cell",
 "pageserver_api",
 "parking_lot 0.12.1",
- "postgres",
 "postgres-protocol",
 "postgres_backend",
 "postgres_ffi",
@@ -6363,6 +6458,15 @@ version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"

+[[package]]
+name = "spinning_top"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300"
+dependencies = [
+ "lock_api",
+]
+
 [[package]]
 name = "spki"
 version = "0.6.0"
@@ -6433,16 +6537,19 @@ dependencies = [
 "chrono",
 "clap",
 "control_plane",
+ "cron",
 "diesel",
 "diesel-async",
 "diesel_migrations",
 "fail",
 "futures",
+ "governor",
 "hex",
 "http-utils",
 "humantime",
 "hyper 0.14.30",
 "itertools 0.10.5",
+ "json-structural-diff",
 "lasso",
 "measured",
 "metrics",
@@ -7268,6 +7375,20 @@ version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"

+[[package]]
+name = "tower-otel"
+version = "0.2.0"
+source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd"
+dependencies = [
+ "http 1.1.0",
+ "opentelemetry",
+ "pin-project",
+ "tower-layer",
+ "tower-service",
+ "tracing",
+ "tracing-opentelemetry",
+]
+
 [[package]]
 name = "tower-service"
 version = "0.3.3"
@@ -8124,9 +8245,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

 [[package]]
 name = "winnow"
-version = "0.6.13"
+version = "0.6.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1"
+checksum = "1e90edd2ac1aa278a5c4599b1d89cf03074b610800f866d4026dc199d7929a28"
 dependencies = [
 "memchr",
 ]
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -43,7 +43,7 @@ members = [
 ]

 [workspace.package]
-edition = "2021"
+edition = "2024"
 license = "Apache-2.0"

 ## All dependency versions, used in the project
@@ -70,13 +70,14 @@ aws-types = "1.3"
 axum = { version = "0.8.1", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
-bindgen = "0.70"
+bindgen = "0.71"
 bit_field = "0.10.2"
 bstr = "1.0"
 byteorder = "1.4"
 bytes = "1.9"
 camino = "1.1.6"
 cfg-if = "1.0.0"
+cron = "0.15"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
 clap = { version = "4.0", features = ["derive", "env"] }
 clashmap = { version = "1.0", features = ["raw-api"] }
@@ -94,6 +95,7 @@ futures = "0.3"
 futures-core = "0.3"
 futures-util = "0.3"
 git-version = "0.3"
+governor = "0.8"
 hashbrown = "0.14"
 hashlink = "0.9.1"
 hdrhistogram = "7.5.2"
@@ -192,6 +194,10 @@ toml_edit = "0.22"
 tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]}
 tower = { version = "0.5.2", default-features = false }
 tower-http = { version = "0.6.2", features = ["request-id", "trace"] }
+
+# This revision uses opentelemetry 0.27. There's no tag for it.
+tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" }
+
 tower-service = "0.3.3"
 tracing = "0.1"
 tracing-error = "0.2"
@@ -210,6 +216,7 @@ rustls-native-certs = "0.8"
 x509-parser = "0.16"
 whoami = "1.5.1"
 zerocopy = { version = "0.7", features = ["derive"] }
+json-structural-diff = { version = "0.2.0" }

 ## TODO replace this with tracing
 env_logger = "0.10"
--- a/Makefile
+++ b/Makefile
@@ -11,15 +11,16 @@ ICU_PREFIX_DIR := /usr/local/icu
 #
 BUILD_TYPE ?= debug
 WITH_SANITIZERS ?= no
+PG_CFLAGS = -fsigned-char
 ifeq ($(BUILD_TYPE),release)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl
-	PG_CFLAGS = -O2 -g3 $(CFLAGS)
+	PG_CFLAGS += -O2 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 	# Unfortunately, `--profile=...` is a nightly feature
 	CARGO_BUILD_FLAGS += --release
 else ifeq ($(BUILD_TYPE),debug)
 	PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend
-	PG_CFLAGS = -O0 -g3 $(CFLAGS)
+	PG_CFLAGS += -O0 -g3 $(CFLAGS)
 	PG_LDFLAGS = $(LDFLAGS)
 else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
@@ -159,6 +160,8 @@ postgres-%: postgres-configure-% \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install
 	+@echo "Compiling pageinspect $*"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+	+@echo "Compiling pg_trgm $*"
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install
 	+@echo "Compiling amcheck $*"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
 	+@echo "Compiling test_decoding $*"
--- a/compute/compute-node.Dockerfile
+++ b/compute/compute-node.Dockerfile
@@ -162,7 +162,7 @@ FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION:?} postgres
 RUN cd postgres && \
-    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
+    export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \
    --with-icu --with-libxml --with-libxslt --with-lz4" && \
    if [ "${PG_VERSION:?}" != "v14" ]; then \
        # zstd is available only from PG15
@@ -395,15 +395,22 @@ RUN case "${PG_VERSION:?}" in \
    cd plv8-src && \
    if [[ "${PG_VERSION:?}" < "v17" ]]; then patch -p1 < /ext-src/plv8-3.1.10.patch; fi

-FROM pg-build AS plv8-build
+# Step 1: Build the vendored V8 engine. It doesn't depend on PostgreSQL, so use
+# 'build-deps' as the base. This enables caching and avoids unnecessary rebuilds.
+# (The V8 engine takes a very long time to build)
+FROM build-deps AS plv8-build
 ARG PG_VERSION
+WORKDIR /ext-src/plv8-src
 RUN apt update && \
    apt install --no-install-recommends --no-install-suggests -y \
    ninja-build python3-dev libncurses5 binutils clang \
    && apt clean && rm -rf /var/lib/apt/lists/*
-
 COPY --from=plv8-src /ext-src/ /ext-src/
-WORKDIR /ext-src/plv8-src
+RUN make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) v8
+
+# Step 2: Build the PostgreSQL-dependent parts
+COPY --from=pg-build /usr/local/pgsql /usr/local/pgsql
+ENV PATH="/usr/local/pgsql/bin:$PATH"
 RUN \
    # generate and copy upgrade scripts
    make generate_upgrades && \
@@ -1451,9 +1458,11 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \
 FROM build-deps AS pg_mooncake-src
 ARG PG_VERSION
 WORKDIR /ext-src
+COPY compute/patches/duckdb_v113.patch .
 RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \
    echo "4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz" | sha256sum --check && \
    mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \
+    cd third_party/duckdb && patch -p1 < /ext-src/duckdb_v113.patch && cd ../.. && \
    echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \
    chmod a+x neon-test.sh

@@ -1473,6 +1482,7 @@ RUN make release -j $(getconf _NPROCESSORS_ONLN) && \
 FROM build-deps AS pg_duckdb-src
 WORKDIR /ext-src
 COPY compute/patches/pg_duckdb_v031.patch .
+COPY compute/patches/duckdb_v120.patch .
 # pg_duckdb build requires source dir to be a git repo to get submodules
 # allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: 
 # - extension management function duckdb.install_extension()
@@ -1480,7 +1490,9 @@ COPY compute/patches/pg_duckdb_v031.patch .
 RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \
    cd pg_duckdb-src && \
    git submodule update --init --recursive && \
-    patch -p1 < /ext-src/pg_duckdb_v031.patch
+    patch -p1 < /ext-src/pg_duckdb_v031.patch && \
+    cd third_party/duckdb && \
+    patch -p1 < /ext-src/duckdb_v120.patch

 FROM pg-build AS pg_duckdb-build
 ARG PG_VERSION
@@ -1669,11 +1681,7 @@ COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/
-
-# Disabled temporarily, because it clashed with pg_mooncake. pg_mooncake
-# also depends on libduckdb, but a different version.
-#COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
-
+COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -1810,7 +1818,7 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute

 FROM pg-build AS extension-tests
 ARG PG_VERSION
-RUN mkdir /ext-src
+COPY docker-compose/ext-src/ /ext-src/

 COPY --from=pg-build /postgres /postgres
 #COPY --from=postgis-src /ext-src/ /ext-src/
--- a/compute/patches/duckdb_v113.patch
+++ b/compute/patches/duckdb_v113.patch
@@ -0,0 +1,25 @@
+diff --git a/libduckdb.map b/libduckdb.map
+new file mode 100644
+index 0000000000..3b56f00cd7
+--- /dev/null
+++ b/libduckdb.map
+@@ -0,0 +1,6 @@
+DUCKDB_1.1.3 {
+    global:
+        *duckdb*;
+    local:
+        *;
+};
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
+index 3e757a4bcc..88ab4005b9 100644
+--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
+@@ -135,6 +135,8 @@ else()
+   target_link_libraries(duckdb ${DUCKDB_LINK_LIBS})
+   link_threads(duckdb)
+   link_extension_libraries(duckdb)
+  target_link_options(duckdb PRIVATE
+    -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb.map)
+ 
+   add_library(duckdb_static STATIC ${ALL_OBJECT_FILES})
+   target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS})
--- a/compute/patches/duckdb_v120.patch
+++ b/compute/patches/duckdb_v120.patch
@@ -0,0 +1,67 @@
+diff --git a/libduckdb_pg_duckdb.map b/libduckdb_pg_duckdb.map
+new file mode 100644
+index 0000000000..0872978b48
+--- /dev/null
+++ b/libduckdb_pg_duckdb.map
+@@ -0,0 +1,6 @@
+DUCKDB_1.2.0 {
+    global:
+        *duckdb*;
+    local:
+        *;
+};
+diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
+index 58adef3fc0..2c522f91be 100644
+--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
+@@ -59,7 +59,7 @@ endfunction()
+ 
+ if(AMALGAMATION_BUILD)
+ 
+-  add_library(duckdb SHARED "${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp")
+  add_library(duckdb_pg_duckdb SHARED "${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp")
+   target_link_libraries(duckdb ${DUCKDB_SYSTEM_LIBS})
+   link_threads(duckdb)
+   link_extension_libraries(duckdb)
+@@ -109,7 +109,7 @@ else()
+       duckdb_yyjson
+       duckdb_zstd)
+ 
+-  add_library(duckdb SHARED ${ALL_OBJECT_FILES})
+  add_library(duckdb_pg_duckdb SHARED ${ALL_OBJECT_FILES})
+ 
+   if(WIN32 AND NOT MINGW)
+     ensure_variable_is_number(DUCKDB_MAJOR_VERSION RC_MAJOR_VERSION)
+@@ -131,9 +131,11 @@ else()
+     target_sources(duckdb PRIVATE version.rc)
+   endif()
+ 
+-  target_link_libraries(duckdb ${DUCKDB_LINK_LIBS})
+-  link_threads(duckdb)
+-  link_extension_libraries(duckdb)
+  target_link_libraries(duckdb_pg_duckdb ${DUCKDB_LINK_LIBS})
+  link_threads(duckdb_pg_duckdb)
+  link_extension_libraries(duckdb_pg_duckdb)
+  target_link_options(duckdb_pg_duckdb PRIVATE
+    -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb_pg_duckdb.map)
+ 
+   add_library(duckdb_static STATIC ${ALL_OBJECT_FILES})
+   target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS})
+@@ -141,7 +143,7 @@ else()
+   link_extension_libraries(duckdb_static)
+ 
+   target_include_directories(
+-    duckdb PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+    duckdb_pg_duckdb PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+                   $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
+ 
+   target_include_directories(
+@@ -161,7 +163,7 @@ else()
+ endif()
+ 
+ install(
+-  TARGETS duckdb duckdb_static
+  TARGETS duckdb_pg_duckdb duckdb_static
+   EXPORT "${DUCKDB_EXPORT_SET}"
+   LIBRARY DESTINATION "${INSTALL_LIB_DIR}"
+   ARCHIVE DESTINATION "${INSTALL_LIB_DIR}"
--- a/compute/patches/pg_duckdb_v031.patch
+++ b/compute/patches/pg_duckdb_v031.patch
@@ -1,3 +1,25 @@
+diff --git a/Makefile b/Makefile
+index 3235cc8..6b892bc 100644
+--- a/Makefile
+++ b/Makefile
+@@ -32,7 +32,7 @@ else
+ 	DUCKDB_BUILD_TYPE = release
+ endif
+ 
+-DUCKDB_LIB = libduckdb$(DLSUFFIX)
+DUCKDB_LIB = libduckdb_pg_duckdb$(DLSUFFIX)
+ FULL_DUCKDB_LIB = third_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src/$(DUCKDB_LIB)
+ 
+ ERROR_ON_WARNING ?=
+@@ -54,7 +54,7 @@ override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} -
+ # changes to the vendored code in one place.
+ override PG_CFLAGS += -Wno-declaration-after-statement
+ 
+-SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4
+SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb_pg_duckdb -lstdc++ -llz4
+ 
+ include Makefile.global
+ 
 diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql
 index d777d76..af60106 100644
 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql
--- a/compute/vm-image-spec-bookworm.yaml
+++ b/compute/vm-image-spec-bookworm.yaml
@@ -44,6 +44,11 @@ shutdownHook: |
 files:
  - filename: compute_ctl-sudoers
    content: |
+      # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
+      # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
+      # resolve host" log messages that they generate.
+      Defaults !fqdn
+
      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
      # regardless of hostname (ALL)
--- a/compute/vm-image-spec-bullseye.yaml
+++ b/compute/vm-image-spec-bullseye.yaml
@@ -44,6 +44,11 @@ shutdownHook: |
 files:
  - filename: compute_ctl-sudoers
    content: |
+      # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all
+      # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to
+      # resolve host" log messages that they generate.
+      Defaults !fqdn
+
      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
      # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD),
      # regardless of hostname (ALL)
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "compute_tools"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true

 [features]
@@ -46,7 +46,9 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
+tower-otel.workspace = true
 tracing.workspace = true
+tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 thiserror.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -33,41 +33,28 @@
 //!             -b /usr/local/bin/postgres \
 //!             -r http://pg-ext-s3-gateway \
 //! ```
-use std::collections::HashMap;
 use std::ffi::OsString;
 use std::fs::File;
 use std::path::Path;
 use std::process::exit;
-use std::str::FromStr;
-use std::sync::atomic::Ordering;
-use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock};
-use std::{thread, time::Duration};
+use std::sync::mpsc;
+use std::thread;
+use std::time::Duration;

 use anyhow::{Context, Result};
-use chrono::Utc;
 use clap::Parser;
-use compute_tools::disk_quota::set_disk_quota;
-use compute_tools::http::server::Server;
-use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static;
-use signal_hook::consts::{SIGQUIT, SIGTERM};
-use signal_hook::{consts::SIGINT, iterator::Signals};
-use tracing::{error, info, warn};
-use url::Url;
-
-use compute_api::responses::{ComputeCtlConfig, ComputeStatus};
+use compute_api::responses::ComputeCtlConfig;
 use compute_api::spec::ComputeSpec;
-
-use compute_tools::compute::{
-    forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
-};
-use compute_tools::configurator::launch_configurator;
+use compute_tools::compute::{ComputeNode, ComputeNodeParams, forward_termination_signal};
 use compute_tools::extension_server::get_pg_version_string;
 use compute_tools::logger::*;
-use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
-use compute_tools::swap::resize_swap;
-use rlimit::{setrlimit, Resource};
+use rlimit::{Resource, setrlimit};
+use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM};
+use signal_hook::iterator::Signals;
+use tracing::{error, info};
+use url::Url;
 use utils::failpoint_support;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
@@ -149,6 +136,8 @@ struct Cli {
 fn main() -> Result<()> {
    let cli = Cli::parse();

+    let scenario = failpoint_support::init();
+
    // For historical reasons, the main thread that processes the spec and launches postgres
    // is synchronous, but we always have this tokio runtime available and we "enter" it so
    // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...)
@@ -160,34 +149,43 @@ fn main() -> Result<()> {

    let build_tag = runtime.block_on(init())?;

-    let scenario = failpoint_support::init();
-
    // enable core dumping for all child processes
    setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;

-    let (pg_handle, start_pg_result) = {
-        // Enter startup tracing context
-        let _startup_context_guard = startup_context_from_env();
+    let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;

-        let cli_spec = try_spec_from_cli(&cli)?;
+    let cli_spec = try_spec_from_cli(&cli)?;

-        let compute = wait_spec(build_tag, &cli, cli_spec)?;
+    let compute_node = ComputeNode::new(
+        ComputeNodeParams {
+            compute_id: cli.compute_id,
+            connstr,
+            pgdata: cli.pgdata.clone(),
+            pgbin: cli.pgbin.clone(),
+            pgversion: get_pg_version_string(&cli.pgbin),
+            external_http_port: cli.external_http_port,
+            internal_http_port: cli.internal_http_port,
+            ext_remote_storage: cli.remote_ext_config.clone(),
+            resize_swap_on_bind: cli.resize_swap_on_bind,
+            set_disk_quota_for_fs: cli.set_disk_quota_for_fs,
+            #[cfg(target_os = "linux")]
+            filecache_connstr: cli.filecache_connstr,
+            #[cfg(target_os = "linux")]
+            cgroup: cli.cgroup,
+            #[cfg(target_os = "linux")]
+            vm_monitor_addr: cli.vm_monitor_addr,
+            build_tag,

-        start_postgres(&cli, compute)?
+            live_config_allowed: cli_spec.live_config_allowed,
+        },
+        cli_spec.spec,
+    )?;

-        // Startup is finished, exit the startup tracing span
-    };
-
-    // PostgreSQL is now running, if startup was successful. Wait until it exits.
-    let wait_pg_result = wait_postgres(pg_handle)?;
-
-    let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
-
-    maybe_delay_exit(delay_exit);
+    let exit_code = compute_node.run()?;

    scenario.teardown();

-    deinit_and_exit(wait_pg_result);
+    deinit_and_exit(exit_code);
 }

 async fn init() -> Result<String> {
@@ -208,56 +206,6 @@ async fn init() -> Result<String> {
    Ok(build_tag)
 }

-fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
-    // Extract OpenTelemetry context for the startup actions from the
-    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
-    // tracing context.
-    //
-    // This is used to propagate the context for the 'start_compute' operation
-    // from the neon control plane. This allows linking together the wider
-    // 'start_compute' operation that creates the compute container, with the
-    // startup actions here within the container.
-    //
-    // There is no standard for passing context in env variables, but a lot of
-    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
-    // https://github.com/open-telemetry/opentelemetry-specification/issues/740
-    //
-    // Switch to the startup context here, and exit it once the startup has
-    // completed and Postgres is up and running.
-    //
-    // If this pod is pre-created without binding it to any particular endpoint
-    // yet, this isn't the right place to enter the startup context. In that
-    // case, the control plane should pass the tracing context as part of the
-    // /configure API call.
-    //
-    // NOTE: This is supposed to only cover the *startup* actions. Once
-    // postgres is configured and up-and-running, we exit this span. Any other
-    // actions that are performed on incoming HTTP requests, for example, are
-    // performed in separate spans.
-    //
-    // XXX: If the pod is restarted, we perform the startup actions in the same
-    // context as the original startup actions, which probably doesn't make
-    // sense.
-    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
-    if let Ok(val) = std::env::var("TRACEPARENT") {
-        startup_tracing_carrier.insert("traceparent".to_string(), val);
-    }
-    if let Ok(val) = std::env::var("TRACESTATE") {
-        startup_tracing_carrier.insert("tracestate".to_string(), val);
-    }
-    if !startup_tracing_carrier.is_empty() {
-        use opentelemetry::propagation::TextMapPropagator;
-        use opentelemetry_sdk::propagation::TraceContextPropagator;
-        let guard = TraceContextPropagator::new()
-            .extract(&startup_tracing_carrier)
-            .attach();
-        info!("startup tracing context attached");
-        Some(guard)
-    } else {
-        None
-    }
-}
-
 fn try_spec_from_cli(cli: &Cli) -> Result<CliSpecParams> {
    // First, try to get cluster spec from the cli argument
    if let Some(ref spec_json) = cli.spec_json {
@@ -308,342 +256,7 @@ struct CliSpecParams {
    live_config_allowed: bool,
 }

-fn wait_spec(
-    build_tag: String,
-    cli: &Cli,
-    CliSpecParams {
-        spec,
-        live_config_allowed,
-        compute_ctl_config: _,
-    }: CliSpecParams,
-) -> Result<Arc<ComputeNode>> {
-    let mut new_state = ComputeState::new();
-    let spec_set;
-
-    if let Some(spec) = spec {
-        let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
-        info!("new pspec.spec: {:?}", pspec.spec);
-        new_state.pspec = Some(pspec);
-        spec_set = true;
-    } else {
-        spec_set = false;
-    }
-    let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?;
-    let conn_conf = postgres::config::Config::from_str(connstr.as_str())
-        .context("cannot build postgres config from connstr")?;
-    let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str())
-        .context("cannot build tokio postgres config from connstr")?;
-    let compute_node = ComputeNode {
-        compute_id: cli.compute_id.clone(),
-        connstr,
-        conn_conf,
-        tokio_conn_conf,
-        pgdata: cli.pgdata.clone(),
-        pgbin: cli.pgbin.clone(),
-        pgversion: get_pg_version_string(&cli.pgbin),
-        external_http_port: cli.external_http_port,
-        internal_http_port: cli.internal_http_port,
-        live_config_allowed,
-        state: Mutex::new(new_state),
-        state_changed: Condvar::new(),
-        ext_remote_storage: cli.remote_ext_config.clone(),
-        ext_download_progress: RwLock::new(HashMap::new()),
-        build_tag,
-    };
-    let compute = Arc::new(compute_node);
-
-    // If this is a pooled VM, prewarm before starting HTTP server and becoming
-    // available for binding. Prewarming helps Postgres start quicker later,
-    // because QEMU will already have its memory allocated from the host, and
-    // the necessary binaries will already be cached.
-    if !spec_set {
-        compute.prewarm_postgres()?;
-    }
-
-    // Launch the external HTTP server first, so that we can serve control plane
-    // requests while configuration is still in progress.
-    Server::External(cli.external_http_port).launch(&compute);
-
-    // The internal HTTP server could be launched later, but there isn't much
-    // sense in waiting.
-    Server::Internal(cli.internal_http_port).launch(&compute);
-
-    if !spec_set {
-        // No spec provided, hang waiting for it.
-        info!("no compute spec provided, waiting");
-
-        let mut state = compute.state.lock().unwrap();
-        while state.status != ComputeStatus::ConfigurationPending {
-            state = compute.state_changed.wait(state).unwrap();
-
-            if state.status == ComputeStatus::ConfigurationPending {
-                info!("got spec, continue configuration");
-                // Spec is already set by the http server handler.
-                break;
-            }
-        }
-
-        // Record for how long we slept waiting for the spec.
-        let now = Utc::now();
-        state.metrics.wait_for_spec_ms = now
-            .signed_duration_since(state.start_time)
-            .to_std()
-            .unwrap()
-            .as_millis() as u64;
-
-        // Reset start time, so that the total startup time that is calculated later will
-        // not include the time that we waited for the spec.
-        state.start_time = now;
-    }
-
-    launch_lsn_lease_bg_task_for_static(&compute);
-
-    Ok(compute)
-}
-
-fn start_postgres(
-    cli: &Cli,
-    compute: Arc<ComputeNode>,
-) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
-    // We got all we need, update the state.
-    let mut state = compute.state.lock().unwrap();
-    state.set_status(ComputeStatus::Init, &compute.state_changed);
-
-    info!(
-        "running compute with features: {:?}",
-        state.pspec.as_ref().unwrap().spec.features
-    );
-    // before we release the mutex, fetch some parameters for later.
-    let &ComputeSpec {
-        swap_size_bytes,
-        disk_quota_bytes,
-        #[cfg(target_os = "linux")]
-        disable_lfc_resizing,
-        ..
-    } = &state.pspec.as_ref().unwrap().spec;
-    drop(state);
-
-    // Launch remaining service threads
-    let _monitor_handle = launch_monitor(&compute);
-    let _configurator_handle = launch_configurator(&compute);
-
-    let mut prestartup_failed = false;
-    let mut delay_exit = false;
-
-    // Resize swap to the desired size if the compute spec says so
-    if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) {
-        // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
-        // *before* starting postgres.
-        //
-        // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
-        // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
-        // OOM-killed during startup because swap wasn't available yet.
-        match resize_swap(size_bytes) {
-            Ok(()) => {
-                let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%size_bytes, %size_mib, "resized swap");
-            }
-            Err(err) => {
-                let err = err.context("failed to resize swap");
-                error!("{err:#}");
-
-                // Mark compute startup as failed; don't try to start postgres, and report this
-                // error to the control plane when it next asks.
-                prestartup_failed = true;
-                compute.set_failed_status(err);
-                delay_exit = true;
-            }
-        }
-    }
-
-    // Set disk quota if the compute spec says so
-    if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) =
-        (disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref())
-    {
-        match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) {
-            Ok(()) => {
-                let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%disk_quota_bytes, %size_mib, "set disk quota");
-            }
-            Err(err) => {
-                let err = err.context("failed to set disk quota");
-                error!("{err:#}");
-
-                // Mark compute startup as failed; don't try to start postgres, and report this
-                // error to the control plane when it next asks.
-                prestartup_failed = true;
-                compute.set_failed_status(err);
-                delay_exit = true;
-            }
-        }
-    }
-
-    // Start Postgres
-    let mut pg = None;
-    if !prestartup_failed {
-        pg = match compute.start_compute() {
-            Ok(pg) => {
-                info!(postmaster_pid = %pg.0.id(), "Postgres was started");
-                Some(pg)
-            }
-            Err(err) => {
-                error!("could not start the compute node: {:#}", err);
-                compute.set_failed_status(err);
-                delay_exit = true;
-                None
-            }
-        };
-    } else {
-        warn!("skipping postgres startup because pre-startup step failed");
-    }
-
-    // Start the vm-monitor if directed to. The vm-monitor only runs on linux
-    // because it requires cgroups.
-    cfg_if::cfg_if! {
-        if #[cfg(target_os = "linux")] {
-            use std::env;
-            use tokio_util::sync::CancellationToken;
-
-            // This token is used internally by the monitor to clean up all threads
-            let token = CancellationToken::new();
-
-            // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC
-            let pgconnstr = if disable_lfc_resizing.unwrap_or(false) {
-                None
-            } else {
-                Some(cli.filecache_connstr.clone())
-            };
-
-            let vm_monitor = if env::var_os("AUTOSCALING").is_some() {
-                let vm_monitor = tokio::spawn(vm_monitor::start(
-                    Box::leak(Box::new(vm_monitor::Args {
-                        cgroup: Some(cli.cgroup.clone()),
-                        pgconnstr,
-                        addr: cli.vm_monitor_addr.clone(),
-                    })),
-                    token.clone(),
-                ));
-                Some(vm_monitor)
-            } else {
-                None
-            };
-        }
-    }
-
-    Ok((
-        pg,
-        StartPostgresResult {
-            delay_exit,
-            compute,
-            #[cfg(target_os = "linux")]
-            token,
-            #[cfg(target_os = "linux")]
-            vm_monitor,
-        },
-    ))
-}
-
-type PostgresHandle = (std::process::Child, tokio::task::JoinHandle<Result<()>>);
-
-struct StartPostgresResult {
-    delay_exit: bool,
-    // passed through from WaitSpecResult
-    compute: Arc<ComputeNode>,
-
-    #[cfg(target_os = "linux")]
-    token: tokio_util::sync::CancellationToken,
-    #[cfg(target_os = "linux")]
-    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
-}
-
-fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
-    // Wait for the child Postgres process forever. In this state Ctrl+C will
-    // propagate to Postgres and it will be shut down as well.
-    let mut exit_code = None;
-    if let Some((mut pg, logs_handle)) = pg {
-        info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit");
-
-        let ecode = pg
-            .wait()
-            .expect("failed to start waiting on Postgres process");
-        PG_PID.store(0, Ordering::SeqCst);
-
-        // Process has exited. Wait for the log collecting task to finish.
-        let _ = tokio::runtime::Handle::current()
-            .block_on(logs_handle)
-            .map_err(|e| tracing::error!("log task panicked: {:?}", e));
-
-        info!("Postgres exited with code {}, shutting down", ecode);
-        exit_code = ecode.code()
-    }
-
-    Ok(WaitPostgresResult { exit_code })
-}
-
-struct WaitPostgresResult {
-    exit_code: Option<i32>,
-}
-
-fn cleanup_after_postgres_exit(
-    StartPostgresResult {
-        mut delay_exit,
-        compute,
-        #[cfg(target_os = "linux")]
-        vm_monitor,
-        #[cfg(target_os = "linux")]
-        token,
-    }: StartPostgresResult,
-) -> Result<bool> {
-    // Terminate the vm_monitor so it releases the file watcher on
-    // /sys/fs/cgroup/neon-postgres.
-    // Note: the vm-monitor only runs on linux because it requires cgroups.
-    cfg_if::cfg_if! {
-        if #[cfg(target_os = "linux")] {
-            if let Some(handle) = vm_monitor {
-                // Kills all threads spawned by the monitor
-                token.cancel();
-                // Kills the actual task running the monitor
-                handle.abort();
-            }
-        }
-    }
-
-    // Maybe sync safekeepers again, to speed up next startup
-    let compute_state = compute.state.lock().unwrap().clone();
-    let pspec = compute_state.pspec.as_ref().expect("spec must be set");
-    if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
-        info!("syncing safekeepers on shutdown");
-        let storage_auth_token = pspec.storage_auth_token.clone();
-        let lsn = compute.sync_safekeepers(storage_auth_token)?;
-        info!("synced safekeepers at lsn {lsn}");
-    }
-
-    let mut state = compute.state.lock().unwrap();
-    if state.status == ComputeStatus::TerminationPending {
-        state.status = ComputeStatus::Terminated;
-        compute.state_changed.notify_all();
-        // we were asked to terminate gracefully, don't exit to avoid restart
-        delay_exit = true
-    }
-    drop(state);
-
-    if let Err(err) = compute.check_for_core_dumps() {
-        error!("error while checking for core dumps: {err:?}");
-    }
-
-    Ok(delay_exit)
-}
-
-fn maybe_delay_exit(delay_exit: bool) {
-    // If launch failed, keep serving HTTP requests for a while, so the cloud
-    // control plane can get the actual error.
-    if delay_exit {
-        info!("giving control plane 30s to collect the error before shutdown");
-        thread::sleep(Duration::from_secs(30));
-    }
-}
-
-fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
+fn deinit_and_exit(exit_code: Option<i32>) -> ! {
    // Shutdown trace pipeline gracefully, so that it has a chance to send any
    // pending traces before we exit. Shutting down OTEL tracing provider may
    // hang for quite some time, see, for example:
--- a/compute_tools/src/bin/fast_import.rs
+++ b/compute_tools/src/bin/fast_import.rs
@@ -25,13 +25,13 @@
 //! docker push localhost:3030/localregistry/compute-node-v14:latest
 //! ```

-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use aws_config::BehaviorVersion;
 use camino::{Utf8Path, Utf8PathBuf};
 use clap::{Parser, Subcommand};
-use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion};
+use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version};
 use nix::unistd::Pid;
-use tracing::{error, info, info_span, warn, Instrument};
+use tracing::{Instrument, error, info, info_span, warn};
 use utils::fs_ext::is_directory_empty;

 #[path = "fast_import/aws_s3_sync.rs"]
@@ -558,7 +558,9 @@ async fn cmd_dumprestore(
                    decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext)
                        .await?
                } else {
-                    bail!("destination connection string must be provided in spec for dump_restore command");
+                    bail!(
+                        "destination connection string must be provided in spec for dump_restore command"
+                    );
                };

                (source, dest)
--- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs
+++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs
@@ -1,11 +1,10 @@
 use camino::{Utf8Path, Utf8PathBuf};
 use tokio::task::JoinSet;
+use tracing::{info, warn};
 use walkdir::WalkDir;

 use super::s3_uri::S3Uri;

-use tracing::{info, warn};
-
 const MAX_PARALLEL_UPLOADS: usize = 10;

 /// Upload all files from 'local' to 'remote'
--- a/compute_tools/src/bin/fast_import/s3_uri.rs
+++ b/compute_tools/src/bin/fast_import/s3_uri.rs
@@ -1,6 +1,7 @@
-use anyhow::Result;
 use std::str::FromStr;

+use anyhow::Result;
+
 /// Struct to hold parsed S3 components
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct S3Uri {
--- a/compute_tools/src/catalog.rs
+++ b/compute_tools/src/catalog.rs
@@ -1,18 +1,20 @@
+use std::path::Path;
+use std::process::Stdio;
+use std::result::Result;
+use std::sync::Arc;
+
+use compute_api::responses::CatalogObjects;
 use futures::Stream;
 use postgres::NoTls;
-use std::{path::Path, process::Stdio, result::Result, sync::Arc};
-use tokio::{
-    io::{AsyncBufReadExt, BufReader},
-    process::Command,
-    spawn,
-};
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tokio::spawn;
 use tokio_stream::{self as stream, StreamExt};
 use tokio_util::codec::{BytesCodec, FramedRead};
 use tracing::warn;

 use crate::compute::ComputeNode;
 use crate::pg_helpers::{get_existing_dbs_async, get_existing_roles_async, postgres_conf_for_db};
-use compute_api::responses::CatalogObjects;

 pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
    let conf = compute.get_tokio_conn_conf(Some("compute_ctl:get_dbs_and_roles"));
@@ -55,15 +57,15 @@ pub enum SchemaDumpError {
 pub async fn get_database_schema(
    compute: &Arc<ComputeNode>,
    dbname: &str,
-) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
-    let pgbin = &compute.pgbin;
+) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>> + use<>, SchemaDumpError> {
+    let pgbin = &compute.params.pgbin;
    let basepath = Path::new(pgbin).parent().unwrap();
    let pgdump = basepath.join("pg_dump");

    // Replace the DB in the connection string and disable it to parts.
    // This is the only option to handle DBs with special characters.
-    let conf =
-        postgres_conf_for_db(&compute.connstr, dbname).map_err(|_| SchemaDumpError::Unexpected)?;
+    let conf = postgres_conf_for_db(&compute.params.connstr, dbname)
+        .map_err(|_| SchemaDumpError::Unexpected)?;
    let host = conf
        .get_hosts()
        .first()
--- a/compute_tools/src/checker.rs
+++ b/compute_tools/src/checker.rs
@@ -1,4 +1,4 @@
-use anyhow::{anyhow, Ok, Result};
+use anyhow::{Ok, Result, anyhow};
 use tokio_postgres::NoTls;
 use tracing::{error, instrument, warn};

--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -1,14 +1,15 @@
+use std::fmt::Write as FmtWrite;
 use std::fs::{File, OpenOptions};
 use std::io;
+use std::io::Write;
 use std::io::prelude::*;
 use std::path::Path;

 use anyhow::Result;
-
-use crate::pg_helpers::escape_conf_value;
-use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
 use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};

+use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize, escape_conf_value};
+
 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
 pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
@@ -56,10 +57,20 @@ pub fn write_postgres_conf(
        writeln!(file, "neon.stripe_size={stripe_size}")?;
    }
    if !spec.safekeeper_connstrings.is_empty() {
+        let mut neon_safekeepers_value = String::new();
+        tracing::info!(
+            "safekeepers_connstrings is not zero, gen: {:?}",
+            spec.safekeepers_generation
+        );
+        // If generation is given, prepend sk list with g#number:
+        if let Some(generation) = spec.safekeepers_generation {
+            write!(neon_safekeepers_value, "g#{}:", generation)?;
+        }
+        neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(","));
        writeln!(
            file,
            "neon.safekeepers={}",
-            escape_conf_value(&spec.safekeeper_connstrings.join(","))
+            escape_conf_value(&neon_safekeepers_value)
        )?;
    }
    if let Some(s) = &spec.tenant_id {
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -1,9 +1,8 @@
 use std::sync::Arc;
 use std::thread;

-use tracing::{error, info, instrument};
-
 use compute_api::responses::ComputeStatus;
+use tracing::{error, info, instrument};

 use crate::compute::ComputeNode;

--- a/compute_tools/src/disk_quota.rs
+++ b/compute_tools/src/disk_quota.rs
@@ -1,9 +1,11 @@
 use anyhow::Context;
+use tracing::instrument;

 pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota";

 /// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes.
 /// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set.
+#[instrument]
 pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> {
    let size_kb = size_bytes / 1024;
    // run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}`
--- a/compute_tools/src/extension_server.rs
+++ b/compute_tools/src/extension_server.rs
@@ -71,15 +71,15 @@ More specifically, here is an example ext_index.json
    }
 }
 */
-use anyhow::Result;
-use anyhow::{bail, Context};
+use std::path::Path;
+use std::str;
+
+use anyhow::{Context, Result, bail};
 use bytes::Bytes;
 use compute_api::spec::RemoteExtSpec;
 use regex::Regex;
 use remote_storage::*;
 use reqwest::StatusCode;
-use std::path::Path;
-use std::str;
 use tar::Archive;
 use tracing::info;
 use tracing::log::warn;
@@ -244,7 +244,10 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) {
                info!("writing file {:?}{:?}", control_path, control_content);
                std::fs::write(control_path, control_content).unwrap();
            } else {
-                warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_path);
+                warn!(
+                    "control file {:?} exists both locally and remotely. ignoring the remote version.",
+                    control_path
+                );
            }
        }
    }
--- a/compute_tools/src/http/extract/json.rs
+++ b/compute_tools/src/http/extract/json.rs
@@ -1,6 +1,7 @@
 use std::ops::{Deref, DerefMut};

-use axum::extract::{rejection::JsonRejection, FromRequest, Request};
+use axum::extract::rejection::JsonRejection;
+use axum::extract::{FromRequest, Request};
 use compute_api::responses::GenericAPIError;
 use http::StatusCode;

--- a/compute_tools/src/http/extract/path.rs
+++ b/compute_tools/src/http/extract/path.rs
@@ -1,8 +1,10 @@
 use std::ops::{Deref, DerefMut};

-use axum::extract::{rejection::PathRejection, FromRequestParts};
+use axum::extract::FromRequestParts;
+use axum::extract::rejection::PathRejection;
 use compute_api::responses::GenericAPIError;
-use http::{request::Parts, StatusCode};
+use http::StatusCode;
+use http::request::Parts;

 /// Custom `Path` extractor, so that we can format errors into
 /// `JsonResponse<GenericAPIError>`.
--- a/compute_tools/src/http/extract/query.rs
+++ b/compute_tools/src/http/extract/query.rs
@@ -1,8 +1,10 @@
 use std::ops::{Deref, DerefMut};

-use axum::extract::{rejection::QueryRejection, FromRequestParts};
+use axum::extract::FromRequestParts;
+use axum::extract::rejection::QueryRejection;
 use compute_api::responses::GenericAPIError;
-use http::{request::Parts, StatusCode};
+use http::StatusCode;
+use http::request::Parts;

 /// Custom `Query` extractor, so that we can format errors into
 /// `JsonResponse<GenericAPIError>`.
--- a/compute_tools/src/http/mod.rs
+++ b/compute_tools/src/http/mod.rs
@@ -1,6 +1,8 @@
-use axum::{body::Body, response::Response};
+use axum::body::Body;
+use axum::response::Response;
 use compute_api::responses::{ComputeStatus, GenericAPIError};
-use http::{header::CONTENT_TYPE, StatusCode};
+use http::StatusCode;
+use http::header::CONTENT_TYPE;
 use serde::Serialize;
 use tracing::error;

--- a/compute_tools/src/http/routes/check_writability.rs
+++ b/compute_tools/src/http/routes/check_writability.rs
@@ -1,10 +1,13 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use compute_api::responses::ComputeStatus;
 use http::StatusCode;

-use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse};
+use crate::checker::check_writability;
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Check that the compute is currently running.
 pub(in crate::http) async fn is_writable(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/configure.rs
+++ b/compute_tools/src/http/routes/configure.rs
@@ -1,18 +1,16 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
-use compute_api::{
-    requests::ConfigurationRequest,
-    responses::{ComputeStatus, ComputeStatusResponse},
-};
+use axum::extract::State;
+use axum::response::Response;
+use compute_api::requests::ConfigurationRequest;
+use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
 use http::StatusCode;
 use tokio::task;
 use tracing::info;

-use crate::{
-    compute::{ComputeNode, ParsedSpec},
-    http::{extract::Json, JsonResponse},
-};
+use crate::compute::{ComputeNode, ParsedSpec};
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 // Accept spec in JSON format and request compute configuration. If anything
 // goes wrong after we set the compute status to `ConfigurationPending` and
@@ -24,7 +22,7 @@ pub(in crate::http) async fn configure(
    State(compute): State<Arc<ComputeNode>>,
    request: Json<ConfigurationRequest>,
 ) -> Response {
-    if !compute.live_config_allowed {
+    if !compute.params.live_config_allowed {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "live configuration is not allowed for this compute node".to_string(),
@@ -47,13 +45,18 @@ pub(in crate::http) async fn configure(
            return JsonResponse::invalid_status(state.status);
        }

+        // Pass the tracing span to the main thread that performs the startup,
+        // so that the start_compute operation is considered a child of this
+        // configure request for tracing purposes.
+        state.startup_span = Some(tracing::Span::current());
+
        state.pspec = Some(pspec);
        state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed);
        drop(state);
    }

    // Spawn a blocking thread to wait for compute to become Running. This is
-    // needed to do not block the main pool of workers and be able to serve
+    // needed to not block the main pool of workers and to be able to serve
    // other requests while some particular request is waiting for compute to
    // finish configuration.
    let c = compute.clone();
--- a/compute_tools/src/http/routes/database_schema.rs
+++ b/compute_tools/src/http/routes/database_schema.rs
@@ -1,14 +1,16 @@
 use std::sync::Arc;

-use axum::{body::Body, extract::State, response::Response};
-use http::{header::CONTENT_TYPE, StatusCode};
+use axum::body::Body;
+use axum::extract::State;
+use axum::response::Response;
+use http::StatusCode;
+use http::header::CONTENT_TYPE;
 use serde::Deserialize;

-use crate::{
-    catalog::{get_database_schema, SchemaDumpError},
-    compute::ComputeNode,
-    http::{extract::Query, JsonResponse},
-};
+use crate::catalog::{SchemaDumpError, get_database_schema};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::Query;

 #[derive(Debug, Clone, Deserialize)]
 pub(in crate::http) struct DatabaseSchemaParams {
--- a/compute_tools/src/http/routes/dbs_and_roles.rs
+++ b/compute_tools/src/http/routes/dbs_and_roles.rs
@@ -1,9 +1,12 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use http::StatusCode;

-use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse};
+use crate::catalog::get_dbs_and_roles;
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Get the databases and roles from the compute.
 pub(in crate::http) async fn get_catalog_objects(
--- a/compute_tools/src/http/routes/extension_server.rs
+++ b/compute_tools/src/http/routes/extension_server.rs
@@ -1,19 +1,13 @@
 use std::sync::Arc;

-use axum::{
-    extract::State,
-    response::{IntoResponse, Response},
-};
+use axum::extract::State;
+use axum::response::{IntoResponse, Response};
 use http::StatusCode;
 use serde::Deserialize;

-use crate::{
-    compute::ComputeNode,
-    http::{
-        extract::{Path, Query},
-        JsonResponse,
-    },
-};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::{Path, Query};

 #[derive(Debug, Clone, Deserialize)]
 pub(in crate::http) struct ExtensionServerParams {
@@ -24,11 +18,11 @@ pub(in crate::http) struct ExtensionServerParams {
 /// Download a remote extension.
 pub(in crate::http) async fn download_extension(
    Path(filename): Path<String>,
-    params: Query<ExtensionServerParams>,
+    ext_server_params: Query<ExtensionServerParams>,
    State(compute): State<Arc<ComputeNode>>,
 ) -> Response {
    // Don't even try to download extensions if no remote storage is configured
-    if compute.ext_remote_storage.is_none() {
+    if compute.params.ext_remote_storage.is_none() {
        return JsonResponse::error(
            StatusCode::PRECONDITION_FAILED,
            "remote storage is not configured",
@@ -52,9 +46,9 @@ pub(in crate::http) async fn download_extension(

        remote_extensions.get_ext(
            &filename,
-            params.is_library,
-            &compute.build_tag,
-            &compute.pgversion,
+            ext_server_params.is_library,
+            &compute.params.build_tag,
+            &compute.params.pgversion,
        )
    };

--- a/compute_tools/src/http/routes/extensions.rs
+++ b/compute_tools/src/http/routes/extensions.rs
@@ -1,16 +1,14 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
-use compute_api::{
-    requests::ExtensionInstallRequest,
-    responses::{ComputeStatus, ExtensionInstallResponse},
-};
+use axum::extract::State;
+use axum::response::Response;
+use compute_api::requests::ExtensionInstallRequest;
+use compute_api::responses::{ComputeStatus, ExtensionInstallResponse};
 use http::StatusCode;

-use crate::{
-    compute::ComputeNode,
-    http::{extract::Json, JsonResponse},
-};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 /// Install a extension.
 pub(in crate::http) async fn install_extension(
--- a/compute_tools/src/http/routes/failpoints.rs
+++ b/compute_tools/src/http/routes/failpoints.rs
@@ -17,7 +17,8 @@ pub struct FailpointConfig {
    pub actions: String,
 }

-use crate::http::{extract::Json, JsonResponse};
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 /// Configure failpoints for testing purposes.
 pub(in crate::http) async fn configure_failpoints(
--- a/compute_tools/src/http/routes/grants.rs
+++ b/compute_tools/src/http/routes/grants.rs
@@ -1,16 +1,14 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
-use compute_api::{
-    requests::SetRoleGrantsRequest,
-    responses::{ComputeStatus, SetRoleGrantsResponse},
-};
+use axum::extract::State;
+use axum::response::Response;
+use compute_api::requests::SetRoleGrantsRequest;
+use compute_api::responses::{ComputeStatus, SetRoleGrantsResponse};
 use http::StatusCode;

-use crate::{
-    compute::ComputeNode,
-    http::{extract::Json, JsonResponse},
-};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;
+use crate::http::extract::Json;

 /// Add grants for a role.
 pub(in crate::http) async fn add_grant(
--- a/compute_tools/src/http/routes/insights.rs
+++ b/compute_tools/src/http/routes/insights.rs
@@ -1,10 +1,12 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use compute_api::responses::ComputeStatus;
 use http::StatusCode;

-use crate::{compute::ComputeNode, http::JsonResponse};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Collect current Postgres usage insights.
 pub(in crate::http) async fn get_insights(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/metrics.rs
+++ b/compute_tools/src/http/routes/metrics.rs
@@ -1,10 +1,12 @@
-use axum::{body::Body, response::Response};
-use http::header::CONTENT_TYPE;
+use axum::body::Body;
+use axum::response::Response;
 use http::StatusCode;
+use http::header::CONTENT_TYPE;
 use metrics::proto::MetricFamily;
 use metrics::{Encoder, TextEncoder};

-use crate::{http::JsonResponse, metrics::collect};
+use crate::http::JsonResponse;
+use crate::metrics::collect;

 /// Expose Prometheus metrics.
 pub(in crate::http) async fn get_metrics() -> Response {
--- a/compute_tools/src/http/routes/metrics_json.rs
+++ b/compute_tools/src/http/routes/metrics_json.rs
@@ -1,9 +1,11 @@
 use std::sync::Arc;

-use axum::{extract::State, response::Response};
+use axum::extract::State;
+use axum::response::Response;
 use http::StatusCode;

-use crate::{compute::ComputeNode, http::JsonResponse};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Get startup metrics.
 pub(in crate::http) async fn get_metrics(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/status.rs
+++ b/compute_tools/src/http/routes/status.rs
@@ -1,9 +1,13 @@
-use std::{ops::Deref, sync::Arc};
+use std::ops::Deref;
+use std::sync::Arc;

-use axum::{extract::State, http::StatusCode, response::Response};
+use axum::extract::State;
+use axum::http::StatusCode;
+use axum::response::Response;
 use compute_api::responses::ComputeStatusResponse;

-use crate::{compute::ComputeNode, http::JsonResponse};
+use crate::compute::ComputeNode;
+use crate::http::JsonResponse;

 /// Retrieve the state of the comute.
 pub(in crate::http) async fn get_status(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/routes/terminate.rs
+++ b/compute_tools/src/http/routes/terminate.rs
@@ -1,18 +1,14 @@
 use std::sync::Arc;

-use axum::{
-    extract::State,
-    response::{IntoResponse, Response},
-};
+use axum::extract::State;
+use axum::response::{IntoResponse, Response};
 use compute_api::responses::ComputeStatus;
 use http::StatusCode;
 use tokio::task;
 use tracing::info;

-use crate::{
-    compute::{forward_termination_signal, ComputeNode},
-    http::JsonResponse,
-};
+use crate::compute::{ComputeNode, forward_termination_signal};
+use crate::http::JsonResponse;

 /// Terminate the compute.
 pub(in crate::http) async fn terminate(State(compute): State<Arc<ComputeNode>>) -> Response {
--- a/compute_tools/src/http/server.rs
+++ b/compute_tools/src/http/server.rs
@@ -1,23 +1,20 @@
-use std::{
-    fmt::Display,
-    net::{IpAddr, Ipv6Addr, SocketAddr},
-    sync::Arc,
-    time::Duration,
-};
+use std::fmt::Display;
+use std::net::{IpAddr, Ipv6Addr, SocketAddr};
+use std::sync::Arc;
+use std::time::Duration;

 use anyhow::Result;
-use axum::{
-    extract::Request,
-    middleware::{self, Next},
-    response::{IntoResponse, Response},
-    routing::{get, post},
-    Router,
-};
+use axum::Router;
+use axum::extract::Request;
+use axum::middleware::{self, Next};
+use axum::response::{IntoResponse, Response};
+use axum::routing::{get, post};
 use http::StatusCode;
 use tokio::net::TcpListener;
 use tower::ServiceBuilder;
-use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer};
-use tracing::{debug, error, info, Span};
+use tower_http::request_id::PropagateRequestIdLayer;
+use tower_http::trace::TraceLayer;
+use tracing::{Span, debug, error, info};
 use uuid::Uuid;

 use super::routes::{
@@ -124,6 +121,7 @@ impl From<Server> for Router<Arc<ComputeNode>> {
                )
                .layer(PropagateRequestIdLayer::x_request_id()),
        )
+            .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO))
    }
 }

--- a/compute_tools/src/installed_extensions.rs
+++ b/compute_tools/src/installed_extensions.rs
@@ -1,7 +1,7 @@
-use compute_api::responses::{InstalledExtension, InstalledExtensions};
 use std::collections::HashMap;

 use anyhow::Result;
+use compute_api::responses::{InstalledExtension, InstalledExtensions};
 use postgres::{Client, NoTls};

 use crate::metrics::INSTALLED_EXTENSIONS;
--- a/compute_tools/src/logger.rs
+++ b/compute_tools/src/logger.rs
@@ -1,3 +1,5 @@
+use std::collections::HashMap;
+use tracing::info;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::prelude::*;

@@ -42,3 +44,50 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result
 pub fn inlinify(s: &str) -> String {
    s.replace('\n', "\u{200B}")
 }
+
+pub fn startup_context_from_env() -> Option<opentelemetry::Context> {
+    // Extract OpenTelemetry context for the startup actions from the
+    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
+    // tracing context.
+    //
+    // This is used to propagate the context for the 'start_compute' operation
+    // from the neon control plane. This allows linking together the wider
+    // 'start_compute' operation that creates the compute container, with the
+    // startup actions here within the container.
+    //
+    // There is no standard for passing context in env variables, but a lot of
+    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
+    // https://github.com/open-telemetry/opentelemetry-specification/issues/740
+    //
+    // Switch to the startup context here, and exit it once the startup has
+    // completed and Postgres is up and running.
+    //
+    // If this pod is pre-created without binding it to any particular endpoint
+    // yet, this isn't the right place to enter the startup context. In that
+    // case, the control plane should pass the tracing context as part of the
+    // /configure API call.
+    //
+    // NOTE: This is supposed to only cover the *startup* actions. Once
+    // postgres is configured and up-and-running, we exit this span. Any other
+    // actions that are performed on incoming HTTP requests, for example, are
+    // performed in separate spans.
+    //
+    // XXX: If the pod is restarted, we perform the startup actions in the same
+    // context as the original startup actions, which probably doesn't make
+    // sense.
+    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
+    if let Ok(val) = std::env::var("TRACEPARENT") {
+        startup_tracing_carrier.insert("traceparent".to_string(), val);
+    }
+    if let Ok(val) = std::env::var("TRACESTATE") {
+        startup_tracing_carrier.insert("tracestate".to_string(), val);
+    }
+    if !startup_tracing_carrier.is_empty() {
+        use opentelemetry::propagation::TextMapPropagator;
+        use opentelemetry_sdk::propagation::TraceContextPropagator;
+        info!("got startup tracing context from env variables");
+        Some(TraceContextPropagator::new().extract(&startup_tracing_carrier))
+    } else {
+        None
+    }
+}
--- a/compute_tools/src/lsn_lease.rs
+++ b/compute_tools/src/lsn_lease.rs
@@ -1,17 +1,15 @@
-use anyhow::bail;
-use anyhow::Result;
-use postgres::{NoTls, SimpleQueryMessage};
-use std::time::SystemTime;
-use std::{str::FromStr, sync::Arc, thread, time::Duration};
-use utils::id::TenantId;
-use utils::id::TimelineId;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::thread;
+use std::time::{Duration, SystemTime};

+use anyhow::{Result, bail};
 use compute_api::spec::ComputeMode;
+use postgres::{NoTls, SimpleQueryMessage};
 use tracing::{info, warn};
-use utils::{
-    lsn::Lsn,
-    shard::{ShardCount, ShardNumber, TenantShardId},
-};
+use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;
+use utils::shard::{ShardCount, ShardNumber, TenantShardId};

 use crate::compute::ComputeNode;

--- a/compute_tools/src/metrics.rs
+++ b/compute_tools/src/metrics.rs
@@ -1,6 +1,6 @@
 use metrics::core::Collector;
 use metrics::proto::MetricFamily;
-use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec};
+use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec};
 use once_cell::sync::Lazy;

 pub(crate) static INSTALLED_EXTENSIONS: Lazy<UIntGaugeVec> = Lazy::new(|| {
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -1,13 +1,14 @@
 use std::sync::Arc;
-use std::{thread, time::Duration};
+use std::thread;
+use std::time::Duration;

 use chrono::{DateTime, Utc};
+use compute_api::responses::ComputeStatus;
+use compute_api::spec::ComputeFeature;
 use postgres::{Client, NoTls};
 use tracing::{debug, error, info, warn};

 use crate::compute::ComputeNode;
-use compute_api::responses::ComputeStatus;
-use compute_api::spec::ComputeFeature;

 const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);

@@ -17,7 +18,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
 // should be handled gracefully.
 fn watch_compute_activity(compute: &ComputeNode) {
    // Suppose that `connstr` doesn't change
-    let connstr = compute.connstr.clone();
+    let connstr = compute.params.connstr.clone();
    let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor"));

    // During startup and configuration we connect to every Postgres database,
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -9,7 +9,8 @@ use std::process::Child;
 use std::str::FromStr;
 use std::time::{Duration, Instant};

-use anyhow::{bail, Result};
+use anyhow::{Result, bail};
+use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
 use futures::StreamExt;
 use ini::Ini;
 use notify::{RecursiveMode, Watcher};
@@ -21,8 +22,6 @@ use tokio_postgres;
 use tokio_postgres::NoTls;
 use tracing::{debug, error, info, instrument};

-use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
-
 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

 /// Escape a string for including it in a SQL literal.
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,20 +1,20 @@
-use anyhow::{anyhow, bail, Result};
-use reqwest::StatusCode;
 use std::fs::File;
 use std::path::Path;
-use tokio_postgres::Client;
-use tracing::{error, info, instrument, warn};
-
-use crate::config;
-use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS};
-use crate::migration::MigrationRunner;
-use crate::params::PG_HBA_ALL_MD5;
-use crate::pg_helpers::*;

+use anyhow::{Result, anyhow, bail};
 use compute_api::responses::{
    ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse,
 };
 use compute_api::spec::ComputeSpec;
+use reqwest::StatusCode;
+use tokio_postgres::Client;
+use tracing::{error, info, instrument, warn};
+
+use crate::config;
+use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS};
+use crate::migration::MigrationRunner;
+use crate::params::PG_HBA_ALL_MD5;
+use crate::pg_helpers::*;

 // Do control plane request and return response if any. In case of error it
 // returns a bool flag indicating whether it makes sense to retry the request
@@ -141,7 +141,6 @@ pub fn get_spec_from_control_plane(
 /// Check `pg_hba.conf` and update if needed to allow external connections.
 pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
-    info!("checking pg_hba.conf");
    let pghba_path = pgdata_path.join("pg_hba.conf");

    if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? {
@@ -156,12 +155,11 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
 /// Create a standby.signal file
 pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {
    // XXX: consider making it a part of spec.json
-    info!("adding standby.signal");
    let signalfile = pgdata_path.join("standby.signal");

    if !signalfile.exists() {
-        info!("created standby.signal");
        File::create(signalfile)?;
+        info!("created standby.signal");
    } else {
        info!("reused pre-existing standby.signal");
    }
@@ -170,7 +168,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {

 #[instrument(skip_all)]
 pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
-    info!("handle neon extension upgrade");
    let query = "ALTER EXTENSION neon UPDATE";
    info!("update neon extension version with query: {}", query);
    client.simple_query(query).await?;
--- a/compute_tools/src/spec_apply.rs
+++ b/compute_tools/src/spec_apply.rs
@@ -1,18 +1,416 @@
 use std::collections::{HashMap, HashSet};
 use std::fmt::{Debug, Formatter};
 use std::future::Future;
-use std::iter::empty;
-use std::iter::once;
+use std::iter::{empty, once};
 use std::sync::Arc;

-use crate::compute::construct_superuser_query;
-use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt};
-use anyhow::{bail, Result};
+use anyhow::{Context, Result};
+use compute_api::responses::ComputeStatus;
 use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role};
 use futures::future::join_all;
 use tokio::sync::RwLock;
 use tokio_postgres::Client;
-use tracing::{debug, info_span, Instrument};
+use tokio_postgres::error::SqlState;
+use tracing::{Instrument, debug, error, info, info_span, instrument, warn};
+
+use crate::compute::{ComputeNode, ComputeState, construct_superuser_query};
+use crate::pg_helpers::{
+    DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal, get_existing_dbs_async,
+    get_existing_roles_async,
+};
+use crate::spec_apply::ApplySpecPhase::{
+    CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon,
+    CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions,
+    HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles,
+    RunInEachDatabase,
+};
+use crate::spec_apply::PerDatabasePhase::{
+    ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension,
+};
+
+impl ComputeNode {
+    /// Apply the spec to the running PostgreSQL instance.
+    /// The caller can decide to run with multiple clients in parallel, or
+    /// single mode.  Either way, the commands executed will be the same, and
+    /// only commands run in different databases are parallelized.
+    #[instrument(skip_all)]
+    pub fn apply_spec_sql(
+        &self,
+        spec: Arc<ComputeSpec>,
+        conf: Arc<tokio_postgres::Config>,
+        concurrency: usize,
+    ) -> Result<()> {
+        info!("Applying config with max {} concurrency", concurrency);
+        debug!("Config: {:?}", spec);
+
+        let rt = tokio::runtime::Handle::current();
+        rt.block_on(async {
+            // Proceed with post-startup configuration. Note that the order of operations is important.
+            let client = Self::get_maintenance_client(&conf).await?;
+            let spec = spec.clone();
+
+            let databases = get_existing_dbs_async(&client).await?;
+            let roles = get_existing_roles_async(&client)
+                .await?
+                .into_iter()
+                .map(|role| (role.name.clone(), role))
+                .collect::<HashMap<String, Role>>();
+
+            // Check if we need to drop subscriptions before starting the endpoint.
+            //
+            // It is important to do this operation exactly once when endpoint starts on a new branch.
+            // Otherwise, we may drop subscriptions that were newly created rather than inherited.
+            //
+            // We cannot rely only on spec.drop_subscriptions_before_start flag,
+            // because if for some reason compute restarts inside VM,
+            // it will start again with the same spec and flag value.
+            //
+            // To handle this, we save the fact of the operation in the database
+            // in the neon.drop_subscriptions_done table.
+            // If the table does not exist, we assume that the operation was never performed, so we must do it.
+            // If table exists, we check if the operation was performed on the current timeline.
+            //
+            let mut drop_subscriptions_done = false;
+
+            if spec.drop_subscriptions_before_start {
+                let timeline_id = self.get_timeline_id().context("timeline_id must be set")?;
+                let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id);
+
+                info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id);
+
+                drop_subscriptions_done =  match
+                    client.simple_query(&query).await {
+                    Ok(result) => {
+                        matches!(&result[0], postgres::SimpleQueryMessage::Row(_))
+                    },
+                    Err(e) =>
+                    {
+                        match e.code() {
+                            Some(&SqlState::UNDEFINED_TABLE) => false,
+                            _ => {
+                                // We don't expect any other error here, except for the schema/table not existing
+                                error!("Error checking if drop subscription operation was already performed: {}", e);
+                                return Err(e.into());
+                            }
+                        }
+                    }
+                }
+            };
+
+
+            let jwks_roles = Arc::new(
+                spec.as_ref()
+                    .local_proxy_config
+                    .iter()
+                    .flat_map(|it| &it.jwks)
+                    .flatten()
+                    .flat_map(|setting| &setting.role_names)
+                    .cloned()
+                    .collect::<HashSet<_>>(),
+            );
+
+            let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext {
+                roles,
+                dbs: databases,
+            }));
+
+            // Apply special pre drop database phase.
+            // NOTE: we use the code of RunInEachDatabase phase for parallelism
+            // and connection management, but we don't really run it in *each* database,
+            // only in the databases we're about to drop.
+            info!("Applying PerDatabase (pre-dropdb) phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            // Run the phase for each database that we're about to drop.
+            let db_processes = spec
+                .delta_operations
+                .iter()
+                .flatten()
+                .filter_map(move |op| {
+                    if op.action.as_str() == "delete_db" {
+                        Some(op.name.clone())
+                    } else {
+                        None
+                    }
+                })
+                .map(|dbname| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    // We only need dbname field for this phase, so set other fields to dummy values
+                    let db = DB::UserDB(Database {
+                        name: dbname.clone(),
+                        owner: "cloud_admin".to_string(),
+                        options: None,
+                        restrict_conn: false,
+                        invalid: false,
+                    });
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        [DropLogicalSubscriptions].to_vec(),
+                    );
+
+                    Ok(tokio::spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                if let Err(e) = handle.await? {
+                    // Handle the error case where the database does not exist
+                    // We do not check whether the DB exists or not in the deletion phase,
+                    // so we shouldn't be strict about it in pre-deletion cleanup as well.
+                    if e.to_string().contains("does not exist") {
+                        warn!("Error dropping subscription: {}", e);
+                    } else {
+                        return Err(e);
+                    }
+                };
+            }
+
+            for phase in [
+                CreateSuperUser,
+                DropInvalidDatabases,
+                RenameRoles,
+                CreateAndAlterRoles,
+                RenameAndDeleteDatabases,
+                CreateAndAlterDatabases,
+                CreateSchemaNeon,
+            ] {
+                info!("Applying phase {:?}", &phase);
+                apply_operations(
+                    spec.clone(),
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    phase,
+                    || async { Ok(&client) },
+                )
+                .await?;
+            }
+
+            info!("Applying RunInEachDatabase2 phase");
+            let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency));
+
+            let db_processes = spec
+                .cluster
+                .databases
+                .iter()
+                .map(|db| DB::new(db.clone()))
+                // also include the system database (DB::SystemDB) alongside the user databases
+                .chain(once(DB::SystemDB))
+                .map(|db| {
+                    let spec = spec.clone();
+                    let ctx = ctx.clone();
+                    let jwks_roles = jwks_roles.clone();
+                    let mut conf = conf.as_ref().clone();
+                    let concurrency_token = concurrency_token.clone();
+                    let db = db.clone();
+
+                    debug!("Applying per-database phases for Database {:?}", &db);
+
+                    match &db {
+                        DB::SystemDB => {}
+                        DB::UserDB(db) => {
+                            conf.dbname(db.name.as_str());
+                        }
+                    }
+
+                    let conf = Arc::new(conf);
+                    let mut phases = vec![
+                        DeleteDBRoleReferences,
+                        ChangeSchemaPerms,
+                        HandleAnonExtension,
+                    ];
+
+                    if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
+                        info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
+                        phases.push(DropLogicalSubscriptions);
+                    }
+
+                    let fut = Self::apply_spec_sql_db(
+                        spec.clone(),
+                        conf,
+                        ctx.clone(),
+                        jwks_roles.clone(),
+                        concurrency_token.clone(),
+                        db,
+                        phases,
+                    );
+
+                    Ok(tokio::spawn(fut))
+                })
+                .collect::<Vec<Result<_, anyhow::Error>>>();
+
+            for process in db_processes.into_iter() {
+                let handle = process?;
+                handle.await??;
+            }
+
+            let mut phases = vec![
+                HandleOtherExtensions,
+                HandleNeonExtension, // This step depends on CreateSchemaNeon
+                CreateAvailabilityCheck,
+                DropRoles,
+            ];
+
+            // This step depends on CreateSchemaNeon
+            if spec.drop_subscriptions_before_start && !drop_subscriptions_done {
+                info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set");
+                phases.push(FinalizeDropLogicalSubscriptions);
+            }
+
+            for phase in phases {
+                debug!("Applying phase {:?}", &phase);
+                apply_operations(
+                    spec.clone(),
+                    ctx.clone(),
+                    jwks_roles.clone(),
+                    phase,
+                    || async { Ok(&client) },
+                )
+                .await?;
+            }
+
+            Ok::<(), anyhow::Error>(())
+        })?;
+
+        Ok(())
+    }
+
+    /// Apply the given `RunInEachDatabase` subphases to a single database (`db`),
+    /// in order, returning the first error encountered.
+    ///
+    /// One permit of `concurrency_token` is held for the whole call, so the caller
+    /// has to make sure the semaphore isn't exhausted (and has at least one permit).
+    async fn apply_spec_sql_db(
+        spec: Arc<ComputeSpec>,
+        conf: Arc<tokio_postgres::Config>,
+        ctx: Arc<tokio::sync::RwLock<MutableApplyContext>>,
+        jwks_roles: Arc<HashSet<String>>,
+        concurrency_token: Arc<tokio::sync::Semaphore>,
+        db: DB,
+        subphases: Vec<PerDatabasePhase>,
+    ) -> Result<()> {
+        let _permit = concurrency_token.acquire().await?;
+        // Filled in by the callback below on first use; stays `None` if never called.
+        let mut client_conn = None;
+
+        for subphase in subphases {
+            apply_operations(
+                spec.clone(),
+                ctx.clone(),
+                jwks_roles.clone(),
+                RunInEachDatabase {
+                    db: db.clone(),
+                    subphase,
+                },
+                // Client callback: connects with `conf` on its first invocation
+                // and hands out the same client on every later one. The intent is
+                // that `apply_operations` only calls it when it has queries to
+                // run, so databases without work are never connected to.
+                || async {
+                    if client_conn.is_none() {
+                        let db_client = Self::get_maintenance_client(&conf).await?;
+                        client_conn.replace(db_client);
+                    }
+                    let client = client_conn.as_ref().unwrap();
+                    Ok(client)
+                },
+            )
+            .await?;
+        }
+        // Release the per-database client (if one was created) before returning.
+        drop(client_conn);
+
+        Ok::<(), anyhow::Error>(())
+    }
+
+    /// Choose how many concurrent connections to use for applying the spec changes.
+    ///
+    /// In `Init` state the number is derived from `max_connections` (cluster settings
+    /// first, then the raw `postgresql_conf` text, defaulting to 10 when neither
+    /// yields a number); in any other state it is derived from
+    /// `superuser_reserved_connections` (defaulting to 3). The result is never 0,
+    /// because a semaphore created with zero permits would never let a task run.
+    pub fn max_service_connections(
+        &self,
+        compute_state: &ComputeState,
+        spec: &ComputeSpec,
+    ) -> usize {
+        // If the cluster is in Init state we don't have to deal with user connections,
+        // and can thus use all `max_connections` connection slots. However, that's generally not
+        // very efficient, so we generally still limit it to a smaller number.
+        if compute_state.status == ComputeStatus::Init {
+            // If the settings contain 'max_connections', use that as template
+            if let Some(config) = spec.cluster.settings.find("max_connections") {
+                config.parse::<usize>().ok()
+            } else {
+                // Otherwise, try to find the setting in the postgresql_conf string
+                spec.cluster
+                    .postgresql_conf
+                    .iter()
+                    .flat_map(|conf| conf.split("\n"))
+                    .filter_map(|line| {
+                        let (key, value) = line.split_once("=")?;
+                        // Exact key match only: commented-out lines ("#max_connections")
+                        // and other settings that merely contain the name are skipped.
+                        if key.trim() != "max_connections" {
+                            return None;
+                        }
+
+                        value.trim().parse::<usize>().ok()
+                    })
+                    // PostgreSQL honours the last occurrence of a repeated setting,
+                    // so take the last parsable entry rather than the first.
+                    .last()
+            }
+            // If max_connections is present, use at most 1/3rd of that.
+            // When max_connections is lower than 30, try to use at least 10 connections, but
+            // never more than max_connections.
+            .map(|limit| match limit {
+                // Never hand out 0: a zero-permit semaphore would block forever.
+                0 => 1,
+                1..10 => limit,
+                10..30 => 10,
+                30.. => limit / 3,
+            })
+            // If we didn't find max_connections, default to 10 concurrent connections.
+            .unwrap_or(10)
+        } else {
+            // Any non-Init status, i.e. normally Running.
+            // Because the cluster is already in the Running state, we should assume users are
+            // already connected to the cluster, and high concurrency could negatively
+            // impact user connectivity. Therefore, we can limit concurrency to the number of
+            // reserved superuser connections, which users wouldn't be able to use anyway.
+            spec.cluster
+                .settings
+                .find("superuser_reserved_connections")
+                .iter()
+                .filter_map(|val| val.parse::<usize>().ok())
+                .map(|val| if val > 1 { val - 1 } else { 1 })
+                .last()
+                .unwrap_or(3)
+        }
+    }
+}

 #[derive(Clone)]
 pub enum DB {
@@ -47,6 +445,11 @@ pub enum PerDatabasePhase {
    DeleteDBRoleReferences,
    ChangeSchemaPerms,
    HandleAnonExtension,
+    /// This is a shared phase, used for both i) dropping dangling LR subscriptions
+    /// before dropping the DB, and ii) dropping all subscriptions after creating
+    /// a fresh branch.
+    /// N.B. we will skip all DBs that are not present in Postgres, invalid, or
+    /// have `datallowconn = false` (`restrict_conn`).
    DropLogicalSubscriptions,
 }

@@ -168,7 +571,7 @@ where
 ///
 /// In the future we may generate a single stream of changes and then
 /// sort/merge/batch execution, but for now this is a nice way to improve
-/// batching behaviour of the commands.
+/// batching behavior of the commands.
 async fn get_operations<'a>(
    spec: &'a ComputeSpec,
    ctx: &'a RwLock<MutableApplyContext>,
@@ -451,6 +854,41 @@ async fn get_operations<'a>(
            )),
        }))),
        ApplySpecPhase::RunInEachDatabase { db, subphase } => {
+            // Do some checks that user DB exists and we can access it.
+            //
+            // During phases like DropLogicalSubscriptions and DeleteDBRoleReferences,
+            // which happen before dropping the DB, the current run could be a retry,
+            // so it's a valid case when the DB is already absent. The case of
+            // `pg_database.datallowconn = false`/`restrict_conn` is a bit tricky, as
+            // in theory the user can have some dangling objects there, so we will fail
+            // at the actual drop later. Yet, to fix that in the current code we would
+            // need to ALTER DATABASE and then check back, but that is even more
+            // invasive, so that's not what we really want to do here.
+            //
+            // For ChangeSchemaPerms, skipping DBs we cannot access is totally fine.
+            if let DB::UserDB(db) = db {
+                let databases = &ctx.read().await.dbs;
+
+                let edb = match databases.get(&db.name) {
+                    Some(edb) => edb,
+                    None => {
+                        warn!(
+                            "skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL",
+                            subphase, db.name
+                        );
+                        return Ok(Box::new(empty()));
+                    }
+                };
+
+                if edb.restrict_conn || edb.invalid {
+                    warn!(
+                        "skipping RunInEachDatabase phase {:?}, database {} is (restrict_conn={}, invalid={})",
+                        subphase, db.name, edb.restrict_conn, edb.invalid
+                    );
+                    return Ok(Box::new(empty()));
+                }
+            }
+
            match subphase {
                PerDatabasePhase::DropLogicalSubscriptions => {
                    match &db {
@@ -530,25 +968,12 @@ async fn get_operations<'a>(
                    Ok(Box::new(operations))
                }
                PerDatabasePhase::ChangeSchemaPerms => {
-                    let ctx = ctx.read().await;
-                    let databases = &ctx.dbs;
-
                    let db = match &db {
                        // ignore schema permissions on the system database
                        DB::SystemDB => return Ok(Box::new(empty())),
                        DB::UserDB(db) => db,
                    };

-                    if databases.get(&db.name).is_none() {
-                        bail!("database {} doesn't exist in PostgreSQL", db.name);
-                    }
-
-                    let edb = databases.get(&db.name).unwrap();
-
-                    if edb.restrict_conn || edb.invalid {
-                        return Ok(Box::new(empty()));
-                    }
-
                    let operations = vec![
                        Operation {
                            query: format!(
@@ -566,6 +991,7 @@ async fn get_operations<'a>(

                    Ok(Box::new(operations))
                }
+                // TODO: remove this completely https://github.com/neondatabase/cloud/issues/22663
                PerDatabasePhase::HandleAnonExtension => {
                    // Only install Anon into user databases
                    let db = match &db {
--- a/compute_tools/src/swap.rs
+++ b/compute_tools/src/swap.rs
@@ -1,10 +1,11 @@
 use std::path::Path;

-use anyhow::{anyhow, Context};
-use tracing::warn;
+use anyhow::{Context, anyhow};
+use tracing::{instrument, warn};

 pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";

+#[instrument]
 pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
    // run `/neonvm/bin/resize-swap --once {size_bytes}`
    //
--- a/compute_tools/tests/config_test.rs
+++ b/compute_tools/tests/config_test.rs
@@ -1,7 +1,7 @@
 #[cfg(test)]
 mod config_tests {

-    use std::fs::{remove_file, File};
+    use std::fs::{File, remove_file};
    use std::io::{Read, Write};
    use std::path::Path;

--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -25,7 +25,7 @@ use anyhow::Context;
 use camino::{Utf8Path, Utf8PathBuf};
 use nix::errno::Errno;
 use nix::fcntl::{FcntlArg, FdFlag};
-use nix::sys::signal::{kill, Signal};
+use nix::sys::signal::{Signal, kill};
 use nix::unistd::Pid;
 use utils::pid_file::{self, PidFileRead};

--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -5,7 +5,16 @@
 //! easier to work with locally. The python tests in `test_runner`
 //! rely on `neon_local` to set up the environment for each test.
 //!
-use anyhow::{anyhow, bail, Context, Result};
+use std::borrow::Cow;
+use std::collections::{BTreeSet, HashMap};
+use std::fs::File;
+use std::os::fd::AsRawFd;
+use std::path::PathBuf;
+use std::process::exit;
+use std::str::FromStr;
+use std::time::Duration;
+
+use anyhow::{Context, Result, anyhow, bail};
 use clap::Parser;
 use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
@@ -19,7 +28,7 @@ use control_plane::storage_controller::{
    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
 use control_plane::{broker, local_env};
-use nix::fcntl::{flock, FlockArg};
+use nix::fcntl::{FlockArg, flock};
 use pageserver_api::config::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
@@ -31,27 +40,18 @@ use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInf
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
 use postgres_backend::AuthType;
 use postgres_connection::parse_host_port;
+use safekeeper_api::membership::SafekeeperGeneration;
 use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
 };
-use std::borrow::Cow;
-use std::collections::{BTreeSet, HashMap};
-use std::fs::File;
-use std::os::fd::AsRawFd;
-use std::path::PathBuf;
-use std::process::exit;
-use std::str::FromStr;
-use std::time::Duration;
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
 use tokio::task::JoinSet;
 use url::Host;
-use utils::{
-    auth::{Claims, Scope},
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
-    lsn::Lsn,
-    project_git_version,
-};
+use utils::auth::{Claims, Scope};
+use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};
+use utils::lsn::Lsn;
+use utils::project_git_version;

 // Default id of a safekeeper node, if not specified on the command line.
 const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1);
@@ -597,7 +597,15 @@ struct EndpointStartCmdArgs {
    #[clap(long = "pageserver-id")]
    endpoint_pageserver_id: Option<NodeId>,

-    #[clap(long)]
+    #[clap(
+        long,
+        help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations."
+    )]
+    safekeepers_generation: Option<u32>,
+    #[clap(
+        long,
+        help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override."
+    )]
    safekeepers: Option<String>,

    #[clap(
@@ -618,9 +626,9 @@ struct EndpointStartCmdArgs {
    )]
    allow_multiple: bool,

-    #[clap(short = 't', long, help = "timeout until we fail the command")]
-    #[arg(default_value = "10s")]
-    start_timeout: humantime::Duration,
+    #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")]
+    #[arg(default_value = "90s")]
+    start_timeout: Duration,
 }

 #[derive(clap::Args)]
@@ -887,20 +895,6 @@ fn print_timeline(
    Ok(())
 }

-/// Returns a map of timeline IDs to timeline_id@lsn strings.
-/// Connects to the pageserver to query this information.
-async fn get_timeline_infos(
-    env: &local_env::LocalEnv,
-    tenant_shard_id: &TenantShardId,
-) -> Result<HashMap<TimelineId, TimelineInfo>> {
-    Ok(get_default_pageserver(env)
-        .timeline_list(tenant_shard_id)
-        .await?
-        .into_iter()
-        .map(|timeline_info| (timeline_info.timeline_id, timeline_info))
-        .collect())
-}
-
 /// Helper function to get tenant id from an optional --tenant_id option or from the config file
 fn get_tenant_id(
    tenant_id_arg: Option<TenantId>,
@@ -935,7 +929,9 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result<LocalEnv> {
    let init_conf: NeonLocalInitConf = if let Some(config_path) = &args.config {
        // User (likely the Python test suite) provided a description of the environment.
        if args.num_pageservers.is_some() {
-            bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
+            bail!(
+                "Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead"
+            );
        }
        // load and parse the file
        let contents = std::fs::read_to_string(config_path).with_context(|| {
@@ -1251,12 +1247,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller
            // where shard 0 is attached, and query there.
            let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?;
-            let timeline_infos = get_timeline_infos(env, &tenant_shard_id)
-                .await
-                .unwrap_or_else(|e| {
-                    eprintln!("Failed to load timeline info: {}", e);
-                    HashMap::new()
-                });

            let timeline_name_mappings = env.timeline_name_mappings();

@@ -1285,12 +1275,9 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
                        lsn.to_string()
                    }
                    _ => {
-                        // -> primary endpoint or hot replica
-                        // Use the LSN at the end of the timeline.
-                        timeline_infos
-                            .get(&endpoint.timeline_id)
-                            .map(|bi| bi.last_record_lsn.to_string())
-                            .unwrap_or_else(|| "?".to_string())
+                        // As the LSN here refers to the one that the compute is started with,
+                        // we display nothing as it is a primary/hot standby compute.
+                        "---".to_string()
                    }
                };

@@ -1338,10 +1325,14 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res

            match (mode, args.hot_standby) {
                (ComputeMode::Static(_), true) => {
-                    bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
+                    bail!(
+                        "Cannot start a node in hot standby mode when it is already configured as a static replica"
+                    )
                }
                (ComputeMode::Primary, true) => {
-                    bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
+                    bail!(
+                        "Cannot start a node as a hot standby replica, it is already configured as primary node"
+                    )
                }
                _ => {}
            }
@@ -1368,6 +1359,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            let pageserver_id = args.endpoint_pageserver_id;
            let remote_ext_config = &args.remote_ext_config;

+            let safekeepers_generation = args.safekeepers_generation.map(SafekeeperGeneration::new);
            // If --safekeepers argument is given, use only the listed
            // safekeeper nodes; otherwise all from the env.
            let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? {
@@ -1443,11 +1435,13 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res
            endpoint
                .start(
                    &auth_token,
+                    safekeepers_generation,
                    safekeepers,
                    pageservers,
                    remote_ext_config.as_ref(),
                    stripe_size.0 as usize,
                    args.create_test_user,
+                    args.start_timeout,
                )
                .await?;
        }
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -8,7 +8,6 @@
 use std::time::Duration;

 use anyhow::Context;
-
 use camino::Utf8PathBuf;

 use crate::{background_process, local_env};
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -37,29 +37,23 @@
 //! ```
 //!
 use std::collections::BTreeMap;
-use std::net::IpAddr;
-use std::net::Ipv4Addr;
-use std::net::SocketAddr;
-use std::net::TcpStream;
+use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream};
 use std::path::PathBuf;
 use std::process::Command;
 use std::str::FromStr;
 use std::sync::Arc;
-use std::time::Duration;
-use std::time::SystemTime;
-use std::time::UNIX_EPOCH;
+use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};

-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{Context, Result, anyhow, bail};
 use compute_api::requests::ConfigurationRequest;
-use compute_api::responses::ComputeCtlConfig;
-use compute_api::spec::Database;
-use compute_api::spec::PgIdent;
-use compute_api::spec::RemoteExtSpec;
-use compute_api::spec::Role;
-use nix::sys::signal::kill;
-use nix::sys::signal::Signal;
+use compute_api::responses::{ComputeCtlConfig, ComputeStatus, ComputeStatusResponse};
+use compute_api::spec::{
+    Cluster, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent, RemoteExtSpec, Role,
+};
+use nix::sys::signal::{Signal, kill};
 use pageserver_api::shard::ShardStripeSize;
 use reqwest::header::CONTENT_TYPE;
+use safekeeper_api::membership::SafekeeperGeneration;
 use serde::{Deserialize, Serialize};
 use tracing::debug;
 use url::Host;
@@ -69,9 +63,6 @@ use crate::local_env::LocalEnv;
 use crate::postgresql_conf::PostgresConf;
 use crate::storage_controller::StorageController;

-use compute_api::responses::{ComputeStatus, ComputeStatusResponse};
-use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec};
-
 // contents of a endpoint.json file
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 pub struct EndpointConf {
@@ -237,7 +228,9 @@ impl ComputeControlPlane {
            });

            if let Some((key, _)) = duplicates.next() {
-                bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported.");
+                bail!(
+                    "attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported."
+                );
            }
        }
        Ok(())
@@ -584,14 +577,17 @@ impl Endpoint {
        Ok(safekeeper_connstrings)
    }

+    #[allow(clippy::too_many_arguments)]
    pub async fn start(
        &self,
        auth_token: &Option<String>,
+        safekeepers_generation: Option<SafekeeperGeneration>,
        safekeepers: Vec<NodeId>,
        pageservers: Vec<(Host, u16)>,
        remote_ext_config: Option<&String>,
        shard_stripe_size: usize,
        create_test_user: bool,
+        start_timeout: Duration,
    ) -> Result<()> {
        if self.status() == EndpointStatus::Running {
            anyhow::bail!("The endpoint is already running");
@@ -663,6 +659,7 @@ impl Endpoint {
            timeline_id: Some(self.timeline_id),
            mode: self.mode,
            pageserver_connstring: Some(pageserver_connstring),
+            safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()),
            safekeeper_connstrings,
            storage_auth_token: auth_token.clone(),
            remote_extensions,
@@ -778,17 +775,18 @@ impl Endpoint {
        std::fs::write(pidfile_path, pid.to_string())?;

        // Wait for it to start
-        let mut attempt = 0;
        const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
-        const MAX_ATTEMPTS: u32 = 10 * 90; // Wait up to 1.5 min
+        let start_at = Instant::now();
        loop {
-            attempt += 1;
            match self.get_status().await {
                Ok(state) => {
                    match state.status {
                        ComputeStatus::Init => {
-                            if attempt == MAX_ATTEMPTS {
-                                bail!("compute startup timed out; still in Init state");
+                            if Instant::now().duration_since(start_at) > start_timeout {
+                                bail!(
+                                    "compute startup timed out {:?}; still in Init state",
+                                    start_timeout
+                                );
                            }
                            // keep retrying
                        }
@@ -815,8 +813,11 @@ impl Endpoint {
                    }
                }
                Err(e) => {
-                    if attempt == MAX_ATTEMPTS {
-                        return Err(e).context("timed out waiting to connect to compute_ctl HTTP");
+                    if Instant::now().duration_since(start_at) > start_timeout {
+                        return Err(e).context(format!(
+                            "timed out {:?} waiting to connect to compute_ctl HTTP",
+                            start_timeout,
+                        ));
                    }
                }
            }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -3,28 +3,22 @@
 //! Now it also provides init method which acts like a stub for proper installation
 //! script which will use local paths.

-use anyhow::{bail, Context};
+use std::collections::HashMap;
+use std::net::{IpAddr, Ipv4Addr, SocketAddr};
+use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
+use std::time::Duration;
+use std::{env, fs};

+use anyhow::{Context, bail};
 use clap::ValueEnum;
 use postgres_backend::AuthType;
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::env;
-use std::fs;
-use std::net::IpAddr;
-use std::net::Ipv4Addr;
-use std::net::SocketAddr;
-use std::path::{Path, PathBuf};
-use std::process::{Command, Stdio};
-use std::time::Duration;
-use utils::{
-    auth::{encode_from_key_file, Claims},
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
-};
+use utils::auth::{Claims, encode_from_key_file};
+use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId};

-use crate::pageserver::PageServerNode;
-use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
+use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode};
 use crate::safekeeper::SafekeeperNode;

 pub const DEFAULT_PG_VERSION: u32 = 16;
@@ -171,6 +165,8 @@ pub struct NeonStorageControllerConf {

    #[serde(with = "humantime_serde")]
    pub long_reconcile_threshold: Option<Duration>,
+
+    pub load_safekeepers: bool,
 }

 impl NeonStorageControllerConf {
@@ -194,6 +190,7 @@ impl Default for NeonStorageControllerConf {
            max_secondary_lag_bytes: None,
            heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL,
            long_reconcile_threshold: None,
+            load_safekeepers: true,
        }
    }
 }
@@ -465,7 +462,9 @@ impl LocalEnv {
            if old_timeline_id == &timeline_id {
                Ok(())
            } else {
-                bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}");
+                bail!(
+                    "branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}"
+                );
            }
        } else {
            existing_values.push((tenant_id, timeline_id));
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -7,7 +7,6 @@
 //! ```
 //!
 use std::collections::HashMap;
-
 use std::io;
 use std::io::Write;
 use std::num::NonZeroU64;
@@ -15,22 +14,19 @@ use std::path::PathBuf;
 use std::str::FromStr;
 use std::time::Duration;

-use anyhow::{bail, Context};
+use anyhow::{Context, bail};
 use camino::Utf8PathBuf;
 use pageserver_api::models::{self, TenantInfo, TimelineInfo};
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
-use postgres_connection::{parse_host_port, PgConnectionConfig};
+use postgres_connection::{PgConnectionConfig, parse_host_port};
 use utils::auth::{Claims, Scope};
-use utils::id::NodeId;
-use utils::{
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-};
+use utils::id::{NodeId, TenantId, TimelineId};
+use utils::lsn::Lsn;

-use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
-use crate::{background_process, local_env::LocalEnv};
+use crate::background_process;
+use crate::local_env::{LocalEnv, NeonLocalInitPageserverConf, PageServerConf};

 /// Directory within .neon which will be used by default for LocalFs remote storage.
 pub const PAGESERVER_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/pageserver";
@@ -81,7 +77,11 @@ impl PageServerNode {
        &self,
        conf: NeonLocalInitPageserverConf,
    ) -> anyhow::Result<toml_edit::DocumentMut> {
-        assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
+        assert_eq!(
+            &PageServerConf::from(&conf),
+            &self.conf,
+            "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully"
+        );

        // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)

@@ -335,13 +335,21 @@ impl PageServerNode {
                .map(|x| x.parse::<u64>())
                .transpose()
                .context("Failed to parse 'checkpoint_distance' as an integer")?,
-            checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
+            checkpoint_timeout: settings
+                .remove("checkpoint_timeout")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'checkpoint_timeout' as duration")?,
            compaction_target_size: settings
                .remove("compaction_target_size")
                .map(|x| x.parse::<u64>())
                .transpose()
                .context("Failed to parse 'compaction_target_size' as an integer")?,
-            compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
+            compaction_period: settings
+                .remove("compaction_period")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'compaction_period' as duration")?,
            compaction_threshold: settings
                .remove("compaction_threshold")
                .map(|x| x.parse::<usize>())
@@ -387,7 +395,10 @@ impl PageServerNode {
                .map(|x| x.parse::<u64>())
                .transpose()
                .context("Failed to parse 'gc_horizon' as an integer")?,
-            gc_period: settings.remove("gc_period").map(|x| x.to_string()),
+            gc_period: settings.remove("gc_period")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'gc_period' as duration")?,
            image_creation_threshold: settings
                .remove("image_creation_threshold")
                .map(|x| x.parse::<usize>())
@@ -403,13 +414,20 @@ impl PageServerNode {
                .map(|x| x.parse::<usize>())
                .transpose()
                .context("Failed to parse 'image_creation_preempt_threshold' as integer")?,
-            pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
+            pitr_interval: settings.remove("pitr_interval")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'pitr_interval' as duration")?,
            walreceiver_connect_timeout: settings
                .remove("walreceiver_connect_timeout")
-                .map(|x| x.to_string()),
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'walreceiver_connect_timeout' as duration")?,
            lagging_wal_timeout: settings
                .remove("lagging_wal_timeout")
-                .map(|x| x.to_string()),
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'lagging_wal_timeout' as duration")?,
            max_lsn_wal_lag: settings
                .remove("max_lsn_wal_lag")
                .map(|x| x.parse::<NonZeroU64>())
@@ -427,8 +445,14 @@ impl PageServerNode {
                .context("Failed to parse 'min_resident_size_override' as integer")?,
            evictions_low_residence_duration_metric_threshold: settings
                .remove("evictions_low_residence_duration_metric_threshold")
-                .map(|x| x.to_string()),
-            heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'evictions_low_residence_duration_metric_threshold' as duration")?,
+            heatmap_period: settings
+                .remove("heatmap_period")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'heatmap_period' as duration")?,
            lazy_slru_download: settings
                .remove("lazy_slru_download")
                .map(|x| x.parse::<bool>())
@@ -439,10 +463,15 @@ impl PageServerNode {
                .map(serde_json::from_str)
                .transpose()
                .context("parse `timeline_get_throttle` from json")?,
-            lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
+            lsn_lease_length: settings.remove("lsn_lease_length")
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'lsn_lease_length' as duration")?,
            lsn_lease_length_for_ts: settings
                .remove("lsn_lease_length_for_ts")
-                .map(|x| x.to_string()),
+                .map(humantime::parse_duration)
+                .transpose()
+                .context("Failed to parse 'lsn_lease_length_for_ts' as duration")?,
            timeline_offloading: settings
                .remove("timeline_offloading")
                .map(|x| x.parse::<bool>())
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -1,3 +1,6 @@
+use std::collections::HashMap;
+use std::fmt;
+
 ///
 /// Module for parsing postgresql.conf file.
 ///
@@ -6,8 +9,6 @@
 /// funny stuff like include-directives or funny escaping.
 use once_cell::sync::Lazy;
 use regex::Regex;
-use std::collections::HashMap;
-use std::fmt;

 /// In-memory representation of a postgresql.conf file
 #[derive(Default, Debug)]
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -14,18 +14,15 @@ use std::{io, result};

 use anyhow::Context;
 use camino::Utf8PathBuf;
+use http_utils::error::HttpErrorBody;
 use postgres_connection::PgConnectionConfig;
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
-
-use http_utils::error::HttpErrorBody;
 use utils::auth::{Claims, Scope};
 use utils::id::NodeId;

-use crate::{
-    background_process,
-    local_env::{LocalEnv, SafekeeperConf},
-};
+use crate::background_process;
+use crate::local_env::{LocalEnv, SafekeeperConf};

 #[derive(Error, Debug)]
 pub enum SafekeeperHttpError {
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -1,44 +1,39 @@
-use crate::{
-    background_process,
-    local_env::{LocalEnv, NeonStorageControllerConf},
-};
+use std::ffi::OsStr;
+use std::fs;
+use std::net::SocketAddr;
+use std::path::PathBuf;
+use std::process::ExitStatus;
+use std::str::FromStr;
+use std::sync::OnceLock;
+use std::time::{Duration, Instant};
+
 use camino::{Utf8Path, Utf8PathBuf};
 use hyper0::Uri;
 use nix::unistd::Pid;
-use pageserver_api::{
-    controller_api::{
-        NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
-        TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
-        TenantShardMigrateResponse,
-    },
-    models::{
-        TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
-    },
-    shard::{ShardStripeSize, TenantShardId},
+use pageserver_api::controller_api::{
+    NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest,
+    TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest,
+    TenantShardMigrateResponse,
 };
+use pageserver_api::models::{
+    TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
+};
+use pageserver_api::shard::{ShardStripeSize, TenantShardId};
 use pageserver_client::mgmt_api::ResponseErrorMessageExt;
 use postgres_backend::AuthType;
 use reqwest::Method;
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use std::{
-    ffi::OsStr,
-    fs,
-    net::SocketAddr,
-    path::PathBuf,
-    process::ExitStatus,
-    str::FromStr,
-    sync::OnceLock,
-    time::{Duration, Instant},
-};
+use serde::de::DeserializeOwned;
+use serde::{Deserialize, Serialize};
 use tokio::process::Command;
 use tracing::instrument;
 use url::Url;
-use utils::{
-    auth::{encode_from_key_file, Claims, Scope},
-    id::{NodeId, TenantId},
-};
+use utils::auth::{Claims, Scope, encode_from_key_file};
+use utils::id::{NodeId, TenantId};
 use whoami::username;

+use crate::background_process;
+use crate::local_env::{LocalEnv, NeonStorageControllerConf};
+
 pub struct StorageController {
    env: LocalEnv,
    private_key: Option<Vec<u8>>,
@@ -96,7 +91,8 @@ pub struct AttachHookRequest {

 #[derive(Serialize, Deserialize)]
 pub struct AttachHookResponse {
-    pub gen: Option<u32>,
+    #[serde(rename = "gen")]
+    pub generation: Option<u32>,
 }

 #[derive(Serialize, Deserialize)]
@@ -541,6 +537,10 @@ impl StorageController {
            args.push("--start-as-candidate".to_string());
        }

+        if self.config.load_safekeepers {
+            args.push("--load-safekeepers".to_string());
+        }
+
        if let Some(private_key) = &self.private_key {
            let claims = Claims::new(None, Scope::PageServerApi);
            let jwt_token =
@@ -779,7 +779,7 @@ impl StorageController {
            )
            .await?;

-        Ok(response.gen)
+        Ok(response.generation)
    }

    #[instrument(skip(self))]
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -1,34 +1,27 @@
-use futures::StreamExt;
-use std::{
-    collections::{HashMap, HashSet},
-    str::FromStr,
-    time::Duration,
-};
+use std::collections::{HashMap, HashSet};
+use std::str::FromStr;
+use std::time::Duration;

 use clap::{Parser, Subcommand};
-use pageserver_api::{
-    controller_api::{
-        AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse,
-        SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
-        ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy,
-        TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest,
-    },
-    models::{
-        EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary,
-        ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest,
-        TenantShardSplitRequest, TenantShardSplitResponse,
-    },
-    shard::{ShardStripeSize, TenantShardId},
+use futures::StreamExt;
+use pageserver_api::controller_api::{
+    AvailabilityZone, NodeAvailabilityWrapper, NodeConfigureRequest, NodeDescribeResponse,
+    NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse, PlacementPolicy,
+    SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy,
+    ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest,
+    TenantDescribeResponse, TenantPolicyRequest, TenantShardMigrateRequest,
+    TenantShardMigrateResponse,
 };
+use pageserver_api::models::{
+    EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters,
+    TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest,
+    TenantShardSplitResponse,
+};
+use pageserver_api::shard::{ShardStripeSize, TenantShardId};
 use pageserver_client::mgmt_api::{self};
 use reqwest::{Method, StatusCode, Url};
-use utils::id::{NodeId, TenantId, TimelineId};
-
-use pageserver_api::controller_api::{
-    NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy,
-    TenantShardMigrateRequest, TenantShardMigrateResponse,
-};
 use storage_controller_client::control_api::Client;
+use utils::id::{NodeId, TenantId, TimelineId};

 #[derive(Subcommand, Debug)]
 enum Command {
@@ -921,7 +914,9 @@ async fn main() -> anyhow::Result<()> {
        }
        Command::TenantDrop { tenant_id, unclean } => {
            if !unclean {
-                anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant.  If you know what you're doing, add `--unclean` to proceed.")
+                anyhow::bail!(
+                    "This command is not a tenant deletion, and uncleanly drops all controller state for the tenant.  If you know what you're doing, add `--unclean` to proceed."
+                )
            }
            storcon_client
                .dispatch::<(), ()>(
@@ -933,7 +928,9 @@ async fn main() -> anyhow::Result<()> {
        }
        Command::NodeDrop { node_id, unclean } => {
            if !unclean {
-                anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it.  If you know what you're doing, add `--unclean` to proceed.")
+                anyhow::bail!(
+                    "This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it.  If you know what you're doing, add `--unclean` to proceed."
+                )
            }
            storcon_client
                .dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None)
@@ -959,7 +956,7 @@ async fn main() -> anyhow::Result<()> {
                                threshold: threshold.into(),
                            },
                        )),
-                        heatmap_period: Some("300s".to_string()),
+                        heatmap_period: Some(Duration::from_secs(300)),
                        ..Default::default()
                    },
                })
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -186,7 +186,7 @@ services:

  neon-test-extensions:
    profiles: ["test-extensions"]
-    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest}
+    image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}}
    environment:
      - PGPASSWORD=cloud_admin
    entrypoint:
--- a/docker-compose/docker_compose_test.sh
+++ b/docker-compose/docker_compose_test.sh
@@ -51,8 +51,6 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do
    done

    if [ $pg_version -ge 16 ]; then
-        docker cp ext-src $TEST_CONTAINER_NAME:/
-        docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl"
        # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail
        # It cannot be moved to Dockerfile now because the database directory is created after the start of the container
        echo Adding dummy config
--- a/docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch
+++ b/docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch
--- a/docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch
+++ b/docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch
--- a/docker-compose/ext-src/pgtap-src/test-upgrade.patch
+++ b/docker-compose/ext-src/pgtap-src/test-upgrade.patch
@@ -7,7 +7,7 @@ index f255fe6..0a0fa65 100644
 GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests
 REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe
 -REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
-+REGRESS_OPTS = --use-existing --dbname=pgtap_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
++REGRESS_OPTS = --use-existing --dbname=contrib_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF)
 SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets!
 IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=))
 PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS)))
--- a/docker-compose/test_extensions_upgrade.sh
+++ b/docker-compose/test_extensions_upgrade.sh
@@ -6,12 +6,16 @@ generate_id() {
    local -n resvar=$1
    printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
 }
-if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then
-  echo OLDTAG and NEWTAG must be defined
+echo "${OLD_COMPUTE_TAG}"
+echo "${NEW_COMPUTE_TAG}"
+echo "${TEST_EXTENSIONS_TAG}"
+if [ -z "${OLD_COMPUTE_TAG:-}" ] || [ -z "${NEW_COMPUTE_TAG:-}" ] || [ -z "${TEST_EXTENSIONS_TAG:-}" ]; then
+  echo OLD_COMPUTE_TAG, NEW_COMPUTE_TAG and TEST_EXTENSIONS_TAG must be set
  exit 1
 fi
 export PG_VERSION=${PG_VERSION:-16}
 export PG_TEST_VERSION=${PG_VERSION}
+# Waits until the compute node is ready
 function wait_for_ready {
  TIME=0
  while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do
@@ -23,11 +27,45 @@ function wait_for_ready {
    exit 2
  fi
 }
+# Creates extensions. Gets a string with space-separated extensions as a parameter
 function create_extensions() {
  for ext in ${1}; do
    docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE"
  done
 }
+# Creates a new timeline. Gets the parent ID and an extension name as parameters.
+# Saves the timeline ID in the variable EXT_TIMELINE
+function create_timeline() {
+  generate_id new_timeline_id
+
+  PARAMS=(
+      -sbf
+      -X POST
+      -H "Content-Type: application/json"
+      -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${1}\"}"
+      "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
+  )
+  result=$(curl "${PARAMS[@]}")
+  echo $result | jq .
+  EXT_TIMELINE[${2}]=${new_timeline_id}
+}
+# Checks if the timeline ID of the compute node is expected. Gets the timeline ID as a parameter
+function check_timeline() {
+    TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
+    if [ "${TID}" != "${1}" ]; then
+      echo Timeline mismatch
+      exit 1
+    fi
+}
+# Restarts the compute node with the required compute tag and timeline.
+# Accepts the tag for the compute node and the timeline as parameters.
+function restart_compute() {
+  docker compose down compute compute_is_ready
+  COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready
+  wait_for_ready
+  check_timeline ${2}
+}
+declare -A EXT_TIMELINE
 EXTENSIONS='[
 {"extname": "plv8", "extdir": "plv8-src"},
 {"extname": "vector", "extdir": "pgvector-src"},
@@ -47,7 +85,7 @@ EXTENSIONS='[
 {"extname": "pg_repack", "extdir": "pg_repack-src"}
 ]'
 EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -)
-TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build -d
+COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d
 wait_for_ready
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
@@ -55,13 +93,14 @@ create_extensions "${EXTNAMES}"
 query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')"
 new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query")
 docker compose --profile test-extensions down
-TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
+COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate
 wait_for_ready
-docker compose cp  ext-src neon-test-extensions:/
 docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression"
 docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression"
-docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression"
-docker compose exec neon-test-extensions psql -d pgtap_regression -c "CREATE EXTENSION pgtap"
+tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
+EXT_TIMELINE["main"]=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
+create_timeline "${EXT_TIMELINE["main"]}" init
+restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE["init"]}"
 create_extensions "${EXTNAMES}"
 if [ "${FORCE_ALL_UPGRADE_TESTS:-false}" = true ]; then
  exts="${EXTNAMES}"
@@ -72,29 +111,13 @@ fi
 if [ -z "${exts}" ]; then
  echo "No extensions were upgraded"
 else
-  tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id")
-  timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
  for ext in ${exts}; do
    echo Testing ${ext}...
+    create_timeline "${EXT_TIMELINE["main"]}" ${ext}
    EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir')
-    generate_id new_timeline_id
-    PARAMS=(
-        -sbf
-        -X POST
-        -H "Content-Type: application/json"
-        -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}"
-        "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/"
-    )
-    result=$(curl "${PARAMS[@]}")
-    echo $result | jq .
-    TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready
-    COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready
-    wait_for_ready
-    TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id")
-    if [ ${TID} != ${new_timeline_id} ]; then
-      echo Timeline mismatch
-      exit 1
-    fi
+    restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}"
+    docker compose exec neon-test-extensions psql -d contrib_regression -c "CREATE EXTENSION ${ext} CASCADE"
+    restart_compute "${NEW_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}"
    docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}"
    if ! docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then
      docker  compose exec neon-test-extensions  cat /ext-src/${EXTDIR}/regression.diffs
--- a/docs/rfcs/2025-03-15-hot-secondaries.md
+++ b/docs/rfcs/2025-03-15-hot-secondaries.md
@@ -0,0 +1,164 @@
+
+# Pageserver Hot Secondaries
+
+## Summary
+
+It is proposed to add a new mode for pageserver tenant shard locations,
+called "hot secondary", which is able to serve page_service requests but
+does not do all the same housekeeping as an attached location, and does
+not store any additional data in S3.
+
+There is a stark tradeoff between resource cost and complexity: a very simple solution would be to have multiple full attached locations doing independent I/O, but this RFC proposes some additional complexity to
+reduce cost.
+
+## Background
+
+In the [pageserver migration RFC](028-pageserver-migration.md), we introduced the concept of "warm secondaries".  These are pageserver locations that poll remote storage for a _heatmap_ describing which layers they should hold, and then download those layers from S3.  This enables them to rapidly transition into a usable attached location with a warm cache.
+
+Combined with the storage controller's detection of pageserver failures, warm
+secondaries enabled high availability of pageservers with a recovery time
+objective (RTO) measured in seconds (depends on configured heartbeat frequency) -- occasional cloud instance failures are typically recovered
+in well under a minute, without human intervention.
+
+## Purpose
+
+We aim to provide a sub-second RTO for pageserver failures, for mission
+critical workloads.  To do this, we should enable the postgres client
+to make its own decision about cutting over to a secondary, rather than
+waiting for the controller to detect a failure and instruct it to
+use a different pageserver.  These secondaries should be maintained
+in a continuously readable state rather than requiring explicit activation.
+
+Because low-RTO failover is intrinsically vulnerable to "flapping"/false
+positives, reads from such a hot secondary will not "promote" the secondary: we don't want to flap back and forth at millisecond timescales.  Rather, reads will be served by hot secondaries at any time, 
+but their transition to an attached (primary) location will still be
+managed by the storage controller.
+
+## Design of Hot Location Mode
+
+At a high level, hot locations are basically the same `Tenant` and `Timeline` types as an attached location, but with some behavioral tweaks.  This RFC won't get into code structure details: these changes
+may be expressed as different types (more robust) or as different modes
+for existing types (less code churn, more complexity).
+
+### Load and ingest
+
+Initially, we may start in the same way as a normal attached location:
+by discovering the latest metadata in remote storage and constructing
+a LayerMap.
+
+We should also do ingest as normal: subscribing to safekeeper and streaming
+writes into ephemeral layer files that are then frozen into L0s.  However,
+we do not want to wastefully upload these to S3 (they duplicate what the
+attached location is already writing).
+
+### "Virtual" compaction
+
+Clearly ingesting but never uploading or compacting will generate an unbounded stack of L0 layers, unless we do something about it.
+
+To solve this, we may add a special type of compaction that re-reads
+from remote storage, updates the layer map to contain all L1
+and image layers from the remote metadata, and triggers download of these.
+
+We do not download remote L0s during virtual compaction, because the hot secondary has also been ingesting and generating these, so it would be wasteful.  We just trim any local L0s which are now covered by the L1 high watermark of the remote metadata, and retain any that are still needed to serve reads.
+
+Note that this process is expected to generate some overlaps in LSN space: we might have an L0 that we generated locally which overlaps with an L1 from remote storage.  getpage@lsn logic must handle this, and avoid assuming non-overlapping layers (i.e. having read some deltas from L0, we must not read the same deltas again in an L1, we must remember what LSN we already passed).
+
+The average total network download bandwidth of the hot secondary is equal to the rate at which the attached location generates L1 and image layers, plus the rate at which WAL is generated.
+
+The average total disk write bandwidth is the sum of WAL generation rate plus L1/image generation rate: this is about the same as a normal attached location.  The average disk _read_ bandwidth of a hot secondary is far lower than an attached location because it is not reading back layers to compact them -- layers are only read in periods where the attached location was unavailable, so computes started reading from a hot secondary.
+
+The trigger for virtual compaction can be similar to the existing trigger
+for L1 compaction on attached locations: once we build up a deep stack of L0s, then we do virtual compaction to trim it.  This assumes that the attached location has kept up with compaction.  The hot secondary can be
+more tolerant of a deeper L0 stack because it is less often serving
+reads: for example it might make sense to trigger normal L1 compaction at 10 L0 layers, and trigger virtual compaction at 15 L0 layers, giving a good chance that by the time the hot secondary does compaction, the attached location has already written out some layer files for it to read.
+
+To avoid an availability gap while downloading data from S3, it is important that the hot
+secondary downloads new layer files before updating its layer map to de-reference replaced
+layers.
+
+### Handling missing layers/timelines
+
+If an incoming request references a timeline that the hot secondary is
+unaware of, it must go read from S3 to determine if the timeline exists, and if so then load it.
+
+The hot secondary should also be tipped off by the storage controller when
+timelines are created, so that in normal operation it is aware of timelines
+immediately rather than having to load on demand (loading on demand could
+have much higher latency for reads).
+
+Hot secondaries may also experience 404s reading layers from remote storage, because the layer might have been deleted by the attached location
+during compaction or GC.  If the hot secondary finds such a 404, it should
+trigger a re-download of the timeline index.
+
+### Transition from Hot Secondary to Attached
+
+While a hot secondary can serve reads independently for a short period of time (until
+too many L0s build up to efficiently serve reads), it needs to be promoted to be the attached
+location if the last attached location becomes unavailable (or if the storage controller
+determines that the tenant should be migrated).
+
+This can be done trivially by shutting down and starting up again in attached mode (on startup
+the layer map will be reset to the content of remote storage), but this can impose an availability gap, because:
+- After unexpected failure of an attached location, the hot secondary's local L0s may be
+  further ahead in WAL ingest than the contents of remote storage, so resetting to what's
+  in remote storage will make recent data unavailable until it is re-ingested.
+- Even if the remote data is up to date with latest WAL, it may take some time to download
+  layers. 
+
+To avoid an availability gap while re-ingesting WAL, it is necessary to stitch the local L0s with remote storage state.  We may do this at startup, by making an exception to our
+usual policy of only respecting remote storage state at startup.  This exception can
+be specific to L0 files, and perhaps also specific to when we can detect that these
+were written by a hot secondary (perhaps by marking these files with a suffix or magic 0xffff generation?)
+
+We should also only do this cutover once we're reasonably sure the old attached location
+isn't still uploading, so that on startup we do not see a whole new layer map with lots
+of layers that need downloading.
+
+We may still tolerate some availability gap in the <1s range while reloading the tenant
+in a different mode.  We should aim for this to be under 100ms under usual circumstances,
+as it should only require long enough to:
+- Flush ephemeral layer to L0 on shutdown (writing 128MB takes on the order of 100ms)
+- Load remote index on startup (reading from S3 takes on the order of 10ms)
+
+Doing many such cutovers concurrently may result in worse availability, so the controller
+should be tuned to understand that when cutting over multiple hot secondaries to attached,
+it is best not to rush it (as they are already in a readable state, it is less urgent
+than when activating warm secondaries).
+
+## Summary of a failover
+
+To summarize the order of operations when a pageserver instance fails while holding a tenant
+that has a hot secondary location:
+- after some short timeout (100s of ms), compute gives up on getpage requests to the primary and sends
+  them to the hot secondary.
+- after some much longer timeout (e.g. ~30s), controller decides that the hot secondary should
+  become attached, so that it can do its own compaction.
+- Hot secondary is instructed to do a compaction before shutting it down, so that during
+  its restart into attached mode it will not have to deal with any remote storage change.
+- Hot secondary shuts down, flushing ephemeral layer to L0.
+- Previously-secondary location starts up in attached mode with a new generation.  Downloads
+  index from remote storage, and identifies which L0 files to retain.  Adds these to LayerMap
+  and enqueues them for upload.
+- Now fully available for reads and able to proceed with compaction etc as normal.
+
+## Optimisations/details
+
+- We should add a read-only mode to RemoteTimelineClient
+
+## Alternatives considered
+
+### Full mirror
+
+We could make hot secondary locations do all compaction, gc, etc operations
+independently, and maintain their own set of layer files in S3.  These would essentially be separate tenants in pageserver terms, but consuming the same safekeeper timelines.
+
+These locations would no longer be anything special in pageserver terms, they'd simply be attached locations that use some modified path like `<tenant_id>.secondary` to avoid colliding with the primary data.
+
+The storage controller could have some `AttachedHotSecondary` placement
+policy that configures the hot secondary location with some flag to indicate that the alternative storage path should be used.
+
+Clearly the advantage of this approach is code simplicity.  However, the
+downsides are substantial:
+- Double object storage costs
+- Compaction costs are doubled (CPU & disk read I/O), whereas the proposed
+  implementation of hot secondaries only pays twice for the compaction _write_ IO as it writes compacted layers to local disk.
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "compute_api"
 version = "0.1.0"
-edition.workspace = true
+edition = "2024"
 license.workspace = true

 [dependencies]
--- a/libs/compute_api/src/requests.rs
+++ b/libs/compute_api/src/requests.rs
@@ -1,11 +1,10 @@
 //! Structs representing the JSON formats used in the compute_ctl's HTTP API.
-use crate::{
-    privilege::Privilege,
-    responses::ComputeCtlConfig,
-    spec::{ComputeSpec, ExtVersion, PgIdent},
-};
 use serde::{Deserialize, Serialize};

+use crate::privilege::Privilege;
+use crate::responses::ComputeCtlConfig;
+use crate::spec::{ComputeSpec, ExtVersion, PgIdent};
+
 /// Request of the /configure API
 ///
 /// We now pass only `spec` in the configuration request, but later we can
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -6,10 +6,8 @@ use chrono::{DateTime, Utc};
 use jsonwebtoken::jwk::JwkSet;
 use serde::{Deserialize, Serialize, Serializer};

-use crate::{
-    privilege::Privilege,
-    spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role},
-};
+use crate::privilege::Privilege;
+use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role};

 #[derive(Serialize, Debug, Deserialize)]
 pub struct GenericAPIError {
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -5,13 +5,12 @@
 //! and connect it to the storage nodes.
 use std::collections::HashMap;

+use regex::Regex;
+use remote_storage::RemotePath;
 use serde::{Deserialize, Serialize};
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

-use regex::Regex;
-use remote_storage::RemotePath;
-
 /// String type alias representing Postgres identifier and
 /// intended to be used for DB / role names.
 pub type PgIdent = String;
@@ -102,6 +101,17 @@ pub struct ComputeSpec {
    pub timeline_id: Option<TimelineId>,
    pub pageserver_connstring: Option<String>,

+    /// Safekeeper membership config generation. It is put in
+    /// neon.safekeepers GUC and serves two purposes:
+    /// 1) A non-zero value forces walproposer to use membership configurations.
+    /// 2) If walproposer wants to update the list of safekeepers to connect to,
+    ///    taking them from some safekeeper mconf, it should check which value
+    ///    is newer by comparing the generation.
+    ///
+    /// Note: it could be SafekeeperGeneration, but this needs linking
+    /// compute_ctl with postgres_ffi.
+    #[serde(default)]
+    pub safekeepers_generation: Option<u32>,
    #[serde(default)]
    pub safekeeper_connstrings: Vec<String>,

@@ -339,9 +349,10 @@ pub struct JwksSettings {

 #[cfg(test)]
 mod tests {
-    use super::*;
    use std::fs::File;

+    use super::*;
+
    #[test]
    fn allow_installing_remote_extensions() {
        let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({
--- a/libs/consumption_metrics/Cargo.toml
+++ b/libs/consumption_metrics/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "consumption_metrics"
 version = "0.1.0"
-edition = "2021"
+edition = "2024"
 license = "Apache-2.0"

 [dependencies]
--- a/libs/desim/src/chan.rs
+++ b/libs/desim/src/chan.rs
@@ -1,4 +1,5 @@
-use std::{collections::VecDeque, sync::Arc};
+use std::collections::VecDeque;
+use std::sync::Arc;

 use parking_lot::{Mutex, MutexGuard};

--- a/libs/desim/src/executor.rs
+++ b/libs/desim/src/executor.rs
@@ -1,11 +1,7 @@
-use std::{
-    panic::AssertUnwindSafe,
-    sync::{
-        atomic::{AtomicBool, AtomicU32, AtomicU8, Ordering},
-        mpsc, Arc, OnceLock,
-    },
-    thread::JoinHandle,
-};
+use std::panic::AssertUnwindSafe;
+use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, Ordering};
+use std::sync::{Arc, OnceLock, mpsc};
+use std::thread::JoinHandle;

 use tracing::{debug, error, trace};

--- a/libs/desim/src/network.rs
+++ b/libs/desim/src/network.rs
@@ -1,26 +1,19 @@
-use std::{
-    cmp::Ordering,
-    collections::{BinaryHeap, VecDeque},
-    fmt::{self, Debug},
-    ops::DerefMut,
-    sync::{mpsc, Arc},
-};
+use std::cmp::Ordering;
+use std::collections::{BinaryHeap, VecDeque};
+use std::fmt::{self, Debug};
+use std::ops::DerefMut;
+use std::sync::{Arc, mpsc};

-use parking_lot::{
-    lock_api::{MappedMutexGuard, MutexGuard},
-    Mutex, RawMutex,
-};
+use parking_lot::lock_api::{MappedMutexGuard, MutexGuard};
+use parking_lot::{Mutex, RawMutex};
 use rand::rngs::StdRng;
 use tracing::debug;

-use crate::{
-    executor::{self, ThreadContext},
-    options::NetworkOptions,
-    proto::NetEvent,
-    proto::NodeEvent,
-};
-
-use super::{chan::Chan, proto::AnyMessage};
+use super::chan::Chan;
+use super::proto::AnyMessage;
+use crate::executor::{self, ThreadContext};
+use crate::options::NetworkOptions;
+use crate::proto::{NetEvent, NodeEvent};

 pub struct NetworkTask {
    options: Arc<NetworkOptions>,
--- a/libs/desim/src/node_os.rs
+++ b/libs/desim/src/node_os.rs
@@ -2,14 +2,11 @@ use std::sync::Arc;

 use rand::Rng;

+use super::chan::Chan;
+use super::network::TCP;
+use super::world::{Node, NodeId, World};
 use crate::proto::NodeEvent;

-use super::{
-    chan::Chan,
-    network::TCP,
-    world::{Node, NodeId, World},
-};
-
 /// Abstraction with all functions (aka syscalls) available to the node.
 #[derive(Clone)]
 pub struct NodeOs {
--- a/libs/desim/src/options.rs
+++ b/libs/desim/src/options.rs
@@ -1,4 +1,5 @@
-use rand::{rngs::StdRng, Rng};
+use rand::Rng;
+use rand::rngs::StdRng;

 /// Describes random delays and failures. Delay will be uniformly distributed in [min, max].
 /// Connection failure will occur with the probablity fail_prob.
--- a/libs/desim/src/proto.rs
+++ b/libs/desim/src/proto.rs
@@ -3,7 +3,8 @@ use std::fmt::Debug;
 use bytes::Bytes;
 use utils::lsn::Lsn;

-use crate::{network::TCP, world::NodeId};
+use crate::network::TCP;
+use crate::world::NodeId;

 /// Internal node events.
 #[derive(Debug)]
--- a/libs/desim/src/time.rs
+++ b/libs/desim/src/time.rs
@@ -1,12 +1,8 @@
-use std::{
-    cmp::Ordering,
-    collections::BinaryHeap,
-    ops::DerefMut,
-    sync::{
-        atomic::{AtomicU32, AtomicU64},
-        Arc,
-    },
-};
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
+use std::ops::DerefMut;
+use std::sync::Arc;
+use std::sync::atomic::{AtomicU32, AtomicU64};

 use parking_lot::Mutex;
 use tracing::trace;
--- a/libs/desim/src/world.rs
+++ b/libs/desim/src/world.rs
@@ -1,19 +1,18 @@
+use std::ops::DerefMut;
+use std::sync::{Arc, mpsc};
+
 use parking_lot::Mutex;
-use rand::{rngs::StdRng, SeedableRng};
-use std::{
-    ops::DerefMut,
-    sync::{mpsc, Arc},
-};
+use rand::SeedableRng;
+use rand::rngs::StdRng;

-use crate::{
-    executor::{ExternalHandle, Runtime},
-    network::NetworkTask,
-    options::NetworkOptions,
-    proto::{NodeEvent, SimEvent},
-    time::Timing,
-};
-
-use super::{chan::Chan, network::TCP, node_os::NodeOs};
+use super::chan::Chan;
+use super::network::TCP;
+use super::node_os::NodeOs;
+use crate::executor::{ExternalHandle, Runtime};
+use crate::network::NetworkTask;
+use crate::options::NetworkOptions;
+use crate::proto::{NodeEvent, SimEvent};
+use crate::time::Timing;

 pub type NodeId = u32;

--- a/libs/desim/tests/reliable_copy_test.rs
+++ b/libs/desim/tests/reliable_copy_test.rs
@@ -1,14 +1,15 @@
 //! Simple test to verify that simulator is working.
 #[cfg(test)]
 mod reliable_copy_test {
+    use std::sync::Arc;
+
    use anyhow::Result;
    use desim::executor::{self, PollSome};
+    use desim::node_os::NodeOs;
    use desim::options::{Delay, NetworkOptions};
-    use desim::proto::{NetEvent, NodeEvent, ReplCell};
+    use desim::proto::{AnyMessage, NetEvent, NodeEvent, ReplCell};
    use desim::world::{NodeId, World};
-    use desim::{node_os::NodeOs, proto::AnyMessage};
    use parking_lot::Mutex;
-    use std::sync::Arc;
    use tracing::info;

    /// Disk storage trait and implementation.
--- a/Show More
+++ b/Show More