Fix calculation of logical delta layer space

More precisely calculate logical image size
Fi unit tests
2026-02-17 01:20:36 +00:00 · 2023-05-19 11:03:29 +03:00 · 2023-05-18 17:58:15 +03:00 · 2023-05-18 00:07:44 +03:00 · 2023-05-17 22:57:09 +03:00 · 2023-05-17 14:19:56 +03:00
167 changed files with 4319 additions and 4146 deletions
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -0,0 +1,184 @@
+name: 'Create Allure report'
+description: 'Generate Allure report from uploaded by actions/allure-report-store tests results'
+
+outputs:
+  report-url:
+    description: 'Allure report URL'
+    value: ${{ steps.generate-report.outputs.report-url }}
+  report-json-url:
+    description: 'Allure report JSON URL'
+    value: ${{ steps.generate-report.outputs.report-json-url }}
+
+runs:
+  using: "composite"
+
+  steps:
+    # We're using some of env variables quite offen, so let's set them once.
+    #
+    # It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
+    #
+    # - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
+    # - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
+    #
+    - name: Set variables
+      shell: bash -euxo pipefail {0}
+      run: |
+        PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
+        if [ "${PR_NUMBER}" != "null" ]; then
+          BRANCH_OR_PR=pr-${PR_NUMBER}
+        elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
+          # Shortcut for special branches
+          BRANCH_OR_PR=${GITHUB_REF_NAME}
+        else
+          BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
+        fi
+
+        LOCK_FILE=reports/${BRANCH_OR_PR}/lock.txt
+
+        WORKDIR=/tmp/${BRANCH_OR_PR}-$(date +%s)
+        mkdir -p ${WORKDIR}
+
+        echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
+        echo "LOCK_FILE=${LOCK_FILE}"       >> $GITHUB_ENV
+        echo "WORKDIR=${WORKDIR}"           >> $GITHUB_ENV
+        echo "BUCKET=${BUCKET}"             >> $GITHUB_ENV
+      env:
+        BUCKET: neon-github-public-dev
+
+    # TODO: We can replace with a special docker image with Java and Allure pre-installed
+    - uses: actions/setup-java@v3
+      with:
+        distribution: 'temurin'
+        java-version: '17'
+
+    - name: Install Allure
+      shell: bash -euxo pipefail {0}
+      run: |
+        if ! which allure; then
+          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
+          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
+          echo "${ALLURE_ZIP_MD5}  ${ALLURE_ZIP}" | md5sum -c
+          unzip -q ${ALLURE_ZIP}
+          echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
+          rm -f ${ALLURE_ZIP}
+        fi
+      env:
+        ALLURE_VERSION: 2.22.0
+        ALLURE_ZIP_MD5: d5c9f0989b896482536956340a7d5ec9
+
+    # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
+    - name: Acquire lock
+      shell: bash -euxo pipefail {0}
+      run: |
+        LOCK_TIMEOUT=300 # seconds
+
+        LOCK_CONTENT="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+        echo ${LOCK_CONTENT} > ${WORKDIR}/lock.txt
+
+        # Do it up to 5 times to avoid race condition
+        for _ in $(seq 1 5); do
+          for i in $(seq 1 ${LOCK_TIMEOUT}); do
+            LOCK_ACQUIRED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
+            # `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
+            if [ -z "${LOCK_ACQUIRED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ACQUIRED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
+              break
+            fi
+            sleep 1
+          done
+
+          aws s3 mv --only-show-errors ${WORKDIR}/lock.txt "s3://${BUCKET}/${LOCK_FILE}"
+
+          # Double-check that exactly THIS run has acquired the lock
+          aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
+          if [ "$(cat lock.txt)" = "${LOCK_CONTENT}" ]; then
+            break
+          fi
+        done
+
+    - name: Generate and publish final Allure report
+      id: generate-report
+      shell: bash -euxo pipefail {0}
+      run: |
+        REPORT_PREFIX=reports/${BRANCH_OR_PR}
+        RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
+
+        # Get previously uploaded data for this run
+        ZSTD_NBTHREADS=0
+
+        S3_FILEPATHS=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/ | jq --raw-output  '.Contents[].Key')
+        if [ -z "$S3_FILEPATHS" ]; then
+          # There's no previously uploaded data for this $GITHUB_RUN_ID
+          exit 0
+        fi
+        for S3_FILEPATH in ${S3_FILEPATHS}; do
+          time aws s3 cp --only-show-errors "s3://${BUCKET}/${S3_FILEPATH}" "${WORKDIR}"
+
+          archive=${WORKDIR}/$(basename $S3_FILEPATH)
+          mkdir -p ${archive%.tar.zst}
+          time tar -xf ${archive} -C ${archive%.tar.zst}
+          rm -f ${archive}
+        done
+
+        # Get history trend
+        time aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${WORKDIR}/latest/history" || true
+
+        # Generate report
+        time allure generate --clean --output ${WORKDIR}/report ${WORKDIR}/*
+
+        # Replace a logo link with a redirect to the latest version of the report
+        sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html?nocache='"'+Date.now()+'"'" class=|g' ${WORKDIR}/report/app.js
+
+        # Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
+        time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
+        time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
+
+        REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
+
+        # Generate redirect
+        cat <<EOF > ${WORKDIR}/index.html
+          <!DOCTYPE html>
+
+          <meta charset="utf-8">
+          <title>Redirecting to ${REPORT_URL}</title>
+          <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
+        EOF
+        time aws s3 cp --only-show-errors ${WORKDIR}/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
+
+        echo "report-url=${REPORT_URL}"                                   >> $GITHUB_OUTPUT
+        echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
+
+    - name: Release lock
+      if: always()
+      shell: bash -euxo pipefail {0}
+      run: |
+        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
+
+        if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" ]; then
+          aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
+        fi
+
+    - name: Cleanup
+      if: always()
+      shell: bash -euxo pipefail {0}
+      run: |
+        if [ -d "${WORKDIR}" ]; then
+          rm -rf ${WORKDIR}
+        fi
+
+    - uses: actions/github-script@v6
+      if: always()
+      env:
+        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
+        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+      with:
+        script: |
+          const { REPORT_URL, COMMIT_SHA } = process.env
+
+          await github.rest.repos.createCommitStatus({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            sha: `${COMMIT_SHA}`,
+            state: 'success',
+            target_url: `${REPORT_URL}`,
+            context: 'Allure report',
+          })
--- a/.github/actions/allure-report-store/action.yml
+++ b/.github/actions/allure-report-store/action.yml
@@ -0,0 +1,72 @@
+name: 'Store Allure results'
+description: 'Upload test results to be used by actions/allure-report-generate'
+
+inputs:
+  report-dir:
+    description: 'directory with test results generated by tests'
+    required: true
+  unique-key:
+    description: 'string to distinguish different results in the same run'
+    required: true
+
+runs:
+  using: "composite"
+
+  steps:
+    - name: Set variables
+      shell: bash -euxo pipefail {0}
+      run: |
+        PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
+        if [ "${PR_NUMBER}" != "null" ]; then
+          BRANCH_OR_PR=pr-${PR_NUMBER}
+        elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
+          # Shortcut for special branches
+          BRANCH_OR_PR=${GITHUB_REF_NAME}
+        else
+          BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
+        fi
+
+        echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
+        echo "REPORT_DIR=${REPORT_DIR}"     >> $GITHUB_ENV
+      env:
+        REPORT_DIR: ${{ inputs.report-dir }}
+
+    - name: Upload test results
+      shell: bash -euxo pipefail {0}
+      run: |
+        REPORT_PREFIX=reports/${BRANCH_OR_PR}
+        RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
+
+        # Add metadata
+        cat <<EOF > ${REPORT_DIR}/executor.json
+          {
+            "name": "GitHub Actions",
+            "type": "github",
+            "url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
+            "buildOrder": ${GITHUB_RUN_ID},
+            "buildName": "GitHub Actions Run #${GITHUB_RUN_NUMBER}/${GITHUB_RUN_ATTEMPT}",
+            "buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
+            "reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
+            "reportName": "Allure Report"
+          }
+        EOF
+
+        cat <<EOF > ${REPORT_DIR}/environment.properties
+          COMMIT_SHA=${COMMIT_SHA}
+        EOF
+
+        ARCHIVE="${UNIQUE_KEY}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
+        ZSTD_NBTHREADS=0
+
+        time tar -C ${REPORT_DIR} -cf ${ARCHIVE} --zstd .
+        time aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
+      env:
+        UNIQUE_KEY: ${{ inputs.unique-key }}
+        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        BUCKET: neon-github-public-dev
+
+    - name: Cleanup
+      if: always()
+      shell: bash -euxo pipefail {0}
+      run: |
+        rm -rf ${REPORT_DIR}
--- a/.github/actions/allure-report/action.yml
+++ b/.github/actions/allure-report/action.yml
@@ -1,254 +0,0 @@
-name: 'Create Allure report'
-description: 'Create and publish Allure report'
-
-inputs:
-  action:
-    desctiption: 'generate or store'
-    required: true
-  build_type:
-    description: '`build_type` from run-python-test-set action'
-    required: true
-  test_selection:
-    description: '`test_selector` from run-python-test-set action'
-    required: false
-outputs:
-  report-url:
-    description: 'Allure report URL'
-    value: ${{ steps.generate-report.outputs.report-url }}
-  report-json-url:
-    description: 'Allure report JSON URL'
-    value: ${{ steps.generate-report.outputs.report-json-url }}
-
-runs:
-  using: "composite"
-
-  steps:
-    # We're using some of env variables quite offen, so let's set them once.
-    #
-    # It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
-    #
-    # - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
-    # - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
-    #
-    - name: Set common environment variables
-      shell: bash -euxo pipefail {0}
-      run: |
-        echo "BUILD_TYPE=${BUILD_TYPE}"   >> $GITHUB_ENV
-        echo "BUCKET=${BUCKET}"           >> $GITHUB_ENV
-        echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
-      env:
-        BUILD_TYPE: ${{ inputs.build_type }}
-        BUCKET: neon-github-public-dev
-        TEST_OUTPUT: /tmp/test_output
-
-    - name: Validate input parameters
-      shell: bash -euxo pipefail {0}
-      run: |
-        if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
-          echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
-          exit 1
-        fi
-
-        if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
-          echo >&2 "inputs.test_selection must be set for 'store' action"
-          exit 2
-        fi
-
-    - name: Calculate variables
-      id: calculate-vars
-      shell: bash -euxo pipefail {0}
-      run: |
-        # TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
-
-        pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
-        if [ "${pr_number}" != "null" ]; then
-          key=pr-${pr_number}
-        elif [ "${GITHUB_REF_NAME}" = "main" ]; then
-          # Shortcut for a special branch
-          key=main
-        elif [ "${GITHUB_REF_NAME}" = "release" ]; then
-          # Shortcut for a special branch
-          key=release
-        else
-          key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
-        fi
-        echo "KEY=${key}" >> $GITHUB_OUTPUT
-
-        # Sanitize test selection to remove `/` and any other special characters
-        # Use printf instead of echo to avoid having `\n` at the end of the string
-        test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
-        echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
-
-    - uses: actions/setup-java@v3
-      if: ${{ inputs.action == 'generate' }}
-      with:
-        distribution: 'temurin'
-        java-version: '17'
-
-    - name: Install Allure
-      if: ${{ inputs.action == 'generate' }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        if ! which allure; then
-          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
-          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
-          echo "${ALLURE_ZIP_MD5}  ${ALLURE_ZIP}" | md5sum -c
-          unzip -q ${ALLURE_ZIP}
-          echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
-          rm -f ${ALLURE_ZIP}
-        fi
-      env:
-        ALLURE_VERSION: 2.21.0
-        ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
-
-    - name: Upload Allure results
-      if: ${{ inputs.action == 'store' }}
-      env:
-        REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-        RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        # Add metadata
-        cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
-          {
-            "name": "GitHub Actions",
-            "type": "github",
-            "url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
-            "buildOrder": ${GITHUB_RUN_ID},
-            "buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
-            "buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
-            "reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
-            "reportName": "Allure Report"
-          }
-        EOF
-        cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
-          TEST_SELECTION=${{ inputs.test_selection }}
-          BUILD_TYPE=${BUILD_TYPE}
-        EOF
-
-        ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
-        ZSTD_NBTHREADS=0
-
-        tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
-        aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
-
-    # Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
-    - name: Acquire Allure lock
-      if: ${{ inputs.action == 'generate' }}
-      shell: bash -euxo pipefail {0}
-      env:
-        LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
-        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
-      run: |
-        LOCK_TIMEOUT=300 # seconds
-
-        for _ in $(seq 1 5); do
-          for i in $(seq 1 ${LOCK_TIMEOUT}); do
-            LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
-            # `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
-            if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
-              break
-            fi
-            sleep 1
-          done
-          echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
-          aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
-
-          # A double-check that exactly WE have acquired the lock
-          aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
-          if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
-            break
-          fi
-        done
-
-    - name: Generate and publish final Allure report
-      if: ${{ inputs.action == 'generate' }}
-      id: generate-report
-      env:
-        REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-        RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        # Get previously uploaded data for this run
-        ZSTD_NBTHREADS=0
-
-        s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output  '.Contents[].Key')
-        if [ -z "$s3_filepaths" ]; then
-          # There's no previously uploaded data for this run
-          exit 0
-        fi
-        for s3_filepath in ${s3_filepaths}; do
-          aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
-
-          archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
-          mkdir -p ${archive%.tar.zst}
-          tar -xf ${archive} -C ${archive%.tar.zst}
-          rm -f ${archive}
-        done
-
-        # Get history trend
-        aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
-
-        # Generate report
-        allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
-
-        # Replace a logo link with a redirect to the latest version of the report
-        sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
-
-        # Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
-        aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
-        aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
-
-        REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
-
-        # Generate redirect
-        cat <<EOF > ${TEST_OUTPUT}/allure/index.html
-          <!DOCTYPE html>
-
-          <meta charset="utf-8">
-          <title>Redirecting to ${REPORT_URL}</title>
-          <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
-        EOF
-        aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
-
-        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
-        echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
-        echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
-
-    - name: Release Allure lock
-      if: ${{ inputs.action == 'generate' && always() }}
-      shell: bash -euxo pipefail {0}
-      env:
-        LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
-        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
-      run: |
-        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
-
-        if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
-          aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
-        fi
-
-    - name: Cleanup
-      if: always()
-      shell: bash -euxo pipefail {0}
-      run: |
-        rm -rf ${TEST_OUTPUT}/allure
-
-    - uses: actions/github-script@v6
-      if: ${{ inputs.action == 'generate' && always() }}
-      env:
-        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
-        SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-      with:
-        script: |
-          const { REPORT_URL, BUILD_TYPE, SHA } = process.env
-
-          await github.rest.repos.createCommitStatus({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            sha: `${SHA}`,
-            state: 'success',
-            target_url: `${REPORT_URL}`,
-            context: `Allure report / ${BUILD_TYPE}`,
-          })
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -197,14 +197,13 @@ runs:
      uses: ./.github/actions/upload
      with:
        name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
-        # The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
-        path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
+        # Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
+        path: /tmp/test_output/compatibility_snapshot_pg14/
        prefix: latest

-    - name: Create Allure report
+    - name: Upload test results
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
+      uses: ./.github/actions/allure-report-store
      with:
-        action: store
-        build_type: ${{ inputs.build_type }}
-        test_selection: ${{ inputs.test_selection }}
+        report-dir: /tmp/test_output/allure/results
+        unique-key: ${{ inputs.build_type }}
--- a/.github/ansible/.gitignore
+++ b/.github/ansible/.gitignore
@@ -1,5 +0,0 @@
-neon_install.tar.gz
-.neon_current_version
-
-collections/*
-!collections/.keep
--- a/.github/ansible/ansible.cfg
+++ b/.github/ansible/ansible.cfg
@@ -1,12 +0,0 @@
-[defaults]
-
-localhost_warning = False
-host_key_checking = False
-timeout = 30
-
-[ssh_connection]
-ssh_args   = -F ./ansible.ssh.cfg
-# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
-# and scp neither worked for me
-transfer_method = piped
-pipelining = True
--- a/.github/ansible/ansible.ssh.cfg
+++ b/.github/ansible/ansible.ssh.cfg
@@ -1,15 +0,0 @@
-# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
-# (use pre 8.5 option name to cope with old ssh in CI)
-PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
-
-Host tele.zenith.tech
-    User admin
-    Port 3023
-    StrictHostKeyChecking no
-    UserKnownHostsFile /dev/null
-
-Host * !tele.zenith.tech
-    User admin
-    StrictHostKeyChecking no
-    UserKnownHostsFile /dev/null
-    ProxyJump tele.zenith.tech
--- a/.github/ansible/collections/.keep
+++ b/.github/ansible/collections/.keep
--- a/.github/ansible/deploy.yaml
+++ b/.github/ansible/deploy.yaml
@@ -1,211 +0,0 @@
- name: Upload Neon binaries
-  hosts: storage
-  gather_facts: False
-  remote_user: "{{ remote_user }}"
-
-  tasks:
-
-    - name: get latest version of Neon binaries
-      register: current_version_file
-      set_fact:
-        current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
-      tags:
-      - pageserver
-      - safekeeper
-
-    - name: inform about versions
-      debug:
-        msg: "Version to deploy - {{ current_version }}"
-      tags:
-      - pageserver
-      - safekeeper
-
-    - name: upload and extract Neon binaries to /usr/local
-      ansible.builtin.unarchive:
-        owner: root
-        group: root
-        src: neon_install.tar.gz
-        dest: /usr/local
-      become: true
-      tags:
-      - pageserver
-      - safekeeper
-      - binaries
-      - putbinaries
-
- name: Deploy pageserver
-  hosts: pageservers
-  gather_facts: False
-  remote_user: "{{ remote_user }}"
-
-  tasks:
-
-    - name: upload init script
-      when: console_mgmt_base_url is defined
-      ansible.builtin.template:
-        src: scripts/init_pageserver.sh
-        dest: /tmp/init_pageserver.sh
-        owner: root
-        group: root
-        mode: '0755'
-      become: true
-      tags:
-      - pageserver
-
-    - name: init pageserver
-      shell:
-        cmd: /tmp/init_pageserver.sh
-      args:
-        creates: "/storage/pageserver/data/tenants"
-      environment:
-        NEON_REPO_DIR: "/storage/pageserver/data"
-        LD_LIBRARY_PATH: "/usr/local/v14/lib"
-      become: true
-      tags:
-      - pageserver
-
-    - name: read the existing remote pageserver config
-      ansible.builtin.slurp:
-        src: /storage/pageserver/data/pageserver.toml
-      register: _remote_ps_config
-      tags:
-      - pageserver
-
-    - name: parse the existing pageserver configuration
-      ansible.builtin.set_fact:
-        _existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
-      tags:
-      - pageserver
-
-    - name: construct the final pageserver configuration dict
-      ansible.builtin.set_fact:
-        pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
-      tags:
-      - pageserver
-
-    - name: template the pageserver config
-      template:
-        src: templates/pageserver.toml.j2
-        dest: /storage/pageserver/data/pageserver.toml
-      become: true
-      tags:
-      - pageserver
-
-    # used in `pageserver.service` template
-    - name: learn current availability_zone
-      shell:
-        cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
-      register: ec2_availability_zone
-
-    - set_fact: 
-        ec2_availability_zone={{ ec2_availability_zone.stdout }}
-
-    - name: upload systemd service definition
-      ansible.builtin.template:
-        src: systemd/pageserver.service
-        dest: /etc/systemd/system/pageserver.service
-        owner: root
-        group: root
-        mode: '0644'
-      become: true
-      tags:
-      - pageserver
-
-    - name: start systemd service
-      ansible.builtin.systemd:
-        daemon_reload: yes
-        name: pageserver
-        enabled: yes
-        state: restarted
-      become: true
-      tags:
-      - pageserver
-
-    - name: post version to console
-      when: console_mgmt_base_url is defined
-      shell:
-        cmd: |
-          INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/pageservers
-      tags:
-      - pageserver
-
- name: Deploy safekeeper
-  hosts: safekeepers
-  gather_facts: False
-  remote_user: "{{ remote_user }}"
-
-  tasks:
-
-    - name: upload init script
-      when: console_mgmt_base_url is defined
-      ansible.builtin.template:
-        src: scripts/init_safekeeper.sh
-        dest: /tmp/init_safekeeper.sh
-        owner: root
-        group: root
-        mode: '0755'
-      become: true
-      tags:
-      - safekeeper
-
-    - name: init safekeeper
-      shell:
-        cmd: /tmp/init_safekeeper.sh
-      args:
-        creates: "/storage/safekeeper/data/safekeeper.id"
-      environment:
-        NEON_REPO_DIR: "/storage/safekeeper/data"
-        LD_LIBRARY_PATH: "/usr/local/v14/lib"
-      become: true
-      tags:
-      - safekeeper
-
-    # used in `safekeeper.service` template
-    - name: learn current availability_zone
-      shell:
-        cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
-      register: ec2_availability_zone
-
-    - set_fact: 
-        ec2_availability_zone={{ ec2_availability_zone.stdout }}
-
-    # in the future safekeepers should discover pageservers byself
-    # but currently use first pageserver that was discovered
-    - name: set first pageserver var for safekeepers
-      set_fact:
-        first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
-      tags:
-      - safekeeper
-
-    - name: upload systemd service definition
-      ansible.builtin.template:
-        src: systemd/safekeeper.service
-        dest: /etc/systemd/system/safekeeper.service
-        owner: root
-        group: root
-        mode: '0644'
-      become: true
-      tags:
-      - safekeeper
-
-    - name: start systemd service
-      ansible.builtin.systemd:
-        daemon_reload: yes
-        name: safekeeper
-        enabled: yes
-        state: restarted
-      become: true
-      tags:
-      - safekeeper
-
-    - name: post version to console
-      when: console_mgmt_base_url is defined
-      shell:
-        cmd: |
-          INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/safekeepers
-      tags:
-      - safekeeper
--- a/.github/ansible/get_binaries.sh
+++ b/.github/ansible/get_binaries.sh
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-set -e
-
-if [ -n "${DOCKER_TAG}" ]; then
-  # Verson is DOCKER_TAG but without prefix
-  VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
-else
-  echo "Please set DOCKER_TAG environment variable"
-  exit 1
-fi
-
-
-# do initial cleanup
-rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
-mkdir neon_install
-
-# retrieve binaries from docker image
-echo "getting binaries from docker image"
-docker pull --quiet neondatabase/neon:${DOCKER_TAG}
-ID=$(docker create neondatabase/neon:${DOCKER_TAG})
-docker cp ${ID}:/data/postgres_install.tar.gz .
-tar -xzf postgres_install.tar.gz -C neon_install
-mkdir neon_install/bin/
-docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
-docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
-docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
-docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
-docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
-docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
-docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
-docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
-docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
-docker rm -vf ${ID}
-
-# store version to file (for ansible playbooks) and create binaries tarball
-echo ${VERSION} > neon_install/.neon_current_version
-echo ${VERSION} > .neon_current_version
-tar -czf neon_install.tar.gz -C neon_install .
-
-# do final cleaup
-rm -rf neon_install postgres_install.tar.gz
--- a/.github/ansible/prod.ap-southeast-1.hosts.yaml
+++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml
@@ -1,48 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-ap-southeast-1
-    bucket_region: ap-southeast-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: ap-southeast-1
-    ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1
-    console_region_id: aws-ap-southeast-1
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-064de8ea28bdb495b
-        pageserver-1.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-0b180defcaeeb6b93
-
-    safekeepers:
-      hosts:
-        safekeeper-0.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-0d6f1dc5161eef894
-        safekeeper-2.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-04fb63634e4679eb9
-        safekeeper-3.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-05481f3bc88cfc2d4
--- a/.github/ansible/prod.eu-central-1.hosts.yaml
+++ b/.github/ansible/prod.eu-central-1.hosts.yaml
@@ -1,50 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-eu-central-1
-    bucket_region: eu-central-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: eu-central-1
-    ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1
-    console_region_id: aws-eu-central-1
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.eu-central-1.aws.neon.tech:
-          ansible_host:  i-0cd8d316ecbb715be
-        pageserver-1.eu-central-1.aws.neon.tech:
-          ansible_host:  i-090044ed3d383fef0
-        pageserver-2.eu-central-1.aws.neon.tech:
-          ansible_host:  i-033584edf3f4b6742
-
-    safekeepers:
-      hosts:
-        safekeeper-0.eu-central-1.aws.neon.tech:
-          ansible_host:  i-0b238612d2318a050
-        safekeeper-1.eu-central-1.aws.neon.tech:
-          ansible_host:  i-07b9c45e5c2637cd4
-        safekeeper-2.eu-central-1.aws.neon.tech:
-          ansible_host:  i-020257302c3c93d88
--- a/.github/ansible/prod.us-east-1.hosts.yaml
+++ b/.github/ansible/prod.us-east-1.hosts.yaml
@@ -1,50 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-us-east-1
-    bucket_region: us-east-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.theta.us-east-1.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-east-1
-    ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-1
-    console_region_id: aws-us-east-1
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-east-1.aws.neon.tech:
-          ansible_host: i-085222088b0d2e0c7
-        pageserver-1.us-east-1.aws.neon.tech:
-          ansible_host: i-0969d4f684d23a21e
-        pageserver-2.us-east-1.aws.neon.tech:
-          ansible_host: i-05dee87895da58dad
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-east-1.aws.neon.tech:
-          ansible_host: i-04ce739e88793d864
-        safekeeper-1.us-east-1.aws.neon.tech:
-          ansible_host: i-0e9e6c9227fb81410
-        safekeeper-2.us-east-1.aws.neon.tech:
-          ansible_host: i-072f4dd86a327d52f
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -1,51 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-us-east-2
-    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-east-2
-    ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2
-    console_region_id: aws-us-east-2
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-east-2.aws.neon.tech:
-          ansible_host:  i-062227ba7f119eb8c
-        pageserver-1.us-east-2.aws.neon.tech:
-          ansible_host:  i-0b3ec0afab5968938
-        pageserver-2.us-east-2.aws.neon.tech:
-          ansible_host:  i-0d7a1c4325e71421d
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-east-2.aws.neon.tech:
-          ansible_host:  i-0e94224750c57d346
-        safekeeper-1.us-east-2.aws.neon.tech:
-          ansible_host:  i-06d113fb73bfddeb0
-        safekeeper-2.us-east-2.aws.neon.tech:
-          ansible_host:  i-09f66c8e04afff2e8
-
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -1,72 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-us-west-2
-    bucket_region: us-west-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-west-2
-    ansible_aws_ssm_bucket_name: neon-prod-storage-us-west-2
-    console_region_id: aws-us-west-2-new
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-west-2.aws.neon.tech:
-          ansible_host: i-0d9f6dfae0e1c780d
-        pageserver-1.us-west-2.aws.neon.tech:
-          ansible_host: i-0c834be1dddba8b3f
-        pageserver-2.us-west-2.aws.neon.tech:
-          ansible_host: i-051642d372c0a4f32
-        pageserver-3.us-west-2.aws.neon.tech:
-          ansible_host: i-00c3844beb9ad1c6b
-        pageserver-4.us-west-2.aws.neon.tech:
-          ansible_host: i-013263dd1c239adcc
-        pageserver-5.us-west-2.aws.neon.tech:
-          ansible_host: i-00ca6417c7bf96820
-        pageserver-6.us-west-2.aws.neon.tech:
-          ansible_host: i-01cdf7d2bc1433b6a
-        pageserver-7.us-west-2.aws.neon.tech:
-          ansible_host: i-02eec9b40617db5bc
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-west-2.aws.neon.tech:
-          ansible_host: i-00719d8a74986fda6
-        safekeeper-1.us-west-2.aws.neon.tech:
-          ansible_host: i-074682f9d3c712e7c
-        safekeeper-2.us-west-2.aws.neon.tech:
-          ansible_host: i-042b7efb1729d7966
-        safekeeper-3.us-west-2.aws.neon.tech:
-          ansible_host: i-089f6b9ef426dff76
-        safekeeper-4.us-west-2.aws.neon.tech:
-          ansible_host: i-0fe6bf912c4710c82
-        safekeeper-5.us-west-2.aws.neon.tech:
-          ansible_host: i-0a83c1c46d2b4e409
-        safekeeper-6.us-west-2.aws.neon.tech:
-          ansible_host: i-0fef5317b8fdc9f8d
-        safekeeper-7.us-west-2.aws.neon.tech:
-          ansible_host: i-0be739190d4289bf9
-        safekeeper-8.us-west-2.aws.neon.tech:
-          ansible_host: i-00e851803669e5cfe                    
--- a/.github/ansible/scripts/init_pageserver.sh
+++ b/.github/ansible/scripts/init_pageserver.sh
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-# fetch params from meta-data service
-INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
-INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
-DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')
-
-# store fqdn hostname in var
-HOST=$(hostname -f)
-
-
-cat <<EOF | tee /tmp/payload
-{
-  "version": 1,
-  "host": "${HOST}",
-  "port": 6400,
-  "region_id": "{{ console_region_id }}",
-  "instance_id": "${INSTANCE_ID}",
-  "http_host": "${HOST}",
-  "http_port": 9898,
-  "active": false,
-  "availability_zone_id": "${AZ_ID}",
-  "disk_size": ${DISK_SIZE},
-  "instance_type": "${INSTANCE_TYPE}"
-}
-EOF
-
-# check if pageserver already registered or not
-if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
-
-    # not registered, so register it now
-    ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
-
-    # init pageserver
-    sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
-fi
--- a/.github/ansible/scripts/init_safekeeper.sh
+++ b/.github/ansible/scripts/init_safekeeper.sh
@@ -1,31 +0,0 @@
-#!/bin/sh
-
-# fetch params from meta-data service
-INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
-
-# store fqdn hostname in var
-HOST=$(hostname -f)
-
-
-cat <<EOF | tee /tmp/payload
-{
-  "version": 1,
-  "host": "${HOST}",
-  "port": 6500,
-  "http_port": 7676,
-  "region_id": "{{ console_region_id }}",
-  "instance_id": "${INSTANCE_ID}",
-  "availability_zone_id": "${AZ_ID}",
-  "active": false
-}
-EOF
-
-# check if safekeeper already registered or not
-if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
-
-    # not registered, so register it now
-    ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
-    # init safekeeper
-    sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
-fi
--- a/.github/ansible/ssm_config
+++ b/.github/ansible/ssm_config
@@ -1,2 +0,0 @@
-ansible_connection: aws_ssm
-ansible_python_interpreter: /usr/bin/python3
--- a/.github/ansible/staging.eu-central-1.hosts.yaml
+++ b/.github/ansible/staging.eu-central-1.hosts.yaml
@@ -1,47 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-dev-storage-eu-central-1
-    bucket_region: eu-central-1
-    # We only register/update storage in one preview console and manually copy to other instances
-    console_mgmt_base_url: http://neon-internal-api.helium.aws.neon.build
-    broker_endpoint: http://storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.helium.aws.neon.build/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: eu-central-1
-    ansible_aws_ssm_bucket_name: neon-dev-storage-eu-central-1
-    console_region_id: aws-eu-central-1
-    sentry_environment: staging
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.eu-central-1.aws.neon.build:
-          ansible_host: i-011f93ec26cfba2d4
-
-    safekeepers:
-      hosts:
-        safekeeper-0.eu-central-1.aws.neon.build:
-          ansible_host: i-0ff026d27babf8ddd
-        safekeeper-1.eu-central-1.aws.neon.build:
-          ansible_host: i-03983a49ee54725d9
-        safekeeper-2.eu-central-1.aws.neon.build:
-          ansible_host: i-0bd025ecdb61b0db3
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -1,60 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-dev-storage-eu-west-1
-    bucket_region: eu-west-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
-    broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: eu-west-1
-    ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
-    console_region_id: aws-eu-west-1
-    sentry_environment: staging
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.eu-west-1.aws.neon.build:
-          ansible_host: i-01d496c5041c7f34c
-        pageserver-1.eu-west-1.aws.neon.build:
-          ansible_host: i-0e8013e239ce3928c
-
-    safekeepers:
-      hosts:
-        safekeeper-0.eu-west-1.aws.neon.build:
-          ansible_host: i-05226ef85722831bf
-        safekeeper-1.eu-west-1.aws.neon.build:
-          ansible_host: i-06969ee1bf2958bfc
-        safekeeper-2.eu-west-1.aws.neon.build:
-          ansible_host: i-087892e9625984a0b
-        safekeeper-3.eu-west-1.aws.neon.build:
-          ansible_host: i-0a6f91660e99e8891
-        safekeeper-4.eu-west-1.aws.neon.build:
-          ansible_host: i-0012e309e28e7c249
-        safekeeper-5.eu-west-1.aws.neon.build:
-          ansible_host: i-085a2b1193287b32e
-        safekeeper-6.eu-west-1.aws.neon.build:
-          ansible_host: i-0c713248465ed0fbd
-        safekeeper-7.eu-west-1.aws.neon.build:
-          ansible_host: i-02ad231aed2a80b7a
-        safekeeper-8.eu-west-1.aws.neon.build:
-          ansible_host: i-0dbbd8ffef66efda8
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -1,56 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-staging-storage-us-east-2
-    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
-    broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-east-2
-    ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2
-    console_region_id: aws-us-east-2
-    sentry_environment: staging
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-east-2.aws.neon.build:
-          ansible_host: i-0c3e70929edb5d691
-        pageserver-1.us-east-2.aws.neon.build:
-          ansible_host: i-0565a8b4008aa3f40
-        pageserver-2.us-east-2.aws.neon.build:
-          ansible_host: i-01e31cdf7e970586a
-        pageserver-3.us-east-2.aws.neon.build:
-          ansible_host: i-0602a0291365ef7cc
-        pageserver-99.us-east-2.aws.neon.build:
-          ansible_host: i-0c39491109bb88824
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-east-2.aws.neon.build:
-          ansible_host: i-027662bd552bf5db0
-        safekeeper-2.us-east-2.aws.neon.build:
-          ansible_host: i-0de0b03a51676a6ce
-        safekeeper-3.us-east-2.aws.neon.build:
-          ansible_host: i-05f8ba2cda243bd18
-        safekeeper-99.us-east-2.aws.neon.build:
-          ansible_host: i-0d61b6a2ea32028d5
--- a/.github/ansible/systemd/pageserver.service
+++ b/.github/ansible/systemd/pageserver.service
@@ -1,18 +0,0 @@
-[Unit]
-Description=Neon pageserver
-After=network.target auditd.service
-
-[Service]
-Type=simple
-User=pageserver
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
-ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -c "availability_zone='{{ ec2_availability_zone }}'" -D /storage/pageserver/data
-ExecReload=/bin/kill -HUP $MAINPID
-KillMode=mixed
-KillSignal=SIGINT
-Restart=on-failure
-TimeoutSec=10
-LimitNOFILE=30000000
-
-[Install]
-WantedBy=multi-user.target
--- a/.github/ansible/systemd/safekeeper.service
+++ b/.github/ansible/systemd/safekeeper.service
@@ -1,18 +0,0 @@
-[Unit]
-Description=Neon safekeeper
-After=network.target auditd.service
-
-[Service]
-Type=simple
-User=safekeeper
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}' --availability-zone={{ ec2_availability_zone }}
-ExecReload=/bin/kill -HUP $MAINPID
-KillMode=mixed
-KillSignal=SIGINT
-Restart=on-failure
-TimeoutSec=10
-LimitNOFILE=30000000
-
-[Install]
-WantedBy=multi-user.target
--- a/.github/ansible/templates/pageserver.toml.j2
+++ b/.github/ansible/templates/pageserver.toml.j2
@@ -1 +0,0 @@
-{{ pageserver_config | sivel.toiletwater.to_toml }}
--- a/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml
+++ b/.github/helm-values/dev-eu-central-1-alpha.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: staging
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "staging"
--- a/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml
+++ b/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.alpha.eu-central-1.internal.aws.neon.build"
-
-settings:
-  domain: "*.snirouter.alpha.eu-central-1.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -1,76 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
-  domain: "*.eu-west-1.aws.neon.build"
-  otelExporterOtlpEndpoint: "https://otel-collector.zeta.eu-west-1.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: dev
-  neon_region: eu-west-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-storage-broker.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: staging
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "staging"
--- a/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.zeta.eu-west-1.internal.aws.neon.build"
-
-settings:
-  domain: "*.snirouter.zeta.eu-west-1.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
@@ -1,68 +0,0 @@
-# Helm chart values for neon-proxy-link.
-# This is a YAML-formatted file.
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "link"
-  authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.stage.neon.tech/psql_session/"
-  domain: "pg.neon.build"
-  otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy-link pods
-podLabels:
-  neon_service: proxy
-  neon_env: dev
-  neon_region: us-east-2
-
-service:
-  type: LoadBalancer
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
-  domain: "*.cloud.stage.neon.tech"
-  otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram-legacy
-  neon_env: dev
-  neon_region: us-east-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -1,78 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
-  domain: "*.us-east-2.aws.neon.build"
-  extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
-  otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: dev
-  neon_region: us-east-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-us-east-2-beta.neon-storage-broker.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: staging
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.beta.us-east-2.internal.aws.neon.build
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "staging"
--- a/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.beta.us-east-2.internal.aws.neon.build"
-
-settings:
-  domain: "*.snirouter.beta.us-east-2.internal.aws.neon.build"
-  sentryEnvironment: "staging"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/preview-template.neon-proxy-scram.yaml
+++ b/.github/helm-values/preview-template.neon-proxy-scram.yaml
@@ -1,67 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/management/api/v2"
-  domain: "*.cloud.${PREVIEW_NAME}.aws.neon.build"
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: test
-  neon_region: ${PREVIEW_NAME}.eu-central-1
-
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: cloud.${PREVIEW_NAME}.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.ap-southeast-1.aws.neon.tech"
-  extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: ap-southeast-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.epsilon.ap-southeast-1.internal.aws.neon.tech"
-
-settings:
-  domain: "*.snirouter.epsilon.ap-southeast-1.internal.aws.neon.tech"
-  sentryEnvironment: "production"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.eu-central-1.aws.neon.tech"
-  extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: eu-central-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.gamma.eu-central-1.internal.aws.neon.tech"
-
-settings:
-  domain: "*.snirouter.gamma.eu-central-1.internal.aws.neon.tech"
-  sentryEnvironment: "production"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-1-theta.neon-proxy-scram.yaml
@@ -1,69 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.us-east-1.aws.neon.tech"
-  # *.us-east-1.retooldb.com hasn't been delegated yet.
-  extraDomains: ["*.us-east-1.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-east-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-east-1.aws.neon.tech
-  httpsPort: 443
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-us-east-1-theta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.theta.us-east-1.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml
+++ b/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.theta.us-east-1.internal.aws.neon.tech"
-
-settings:
-  domain: "*.snirouter.theta.us-east-1.internal.aws.neon.tech"
-  sentryEnvironment: "production"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-link.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-link.yaml
@@ -1,58 +0,0 @@
-# Helm chart values for neon-proxy-link.
-# This is a YAML-formatted file.
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "link"
-  authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.neon.tech/psql_session/"
-  domain: "pg.neon.tech"
-  sentryEnvironment: "production"
-
-# -- Additional labels for zenith-proxy pods
-podLabels:
-  neon_service: proxy
-  neon_env: production
-  neon_region: us-east-2
-
-service:
-  type: LoadBalancer
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.delta.us-east-2.aws.neon.tech
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.delta.us-east-2.aws.neon.tech
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.us-east-2.aws.neon.tech"
-  extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-east-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-east-2-delta.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.delta.us-east-2.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.delta.us-east-2.internal.aws.neon.tech"
-
-settings:
-  domain: "*.snirouter.delta.us-east-2.internal.aws.neon.tech"
-  sentryEnvironment: "production"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
@@ -1,76 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.cloud.neon.tech"
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-west-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.eta.us-west-2.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.us-west-2.aws.neon.tech"
-  extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-west-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-west-2-eta.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.eta.us-west-2.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml
@@ -1,19 +0,0 @@
-useCertManager: true
-
-replicaCount: 3
-
-exposedService:
-  # exposedService.port -- Exposed Service proxy port
-  port: 4432
-  annotations:
-    external-dns.alpha.kubernetes.io/hostname: "*.snirouter.eta.us-west-2.internal.aws.neon.tech"
-
-settings:
-  domain: "*.snirouter.eta.us-west-2.internal.aws.neon.tech"
-  sentryEnvironment: "production"
-
-imagePullSecrets:
-  - name: docker-hub-neon
-
-metrics:
-  enabled: false
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -93,10 +93,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -283,10 +280,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -380,10 +374,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -476,10 +467,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -566,16 +554,13 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+        slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -330,6 +330,7 @@ jobs:
      fail-fast: false
      matrix:
        build_type: [ debug, release ]
+        pg_version: [ v14, v15 ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -350,11 +351,12 @@ jobs:
          real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
          rerun_flaky: true
        env:
+          DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty

      - name: Merge and upload coverage data
-        if: matrix.build_type == 'debug'
+        if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
        uses: ./.github/actions/save-coverage-data

  benchmarks:
@@ -399,79 +401,50 @@ jobs:
    steps:
      - uses: actions/checkout@v3

-      - name: Create Allure report (debug)
+      - name: Create Allure report
        if: ${{ !cancelled() }}
-        id: create-allure-report-debug
-        uses: ./.github/actions/allure-report
-        with:
-          action: generate
-          build_type: debug
-
-      - name: Create Allure report (release)
-        if: ${{ !cancelled() }}
-        id: create-allure-report-release
-        uses: ./.github/actions/allure-report
-        with:
-          action: generate
-          build_type: release
+        id: create-allure-report
+        uses: ./.github/actions/allure-report-generate

      - uses: actions/github-script@v6
        if: >
          !cancelled() &&
-          github.event_name == 'pull_request' && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
+          github.event_name == 'pull_request'
        with:
          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
          retries: 5
          script: |
-            const reports = [{
-              buildType: "debug",
-              reportUrl: "${{ steps.create-allure-report-debug.outputs.report-url }}",
-              jsonUrl:   "${{ steps.create-allure-report-debug.outputs.report-json-url }}",
-            }, {
-              buildType: "release",
-              reportUrl: "${{ steps.create-allure-report-release.outputs.report-url }}",
-              jsonUrl:   "${{ steps.create-allure-report-release.outputs.report-json-url }}",
-            }]
+            const report = {
+              reportUrl:     "${{ steps.create-allure-report.outputs.report-url }}",
+              reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
+            }

            const script = require("./scripts/pr-comment-test-report.js")
            await script({
              github,
              context,
              fetch,
-              reports,
+              report,
            })

      - name: Store Allure test stat in the DB
-        if: >
-          !cancelled() && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
+        if: ${{ !cancelled() && steps.create-allure-report.outputs.report-json-url }}
        env:
-          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-          REPORT_JSON_URL_DEBUG: ${{ steps.create-allure-report-debug.outputs.report-json-url }}
-          REPORT_JSON_URL_RELEASE: ${{ steps.create-allure-report-release.outputs.report-json-url }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+          REPORT_JSON_URL: ${{ steps.create-allure-report.outputs.report-json-url }}
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
        run: |
          ./scripts/pysync

-          for report_url in $REPORT_JSON_URL_DEBUG $REPORT_JSON_URL_RELEASE; do
-            if [ -z "$report_url" ]; then
-              continue
-            fi
+          curl --fail --output suites.json "${REPORT_JSON_URL}"
+          export BUILD_TYPE=unified
+          export DATABASE_URL="$TEST_RESULT_CONNSTR"

-            if [[ "$report_url" == "$REPORT_JSON_URL_DEBUG" ]]; then
-              BUILD_TYPE=debug
-            else
-              BUILD_TYPE=release
-            fi
-
-            curl --fail --output suites.json "${report_url}"
-            DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
-          done
+          poetry run python3 scripts/ingest_regress_test_result.py \
+            --revision ${COMMIT_SHA} \
+            --reference ${GITHUB_REF} \
+            --build-type ${BUILD_TYPE} \
+            --ingest suites.json

  coverage-report:
    runs-on: [ self-hosted, gen3, small ]
@@ -633,6 +606,7 @@ jobs:
                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
                           --context .
                           --build-arg GIT_VERSION=${{ github.sha }}
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
                           --destination neondatabase/neon:${{needs.tag.outputs.build-tag}}

@@ -718,6 +692,7 @@ jobs:
                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
                           --context .
                           --build-arg GIT_VERSION=${{ github.sha }}
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
                           --dockerfile Dockerfile.compute-tools
                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
                           --destination neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
@@ -770,6 +745,7 @@ jobs:
                           --context .
                           --build-arg GIT_VERSION=${{ github.sha }}
                           --build-arg PG_VERSION=${{ matrix.version }}
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
                           --dockerfile Dockerfile.compute-node
                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
                           --destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
@@ -935,42 +911,6 @@ jobs:
      - name: Cleanup ECR folder
        run: rm -rf ~/.ecr

-  deploy-pr-test-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
-    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
-    needs: [ promote-images, tag, regress-tests ]
-    if: |
-      contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
-      github.event_name != 'workflow_dispatch'
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        target_region: [ eu-west-1 ]
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-
-      - name: Redeploy
-        run: |
-          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          cd "$(pwd)/.github/ansible"
-
-          ./get_binaries.sh
-
-          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
-          rm -f neon_install.tar.gz .neon_current_version
-
-      - name: Cleanup ansible folder
-        run: rm -rf ~/.ansible
-
  deploy:
    runs-on: [ self-hosted, gen3, small ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
@@ -995,12 +935,12 @@ jobs:

      - name: Trigger deploy workflow
        env:
-          GH_TOKEN: ${{ github.token }}
+          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        run: |
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            gh workflow run deploy-dev.yml --ref main -f branch=${{ github.sha }} -f dockerTag=${{needs.tag.outputs.build-tag}}
+            gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}}
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            gh workflow run deploy-prod.yml --ref release -f branch=${{ github.sha }} -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
+            gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
          else
            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
            exit 1
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -1,280 +0,0 @@
-name: Neon Deploy dev
-
-on:
-  workflow_dispatch:
-    inputs:
-      dockerTag:
-        description: 'Docker tag to deploy'
-        required: true
-        type: string
-      branch:
-        description: 'Branch or commit used for deploy scripts and configs'
-        required: true
-        type: string
-        default: 'main'
-      deployStorage:
-        description: 'Deploy storage'
-        required: true
-        type: boolean
-        default: true
-      deployProxy:
-        description: 'Deploy proxy'
-        required: true
-        type: boolean
-        default: true
-      deployStorageBroker:
-        description: 'Deploy storage-broker'
-        required: true
-        type: boolean
-        default: true
-      deployPgSniRouter:
-        description: 'Deploy pg-sni-router'
-        required: true
-        type: boolean
-        default: true
-
-env:
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
-
-concurrency:
-  group: deploy-dev
-  cancel-in-progress: false
-
-jobs:
-  deploy-storage-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-      options: --user root --privileged
-    if: inputs.deployStorage
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        # TODO(sergey): Fix storage deploy in eu-central-1
-        target_region: [ eu-west-1, us-east-2]
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Redeploy
-        run: |
-          export DOCKER_TAG=${{ inputs.dockerTag }}
-          cd "$(pwd)/.github/ansible"
-
-          ./get_binaries.sh
-
-          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
-          rm -f neon_install.tar.gz .neon_current_version
-
-      - name: Cleanup ansible folder
-        run: rm -rf ~/.ansible
-
-  deploy-proxy-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    if: inputs.deployProxy
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: dev-us-east-2-beta
-            deploy_link_proxy: true
-            deploy_legacy_scram_proxy: true
-          - target_region:  eu-west-1
-            target_cluster: dev-eu-west-1-zeta
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1-node16
-        with:
-          role-to-assume: arn:aws:iam::369495373322:role/github-runner
-          aws-region: eu-central-1
-          role-skip-session-tagging: true
-          role-duration-seconds: 1800
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Re-deploy scram proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-  
-      - name: Re-deploy link proxy
-        if: matrix.deploy_link_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-  
-      - name: Re-deploy legacy scram proxy
-        if: matrix.deploy_legacy_scram_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-  
-      - name: Cleanup helm folder
-        run: rm -rf ~/.cache
-
-  deploy-preview-proxy-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    if: inputs.deployProxy
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  eu-central-1
-            target_cluster: dev-eu-central-1-alpha
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1-node16
-        with:
-          role-to-assume: arn:aws:iam::369495373322:role/github-runner
-          aws-region: eu-central-1
-          role-skip-session-tagging: true
-          role-duration-seconds: 1800
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Re-deploy preview proxies
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          for PREVIEW_NAME in helium argon krypton xenon radon oganesson hydrogen nitrogen oxygen fluorine chlorine; do
-            export PREVIEW_NAME
-            envsubst <.github/helm-values/preview-template.neon-proxy-scram.yaml >preview-${PREVIEW_NAME}.neon-proxy-scram.yaml
-            helm upgrade neon-proxy-scram-${PREVIEW_NAME} neondatabase/neon-proxy --namespace neon-proxy-${PREVIEW_NAME} --create-namespace --install --atomic -f preview-${PREVIEW_NAME}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-          done
-
-      - name: Cleanup helm folder
-        run: rm -rf ~/.cache
-  
-  deploy-storage-broker-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    if: inputs.deployStorageBroker
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: dev-us-east-2-beta
-          - target_region:  eu-west-1
-            target_cluster: dev-eu-west-1-zeta
-          - target_region:  eu-central-1
-            target_cluster: dev-eu-central-1-alpha
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1-node16
-        with:
-          role-to-assume: arn:aws:iam::369495373322:role/github-runner
-          aws-region: eu-central-1
-          role-skip-session-tagging: true
-          role-duration-seconds: 1800
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Deploy storage-broker
-        run:
-          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
-  
-      - name: Cleanup helm folder
-        run: rm -rf ~/.cache
-
-  deploy-pg-sni-router:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    if: inputs.deployPgSniRouter
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: dev-us-east-2-beta
-          - target_region:  eu-west-1
-            target_cluster: dev-eu-west-1-zeta
-          - target_region:  eu-central-1
-            target_cluster: dev-eu-central-1-alpha
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1-node16
-        with:
-          role-to-assume: arn:aws:iam::369495373322:role/github-runner
-          aws-region: eu-central-1
-          role-skip-session-tagging: true
-          role-duration-seconds: 1800
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Deploy pg-sni-router
-        run:
-          helm upgrade neon-pg-sni-router neondatabase/neon-pg-sni-router --namespace neon-pg-sni-router --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.pg-sni-router.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 15m0s
-  
-      - name: Cleanup helm folder
-        run: rm -rf ~/.cache
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -1,217 +0,0 @@
-name: Neon Deploy prod
-
-on:
-  workflow_dispatch:
-    inputs:
-      dockerTag:
-        description: 'Docker tag to deploy'
-        required: true
-        type: string
-      branch:
-        description: 'Branch or commit used for deploy scripts and configs'
-        required: true
-        type: string
-        default: 'release'
-      deployStorage:
-        description: 'Deploy storage'
-        required: true
-        type: boolean
-        default: true
-      deployProxy:
-        description: 'Deploy proxy'
-        required: true
-        type: boolean
-        default: true
-      deployStorageBroker:
-        description: 'Deploy storage-broker'
-        required: true
-        type: boolean
-        default: true
-      deployPgSniRouter:
-        description: 'Deploy pg-sni-router'
-        required: true
-        type: boolean
-        default: true
-      disclamerAcknowledged:
-        description: 'I confirm that there is an emergency and I can not use regular release workflow'
-        required: true
-        type: boolean
-        default: false
-
-concurrency:
-  group: deploy-prod
-  cancel-in-progress: false
-
-jobs:
-  deploy-prod-new:
-    runs-on: prod
-    container:
-      image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-      options: --user root --privileged
-    if: inputs.deployStorage && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1, us-east-1 ]
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Redeploy
-        run: |
-          export DOCKER_TAG=${{ inputs.dockerTag }}
-          cd "$(pwd)/.github/ansible"
-
-          ./get_binaries.sh
-
-          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
-          rm -f neon_install.tar.gz .neon_current_version
-
-  deploy-proxy-prod-new:
-    runs-on: prod
-    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    if: inputs.deployProxy && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: prod-us-east-2-delta
-            deploy_link_proxy: true
-            deploy_legacy_scram_proxy: false
-          - target_region:  us-west-2
-            target_cluster: prod-us-west-2-eta
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: true
-          - target_region: eu-central-1
-            target_cluster: prod-eu-central-1-gamma
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-          - target_region: ap-southeast-1
-            target_cluster: prod-ap-southeast-1-epsilon
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-          - target_region: us-east-1
-            target_cluster: prod-us-east-1-theta
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-
-      - name: Re-deploy scram proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-
-      - name: Re-deploy link proxy
-        if: matrix.deploy_link_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-
-      - name: Re-deploy legacy scram proxy
-        if: matrix.deploy_legacy_scram_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-
-  deploy-storage-broker-prod-new:
-    runs-on: prod
-    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: prod-us-east-2-delta
-          - target_region:  us-west-2
-            target_cluster: prod-us-west-2-eta
-          - target_region: eu-central-1
-            target_cluster: prod-eu-central-1-gamma
-          - target_region: ap-southeast-1
-            target_cluster: prod-ap-southeast-1-epsilon
-          - target_region: us-east-1
-            target_cluster: prod-us-east-1-theta
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-
-      - name: Deploy storage-broker
-        run:
-          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
-
-  deploy-pg-sni-router:
-    runs-on: prod
-    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    if: inputs.deployPgSniRouter && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: prod-us-east-2-delta
-          - target_region:  us-west-2
-            target_cluster: prod-us-west-2-eta
-          - target_region: eu-central-1
-            target_cluster: prod-eu-central-1-gamma
-          - target_region: ap-southeast-1
-            target_cluster: prod-ap-southeast-1-epsilon
-          - target_region: us-east-1
-            target_cluster: prod-us-east-1-theta
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Deploy pg-sni-router
-        run:
-          helm upgrade neon-pg-sni-router neondatabase/neon-pg-sni-router --namespace neon-pg-sni-router --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.pg-sni-router.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 15m0s
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1032,6 +1032,7 @@ dependencies = [
 "serde",
 "serde_json",
 "serde_with",
+ "utils",
 "workspace_hack",
 ]

@@ -1105,6 +1106,7 @@ dependencies = [
 "anyhow",
 "clap 4.2.2",
 "comfy-table",
+ "compute_api",
 "git-version",
 "nix",
 "once_cell",
@@ -3674,6 +3676,7 @@ dependencies = [
 "remote_storage",
 "reqwest",
 "safekeeper_api",
+ "scopeguard",
 "serde",
 "serde_json",
 "serde_with",
--- a/2
+++ b/2
@@ -2,7 +2,7 @@
 ### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
 ### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
 ### inside this image in the real deployments.
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned

--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -1,5 +1,5 @@
 ARG PG_VERSION
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned

@@ -393,6 +393,28 @@ RUN case "${PG_VERSION}" in \
    make install -j $(getconf _NPROCESSORS_ONLN) && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control

+#########################################################################################
+#
+# Layer "kq-imcx-pg-build"
+# compile kq_imcx extension
+#
+#########################################################################################
+FROM build-deps AS kq-imcx-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN apt-get update && \
+    apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
+    wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
+    echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
+    mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
+    mkdir build && \
+    cd build && \
+    cmake .. && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control
+
 #########################################################################################
 #
 # Layer "rust extensions"
@@ -506,6 +528,7 @@ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,6 +1,6 @@
 # First transient image to build compute_tools binaries
 # NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned

--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
+
 # Neon

 Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -30,6 +30,7 @@
 //!             -b /usr/local/bin/postgres
 //! ```
 //!
+use std::collections::HashMap;
 use std::fs::File;
 use std::panic;
 use std::path::Path;
@@ -67,6 +68,54 @@ fn main() -> Result<()> {
    let spec_json = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");

+    // Extract OpenTelemetry context for the startup actions from the
+    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
+    // tracing context.
+    //
+    // This is used to propagate the context for the 'start_compute' operation
+    // from the neon control plane. This allows linking together the wider
+    // 'start_compute' operation that creates the compute container, with the
+    // startup actions here within the container.
+    //
+    // There is no standard for passing context in env variables, but a lot of
+    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
+    // https://github.com/open-telemetry/opentelemetry-specification/issues/740
+    //
+    // Switch to the startup context here, and exit it once the startup has
+    // completed and Postgres is up and running.
+    //
+    // If this pod is pre-created without binding it to any particular endpoint
+    // yet, this isn't the right place to enter the startup context. In that
+    // case, the control plane should pass the tracing context as part of the
+    // /configure API call.
+    //
+    // NOTE: This is supposed to only cover the *startup* actions. Once
+    // postgres is configured and up-and-running, we exit this span. Any other
+    // actions that are performed on incoming HTTP requests, for example, are
+    // performed in separate spans.
+    //
+    // XXX: If the pod is restarted, we perform the startup actions in the same
+    // context as the original startup actions, which probably doesn't make
+    // sense.
+    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
+    if let Ok(val) = std::env::var("TRACEPARENT") {
+        startup_tracing_carrier.insert("traceparent".to_string(), val);
+    }
+    if let Ok(val) = std::env::var("TRACESTATE") {
+        startup_tracing_carrier.insert("tracestate".to_string(), val);
+    }
+    let startup_context_guard = if !startup_tracing_carrier.is_empty() {
+        use opentelemetry::propagation::TextMapPropagator;
+        use opentelemetry::sdk::propagation::TraceContextPropagator;
+        let guard = TraceContextPropagator::new()
+            .extract(&startup_tracing_carrier)
+            .attach();
+        info!("startup tracing context attached");
+        Some(guard)
+    } else {
+        None
+    };
+
    let compute_id = matches.get_one::<String>("compute-id");
    let control_plane_uri = matches.get_one::<String>("control-plane-uri");

@@ -148,8 +197,6 @@ fn main() -> Result<()> {

    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
-    let pspec = state.pspec.as_ref().expect("spec must be set");
-    let startup_tracing_context = pspec.spec.startup_tracing_context.clone();

    // Record for how long we slept waiting for the spec.
    state.metrics.wait_for_spec_ms = Utc::now()
@@ -165,29 +212,6 @@ fn main() -> Result<()> {
    compute.state_changed.notify_all();
    drop(state);

-    // Extract OpenTelemetry context for the startup actions from the spec, and
-    // attach it to the current tracing context.
-    //
-    // This is used to propagate the context for the 'start_compute' operation
-    // from the neon control plane. This allows linking together the wider
-    // 'start_compute' operation that creates the compute container, with the
-    // startup actions here within the container.
-    //
-    // Switch to the startup context here, and exit it once the startup has
-    // completed and Postgres is up and running.
-    //
-    // NOTE: This is supposed to only cover the *startup* actions. Once
-    // postgres is configured and up-and-running, we exit this span. Any other
-    // actions that are performed on incoming HTTP requests, for example, are
-    // performed in separate spans.
-    let startup_context_guard = if let Some(ref carrier) = startup_tracing_context {
-        use opentelemetry::propagation::TextMapPropagator;
-        use opentelemetry::sdk::propagation::TraceContextPropagator;
-        Some(TraceContextPropagator::new().extract(carrier).attach())
-    } else {
-        None
-    };
-
    // Launch remaining service threads
    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
    let _configurator_handle =
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -30,7 +30,7 @@ use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

 use compute_api::responses::{ComputeMetrics, ComputeStatus};
-use compute_api::spec::ComputeSpec;
+use compute_api::spec::{ComputeMode, ComputeSpec};

 use crate::config;
 use crate::pg_helpers::*;
@@ -67,8 +67,9 @@ pub struct ComputeNode {
 pub struct ComputeState {
    pub start_time: DateTime<Utc>,
    pub status: ComputeStatus,
-    /// Timestamp of the last Postgres activity
-    pub last_active: DateTime<Utc>,
+    /// Timestamp of the last Postgres activity. It could be `None` if
+    /// compute wasn't used since start.
+    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
    pub pspec: Option<ParsedSpec>,
    pub metrics: ComputeMetrics,
@@ -79,7 +80,7 @@ impl ComputeState {
        Self {
            start_time: Utc::now(),
            status: ComputeStatus::Empty,
-            last_active: Utc::now(),
+            last_active: None,
            error: None,
            pspec: None,
            metrics: ComputeMetrics::default(),
@@ -249,38 +250,10 @@ impl ComputeNode {
    /// safekeepers sync, basebackup, etc.
    #[instrument(skip(self, compute_state))]
    pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
-        #[derive(Clone)]
-        enum Replication {
-            Primary,
-            Static { lsn: Lsn },
-            HotStandby,
-        }
-
        let pspec = compute_state.pspec.as_ref().expect("spec must be set");
        let spec = &pspec.spec;
        let pgdata_path = Path::new(&self.pgdata);

-        let hot_replica = if let Some(option) = spec.cluster.settings.find_ref("hot_standby") {
-            if let Some(value) = &option.value {
-                anyhow::ensure!(option.vartype == "bool");
-                matches!(value.as_str(), "on" | "yes" | "true")
-            } else {
-                false
-            }
-        } else {
-            false
-        };
-
-        let replication = if hot_replica {
-            Replication::HotStandby
-        } else if let Some(lsn) = spec.cluster.settings.find("recovery_target_lsn") {
-            Replication::Static {
-                lsn: Lsn::from_str(&lsn)?,
-            }
-        } else {
-            Replication::Primary
-        };
-
        // Remove/create an empty pgdata directory and put configuration there.
        self.create_pgdata()?;
        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?;
@@ -288,8 +261,8 @@ impl ComputeNode {
        // Syncing safekeepers is only safe with primary nodes: if a primary
        // is already connected it will be kicked out, so a secondary (standby)
        // cannot sync safekeepers.
-        let lsn = match &replication {
-            Replication::Primary => {
+        let lsn = match spec.mode {
+            ComputeMode::Primary => {
                info!("starting safekeepers syncing");
                let lsn = self
                    .sync_safekeepers(pspec.storage_auth_token.clone())
@@ -297,11 +270,11 @@ impl ComputeNode {
                info!("safekeepers synced at LSN {}", lsn);
                lsn
            }
-            Replication::Static { lsn } => {
+            ComputeMode::Static(lsn) => {
                info!("Starting read-only node at static LSN {}", lsn);
-                *lsn
+                lsn
            }
-            Replication::HotStandby => {
+            ComputeMode::Replica => {
                info!("Initializing standby from latest Pageserver LSN");
                Lsn(0)
            }
@@ -321,9 +294,9 @@ impl ComputeNode {
        // Update pg_hba.conf received with basebackup.
        update_pg_hba(pgdata_path)?;

-        match &replication {
-            Replication::Primary | Replication::Static { .. } => {}
-            Replication::HotStandby => {
+        match spec.mode {
+            ComputeMode::Primary | ComputeMode::Static(..) => {}
+            ComputeMode::Replica => {
                add_standby_signal(pgdata_path)?;
            }
        }
@@ -430,11 +403,13 @@ impl ComputeNode {
        self.pg_reload_conf(&mut client)?;

        // Proceed with post-startup configuration. Note, that order of operations is important.
-        handle_roles(&spec, &mut client)?;
-        handle_databases(&spec, &mut client)?;
-        handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
-        handle_grants(&spec, self.connstr.as_str(), &mut client)?;
-        handle_extensions(&spec, &mut client)?;
+        if spec.mode == ComputeMode::Primary {
+            handle_roles(&spec, &mut client)?;
+            handle_databases(&spec, &mut client)?;
+            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
+            handle_grants(&spec, self.connstr.as_str(), &mut client)?;
+            handle_extensions(&spec, &mut client)?;
+        }

        // 'Close' connection
        drop(client);
@@ -467,7 +442,9 @@ impl ComputeNode {

        let pg = self.start_postgres(spec.storage_auth_token.clone())?;

-        self.apply_config(&compute_state)?;
+        if spec.spec.mode == ComputeMode::Primary {
+            self.apply_config(&compute_state)?;
+        }

        let startup_end_time = Utc::now();
        {
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,7 +6,7 @@ use std::path::Path;
 use anyhow::Result;

 use crate::pg_helpers::PgOptionsSerialize;
-use compute_api::spec::ComputeSpec;
+use compute_api::spec::{ComputeMode, ComputeSpec};

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -34,17 +34,25 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 /// Create or completely rewrite configuration file specified by `path`
 pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
    // File::create() destroys the file content if it exists.
-    let mut postgres_conf = File::create(path)?;
+    let mut file = File::create(path)?;

-    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
-
-    Ok(())
-}
-
-// Write Postgres config block wrapped with generated comment section
-fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
    writeln!(file, "# Managed by compute_ctl: begin")?;
-    writeln!(file, "{}", buf)?;
+
+    write!(file, "{}", &spec.cluster.settings.as_pg_settings())?;
+
+    match spec.mode {
+        ComputeMode::Primary => {}
+        ComputeMode::Static(lsn) => {
+            // hot_standby is 'on' by default, but let's be explicit
+            writeln!(file, "hot_standby=on")?;
+            writeln!(file, "recovery_target_lsn='{lsn}'")?;
+        }
+        ComputeMode::Replica => {
+            // hot_standby is 'on' by default, but let's be explicit
+            writeln!(file, "hot_standby=on")?;
+        }
+    }
+
    writeln!(file, "# Managed by compute_ctl: end")?;

    Ok(())
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -181,8 +181,8 @@ components:
    ComputeState:
      type: object
      required:
+        - start_time
        - status
-        - last_active
      properties:
        start_time:
          type: string
@@ -195,11 +195,13 @@ components:
          $ref: '#/components/schemas/ComputeStatus'
        last_active:
          type: string
-          description: The last detected compute activity timestamp in UTC and RFC3339 format.
+          description: |
+            The last detected compute activity timestamp in UTC and RFC3339 format.
+            It could be empty if compute was never used by user since start.
          example: "2022-10-12T07:20:50.52Z"
        error:
          type: string
-          description: Text of the error during compute startup, if any.
+          description: Text of the error during compute startup or reconfiguration, if any.
          example: ""
        tenant:
          type: string
@@ -222,9 +224,12 @@ components:
    ComputeStatus:
      type: string
      enum:
+        - empty
        - init
        - failed
        - running
+        - configuration_pending
+        - configuration
      example: running

    #
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -74,7 +74,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
                            // Found non-idle backend, so the last activity is NOW.
                            // Save it and exit the for loop. Also clear the idle backend
                            // `state_change` timestamps array as it doesn't matter now.
-                            last_active = Utc::now();
+                            last_active = Some(Utc::now());
                            idle_backs.clear();
                            break;
                        }
@@ -82,15 +82,16 @@ fn watch_compute_activity(compute: &ComputeNode) {

                    // Get idle backend `state_change` with the max timestamp.
                    if let Some(last) = idle_backs.iter().max() {
-                        last_active = *last;
+                        last_active = Some(*last);
                    }
                }

                // Update the last activity in the shared state if we got a more recent one.
                let mut state = compute.state.lock().unwrap();
+                // NB: `Some(<DateTime>)` is always greater than `None`.
                if last_active > state.last_active {
                    state.last_active = last_active;
-                    debug!("set the last compute activity time to: {}", last_active);
+                    debug!("set the last compute activity time to: {:?}", last_active);
                }
            }
            Err(e) => {
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -30,4 +30,5 @@ postgres_connection.workspace = true
 storage_broker.workspace = true
 utils.workspace = true

+compute_api.workspace = true
 workspace_hack.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -7,8 +7,8 @@
 //!
 use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
+use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
-use control_plane::endpoint::ComputeMode;
 use control_plane::local_env::LocalEnv;
 use control_plane::pageserver::PageServerNode;
 use control_plane::safekeeper::SafekeeperNode;
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -22,6 +22,8 @@ use crate::local_env::LocalEnv;
 use crate::pageserver::PageServerNode;
 use crate::postgresql_conf::PostgresConf;

+use compute_api::spec::ComputeMode;
+
 // contents of a endpoint.json file
 #[serde_as]
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
@@ -122,26 +124,12 @@ impl ComputeControlPlane {

 ///////////////////////////////////////////////////////////////////////////////

-#[serde_as]
-#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]
-pub enum ComputeMode {
-    // Regular read-write node
-    Primary,
-    // if recovery_target_lsn is provided, and we want to pin the node to a specific LSN
-    Static(#[serde_as(as = "DisplayFromStr")] Lsn),
-    // Hot standby; read-only replica.
-    // Future versions may want to distinguish between replicas with hot standby
-    // feedback and other kinds of replication configurations.
-    Replica,
-}
-
 #[derive(Debug)]
 pub struct Endpoint {
    /// used as the directory name
    name: String,
    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
-    // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
    pub mode: ComputeMode,

    // port and address of the Postgres server
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -11,4 +11,5 @@ serde.workspace = true
 serde_with.workspace = true
 serde_json.workspace = true

+utils = { path = "../utils" }
 workspace_hack.workspace = true
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -19,7 +19,7 @@ pub struct ComputeStatusResponse {
    pub timeline: Option<String>,
    pub status: ComputeStatus,
    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
+    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
 }

@@ -29,7 +29,7 @@ pub struct ComputeState {
    pub status: ComputeStatus,
    /// Timestamp of the last Postgres activity
    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
+    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
 }

@@ -54,11 +54,15 @@ pub enum ComputeStatus {
    Failed,
 }

-fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
+fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
 where
    S: Serializer,
 {
-    x.to_rfc3339().serialize(s)
+    if let Some(x) = x {
+        x.to_rfc3339().serialize(s)
+    } else {
+        s.serialize_none()
+    }
 }

 /// Response of the /metrics.json API
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -3,8 +3,9 @@
 //! The spec.json file is used to pass information to 'compute_ctl'. It contains
 //! all the information needed to start up the right version of PostgreSQL,
 //! and connect it to the storage nodes.
-use serde::Deserialize;
-use std::collections::HashMap;
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+use utils::lsn::Lsn;

 /// String type alias representing Postgres identifier and
 /// intended to be used for DB / role names.
@@ -12,6 +13,7 @@ pub type PgIdent = String;

 /// Cluster spec or configuration represented as an optional number of
 /// delta operations + final cluster state description.
+#[serde_as]
 #[derive(Clone, Debug, Default, Deserialize)]
 pub struct ComputeSpec {
    pub format_version: f32,
@@ -24,9 +26,25 @@ pub struct ComputeSpec {
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,

-    pub storage_auth_token: Option<String>,
+    #[serde(default)]
+    pub mode: ComputeMode,

-    pub startup_tracing_context: Option<HashMap<String, String>>,
+    pub storage_auth_token: Option<String>,
+}
+
+#[serde_as]
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
+pub enum ComputeMode {
+    /// A read-write node
+    #[default]
+    Primary,
+    /// A read-only node, pinned at a particular LSN
+    Static(#[serde_as(as = "DisplayFromStr")] Lsn),
+    /// A read-only node that follows the tip of the branch in hot standby mode
+    ///
+    /// Future versions may want to distinguish between replicas with hot standby
+    /// feedback and other kinds of replication configurations.
+    Replica,
 }

 #[derive(Clone, Debug, Default, Deserialize)]
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -48,13 +48,33 @@ pub enum TenantState {
 }

 impl TenantState {
-    pub fn has_in_progress_downloads(&self) -> bool {
+    pub fn attachment_status(&self) -> TenantAttachmentStatus {
+        use TenantAttachmentStatus::*;
        match self {
-            Self::Loading => true,
-            Self::Attaching => true,
-            Self::Active => false,
-            Self::Stopping => false,
-            Self::Broken { .. } => false,
+            // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
+            // So, technically, we can return Attached here.
+            // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
+            // But, our attach task might still be fetching the remote timelines, etc.
+            // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
+            Self::Attaching => Maybe,
+            // tenant mgr startup distinguishes attaching from loading via marker file.
+            // If it's loading, there is no attach marker file, i.e., attach had finished in the past.
+            Self::Loading => Attached,
+            // We only reach Active after successful load / attach.
+            // So, call atttachment status Attached.
+            Self::Active => Attached,
+            // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
+            // However, it also becomes Broken if the regular load fails.
+            // We would need a separate TenantState variant to distinguish these cases.
+            // However, there's no practical difference from Console's perspective.
+            // It will run a Postgres-level health check as soon as it observes Attached.
+            // That will fail on Broken tenants.
+            // Console can then rollback the attach, or, wait for operator to fix the Broken tenant.
+            Self::Broken { .. } => Attached,
+            // Why is Stopping a Maybe case? Because, during pageserver shutdown,
+            // we set the Stopping state irrespective of whether the tenant
+            // has finished attaching or not.
+            Self::Stopping => Maybe,
        }
    }

@@ -209,16 +229,25 @@ impl TenantConfigRequest {
    }
 }

+/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
+#[derive(Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum TenantAttachmentStatus {
+    Maybe,
+    Attached,
+}
+
 #[serde_as]
 #[derive(Serialize, Deserialize, Clone)]
 pub struct TenantInfo {
    #[serde_as(as = "DisplayFromStr")]
    pub id: TenantId,
+    // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
    pub state: TenantState,
    /// Sum of the size of all layer files.
    /// If a layer is present in both local FS and S3, it counts only once.
    pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
-    pub has_in_progress_downloads: Option<bool>,
+    pub attachment_status: TenantAttachmentStatus,
 }

 /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
@@ -691,7 +720,7 @@ mod tests {
            id: TenantId::generate(),
            state: TenantState::Active,
            current_physical_size: Some(42),
-            has_in_progress_downloads: Some(false),
+            attachment_status: TenantAttachmentStatus::Attached,
        };
        let expected_active = json!({
            "id": original_active.id.to_string(),
@@ -699,7 +728,7 @@ mod tests {
                "slug": "Active",
            },
            "current_physical_size": 42,
-            "has_in_progress_downloads": false,
+            "attachment_status": "attached",
        });

        let original_broken = TenantInfo {
@@ -709,7 +738,7 @@ mod tests {
                backtrace: "backtrace info".into(),
            },
            current_physical_size: Some(42),
-            has_in_progress_downloads: Some(false),
+            attachment_status: TenantAttachmentStatus::Attached,
        };
        let expected_broken = json!({
            "id": original_broken.id.to_string(),
@@ -721,7 +750,7 @@ mod tests {
                }
            },
            "current_physical_size": 42,
-            "has_in_progress_downloads": false,
+            "attachment_status": "attached",
        });

        assert_eq!(
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -146,6 +146,10 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
 pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
 pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;

+// From replication/message.h
+pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
+
+// From rmgrlist.h
 pub const RM_XLOG_ID: u8 = 0;
 pub const RM_XACT_ID: u8 = 1;
 pub const RM_SMGR_ID: u8 = 2;
@@ -157,6 +161,7 @@ pub const RM_RELMAP_ID: u8 = 7;
 pub const RM_STANDBY_ID: u8 = 8;
 pub const RM_HEAP2_ID: u8 = 9;
 pub const RM_HEAP_ID: u8 = 10;
+pub const RM_LOGICALMSG_ID: u8 = 21;

 // from xlogreader.h
 pub const XLR_INFO_MASK: u8 = 0x0F;
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -613,7 +613,7 @@ pub struct XLogDataBody<'a> {

 #[derive(Debug)]
 pub struct WalSndKeepAlive {
-    pub sent_ptr: u64,
+    pub wal_end: u64, // current end of WAL on the server
    pub timestamp: i64,
    pub request_reply: bool,
 }
@@ -924,7 +924,7 @@ impl<'a> BeMessage<'a> {
                buf.put_u8(b'd');
                write_body(buf, |buf| {
                    buf.put_u8(b'k');
-                    buf.put_u64(req.sent_ptr);
+                    buf.put_u64(req.wal_end);
                    buf.put_i64(req.timestamp);
                    buf.put_u8(u8::from(req.request_reply));
                });
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -128,6 +128,15 @@ impl RemoteStorage for LocalFs {
        // We need this dance with sort of durable rename (without fsyncs)
        // to prevent partial uploads. This was really hit when pageserver shutdown
        // cancelled the upload and partial file was left on the fs
+        // NOTE: Because temp file suffix always the same this operation is racy.
+        // Two concurrent operations can lead to the following sequence:
+        // T1: write(temp)
+        // T2: write(temp) -> overwrites the content
+        // T1: rename(temp, dst) -> succeeds
+        // T2: rename(temp, dst) -> fails, temp no longet exists
+        // This can be solved by supplying unique temp suffix every time, but this situation
+        // is not normal in the first place, the error can help (and helped at least once)
+        // to discover bugs in upper level synchronization.
        let temp_file_path =
            path_with_suffix_extension(&target_file_path, LOCAL_FS_TEMP_FILE_SUFFIX);
        let mut destination = io::BufWriter::new(
--- a/libs/utils/scripts/restore_from_wal.sh
+++ b/libs/utils/scripts/restore_from_wal.sh
@@ -1,21 +1,21 @@
 #!/bin/bash
+
+set -euxo pipefail
+
 PG_BIN=$1
 WAL_PATH=$2
 DATA_DIR=$3
 PORT=$4
-SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
-rm -fr $DATA_DIR
-env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
-echo port=$PORT >> $DATA_DIR/postgresql.conf
-REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
+SYSID=$(od -A n -j 24 -N 8 -t d8 "$WAL_PATH"/000000010000000000000002* | cut -c 3-)
+rm -fr "$DATA_DIR"
+env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID"
+echo port="$PORT" >> "$DATA_DIR"/postgresql.conf
+REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
 declare -i WAL_SIZE=$REDO_POS+114
-$PG_BIN/pg_ctl -D $DATA_DIR -l logfile start
-$PG_BIN/pg_ctl -D $DATA_DIR -l logfile stop -m immediate
-cp $DATA_DIR/pg_wal/000000010000000000000001 .
-cp $WAL_PATH/* $DATA_DIR/pg_wal/
-if [ -f $DATA_DIR/pg_wal/*.partial ]
-then
-	(cd $DATA_DIR/pg_wal ; for partial in \*.partial ; do mv $partial `basename $partial .partial` ; done)
-fi
-dd if=000000010000000000000001 of=$DATA_DIR/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
+"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile start
+"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile stop -m immediate
+cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
+cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
+for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
+dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
 rm -f 000000010000000000000001
--- a/libs/utils/scripts/restore_from_wal_archive.sh
+++ b/libs/utils/scripts/restore_from_wal_archive.sh
@@ -1,20 +0,0 @@
-PG_BIN=$1
-WAL_PATH=$2
-DATA_DIR=$3
-PORT=$4
-SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
-rm -fr $DATA_DIR /tmp/pg_wals
-mkdir /tmp/pg_wals
-env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
-echo port=$PORT >> $DATA_DIR/postgresql.conf
-REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
-declare -i WAL_SIZE=$REDO_POS+114
-cp $WAL_PATH/* /tmp/pg_wals
-if [ -f $DATA_DIR/pg_wal/*.partial ]
-then
-	(cd /tmp/pg_wals ; for partial in \*.partial ; do  mv $partial `basename $partial .partial` ; done)
-fi
-dd if=$DATA_DIR/pg_wal/000000010000000000000001 of=/tmp/pg_wals/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
-echo > $DATA_DIR/recovery.signal
-rm -f $DATA_DIR/pg_wal/*
-echo "restore_command = 'cp /tmp/pg_wals/%f %p'" >> $DATA_DIR/postgresql.conf
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -58,24 +58,45 @@ pub mod pageserver_feedback;

 pub mod tracing_span_assert;

-/// use with fail::cfg("$name", "return(2000)")
-#[macro_export]
-macro_rules! failpoint_sleep_millis_async {
-    ($name:literal) => {{
-        let should_sleep: Option<std::time::Duration> = (|| {
-            fail::fail_point!($name, |v: Option<_>| {
-                let millis = v.unwrap().parse::<u64>().unwrap();
-                Some(Duration::from_millis(millis))
-            });
-            None
-        })();
-        if let Some(d) = should_sleep {
-            tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
-            tokio::time::sleep(d).await;
-            tracing::info!("failpoint {:?}: sleep done", $name);
-        }
-    }};
+pub mod rate_limit;
+
+mod failpoint_macro_helpers {
+
+    /// use with fail::cfg("$name", "return(2000)")
+    ///
+    /// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the
+    /// specified time (in milliseconds). The main difference is that we use async
+    /// tokio sleep function. Another difference is that we print lines to the log,
+    /// which can be useful in tests to check that the failpoint was hit.
+    #[macro_export]
+    macro_rules! failpoint_sleep_millis_async {
+        ($name:literal) => {{
+            // If the failpoint is used with a "return" action, set should_sleep to the
+            // returned value (as string). Otherwise it's set to None.
+            let should_sleep = (|| {
+                ::fail::fail_point!($name, |x| x);
+                ::std::option::Option::None
+            })();
+
+            // Sleep if the action was a returned value
+            if let ::std::option::Option::Some(duration_str) = should_sleep {
+                $crate::failpoint_sleep_helper($name, duration_str).await
+            }
+        }};
+    }
+
+    // Helper function used by the macro. (A function has nicer scoping so we
+    // don't need to decorate everything with "::")
+    pub async fn failpoint_sleep_helper(name: &'static str, duration_str: String) {
+        let millis = duration_str.parse::<u64>().unwrap();
+        let d = std::time::Duration::from_millis(millis);
+
+        tracing::info!("failpoint {:?}: sleeping for {:?}", name, d);
+        tokio::time::sleep(d).await;
+        tracing::info!("failpoint {:?}: sleep done", name);
+    }
 }
+pub use failpoint_macro_helpers::failpoint_sleep_helper;

 /// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
 ///
--- a/libs/utils/src/rate_limit.rs
+++ b/libs/utils/src/rate_limit.rs
@@ -0,0 +1,66 @@
+//! A helper to rate limit operations.
+
+use std::time::{Duration, Instant};
+
+pub struct RateLimit {
+    last: Option<Instant>,
+    interval: Duration,
+}
+
+impl RateLimit {
+    pub fn new(interval: Duration) -> Self {
+        Self {
+            last: None,
+            interval,
+        }
+    }
+
+    /// Call `f` if the rate limit allows.
+    /// Don't call it otherwise.
+    pub fn call<F: FnOnce()>(&mut self, f: F) {
+        let now = Instant::now();
+        match self.last {
+            Some(last) if now - last <= self.interval => {
+                // ratelimit
+            }
+            _ => {
+                self.last = Some(now);
+                f();
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::sync::atomic::AtomicUsize;
+
+    #[test]
+    fn basics() {
+        use super::RateLimit;
+        use std::sync::atomic::Ordering::Relaxed;
+        use std::time::Duration;
+
+        let called = AtomicUsize::new(0);
+        let mut f = RateLimit::new(Duration::from_millis(100));
+
+        let cl = || {
+            called.fetch_add(1, Relaxed);
+        };
+
+        f.call(cl);
+        assert_eq!(called.load(Relaxed), 1);
+        f.call(cl);
+        assert_eq!(called.load(Relaxed), 1);
+        f.call(cl);
+        assert_eq!(called.load(Relaxed), 1);
+        std::thread::sleep(Duration::from_millis(100));
+        f.call(cl);
+        assert_eq!(called.load(Relaxed), 2);
+        f.call(cl);
+        assert_eq!(called.load(Relaxed), 2);
+        std::thread::sleep(Duration::from_millis(100));
+        f.call(cl);
+        assert_eq!(called.load(Relaxed), 3);
+    }
+}
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -5,7 +5,7 @@
 //!
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
-use crate::tenant::mgr;
+use crate::tenant::{mgr, LogicalSizeCalculationCause};
 use anyhow;
 use chrono::Utc;
 use consumption_metrics::{idempotency_key, Event, EventChunk, EventType, CHUNK_SIZE};
@@ -164,7 +164,8 @@ pub async fn collect_metrics_iteration(
                    timeline_written_size,
                ));

-                match timeline.get_current_logical_size(ctx) {
+                let span = info_span!("collect_metrics_iteration", tenant_id = %timeline.tenant_id, timeline_id = %timeline.timeline_id);
+                match span.in_scope(|| timeline.get_current_logical_size(ctx)) {
                    // Only send timeline logical size when it is fully calculated.
                    Ok((size, is_exact)) if is_exact => {
                        current_metrics.push((
@@ -334,7 +335,9 @@ pub async fn calculate_synthetic_size_worker(

                    if let Ok(tenant) = mgr::get_tenant(tenant_id, true).await
                    {
-                        if let Err(e) = tenant.calculate_synthetic_size(ctx).await {
+                        if let Err(e) = tenant.calculate_synthetic_size(
+                            LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize,
+                            ctx).await {
                            error!("failed to calculate synthetic size for tenant {}: {}", tenant_id, e);
                        }
                    }
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -340,7 +340,29 @@ paths:
          format: hex

    post:
-      description: Schedules attach operation to happen in the background for given tenant
+      description: |
+        Schedules attach operation to happen in the background for the given tenant.
+        As soon as the caller sends this request, it must assume the pageserver
+        starts writing to the tenant's S3 state unless it receives one of the
+        distinguished errors below that state otherwise.
+
+        If a client receives a not-distinguished response, e.g., a network timeout,
+        it MUST retry the /attach request and poll again for the tenant's
+        attachment status.
+
+        After the client has received a 202, it MUST poll the tenant's
+        attachment status (field `attachment_status`) to reach state `attached`.
+        If the `attachment_status` is missing, the client MUST retry the `/attach`
+        request (goto previous paragraph). This is a robustness measure in case the tenant
+        status endpoint is buggy, but the attach operation is ongoing.
+
+        There is no way to cancel an in-flight request.
+
+        In any case, the client
+        * MUST NOT ASSUME that the /attach request has been lost in the network,
+        * MUST NOT ASSUME that the request has been lost, based on the observation
+          that a subsequent tenant status request returns 404. The request may
+          still be in flight. It must be retried.
      responses:
        "202":
          description: Tenant attaching scheduled
@@ -866,13 +888,27 @@ components:
      type: object
      required:
        - id
+        - attachment_status
      properties:
        id:
          type: string
        current_physical_size:
          type: integer
-        has_in_progress_downloads:
-          type: boolean
+        attachment_status:
+          description: |
+            Status of this tenant's attachment to this pageserver.
+
+            - `maybe` means almost nothing, don't read anything into it
+              except for the fact that the pageserver _might_ be already
+              writing to the tenant's S3 state, so, DO NOT ATTACH the
+              tenant to any other pageserver, or we risk split-brain.
+            - `attached` means that the attach operation has completed,
+              maybe successfully, maybe not. Perform a health check at
+              the Postgres level to determine healthiness of the tenant.
+
+            See the tenant `/attach` endpoint for more information.
+          type: string
+          enum: [ "maybe", "attached" ]
    TenantCreateInfo:
      type: object
      properties:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -25,7 +25,7 @@ use crate::tenant::config::TenantConfOpt;
 use crate::tenant::mgr::{TenantMapInsertError, TenantStateError};
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
-use crate::tenant::{PageReconstructError, Timeline};
+use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, Timeline};
 use crate::{config::PageServerConf, tenant::mgr};
 use utils::{
    auth::JwtAuth,
@@ -105,6 +105,9 @@ impl From<PageReconstructError> for ApiError {
            PageReconstructError::Cancelled => {
                ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
            }
+            PageReconstructError::AncestorStopping(_) => {
+                ApiError::InternalServerError(anyhow::Error::new(pre))
+            }
            PageReconstructError::WalRedo(pre) => {
                ApiError::InternalServerError(anyhow::Error::new(pre))
            }
@@ -169,6 +172,8 @@ async fn build_timeline_info(
    include_non_incremental_logical_size: bool,
    ctx: &RequestContext,
 ) -> anyhow::Result<TimelineInfo> {
+    crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
+
    let mut info = build_timeline_info_common(timeline, ctx)?;
    if include_non_incremental_logical_size {
        // XXX we should be using spawn_ondemand_logical_size_calculation here.
@@ -191,6 +196,7 @@ fn build_timeline_info_common(
    timeline: &Arc<Timeline>,
    ctx: &RequestContext,
 ) -> anyhow::Result<TimelineInfo> {
+    crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
    let last_record_lsn = timeline.get_last_record_lsn();
    let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
        let guard = timeline.last_received_wal.lock().unwrap();
@@ -263,25 +269,28 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Error);

-    let tenant = mgr::get_tenant(tenant_id, true).await?;
-    match tenant.create_timeline(
-        new_timeline_id,
-        request_data.ancestor_timeline_id.map(TimelineId::from),
-        request_data.ancestor_start_lsn,
-        request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
-        &ctx,
-    )
-    .instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
-    .await {
-        Ok(Some(new_timeline)) => {
-            // Created. Construct a TimelineInfo for it.
-            let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
-                .map_err(ApiError::InternalServerError)?;
-            json_response(StatusCode::CREATED, timeline_info)
+    async {
+        let tenant = mgr::get_tenant(tenant_id, true).await?;
+        match tenant.create_timeline(
+            new_timeline_id,
+            request_data.ancestor_timeline_id.map(TimelineId::from),
+            request_data.ancestor_start_lsn,
+            request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
+            &ctx,
+        )
+        .await {
+            Ok(Some(new_timeline)) => {
+                // Created. Construct a TimelineInfo for it.
+                let timeline_info = build_timeline_info_common(&new_timeline, &ctx)
+                    .map_err(ApiError::InternalServerError)?;
+                json_response(StatusCode::CREATED, timeline_info)
+            }
+            Ok(None) => json_response(StatusCode::CONFLICT, ()), // timeline already exists
+            Err(err) => Err(ApiError::InternalServerError(err)),
        }
-        Ok(None) => json_response(StatusCode::CONFLICT, ()), // timeline already exists
-        Err(err) => Err(ApiError::InternalServerError(err)),
    }
+    .instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
+    .await
 }

 async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -303,6 +312,7 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
                include_non_incremental_logical_size.unwrap_or(false),
                &ctx,
            )
+            .instrument(info_span!("build_timeline_info", timeline_id = %timeline.timeline_id))
            .await
            .context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
            .map_err(ApiError::InternalServerError)?;
@@ -467,7 +477,7 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
            id: *id,
            state: state.clone(),
            current_physical_size: None,
-            has_in_progress_downloads: Some(state.has_in_progress_downloads()),
+            attachment_status: state.attachment_status(),
        })
        .collect::<Vec<TenantInfo>>();

@@ -492,7 +502,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
            id: tenant_id,
            state: state.clone(),
            current_physical_size: Some(current_physical_size),
-            has_in_progress_downloads: Some(state.has_in_progress_downloads()),
+            attachment_status: state.attachment_status(),
        })
    }
    .instrument(info_span!("tenant_status_handler", tenant = %tenant_id))
@@ -527,7 +537,11 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A

    // this can be long operation
    let inputs = tenant
-        .gather_size_inputs(retention_period, &ctx)
+        .gather_size_inputs(
+            retention_period,
+            LogicalSizeCalculationCause::TenantSizeHandler,
+            &ctx,
+        )
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -691,16 +705,6 @@ pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>,
    Ok(response)
 }

-// Helper function to standardize the error messages we produce on bad durations
-//
-// Intended to be used with anyhow's `with_context`, e.g.:
-//
-//   let value = result.with_context(bad_duration("name", &value))?;
-//
-fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn() -> String {
-    move || format!("Cannot parse `{field_name}` duration {value:?}")
-}
-
 async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permission(&request, None)?;

@@ -708,91 +712,7 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo

    let request_data: TenantCreateRequest = json_request(&mut request).await?;

-    let mut tenant_conf = TenantConfOpt::default();
-    if let Some(gc_period) = request_data.gc_period {
-        tenant_conf.gc_period = Some(
-            humantime::parse_duration(&gc_period)
-                .with_context(bad_duration("gc_period", &gc_period))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    tenant_conf.gc_horizon = request_data.gc_horizon;
-    tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
-
-    if let Some(pitr_interval) = request_data.pitr_interval {
-        tenant_conf.pitr_interval = Some(
-            humantime::parse_duration(&pitr_interval)
-                .with_context(bad_duration("pitr_interval", &pitr_interval))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
-    if let Some(walreceiver_connect_timeout) = request_data.walreceiver_connect_timeout {
-        tenant_conf.walreceiver_connect_timeout = Some(
-            humantime::parse_duration(&walreceiver_connect_timeout)
-                .with_context(bad_duration(
-                    "walreceiver_connect_timeout",
-                    &walreceiver_connect_timeout,
-                ))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    if let Some(lagging_wal_timeout) = request_data.lagging_wal_timeout {
-        tenant_conf.lagging_wal_timeout = Some(
-            humantime::parse_duration(&lagging_wal_timeout)
-                .with_context(bad_duration("lagging_wal_timeout", &lagging_wal_timeout))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
-        tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
-    }
-    if let Some(trace_read_requests) = request_data.trace_read_requests {
-        tenant_conf.trace_read_requests = Some(trace_read_requests);
-    }
-
-    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
-    if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
-        tenant_conf.checkpoint_timeout = Some(
-            humantime::parse_duration(&checkpoint_timeout)
-                .with_context(bad_duration("checkpoint_timeout", &checkpoint_timeout))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
-    tenant_conf.compaction_target_size = request_data.compaction_target_size;
-    tenant_conf.compaction_threshold = request_data.compaction_threshold;
-
-    if let Some(compaction_period) = request_data.compaction_period {
-        tenant_conf.compaction_period = Some(
-            humantime::parse_duration(&compaction_period)
-                .with_context(bad_duration("compaction_period", &compaction_period))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
-    if let Some(eviction_policy) = request_data.eviction_policy {
-        tenant_conf.eviction_policy = Some(
-            serde_json::from_value(eviction_policy)
-                .context("parse field `eviction_policy`")
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
-    tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
-
-    if let Some(evictions_low_residence_duration_metric_threshold) =
-        request_data.evictions_low_residence_duration_metric_threshold
-    {
-        tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
-            humantime::parse_duration(&evictions_low_residence_duration_metric_threshold)
-                .with_context(bad_duration(
-                    "evictions_low_residence_duration_metric_threshold",
-                    &evictions_low_residence_duration_metric_threshold,
-                ))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
+    let tenant_conf = TenantConfOpt::try_from(&request_data).map_err(ApiError::BadRequest)?;

    let target_tenant_id = request_data
        .new_tenant_id
@@ -860,85 +780,7 @@ async fn update_tenant_config_handler(
    let tenant_id = request_data.tenant_id;
    check_permission(&request, Some(tenant_id))?;

-    let mut tenant_conf = TenantConfOpt::default();
-    if let Some(gc_period) = request_data.gc_period {
-        tenant_conf.gc_period = Some(
-            humantime::parse_duration(&gc_period)
-                .with_context(bad_duration("gc_period", &gc_period))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    tenant_conf.gc_horizon = request_data.gc_horizon;
-    tenant_conf.image_creation_threshold = request_data.image_creation_threshold;
-
-    if let Some(pitr_interval) = request_data.pitr_interval {
-        tenant_conf.pitr_interval = Some(
-            humantime::parse_duration(&pitr_interval)
-                .with_context(bad_duration("pitr_interval", &pitr_interval))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    if let Some(walreceiver_connect_timeout) = request_data.walreceiver_connect_timeout {
-        tenant_conf.walreceiver_connect_timeout = Some(
-            humantime::parse_duration(&walreceiver_connect_timeout)
-                .with_context(bad_duration(
-                    "walreceiver_connect_timeout",
-                    &walreceiver_connect_timeout,
-                ))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    if let Some(lagging_wal_timeout) = request_data.lagging_wal_timeout {
-        tenant_conf.lagging_wal_timeout = Some(
-            humantime::parse_duration(&lagging_wal_timeout)
-                .with_context(bad_duration("lagging_wal_timeout", &lagging_wal_timeout))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    tenant_conf.max_lsn_wal_lag = request_data.max_lsn_wal_lag;
-    tenant_conf.trace_read_requests = request_data.trace_read_requests;
-
-    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
-    if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
-        tenant_conf.checkpoint_timeout = Some(
-            humantime::parse_duration(&checkpoint_timeout)
-                .with_context(bad_duration("checkpoint_timeout", &checkpoint_timeout))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-    tenant_conf.compaction_target_size = request_data.compaction_target_size;
-    tenant_conf.compaction_threshold = request_data.compaction_threshold;
-
-    if let Some(compaction_period) = request_data.compaction_period {
-        tenant_conf.compaction_period = Some(
-            humantime::parse_duration(&compaction_period)
-                .with_context(bad_duration("compaction_period", &compaction_period))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
-    if let Some(eviction_policy) = request_data.eviction_policy {
-        tenant_conf.eviction_policy = Some(
-            serde_json::from_value(eviction_policy)
-                .context("parse field `eviction_policy`")
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
-    tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
-
-    if let Some(evictions_low_residence_duration_metric_threshold) =
-        request_data.evictions_low_residence_duration_metric_threshold
-    {
-        tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
-            humantime::parse_duration(&evictions_low_residence_duration_metric_threshold)
-                .with_context(bad_duration(
-                    "evictions_low_residence_duration_metric_threshold",
-                    &evictions_low_residence_duration_metric_threshold,
-                ))
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
+    let tenant_conf = TenantConfOpt::try_from(&request_data).map_err(ApiError::BadRequest)?;

    let state = get_state(&request);
    mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
--- a/pageserver/src/keyspace.rs
+++ b/pageserver/src/keyspace.rs
@@ -5,7 +5,7 @@ use std::ops::Range;
 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct KeySpace {
    /// Contiguous ranges of keys that belong to the key space. In key order,
    /// and with no overlap.
@@ -61,6 +61,58 @@ impl KeySpace {

        KeyPartitioning { parts }
    }
+
+    ///
+    /// Calculate logical size of delta layers: total size of all blocks covered by it's key range
+    ///
+    pub fn get_logical_size(&self, range: &Range<Key>) -> u64 {
+        let mut start_key = range.start;
+        let n_ranges = self.ranges.len();
+        let start_index = match self.ranges.binary_search_by_key(&start_key, |r| r.start) {
+            Ok(index) => index, // keyspace range starts with start_key
+            Err(index) => {
+                if index != 0 && self.ranges[index - 1].end > start_key {
+                    index - 1 // previous keyspace range overlaps with specified
+                } else if index == n_ranges {
+                    return 0; // no intersection with specified range
+                } else {
+                    start_key = self.ranges[index].start;
+                    index
+                }
+            }
+        };
+        let mut size = 0u64;
+        for i in start_index..n_ranges {
+            if self.ranges[i].start >= range.end {
+                break;
+            }
+            let end_key = if self.ranges[i].end < range.end {
+                self.ranges[i].end
+            } else {
+                range.end
+            };
+            let n_blocks = key_range_size(&(start_key..end_key));
+            if n_blocks != u32::MAX {
+                size += n_blocks as u64 * BLCKSZ as u64;
+            }
+            if i + 1 < n_ranges {
+                start_key = self.ranges[i + 1].start;
+            }
+        }
+        size
+    }
+
+    ///
+    /// Check if key space contains overlapping range
+    ///
+    pub fn overlaps(&self, range: &Range<Key>) -> bool {
+        match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
+            Ok(0) => false,
+            Err(0) => false,
+            Ok(index) => self.ranges[index - 1].end > range.start,
+            Err(index) => self.ranges[index - 1].end > range.start,
+        }
+    }
 }

 ///
@@ -129,3 +181,226 @@ impl KeySpaceAccum {
        }
    }
 }
+
+///
+/// A helper object, to collect a set of keys and key ranges into a KeySpace
+/// object. Key ranges may be inserted in any order and can overlap.
+///
+#[derive(Clone, Debug, Default)]
+pub struct KeySpaceRandomAccum {
+    ranges: Vec<Range<Key>>,
+}
+
+impl KeySpaceRandomAccum {
+    pub fn new() -> Self {
+        Self { ranges: Vec::new() }
+    }
+
+    pub fn add_key(&mut self, key: Key) {
+        self.add_range(singleton_range(key))
+    }
+
+    pub fn add_range(&mut self, range: Range<Key>) {
+        self.ranges.push(range);
+    }
+
+    pub fn to_keyspace(mut self) -> KeySpace {
+        let mut ranges = Vec::new();
+        if !self.ranges.is_empty() {
+            self.ranges.sort_by_key(|r| r.start);
+            let mut start = self.ranges.first().unwrap().start;
+            let mut end = self.ranges.first().unwrap().end;
+            for r in self.ranges {
+                assert!(r.start >= start);
+                if r.start > end {
+                    ranges.push(start..end);
+                    start = r.start;
+                    end = r.end;
+                } else if r.end > end {
+                    end = r.end;
+                }
+            }
+            ranges.push(start..end);
+        }
+        KeySpace { ranges }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::fmt::Write;
+
+    // Helper function to create a key range.
+    //
+    // Make the tests below less verbose.
+    fn kr(irange: Range<i128>) -> Range<Key> {
+        Key::from_i128(irange.start)..Key::from_i128(irange.end)
+    }
+
+    #[allow(dead_code)]
+    fn dump_keyspace(ks: &KeySpace) {
+        for r in ks.ranges.iter() {
+            println!("  {}..{}", r.start.to_i128(), r.end.to_i128());
+        }
+    }
+
+    fn assert_ks_eq(actual: &KeySpace, expected: Vec<Range<Key>>) {
+        if actual.ranges != expected {
+            let mut msg = String::new();
+
+            writeln!(msg, "expected:").unwrap();
+            for r in &expected {
+                writeln!(msg, "  {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
+            }
+            writeln!(msg, "got:").unwrap();
+            for r in &actual.ranges {
+                writeln!(msg, "  {}..{}", r.start.to_i128(), r.end.to_i128()).unwrap();
+            }
+            panic!("{}", msg);
+        }
+    }
+
+    #[test]
+    fn keyspace_add_range() {
+        // two separate ranges
+        //
+        // #####
+        //         #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(20..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..10), kr(20..30)]);
+
+        // two separate ranges, added in reverse order
+        //
+        //         #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(20..30));
+        ks.add_range(kr(0..10));
+
+        // add range that is adjacent to the end of an existing range
+        //
+        // #####
+        //      #####
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(10..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that is adjacent to the start of an existing range
+        //
+        //      #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that overlaps with the end of an existing range
+        //
+        // #####
+        //    #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(5..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that overlaps with the start of an existing range
+        //
+        //    #####
+        // #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(5..30));
+        ks.add_range(kr(0..10));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that is fully covered by an existing range
+        //
+        // #########
+        //   #####
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..30));
+        ks.add_range(kr(10..20));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add range that extends an existing range from both ends
+        //
+        //   #####
+        // #########
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..20));
+        ks.add_range(kr(0..30));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+
+        // add a range that overlaps with two existing ranges, joining them
+        //
+        // #####   #####
+        //    #######
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(0..10));
+        ks.add_range(kr(20..30));
+        ks.add_range(kr(5..25));
+        assert_ks_eq(&ks.to_keyspace(), vec![kr(0..30)]);
+    }
+
+    #[test]
+    fn keyspace_overlaps() {
+        let mut ks = KeySpaceRandomAccum::default();
+        ks.add_range(kr(10..20));
+        ks.add_range(kr(30..40));
+        let ks = ks.to_keyspace();
+
+        //        #####      #####
+        // xxxx
+        assert!(!ks.overlaps(&kr(0..5)));
+
+        //        #####      #####
+        //   xxxx
+        assert!(!ks.overlaps(&kr(5..9)));
+
+        //        #####      #####
+        //    xxxx
+        assert!(!ks.overlaps(&kr(5..10)));
+
+        //        #####      #####
+        //     xxxx
+        assert!(ks.overlaps(&kr(5..11)));
+
+        //        #####      #####
+        //        xxxx
+        assert!(ks.overlaps(&kr(10..15)));
+
+        //        #####      #####
+        //         xxxx
+        assert!(ks.overlaps(&kr(15..20)));
+
+        //        #####      #####
+        //           xxxx
+        assert!(ks.overlaps(&kr(15..25)));
+
+        //        #####      #####
+        //              xxxx
+        assert!(!ks.overlaps(&kr(22..28)));
+
+        //        #####      #####
+        //               xxxx
+        assert!(!ks.overlaps(&kr(25..30)));
+
+        //        #####      #####
+        //                      xxxx
+        assert!(ks.overlaps(&kr(35..35)));
+
+        //        #####      #####
+        //                        xxxx
+        assert!(!ks.overlaps(&kr(40..45)));
+
+        //        #####      #####
+        //                        xxxx
+        assert!(!ks.overlaps(&kr(45..50)));
+
+        //        #####      #####
+        //        xxxxxxxxxxx
+        assert!(ks.overlaps(&kr(0..30))); // XXXXX This fails currently!
+    }
+}
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1,9 +1,9 @@
 use metrics::core::{AtomicU64, GenericCounter};
 use metrics::{
    register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
-    register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec, Counter, CounterVec,
-    Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge,
-    UIntGaugeVec,
+    register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
+    Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
+    UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::models::TenantState;
@@ -30,6 +30,7 @@ const STORAGE_TIME_OPERATIONS: &[&str] = &[
    "create images",
    "init logical size",
    "logical size",
+    "imitate logical size",
    "load layer map",
    "gc",
 ];
@@ -186,6 +187,16 @@ static PERSISTENT_BYTES_WRITTEN: Lazy<IntCounterVec> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

+pub(crate) static EVICTION_ITERATION_DURATION: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "pageserver_eviction_iteration_duration_seconds_global",
+        "Time spent on a single eviction iteration",
+        &["period_secs", "threshold_secs"],
+        STORAGE_OP_BUCKETS.into(),
+    )
+    .expect("failed to define a metric")
+});
+
 static EVICTIONS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "pageserver_evictions",
@@ -478,6 +489,65 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    .expect("Failed to register tenant_task_events metric")
 });

+pub static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_background_loop_period_overrun_count",
+        "Incremented whenever warn_when_period_overrun() logs a warning.",
+        &["task", "period"],
+    )
+    .expect("failed to define a metric")
+});
+
+// walreceiver metrics
+
+pub static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_walreceiver_started_connections_total",
+        "Number of started walreceiver connections"
+    )
+    .expect("failed to define a metric")
+});
+
+pub static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!(
+        "pageserver_walreceiver_active_managers",
+        "Number of active walreceiver managers"
+    )
+    .expect("failed to define a metric")
+});
+
+pub static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_walreceiver_switches_total",
+        "Number of walreceiver manager change_connection calls",
+        &["reason"]
+    )
+    .expect("failed to define a metric")
+});
+
+pub static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_walreceiver_broker_updates_total",
+        "Number of received broker updates in walreceiver"
+    )
+    .expect("failed to define a metric")
+});
+
+pub static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_walreceiver_candidates_events_total",
+        "Number of walreceiver candidate events",
+        &["event"]
+    )
+    .expect("failed to define a metric")
+});
+
+pub static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
+    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
+
+pub static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
+    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
+
 // Metrics collected on WAL redo operations
 //
 // We collect the time spent in actual WAL redo ('redo'), and time waiting
@@ -638,6 +708,7 @@ pub struct TimelineMetrics {
    pub compact_time_histo: StorageTimeMetrics,
    pub create_images_time_histo: StorageTimeMetrics,
    pub logical_size_histo: StorageTimeMetrics,
+    pub imitate_logical_size_histo: StorageTimeMetrics,
    pub load_layer_map_histo: StorageTimeMetrics,
    pub garbage_collect_histo: StorageTimeMetrics,
    pub last_record_gauge: IntGauge,
@@ -670,6 +741,8 @@ impl TimelineMetrics {
        let create_images_time_histo =
            StorageTimeMetrics::new("create images", &tenant_id, &timeline_id);
        let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id);
+        let imitate_logical_size_histo =
+            StorageTimeMetrics::new("imitate logical size", &tenant_id, &timeline_id);
        let load_layer_map_histo =
            StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id);
        let garbage_collect_histo = StorageTimeMetrics::new("gc", &tenant_id, &timeline_id);
@@ -706,6 +779,7 @@ impl TimelineMetrics {
            compact_time_histo,
            create_images_time_histo,
            logical_size_histo,
+            imitate_logical_size_histo,
            garbage_collect_histo,
            load_layer_map_histo,
            last_record_gauge,
@@ -1166,4 +1240,7 @@ pub fn preinitialize_metrics() {
    // Initialize it eagerly, so that our alert rule can distinguish absence of the metric from metric value 0.
    assert_eq!(UNEXPECTED_ONDEMAND_DOWNLOADS.get(), 0);
    UNEXPECTED_ONDEMAND_DOWNLOADS.reset();
+
+    // Same as above for this metric, but, it's a Vec-type metric for which we don't know all the labels.
+    BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT.reset();
 }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -256,7 +256,10 @@ async fn page_service_conn_main(
    //
    // no write timeout is used, because the kernel is assumed to error writes after some time.
    let mut socket = tokio_io_timeout::TimeoutReader::new(socket);
-    socket.set_timeout(Some(std::time::Duration::from_secs(60 * 10)));
+
+    // timeout should be lower, but trying out multiple days for
+    // <https://github.com/neondatabase/neon/issues/4205>
+    socket.set_timeout(Some(std::time::Duration::from_secs(60 * 60 * 24 * 3)));
    let socket = std::pin::pin!(socket);

    // XXX: pgbackend.run() should take the connection_ctx,
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -500,6 +500,8 @@ impl Timeline {
        cancel: CancellationToken,
        ctx: &RequestContext,
    ) -> Result<u64, CalculateLogicalSizeError> {
+        crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
+
        // Fetch list of database dirs and iterate them
        let buf = self.get(DBDIR_KEY, lsn, ctx).await.context("read dbdir")?;
        let dbdir = DbDirectory::des(&buf).context("deserialize db directory")?;
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -58,6 +58,8 @@ use crate::task_mgr::TaskKind;
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::metadata::load_metadata;
 use crate::tenant::remote_timeline_client::index::IndexPart;
+use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
+use crate::tenant::remote_timeline_client::PersistIndexPartWithDeletedFlagError;
 use crate::tenant::storage_layer::DeltaLayer;
 use crate::tenant::storage_layer::ImageLayer;
 use crate::tenant::storage_layer::Layer;
@@ -95,7 +97,10 @@ mod timeline;

 pub mod size;

-pub use timeline::{LocalLayerInfoForDiskUsageEviction, PageReconstructError, Timeline};
+pub(crate) use timeline::debug_assert_current_span_has_tenant_and_timeline_id;
+pub use timeline::{
+    LocalLayerInfoForDiskUsageEviction, LogicalSizeCalculationCause, PageReconstructError, Timeline,
+};

 // re-export this function so that page_cache.rs can use it.
 pub use crate::tenant::ephemeral_file::writeback as writeback_ephemeral_file;
@@ -271,7 +276,10 @@ impl UninitializedTimeline<'_> {
            .await
            .context("Failed to flush after basebackup import")?;

-        self.initialize(ctx)
+        // Initialize without loading the layer map. We started with an empty layer map, and already
+        // updated it for the layers that we created during the import.
+        let mut timelines = self.owning_tenant.timelines.lock().unwrap();
+        self.initialize_with_lock(ctx, &mut timelines, false, true)
    }

    fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
@@ -588,16 +596,19 @@ impl Tenant {
    /// finishes. You can use wait_until_active() to wait for the task to
    /// complete.
    ///
-    pub fn spawn_attach(
+    pub(crate) fn spawn_attach(
        conf: &'static PageServerConf,
        tenant_id: TenantId,
        remote_storage: GenericRemoteStorage,
        ctx: &RequestContext,
-    ) -> Arc<Tenant> {
+    ) -> anyhow::Result<Arc<Tenant>> {
        // XXX: Attach should provide the config, especially during tenant migration.
        //      See https://github.com/neondatabase/neon/issues/1555
        let tenant_conf = TenantConfOpt::default();

+        Self::attach_idempotent_create_marker_file(conf, tenant_id)
+            .context("create attach marker file")?;
+
        let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
        let tenant = Arc::new(Tenant::new(
            TenantState::Attaching,
@@ -630,7 +641,46 @@ impl Tenant {
                Ok(())
            },
        );
-        tenant
+        Ok(tenant)
+    }
+
+    fn attach_idempotent_create_marker_file(
+        conf: &'static PageServerConf,
+        tenant_id: TenantId,
+    ) -> anyhow::Result<()> {
+        // Create directory with marker file to indicate attaching state.
+        // The load_local_tenants() function in tenant::mgr relies on the marker file
+        // to determine whether a tenant has finished attaching.
+        let tenant_dir = conf.tenant_path(&tenant_id);
+        let marker_file = conf.tenant_attaching_mark_file_path(&tenant_id);
+        debug_assert_eq!(marker_file.parent().unwrap(), tenant_dir);
+        // TODO: should use tokio::fs here, but
+        // 1. caller is not async, for good reason (it holds tenants map lock)
+        // 2. we'd need to think about cancel safety. Turns out dropping a tokio::fs future
+        //    doesn't wait for the activity in the fs thread pool.
+        crashsafe::create_dir_all(&tenant_dir).context("create tenant directory")?;
+        match fs::OpenOptions::new()
+            .write(true)
+            .create_new(true)
+            .open(&marker_file)
+        {
+            Ok(_) => {}
+            Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => {
+                // Either this is a retry of attach or there is a concurrent task also doing attach for this tenant.
+                // We cannot distinguish this here.
+                // The caller is responsible for ensuring there's no concurrent attach for a tenant.
+                {} // fsync again, we don't know if that already happened
+            }
+            err => {
+                err.context("create tenant attaching marker file")?;
+                unreachable!("we covered the Ok() case above");
+            }
+        }
+        crashsafe::fsync_file_and_parent(&marker_file)
+            .context("fsync tenant attaching marker file and parent")?;
+        debug_assert!(tenant_dir.is_dir());
+        debug_assert!(marker_file.is_file());
+        Ok(())
    }

    ///
@@ -638,26 +688,15 @@ impl Tenant {
    ///
    #[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
    async fn attach(self: &Arc<Tenant>, ctx: RequestContext) -> anyhow::Result<()> {
-        // Create directory with marker file to indicate attaching state.
-        // The load_local_tenants() function in tenant::mgr relies on the marker file
-        // to determine whether a tenant has finished attaching.
-        let tenant_dir = self.conf.tenant_path(&self.tenant_id);
        let marker_file = self.conf.tenant_attaching_mark_file_path(&self.tenant_id);
-        debug_assert_eq!(marker_file.parent().unwrap(), tenant_dir);
-        if tenant_dir.exists() {
-            if !marker_file.is_file() {
-                anyhow::bail!(
-                    "calling Tenant::attach with a tenant directory that doesn't have the attaching marker file:\ntenant_dir: {}\nmarker_file: {}",
-                    tenant_dir.display(), marker_file.display());
-            }
-        } else {
-            crashsafe::create_dir_all(&tenant_dir).context("create tenant directory")?;
-            fs::File::create(&marker_file).context("create tenant attaching marker file")?;
-            crashsafe::fsync_file_and_parent(&marker_file)
-                .context("fsync tenant attaching marker file and parent")?;
+        if !tokio::fs::try_exists(&marker_file)
+            .await
+            .context("check for existence of marker file")?
+        {
+            anyhow::bail!(
+                "implementation error: marker file should exist at beginning of this function"
+            );
        }
-        debug_assert!(tenant_dir.is_dir());
-        debug_assert!(marker_file.is_file());

        // Get list of remote timelines
        // download index files for every tenant timeline
@@ -695,16 +734,9 @@ impl Tenant {
                        .await
                        .context("download index file")?;

-                    let remote_metadata = index_part.parse_metadata().context("parse metadata")?;
-
                    debug!("finished index part download");

-                    Result::<_, anyhow::Error>::Ok((
-                        timeline_id,
-                        client,
-                        index_part,
-                        remote_metadata,
-                    ))
+                    Result::<_, anyhow::Error>::Ok((timeline_id, client, index_part))
                }
                .map(move |res| {
                    res.with_context(|| format!("download index part for timeline {timeline_id}"))
@@ -713,17 +745,26 @@ impl Tenant {
            );
        }
        // Wait for all the download tasks to complete & collect results.
-        let mut remote_clients = HashMap::new();
-        let mut index_parts = HashMap::new();
+        let mut remote_index_and_client = HashMap::new();
        let mut timeline_ancestors = HashMap::new();
        while let Some(result) = part_downloads.join_next().await {
            // NB: we already added timeline_id as context to the error
            let result: Result<_, anyhow::Error> = result.context("joinset task join")?;
-            let (timeline_id, client, index_part, remote_metadata) = result?;
+            let (timeline_id, client, index_part) = result?;
            debug!("successfully downloaded index part for timeline {timeline_id}");
-            timeline_ancestors.insert(timeline_id, remote_metadata);
-            index_parts.insert(timeline_id, index_part);
-            remote_clients.insert(timeline_id, client);
+            match index_part {
+                MaybeDeletedIndexPart::IndexPart(index_part) => {
+                    timeline_ancestors.insert(
+                        timeline_id,
+                        index_part.parse_metadata().context("parse_metadata")?,
+                    );
+                    remote_index_and_client.insert(timeline_id, (index_part, client));
+                }
+                MaybeDeletedIndexPart::Deleted => {
+                    info!("timeline {} is deleted, skipping", timeline_id);
+                    continue;
+                }
+            }
        }

        // For every timeline, download the metadata file, scan the local directory,
@@ -731,12 +772,16 @@ impl Tenant {
        // layer file.
        let sorted_timelines = tree_sort_timelines(timeline_ancestors)?;
        for (timeline_id, remote_metadata) in sorted_timelines {
+            let (index_part, remote_client) = remote_index_and_client
+                .remove(&timeline_id)
+                .expect("just put it in above");
+
            // TODO again handle early failure
            self.load_remote_timeline(
                timeline_id,
-                index_parts.remove(&timeline_id).unwrap(),
+                index_part,
                remote_metadata,
-                remote_clients.remove(&timeline_id).unwrap(),
+                remote_client,
                &ctx,
            )
            .await
@@ -828,11 +873,15 @@ impl Tenant {
    }

    /// Create a placeholder Tenant object for a broken tenant
-    pub fn create_broken_tenant(conf: &'static PageServerConf, tenant_id: TenantId) -> Arc<Tenant> {
+    pub fn create_broken_tenant(
+        conf: &'static PageServerConf,
+        tenant_id: TenantId,
+        reason: String,
+    ) -> Arc<Tenant> {
        let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
        Arc::new(Tenant::new(
            TenantState::Broken {
-                reason: "create_broken_tenant".into(),
+                reason,
                backtrace: String::new(),
            },
            conf,
@@ -865,7 +914,7 @@ impl Tenant {
            Ok(conf) => conf,
            Err(e) => {
                error!("load tenant config failed: {:?}", e);
-                return Tenant::create_broken_tenant(conf, tenant_id);
+                return Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}"));
            }
        };

@@ -1042,21 +1091,13 @@ impl Tenant {
    /// Subroutine of `load_tenant`, to load an individual timeline
    ///
    /// NB: The parent is assumed to be already loaded!
-    #[instrument(skip(self, local_metadata, ctx), fields(timeline_id=%timeline_id))]
+    #[instrument(skip_all, fields(timeline_id))]
    async fn load_local_timeline(
        &self,
        timeline_id: TimelineId,
        local_metadata: TimelineMetadata,
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
-        let ancestor = if let Some(ancestor_timeline_id) = local_metadata.ancestor_timeline() {
-            let ancestor_timeline = self.get_timeline(ancestor_timeline_id, false)
-            .with_context(|| anyhow::anyhow!("cannot find ancestor timeline {ancestor_timeline_id} for timeline {timeline_id}"))?;
-            Some(ancestor_timeline)
-        } else {
-            None
-        };
-
        let remote_client = self.remote_storage.as_ref().map(|remote_storage| {
            RemoteTimelineClient::new(
                remote_storage.clone(),
@@ -1069,6 +1110,29 @@ impl Tenant {
        let remote_startup_data = match &remote_client {
            Some(remote_client) => match remote_client.download_index_file().await {
                Ok(index_part) => {
+                    let index_part = match index_part {
+                        MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
+                        MaybeDeletedIndexPart::Deleted => {
+                            // TODO: we won't reach here if remote storage gets de-configured after start of the deletion operation.
+                            // Example:
+                            //  start deletion operation
+                            //  finishes upload of index part
+                            //  pageserver crashes
+                            //  remote storage gets de-configured
+                            //  pageserver starts
+                            //
+                            // We don't really anticipate remote storage to be de-configured, so, for now, this is fine.
+                            // Also, maybe we'll remove that option entirely in the future, see https://github.com/neondatabase/neon/issues/4099.
+                            info!("is_deleted is set on remote, resuming removal of local data originally done by timeline deletion handler");
+                            std::fs::remove_dir_all(
+                                self.conf.timeline_path(&timeline_id, &self.tenant_id),
+                            )
+                            .context("remove_dir_all")?;
+
+                            return Ok(());
+                        }
+                    };
+
                    let remote_metadata = index_part.parse_metadata().context("parse_metadata")?;
                    Some(RemoteStartupData {
                        index_part,
@@ -1084,6 +1148,14 @@ impl Tenant {
            None => None,
        };

+        let ancestor = if let Some(ancestor_timeline_id) = local_metadata.ancestor_timeline() {
+            let ancestor_timeline = self.get_timeline(ancestor_timeline_id, false)
+            .with_context(|| anyhow::anyhow!("cannot find ancestor timeline {ancestor_timeline_id} for timeline {timeline_id}"))?;
+            Some(ancestor_timeline)
+        } else {
+            None
+        };
+
        self.timeline_init_and_sync(
            timeline_id,
            remote_client,
@@ -1192,8 +1264,24 @@ impl Tenant {
            "Cannot create timelines on inactive tenant"
        );

-        if self.get_timeline(new_timeline_id, false).is_ok() {
+        if let Ok(existing) = self.get_timeline(new_timeline_id, false) {
            debug!("timeline {new_timeline_id} already exists");
+
+            if let Some(remote_client) = existing.remote_client.as_ref() {
+                // Wait for uploads to complete, so that when we return Ok, the timeline
+                // is known to be durable on remote storage. Just like we do at the end of
+                // this function, after we have created the timeline ourselves.
+                //
+                // We only really care that the initial version of `index_part.json` has
+                // been uploaded. That's enough to remember that the timeline
+                // exists. However, there is no function to wait specifically for that so
+                // we just wait for all in-progress uploads to finish.
+                remote_client
+                    .wait_completion()
+                    .await
+                    .context("wait for timeline uploads to complete")?;
+            }
+
            return Ok(None);
        }

@@ -1235,6 +1323,17 @@ impl Tenant {
            }
        };

+        if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
+            // Wait for the upload of the 'index_part.json` file to finish, so that when we return
+            // Ok, the timeline is durable in remote storage.
+            let kind = ancestor_timeline_id
+                .map(|_| "branched")
+                .unwrap_or("bootstrapped");
+            remote_client.wait_completion().await.with_context(|| {
+                format!("wait for {} timeline initial uploads to complete", kind)
+            })?;
+        }
+
        Ok(Some(loaded_timeline))
    }

@@ -1331,6 +1430,8 @@ impl Tenant {
        timeline_id: TimelineId,
        _ctx: &RequestContext,
    ) -> Result<(), DeleteTimelineError> {
+        timeline::debug_assert_current_span_has_tenant_and_timeline_id();
+
        // Transition the timeline into TimelineState::Stopping.
        // This should prevent new operations from starting.
        let timeline = {
@@ -1371,9 +1472,44 @@ impl Tenant {
        timeline.walreceiver.stop().await;
        debug!("wal receiver shutdown confirmed");

+        // Prevent new uploads from starting.
+        if let Some(remote_client) = timeline.remote_client.as_ref() {
+            let res = remote_client.stop();
+            match res {
+                Ok(()) => {}
+                Err(e) => match e {
+                    remote_timeline_client::StopError::QueueUninitialized => {
+                        // This case shouldn't happen currently because the
+                        // load and attach code bails out if _any_ of the timeline fails to fetch its IndexPart.
+                        // That is, before we declare the Tenant as Active.
+                        // But we only allow calls to delete_timeline on Active tenants.
+                        return Err(DeleteTimelineError::Other(anyhow::anyhow!("upload queue is uninitialized, likely the timeline was in Broken state prior to this call because it failed to fetch IndexPart during load or attach, check the logs")));
+                    }
+                },
+            }
+        }
+
+        // Stop & wait for the remaining timeline tasks, including upload tasks.
+        // NB: This and other delete_timeline calls do not run as a task_mgr task,
+        //     so, they are not affected by this shutdown_tasks() call.
        info!("waiting for timeline tasks to shutdown");
        task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(timeline_id)).await;

+        // Mark timeline as deleted in S3 so we won't pick it up next time
+        // during attach or pageserver restart.
+        // See comment in persist_index_part_with_deleted_flag.
+        if let Some(remote_client) = timeline.remote_client.as_ref() {
+            match remote_client.persist_index_part_with_deleted_flag().await {
+                // If we (now, or already) marked it successfully as deleted, we can proceed
+                Ok(()) | Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(_)) => (),
+                // Bail out otherwise
+                Err(e @ PersistIndexPartWithDeletedFlagError::AlreadyInProgress(_))
+                | Err(e @ PersistIndexPartWithDeletedFlagError::Other(_)) => {
+                    return Err(DeleteTimelineError::Other(anyhow::anyhow!(e)));
+                }
+            }
+        }
+
        {
            // Grab the layer_removal_cs lock, and actually perform the deletion.
            //
@@ -1397,19 +1533,54 @@ impl Tenant {
            //     by the caller.

            let local_timeline_directory = self.conf.timeline_path(&timeline_id, &self.tenant_id);
-            // XXX make this atomic so that, if we crash-mid-way, the timeline won't be picked up
-            // with some layers missing.
-            std::fs::remove_dir_all(&local_timeline_directory).with_context(|| {
-                format!(
-                    "Failed to remove local timeline directory '{}'",
-                    local_timeline_directory.display()
-                )
-            })?;
+
+            fail::fail_point!("timeline-delete-before-rm", |_| {
+                Err(anyhow::anyhow!("failpoint: timeline-delete-before-rm"))?
+            });
+
+            // NB: This need not be atomic because the deleted flag in the IndexPart
+            // will be observed during tenant/timeline load. The deletion will be resumed there.
+            //
+            // For configurations without remote storage, we tolerate that we're not crash-safe here.
+            // The timeline may come up Active but with missing layer files, in such setups.
+            // See https://github.com/neondatabase/neon/pull/3919#issuecomment-1531726720
+            match std::fs::remove_dir_all(&local_timeline_directory) {
+                Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
+                    // This can happen if we're called a second time, e.g.,
+                    // because of a previous failure/cancellation at/after
+                    // failpoint timeline-delete-after-rm.
+                    //
+                    // It can also happen if we race with tenant detach, because,
+                    // it doesn't grab the layer_removal_cs lock.
+                    //
+                    // For now, log and continue.
+                    // warn! level is technically not appropriate for the
+                    // first case because we should expect retries to happen.
+                    // But the error is so rare, it seems better to get attention if it happens.
+                    let tenant_state = self.current_state();
+                    warn!(
+                        timeline_dir=?local_timeline_directory,
+                        ?tenant_state,
+                        "timeline directory not found, proceeding anyway"
+                    );
+                    // continue with the rest of the deletion
+                }
+                res => res.with_context(|| {
+                    format!(
+                        "Failed to remove local timeline directory '{}'",
+                        local_timeline_directory.display()
+                    )
+                })?,
+            }

            info!("finished deleting layer files, releasing layer_removal_cs.lock()");
            drop(layer_removal_guard);
        }

+        fail::fail_point!("timeline-delete-after-rm", |_| {
+            Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))?
+        });
+
        // Remove the timeline from the map.
        let mut timelines = self.timelines.lock().unwrap();
        let children_exist = timelines
@@ -2024,7 +2195,7 @@ impl Tenant {
                // made.
                break;
            }
-            let result = timeline.gc().await?;
+            let result = timeline.gc(ctx).await?;
            totals += result;
        }

@@ -2232,17 +2403,18 @@ impl Tenant {
            src_timeline.initdb_lsn,
            src_timeline.pg_version,
        );
-        let mut timelines = self.timelines.lock().unwrap();
-        let new_timeline = self
-            .prepare_timeline(
+
+        let new_timeline = {
+            let mut timelines = self.timelines.lock().unwrap();
+            self.prepare_timeline(
                dst_id,
                &metadata,
                timeline_uninit_mark,
                false,
                Some(Arc::clone(src_timeline)),
            )?
-            .initialize_with_lock(ctx, &mut timelines, true, true)?;
-        drop(timelines);
+            .initialize_with_lock(ctx, &mut timelines, true, true)?
+        };

        // Root timeline gets its layers during creation and uploads them along with the metadata.
        // A branch timeline though, when created, can get no writes for some time, hence won't get any layers created.
@@ -2352,6 +2524,8 @@ impl Tenant {
                )
            })?;

+        // Initialize the timeline without loading the layer map, because we already updated the layer
+        // map above, when we imported the datadir.
        let timeline = {
            let mut timelines = self.timelines.lock().unwrap();
            raw_timeline.initialize_with_lock(ctx, &mut timelines, false, true)?
@@ -2498,6 +2672,7 @@ impl Tenant {
        // `max_retention_period` overrides the cutoff that is used to calculate the size
        // (only if it is shorter than the real cutoff).
        max_retention_period: Option<u64>,
+        cause: LogicalSizeCalculationCause,
        ctx: &RequestContext,
    ) -> anyhow::Result<size::ModelInputs> {
        let logical_sizes_at_once = self
@@ -2519,6 +2694,7 @@ impl Tenant {
            logical_sizes_at_once,
            max_retention_period,
            &mut shared_cache,
+            cause,
            ctx,
        )
        .await
@@ -2528,8 +2704,12 @@ impl Tenant {
    /// This is periodically called by background worker.
    /// result is cached in tenant struct
    #[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
-    pub async fn calculate_synthetic_size(&self, ctx: &RequestContext) -> anyhow::Result<u64> {
-        let inputs = self.gather_size_inputs(None, ctx).await?;
+    pub async fn calculate_synthetic_size(
+        &self,
+        cause: LogicalSizeCalculationCause,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<u64> {
+        let inputs = self.gather_size_inputs(None, cause, ctx).await?;

        let size = inputs.calculate()?;

@@ -3289,14 +3469,26 @@ mod tests {
            .gc_iteration(Some(TIMELINE_ID), 0x10, Duration::ZERO, &ctx)
            .await?;

+        // The branchpoints should contain all timelines, even ones marked
+        // as Broken.
+        {
+            let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;
+            assert_eq!(branchpoints.len(), 1);
+            assert_eq!(branchpoints[0], Lsn(0x40));
+        }
+
+        // You can read the key from the child branch even though the parent is
+        // Broken, as long as you don't need to access data from the parent.
        assert_eq!(
-            newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await?,
-            TEST_IMG(&format!("foo at {}", Lsn(0x40)))
+            newtline.get(*TEST_KEY, Lsn(0x70), &ctx).await?,
+            TEST_IMG(&format!("foo at {}", Lsn(0x70)))
        );

-        let branchpoints = &tline.gc_info.read().unwrap().retain_lsns;
-        assert_eq!(branchpoints.len(), 1);
-        assert_eq!(branchpoints[0], Lsn(0x40));
+        // This needs to traverse to the parent, and fails.
+        let err = newtline.get(*TEST_KEY, Lsn(0x50), &ctx).await.unwrap_err();
+        assert!(err
+            .to_string()
+            .contains("will not become active. Current state: Broken"));

        Ok(())
    }
@@ -3562,7 +3754,7 @@ mod tests {
                .await?;
            tline.freeze_and_flush().await?;
            tline.compact(&ctx).await?;
-            tline.gc().await?;
+            tline.gc(&ctx).await?;
        }

        Ok(())
@@ -3634,7 +3826,7 @@ mod tests {
                .await?;
            tline.freeze_and_flush().await?;
            tline.compact(&ctx).await?;
-            tline.gc().await?;
+            tline.gc(&ctx).await?;
        }

        Ok(())
@@ -3718,7 +3910,7 @@ mod tests {
                .await?;
            tline.freeze_and_flush().await?;
            tline.compact(&ctx).await?;
-            tline.gc().await?;
+            tline.gc(&ctx).await?;
        }

        Ok(())
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -8,6 +8,8 @@
 //! We cannot use global or default config instead, because wrong settings
 //! may lead to a data loss.
 //!
+use anyhow::Context;
+use pageserver_api::models::{TenantConfigRequest, TenantCreateRequest};
 use serde::{Deserialize, Serialize};
 use std::num::NonZeroU64;
 use std::time::Duration;
@@ -280,6 +282,167 @@ impl Default for TenantConf {
    }
 }

+// Helper function to standardize the error messages we produce on bad durations
+//
+// Intended to be used with anyhow's `with_context`, e.g.:
+//
+//   let value = result.with_context(bad_duration("name", &value))?;
+//
+fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn() -> String {
+    move || format!("Cannot parse `{field_name}` duration {value:?}")
+}
+
+impl TenantConfOpt {
+    #[allow(clippy::too_many_arguments)]
+    fn from_request(
+        checkpoint_distance: Option<u64>,
+        checkpoint_timeout: &Option<String>,
+        compaction_target_size: Option<u64>,
+        compaction_period: &Option<String>,
+        compaction_threshold: Option<usize>,
+        gc_horizon: Option<u64>,
+        gc_period: &Option<String>,
+        image_creation_threshold: Option<usize>,
+        pitr_interval: &Option<String>,
+        walreceiver_connect_timeout: &Option<String>,
+        lagging_wal_timeout: &Option<String>,
+        max_lsn_wal_lag: Option<NonZeroU64>,
+        trace_read_requests: Option<bool>,
+        eviction_policy: &Option<serde_json::Value>,
+        min_resident_size_override: Option<u64>,
+        evictions_low_residence_duration_metric_threshold: &Option<String>,
+    ) -> Result<Self, anyhow::Error> {
+        let mut tenant_conf = TenantConfOpt::default();
+
+        if let Some(gc_period) = &gc_period {
+            tenant_conf.gc_period = Some(
+                humantime::parse_duration(gc_period)
+                    .with_context(bad_duration("gc_period", gc_period))?,
+            );
+        }
+        tenant_conf.gc_horizon = gc_horizon;
+        tenant_conf.image_creation_threshold = image_creation_threshold;
+
+        if let Some(pitr_interval) = &pitr_interval {
+            tenant_conf.pitr_interval = Some(
+                humantime::parse_duration(pitr_interval)
+                    .with_context(bad_duration("pitr_interval", pitr_interval))?,
+            );
+        }
+
+        if let Some(walreceiver_connect_timeout) = &walreceiver_connect_timeout {
+            tenant_conf.walreceiver_connect_timeout = Some(
+                humantime::parse_duration(walreceiver_connect_timeout).with_context(
+                    bad_duration("walreceiver_connect_timeout", walreceiver_connect_timeout),
+                )?,
+            );
+        }
+        if let Some(lagging_wal_timeout) = &lagging_wal_timeout {
+            tenant_conf.lagging_wal_timeout = Some(
+                humantime::parse_duration(lagging_wal_timeout)
+                    .with_context(bad_duration("lagging_wal_timeout", lagging_wal_timeout))?,
+            );
+        }
+        if let Some(max_lsn_wal_lag) = max_lsn_wal_lag {
+            tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+        }
+        if let Some(trace_read_requests) = trace_read_requests {
+            tenant_conf.trace_read_requests = Some(trace_read_requests);
+        }
+
+        tenant_conf.checkpoint_distance = checkpoint_distance;
+        if let Some(checkpoint_timeout) = &checkpoint_timeout {
+            tenant_conf.checkpoint_timeout = Some(
+                humantime::parse_duration(checkpoint_timeout)
+                    .with_context(bad_duration("checkpoint_timeout", checkpoint_timeout))?,
+            );
+        }
+
+        tenant_conf.compaction_target_size = compaction_target_size;
+        tenant_conf.compaction_threshold = compaction_threshold;
+
+        if let Some(compaction_period) = &compaction_period {
+            tenant_conf.compaction_period = Some(
+                humantime::parse_duration(compaction_period)
+                    .with_context(bad_duration("compaction_period", compaction_period))?,
+            );
+        }
+
+        if let Some(eviction_policy) = &eviction_policy {
+            tenant_conf.eviction_policy = Some(
+                serde::Deserialize::deserialize(eviction_policy)
+                    .context("parse field `eviction_policy`")?,
+            );
+        }
+
+        tenant_conf.min_resident_size_override = min_resident_size_override;
+
+        if let Some(evictions_low_residence_duration_metric_threshold) =
+            &evictions_low_residence_duration_metric_threshold
+        {
+            tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
+                humantime::parse_duration(evictions_low_residence_duration_metric_threshold)
+                    .with_context(bad_duration(
+                        "evictions_low_residence_duration_metric_threshold",
+                        evictions_low_residence_duration_metric_threshold,
+                    ))?,
+            );
+        }
+
+        Ok(tenant_conf)
+    }
+}
+
+impl TryFrom<&'_ TenantCreateRequest> for TenantConfOpt {
+    type Error = anyhow::Error;
+
+    fn try_from(request_data: &TenantCreateRequest) -> Result<Self, Self::Error> {
+        Self::from_request(
+            request_data.checkpoint_distance,
+            &request_data.checkpoint_timeout,
+            request_data.compaction_target_size,
+            &request_data.compaction_period,
+            request_data.compaction_threshold,
+            request_data.gc_horizon,
+            &request_data.gc_period,
+            request_data.image_creation_threshold,
+            &request_data.pitr_interval,
+            &request_data.walreceiver_connect_timeout,
+            &request_data.lagging_wal_timeout,
+            request_data.max_lsn_wal_lag,
+            request_data.trace_read_requests,
+            &request_data.eviction_policy,
+            request_data.min_resident_size_override,
+            &request_data.evictions_low_residence_duration_metric_threshold,
+        )
+    }
+}
+
+impl TryFrom<&'_ TenantConfigRequest> for TenantConfOpt {
+    type Error = anyhow::Error;
+
+    fn try_from(request_data: &TenantConfigRequest) -> Result<Self, Self::Error> {
+        Self::from_request(
+            request_data.checkpoint_distance,
+            &request_data.checkpoint_timeout,
+            request_data.compaction_target_size,
+            &request_data.compaction_period,
+            request_data.compaction_threshold,
+            request_data.gc_horizon,
+            &request_data.gc_period,
+            request_data.image_creation_threshold,
+            &request_data.pitr_interval,
+            &request_data.walreceiver_connect_timeout,
+            &request_data.lagging_wal_timeout,
+            request_data.max_lsn_wal_lag,
+            request_data.trace_read_requests,
+            &request_data.eviction_policy,
+            request_data.min_resident_size_override,
+            &request_data.evictions_low_residence_duration_metric_threshold,
+        )
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -12,6 +12,7 @@ use std::io::Write;
 use anyhow::{bail, ensure, Context};
 use serde::{Deserialize, Serialize};
 use tracing::info_span;
+use utils::bin_ser::SerializeError;
 use utils::{
    bin_ser::BeSer,
    id::{TenantId, TimelineId},
@@ -182,7 +183,7 @@ impl TimelineMetadata {
        }
    }

-    pub fn to_bytes(&self) -> anyhow::Result<Vec<u8>> {
+    pub fn to_bytes(&self) -> Result<Vec<u8>, SerializeError> {
        let body_bytes = self.body.ser()?;
        let metadata_size = METADATA_HDR_SIZE + body_bytes.len();
        let hdr = TimelineMetadataHeader {
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -186,10 +186,20 @@ pub fn schedule_local_tenant_processing(
    let tenant = if conf.tenant_attaching_mark_file_path(&tenant_id).exists() {
        info!("tenant {tenant_id} has attaching mark file, resuming its attach operation");
        if let Some(remote_storage) = remote_storage {
-            Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx)
+            match Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx) {
+                Ok(tenant) => tenant,
+                Err(e) => {
+                    error!("Failed to spawn_attach tenant {tenant_id}, reason: {e:#}");
+                    Tenant::create_broken_tenant(conf, tenant_id, format!("{e:#}"))
+                }
+            }
        } else {
            warn!("tenant {tenant_id} has attaching mark file, but pageserver has no remote storage configured");
-            Tenant::create_broken_tenant(conf, tenant_id)
+            Tenant::create_broken_tenant(
+                conf,
+                tenant_id,
+                "attaching mark file present but no remote storage configured".to_string(),
+            )
        }
    } else {
        info!("tenant {tenant_id} is assumed to be loadable, starting load operation");
@@ -466,7 +476,8 @@ pub async fn attach_tenant(
            "Cannot attach tenant {tenant_id}, local tenant directory already exists"
        );

-        let tenant = Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx);
+        let tenant =
+            Tenant::spawn_attach(conf, tenant_id, remote_storage, ctx).context("spawn_attach")?;
        vacant_entry.insert(tenant);
        Ok(())
    })
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -204,8 +204,11 @@ mod download;
 pub mod index;
 mod upload;

+use anyhow::Context;
+use chrono::{NaiveDateTime, Utc};
 // re-export these
 pub use download::{is_temp_download_file, list_remote_timelines};
+use scopeguard::ScopeGuard;

 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Mutex};
@@ -213,7 +216,7 @@ use std::sync::{Arc, Mutex};
 use remote_storage::{DownloadError, GenericRemoteStorage};
 use std::ops::DerefMut;
 use tokio::runtime::Runtime;
-use tracing::{debug, info, warn};
+use tracing::{debug, error, info, warn};
 use tracing::{info_span, Instrument};
 use utils::lsn::Lsn;

@@ -240,6 +243,7 @@ use utils::id::{TenantId, TimelineId};
 use self::index::IndexPart;

 use super::storage_layer::LayerFileName;
+use super::upload_queue::SetDeletedFlagProgress;

 // Occasional network issues and such can cause remote operations to fail, and
 // that's expected. If a download fails, we log it at info-level, and retry.
@@ -253,6 +257,30 @@ const FAILED_DOWNLOAD_RETRIES: u32 = 10;
 // retries. Uploads and deletions are retried forever, though.
 const FAILED_UPLOAD_WARN_THRESHOLD: u32 = 3;

+pub enum MaybeDeletedIndexPart {
+    IndexPart(IndexPart),
+    Deleted,
+}
+
+/// Errors that can arise when calling [`RemoteTimelineClient::stop`].
+#[derive(Debug, thiserror::Error)]
+pub enum StopError {
+    /// Returned if the upload queue was never initialized.
+    /// See [`RemoteTimelineClient::init_upload_queue`] and [`RemoteTimelineClient::init_upload_queue_for_empty_remote`].
+    #[error("queue is not initialized")]
+    QueueUninitialized,
+}
+
+#[derive(Debug, thiserror::Error)]
+pub enum PersistIndexPartWithDeletedFlagError {
+    #[error("another task is already setting the deleted_flag, started at {0:?}")]
+    AlreadyInProgress(NaiveDateTime),
+    #[error("the deleted_flag was already set, value is {0:?}")]
+    AlreadyDeleted(NaiveDateTime),
+    #[error(transparent)]
+    Other(#[from] anyhow::Error),
+}
+
 /// A client for accessing a timeline's data in remote storage.
 ///
 /// This takes care of managing the number of connections, and balancing them
@@ -367,7 +395,7 @@ impl RemoteTimelineClient {
    //

    /// Download index file
-    pub async fn download_index_file(&self) -> Result<IndexPart, DownloadError> {
+    pub async fn download_index_file(&self) -> Result<MaybeDeletedIndexPart, DownloadError> {
        let _unfinished_gauge_guard = self.metrics.call_begin(
            &RemoteOpFileKind::Index,
            &RemoteOpKind::Download,
@@ -376,7 +404,7 @@ impl RemoteTimelineClient {
            },
        );

-        download::download_index_part(
+        let index_part = download::download_index_part(
            self.conf,
            &self.storage_impl,
            self.tenant_id,
@@ -389,7 +417,13 @@ impl RemoteTimelineClient {
            RemoteOpKind::Download,
            Arc::clone(&self.metrics),
        )
-        .await
+        .await?;
+
+        if index_part.deleted_at.is_some() {
+            Ok(MaybeDeletedIndexPart::Deleted)
+        } else {
+            Ok(MaybeDeletedIndexPart::IndexPart(index_part))
+        }
    }

    /// Download a (layer) file from `path`, into local filesystem.
@@ -624,6 +658,116 @@ impl RemoteTimelineClient {
        Ok(())
    }

+    /// Set the deleted_at field in the remote index file.
+    ///
+    /// This fails if the upload queue has not been `stop()`ed.
+    ///
+    /// The caller is responsible for calling `stop()` AND for waiting
+    /// for any ongoing upload tasks to finish after `stop()` has succeeded.
+    /// Check method [`RemoteTimelineClient::stop`] for details.
+    pub(crate) async fn persist_index_part_with_deleted_flag(
+        self: &Arc<Self>,
+    ) -> Result<(), PersistIndexPartWithDeletedFlagError> {
+        let index_part_with_deleted_at = {
+            let mut locked = self.upload_queue.lock().unwrap();
+
+            // We must be in stopped state because otherwise
+            // we can have inprogress index part upload that can overwrite the file
+            // with missing is_deleted flag that we going to set below
+            let stopped = match &mut *locked {
+                UploadQueue::Uninitialized => {
+                    return Err(anyhow::anyhow!("is not Stopped but Uninitialized").into())
+                }
+                UploadQueue::Initialized(_) => {
+                    return Err(anyhow::anyhow!("is not Stopped but Initialized").into())
+                }
+                UploadQueue::Stopped(stopped) => stopped,
+            };
+
+            match stopped.deleted_at {
+                SetDeletedFlagProgress::NotRunning => (), // proceed
+                SetDeletedFlagProgress::InProgress(at) => {
+                    return Err(PersistIndexPartWithDeletedFlagError::AlreadyInProgress(at));
+                }
+                SetDeletedFlagProgress::Successful(at) => {
+                    return Err(PersistIndexPartWithDeletedFlagError::AlreadyDeleted(at));
+                }
+            };
+            let deleted_at = Utc::now().naive_utc();
+            stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);
+
+            let mut index_part = IndexPart::new(
+                stopped.latest_files.clone(),
+                stopped.last_uploaded_consistent_lsn,
+                stopped
+                    .latest_metadata
+                    .to_bytes()
+                    .context("serialize metadata")?,
+            );
+            index_part.deleted_at = Some(deleted_at);
+            index_part
+        };
+
+        let undo_deleted_at = scopeguard::guard(Arc::clone(self), |self_clone| {
+            let mut locked = self_clone.upload_queue.lock().unwrap();
+            let stopped = match &mut *locked {
+                UploadQueue::Uninitialized | UploadQueue::Initialized(_) => unreachable!(
+                    "there's no way out of Stopping, and we checked it's Stopping above: {:?}",
+                    locked.as_str(),
+                ),
+                UploadQueue::Stopped(stopped) => stopped,
+            };
+            stopped.deleted_at = SetDeletedFlagProgress::NotRunning;
+        });
+
+        // Have a failpoint that can use the `pause` failpoint action.
+        // We don't want to block the executor thread, hence, spawn_blocking + await.
+        #[cfg(feature = "testing")]
+        tokio::task::spawn_blocking({
+            let current = tracing::Span::current();
+            move || {
+                let _entered = current.entered();
+                tracing::info!(
+                    "at failpoint persist_index_part_with_deleted_flag_after_set_before_upload_pause"
+                );
+                fail::fail_point!(
+                    "persist_index_part_with_deleted_flag_after_set_before_upload_pause"
+                );
+            }
+        })
+        .await
+        .expect("spawn_blocking");
+
+        upload::upload_index_part(
+            self.conf,
+            &self.storage_impl,
+            self.tenant_id,
+            self.timeline_id,
+            &index_part_with_deleted_at,
+        )
+        .await?;
+
+        // all good, disarm the guard and mark as success
+        ScopeGuard::into_inner(undo_deleted_at);
+        {
+            let mut locked = self.upload_queue.lock().unwrap();
+            let stopped = match &mut *locked {
+                UploadQueue::Uninitialized | UploadQueue::Initialized(_) => unreachable!(
+                    "there's no way out of Stopping, and we checked it's Stopping above: {:?}",
+                    locked.as_str(),
+                ),
+                UploadQueue::Stopped(stopped) => stopped,
+            };
+            stopped.deleted_at = SetDeletedFlagProgress::Successful(
+                index_part_with_deleted_at
+                    .deleted_at
+                    .expect("we set it above"),
+            );
+        }
+
+        Ok(())
+    }
+
    ///
    /// Pick next tasks from the queue, and start as many of them as possible without violating
    /// the ordering constraints.
@@ -741,8 +885,13 @@ impl RemoteTimelineClient {
            // upload finishes or times out soon enough.
            if task_mgr::is_shutdown_requested() {
                info!("upload task cancelled by shutdown request");
+                match self.stop() {
+                    Ok(()) => {}
+                    Err(StopError::QueueUninitialized) => {
+                        unreachable!("we never launch an upload task if the queue is uninitialized, and once it is initialized, we never go back")
+                    }
+                }
                self.calls_unfinished_metric_end(&task.op);
-                self.stop();
                return;
            }

@@ -946,32 +1095,48 @@ impl RemoteTimelineClient {
        self.metrics.call_end(&file_kind, &op_kind, track_bytes);
    }

-    fn stop(&self) {
+    /// Close the upload queue for new operations and cancel queued operations.
+    /// In-progress operations will still be running after this function returns.
+    /// Use `task_mgr::shutdown_tasks(None, Some(self.tenant_id), Some(timeline_id))`
+    /// to wait for them to complete, after calling this function.
+    pub fn stop(&self) -> Result<(), StopError> {
        // Whichever *task* for this RemoteTimelineClient grabs the mutex first will transition the queue
        // into stopped state, thereby dropping all off the queued *ops* which haven't become *tasks* yet.
        // The other *tasks* will come here and observe an already shut down queue and hence simply wrap up their business.
        let mut guard = self.upload_queue.lock().unwrap();
-        match &*guard {
-            UploadQueue::Uninitialized => panic!(
-                "callers are responsible for ensuring this is only called on initialized queue"
-            ),
+        match &mut *guard {
+            UploadQueue::Uninitialized => Err(StopError::QueueUninitialized),
            UploadQueue::Stopped(_) => {
                // nothing to do
                info!("another concurrent task already shut down the queue");
+                Ok(())
            }
-            UploadQueue::Initialized(qi) => {
+            UploadQueue::Initialized(UploadQueueInitialized {
+                latest_files,
+                latest_metadata,
+                last_uploaded_consistent_lsn,
+                ..
+            }) => {
                info!("shutting down upload queue");

                // Replace the queue with the Stopped state, taking ownership of the old
                // Initialized queue. We will do some checks on it, and then drop it.
                let qi = {
-                    let last_uploaded_consistent_lsn = qi.last_uploaded_consistent_lsn;
-                    let upload_queue = std::mem::replace(
-                        &mut *guard,
-                        UploadQueue::Stopped(UploadQueueStopped {
-                            last_uploaded_consistent_lsn,
-                        }),
-                    );
+                    // take or clone what we need
+                    let latest_files = std::mem::take(latest_files);
+                    let last_uploaded_consistent_lsn = *last_uploaded_consistent_lsn;
+                    // this could be Copy
+                    let latest_metadata = latest_metadata.clone();
+
+                    let stopped = UploadQueueStopped {
+                        latest_files,
+                        last_uploaded_consistent_lsn,
+                        latest_metadata,
+                        deleted_at: SetDeletedFlagProgress::NotRunning,
+                    };
+
+                    let upload_queue =
+                        std::mem::replace(&mut *guard, UploadQueue::Stopped(stopped));
                    if let UploadQueue::Initialized(qi) = upload_queue {
                        qi
                    } else {
@@ -979,6 +1144,8 @@ impl RemoteTimelineClient {
                    }
                };

+                assert!(qi.latest_files.is_empty(), "do not use this anymore");
+
                // consistency check
                assert_eq!(
                    qi.num_inprogress_layer_uploads
@@ -1002,6 +1169,7 @@ impl RemoteTimelineClient {

                // We're done.
                drop(guard);
+                Ok(())
            }
        }
    }
@@ -1240,7 +1408,11 @@ mod tests {
        }

        // Download back the index.json, and check that the list of files is correct
-        let index_part = runtime.block_on(client.download_index_file())?;
+        let index_part = match runtime.block_on(client.download_index_file())? {
+            MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
+            MaybeDeletedIndexPart::Deleted => panic!("unexpectedly got deleted index part"),
+        };
+
        assert_file_list(
            &index_part.timeline_layers,
            &[
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -4,6 +4,7 @@

 use std::collections::{HashMap, HashSet};

+use chrono::NaiveDateTime;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};

@@ -55,6 +56,10 @@ pub struct IndexPart {
    #[serde(default)]
    version: usize,

+    #[serde(default)]
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_at: Option<NaiveDateTime>,
+
    /// Layer names, which are stored on the remote storage.
    ///
    /// Additional metadata can might exist in `layer_metadata`.
@@ -78,7 +83,7 @@ impl IndexPart {
    /// used to understand later versions.
    ///
    /// Version is currently informative only.
-    const LATEST_VERSION: usize = 1;
+    const LATEST_VERSION: usize = 2;
    pub const FILE_NAME: &'static str = "index_part.json";

    pub fn new(
@@ -101,6 +106,7 @@ impl IndexPart {
            layer_metadata,
            disk_consistent_lsn,
            metadata_bytes,
+            deleted_at: None,
        }
    }

@@ -156,6 +162,7 @@ mod tests {
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
+            deleted_at: None,
        };

        let part = serde_json::from_str::<IndexPart>(example).unwrap();
@@ -192,6 +199,7 @@ mod tests {
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata_bytes: [112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
+            deleted_at: None,
        };

        let part = serde_json::from_str::<IndexPart>(example).unwrap();
@@ -236,6 +244,7 @@ mod tests {
                0, 0,
            ]
            .to_vec(),
+            deleted_at: None,
        };

        let empty_layers_parsed = serde_json::from_str::<IndexPart>(empty_layers_json).unwrap();
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -19,9 +19,12 @@ pub(super) async fn upload_index_part<'a>(
    timeline_id: TimelineId,
    index_part: &'a IndexPart,
 ) -> anyhow::Result<()> {
+    tracing::trace!("uploading new index part");
+
    fail_point!("before-upload-index", |_| {
        bail!("failpoint before-upload-index")
    });
+
    let index_part_bytes = serde_json::to_vec(&index_part)
        .context("Failed to serialize index part file into bytes")?;
    let index_part_size = index_part_bytes.len();
@@ -31,6 +34,7 @@ pub(super) async fn upload_index_part<'a>(
        .metadata_path(timeline_id, tenant_id)
        .with_file_name(IndexPart::FILE_NAME);
    let storage_path = conf.remote_path(&index_part_path)?;
+
    storage
        .upload_storage_object(Box::new(index_part_bytes), index_part_size, &storage_path)
        .await
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -11,7 +11,7 @@ use tokio_util::sync::CancellationToken;
 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;

-use super::Tenant;
+use super::{LogicalSizeCalculationCause, Tenant};
 use crate::tenant::Timeline;
 use utils::id::TimelineId;
 use utils::lsn::Lsn;
@@ -126,6 +126,7 @@ pub(super) async fn gather_inputs(
    limit: &Arc<Semaphore>,
    max_retention_period: Option<u64>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
+    cause: LogicalSizeCalculationCause,
    ctx: &RequestContext,
 ) -> anyhow::Result<ModelInputs> {
    // refresh is needed to update gc related pitr_cutoff and horizon_cutoff
@@ -318,7 +319,15 @@ pub(super) async fn gather_inputs(

    // We left the 'size' field empty in all of the Segments so far.
    // Now find logical sizes for all of the points that might need or benefit from them.
-    fill_logical_sizes(&timelines, &mut segments, limit, logical_size_cache, ctx).await?;
+    fill_logical_sizes(
+        &timelines,
+        &mut segments,
+        limit,
+        logical_size_cache,
+        cause,
+        ctx,
+    )
+    .await?;

    Ok(ModelInputs {
        segments,
@@ -336,6 +345,7 @@ async fn fill_logical_sizes(
    segments: &mut [SegmentMeta],
    limit: &Arc<Semaphore>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
+    cause: LogicalSizeCalculationCause,
    ctx: &RequestContext,
 ) -> anyhow::Result<()> {
    let timeline_hash: HashMap<TimelineId, Arc<Timeline>> = HashMap::from_iter(
@@ -373,13 +383,17 @@ async fn fill_logical_sizes(
                let timeline = Arc::clone(timeline_hash.get(&timeline_id).unwrap());
                let parallel_size_calcs = Arc::clone(limit);
                let ctx = ctx.attached_child();
-                joinset.spawn(calculate_logical_size(
-                    parallel_size_calcs,
-                    timeline,
-                    lsn,
-                    ctx,
-                    cancel.child_token(),
-                ));
+                joinset.spawn(
+                    calculate_logical_size(
+                        parallel_size_calcs,
+                        timeline,
+                        lsn,
+                        cause,
+                        ctx,
+                        cancel.child_token(),
+                    )
+                    .in_current_span(),
+                );
            }
            e.insert(cached_size);
        }
@@ -482,6 +496,7 @@ async fn calculate_logical_size(
    limit: Arc<tokio::sync::Semaphore>,
    timeline: Arc<crate::tenant::Timeline>,
    lsn: utils::lsn::Lsn,
+    cause: LogicalSizeCalculationCause,
    ctx: RequestContext,
    cancel: CancellationToken,
 ) -> Result<TimelineAtLsnSizeResult, RecvError> {
@@ -490,7 +505,7 @@ async fn calculate_logical_size(
        .expect("global semaphore should not had been closed");

    let size_res = timeline
-        .spawn_ondemand_logical_size_calculation(lsn, ctx, cancel)
+        .spawn_ondemand_logical_size_calculation(lsn, cause, ctx, cancel)
        .instrument(info_span!("spawn_ondemand_logical_size_calculation"))
        .await?;
    Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -13,9 +13,9 @@ use crate::task_mgr::TaskKind;
 use crate::walrecord::NeonWalRecord;
 use anyhow::Result;
 use bytes::Bytes;
-use either::Either;
 use enum_map::EnumMap;
 use enumset::EnumSet;
+use once_cell::sync::Lazy;
 use pageserver_api::models::LayerAccessKind;
 use pageserver_api::models::{
    HistoricLayerInfo, LayerResidenceEvent, LayerResidenceEventReason, LayerResidenceStatus,
@@ -23,8 +23,10 @@ use pageserver_api::models::{
 use std::ops::Range;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex};
-use std::time::{SystemTime, UNIX_EPOCH};
+use std::time::{Duration, SystemTime, UNIX_EPOCH};
+use tracing::warn;
 use utils::history_buffer::HistoryBufferWithDropCounter;
+use utils::rate_limit::RateLimit;

 use utils::{
    id::{TenantId, TimelineId},
@@ -37,6 +39,8 @@ pub use image_layer::{ImageLayer, ImageLayerWriter};
 pub use inmemory_layer::InMemoryLayer;
 pub use remote_layer::RemoteLayer;

+use super::layer_map::BatchedUpdates;
+
 pub fn range_overlaps<T>(a: &Range<T>, b: &Range<T>) -> bool
 where
    T: PartialOrd<T>,
@@ -158,41 +162,82 @@ impl LayerAccessStatFullDetails {
 }

 impl LayerAccessStats {
-    pub(crate) fn for_loading_layer(status: LayerResidenceStatus) -> Self {
-        let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
-        new.record_residence_event(status, LayerResidenceEventReason::LayerLoad);
-        new
+    /// Create an empty stats object.
+    ///
+    /// The caller is responsible for recording a residence event
+    /// using [`record_residence_event`] before calling `latest_activity`.
+    /// If they don't, [`latest_activity`] will return `None`.
+    pub(crate) fn empty_will_record_residence_event_later() -> Self {
+        LayerAccessStats(Mutex::default())
    }

-    pub(crate) fn for_new_layer_file() -> Self {
+    /// Create an empty stats object and record a [`LayerLoad`] event with the given residence status.
+    ///
+    /// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
+    pub(crate) fn for_loading_layer<L>(
+        layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
+        status: LayerResidenceStatus,
+    ) -> Self
+    where
+        L: ?Sized + Layer,
+    {
        let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
        new.record_residence_event(
-            LayerResidenceStatus::Resident,
-            LayerResidenceEventReason::LayerCreate,
+            layer_map_lock_held_witness,
+            status,
+            LayerResidenceEventReason::LayerLoad,
        );
        new
    }

    /// Creates a clone of `self` and records `new_status` in the clone.
-    /// The `new_status` is not recorded in `self`
-    pub(crate) fn clone_for_residence_change(
+    ///
+    /// The `new_status` is not recorded in `self`.
+    ///
+    /// See [`record_residence_event`] for why you need to do this while holding the layer map lock.
+    pub(crate) fn clone_for_residence_change<L>(
        &self,
+        layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
        new_status: LayerResidenceStatus,
-    ) -> LayerAccessStats {
+    ) -> LayerAccessStats
+    where
+        L: ?Sized + Layer,
+    {
        let clone = {
            let inner = self.0.lock().unwrap();
            inner.clone()
        };
        let new = LayerAccessStats(Mutex::new(clone));
-        new.record_residence_event(new_status, LayerResidenceEventReason::ResidenceChange);
+        new.record_residence_event(
+            layer_map_lock_held_witness,
+            new_status,
+            LayerResidenceEventReason::ResidenceChange,
+        );
        new
    }

-    fn record_residence_event(
+    /// Record a change in layer residency.
+    ///
+    /// Recording the event must happen while holding the layer map lock to
+    /// ensure that latest-activity-threshold-based layer eviction (eviction_task.rs)
+    /// can do an "imitate access" to this layer, before it observes `now-latest_activity() > threshold`.
+    ///
+    /// If we instead recorded the residence event with a timestamp from before grabbing the layer map lock,
+    /// the following race could happen:
+    ///
+    /// - Compact: Write out an L1 layer from several L0 layers. This records residence event LayerCreate with the current timestamp.
+    /// - Eviction: imitate access logical size calculation. This accesses the L0 layers because the L1 layer is not yet in the layer map.
+    /// - Compact: Grab layer map lock, add the new L1 to layer map and remove the L0s, release layer map lock.
+    /// - Eviction: observes the new L1 layer whose only activity timestamp is the LayerCreate event.
+    ///
+    pub(crate) fn record_residence_event<L>(
        &self,
+        _layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
        status: LayerResidenceStatus,
        reason: LayerResidenceEventReason,
-    ) {
+    ) where
+        L: ?Sized + Layer,
+    {
        let mut locked = self.0.lock().unwrap();
        locked.iter_mut().for_each(|inner| {
            inner
@@ -255,24 +300,37 @@ impl LayerAccessStats {
        ret
    }

-    fn most_recent_access_or_residence_event(
-        &self,
-    ) -> Either<LayerAccessStatFullDetails, LayerResidenceEvent> {
+    /// Get the latest access timestamp, falling back to latest residence event.
+    ///
+    /// This function can only return `None` if there has not yet been a call to the
+    /// [`record_residence_event`] method. That would generally be considered an
+    /// implementation error. This function logs a rate-limited warning in that case.
+    ///
+    /// TODO: use type system to avoid the need for `fallback`.
+    /// The approach in https://github.com/neondatabase/neon/pull/3775
+    /// could be used to enforce that a residence event is recorded
+    /// before a layer is added to the layer map. We could also have
+    /// a layer wrapper type that holds the LayerAccessStats, and ensure
+    /// that that type can only be produced by inserting into the layer map.
+    pub(crate) fn latest_activity(&self) -> Option<SystemTime> {
        let locked = self.0.lock().unwrap();
        let inner = &locked.for_eviction_policy;
        match inner.last_accesses.recent() {
-            Some(a) => Either::Left(*a),
+            Some(a) => Some(a.when),
            None => match inner.last_residence_changes.recent() {
-                Some(e) => Either::Right(e.clone()),
-                None => unreachable!("constructors for LayerAccessStats ensure that there's always a residence change event"),
-            }
-        }
-    }
-
-    pub(crate) fn latest_activity(&self) -> SystemTime {
-        match self.most_recent_access_or_residence_event() {
-            Either::Left(mra) => mra.when,
-            Either::Right(re) => re.timestamp,
+                Some(e) => Some(e.timestamp),
+                None => {
+                    static WARN_RATE_LIMIT: Lazy<Mutex<(usize, RateLimit)>> =
+                        Lazy::new(|| Mutex::new((0, RateLimit::new(Duration::from_secs(10)))));
+                    let mut guard = WARN_RATE_LIMIT.lock().unwrap();
+                    guard.0 += 1;
+                    let occurences = guard.0;
+                    guard.1.call(move || {
+                        warn!(parent: None, occurences, "latest_activity not available, this is an implementation bug, using fallback value");
+                    });
+                    None
+                }
+            },
        }
    }
 }
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -57,7 +57,7 @@ use utils::{

 use super::{
    DeltaFileName, Layer, LayerAccessStats, LayerAccessStatsReset, LayerFileName, LayerIter,
-    LayerKeyIter, LayerResidenceStatus, PathOrConf,
+    LayerKeyIter, PathOrConf,
 };

 ///
@@ -637,7 +637,7 @@ impl DeltaLayer {
            key_range: summary.key_range,
            lsn_range: summary.lsn_range,
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident),
+            access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
            inner: RwLock::new(DeltaLayerInner {
                loaded: false,
                file: None,
@@ -808,7 +808,7 @@ impl DeltaLayerWriterInner {
            key_range: self.key_start..key_end,
            lsn_range: self.lsn_range.clone(),
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_new_layer_file(),
+            access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
            inner: RwLock::new(DeltaLayerInner {
                loaded: false,
                file: None,
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -53,7 +53,7 @@ use utils::{
 };

 use super::filename::{ImageFileName, LayerFileName};
-use super::{Layer, LayerAccessStatsReset, LayerIter, LayerResidenceStatus, PathOrConf};
+use super::{Layer, LayerAccessStatsReset, LayerIter, PathOrConf};

 ///
 /// Header stored in the beginning of the file
@@ -438,7 +438,7 @@ impl ImageLayer {
            key_range: summary.key_range,
            lsn: summary.lsn,
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident),
+            access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
            inner: RwLock::new(ImageLayerInner {
                file: None,
                loaded: false,
@@ -598,7 +598,7 @@ impl ImageLayerWriterInner {
            key_range: self.key_range.clone(),
            lsn: self.lsn,
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_new_layer_file(),
+            access_stats: LayerAccessStats::empty_will_record_residence_event_later(),
            inner: RwLock::new(ImageLayerInner {
                loaded: false,
                file: None,
--- a/pageserver/src/tenant/storage_layer/remote_layer.rs
+++ b/pageserver/src/tenant/storage_layer/remote_layer.rs
@@ -4,6 +4,7 @@
 use crate::config::PageServerConf;
 use crate::context::RequestContext;
 use crate::repository::Key;
+use crate::tenant::layer_map::BatchedUpdates;
 use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
 use crate::tenant::storage_layer::{Layer, ValueReconstructResult, ValueReconstructState};
 use anyhow::{bail, Result};
@@ -246,11 +247,15 @@ impl RemoteLayer {
    }

    /// Create a Layer struct representing this layer, after it has been downloaded.
-    pub fn create_downloaded_layer(
+    pub fn create_downloaded_layer<L>(
        &self,
+        layer_map_lock_held_witness: &BatchedUpdates<'_, L>,
        conf: &'static PageServerConf,
        file_size: u64,
-    ) -> Arc<dyn PersistentLayer> {
+    ) -> Arc<dyn PersistentLayer>
+    where
+        L: ?Sized + Layer,
+    {
        if self.is_delta {
            let fname = DeltaFileName {
                key_range: self.key_range.clone(),
@@ -262,8 +267,10 @@ impl RemoteLayer {
                self.tenantid,
                &fname,
                file_size,
-                self.access_stats
-                    .clone_for_residence_change(LayerResidenceStatus::Resident),
+                self.access_stats.clone_for_residence_change(
+                    layer_map_lock_held_witness,
+                    LayerResidenceStatus::Resident,
+                ),
            ))
        } else {
            let fname = ImageFileName {
@@ -276,8 +283,10 @@ impl RemoteLayer {
                self.tenantid,
                &fname,
                file_size,
-                self.access_stats
-                    .clone_for_residence_change(LayerResidenceStatus::Resident),
+                self.access_stats.clone_for_residence_change(
+                    layer_map_lock_held_witness,
+                    LayerResidenceStatus::Resident,
+                ),
            ))
        }
    }
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -259,6 +259,7 @@ pub(crate) async fn random_init_delay(
    }
 }

+/// Attention: the `task` and `period` beocme labels of a pageserver-wide prometheus metric.
 pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task: &str) {
    // Duration::ZERO will happen because it's the "disable [bgtask]" value.
    if elapsed >= period && period != Duration::ZERO {
@@ -271,5 +272,8 @@ pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task
            task,
            "task iteration took longer than the configured period"
        );
+        crate::metrics::BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT
+            .with_label_values(&[task, &format!("{}", period.as_secs())])
+            .inc();
    }
 }
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`{{ pageserver_config \| sivel.toiletwater.to_toml }}`