Compare commits

1 commit

Stas Kelvich · 892723cc7e · Local auth renew proxy · 2025-03-22 03:31:54 +02:00

239 changed files with 6569 additions and 10261 deletions


@@ -14,4 +14,3 @@ opt-level = 1
[alias]
build_testing = ["build", "--features", "testing"]
neon = ["run", "--bin", "neon_local"]


@@ -1,186 +0,0 @@
name: 'Create Allure report'
description: 'Generate Allure report from test results uploaded by actions/allure-report-store'
outputs:
report-url:
description: 'Allure report URL'
value: ${{ steps.generate-report.outputs.report-url }}
report-json-url:
description: 'Allure report JSON URL'
value: ${{ steps.generate-report.outputs.report-json-url }}
runs:
using: "composite"
steps:
# We're using some of the env variables quite often, so let's set them once.
#
# It would be nice to have them set in a common runs.env[0] section, but it doesn't work[1]
#
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
#
- name: Set variables
shell: bash -euxo pipefail {0}
run: |
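# Pick a stable key for this report: jq prints the literal string "null" when the
# triggering event has no pull_request, so fall back to the (sanitized) branch name.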
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${PR_NUMBER}" != "null" ]; then
BRANCH_OR_PR=pr-${PR_NUMBER}
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for special branches
BRANCH_OR_PR=${GITHUB_REF_NAME}
else
BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
fi
LOCK_FILE=reports/${BRANCH_OR_PR}/lock.txt
WORKDIR=/tmp/${BRANCH_OR_PR}-$(date +%s)
mkdir -p ${WORKDIR}
echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
echo "LOCK_FILE=${LOCK_FILE}" >> $GITHUB_ENV
echo "WORKDIR=${WORKDIR}" >> $GITHUB_ENV
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
env:
BUCKET: neon-github-public-dev
# TODO: We could replace this with a special docker image that has Java and Allure pre-installed
- uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: '17'
- name: Install Allure
shell: bash -euxo pipefail {0}
run: |
if ! which allure; then
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
echo "${ALLURE_ZIP_SHA256} ${ALLURE_ZIP}" | sha256sum --check
unzip -q ${ALLURE_ZIP}
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.22.1
ALLURE_ZIP_SHA256: fdc7a62d94b14c5e0bf25198ae1feded6b005fdbed864b4d3cb4e5e901720b0b
# Potentially we could have several builds running for the same key (for example, for the main branch), so we use an improvised lock for this
- name: Acquire lock
shell: bash -euxo pipefail {0}
run: |
LOCK_TIMEOUT=300 # seconds
LOCK_CONTENT="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
echo ${LOCK_CONTENT} > ${WORKDIR}/lock.txt
# Do it up to 5 times to avoid race condition
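# Lock protocol: poll until the lock object is absent or stale (older than
# LOCK_TIMEOUT seconds), upload our own lock file, then read it back to confirm
# that this run (and not a concurrent one) actually owns the lock.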
for _ in $(seq 1 5); do
for i in $(seq 1 ${LOCK_TIMEOUT}); do
LOCK_ACQUIRED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
if [ -z "${LOCK_ACQUIRED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ACQUIRED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
break
fi
sleep 1
done
aws s3 mv --only-show-errors ${WORKDIR}/lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# Double-check that exactly THIS run has acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${LOCK_CONTENT}" ]; then
break
fi
done
- name: Generate and publish final Allure report
id: generate-report
shell: bash -euxo pipefail {0}
run: |
REPORT_PREFIX=reports/${BRANCH_OR_PR}
RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
# Get previously uploaded data for this run
ZSTD_NBTHREADS=0
S3_FILEPATHS=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/ | jq --raw-output '.Contents[].Key')
if [ -z "$S3_FILEPATHS" ]; then
# There's no previously uploaded data for this $GITHUB_RUN_ID
exit 0
fi
for S3_FILEPATH in ${S3_FILEPATHS}; do
time aws s3 cp --only-show-errors "s3://${BUCKET}/${S3_FILEPATH}" "${WORKDIR}"
archive=${WORKDIR}/$(basename $S3_FILEPATH)
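# ${archive%.tar.zst} strips the extension, so every archive is extracted into its own directory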
mkdir -p ${archive%.tar.zst}
time tar -xf ${archive} -C ${archive%.tar.zst}
rm -f ${archive}
done
# Get history trend
time aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${WORKDIR}/latest/history" || true
# Generate report
time allure generate --clean --output ${WORKDIR}/report ${WORKDIR}/*
# Replace a logo link with a redirect to the latest version of the report
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html?nocache='"'+Date.now()+'"'" class=|g' ${WORKDIR}/report/app.js
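# The injected ?nocache='+Date.now()+' query string keeps browsers from serving a cached copy of the "latest" report the logo points to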
# Upload the history and the final report (in this particular order so that the history doesn't end up duplicated in 2 places)
time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
# Generate redirect
cat <<EOF > ${WORKDIR}/index.html
<!DOCTYPE html>
<meta charset="utf-8">
<title>Redirecting to ${REPORT_URL}</title>
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
EOF
time aws s3 cp --only-show-errors ${WORKDIR}/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
- name: Release lock
if: always()
shell: bash -euxo pipefail {0}
run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
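# Delete the lock only if it still contains our run id; otherwise another run has already taken it over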
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi
- name: Cleanup
if: always()
shell: bash -euxo pipefail {0}
run: |
if [ -d "${WORKDIR}" ]; then
rm -rf ${WORKDIR}
fi
- uses: actions/github-script@v6
if: always()
env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, COMMIT_SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${COMMIT_SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: 'Allure report',
})


@@ -1,72 +0,0 @@
name: 'Store Allure results'
description: 'Upload test results to be used by actions/allure-report-generate'
inputs:
report-dir:
description: 'directory with test results generated by tests'
required: true
unique-key:
description: 'string to distinguish different results in the same run'
required: true
runs:
using: "composite"
steps:
- name: Set variables
shell: bash -euxo pipefail {0}
run: |
PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${PR_NUMBER}" != "null" ]; then
BRANCH_OR_PR=pr-${PR_NUMBER}
elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for special branches
BRANCH_OR_PR=${GITHUB_REF_NAME}
else
BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
fi
echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
echo "REPORT_DIR=${REPORT_DIR}" >> $GITHUB_ENV
env:
REPORT_DIR: ${{ inputs.report-dir }}
- name: Upload test results
shell: bash -euxo pipefail {0}
run: |
REPORT_PREFIX=reports/${BRANCH_OR_PR}
RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
# Add metadata
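# Allure picks up executor.json and environment.properties from the results
# directory and shows them in the report's Executor and Environment widgets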
cat <<EOF > ${REPORT_DIR}/executor.json
{
"name": "GitHub Actions",
"type": "github",
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
"buildOrder": ${GITHUB_RUN_ID},
"buildName": "GitHub Actions Run #${GITHUB_RUN_NUMBER}/${GITHUB_RUN_ATTEMPT}",
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
"reportName": "Allure Report"
}
EOF
cat <<EOF > ${REPORT_DIR}/environment.properties
COMMIT_SHA=${COMMIT_SHA}
EOF
ARCHIVE="${UNIQUE_KEY}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ZSTD_NBTHREADS=0
time tar -C ${REPORT_DIR} -cf ${ARCHIVE} --zstd .
time aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
env:
UNIQUE_KEY: ${{ inputs.unique-key }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
BUCKET: neon-github-public-dev
- name: Cleanup
if: always()
shell: bash -euxo pipefail {0}
run: |
rm -rf ${REPORT_DIR}

.github/actions/allure-report/action.yml (new file, 254 lines)

@@ -0,0 +1,254 @@
name: 'Create Allure report'
description: 'Create and publish Allure report'
inputs:
action:
description: 'generate or store'
required: true
build_type:
description: '`build_type` from run-python-test-set action'
required: true
test_selection:
description: '`test_selector` from run-python-test-set action'
required: false
outputs:
report-url:
description: 'Allure report URL'
value: ${{ steps.generate-report.outputs.report-url }}
report-json-url:
description: 'Allure report JSON URL'
value: ${{ steps.generate-report.outputs.report-json-url }}
runs:
using: "composite"
steps:
# We're using some of the env variables quite often, so let's set them once.
#
# It would be nice to have them set in a common runs.env[0] section, but it doesn't work[1]
#
# - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
# - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
#
- name: Set common environment variables
shell: bash -euxo pipefail {0}
run: |
echo "BUILD_TYPE=${BUILD_TYPE}" >> $GITHUB_ENV
echo "BUCKET=${BUCKET}" >> $GITHUB_ENV
echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
env:
BUILD_TYPE: ${{ inputs.build_type }}
BUCKET: neon-github-public-dev
TEST_OUTPUT: /tmp/test_output
- name: Validate input parameters
shell: bash -euxo pipefail {0}
run: |
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
exit 1
fi
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
echo >&2 "inputs.test_selection must be set for 'store' action"
exit 2
fi
- name: Calculate variables
id: calculate-vars
shell: bash -euxo pipefail {0}
run: |
# TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
if [ "${pr_number}" != "null" ]; then
key=pr-${pr_number}
elif [ "${GITHUB_REF_NAME}" = "main" ]; then
# Shortcut for a special branch
key=main
elif [ "${GITHUB_REF_NAME}" = "release" ]; then
# Shortcut for a special branch
key=release
else
key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
fi
echo "KEY=${key}" >> $GITHUB_OUTPUT
# Sanitize test selection to remove `/` and any other special characters
# Use printf instead of echo to avoid having `\n` at the end of the string
test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
- uses: actions/setup-java@v3
if: ${{ inputs.action == 'generate' }}
with:
distribution: 'temurin'
java-version: '17'
- name: Install Allure
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
run: |
if ! which allure; then
ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
echo "${ALLURE_ZIP_MD5} ${ALLURE_ZIP}" | md5sum -c
unzip -q ${ALLURE_ZIP}
echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
rm -f ${ALLURE_ZIP}
fi
env:
ALLURE_VERSION: 2.21.0
ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
- name: Upload Allure results
if: ${{ inputs.action == 'store' }}
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
shell: bash -euxo pipefail {0}
run: |
# Add metadata
cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
{
"name": "GitHub Actions",
"type": "github",
"url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
"buildOrder": ${GITHUB_RUN_ID},
"buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
"buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
"reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
"reportName": "Allure Report"
}
EOF
cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
TEST_SELECTION=${{ inputs.test_selection }}
BUILD_TYPE=${BUILD_TYPE}
EOF
ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
ZSTD_NBTHREADS=0
tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
# Potentially we could have several builds running for the same key (for example, for the main branch), so we use an improvised lock for this
- name: Acquire Allure lock
if: ${{ inputs.action == 'generate' }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
LOCK_TIMEOUT=300 # seconds
for _ in $(seq 1 5); do
for i in $(seq 1 ${LOCK_TIMEOUT}); do
LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
# `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
break
fi
sleep 1
done
echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
# Double-check that exactly WE have acquired the lock
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
break
fi
done
- name: Generate and publish final Allure report
if: ${{ inputs.action == 'generate' }}
id: generate-report
env:
REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
shell: bash -euxo pipefail {0}
run: |
# Get previously uploaded data for this run
ZSTD_NBTHREADS=0
s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output '.Contents[].Key')
if [ -z "$s3_filepaths" ]; then
# There's no previously uploaded data for this run
exit 0
fi
for s3_filepath in ${s3_filepaths}; do
aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
mkdir -p ${archive%.tar.zst}
tar -xf ${archive} -C ${archive%.tar.zst}
rm -f ${archive}
done
# Get history trend
aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
# Generate report
allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
# Replace a logo link with a redirect to the latest version of the report
sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
# Upload the history and the final report (in this particular order so that the history doesn't end up duplicated in 2 places)
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
# Generate redirect
cat <<EOF > ${TEST_OUTPUT}/allure/index.html
<!DOCTYPE html>
<meta charset="utf-8">
<title>Redirecting to ${REPORT_URL}</title>
<meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
EOF
aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
- name: Release Allure lock
if: ${{ inputs.action == 'generate' && always() }}
shell: bash -euxo pipefail {0}
env:
LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
run: |
aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
fi
- name: Cleanup
if: always()
shell: bash -euxo pipefail {0}
run: |
rm -rf ${TEST_OUTPUT}/allure
- uses: actions/github-script@v6
if: ${{ inputs.action == 'generate' && always() }}
env:
REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, BUILD_TYPE, SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: `Allure report / ${BUILD_TYPE}`,
})


@@ -36,14 +36,18 @@ inputs:
description: 'Region name for real s3 tests'
required: false
default: ''
real_s3_access_key_id:
description: 'Access key id'
required: false
default: ''
real_s3_secret_access_key:
description: 'Secret access key'
required: false
default: ''
rerun_flaky:
description: 'Whether to rerun flaky tests'
required: false
default: 'false'
pg_version:
description: 'Postgres version to use for tests'
required: false
default: 'v14'
runs:
using: "composite"
@@ -63,12 +67,12 @@ runs:
path: /tmp/neon-previous
prefix: latest
- name: Download compatibility snapshot
- name: Download compatibility snapshot for Postgres 14
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
name: compatibility-snapshot-${{ inputs.build_type }}-pg14
path: /tmp/compatibility_snapshot_pg14
prefix: latest
- name: Checkout
@@ -96,18 +100,19 @@ runs:
COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: ${{ inputs.build_type }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
RERUN_FLAKY: ${{ inputs.rerun_flaky }}
PG_VERSION: ${{ inputs.pg_version }}
shell: bash -euxo pipefail {0}
run: |
# PLATFORM will be embedded in the perf test report
# and it is needed to distinguish different environments
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${PG_VERSION#v}
export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
@@ -187,18 +192,19 @@ runs:
scripts/generate_and_push_perf_report.sh
fi
- name: Upload compatibility snapshot
- name: Upload compatibility snapshot for Postgres 14
if: github.ref_name == 'release'
uses: ./.github/actions/upload
with:
name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}-${{ github.run_id }}
# Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
# The path includes the test name (test_create_snapshot) and the directory that the test creates (compatibility_snapshot_pg14); keep the path in sync with the test
path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
prefix: latest
- name: Upload test results
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-store
uses: ./.github/actions/allure-report
with:
report-dir: /tmp/test_output/allure/results
unique-key: ${{ inputs.build_type }}
action: store
build_type: ${{ inputs.build_type }}
test_selection: ${{ inputs.test_selection }}

.github/ansible/.gitignore (new file, 5 lines)

@@ -0,0 +1,5 @@
neon_install.tar.gz
.neon_current_version
collections/*
!collections/.keep

.github/ansible/ansible.cfg (new file, 12 lines)

@@ -0,0 +1,12 @@
[defaults]
localhost_warning = False
host_key_checking = False
timeout = 30
[ssh_connection]
ssh_args = -F ./ansible.ssh.cfg
# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
# and scp didn't work for me either
transfer_method = piped
pipelining = True

.github/ansible/ansible.ssh.cfg (new file, 15 lines)

@@ -0,0 +1,15 @@
# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
# (use the pre-8.5 option name to cope with the old ssh in CI)
PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
Host tele.zenith.tech
User admin
Port 3023
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
Host * !tele.zenith.tech
User admin
StrictHostKeyChecking no
UserKnownHostsFile /dev/null
ProxyJump tele.zenith.tech

.github/ansible/collections/.keep (new empty file)

.github/ansible/deploy.yaml (new file, 211 lines)

@@ -0,0 +1,211 @@
- name: Upload Neon binaries
hosts: storage
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: get latest version of Neon binaries
register: current_version_file
set_fact:
current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
tags:
- pageserver
- safekeeper
- name: inform about versions
debug:
msg: "Version to deploy - {{ current_version }}"
tags:
- pageserver
- safekeeper
- name: upload and extract Neon binaries to /usr/local
ansible.builtin.unarchive:
owner: root
group: root
src: neon_install.tar.gz
dest: /usr/local
become: true
tags:
- pageserver
- safekeeper
- binaries
- putbinaries
- name: Deploy pageserver
hosts: pageservers
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: upload init script
when: console_mgmt_base_url is defined
ansible.builtin.template:
src: scripts/init_pageserver.sh
dest: /tmp/init_pageserver.sh
owner: root
group: root
mode: '0755'
become: true
tags:
- pageserver
- name: init pageserver
shell:
cmd: /tmp/init_pageserver.sh
args:
creates: "/storage/pageserver/data/tenants"
environment:
NEON_REPO_DIR: "/storage/pageserver/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
become: true
tags:
- pageserver
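# The next three tasks preserve the node's existing pageserver id: slurp the current
# pageserver.toml, parse it as TOML, and merge the id into the templated config stub
# before re-rendering the file.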
- name: read the existing remote pageserver config
ansible.builtin.slurp:
src: /storage/pageserver/data/pageserver.toml
register: _remote_ps_config
tags:
- pageserver
- name: parse the existing pageserver configuration
ansible.builtin.set_fact:
_existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
tags:
- pageserver
- name: construct the final pageserver configuration dict
ansible.builtin.set_fact:
pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
tags:
- pageserver
- name: template the pageserver config
template:
src: templates/pageserver.toml.j2
dest: /storage/pageserver/data/pageserver.toml
become: true
tags:
- pageserver
# used in `pageserver.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/pageserver.service
dest: /etc/systemd/system/pageserver.service
owner: root
group: root
mode: '0644'
become: true
tags:
- pageserver
- name: start systemd service
ansible.builtin.systemd:
daemon_reload: yes
name: pageserver
enabled: yes
state: restarted
become: true
tags:
- pageserver
- name: post version to console
when: console_mgmt_base_url is defined
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/pageservers
tags:
- pageserver
- name: Deploy safekeeper
hosts: safekeepers
gather_facts: False
remote_user: "{{ remote_user }}"
tasks:
- name: upload init script
when: console_mgmt_base_url is defined
ansible.builtin.template:
src: scripts/init_safekeeper.sh
dest: /tmp/init_safekeeper.sh
owner: root
group: root
mode: '0755'
become: true
tags:
- safekeeper
- name: init safekeeper
shell:
cmd: /tmp/init_safekeeper.sh
args:
creates: "/storage/safekeeper/data/safekeeper.id"
environment:
NEON_REPO_DIR: "/storage/safekeeper/data"
LD_LIBRARY_PATH: "/usr/local/v14/lib"
become: true
tags:
- safekeeper
# used in `safekeeper.service` template
- name: learn current availability_zone
shell:
cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
register: ec2_availability_zone
- set_fact:
ec2_availability_zone={{ ec2_availability_zone.stdout }}
# in the future safekeepers should discover pageservers by themselves,
# but currently we use the first pageserver that was discovered
- name: set first pageserver var for safekeepers
set_fact:
first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
tags:
- safekeeper
- name: upload systemd service definition
ansible.builtin.template:
src: systemd/safekeeper.service
dest: /etc/systemd/system/safekeeper.service
owner: root
group: root
mode: '0644'
become: true
tags:
- safekeeper
- name: start systemd service
ansible.builtin.systemd:
daemon_reload: yes
name: safekeeper
enabled: yes
state: restarted
become: true
tags:
- safekeeper
- name: post version to console
when: console_mgmt_base_url is defined
shell:
cmd: |
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/safekeepers
tags:
- safekeeper

.github/ansible/get_binaries.sh (new executable file, 42 lines)

@@ -0,0 +1,42 @@
#!/bin/bash
set -e
if [ -n "${DOCKER_TAG}" ]; then
# Version is DOCKER_TAG without the prefix
VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
else
echo "Please set DOCKER_TAG environment variable"
exit 1
fi
# do initial cleanup
rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
mkdir neon_install
# retrieve binaries from docker image
echo "getting binaries from docker image"
docker pull --quiet neondatabase/neon:${DOCKER_TAG}
ID=$(docker create neondatabase/neon:${DOCKER_TAG})
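# 'docker create' makes a stopped container whose filesystem can then be copied from with 'docker cp'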
docker cp ${ID}:/data/postgres_install.tar.gz .
tar -xzf postgres_install.tar.gz -C neon_install
mkdir neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
docker rm -vf ${ID}
# store version to file (for ansible playbooks) and create binaries tarball
echo ${VERSION} > neon_install/.neon_current_version
echo ${VERSION} > .neon_current_version
tar -czf neon_install.tar.gz -C neon_install .
# do final cleanup
rm -rf neon_install postgres_install.tar.gz


@@ -0,0 +1,48 @@
storage:
vars:
bucket_name: neon-prod-storage-ap-southeast-1
bucket_region: ap-southeast-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: ap-southeast-1
ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1
console_region_id: aws-ap-southeast-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-064de8ea28bdb495b
pageserver-1.ap-southeast-1.aws.neon.tech:
ansible_host: i-0b180defcaeeb6b93
safekeepers:
hosts:
safekeeper-0.ap-southeast-1.aws.neon.tech:
ansible_host: i-0d6f1dc5161eef894
safekeeper-2.ap-southeast-1.aws.neon.tech:
ansible_host: i-04fb63634e4679eb9
safekeeper-3.ap-southeast-1.aws.neon.tech:
ansible_host: i-05481f3bc88cfc2d4


@@ -0,0 +1,50 @@
storage:
vars:
bucket_name: neon-prod-storage-eu-central-1
bucket_region: eu-central-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-central-1
ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1
console_region_id: aws-eu-central-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.eu-central-1.aws.neon.tech:
ansible_host: i-0cd8d316ecbb715be
pageserver-1.eu-central-1.aws.neon.tech:
ansible_host: i-090044ed3d383fef0
pageserver-2.eu-central-1.aws.neon.tech:
ansible_host: i-033584edf3f4b6742
safekeepers:
hosts:
safekeeper-0.eu-central-1.aws.neon.tech:
ansible_host: i-0b238612d2318a050
safekeeper-1.eu-central-1.aws.neon.tech:
ansible_host: i-07b9c45e5c2637cd4
safekeeper-2.eu-central-1.aws.neon.tech:
ansible_host: i-020257302c3c93d88


@@ -0,0 +1,50 @@
storage:
vars:
bucket_name: neon-prod-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.theta.us-east-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-1
ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-1
console_region_id: aws-us-east-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-east-1.aws.neon.tech:
ansible_host: i-085222088b0d2e0c7
pageserver-1.us-east-1.aws.neon.tech:
ansible_host: i-0969d4f684d23a21e
pageserver-2.us-east-1.aws.neon.tech:
ansible_host: i-05dee87895da58dad
safekeepers:
hosts:
safekeeper-0.us-east-1.aws.neon.tech:
ansible_host: i-04ce739e88793d864
safekeeper-1.us-east-1.aws.neon.tech:
ansible_host: i-0e9e6c9227fb81410
safekeeper-2.us-east-1.aws.neon.tech:
ansible_host: i-072f4dd86a327d52f


@@ -0,0 +1,51 @@
storage:
vars:
bucket_name: neon-prod-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.tech:
ansible_host: i-062227ba7f119eb8c
pageserver-1.us-east-2.aws.neon.tech:
ansible_host: i-0b3ec0afab5968938
pageserver-2.us-east-2.aws.neon.tech:
ansible_host: i-0d7a1c4325e71421d
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.tech:
ansible_host: i-0e94224750c57d346
safekeeper-1.us-east-2.aws.neon.tech:
ansible_host: i-06d113fb73bfddeb0
safekeeper-2.us-east-2.aws.neon.tech:
ansible_host: i-09f66c8e04afff2e8


@@ -0,0 +1,53 @@
storage:
vars:
bucket_name: neon-prod-storage-us-west-2
bucket_region: us-west-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-west-2
ansible_aws_ssm_bucket_name: neon-prod-storage-us-west-2
console_region_id: aws-us-west-2-new
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-west-2.aws.neon.tech:
ansible_host: i-0d9f6dfae0e1c780d
pageserver-1.us-west-2.aws.neon.tech:
ansible_host: i-0c834be1dddba8b3f
pageserver-2.us-west-2.aws.neon.tech:
ansible_host: i-051642d372c0a4f32
pageserver-3.us-west-2.aws.neon.tech:
ansible_host: i-00c3844beb9ad1c6b
safekeepers:
hosts:
safekeeper-0.us-west-2.aws.neon.tech:
ansible_host: i-00719d8a74986fda6
safekeeper-1.us-west-2.aws.neon.tech:
ansible_host: i-074682f9d3c712e7c
safekeeper-2.us-west-2.aws.neon.tech:
ansible_host: i-042b7efb1729d7966


@@ -0,0 +1,37 @@
#!/bin/sh
# fetch params from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')
# store fqdn hostname in var
HOST=$(hostname -f)
cat <<EOF | tee /tmp/payload
{
"version": 1,
"host": "${HOST}",
"port": 6400,
"region_id": "{{ console_region_id }}",
"instance_id": "${INSTANCE_ID}",
"http_host": "${HOST}",
"http_port": 9898,
"active": false,
"availability_zone_id": "${AZ_ID}",
"disk_size": ${DISK_SIZE},
"instance_type": "${INSTANCE_TYPE}"
}
EOF
# check if pageserver already registered or not
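# curl -f exits non-zero on HTTP errors (e.g. 404), so the negated check only succeeds when this instance is not registered yet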
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
# init pageserver
sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
fi


@@ -0,0 +1,31 @@
#!/bin/sh
# fetch params from meta-data service
INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
# store fqdn hostname in var
HOST=$(hostname -f)
cat <<EOF | tee /tmp/payload
{
"version": 1,
"host": "${HOST}",
"port": 6500,
"http_port": 7676,
"region_id": "{{ console_region_id }}",
"instance_id": "${INSTANCE_ID}",
"availability_zone_id": "${AZ_ID}",
"active": false
}
EOF
# check if safekeeper already registered or not
if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
# not registered, so register it now
ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
# init safekeeper
sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
fi

.github/ansible/ssm_config (new file, 2 lines)

@@ -0,0 +1,2 @@
ansible_connection: aws_ssm
ansible_python_interpreter: /usr/bin/python3


@@ -0,0 +1,47 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-central-1
bucket_region: eu-central-1
# We only register/update storage in one preview console and manually copy to other instances
console_mgmt_base_url: http://neon-internal-api.helium.aws.neon.build
broker_endpoint: http://storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.helium.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-central-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-central-1
console_region_id: aws-eu-central-1
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.eu-central-1.aws.neon.build:
ansible_host: i-011f93ec26cfba2d4
safekeepers:
hosts:
safekeeper-0.eu-central-1.aws.neon.build:
ansible_host: i-0ff026d27babf8ddd
safekeeper-1.eu-central-1.aws.neon.build:
ansible_host: i-03983a49ee54725d9
safekeeper-2.eu-central-1.aws.neon.build:
ansible_host: i-0bd025ecdb61b0db3


@@ -0,0 +1,46 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-west-1
bucket_region: eu-west-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-west-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
console_region_id: aws-eu-west-1
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.eu-west-1.aws.neon.build:
ansible_host: i-01d496c5041c7f34c
safekeepers:
hosts:
safekeeper-0.eu-west-1.aws.neon.build:
ansible_host: i-05226ef85722831bf
safekeeper-1.eu-west-1.aws.neon.build:
ansible_host: i-06969ee1bf2958bfc
safekeeper-2.eu-west-1.aws.neon.build:
ansible_host: i-087892e9625984a0b


@@ -0,0 +1,56 @@
storage:
vars:
bucket_name: neon-staging-storage-us-east-2
bucket_region: us-east-2
console_mgmt_base_url: http://neon-internal-api.aws.neon.build
broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-2
ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2
console_region_id: aws-us-east-2
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.us-east-2.aws.neon.build:
ansible_host: i-0c3e70929edb5d691
pageserver-1.us-east-2.aws.neon.build:
ansible_host: i-0565a8b4008aa3f40
pageserver-2.us-east-2.aws.neon.build:
ansible_host: i-01e31cdf7e970586a
pageserver-3.us-east-2.aws.neon.build:
ansible_host: i-0602a0291365ef7cc
pageserver-99.us-east-2.aws.neon.build:
ansible_host: i-0c39491109bb88824
safekeepers:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce
safekeeper-3.us-east-2.aws.neon.build:
ansible_host: i-05f8ba2cda243bd18
safekeeper-99.us-east-2.aws.neon.build:
ansible_host: i-0d61b6a2ea32028d5


@@ -0,0 +1,18 @@
[Unit]
Description=Neon pageserver
After=network.target auditd.service
[Service]
Type=simple
User=pageserver
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -c "availability_zone='{{ ec2_availability_zone }}'" -D /storage/pageserver/data
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
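# With KillMode=mixed, SIGINT is delivered to the main pageserver process on stop,
# giving it a chance to shut down cleanly before systemd escalates to SIGKILL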
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target


@@ -0,0 +1,18 @@
[Unit]
Description=Neon safekeeper
After=network.target auditd.service
[Service]
Type=simple
User=safekeeper
Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}' --availability-zone={{ ec2_availability_zone }}
ExecReload=/bin/kill -HUP $MAINPID
KillMode=mixed
KillSignal=SIGINT
Restart=on-failure
TimeoutSec=10
LimitNOFILE=30000000
[Install]
WantedBy=multi-user.target


@@ -0,0 +1 @@
{{ pageserver_config | sivel.toiletwater.to_toml }}


@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
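# The chart's built-in metrics scraping is disabled; instead extraManifests adds a
# VictoriaMetrics VMServiceScrape that scrapes /metrics on the broker port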
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"


@@ -0,0 +1,76 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeping the existing connections
# but not accepting new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
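# 604800 s = 7 days: terminating pods may linger that long so existing client connections can drain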
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
otelExporterOtlpEndpoint: "https://otel-collector.zeta.eu-west-1.internal.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: eu-west-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"


@@ -0,0 +1,68 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
domain: "pg.neon.build"
otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
sentryEnvironment: "staging"
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy-link pods
podLabels:
neon_service: proxy
neon_env: dev
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -0,0 +1,77 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeping the existing connections
# but not accepting new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.cloud.stage.neon.tech"
otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram-legacy
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"


@@ -0,0 +1,78 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeping the existing connections
# but not accepting new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: dev
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
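
The preStop sleep and the long terminationGracePeriodSeconds above let terminating pods keep serving their existing connections while no longer receiving new ones. A purely illustrative way to observe this during a rollout, assuming the neon-proxy namespace used by the deploy workflows below:

# Watch proxy pods linger in Terminating while they drain (illustrative)
kubectl --namespace neon-proxy get pods --watch
# Confirm the hook and grace period are set on a running pod (pod name is a placeholder)
kubectl --namespace neon-proxy get pod <pod-name> \
  -o jsonpath='{.spec.terminationGracePeriodSeconds} {.spec.containers[0].lifecycle.preStop.exec.command}'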

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.beta.us-east-2.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"
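
A hedged smoke test for the broker service defined above, assuming network reachability to the internal load balancer and that the listener is the configured service.port:

# Resolve the internal LB hostname and check the broker port accepts TCP (illustrative)
getent hosts storage-broker-lb.beta.us-east-2.internal.aws.neon.build
nc -vz storage-broker-lb.beta.us-east-2.internal.aws.neon.build 50051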

View File

@@ -0,0 +1,67 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/management/api/v2"
domain: "*.cloud.${PREVIEW_NAME}.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: test
neon_region: ${PREVIEW_NAME}.eu-central-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: cloud.${PREVIEW_NAME}.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
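
This template is rendered per preview environment by the deploy-dev workflow further down using envsubst. A minimal sketch of that step (the preview name and image tag are examples):

# Render the preview values and deploy one preview proxy (mirrors the workflow step below)
export PREVIEW_NAME=helium
envsubst < .github/helm-values/preview-template.neon-proxy-scram.yaml \
  > preview-${PREVIEW_NAME}.neon-proxy-scram.yaml
helm upgrade neon-proxy-scram-${PREVIEW_NAME} neondatabase/neon-proxy \
  --namespace neon-proxy-${PREVIEW_NAME} --create-namespace --install --atomic \
  -f preview-${PREVIEW_NAME}.neon-proxy-scram.yaml --set image.tag=<docker-tag>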

View File

@@ -0,0 +1,77 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60 seconds).
# The pod(s) will stay in Terminating, keeping existing connections
# but not accepting new ones.
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.ap-southeast-1.aws.neon.tech"
extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: ap-southeast-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -0,0 +1,77 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60 seconds).
# The pod(s) will stay in Terminating, keeping existing connections
# but not accepting new ones.
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.eu-central-1.aws.neon.tech"
extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: eu-central-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
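
The extraManifests entry above depends on the VictoriaMetrics operator CRD being installed in the cluster. A hedged check that the scrape object was created (namespace and release name are assumptions based on the deploy workflows):

# List and inspect the generated VMServiceScrape (requires the VictoriaMetrics operator)
kubectl --namespace neon-proxy get vmservicescrapes.operator.victoriametrics.com
kubectl --namespace neon-proxy describe vmservicescrape neon-proxy-scram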

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -0,0 +1,69 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60 seconds).
# The pod(s) will stay in Terminating, keeping existing connections
# but not accepting new ones.
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-east-1.aws.neon.tech"
# *.us-east-1.retooldb.com hasn't been delegated yet.
extraDomains: ["*.us-east-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-east-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-1.aws.neon.tech
httpsPort: 443
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.theta.us-east-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -0,0 +1,58 @@
# Helm chart values for neon-proxy-link.
# This is a YAML-formatted file.
image:
repository: neondatabase/neon
settings:
authBackend: "link"
authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
uri: "https://console.neon.tech/psql_session/"
domain: "pg.neon.tech"
sentryEnvironment: "production"
# -- Additional labels for zenith-proxy pods
podLabels:
neon_service: proxy
neon_env: production
neon_region: us-east-2
service:
type: LoadBalancer
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.delta.us-east-2.aws.neon.tech
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.delta.us-east-2.aws.neon.tech
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,77 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60 seconds).
# The pod(s) will stay in Terminating, keeping existing connections
# but not accepting new ones.
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-east-2.aws.neon.tech"
extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-east-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.delta.us-east-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -0,0 +1,76 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60 seconds).
# The pod(s) will stay in Terminating, keeping existing connections
# but not accepting new ones.
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.cloud.neon.tech"
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.eta.us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,77 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60 seconds).
# The pod(s) will stay in Terminating, keeping existing connections
# but not accepting new ones.
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-west-2.aws.neon.tech"
extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-west-2
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.eta.us-west-2.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -1,6 +1,6 @@
## Problem
## Describe your changes
## Summary of changes
## Issue ticket number and link
## Checklist before requesting a review

View File

@@ -16,12 +16,12 @@ on:
workflow_dispatch: # adds ability to run this manually
inputs:
region_id:
description: 'Project region id. If not set, the default region will be used'
description: 'Use a particular region. If not set the default region will be used'
required: false
default: 'aws-us-east-2'
save_perf_report:
type: boolean
description: 'Publish perf report. If not set, the report will be published only for the main branch'
description: 'Publish perf report or not. If not set, the report is published only for the main branch'
required: false
defaults:
@@ -93,7 +93,10 @@ jobs:
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -125,14 +128,13 @@ jobs:
matrix='{
"platform": [
"neon-captest-new",
"neon-captest-reuse",
"neonvm-captest-new"
"neon-captest-reuse"
],
"db_size": [ "10gb" ],
"include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "platform": "neon-captest-new", "db_size": "50gb" },
{ "platform": "neonvm-captest-freetier", "db_size": "3gb" },
{ "platform": "neonvm-captest-new", "db_size": "50gb" }]
"include": [
{ "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "platform": "neon-captest-new", "db_size": "50gb" }
]
}'
if [ "$(date +%A)" = "Saturday" ]; then
@@ -198,7 +200,7 @@ jobs:
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
@@ -206,7 +208,6 @@ jobs:
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
- name: Set up Connection String
id: set-up-connstr
@@ -215,7 +216,7 @@ jobs:
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
neon-captest-new | neon-captest-freetier)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
@@ -225,7 +226,7 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
@@ -282,7 +283,10 @@ jobs:
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -376,7 +380,10 @@ jobs:
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -469,7 +476,10 @@ jobs:
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
@@ -556,13 +566,16 @@ jobs:
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
uses: ./.github/actions/allure-report
with:
action: generate
build_type: ${{ env.BUILD_TYPE }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

View File

@@ -324,14 +324,12 @@ jobs:
runs-on: [ self-hosted, gen3, large ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
# Default shared memory is 64mb
options: --init --shm-size=512mb
options: --init
needs: [ build-neon ]
strategy:
fail-fast: false
matrix:
build_type: [ debug, release ]
pg_version: [ v14, v15 ]
steps:
- name: Checkout
uses: actions/checkout@v3
@@ -346,24 +344,24 @@ jobs:
test_selection: regress
needs_postgres_source: true
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
real_s3_bucket: ci-tests-s3
real_s3_region: us-west-2
real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
rerun_flaky: true
pg_version: ${{ matrix.pg_version }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
- name: Merge and upload coverage data
if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
if: matrix.build_type == 'debug'
uses: ./.github/actions/save-coverage-data
benchmarks:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
# Default shared memory is 64mb
options: --init --shm-size=512mb
options: --init
needs: [ build-neon ]
if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
strategy:
@@ -401,48 +399,79 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Create Allure report
- name: Create Allure report (debug)
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
id: create-allure-report-debug
uses: ./.github/actions/allure-report
with:
action: generate
build_type: debug
- name: Create Allure report (release)
if: ${{ !cancelled() }}
id: create-allure-report-release
uses: ./.github/actions/allure-report
with:
action: generate
build_type: release
- uses: actions/github-script@v6
if: ${{ !cancelled() }}
if: >
!cancelled() &&
github.event_name == 'pull_request' && (
steps.create-allure-report-debug.outputs.report-url ||
steps.create-allure-report-release.outputs.report-url
)
with:
# Retry script for 5XX server errors: https://github.com/actions/github-script#retries
retries: 5
script: |
const report = {
reportUrl: "${{ steps.create-allure-report.outputs.report-url }}",
reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
}
const reports = [{
buildType: "debug",
reportUrl: "${{ steps.create-allure-report-debug.outputs.report-url }}",
jsonUrl: "${{ steps.create-allure-report-debug.outputs.report-json-url }}",
}, {
buildType: "release",
reportUrl: "${{ steps.create-allure-report-release.outputs.report-url }}",
jsonUrl: "${{ steps.create-allure-report-release.outputs.report-json-url }}",
}]
const script = require("./scripts/comment-test-report.js")
const script = require("./scripts/pr-comment-test-report.js")
await script({
github,
context,
fetch,
report,
reports,
})
- name: Store Allure test stat in the DB
if: ${{ !cancelled() && steps.create-allure-report.outputs.report-json-url }}
if: >
!cancelled() && (
steps.create-allure-report-debug.outputs.report-url ||
steps.create-allure-report-release.outputs.report-url
)
env:
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
REPORT_JSON_URL: ${{ steps.create-allure-report.outputs.report-json-url }}
SHA: ${{ github.event.pull_request.head.sha || github.sha }}
REPORT_JSON_URL_DEBUG: ${{ steps.create-allure-report-debug.outputs.report-json-url }}
REPORT_JSON_URL_RELEASE: ${{ steps.create-allure-report-release.outputs.report-json-url }}
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
run: |
./scripts/pysync
curl --fail --output suites.json "${REPORT_JSON_URL}"
export BUILD_TYPE=unified
export DATABASE_URL="$TEST_RESULT_CONNSTR"
for report_url in $REPORT_JSON_URL_DEBUG $REPORT_JSON_URL_RELEASE; do
if [ -z "$report_url" ]; then
continue
fi
poetry run python3 scripts/ingest_regress_test_result.py \
--revision ${COMMIT_SHA} \
--reference ${GITHUB_REF} \
--build-type ${BUILD_TYPE} \
--ingest suites.json
if [[ "$report_url" == "$REPORT_JSON_URL_DEBUG" ]]; then
BUILD_TYPE=debug
else
BUILD_TYPE=release
fi
curl --fail --output suites.json "${report_url}"
DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
done
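# A hedged, consolidated sketch of the per-build-type ingestion loop above
# (variable names are taken from the step env; not authoritative):
#   for report_url in "$REPORT_JSON_URL_DEBUG" "$REPORT_JSON_URL_RELEASE"; do
#     [ -n "$report_url" ] || continue
#     if [ "$report_url" = "$REPORT_JSON_URL_DEBUG" ]; then BUILD_TYPE=debug; else BUILD_TYPE=release; fi
#     curl --fail --output suites.json "$report_url"
#     DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py \
#       --revision "$SHA" --reference "$GITHUB_REF" --build-type "$BUILD_TYPE" --ingest suites.json
#   done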
coverage-report:
runs-on: [ self-hosted, gen3, small ]
@@ -488,48 +517,37 @@ jobs:
- name: Merge coverage data
run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge
- name: Build coverage report
env:
COMMIT_URL: ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.event.pull_request.head.sha || github.sha }}
- name: Build and upload coverage report
run: |
scripts/coverage --dir=/tmp/coverage \
report \
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
scripts/coverage \
--dir=/tmp/coverage report \
--input-objects=/tmp/coverage/binaries.list \
--commit-url=${COMMIT_URL} \
--commit-url=$COMMIT_URL \
--format=github
scripts/coverage --dir=/tmp/coverage \
report \
--input-objects=/tmp/coverage/binaries.list \
--format=lcov
REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
- name: Upload coverage report
id: upload-coverage-report
env:
BUCKET: neon-github-public-dev
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
run: |
aws s3 cp --only-show-errors --recursive /tmp/coverage/report s3://${BUCKET}/code-coverage/${COMMIT_SHA}
scripts/git-upload \
--repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
--message="Add code coverage for $COMMIT_URL" \
copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/index.html
echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
- uses: actions/github-script@v6
env:
REPORT_URL: ${{ steps.upload-coverage-report.outputs.report-url }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
with:
script: |
const { REPORT_URL, COMMIT_SHA } = process.env
await github.rest.repos.createCommitStatus({
owner: context.repo.owner,
repo: context.repo.repo,
sha: `${COMMIT_SHA}`,
state: 'success',
target_url: `${REPORT_URL}`,
context: 'Code coverage report',
})
# Add link to the coverage report to the commit
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"success\",
\"context\": \"neon-coverage\",
\"description\": \"Coverage report is ready\",
\"target_url\": \"$REPORT_URL\"
}"
trigger-e2e-tests:
runs-on: [ self-hosted, gen3, small ]
@@ -615,7 +633,6 @@ jobs:
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
--context .
--build-arg GIT_VERSION=${{ github.sha }}
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
--destination neondatabase/neon:${{needs.tag.outputs.build-tag}}
@@ -664,9 +681,6 @@ jobs:
project: nrdv0s4kcs
push: true
tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
build-args: |
GIT_VERSION=${{ github.sha }}
REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
compute-tools-image:
runs-on: [ self-hosted, gen3, large ]
@@ -704,7 +718,6 @@ jobs:
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
--context .
--build-arg GIT_VERSION=${{ github.sha }}
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
--dockerfile Dockerfile.compute-tools
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
--destination neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
@@ -715,11 +728,7 @@ jobs:
compute-node-image:
runs-on: [ self-hosted, gen3, large ]
container:
image: gcr.io/kaniko-project/executor:v1.9.2-debug
# Workaround for "Resolving download.osgeo.org (download.osgeo.org)... failed: Temporary failure in name resolution."
# Should be prevented by https://github.com/neondatabase/neon/issues/4281
options: --add-host=download.osgeo.org:140.211.15.30
container: gcr.io/kaniko-project/executor:v1.9.2-debug
needs: [ tag ]
strategy:
fail-fast: false
@@ -761,7 +770,6 @@ jobs:
--context .
--build-arg GIT_VERSION=${{ github.sha }}
--build-arg PG_VERSION=${{ matrix.version }}
--build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
--dockerfile Dockerfile.compute-node
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
--destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
@@ -781,7 +789,7 @@ jobs:
run:
shell: sh -eu {0}
env:
VM_BUILDER_VERSION: v0.8.0
VM_BUILDER_VERSION: v0.4.6
steps:
- name: Checkout
@@ -791,18 +799,21 @@ jobs:
- name: Downloading vm-builder
run: |
curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
curl -L https://github.com/neondatabase/neonvm/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
chmod +x vm-builder
# Note: we need a separate pull step here because otherwise vm-builder will try to pull, and
# it won't have the proper authentication (written at v0.6.0)
- name: Pulling compute-node image
run: |
docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
- name: Building VM compute-node rootfs
run: |
docker build -t temp-vm-compute-node --build-arg SRC_IMAGE=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -f Dockerfile.vm-compute-node .
- name: Build vm image
run: |
./vm-builder -enable-file-cache -src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
# note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
./vm-builder -use-inittab -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
- name: Pushing vm-compute-node image
run: |
@@ -924,6 +935,42 @@ jobs:
- name: Cleanup ECR folder
run: rm -rf ~/.ecr
deploy-pr-test-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
needs: [ promote-images, tag, regress-tests ]
if: |
contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
github.event_name != 'workflow_dispatch'
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1 ]
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
- name: Redeploy
run: |
export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
- name: Cleanup ansible folder
run: rm -rf ~/.ansible
deploy:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
@@ -948,12 +995,12 @@ jobs:
- name: Trigger deploy workflow
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
GH_TOKEN: ${{ github.token }}
run: |
if [[ "$GITHUB_REF_NAME" == "main" ]]; then
gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}}
gh workflow run deploy-dev.yml --ref main -f branch=${{ github.sha }} -f dockerTag=${{needs.tag.outputs.build-tag}}
elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
gh workflow run deploy-prod.yml --ref release -f branch=${{ github.sha }} -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
else
echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
exit 1
@@ -962,7 +1009,7 @@ jobs:
promote-compatibility-data:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ promote-images, tag, regress-tests ]
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
@@ -973,13 +1020,11 @@ jobs:
PREFIX: artifacts/latest
run: |
# Update compatibility snapshot for the release
for pg_version in v14 v15; do
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}.tar.zst
for build_type in debug release; do
OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
done
time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
done
# Update Neon artifact for the release (reuse already uploaded artifact)

.github/workflows/deploy-dev.yml
View File

@@ -0,0 +1,229 @@
name: Neon Deploy dev
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'main'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
concurrency:
group: deploy-dev
cancel-in-progress: false
jobs:
deploy-storage-new:
runs-on: [ self-hosted, gen3, small ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
options: --user root --privileged
if: inputs.deployStorage
defaults:
run:
shell: bash
strategy:
matrix:
# TODO(sergey): Fix storage deploy in eu-central-1
target_region: [ eu-west-1, us-east-2 ]
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
- name: Cleanup ansible folder
run: rm -rf ~/.ansible
deploy-proxy-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployProxy
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
deploy_link_proxy: true
deploy_legacy_scram_proxy: true
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache
deploy-preview-proxy-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployProxy
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: eu-central-1
target_cluster: dev-eu-central-1-alpha
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy preview proxies
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
for PREVIEW_NAME in helium argon krypton xenon radon oganesson hydrogen nitrogen oxygen fluorine chlorine; do
export PREVIEW_NAME
envsubst <.github/helm-values/preview-template.neon-proxy-scram.yaml >preview-${PREVIEW_NAME}.neon-proxy-scram.yaml
helm upgrade neon-proxy-scram-${PREVIEW_NAME} neondatabase/neon-proxy --namespace neon-proxy-${PREVIEW_NAME} --create-namespace --install --atomic -f preview-${PREVIEW_NAME}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
done
- name: Cleanup helm folder
run: rm -rf ~/.cache
deploy-storage-broker-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployStorageBroker
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
- target_region: eu-central-1
target_cluster: dev-eu-central-1-alpha
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
- name: Cleanup helm folder
run: rm -rf ~/.cache
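
This workflow is normally triggered by the build pipeline, but it can also be dispatched by hand with the GitHub CLI. A sketch of such a dispatch (the image tag is a placeholder):

# Manually dispatch the dev deploy for a given image tag (placeholder values)
gh workflow run deploy-dev.yml --ref main \
  -f branch=main -f dockerTag=<build-tag> \
  -f deployStorage=true -f deployProxy=true -f deployStorageBroker=true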

.github/workflows/deploy-prod.yml
View File

@@ -0,0 +1,173 @@
name: Neon Deploy prod
on:
workflow_dispatch:
inputs:
dockerTag:
description: 'Docker tag to deploy'
required: true
type: string
branch:
description: 'Branch or commit used for deploy scripts and configs'
required: true
type: string
default: 'release'
deployStorage:
description: 'Deploy storage'
required: true
type: boolean
default: true
deployProxy:
description: 'Deploy proxy'
required: true
type: boolean
default: true
deployStorageBroker:
description: 'Deploy storage-broker'
required: true
type: boolean
default: true
disclamerAcknowledged:
description: 'I confirm that there is an emergency and I cannot use the regular release workflow'
required: true
type: boolean
default: false
concurrency:
group: deploy-prod
cancel-in-progress: false
jobs:
deploy-prod-new:
runs-on: prod
container:
image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
options: --user root --privileged
if: inputs.deployStorage && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1, us-east-1 ]
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Redeploy
run: |
export DOCKER_TAG=${{ inputs.dockerTag }}
cd "$(pwd)/.github/ansible"
./get_binaries.sh
ansible-galaxy collection install sivel.toiletwater
ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
rm -f neon_install.tar.gz .neon_current_version
deploy-proxy-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployProxy && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
deploy_link_proxy: true
deploy_legacy_scram_proxy: false
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
deploy_link_proxy: false
deploy_legacy_scram_proxy: true
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
- target_region: us-east-1
target_cluster: prod-us-east-1-theta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy scram proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy link proxy
if: matrix.deploy_link_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
- name: Re-deploy legacy scram proxy
if: matrix.deploy_legacy_scram_proxy
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
deploy-storage-broker-prod-new:
runs-on: prod
container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: us-east-2
target_cluster: prod-us-east-2-delta
- target_region: us-west-2
target_cluster: prod-us-west-2-eta
- target_region: eu-central-1
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
- target_region: us-east-1
target_cluster: prod-us-east-1-theta
environment:
name: prod-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Deploy storage-broker
run:
helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
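
As with the dev deploy, this workflow can be dispatched by hand, but every job above is gated on the acknowledgement input. A sketch of such a dispatch (the image tag is a placeholder):

# Manually dispatch the prod deploy; jobs only run when the acknowledgement flag is set
gh workflow run deploy-prod.yml --ref release \
  -f branch=release -f dockerTag=<build-tag> \
  -f disclamerAcknowledged=true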

View File

@@ -2,7 +2,7 @@
Howdy! Usual good software engineering practices apply. Write
tests. Write comments. Follow standard Rust coding practices where
possible. Use `cargo fmt` and `cargo clippy` to tidy up formatting.
possible. Use 'cargo fmt' and 'clippy' to tidy up formatting.
There are soft spots in the code, which could use cleanup,
refactoring, additional comments, and so forth. Let's try to raise the

892
Cargo.lock generated

File diff suppressed because it is too large

View File

@@ -3,7 +3,6 @@ members = [
"compute_tools",
"control_plane",
"pageserver",
"pageserver/ctl",
"proxy",
"safekeeper",
"storage_broker",
@@ -22,10 +21,9 @@ anyhow = { version = "1.0", features = ["backtrace"] }
async-stream = "0.3"
async-trait = "0.1"
atty = "0.2.14"
aws-config = { version = "0.55", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.27"
aws-smithy-http = "0.55"
aws-credential-types = "0.55"
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.21.0"
aws-smithy-http = "0.51.0"
aws-types = "0.55"
base64 = "0.13.0"
bincode = "1.3"
@@ -127,11 +125,11 @@ env_logger = "0.10"
log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
## Other git libraries
@@ -163,16 +161,10 @@ rstest = "0.17"
tempfile = "3.4"
tonic-build = "0.9"
[patch.crates-io]
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="2e9b5f1ddc481d1a98fa79f6b9378ac4f170b7c9" }
# Changes the MAX_THREADS limit from 4096 to 32768.
# This is a temporary workaround for using tracing from many threads in safekeepers code,
# until async safekeepers patch is merged to the main.
sharded-slab = { git = "https://github.com/neondatabase/sharded-slab.git", rev="98d16753ab01c61f0a028de44167307a00efea00" }
[patch.crates-io]
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
################# Binary contents sections

View File

@@ -2,7 +2,7 @@
### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
### inside this image in the real deployments.
ARG REPOSITORY=neondatabase
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
@@ -44,14 +44,7 @@ COPY --chown=nonroot . .
# Show build caching stats to check if it was used in the end.
# Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
RUN set -e \
&& mold -run cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
--bin safekeeper \
--bin storage_broker \
--bin proxy \
--locked --release \
&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
&& cachepot -s
# Build final image
@@ -70,9 +63,9 @@ RUN set -e \
&& useradd -d /data neon \
&& chown -R neon:neon /data
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin

View File

@@ -1,5 +1,5 @@
ARG PG_VERSION
ARG REPOSITORY=neondatabase
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned
@@ -393,45 +393,6 @@ RUN case "${PG_VERSION}" in \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "kq-imcx-pg-build"
# compile kq_imcx extension
#
#########################################################################################
FROM build-deps AS kq-imcx-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN apt-get update && \
apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
mkdir build && \
cd build && \
cmake .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control
#########################################################################################
#
# Layer "pg-cron-pg-build"
# compile pg_cron extension
#
#########################################################################################
FROM build-deps AS pg-cron-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.5.2.tar.gz -O pg_cron.tar.gz && \
echo "6f7f0980c03f1e2a6a747060e67bf4a303ca2a50e941e2c19daeed2b44dec744 pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
#########################################################################################
#
# Layer "rust extensions"
@@ -545,8 +506,6 @@ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -632,7 +591,6 @@ RUN apt update && \
libxml2 \
libxslt1.1 \
libzstd1 \
libcurl4-openssl-dev \
procps && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

View File

@@ -1,6 +1,6 @@
# First transient image to build compute_tools binaries
# NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
ARG REPOSITORY=neondatabase
ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
ARG IMAGE=rust
ARG TAG=pinned

View File

@@ -0,0 +1,70 @@
# Note: this file *mostly* just builds on Dockerfile.compute-node
ARG SRC_IMAGE
ARG VM_INFORMANT_VERSION=v0.1.14
# on libcgroup update, make sure to check bootstrap.sh for changes
ARG LIBCGROUP_VERSION=v2.0.3
# Pull VM informant, to copy from later
FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
# Build cgroup-tools
#
# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant
# requires cgroup v2, so we'll build cgroup-tools ourselves.
FROM debian:bullseye-slim as libcgroup-builder
ARG LIBCGROUP_VERSION
RUN set -exu \
&& apt update \
&& apt install --no-install-recommends -y \
git \
ca-certificates \
automake \
cmake \
make \
gcc \
byacc \
flex \
libtool \
libpam0g-dev \
&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
&& INSTALL_DIR="/libcgroup-install" \
&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
&& cd libcgroup \
# extracted from bootstrap.sh, with modified flags:
&& (test -d m4 || mkdir m4) \
&& autoreconf -fi \
&& rm -rf autom4te.cache \
&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
# actually build the thing...
&& make install
# Combine, starting from non-VM compute node image.
FROM $SRC_IMAGE as base
# Temporarily set user back to root so we can run adduser, set inittab
USER root
RUN adduser vm-informant --disabled-password --no-create-home
RUN set -e \
&& rm -f /etc/inittab \
&& touch /etc/inittab
RUN set -e \
&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
&& CONNSTR="dbname=postgres user=cloud_admin sslmode=disable" \
&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab
USER postgres
ADD vm-cgconfig.conf /etc/cgconfig.conf
COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]

View File

@@ -1,5 +1,3 @@
[![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
# Neon
Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
@@ -17,7 +15,7 @@ The Neon storage engine consists of two major components:
- Pageserver. Scalable storage backend for the compute nodes.
- Safekeepers. The safekeepers form a redundant WAL service that receives WAL from the compute node and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.
See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.
See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
## Running local installation
@@ -130,11 +128,11 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
```sh
# Create repository in .neon with proper paths to binaries and data
# Later that would be responsibility of a package install script
> cargo neon init
> ./target/debug/neon_local init
Starting pageserver at '127.0.0.1:64000' in '.neon'.
# start pageserver, safekeeper, and broker for their intercommunication
> cargo neon start
> ./target/debug/neon_local start
Starting neon broker at 127.0.0.1:50051
storage_broker started, pid: 2918372
Starting pageserver at '127.0.0.1:64000' in '.neon'.
@@ -143,19 +141,19 @@ Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
safekeeper 1 started, pid: 2918437
# create initial tenant and use it as a default for every future neon_local invocation
> cargo neon tenant create --set-default
> ./target/debug/neon_local tenant create --set-default
tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
# start postgres compute node
> cargo neon endpoint start main
> ./target/debug/neon_local endpoint start main
Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
# check list of running postgres instances
> cargo neon endpoint list
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16B5BA8 running
```
@@ -177,22 +175,22 @@ postgres=# select * from t;
3. And create branches and run postgres on them:
```sh
# create branch named migration_check
> cargo neon timeline branch --branch-name migration_check
> ./target/debug/neon_local timeline branch --branch-name migration_check
Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'
# check branches tree
> cargo neon timeline list
> ./target/debug/neon_local timeline list
(L) main [de200bd42b49cc1814412c7e592dd6e9]
(L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]
# start postgres on that branch
> cargo neon endpoint start migration_check --branch-name migration_check
> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
# check the new list of running postgres instances
> cargo neon endpoint list
> ./target/debug/neon_local endpoint list
ENDPOINT ADDRESS TIMELINE BRANCH NAME LSN STATUS
main 127.0.0.1:55432 de200bd42b49cc1814412c7e592dd6e9 main 0/16F9A38 running
migration_check 127.0.0.1:55433 b3b863fa45fa9e57e615f9f2d944e601 migration_check 0/16F9A70 running
@@ -221,7 +219,7 @@ postgres=# select * from t;
4. If you want to run tests afterward (see below), you must stop all the running pageserver, safekeeper, and postgres instances
you have just started. You can terminate them all with one command:
```sh
> cargo neon stop
> ./target/debug/neon_local stop
```
## Running tests
@@ -238,9 +236,9 @@ CARGO_BUILD_FLAGS="--features=testing" make
## Documentation
[docs](/docs) Contains a top-level overview of all available markdown documentation.
[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
- [sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`
@@ -265,6 +263,6 @@ To get more familiar with this aspect, refer to:
## Join the development
- Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices.
- To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md).
- Read `CONTRIBUTING.md` to learn about project code style and practices.
- To get familiar with a source tree layout, use [/docs/sourcetree.md](/docs/sourcetree.md).
- To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html

View File

@@ -30,7 +30,6 @@
//! -b /usr/local/bin/postgres
//! ```
//!
use std::collections::HashMap;
use std::fs::File;
use std::panic;
use std::path::Path;
@@ -68,54 +67,6 @@ fn main() -> Result<()> {
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.
//
// This is used to propagate the context for the 'start_compute' operation
// from the neon control plane. This allows linking together the wider
// 'start_compute' operation that creates the compute container, with the
// startup actions here within the container.
//
// There is no standard for passing context in env variables, but a lot of
// tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
// https://github.com/open-telemetry/opentelemetry-specification/issues/740
//
// Switch to the startup context here, and exit it once the startup has
// completed and Postgres is up and running.
//
// If this pod is pre-created without binding it to any particular endpoint
// yet, this isn't the right place to enter the startup context. In that
// case, the control plane should pass the tracing context as part of the
// /configure API call.
//
// NOTE: This is supposed to only cover the *startup* actions. Once
// postgres is configured and up-and-running, we exit this span. Any other
// actions that are performed on incoming HTTP requests, for example, are
// performed in separate spans.
//
// XXX: If the pod is restarted, we perform the startup actions in the same
// context as the original startup actions, which probably doesn't make
// sense.
let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
if let Ok(val) = std::env::var("TRACEPARENT") {
startup_tracing_carrier.insert("traceparent".to_string(), val);
}
if let Ok(val) = std::env::var("TRACESTATE") {
startup_tracing_carrier.insert("tracestate".to_string(), val);
}
let startup_context_guard = if !startup_tracing_carrier.is_empty() {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
let guard = TraceContextPropagator::new()
.extract(&startup_tracing_carrier)
.attach();
info!("startup tracing context attached");
Some(guard)
} else {
None
};
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
@@ -197,6 +148,8 @@ fn main() -> Result<()> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
let pspec = state.pspec.as_ref().expect("spec must be set");
let startup_tracing_context = pspec.spec.startup_tracing_context.clone();
// Record for how long we slept waiting for the spec.
state.metrics.wait_for_spec_ms = Utc::now()
@@ -212,6 +165,29 @@ fn main() -> Result<()> {
compute.state_changed.notify_all();
drop(state);
// Extract OpenTelemetry context for the startup actions from the spec, and
// attach it to the current tracing context.
//
// This is used to propagate the context for the 'start_compute' operation
// from the neon control plane. This allows linking together the wider
// 'start_compute' operation that creates the compute container, with the
// startup actions here within the container.
//
// Switch to the startup context here, and exit it once the startup has
// completed and Postgres is up and running.
//
// NOTE: This is supposed to only cover the *startup* actions. Once
// postgres is configured and up-and-running, we exit this span. Any other
// actions that are performed on incoming HTTP requests, for example, are
// performed in separate spans.
let startup_context_guard = if let Some(ref carrier) = startup_tracing_context {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
Some(TraceContextPropagator::new().extract(carrier).attach())
} else {
None
};
// Launch remaining service threads
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
let _configurator_handle =

View File

@@ -30,7 +30,7 @@ use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::{ComputeMode, ComputeSpec};
use compute_api::spec::ComputeSpec;
use crate::config;
use crate::pg_helpers::*;
@@ -67,9 +67,8 @@ pub struct ComputeNode {
pub struct ComputeState {
pub start_time: DateTime<Utc>,
pub status: ComputeStatus,
/// Timestamp of the last Postgres activity. It could be `None` if
/// compute wasn't used since start.
pub last_active: Option<DateTime<Utc>>,
/// Timestamp of the last Postgres activity
pub last_active: DateTime<Utc>,
pub error: Option<String>,
pub pspec: Option<ParsedSpec>,
pub metrics: ComputeMetrics,
@@ -80,7 +79,7 @@ impl ComputeState {
Self {
start_time: Utc::now(),
status: ComputeStatus::Empty,
last_active: None,
last_active: Utc::now(),
error: None,
pspec: None,
metrics: ComputeMetrics::default(),
@@ -250,10 +249,38 @@ impl ComputeNode {
/// safekeepers sync, basebackup, etc.
#[instrument(skip(self, compute_state))]
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
#[derive(Clone)]
enum Replication {
Primary,
Static { lsn: Lsn },
HotStandby,
}
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
let spec = &pspec.spec;
let pgdata_path = Path::new(&self.pgdata);
let hot_replica = if let Some(option) = spec.cluster.settings.find_ref("hot_standby") {
if let Some(value) = &option.value {
anyhow::ensure!(option.vartype == "bool");
matches!(value.as_str(), "on" | "yes" | "true")
} else {
false
}
} else {
false
};
let replication = if hot_replica {
Replication::HotStandby
} else if let Some(lsn) = spec.cluster.settings.find("recovery_target_lsn") {
Replication::Static {
lsn: Lsn::from_str(&lsn)?,
}
} else {
Replication::Primary
};
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?;
@@ -261,8 +288,8 @@ impl ComputeNode {
// Syncing safekeepers is only safe with primary nodes: if a primary
// is already connected it will be kicked out, so a secondary (standby)
// cannot sync safekeepers.
let lsn = match spec.mode {
ComputeMode::Primary => {
let lsn = match &replication {
Replication::Primary => {
info!("starting safekeepers syncing");
let lsn = self
.sync_safekeepers(pspec.storage_auth_token.clone())
@@ -270,11 +297,11 @@ impl ComputeNode {
info!("safekeepers synced at LSN {}", lsn);
lsn
}
ComputeMode::Static(lsn) => {
Replication::Static { lsn } => {
info!("Starting read-only node at static LSN {}", lsn);
lsn
*lsn
}
ComputeMode::Replica => {
Replication::HotStandby => {
info!("Initializing standby from latest Pageserver LSN");
Lsn(0)
}
@@ -294,9 +321,9 @@ impl ComputeNode {
// Update pg_hba.conf received with basebackup.
update_pg_hba(pgdata_path)?;
match spec.mode {
ComputeMode::Primary | ComputeMode::Static(..) => {}
ComputeMode::Replica => {
match &replication {
Replication::Primary | Replication::Static { .. } => {}
Replication::HotStandby => {
add_standby_signal(pgdata_path)?;
}
}
@@ -362,8 +389,6 @@ impl ComputeNode {
};
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
client.simple_query("SET neon.forward_ddl = false")?;
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
@@ -405,15 +430,11 @@ impl ComputeNode {
self.pg_reload_conf(&mut client)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
handle_extensions(&spec, &mut client)?;
}
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
handle_extensions(&spec, &mut client)?;
// 'Close' connection
drop(client);
@@ -446,9 +467,7 @@ impl ComputeNode {
let pg = self.start_postgres(spec.storage_auth_token.clone())?;
if spec.spec.mode == ComputeMode::Primary {
self.apply_config(&compute_state)?;
}
self.apply_config(&compute_state)?;
let startup_end_time = Utc::now();
{

View File

@@ -6,7 +6,7 @@ use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::{ComputeMode, ComputeSpec};
use compute_api::spec::ComputeSpec;
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
@@ -34,25 +34,17 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
// File::create() destroys the file content if it exists.
let mut file = File::create(path)?;
let mut postgres_conf = File::create(path)?;
write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
Ok(())
}
// Write Postgres config block wrapped with generated comment section
fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
writeln!(file, "# Managed by compute_ctl: begin")?;
write!(file, "{}", &spec.cluster.settings.as_pg_settings())?;
match spec.mode {
ComputeMode::Primary => {}
ComputeMode::Static(lsn) => {
// hot_standby is 'on' by default, but let's be explicit
writeln!(file, "hot_standby=on")?;
writeln!(file, "recovery_target_lsn='{lsn}'")?;
}
ComputeMode::Replica => {
// hot_standby is 'on' by default, but let's be explicit
writeln!(file, "hot_standby=on")?;
}
}
writeln!(file, "{}", buf)?;
writeln!(file, "# Managed by compute_ctl: end")?;
Ok(())

View File

@@ -181,8 +181,8 @@ components:
ComputeState:
type: object
required:
- start_time
- status
- last_active
properties:
start_time:
type: string
@@ -195,13 +195,11 @@ components:
$ref: '#/components/schemas/ComputeStatus'
last_active:
type: string
description: |
The last detected compute activity timestamp in UTC and RFC3339 format.
It could be empty if compute was never used by user since start.
description: The last detected compute activity timestamp in UTC and RFC3339 format.
example: "2022-10-12T07:20:50.52Z"
error:
type: string
description: Text of the error during compute startup or reconfiguration, if any.
description: Text of the error during compute startup, if any.
example: ""
tenant:
type: string
@@ -224,12 +222,9 @@ components:
ComputeStatus:
type: string
enum:
- empty
- init
- failed
- running
- configuration_pending
- configuration
example: running
#

View File

@@ -74,7 +74,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
// Found non-idle backend, so the last activity is NOW.
// Save it and exit the for loop. Also clear the idle backend
// `state_change` timestamps array as it doesn't matter now.
last_active = Some(Utc::now());
last_active = Utc::now();
idle_backs.clear();
break;
}
@@ -82,16 +82,15 @@ fn watch_compute_activity(compute: &ComputeNode) {
// Get idle backend `state_change` with the max timestamp.
if let Some(last) = idle_backs.iter().max() {
last_active = Some(*last);
last_active = *last;
}
}
// Update the last activity in the shared state if we got a more recent one.
let mut state = compute.state.lock().unwrap();
// NB: `Some(<DateTime>)` is always greater than `None`.
if last_active > state.last_active {
state.last_active = last_active;
debug!("set the last compute activity time to: {:?}", last_active);
debug!("set the last compute activity time to: {}", last_active);
}
}
Err(e) => {

View File

@@ -121,8 +121,9 @@ impl RoleExt for Role {
/// string of arguments.
fn to_pg_options(&self) -> String {
// XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
let mut params: String = self.options.as_pg_options();
params.push_str(" LOGIN");
// For now, we do not use generic `options` for roles. Once used, add
// `self.options.as_pg_options()` somewhere here.
let mut params: String = "LOGIN".to_string();
if let Some(pass) = &self.encrypted_password {
// Some time ago we supported only md5 and treated all encrypted_password as md5.

View File

@@ -62,7 +62,7 @@ fn do_control_plane_request(
}
}
/// Request spec from the control-plane by compute_id. If `NEON_CONTROL_PLANE_TOKEN`
/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT`
/// env variable is set, it will be used for authorization.
pub fn get_spec_from_control_plane(
base_uri: &str,

View File

@@ -16,7 +16,7 @@ mod pg_helpers_tests {
);
assert_eq!(
spec.cluster.roles.first().unwrap().to_pg_options(),
" LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
"LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
);
}

View File

@@ -30,5 +30,4 @@ postgres_connection.workspace = true
storage_broker.workspace = true
utils.workspace = true
compute_api.workspace = true
workspace_hack.workspace = true

View File

@@ -7,8 +7,8 @@
//!
use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
use compute_api::spec::ComputeMode;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::endpoint::Replication;
use control_plane::local_env::LocalEnv;
use control_plane::pageserver::PageServerNode;
use control_plane::safekeeper::SafekeeperNode;
@@ -41,7 +41,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
const DEFAULT_BRANCH_NAME: &str = "main";
project_git_version!(GIT_VERSION);
const DEFAULT_PG_VERSION: &str = "15";
const DEFAULT_PG_VERSION: &str = "14";
fn default_conf() -> String {
format!(
@@ -481,7 +481,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
timeline_id,
None,
pg_version,
ComputeMode::Primary,
Replication::Primary,
)?;
println!("Done");
}
@@ -568,8 +568,8 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.iter()
.filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
{
let lsn_str = match endpoint.mode {
ComputeMode::Static(lsn) => {
let lsn_str = match endpoint.replication {
Replication::Static(lsn) => {
// -> read-only endpoint
// Use the node's LSN.
lsn.to_string()
@@ -632,14 +632,21 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.copied()
.unwrap_or(false);
let mode = match (lsn, hot_standby) {
(Some(lsn), false) => ComputeMode::Static(lsn),
(None, true) => ComputeMode::Replica,
(None, false) => ComputeMode::Primary,
let replication = match (lsn, hot_standby) {
(Some(lsn), false) => Replication::Static(lsn),
(None, true) => Replication::Replica,
(None, false) => Replication::Primary,
(Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
};
cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, port, pg_version, mode)?;
cplane.new_endpoint(
tenant_id,
&endpoint_id,
timeline_id,
port,
pg_version,
replication,
)?;
}
"start" => {
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
@@ -663,11 +670,11 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.unwrap_or(false);
if let Some(endpoint) = endpoint {
match (&endpoint.mode, hot_standby) {
(ComputeMode::Static(_), true) => {
match (&endpoint.replication, hot_standby) {
(Replication::Static(_), true) => {
bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
}
(ComputeMode::Primary, true) => {
(Replication::Primary, true) => {
bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
}
_ => {}
@@ -694,10 +701,10 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.copied()
.context("Failed to `pg-version` from the argument string")?;
let mode = match (lsn, hot_standby) {
(Some(lsn), false) => ComputeMode::Static(lsn),
(None, true) => ComputeMode::Replica,
(None, false) => ComputeMode::Primary,
let replication = match (lsn, hot_standby) {
(Some(lsn), false) => Replication::Static(lsn),
(None, true) => Replication::Replica,
(None, false) => Replication::Primary,
(Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
};
@@ -714,7 +721,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
timeline_id,
port,
pg_version,
mode,
replication,
)?;
ep.start(&auth_token)?;
}

View File

@@ -11,33 +11,15 @@ use std::sync::Arc;
use std::time::Duration;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
use crate::local_env::LocalEnv;
use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
use crate::pageserver::PageServerNode;
use crate::postgresql_conf::PostgresConf;
use compute_api::spec::ComputeMode;
// contents of a endpoint.json file
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct EndpointConf {
name: String,
#[serde_as(as = "DisplayFromStr")]
tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
timeline_id: TimelineId,
mode: ComputeMode,
port: u16,
pg_version: u32,
}
//
// ComputeControlPlane
//
@@ -88,7 +70,7 @@ impl ComputeControlPlane {
timeline_id: TimelineId,
port: Option<u16>,
pg_version: u32,
mode: ComputeMode,
replication: Replication,
) -> Result<Arc<Endpoint>> {
let port = port.unwrap_or_else(|| self.get_port());
@@ -98,22 +80,12 @@ impl ComputeControlPlane {
env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver),
timeline_id,
mode,
replication,
tenant_id,
pg_version,
});
ep.create_pgdata()?;
std::fs::write(
ep.endpoint_path().join("endpoint.json"),
serde_json::to_string_pretty(&EndpointConf {
name: name.to_string(),
tenant_id,
timeline_id,
mode,
port,
pg_version,
})?,
)?;
ep.setup_pg_conf()?;
self.endpoints.insert(ep.name.clone(), Arc::clone(&ep));
@@ -124,17 +96,29 @@ impl ComputeControlPlane {
///////////////////////////////////////////////////////////////////////////////
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Replication {
// Regular read-write node
Primary,
// if recovery_target_lsn is provided, and we want to pin the node to a specific LSN
Static(Lsn),
// Hot standby; read-only replica.
// Future versions may want to distinguish between replicas with hot standby
// feedback and other kinds of replication configurations.
Replica,
}
#[derive(Debug)]
pub struct Endpoint {
/// used as the directory name
name: String,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub mode: ComputeMode,
// Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
pub replication: Replication,
// port and address of the Postgres server
pub address: SocketAddr,
// postgres major version in the format: 14, 15, etc.
pg_version: u32,
// These are not part of the endpoint as such, but the environment
@@ -160,20 +144,50 @@ impl Endpoint {
let fname = entry.file_name();
let name = fname.to_str().unwrap().to_string();
// Read the endpoint.json file
let conf: EndpointConf =
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
// Read config file into memory
let cfg_path = entry.path().join("pgdata").join("postgresql.conf");
let cfg_path_str = cfg_path.to_string_lossy();
let mut conf_file = File::open(&cfg_path)
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
let conf = PostgresConf::read(&mut conf_file)
.with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
// Read a few options from the config file
let context = format!("in config file {}", cfg_path_str);
let port: u16 = conf.parse_field("port", &context)?;
let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
// Read postgres version from PG_VERSION file to determine which postgres version binary to use.
// If it doesn't exist, assume broken data directory and use default pg version.
let pg_version_path = entry.path().join("PG_VERSION");
let pg_version_str =
fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
let pg_version = u32::from_str(&pg_version_str)?;
// parse recovery_target_lsn and primary_conninfo into Recovery Target, if any
let replication = if let Some(lsn_str) = conf.get("recovery_target_lsn") {
Replication::Static(Lsn::from_str(lsn_str)?)
} else if let Some(slot_name) = conf.get("primary_slot_name") {
let slot_name = slot_name.to_string();
let prefix = format!("repl_{}_", timeline_id);
assert!(slot_name.starts_with(&prefix));
Replication::Replica
} else {
Replication::Primary
};
// ok now
Ok(Endpoint {
address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.port),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
name,
env: env.clone(),
pageserver: Arc::clone(pageserver),
timeline_id: conf.timeline_id,
mode: conf.mode,
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
timeline_id,
replication,
tenant_id,
pg_version,
})
}
@@ -309,8 +323,8 @@ impl Endpoint {
conf.append_line("");
// Replication-related configurations, such as WAL sending
match &self.mode {
ComputeMode::Primary => {
match &self.replication {
Replication::Primary => {
// Configure backpressure
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
@@ -352,10 +366,10 @@ impl Endpoint {
conf.append("synchronous_standby_names", "pageserver");
}
}
ComputeMode::Static(lsn) => {
Replication::Static(lsn) => {
conf.append("recovery_target_lsn", &lsn.to_string());
}
ComputeMode::Replica => {
Replication::Replica => {
assert!(!self.env.safekeepers.is_empty());
// TODO: use future host field from safekeeper spec
@@ -382,11 +396,6 @@ impl Endpoint {
conf.append("primary_conninfo", connstr.as_str());
conf.append("primary_slot_name", slot_name.as_str());
conf.append("hot_standby", "on");
// prefetching of blocks referenced in WAL doesn't make sense for us
// Neon hot standby ignores pages that are not in the shared_buffers
if self.pg_version >= 15 {
conf.append("recovery_prefetch", "off");
}
}
}
@@ -400,8 +409,8 @@ impl Endpoint {
}
fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
let backup_lsn = match &self.mode {
ComputeMode::Primary => {
let backup_lsn = match &self.replication {
Replication::Primary => {
if !self.env.safekeepers.is_empty() {
// LSN 0 means that it is bootstrap and we need to download just
// latest data from the pageserver. That is a bit clumsy but whole bootstrap
@@ -417,8 +426,8 @@ impl Endpoint {
None
}
}
ComputeMode::Static(lsn) => Some(*lsn),
ComputeMode::Replica => {
Replication::Static(lsn) => Some(*lsn),
Replication::Replica => {
None // Take the latest snapshot available to start with
}
};
@@ -517,7 +526,7 @@ impl Endpoint {
// 3. Load basebackup
self.load_basebackup(auth_token)?;
if self.mode != ComputeMode::Primary {
if self.replication != Replication::Primary {
File::create(self.pgdata().join("standby.signal"))?;
}

View File

@@ -24,7 +24,7 @@ use utils::{
use crate::safekeeper::SafekeeperNode;
pub const DEFAULT_PG_VERSION: u32 = 15;
pub const DEFAULT_PG_VERSION: u32 = 14;
//
// This data structures represents neon_local CLI config

View File

@@ -8,7 +8,9 @@ use std::process::{Child, Command};
use std::{io, result};
use anyhow::{bail, Context};
use pageserver_api::models::{self, TenantInfo, TimelineInfo};
use pageserver_api::models::{
TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
};
use postgres_backend::AuthType;
use postgres_connection::{parse_host_port, PgConnectionConfig};
use reqwest::blocking::{Client, RequestBuilder, Response};
@@ -314,8 +316,8 @@ impl PageServerNode {
settings: HashMap<&str, &str>,
) -> anyhow::Result<TenantId> {
let mut settings = settings.clone();
let config = models::TenantConfig {
let request = TenantCreateRequest {
new_tenant_id,
checkpoint_distance: settings
.remove("checkpoint_distance")
.map(|x| x.parse::<u64>())
@@ -370,14 +372,6 @@ impl PageServerNode {
.remove("evictions_low_residence_duration_metric_threshold")
.map(|x| x.to_string()),
};
// If tenant ID was not specified, generate one
let new_tenant_id = new_tenant_id.unwrap_or(TenantId::generate());
let request = models::TenantCreateRequest {
new_tenant_id,
config,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
}
@@ -397,81 +391,67 @@ impl PageServerNode {
})
}
pub fn tenant_config(
&self,
tenant_id: TenantId,
mut settings: HashMap<&str, &str>,
) -> anyhow::Result<()> {
let config = {
// Braces to make the diff easier to read
models::TenantConfig {
pub fn tenant_config(&self, tenant_id: TenantId, settings: HashMap<&str, &str>) -> Result<()> {
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
.json(&TenantConfigRequest {
tenant_id,
checkpoint_distance: settings
.remove("checkpoint_distance")
.get("checkpoint_distance")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'checkpoint_distance' as an integer")?,
checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
compaction_target_size: settings
.remove("compaction_target_size")
.get("compaction_target_size")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'compaction_target_size' as an integer")?,
compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
compaction_threshold: settings
.remove("compaction_threshold")
.get("compaction_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'compaction_threshold' as an integer")?,
gc_horizon: settings
.remove("gc_horizon")
.get("gc_horizon")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'gc_horizon' as an integer")?,
gc_period: settings.remove("gc_period").map(|x| x.to_string()),
gc_period: settings.get("gc_period").map(|x| x.to_string()),
image_creation_threshold: settings
.remove("image_creation_threshold")
.get("image_creation_threshold")
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.remove("walreceiver_connect_timeout")
.map(|x| x.to_string()),
lagging_wal_timeout: settings
.remove("lagging_wal_timeout")
.get("walreceiver_connect_timeout")
.map(|x| x.to_string()),
lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
max_lsn_wal_lag: settings
.remove("max_lsn_wal_lag")
.get("max_lsn_wal_lag")
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
trace_read_requests: settings
.remove("trace_read_requests")
.get("trace_read_requests")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'trace_read_requests' as bool")?,
eviction_policy: settings
.remove("eviction_policy")
.map(serde_json::from_str)
.get("eviction_policy")
.map(|x| serde_json::from_str(x))
.transpose()
.context("Failed to parse 'eviction_policy' json")?,
min_resident_size_override: settings
.remove("min_resident_size_override")
.get("min_resident_size_override")
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'min_resident_size_override' as an integer")?,
evictions_low_residence_duration_metric_threshold: settings
.remove("evictions_low_residence_duration_metric_threshold")
.get("evictions_low_residence_duration_metric_threshold")
.map(|x| x.to_string()),
}
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
}
self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
.json(&models::TenantConfigRequest { tenant_id, config })
})
.send()?
.error_from_body()?;
@@ -499,14 +479,11 @@ impl PageServerNode {
ancestor_timeline_id: Option<TimelineId>,
pg_version: Option<u32>,
) -> anyhow::Result<TimelineInfo> {
// If timeline ID was not specified, generate one
let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate());
self.http_request(
Method::POST,
format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
)?
.json(&models::TimelineCreateRequest {
.json(&TimelineCreateRequest {
new_timeline_id,
ancestor_start_lsn,
ancestor_timeline_id,

View File

@@ -1,14 +1,6 @@
#!/bin/bash
set -eux
# Generate a random tenant or timeline ID
#
# Takes a variable name as argument. The result is stored in that variable.
generate_id() {
local -n resvar=$1
printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
}
PG_VERSION=${PG_VERSION:-14}
SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
@@ -21,29 +13,29 @@ done
echo "Page server is ready."
echo "Create a tenant and timeline"
generate_id tenant_id
PARAMS=(
-sb
-X POST
-H "Content-Type: application/json"
-d "{\"new_tenant_id\": \"${tenant_id}\"}"
-d "{}"
http://pageserver:9898/v1/tenant/
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
generate_id timeline_id
PARAMS=(
-sb
-X POST
-H "Content-Type: application/json"
-d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
-d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
"http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
)
result=$(curl "${PARAMS[@]}")
echo $result | jq .
echo "Overwrite tenant id and timeline id in spec file"
tenant_id=$(echo ${result} | jq -r .tenant_id)
timeline_id=$(echo ${result} | jq -r .timeline_id)
sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}

View File

@@ -4,11 +4,6 @@ The pageserver uses Tokio for handling concurrency. Everything runs in
Tokio tasks, although some parts are written in blocking style and use
spawn_blocking().
We currently use std blocking functions for disk I/O, however. The
current model is that we consider disk I/Os to be short enough that we
perform them while running in a Tokio task. Changing all the disk I/O
calls to async is a TODO.
Each Tokio task is tracked by the `task_mgr` module. It maintains a
registry of tasks, and which tenant or timeline they are operating
on.
@@ -26,86 +21,19 @@ also a `shutdown_watcher()` Future that can be used with `tokio::select!`
or similar, to wake up on shutdown.
### Async cancellation safety
### Sync vs async
In async Rust, futures can be "cancelled" at any await point, by
dropping the Future. For example, `tokio::select!` returns as soon as
one of the Futures returns, and drops the others. `tokio::time::timeout` is
another example. In the Rust ecosystem, some functions are
cancellation-safe, meaning they can be safely dropped without
side-effects, while others are not. See documentation of
`tokio::select!` for examples.
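For instance, in this toy snippet (not taken from the codebase) the slower future is dropped at its `.await` as soon as the faster branch completes:

```rust
use std::time::Duration;
use tokio::time::sleep;

#[tokio::main]
async fn main() {
    tokio::select! {
        _ = sleep(Duration::from_millis(10)) => println!("fast branch won"),
        _ = async {
            sleep(Duration::from_secs(60)).await;
            // Never reached: once the fast branch completes, this whole
            // future is dropped, i.e. it is cancelled at the await above.
            println!("slow branch finished");
        } => {}
    }
}
```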
We use async to wait for incoming data on network connections, and to
perform other long-running operations. For example, each WAL receiver
connection is handled by a tokio Task. Once a piece of WAL has been
received from the network, the task calls the blocking functions in
the Repository to process the WAL.
In the pageserver and safekeeper, async code is *not*
cancellation-safe by default. Unless otherwise marked, any async
function that you call cannot be assumed to be async
cancellation-safe, and must be polled to completion.
The core storage code in `layered_repository/` is synchronous, with
blocking locks and I/O calls. The current model is that we consider
disk I/Os to be short enough that we perform them while running in a
Tokio task. If that becomes a problem, we should use `spawn_blocking`
before entering the synchronous parts of the code, or switch to using
tokio I/O functions.
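As a rough illustration of that escape hatch (the function names are hypothetical, not actual pageserver code), a blocking call can be pushed onto Tokio's dedicated blocking thread pool like this:

```rust
use std::path::PathBuf;

// Stand-in for any synchronous storage call that uses std blocking I/O.
fn read_layer_file(path: PathBuf) -> std::io::Result<Vec<u8>> {
    std::fs::read(path)
}

async fn get_layer(path: PathBuf) -> anyhow::Result<Vec<u8>> {
    // The closure runs on the blocking thread pool, so a slow disk read
    // does not stall the async worker threads.
    let bytes = tokio::task::spawn_blocking(move || read_layer_file(path)).await??;
    Ok(bytes)
}
```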
The downside of non-cancellation safe code is that you have to be very
careful when using `tokio::select!`, `tokio::time::timeout`, and other such
functions that can cause a Future to be dropped. They can only be used
with functions that are explicitly documented to be cancellation-safe,
or you need to spawn a separate task to shield from the cancellation.
At the entry points to the code, we also take care to poll futures to
completion, or shield the rest of the code from surprise cancellations
by spawning a separate task. The code that handles incoming HTTP
requests, for example, spawns a separate task for each request,
because Hyper will drop the request-handling Future if the HTTP
connection is lost. (FIXME: our HTTP handlers do not do that
currently, but we should fix that. See [issue
3478](https://github.com/neondatabase/neon/issues/3478)).
#### How to cancel, then?
If our code is not cancellation-safe, how do you cancel long-running
tasks? Use CancellationTokens.
TODO: More details on that. And we have an ongoing discussion on what
to do if cancellations might come from multiple sources.
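A minimal sketch of that pattern, assuming the `tokio_util` crate (the loop and names are illustrative, not actual pageserver code):

```rust
use std::time::Duration;
use tokio_util::sync::CancellationToken;

async fn background_job(cancel: CancellationToken) {
    loop {
        // Both futures in this select are cancellation-safe, so dropping
        // the losing branch is harmless.
        tokio::select! {
            _ = cancel.cancelled() => {
                // Clean up and leave the system in a consistent state.
                break;
            }
            _ = tokio::time::sleep(Duration::from_secs(10)) => {}
        }
        // One unit of work, always polled to completion (never inside the
        // select, so it cannot be dropped halfway through).
        do_one_iteration().await;
    }
}

async fn do_one_iteration() { /* placeholder for the actual work */ }
```

On shutdown, the owner calls `cancel.cancel()` and then awaits the task's join handle, so the work itself is never dropped at an arbitrary await point.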
#### Exceptions
Some library functions are cancellation-safe, and are explicitly marked
as such. For example, `utils::seqwait`.
#### Rationale
The alternative would be to make all async code cancellation-safe,
unless otherwise marked. That way, you could use `tokio::select!` more
liberally. The reasons we didn't choose that are explained in this
section.
Writing code in a cancellation-safe manner is tedious, as you need to
scrutinize every `.await` and ensure that if the `.await` call never
returns, the system is in a safe, consistent state. In some ways, you
need to do that with `?` and early `returns`, too, but `.await`s are
easier to miss. It is also easier to perform cleanup tasks when a
function returns an `Err` than when an `.await` simply never
returns. You can use `scopeguard` and Drop guards to perform cleanup
tasks, but it is more tedious. An `.await` that never returns is more
similar to a panic.
Note that even if you only use building blocks that themselves are
cancellation-safe, it doesn't mean that the code as a whole is
cancellation-safe. For example, consider the following code:
```
while let Some(i) = work_inbox.recv().await {
    if let Err(_) = results_outbox.send(i).await {
        println!("receiver dropped");
        return;
    }
}
```
It reads messages from one channel and sends them to another channel. If
this code is cancelled at the `results_outbox.send(i).await`, the
message read from the receiver is lost. That may or may not be OK,
depending on the context.
Another reason to not require cancellation-safety is historical: we
already had a lot of async code that was not scrutinized for
cancellation-safety when this issue was raised. Scrutinizing all
existing code is no fun.
Be very careful when mixing sync and async code!

View File

@@ -1,232 +0,0 @@
# The state of pageserver tenant relocation
Created on 17.03.23
## Motivation
There were previous write-ups on the subject. The design of tenant relocation was planned at a time when the landscape was quite different: there was no on-demand download/eviction yet. Those features were on the horizon, but we still planned for cases when they were not available. Other things have changed as well: safekeepers now offload WAL to S3, so we are not risking overflowing their disks. Having all of the above, it makes sense to recap and take a look at the options we have now, which adjustments we'd like to make to the original process, etc.
Related (in chronological order):
- Tracking issue with initial discussion: [#886](https://github.com/neondatabase/neon/issues/886)
- [015. Storage Messaging](015-storage-messaging.md)
- [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md)
## Summary
The RFC consists of a walkthrough of prior art on tenant relocation and the corresponding problems. It describes three approaches:
1. A simplistic approach that uses ignore and is the fastest to implement. The main downside is that it requires a short downtime.
2. A more complicated approach that avoids even that short downtime.
3. An even more complicated approach that allows multiple pageservers to operate concurrently on the same tenant, possibly enabling HA cluster topologies and horizontal scaling of reads (i.e. compute talks to multiple pageservers).
The order in which the solutions are described is a bit different: we start from 2, then move to possible compromises (aka the simplistic approach), and then discuss directions for solving the HA/pageserver-replica case with 3.
## Components
pageserver, control-plane, safekeepers (a bit)
## Requirements
The relocation procedure should move a tenant from one pageserver to another without downtime introduced by the storage side. For now, restarting compute to apply the new configuration is fine.
- component restarts
- component outage
- pageserver loss
## The original proposed implementation
The starting point is this sequence:
```mermaid
sequenceDiagram
autonumber
participant CP as Control Plane
participant PS1 as Pageserver 1
participant PS2 as Pageserver 2
participant S3
CP->>PS2: Attach tenant X
PS2->>S3: Fetch timelines, indexes for them
PS2->>CP: Accepted
CP->>CP: Change pageserver id in project
CP->>PS1: Detach
```
Which problems do we have with the naive approach?
### Concurrent GC and Compaction
The problem is that they can run on both PS1 and PS2. Consider this example from the [Pageserver S3 Coordination RFC](020-pageserver-s3-coordination.md):
```mermaid
sequenceDiagram
autonumber
participant PS1
participant S3
participant PS2
PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2
PS2->>S3: Attach called, sees L1, L2
PS1->>S3: Compaction comes <br/> Removes L1, adds L3
note over S3: Index now L2, L3
PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)
note over S3: Index now L1, L2, L4
```
At this point it is not possible to restore the state from the index: it contains L1, which
is no longer available in S3, and doesn't contain L3, which was added by compaction on the
first pageserver. So if either of the pageservers restarts, initial sync will fail
(or, in the on-demand world, it will fail a bit later, on a page request for the
missing layer).
The problem lies in the shared index_part.json. Having intersecting layers from append-only edits is expected to work, though this is uncharted territory without tests.
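To make the race concrete, here is a small self-contained sketch (plain Rust, no Neon APIs) of two writers doing a read-modify-write of the same index; the second write silently discards the first writer's changes:

```rust
use std::collections::BTreeSet;

fn main() {
    // State in S3 after PS1 uploaded L1 and L2.
    let mut s3_index: BTreeSet<String> =
        ["L1", "L2"].iter().map(|s| s.to_string()).collect();

    // PS2 attaches and caches its own view of the index.
    let mut ps2_view = s3_index.clone();

    // PS1 compaction: removes L1, adds L3, writes the index back.
    s3_index.remove("L1");
    s3_index.insert("L3".to_string());

    // PS2 uploads L4 based on its *stale* view and overwrites the index.
    ps2_view.insert("L4".to_string());
    s3_index = ps2_view;

    // The index now lists L1 (already deleted from S3) and misses L3.
    println!("{:?}", s3_index); // {"L1", "L2", "L4"}
}
```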
#### Options
There are several options for restricting concurrent access to the index file.
The first and simplest one is external orchestration: the control plane, which runs the migration, can use a special API call on the pageserver to stop background processes (GC, compaction), and possibly even all uploads.
So the sequence becomes:
```mermaid
sequenceDiagram
autonumber
participant CP as Control Plane
participant PS1 as Pageserver 1
participant PS2 as Pageserver 2
participant S3
CP->>PS1: Pause background jobs, pause uploading new layers.
CP->>PS2: Attach tenant X.
PS2->>S3: Fetch timelines, index, start background operations
PS2->>CP: Accepted
CP->>CP: Monitor PS2 last record lsn, ensure OK lag
CP->>CP: Change pageserver id in project
CP->>PS1: Detach
```
The downside of this sequence is the potential rollback process: what if something goes wrong on the new pageserver? Can we safely roll back to the source pageserver?
There are two questions:
#### How can we detect that something went wrong?
We can run the usual availability check (it consists of a compute startup and an update of one row).
Note that we can't run a separate compute for that before touching the compute that the client runs the actual workload on, because we can't have two simultaneous computes running in read-write mode on the same timeline (this is enforced by the safekeepers' consensus algorithm). So we can run some read-only check first (basebackup), then change the pageserver id and run the availability check. If it fails, we can roll back to the old pageserver.
#### What can go wrong? And how can we safely roll back?
In the sequence above, during attach we start background processes/uploads. They change state in remote storage, so after a rollback the remote state may differ from the one the source pageserver last observed. So if the target pageserver misbehaves, the source pageserver may fail to start against the changed remote state.
The proposed option is to implement a barrier (read-only) mode in which the pageserver does not update remote state.
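A minimal sketch of such a barrier, assuming a per-tenant flag that upload and deletion paths consult before touching remote storage (the names are illustrative, not the existing upload-queue code):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

/// Per-tenant switch controlling whether remote storage may be mutated.
/// Illustrative only; the real pageserver would plumb this through its
/// remote timeline client / upload queue.
pub struct RemoteWriteGate {
    read_only: AtomicBool,
}

impl RemoteWriteGate {
    pub fn new(read_only: bool) -> Self {
        Self { read_only: AtomicBool::new(read_only) }
    }

    /// Called via a management endpoint once the availability check
    /// on the new pageserver has passed.
    pub fn allow_writes(&self) {
        self.read_only.store(false, Ordering::SeqCst);
    }

    /// Upload and deletion paths call this before touching S3.
    pub fn check_writable(&self) -> anyhow::Result<()> {
        if self.read_only.load(Ordering::SeqCst) {
            anyhow::bail!("tenant is attached in remote read-only mode");
        }
        Ok(())
    }
}
```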
So the sequence for the happy path becomes this one:
```mermaid
sequenceDiagram
autonumber
participant CP as Control Plane
participant PS1 as Pageserver 1
participant PS2 as Pageserver 2
participant S3
CP->>PS1: Pause background jobs, pause uploading new layers.
CP->>PS2: Attach tenant X in remote readonly mode.
PS2->>S3: Fetch timelines, index
PS2->>CP: Accepted
CP->>CP: Monitor PS2 last record lsn, ensure OK lag
CP->>CP: Change pageserver id in project
CP->>CP: Run successful availability check
CP->>PS2: Start uploads, background tasks
CP->>PS1: Detach
```
With this sequence, all changes to remote storage are restricted to a single pageserver, so there is no concurrent access at all: not only for index_part.json, but for everything else too. This approach makes it possible to roll back after a failure on the new pageserver.
The sequence with the rollback process:
```mermaid
sequenceDiagram
autonumber
participant CP as Control Plane
participant PS1 as Pageserver 1
participant PS2 as Pageserver 2
participant S3
CP->>PS1: Pause background jobs, pause uploading new layers.
CP->>PS2: Attach tenant X in remote readonly mode.
PS2->>S3: Fetch timelines, index
PS2->>CP: Accepted
CP->>CP: Monitor PS2 last record lsn, ensure OK lag
CP->>CP: Change pageserver id in project
CP->>CP: Availability check Failed
CP->>CP: Change pageserver id back
CP->>PS1: Resume remote operations
CP->>PS2: Ignore (instead of detach for investigation purposes)
```
## Concurrent branch creation
Another problem is the possibility of concurrent branch creation calls.
I.e., during migration, create_branch can be called on the old pageserver, and the newly created branch won't be seen on the new pageserver. Prior art includes prototyping an approach that tried to mirror such branches, but it has lost its importance: attach is now fast because we don't need to download all data, and additionally, to the best of my knowledge of control plane internals (cc @ololobus to confirm), operations on one project are executed sequentially, so this case cannot happen. The branch create operation will therefore be executed only when relocation is complete. As a safety measure we can forbid branch creation for tenants that are in the read-only remote state (see the sketch below).
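A sketch of that safety measure, assuming the branch-creation handler can see whether the tenant is currently attached in remote read-only mode (illustrative only; the real check belongs in the pageserver's HTTP API):

```rust
/// Refuse branch creation while the tenant is attached read-only,
/// i.e. while a relocation may be in progress.
fn create_branch(remote_read_only: bool, new_timeline_id: &str) -> anyhow::Result<()> {
    if remote_read_only {
        anyhow::bail!("tenant is in remote read-only mode, branch creation is disabled during relocation");
    }
    // ... proceed with the normal branch creation path ...
    println!("creating branch {new_timeline_id}");
    Ok(())
}
```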
## Simplistic approach
The difference between the simplistic approach and the one described above is that it calls ignore on the source tenant first and only then calls attach on the target pageserver. The approach above does it in the opposite order, which opens the possibility of the race conditions we strive to avoid.
The approach largely follows this guide: <https://github.com/neondatabase/cloud/wiki/Cloud:-Ad-hoc-tenant-relocation>
The happy path sequence:
```mermaid
sequenceDiagram
autonumber
participant CP as Control Plane
participant PS1 as Pageserver 1
participant PS2 as Pageserver 2
participant SK as Safekeeper
participant S3
CP->>CP: Enable maintenance mode
CP->>PS1: Ignore
CP->>PS2: Attach
PS2->>CP: Accepted
loop Wait for each timeline to catch up
CP->>PS2: Get last record lsn
CP->>SK: Get commit lsn
CP->>CP: OK? Timed out?
end
CP->>CP: Change pageserver id in project
CP->>CP: Run successful availability check
CP->>CP: Disable maintenance mode
CP->>PS1: Detach ignored
```
This sequence has exactly the same rollback problems as the previous approach described above. They can be resolved the same way.
Most probably we'd like to move forward without this safety measure and implement it later on top of this approach, to make progress towards the downtime-less one.
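For illustration, the per-timeline catch-up check from the loop in the diagram above could look roughly like this (the LSN getters stand in for the real pageserver and safekeeper API calls):

```rust
use std::{thread, time::{Duration, Instant}};

/// Sketch of the control-plane loop: wait until the new pageserver has
/// ingested WAL up to the safekeepers' commit_lsn, or time out.
/// `get_last_record_lsn` / `get_commit_lsn` are placeholders for the
/// actual API calls.
fn wait_for_catchup(
    get_last_record_lsn: impl Fn() -> anyhow::Result<u64>,
    get_commit_lsn: impl Fn() -> anyhow::Result<u64>,
    timeout: Duration,
) -> anyhow::Result<()> {
    let deadline = Instant::now() + timeout;
    loop {
        let commit_lsn = get_commit_lsn()?;
        let last_record_lsn = get_last_record_lsn()?;
        if last_record_lsn >= commit_lsn {
            return Ok(());
        }
        if Instant::now() > deadline {
            anyhow::bail!(
                "timed out: pageserver at {last_record_lsn}, safekeeper commit_lsn {commit_lsn}"
            );
        }
        thread::sleep(Duration::from_millis(500));
    }
}
```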
## Lease based approach
In order to allow multiple pageservers to operate concurrently on the same data in remote storage, we need to go further than external orchestration.
NOTE: [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md) discusses one more approach that relies on duplicating index_part.json for each pageserver operating on the timeline. That approach still requires external coordination, which makes certain things easier, but it needs additional bookkeeping to account for multiple index_part.json files. A detailed comparison with the lease-based approach proposed here is not covered in this RFC.
The problems are outlined in [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md) and the suggested solution includes the [Coordination based approach](020-pageserver-s3-coordination.md#coordination-based-approach). It allows basic leader election for pageservers, so they can decide which node is responsible for running GC and compaction. The process is based on extensive communication via the storage broker and consists of a lease that is taken by one of the pageservers, which keeps extending it in order to continue serving in the leader role.
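A rough sketch of the lease mechanics, assuming a lease record that the broker hands out and the leader keeps renewing (the real storage broker protocol is different; this only illustrates the decision logic):

```rust
use std::time::{Duration, Instant};

/// Illustrative lease state as one pageserver might track it.
struct Lease {
    holder: String,      // node id of the current leader
    expires_at: Instant, // leader must renew before this deadline
}

/// A pageserver only runs GC/compaction while it holds a valid,
/// unexpired lease for the tenant.
fn may_run_background_jobs(lease: &Lease, my_node_id: &str, now: Instant) -> bool {
    lease.holder == my_node_id && now < lease.expires_at
}

/// Lease renewal: extend the deadline (the broker round-trip is
/// represented here by a plain mutable reference).
fn renew_lease(lease: &mut Lease, my_node_id: &str, ttl: Duration) -> anyhow::Result<()> {
    if lease.holder != my_node_id {
        anyhow::bail!("lost the lease to {}", lease.holder);
    }
    lease.expires_at = Instant::now() + ttl;
    Ok(())
}
```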
There are two options for ingesting new data into a pageserver in the follower role. One option is to avoid WAL ingestion entirely and rely on notifications from the leader to discover new layers on S3. The main downside of this approach is that the follower will always lag behind the primary node, because it won't have the last layer until it is uploaded to remote storage. In case of a primary failure the follower will have to re-ingest the last segment (up to 256 MB of WAL currently), which slows down recovery. Additionally, if a compute is connected to the follower pageserver, it will observe the latest data with a delay. Queries from compute will likely experience bigger delays when a recent LSN is required.
The second option is to consume the WAL stream on both pageservers. In this case the only problem is non-deterministic layer generation. Additional bookkeeping will be required to deduplicate layers from the primary against local ones, and some process needs to merge them to remove duplicated data. Additionally, we need good test coverage to ensure that our implementation of `get_page@lsn` properly handles intersecting layers.
There is another tradeoff: the approaches differ in the amount of traffic between system components. With the first approach there can be increased traffic between the follower and remote storage, but only if the follower has some activity that actually requests pages. With the other approach the traffic increase is permanent and is caused by two WAL streams instead of one.
## Summary
Proposed implementation strategy:
Go with the simplest approach for now. Then work on tech debt and increase test coverage. Then gradually move towards the second approach by implementing safety measures first, finishing with the switch of order between the ignore and attach operations.
Only then move to the lease-based approach to solve the HA/pageserver-replica use cases.

View File

@@ -11,5 +11,4 @@ serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
utils = { path = "../utils" }
workspace_hack.workspace = true

View File

@@ -19,7 +19,7 @@ pub struct ComputeStatusResponse {
pub timeline: Option<String>,
pub status: ComputeStatus,
#[serde(serialize_with = "rfc3339_serialize")]
pub last_active: Option<DateTime<Utc>>,
pub last_active: DateTime<Utc>,
pub error: Option<String>,
}
@@ -29,7 +29,7 @@ pub struct ComputeState {
pub status: ComputeStatus,
/// Timestamp of the last Postgres activity
#[serde(serialize_with = "rfc3339_serialize")]
pub last_active: Option<DateTime<Utc>>,
pub last_active: DateTime<Utc>,
pub error: Option<String>,
}
@@ -54,15 +54,11 @@ pub enum ComputeStatus {
Failed,
}
fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
if let Some(x) = x {
x.to_rfc3339().serialize(s)
} else {
s.serialize_none()
}
x.to_rfc3339().serialize(s)
}
/// Response of the /metrics.json API

View File

@@ -3,9 +3,8 @@
//! The spec.json file is used to pass information to 'compute_ctl'. It contains
//! all the information needed to start up the right version of PostgreSQL,
//! and connect it to the storage nodes.
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use utils::lsn::Lsn;
use serde::Deserialize;
use std::collections::HashMap;
/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
@@ -13,7 +12,6 @@ pub type PgIdent = String;
/// Cluster spec or configuration represented as an optional number of
/// delta operations + final cluster state description.
#[serde_as]
#[derive(Clone, Debug, Default, Deserialize)]
pub struct ComputeSpec {
pub format_version: f32,
@@ -26,25 +24,9 @@ pub struct ComputeSpec {
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
#[serde(default)]
pub mode: ComputeMode,
pub storage_auth_token: Option<String>,
}
#[serde_as]
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
pub enum ComputeMode {
/// A read-write node
#[default]
Primary,
/// A read-only node, pinned at a particular LSN
Static(#[serde_as(as = "DisplayFromStr")] Lsn),
/// A read-only node that follows the tip of the branch in hot standby mode
///
/// Future versions may want to distinguish between replicas with hot standby
/// feedback and other kinds of replication configurations.
Replica,
pub startup_tracing_context: Option<HashMap<String, String>>,
}
#[derive(Clone, Debug, Default, Deserialize)]

View File

@@ -18,29 +18,7 @@ use crate::reltag::RelTag;
use anyhow::bail;
use bytes::{BufMut, Bytes, BytesMut};
/// The state of a tenant in this pageserver.
///
/// ```mermaid
/// stateDiagram-v2
///
/// [*] --> Loading: spawn_load()
/// [*] --> Attaching: spawn_attach()
///
/// Loading --> Activating: activate()
/// Attaching --> Activating: activate()
/// Activating --> Active: infallible
///
/// Loading --> Broken: load() failure
/// Attaching --> Broken: attach() failure
///
/// Active --> Stopping: set_stopping(), part of shutdown & detach
/// Stopping --> Broken: late error in remove_tenant_from_memory
///
/// Broken --> [*]: ignore / detach / shutdown
/// Stopping --> [*]: remove_from_memory complete
///
/// Active --> Broken: cfg(testing)-only tenant break point
/// ```
/// A state of a tenant in pageserver's memory.
#[derive(
Clone,
PartialEq,
@@ -48,78 +26,35 @@ use bytes::{BufMut, Bytes, BytesMut};
serde::Serialize,
serde::Deserialize,
strum_macros::Display,
strum_macros::EnumString,
strum_macros::EnumVariantNames,
strum_macros::AsRefStr,
strum_macros::IntoStaticStr,
)]
#[serde(tag = "slug", content = "data")]
pub enum TenantState {
/// This tenant is being loaded from local disk.
///
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
/// This tenant is being loaded from local disk
Loading,
/// This tenant is being attached to the pageserver.
///
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
/// This tenant is being downloaded from cloud storage.
Attaching,
/// The tenant is transitioning from Loading/Attaching to Active.
///
/// While in this state, the individual timelines are being activated.
///
/// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
Activating(ActivatingFrom),
/// The tenant has finished activating and is open for business.
///
/// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.
/// Tenant is fully operational
Active,
/// The tenant is recognized by pageserver, but it is being detached or the
/// A tenant is recognized by pageserver, but it is being detached or the
/// system is being shut down.
///
/// Transitions out of this state are possible through `set_broken()`.
Stopping,
/// The tenant is recognized by the pageserver, but can no longer be used for
/// any operations.
///
/// If the tenant fails to load or attach, it will transition to this state
/// and it is guaranteed that no background tasks are running in its name.
///
/// The other way to transition into this state is from `Stopping` state
/// through `set_broken()` called from `remove_tenant_from_memory()`. That happens
/// if the cleanup future executed by `remove_tenant_from_memory()` fails.
/// A tenant is recognized by the pageserver, but can no longer be used for
/// any operations, because it failed to be activated.
Broken { reason: String, backtrace: String },
}
impl TenantState {
pub fn attachment_status(&self) -> TenantAttachmentStatus {
use TenantAttachmentStatus::*;
// Below TenantState::Activating is used as "transient" or "transparent" state for
// attachment_status determining.
pub fn has_in_progress_downloads(&self) -> bool {
match self {
// The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
// So, technically, we can return Attached here.
// However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
// But, our attach task might still be fetching the remote timelines, etc.
// So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
// tenant mgr startup distinguishes attaching from loading via marker file.
// If it's loading, there is no attach marker file, i.e., attach had finished in the past.
Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached,
// We only reach Active after successful load / attach.
// So, call atttachment status Attached.
Self::Active => Attached,
// If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
// However, it also becomes Broken if the regular load fails.
// We would need a separate TenantState variant to distinguish these cases.
// However, there's no practical difference from Console's perspective.
// It will run a Postgres-level health check as soon as it observes Attached.
// That will fail on Broken tenants.
// Console can then rollback the attach, or, wait for operator to fix the Broken tenant.
Self::Broken { .. } => Attached,
// Why is Stopping a Maybe case? Because, during pageserver shutdown,
// we set the Stopping state irrespective of whether the tenant
// has finished attaching or not.
Self::Stopping => Maybe,
Self::Loading => true,
Self::Attaching => true,
Self::Active => false,
Self::Stopping => false,
Self::Broken { .. } => false,
}
}
@@ -143,15 +78,6 @@ impl std::fmt::Debug for TenantState {
}
}
/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ActivatingFrom {
/// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`]
Loading,
/// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
Attaching,
}
/// A state of a timeline in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum TimelineState {
@@ -172,8 +98,9 @@ pub enum TimelineState {
#[serde_as]
#[derive(Serialize, Deserialize)]
pub struct TimelineCreateRequest {
#[serde_as(as = "DisplayFromStr")]
pub new_timeline_id: TimelineId,
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub new_timeline_id: Option<TimelineId>,
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub ancestor_timeline_id: Option<TimelineId>,
@@ -184,25 +111,11 @@ pub struct TimelineCreateRequest {
}
#[serde_as]
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
#[derive(Serialize, Deserialize, Default)]
pub struct TenantCreateRequest {
#[serde_as(as = "DisplayFromStr")]
pub new_tenant_id: TenantId,
#[serde(flatten)]
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
}
impl std::ops::Deref for TenantCreateRequest {
type Target = TenantConfig;
fn deref(&self) -> &Self::Target {
&self.config
}
}
#[derive(Serialize, Deserialize, Debug, Default)]
pub struct TenantConfig {
#[serde(default)]
#[serde_as(as = "Option<DisplayFromStr>")]
pub new_tenant_id: Option<TenantId>,
pub checkpoint_distance: Option<u64>,
pub checkpoint_timeout: Option<String>,
pub compaction_target_size: Option<u64>,
@@ -236,35 +149,46 @@ pub struct StatusResponse {
}
impl TenantCreateRequest {
pub fn new(new_tenant_id: TenantId) -> TenantCreateRequest {
pub fn new(new_tenant_id: Option<TenantId>) -> TenantCreateRequest {
TenantCreateRequest {
new_tenant_id,
config: TenantConfig::default(),
..Default::default()
}
}
}
#[serde_as]
#[derive(Serialize, Deserialize, Debug)]
#[serde(deny_unknown_fields)]
#[derive(Serialize, Deserialize)]
pub struct TenantConfigRequest {
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId,
#[serde(flatten)]
pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
}
impl std::ops::Deref for TenantConfigRequest {
type Target = TenantConfig;
fn deref(&self) -> &Self::Target {
&self.config
}
#[serde(default)]
pub checkpoint_distance: Option<u64>,
pub checkpoint_timeout: Option<String>,
pub compaction_target_size: Option<u64>,
pub compaction_period: Option<String>,
pub compaction_threshold: Option<usize>,
pub gc_horizon: Option<u64>,
pub gc_period: Option<String>,
pub image_creation_threshold: Option<usize>,
pub pitr_interval: Option<String>,
pub walreceiver_connect_timeout: Option<String>,
pub lagging_wal_timeout: Option<String>,
pub max_lsn_wal_lag: Option<NonZeroU64>,
pub trace_read_requests: Option<bool>,
// We defer the parsing of the eviction_policy field to the request handler.
// Otherwise we'd have to move the types for eviction policy into this package.
// We might do that once the eviction feature has stabilizied.
// For now, this field is not even documented in the openapi_spec.yml.
pub eviction_policy: Option<serde_json::Value>,
pub min_resident_size_override: Option<u64>,
pub evictions_low_residence_duration_metric_threshold: Option<String>,
}
impl TenantConfigRequest {
pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
let config = TenantConfig {
TenantConfigRequest {
tenant_id,
checkpoint_distance: None,
checkpoint_timeout: None,
compaction_target_size: None,
@@ -281,52 +205,20 @@ impl TenantConfigRequest {
eviction_policy: None,
min_resident_size_override: None,
evictions_low_residence_duration_metric_threshold: None,
};
TenantConfigRequest { tenant_id, config }
}
}
}
#[derive(Debug, Serialize, Deserialize)]
pub struct TenantAttachRequest {
pub config: TenantAttachConfig,
}
/// Newtype to enforce deny_unknown_fields on TenantConfig for
/// its usage inside `TenantAttachRequest`.
#[derive(Debug, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct TenantAttachConfig {
#[serde(flatten)]
allowing_unknown_fields: TenantConfig,
}
impl std::ops::Deref for TenantAttachConfig {
type Target = TenantConfig;
fn deref(&self) -> &Self::Target {
&self.allowing_unknown_fields
}
}
/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
#[derive(Serialize, Deserialize, Clone)]
#[serde(rename_all = "snake_case")]
pub enum TenantAttachmentStatus {
Maybe,
Attached,
}
#[serde_as]
#[derive(Serialize, Deserialize, Clone)]
pub struct TenantInfo {
#[serde_as(as = "DisplayFromStr")]
pub id: TenantId,
// NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
pub state: TenantState,
/// Sum of the size of all layer files.
/// If a layer is present in both local FS and S3, it counts only once.
pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
pub attachment_status: TenantAttachmentStatus,
pub has_in_progress_downloads: Option<bool>,
}
/// This represents the output of the "timeline_detail" and "timeline_list" API calls.
@@ -799,7 +691,7 @@ mod tests {
id: TenantId::generate(),
state: TenantState::Active,
current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached,
has_in_progress_downloads: Some(false),
};
let expected_active = json!({
"id": original_active.id.to_string(),
@@ -807,7 +699,7 @@ mod tests {
"slug": "Active",
},
"current_physical_size": 42,
"attachment_status": "attached",
"has_in_progress_downloads": false,
});
let original_broken = TenantInfo {
@@ -817,7 +709,7 @@ mod tests {
backtrace: "backtrace info".into(),
},
current_physical_size: Some(42),
attachment_status: TenantAttachmentStatus::Attached,
has_in_progress_downloads: Some(false),
};
let expected_broken = json!({
"id": original_broken.id.to_string(),
@@ -829,7 +721,7 @@ mod tests {
}
},
"current_physical_size": 42,
"attachment_status": "attached",
"has_in_progress_downloads": false,
});
assert_eq!(
@@ -844,94 +736,4 @@ mod tests {
assert!(format!("{:?}", &original_broken.state).contains("reason"));
assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
}
#[test]
fn test_reject_unknown_field() {
let id = TenantId::generate();
let create_request = json!({
"new_tenant_id": id.to_string(),
"unknown_field": "unknown_value".to_string(),
});
let err = serde_json::from_value::<TenantCreateRequest>(create_request).unwrap_err();
assert!(
err.to_string().contains("unknown field `unknown_field`"),
"expect unknown field `unknown_field` error, got: {}",
err
);
let id = TenantId::generate();
let config_request = json!({
"tenant_id": id.to_string(),
"unknown_field": "unknown_value".to_string(),
});
let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
assert!(
err.to_string().contains("unknown field `unknown_field`"),
"expect unknown field `unknown_field` error, got: {}",
err
);
let attach_request = json!({
"config": {
"unknown_field": "unknown_value".to_string(),
},
});
let err = serde_json::from_value::<TenantAttachRequest>(attach_request).unwrap_err();
assert!(
err.to_string().contains("unknown field `unknown_field`"),
"expect unknown field `unknown_field` error, got: {}",
err
);
}
#[test]
fn tenantstatus_activating_serde() {
let states = [
TenantState::Activating(ActivatingFrom::Loading),
TenantState::Activating(ActivatingFrom::Attaching),
];
let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
let actual = serde_json::to_string(&states).unwrap();
assert_eq!(actual, expected);
let parsed = serde_json::from_str::<Vec<TenantState>>(&actual).unwrap();
assert_eq!(states.as_slice(), &parsed);
}
#[test]
fn tenantstatus_activating_strum() {
// tests added, because we use these for metrics
let examples = [
(line!(), TenantState::Loading, "Loading"),
(line!(), TenantState::Attaching, "Attaching"),
(
line!(),
TenantState::Activating(ActivatingFrom::Loading),
"Activating",
),
(
line!(),
TenantState::Activating(ActivatingFrom::Attaching),
"Activating",
),
(line!(), TenantState::Active, "Active"),
(line!(), TenantState::Stopping, "Stopping"),
(
line!(),
TenantState::Broken {
reason: "Example".into(),
backtrace: "Looooong backtrace".into(),
},
"Broken",
),
];
for (line, rendered, expected) in examples {
let actual: &'static str = rendered.into();
assert_eq!(actual, expected, "example on {line}");
}
}
}

View File

@@ -50,14 +50,11 @@ impl QueryError {
}
}
/// Returns true if the given error is a normal consequence of a network issue,
/// or the client closing the connection. These errors can happen during normal
/// operations, and don't indicate a bug in our code.
pub fn is_expected_io_error(e: &io::Error) -> bool {
use io::ErrorKind::*;
matches!(
e.kind(),
BrokenPipe | ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
)
}

View File

@@ -146,10 +146,6 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
// From replication/message.h
pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
// From rmgrlist.h
pub const RM_XLOG_ID: u8 = 0;
pub const RM_XACT_ID: u8 = 1;
pub const RM_SMGR_ID: u8 = 2;
@@ -161,7 +157,6 @@ pub const RM_RELMAP_ID: u8 = 7;
pub const RM_STANDBY_ID: u8 = 8;
pub const RM_HEAP2_ID: u8 = 9;
pub const RM_HEAP_ID: u8 = 10;
pub const RM_LOGICALMSG_ID: u8 = 21;
// from xlogreader.h
pub const XLR_INFO_MASK: u8 = 0x0F;

View File

@@ -1,4 +1,5 @@
use anyhow::{bail, ensure};
use anyhow::*;
use core::time::Duration;
use log::*;
use postgres::types::PgLsn;
use postgres::Client;
@@ -7,7 +8,7 @@ use postgres_ffi::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};
use std::cmp::Ordering;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::time::{Duration, Instant};
use std::time::Instant;
use tempfile::{tempdir, TempDir};
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -54,7 +55,7 @@ impl Conf {
self.datadir.join("pg_wal")
}
fn new_pg_command(&self, command: impl AsRef<Path>) -> anyhow::Result<Command> {
fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
let path = self.pg_bin_dir()?.join(command);
ensure!(path.exists(), "Command {:?} does not exist", path);
let mut cmd = Command::new(path);
@@ -64,7 +65,7 @@ impl Conf {
Ok(cmd)
}
pub fn initdb(&self) -> anyhow::Result<()> {
pub fn initdb(&self) -> Result<()> {
if let Some(parent) = self.datadir.parent() {
info!("Pre-creating parent directory {:?}", parent);
// Tests may be run concurrently and there may be a race to create `test_output/`.
@@ -78,7 +79,7 @@ impl Conf {
let output = self
.new_pg_command("initdb")?
.arg("-D")
.arg(&self.datadir)
.arg(self.datadir.as_os_str())
.args(["-U", "postgres", "--no-instructions", "--no-sync"])
.output()?;
debug!("initdb output: {:?}", output);
@@ -91,7 +92,7 @@ impl Conf {
Ok(())
}
pub fn start_server(&self) -> anyhow::Result<PostgresServer> {
pub fn start_server(&self) -> Result<PostgresServer> {
info!("Starting Postgres server in {:?}", self.datadir);
let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
let unix_socket_dir_path = unix_socket_dir.path().to_owned();
@@ -99,9 +100,9 @@ impl Conf {
.new_pg_command("postgres")?
.args(["-c", "listen_addresses="])
.arg("-k")
.arg(&unix_socket_dir_path)
.arg(unix_socket_dir_path.as_os_str())
.arg("-D")
.arg(&self.datadir)
.arg(self.datadir.as_os_str())
.args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg]))
.spawn()?;
let server = PostgresServer {
@@ -122,7 +123,7 @@ impl Conf {
&self,
first_segment_name: &str,
last_segment_name: &str,
) -> anyhow::Result<std::process::Output> {
) -> Result<std::process::Output> {
let first_segment_file = self.datadir.join(first_segment_name);
let last_segment_file = self.datadir.join(last_segment_name);
info!(
@@ -132,7 +133,10 @@ impl Conf {
);
let output = self
.new_pg_command("pg_waldump")?
.args([&first_segment_file, &last_segment_file])
.args([
&first_segment_file.as_os_str(),
&last_segment_file.as_os_str(),
])
.output()?;
debug!("waldump output: {:?}", output);
Ok(output)
@@ -140,9 +144,10 @@ impl Conf {
}
impl PostgresServer {
pub fn connect_with_timeout(&self) -> anyhow::Result<Client> {
pub fn connect_with_timeout(&self) -> Result<Client> {
let retry_until = Instant::now() + *self.client_config.get_connect_timeout().unwrap();
while Instant::now() < retry_until {
use std::result::Result::Ok;
if let Ok(client) = self.client_config.connect(postgres::NoTls) {
return Ok(client);
}
@@ -159,6 +164,7 @@ impl PostgresServer {
impl Drop for PostgresServer {
fn drop(&mut self) {
use std::result::Result::Ok;
match self.process.try_wait() {
Ok(Some(_)) => return,
Ok(None) => {
@@ -173,12 +179,12 @@ impl Drop for PostgresServer {
}
pub trait PostgresClientExt: postgres::GenericClient {
fn pg_current_wal_insert_lsn(&mut self) -> anyhow::Result<PgLsn> {
fn pg_current_wal_insert_lsn(&mut self) -> Result<PgLsn> {
Ok(self
.query_one("SELECT pg_current_wal_insert_lsn()", &[])?
.get(0))
}
fn pg_current_wal_flush_lsn(&mut self) -> anyhow::Result<PgLsn> {
fn pg_current_wal_flush_lsn(&mut self) -> Result<PgLsn> {
Ok(self
.query_one("SELECT pg_current_wal_flush_lsn()", &[])?
.get(0))
@@ -187,7 +193,7 @@ pub trait PostgresClientExt: postgres::GenericClient {
impl<C: postgres::GenericClient> PostgresClientExt for C {}
pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> anyhow::Result<()> {
pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result<()> {
client.execute("create extension if not exists neon_test_utils", &[])?;
let wal_keep_size: String = client.query_one("SHOW wal_keep_size", &[])?.get(0);
@@ -221,13 +227,13 @@ pub trait Crafter {
/// * A vector of some valid "interesting" intermediate LSNs which one may start reading from.
/// May include or exclude Lsn(0) and the end-of-wal.
/// * The expected end-of-wal LSN.
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)>;
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)>;
}
fn craft_internal<C: postgres::GenericClient>(
client: &mut C,
f: impl Fn(&mut C, PgLsn) -> anyhow::Result<(Vec<PgLsn>, Option<PgLsn>)>,
) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
f: impl Fn(&mut C, PgLsn) -> Result<(Vec<PgLsn>, Option<PgLsn>)>,
) -> Result<(Vec<PgLsn>, PgLsn)> {
ensure_server_config(client)?;
let initial_lsn = client.pg_current_wal_insert_lsn()?;
@@ -259,7 +265,7 @@ fn craft_internal<C: postgres::GenericClient>(
pub struct Simple;
impl Crafter for Simple {
const NAME: &'static str = "simple";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_internal(client, |client, _| {
client.execute("CREATE table t(x int)", &[])?;
Ok((Vec::new(), None))
@@ -270,7 +276,7 @@ impl Crafter for Simple {
pub struct LastWalRecordXlogSwitch;
impl Crafter for LastWalRecordXlogSwitch {
const NAME: &'static str = "last_wal_record_xlog_switch";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
@@ -292,7 +298,7 @@ impl Crafter for LastWalRecordXlogSwitch {
pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
// Do not use generate_internal because here we end up with flush_lsn exactly on
// the segment boundary and insert_lsn after the initial page header, which is unusual.
ensure_server_config(client)?;
@@ -359,7 +365,7 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
fn craft_single_logical_message(
client: &mut impl postgres::GenericClient,
transactional: bool,
) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_internal(client, |client, initial_lsn| {
ensure!(
initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
@@ -401,7 +407,7 @@ fn craft_single_logical_message(
pub struct WalRecordCrossingSegmentFollowedBySmallOne;
impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_single_logical_message(client, true)
}
}
@@ -409,7 +415,7 @@ impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
pub struct LastWalRecordCrossingSegment;
impl Crafter for LastWalRecordCrossingSegment {
const NAME: &'static str = "last_wal_record_crossing_segment";
fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
craft_single_logical_message(client, false)
}
}

View File

@@ -613,7 +613,7 @@ pub struct XLogDataBody<'a> {
#[derive(Debug)]
pub struct WalSndKeepAlive {
pub wal_end: u64, // current end of WAL on the server
pub sent_ptr: u64,
pub timestamp: i64,
pub request_reply: bool,
}
@@ -924,7 +924,7 @@ impl<'a> BeMessage<'a> {
buf.put_u8(b'd');
write_body(buf, |buf| {
buf.put_u8(b'k');
buf.put_u64(req.wal_end);
buf.put_u64(req.sent_ptr);
buf.put_i64(req.timestamp);
buf.put_u8(u8::from(req.request_reply));
});

View File

@@ -12,7 +12,6 @@ aws-smithy-http.workspace = true
aws-types.workspace = true
aws-config.workspace = true
aws-sdk-s3.workspace = true
aws-credential-types.workspace = true
hyper = { workspace = true, features = ["stream"] }
serde.workspace = true
serde_json.workspace = true

View File

@@ -128,15 +128,6 @@ impl RemoteStorage for LocalFs {
// We need this dance with sort of durable rename (without fsyncs)
// to prevent partial uploads. This was really hit when pageserver shutdown
// cancelled the upload and partial file was left on the fs
// NOTE: Because temp file suffix always the same this operation is racy.
// Two concurrent operations can lead to the following sequence:
// T1: write(temp)
// T2: write(temp) -> overwrites the content
// T1: rename(temp, dst) -> succeeds
// T2: rename(temp, dst) -> fails, temp no longet exists
// This can be solved by supplying unique temp suffix every time, but this situation
// is not normal in the first place, the error can help (and helped at least once)
// to discover bugs in upper level synchronization.
let temp_file_path =
path_with_suffix_extension(&target_file_path, LOCAL_FS_TEMP_FILE_SUFFIX);
let mut destination = io::BufWriter::new(

View File

@@ -9,15 +9,14 @@ use std::sync::Arc;
use anyhow::Context;
use aws_config::{
environment::credentials::EnvironmentVariableCredentialsProvider,
imds::credentials::ImdsCredentialsProvider, meta::credentials::CredentialsProviderChain,
imds::credentials::ImdsCredentialsProvider,
meta::credentials::{CredentialsProviderChain, LazyCachingCredentialsProvider},
};
use aws_credential_types::cache::CredentialsCache;
use aws_sdk_s3::{
config::{Config, Region},
error::SdkError,
operation::get_object::GetObjectError,
primitives::ByteStream,
Client,
config::Config,
error::{GetObjectError, GetObjectErrorKind},
types::{ByteStream, SdkError},
Client, Endpoint, Region,
};
use aws_smithy_http::body::SdkBody;
use hyper::Body;
@@ -126,23 +125,28 @@ impl S3Bucket {
let credentials_provider = {
// uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
CredentialsProviderChain::first_try(
"env",
EnvironmentVariableCredentialsProvider::new(),
)
let env_creds = EnvironmentVariableCredentialsProvider::new();
// uses imds v2
.or_else("imds", ImdsCredentialsProvider::builder().build())
let imds = ImdsCredentialsProvider::builder().build();
// finally add caching.
// this might change in future, see https://github.com/awslabs/aws-sdk-rust/issues/629
LazyCachingCredentialsProvider::builder()
.load(CredentialsProviderChain::first_try("env", env_creds).or_else("imds", imds))
.build()
};
let mut config_builder = Config::builder()
.region(Region::new(aws_config.bucket_region.clone()))
.credentials_cache(CredentialsCache::lazy())
.credentials_provider(credentials_provider);
if let Some(custom_endpoint) = aws_config.endpoint.clone() {
config_builder = config_builder
.endpoint_url(custom_endpoint)
.force_path_style(true);
let endpoint = Endpoint::immutable(
custom_endpoint
.parse()
.expect("Failed to parse S3 custom endpoint"),
);
config_builder.set_endpoint_resolver(Some(Arc::new(endpoint)));
}
let client = Client::from_conf(config_builder.build());
@@ -225,9 +229,14 @@ impl S3Bucket {
))),
})
}
Err(SdkError::ServiceError(e)) if matches!(e.err(), GetObjectError::NoSuchKey(_)) => {
Err(DownloadError::NotFound)
}
Err(SdkError::ServiceError {
err:
GetObjectError {
kind: GetObjectErrorKind::NoSuchKey(..),
..
},
..
}) => Err(DownloadError::NotFound),
Err(e) => {
metrics::inc_get_object_fail();
Err(DownloadError::Other(anyhow::anyhow!(

View File

@@ -1,21 +1,21 @@
#!/bin/bash
set -euxo pipefail
PG_BIN=$1
WAL_PATH=$2
DATA_DIR=$3
PORT=$4
SYSID=$(od -A n -j 24 -N 8 -t d8 "$WAL_PATH"/000000010000000000000002* | cut -c 3-)
rm -fr "$DATA_DIR"
env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID"
echo port="$PORT" >> "$DATA_DIR"/postgresql.conf
REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-)
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
rm -fr $DATA_DIR
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
echo port=$PORT >> $DATA_DIR/postgresql.conf
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
declare -i WAL_SIZE=$REDO_POS+114
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile start
"$PG_BIN"/pg_ctl -D "$DATA_DIR" -l logfile stop -m immediate
cp "$DATA_DIR"/pg_wal/000000010000000000000001 .
cp "$WAL_PATH"/* "$DATA_DIR"/pg_wal/
for partial in "$DATA_DIR"/pg_wal/*.partial ; do mv "$partial" "${partial%.partial}" ; done
dd if=000000010000000000000001 of="$DATA_DIR"/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
$PG_BIN/pg_ctl -D $DATA_DIR -l logfile start
$PG_BIN/pg_ctl -D $DATA_DIR -l logfile stop -m immediate
cp $DATA_DIR/pg_wal/000000010000000000000001 .
cp $WAL_PATH/* $DATA_DIR/pg_wal/
if [ -f $DATA_DIR/pg_wal/*.partial ]
then
(cd $DATA_DIR/pg_wal ; for partial in \*.partial ; do mv $partial `basename $partial .partial` ; done)
fi
dd if=000000010000000000000001 of=$DATA_DIR/pg_wal/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
rm -f 000000010000000000000001

View File

@@ -0,0 +1,20 @@
PG_BIN=$1
WAL_PATH=$2
DATA_DIR=$3
PORT=$4
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
rm -fr $DATA_DIR /tmp/pg_wals
mkdir /tmp/pg_wals
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
echo port=$PORT >> $DATA_DIR/postgresql.conf
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
declare -i WAL_SIZE=$REDO_POS+114
cp $WAL_PATH/* /tmp/pg_wals
if [ -f $DATA_DIR/pg_wal/*.partial ]
then
(cd /tmp/pg_wals ; for partial in \*.partial ; do mv $partial `basename $partial .partial` ; done)
fi
dd if=$DATA_DIR/pg_wal/000000010000000000000001 of=/tmp/pg_wals/000000010000000000000001 bs=$WAL_SIZE count=1 conv=notrunc
echo > $DATA_DIR/recovery.signal
rm -f $DATA_DIR/pg_wal/*
echo "restore_command = 'cp /tmp/pg_wals/%f %p'" >> $DATA_DIR/postgresql.conf

View File

@@ -1,33 +0,0 @@
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex};
/// While a reference is kept around, the associated [`Barrier::wait`] will wait.
///
/// Can be cloned, moved and kept around in futures as "guard objects".
#[derive(Clone)]
pub struct Completion(mpsc::Sender<()>);
/// Barrier will wait until all clones of [`Completion`] have been dropped.
#[derive(Clone)]
pub struct Barrier(Arc<Mutex<mpsc::Receiver<()>>>);
impl Barrier {
pub async fn wait(self) {
self.0.lock().await.recv().await;
}
pub async fn maybe_wait(barrier: Option<Barrier>) {
if let Some(b) = barrier {
b.wait().await
}
}
}
/// Create new Guard and Barrier pair.
pub fn channel() -> (Completion, Barrier) {
let (tx, rx) = mpsc::channel::<()>(1);
let rx = Mutex::new(rx);
let rx = Arc::new(rx);
(Completion(tx), Barrier(rx))
}

View File

@@ -1,5 +1,5 @@
use crate::auth::{Claims, JwtAuth};
use crate::http::error::{api_error_handler, route_error_handler, ApiError};
use crate::http::error;
use anyhow::{anyhow, Context};
use hyper::header::{HeaderName, AUTHORIZATION};
use hyper::http::HeaderValue;
@@ -16,6 +16,8 @@ use std::future::Future;
use std::net::TcpListener;
use std::str::FromStr;
use super::error::ApiError;
static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"libmetrics_metric_handler_requests_total",
@@ -33,12 +35,8 @@ struct RequestId(String);
/// Adds a tracing info_span! instrumentation around the handler events,
/// logs the request start and end events for non-GET requests and non-200 responses.
///
/// Usage: Replace `my_handler` with `|r| request_span(r, my_handler)`
///
/// Use this to distinguish between logs of different HTTP requests: every request handler wrapped
/// with this will get request info logged in the wrapping span, including the unique request ID.
///
/// This also handles errors, logging them and converting them to an HTTP error response.
/// in this type will get request info logged in the wrapping span, including the unique request ID.
///
/// There could be other ways to implement similar functionality:
///
@@ -56,56 +54,60 @@ struct RequestId(String);
/// tries to achive with its `.instrument` used in the current approach.
///
/// If needed, a declarative macro to substitute the |r| ... closure boilerplate could be introduced.
pub async fn request_span<R, H>(request: Request<Body>, handler: H) -> R::Output
pub struct RequestSpan<E, R, H>(pub H)
where
R: Future<Output = Result<Response<Body>, ApiError>> + Send + 'static,
H: FnOnce(Request<Body>) -> R + Send + Sync + 'static,
E: Into<Box<dyn std::error::Error + Send + Sync>> + 'static,
R: Future<Output = Result<Response<Body>, E>> + Send + 'static,
H: Fn(Request<Body>) -> R + Send + Sync + 'static;
impl<E, R, H> RequestSpan<E, R, H>
where
E: Into<Box<dyn std::error::Error + Send + Sync>> + 'static,
R: Future<Output = Result<Response<Body>, E>> + Send + 'static,
H: Fn(Request<Body>) -> R + Send + Sync + 'static,
{
let request_id = request.context::<RequestId>().unwrap_or_default().0;
let method = request.method();
let path = request.uri().path();
let request_span = info_span!("request", %method, %path, %request_id);
/// Creates a tracing span around inner request handler and executes the request handler in the contex of that span.
/// Use as `|r| RequestSpan(my_handler).handle(r)` instead of `my_handler` as the request handler to get the span enabled.
pub async fn handle(self, request: Request<Body>) -> Result<Response<Body>, E> {
let request_id = request.context::<RequestId>().unwrap_or_default().0;
let method = request.method();
let path = request.uri().path();
let request_span = info_span!("request", %method, %path, %request_id);
let log_quietly = method == Method::GET;
async move {
let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding();
if log_quietly {
debug!("Handling request");
} else {
info!("Handling request");
}
// No special handling for panics here. There's a `tracing_panic_hook` from another
// module to do that globally.
let res = handler(request).await;
cancellation_guard.disarm();
// Log the result if needed.
//
// We also convert any errors into an Ok response with HTTP error code here.
// `make_router` sets a last-resort error handler that would do the same, but
// we prefer to do it here, before we exit the request span, so that the error
// is still logged with the span.
//
// (Because we convert errors to Ok response, we never actually return an error,
// and we could declare the function to return the never type (`!`). However,
// using `routerify::RouterBuilder` requires a proper error type.)
match res {
Ok(response) => {
let response_status = response.status();
if log_quietly && response_status.is_success() {
debug!("Request handled, status: {response_status}");
} else {
info!("Request handled, status: {response_status}");
}
Ok(response)
let log_quietly = method == Method::GET;
async move {
let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding();
if log_quietly {
debug!("Handling request");
} else {
info!("Handling request");
}
// Note that we reuse `error::handler` here and not returning and error at all,
// yet cannot use `!` directly in the method signature due to `routerify::RouterBuilder` limitation.
// Usage of the error handler also means that we expect only the `ApiError` errors to be raised in this call.
//
// Panics are not handled separately, there's a `tracing_panic_hook` from another module to do that globally.
let res = (self.0)(request).await;
cancellation_guard.disarm();
match res {
Ok(response) => {
let response_status = response.status();
if log_quietly && response_status.is_success() {
debug!("Request handled, status: {response_status}");
} else {
info!("Request handled, status: {response_status}");
}
Ok(response)
}
Err(e) => Ok(error::handler(e.into()).await),
}
Err(err) => Ok(api_error_handler(err)),
}
.instrument(request_span)
.await
}
.instrument(request_span)
.await
}
/// Drop guard to WARN in case the request was dropped before completion.
@@ -129,9 +131,7 @@ impl RequestCancelled {
impl Drop for RequestCancelled {
fn drop(&mut self) {
if std::thread::panicking() {
// we are unwinding due to panicking, assume we are not dropped for cancellation
} else if let Some(span) = self.warn.take() {
if let Some(span) = self.warn.take() {
// the span has all of the info already, but the outer `.instrument(span)` has already
// been dropped, so we need to manually re-enter it for this message.
//
@@ -205,8 +205,10 @@ pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
.middleware(Middleware::post_with_info(
add_request_id_header_to_response,
))
.get("/metrics", |r| request_span(r, prometheus_metrics_handler))
.err_handler(route_error_handler)
.get("/metrics", |r| {
RequestSpan(prometheus_metrics_handler).handle(r)
})
.err_handler(error::handler)
}
pub fn attach_openapi_ui(
@@ -216,14 +218,12 @@ pub fn attach_openapi_ui(
ui_mount_path: &'static str,
) -> RouterBuilder<hyper::Body, ApiError> {
router_builder
.get(spec_mount_path,
move |r| request_span(r, move |_| async move {
Ok(Response::builder().body(Body::from(spec)).unwrap())
})
)
.get(ui_mount_path,
move |r| request_span(r, move |_| async move {
Ok(Response::builder().body(Body::from(format!(r#"
.get(spec_mount_path, move |r| {
RequestSpan(move |_| async move { Ok(Response::builder().body(Body::from(spec)).unwrap()) })
.handle(r)
})
.get(ui_mount_path, move |r| RequestSpan( move |_| async move {
Ok(Response::builder().body(Body::from(format!(r#"
<!DOCTYPE html>
<html lang="en">
<head>
@@ -253,8 +253,7 @@ pub fn attach_openapi_ui(
</body>
</html>
"#, spec_mount_path))).unwrap())
})
)
}).handle(r))
}
fn parse_token(header_value: &str) -> Result<&str, ApiError> {

View File

@@ -83,24 +83,13 @@ impl HttpErrorBody {
}
}
pub async fn route_error_handler(err: routerify::RouteError) -> Response<Body> {
match err.downcast::<ApiError>() {
Ok(api_error) => api_error_handler(*api_error),
Err(other_error) => {
// We expect all the request handlers to return an ApiError, so this should
// not be reached. But just in case.
error!("Error processing HTTP request: {other_error:?}");
HttpErrorBody::response_from_msg_and_status(
other_error.to_string(),
StatusCode::INTERNAL_SERVER_ERROR,
)
}
}
}
pub async fn handler(err: routerify::RouteError) -> Response<Body> {
let api_error = err
.downcast::<ApiError>()
.expect("handler should always return api error");
pub fn api_error_handler(api_error: ApiError) -> Response<Body> {
// Print a stack trace for Internal Server errors
if let ApiError::InternalServerError(_) = api_error {
if let ApiError::InternalServerError(_) = api_error.as_ref() {
error!("Error processing HTTP request: {api_error:?}");
} else {
error!("Error processing HTTP request: {api_error:#}");

View File

@@ -8,26 +8,12 @@ use super::error::ApiError;
pub async fn json_request<T: for<'de> Deserialize<'de>>(
request: &mut Request<Body>,
) -> Result<T, ApiError> {
json_request_or_empty_body(request)
.await?
.context("missing request body")
.map_err(ApiError::BadRequest)
}
/// Will be removed as part of https://github.com/neondatabase/neon/issues/4282
pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
request: &mut Request<Body>,
) -> Result<Option<T>, ApiError> {
let body = hyper::body::aggregate(request.body_mut())
let whole_body = hyper::body::aggregate(request.body_mut())
.await
.context("Failed to read request body")
.map_err(ApiError::BadRequest)?;
if body.remaining() == 0 {
return Ok(None);
}
serde_json::from_reader(body.reader())
serde_json::from_reader(whole_body.reader())
.context("Failed to parse json request")
.map(Some)
.map_err(ApiError::BadRequest)
}

View File

@@ -58,48 +58,24 @@ pub mod pageserver_feedback;
pub mod tracing_span_assert;
pub mod rate_limit;
/// Simple once-barrier and a guard which keeps barrier awaiting.
pub mod completion;
mod failpoint_macro_helpers {
/// use with fail::cfg("$name", "return(2000)")
///
/// The effect is similar to a "sleep(2000)" action, i.e. we sleep for the
/// specified time (in milliseconds). The main difference is that we use async
/// tokio sleep function. Another difference is that we print lines to the log,
/// which can be useful in tests to check that the failpoint was hit.
#[macro_export]
macro_rules! failpoint_sleep_millis_async {
($name:literal) => {{
// If the failpoint is used with a "return" action, set should_sleep to the
// returned value (as string). Otherwise it's set to None.
let should_sleep = (|| {
::fail::fail_point!($name, |x| x);
::std::option::Option::None
})();
// Sleep if the action was a returned value
if let ::std::option::Option::Some(duration_str) = should_sleep {
$crate::failpoint_sleep_helper($name, duration_str).await
}
}};
}
// Helper function used by the macro. (A function has nicer scoping so we
// don't need to decorate everything with "::")
pub async fn failpoint_sleep_helper(name: &'static str, duration_str: String) {
let millis = duration_str.parse::<u64>().unwrap();
let d = std::time::Duration::from_millis(millis);
tracing::info!("failpoint {:?}: sleeping for {:?}", name, d);
tokio::time::sleep(d).await;
tracing::info!("failpoint {:?}: sleep done", name);
}
/// use with fail::cfg("$name", "return(2000)")
#[macro_export]
macro_rules! failpoint_sleep_millis_async {
($name:literal) => {{
let should_sleep: Option<std::time::Duration> = (|| {
fail::fail_point!($name, |v: Option<_>| {
let millis = v.unwrap().parse::<u64>().unwrap();
Some(Duration::from_millis(millis))
});
None
})();
if let Some(d) = should_sleep {
tracing::info!("failpoint {:?}: sleeping for {:?}", $name, d);
tokio::time::sleep(d).await;
tracing::info!("failpoint {:?}: sleep done", $name);
}
}};
}
pub use failpoint_macro_helpers::failpoint_sleep_helper;
/// This is a shortcut to embed git sha into binaries and avoid copying the same build script to all packages
///

View File

@@ -1,66 +0,0 @@
//! A helper to rate limit operations.
use std::time::{Duration, Instant};
pub struct RateLimit {
last: Option<Instant>,
interval: Duration,
}
impl RateLimit {
pub fn new(interval: Duration) -> Self {
Self {
last: None,
interval,
}
}
/// Call `f` if the rate limit allows.
/// Don't call it otherwise.
pub fn call<F: FnOnce()>(&mut self, f: F) {
let now = Instant::now();
match self.last {
Some(last) if now - last <= self.interval => {
// ratelimit
}
_ => {
self.last = Some(now);
f();
}
}
}
}
#[cfg(test)]
mod tests {
use std::sync::atomic::AtomicUsize;
#[test]
fn basics() {
use super::RateLimit;
use std::sync::atomic::Ordering::Relaxed;
use std::time::Duration;
let called = AtomicUsize::new(0);
let mut f = RateLimit::new(Duration::from_millis(100));
let cl = || {
called.fetch_add(1, Relaxed);
};
f.call(cl);
assert_eq!(called.load(Relaxed), 1);
f.call(cl);
assert_eq!(called.load(Relaxed), 1);
f.call(cl);
assert_eq!(called.load(Relaxed), 1);
std::thread::sleep(Duration::from_millis(100));
f.call(cl);
assert_eq!(called.load(Relaxed), 2);
f.call(cl);
assert_eq!(called.load(Relaxed), 2);
std::thread::sleep(Duration::from_millis(100));
f.call(cl);
assert_eq!(called.load(Relaxed), 3);
}
}

View File

@@ -144,8 +144,6 @@ where
///
/// This call won't complete until someone has called `advance`
/// with a number greater than or equal to the one we're waiting for.
///
/// This function is async cancellation-safe.
pub async fn wait_for(&self, num: V) -> Result<(), SeqWaitError> {
match self.queue_for_wait(num) {
Ok(None) => Ok(()),
@@ -161,8 +159,6 @@ where
///
/// If that hasn't happened after the specified timeout duration,
/// [`SeqWaitError::Timeout`] will be returned.
///
/// This function is async cancellation-safe.
pub async fn wait_for_timeout(
&self,
num: V,

View File

@@ -1,18 +0,0 @@
[package]
name = "pagectl"
version = "0.1.0"
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow.workspace = true
bytes.workspace = true
clap = { workspace = true, features = ["string"] }
git-version.workspace = true
pageserver = { path = ".." }
postgres_ffi.workspace = true
utils.workspace = true
svg_fmt.workspace = true
workspace_hack.workspace = true

View File

@@ -1,169 +0,0 @@
use std::path::{Path, PathBuf};
use anyhow::Result;
use clap::Subcommand;
use pageserver::tenant::block_io::BlockCursor;
use pageserver::tenant::disk_btree::DiskBtreeReader;
use pageserver::tenant::storage_layer::delta_layer::{BlobRef, Summary};
use pageserver::{page_cache, virtual_file};
use pageserver::{
repository::{Key, KEY_SIZE},
tenant::{
block_io::FileBlockReader, disk_btree::VisitDirection,
storage_layer::delta_layer::DELTA_KEY_SIZE,
},
virtual_file::VirtualFile,
};
use std::fs;
use utils::bin_ser::BeSer;
use crate::layer_map_analyzer::parse_filename;
#[derive(Subcommand)]
pub(crate) enum LayerCmd {
/// List all tenants and timelines under the pageserver path
///
/// Example: `cargo run --bin pagectl layer list .neon/`
List { path: PathBuf },
/// List all layers of a given tenant and timeline
///
/// Example: `cargo run --bin pagectl layer list .neon/`
ListLayer {
path: PathBuf,
tenant: String,
timeline: String,
},
/// Dump all information of a layer file
DumpLayer {
path: PathBuf,
tenant: String,
timeline: String,
/// The id from list-layer command
id: usize,
},
}
fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
use pageserver::tenant::blob_io::BlobCursor;
use pageserver::tenant::block_io::BlockReader;
let path = path.as_ref();
virtual_file::init(10);
page_cache::init(100);
let file = FileBlockReader::new(VirtualFile::open(path)?);
let summary_blk = file.read_blk(0)?;
let actual_summary = Summary::des_prefix(summary_blk.as_ref())?;
let tree_reader = DiskBtreeReader::<_, DELTA_KEY_SIZE>::new(
actual_summary.index_start_blk,
actual_summary.index_root_blk,
&file,
);
// TODO(chi): dedup w/ `delta_layer.rs` by exposing the API.
let mut all = vec![];
tree_reader.visit(
&[0u8; DELTA_KEY_SIZE],
VisitDirection::Forwards,
|key, value_offset| {
let curr = Key::from_slice(&key[..KEY_SIZE]);
all.push((curr, BlobRef(value_offset)));
true
},
)?;
let mut cursor = BlockCursor::new(&file);
for (k, v) in all {
let value = cursor.read_blob(v.pos())?;
println!("key:{} value_len:{}", k, value.len());
}
// TODO(chi): special handling for last key?
Ok(())
}
pub(crate) fn main(cmd: &LayerCmd) -> Result<()> {
match cmd {
LayerCmd::List { path } => {
for tenant in fs::read_dir(path.join("tenants"))? {
let tenant = tenant?;
if !tenant.file_type()?.is_dir() {
continue;
}
println!("tenant {}", tenant.file_name().to_string_lossy());
for timeline in fs::read_dir(tenant.path().join("timelines"))? {
let timeline = timeline?;
if !timeline.file_type()?.is_dir() {
continue;
}
println!("- timeline {}", timeline.file_name().to_string_lossy());
}
}
}
LayerCmd::ListLayer {
path,
tenant,
timeline,
} => {
let timeline_path = path
.join("tenants")
.join(tenant)
.join("timelines")
.join(timeline);
let mut idx = 0;
for layer in fs::read_dir(timeline_path)? {
let layer = layer?;
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
{
println!(
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
idx,
layer_file.key_range.start,
layer_file.key_range.end,
layer_file.lsn_range.start,
layer_file.lsn_range.end,
layer_file.is_delta,
);
idx += 1;
}
}
}
LayerCmd::DumpLayer {
path,
tenant,
timeline,
id,
} => {
let timeline_path = path
.join("tenants")
.join(tenant)
.join("timelines")
.join(timeline);
let mut idx = 0;
for layer in fs::read_dir(timeline_path)? {
let layer = layer?;
if let Some(layer_file) = parse_filename(&layer.file_name().into_string().unwrap())
{
if *id == idx {
// TODO(chi): dedup code
println!(
"[{:3}] key:{}-{}\n lsn:{}-{}\n delta:{}",
idx,
layer_file.key_range.start,
layer_file.key_range.end,
layer_file.lsn_range.start,
layer_file.lsn_range.end,
layer_file.is_delta,
);
if layer_file.is_delta {
read_delta_file(layer.path())?;
} else {
anyhow::bail!("not supported yet :(");
}
break;
}
idx += 1;
}
}
}
}
Ok(())
}
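
A minimal usage sketch of the subcommands above (tenant and timeline ids are placeholders; the `list-layer` and `dump-layer` names assume clap's default kebab-case renaming):

cargo run --bin pagectl layer list .neon/
cargo run --bin pagectl layer list-layer .neon/ <tenant_id> <timeline_id>
cargo run --bin pagectl layer dump-layer .neon/ <tenant_id> <timeline_id> 0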

View File

@@ -1,179 +0,0 @@
//! A helper tool to manage pageserver binary files.
//! Accepts a file as an argument, attempts to parse it in all possible ways,
//! and prints its interpreted contents.
//!
//! A separate `metadata` subcommand allows printing and updating the pageserver's metadata file.
mod draw_timeline_dir;
mod layer_map_analyzer;
mod layers;
use clap::{Parser, Subcommand};
use layers::LayerCmd;
use pageserver::{
context::{DownloadBehavior, RequestContext},
page_cache,
task_mgr::TaskKind,
tenant::{dump_layerfile_from_path, metadata::TimelineMetadata},
virtual_file,
};
use postgres_ffi::ControlFileData;
use std::path::{Path, PathBuf};
use utils::{lsn::Lsn, project_git_version};
project_git_version!(GIT_VERSION);
#[derive(Parser)]
#[command(
version = GIT_VERSION,
about = "Neon Pageserver binutils",
    long_about = "Utility for reading and managing pageserver (and related) binary files"
)]
#[command(propagate_version = true)]
struct CliOpts {
#[command(subcommand)]
command: Commands,
}
#[derive(Subcommand)]
enum Commands {
Metadata(MetadataCmd),
PrintLayerFile(PrintLayerFileCmd),
DrawTimeline {},
AnalyzeLayerMap(AnalyzeLayerMapCmd),
#[command(subcommand)]
Layer(LayerCmd),
}
/// Read and update pageserver metadata file
#[derive(Parser)]
struct MetadataCmd {
/// Input metadata file path
metadata_path: PathBuf,
/// Replace disk consistent Lsn
disk_consistent_lsn: Option<Lsn>,
/// Replace previous record Lsn
prev_record_lsn: Option<Lsn>,
    /// Replace latest gc cutoff
latest_gc_cuttoff: Option<Lsn>,
}
#[derive(Parser)]
struct PrintLayerFileCmd {
/// Pageserver data path
path: PathBuf,
}
#[derive(Parser)]
struct AnalyzeLayerMapCmd {
/// Pageserver data path
path: PathBuf,
/// Max holes
max_holes: Option<usize>,
}
fn main() -> anyhow::Result<()> {
let cli = CliOpts::parse();
match cli.command {
Commands::Layer(cmd) => {
layers::main(&cmd)?;
}
Commands::Metadata(cmd) => {
handle_metadata(&cmd)?;
}
Commands::DrawTimeline {} => {
draw_timeline_dir::main()?;
}
Commands::AnalyzeLayerMap(cmd) => {
layer_map_analyzer::main(&cmd)?;
}
Commands::PrintLayerFile(cmd) => {
if let Err(e) = read_pg_control_file(&cmd.path) {
println!(
"Failed to read input file as a pg control one: {e:#}\n\
Attempting to read it as layer file"
);
print_layerfile(&cmd.path)?;
}
}
};
Ok(())
}
fn read_pg_control_file(control_file_path: &Path) -> anyhow::Result<()> {
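    // Decode the file as a PostgreSQL control file (pg_control), dump the parsed struct,
    // and show the checkpoint LSN both as stored and as aligned.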
let control_file = ControlFileData::decode(&std::fs::read(control_file_path)?)?;
println!("{control_file:?}");
let control_file_initdb = Lsn(control_file.checkPoint);
println!(
"pg_initdb_lsn: {}, aligned: {}",
control_file_initdb,
control_file_initdb.align()
);
Ok(())
}
fn print_layerfile(path: &Path) -> anyhow::Result<()> {
// Basic initialization of things that don't change after startup
virtual_file::init(10);
page_cache::init(100);
let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error);
dump_layerfile_from_path(path, true, &ctx)
}
fn handle_metadata(
MetadataCmd {
metadata_path: path,
disk_consistent_lsn,
prev_record_lsn,
latest_gc_cuttoff,
}: &MetadataCmd,
) -> Result<(), anyhow::Error> {
let metadata_bytes = std::fs::read(path)?;
let mut meta = TimelineMetadata::from_bytes(&metadata_bytes)?;
println!("Current metadata:\n{meta:?}");
let mut update_meta = false;
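    // Each optional argument, when supplied, rebuilds the metadata with only that field
    // replaced; every other field is carried over unchanged from the file read above.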
if let Some(disk_consistent_lsn) = disk_consistent_lsn {
meta = TimelineMetadata::new(
*disk_consistent_lsn,
meta.prev_record_lsn(),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if let Some(prev_record_lsn) = prev_record_lsn {
meta = TimelineMetadata::new(
meta.disk_consistent_lsn(),
Some(*prev_record_lsn),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
meta.latest_gc_cutoff_lsn(),
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if let Some(latest_gc_cuttoff) = latest_gc_cuttoff {
meta = TimelineMetadata::new(
meta.disk_consistent_lsn(),
meta.prev_record_lsn(),
meta.ancestor_timeline(),
meta.ancestor_lsn(),
*latest_gc_cuttoff,
meta.initdb_lsn(),
meta.pg_version(),
);
update_meta = true;
}
if update_meta {
let metadata_bytes = meta.to_bytes()?;
std::fs::write(path, metadata_bytes)?;
}
Ok(())
}
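
A hedged usage sketch of the metadata subcommand (the path and LSN value are illustrative only): `cargo run --bin pagectl metadata tenants/<tenant_id>/timelines/<timeline_id>/metadata` prints the current metadata without modifying it, while appending positional LSNs, e.g. `cargo run --bin pagectl metadata <path-to-metadata-file> 0/169C3C8`, rewrites the corresponding fields in place.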

View File

@@ -12,7 +12,7 @@
//! Example use:
//! ```
//! $ ls test_output/test_pgbench\[neon-45-684\]/repo/tenants/$TENANT/timelines/$TIMELINE | \
//! $ grep "__" | cargo run --release --bin pagectl draw-timeline-dir > out.svg
//! $ grep "__" | cargo run --release --bin draw_timeline_dir > out.svg
//! $ firefox out.svg
//! ```
//!
@@ -62,7 +62,7 @@ fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
(keys, lsns)
}
pub fn main() -> Result<()> {
fn main() -> Result<()> {
// Parse layer filenames from stdin
let mut ranges: Vec<(Range<Key>, Range<Lsn>)> = vec![];
let stdin = io::stdin();

View File

@@ -6,7 +6,7 @@ use anyhow::Result;
use std::cmp::Ordering;
use std::collections::BinaryHeap;
use std::ops::Range;
use std::{fs, path::Path, str};
use std::{env, fs, path::Path, path::PathBuf, str, str::FromStr};
use pageserver::page_cache::PAGE_SZ;
use pageserver::repository::{Key, KEY_SIZE};
@@ -18,14 +18,12 @@ use pageserver::virtual_file::VirtualFile;
use utils::{bin_ser::BeSer, lsn::Lsn};
use crate::AnalyzeLayerMapCmd;
const MIN_HOLE_LENGTH: i128 = (128 * 1024 * 1024 / PAGE_SZ) as i128;
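// Assuming the usual 8 KiB PAGE_SZ, this threshold corresponds to 16384 pages, i.e. 128 MiB.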
const DEFAULT_MAX_HOLES: usize = 10;
/// Wrapper for key range to provide reverse ordering by range length for BinaryHeap
#[derive(PartialEq, Eq)]
pub struct Hole(Range<Key>);
struct Hole(Range<Key>);
impl Ord for Hole {
fn cmp(&self, other: &Self) -> Ordering {
@@ -41,11 +39,11 @@ impl PartialOrd for Hole {
}
}
pub(crate) struct LayerFile {
pub key_range: Range<Key>,
pub lsn_range: Range<Lsn>,
pub is_delta: bool,
pub holes: Vec<Hole>,
struct LayerFile {
key_range: Range<Key>,
lsn_range: Range<Lsn>,
is_delta: bool,
holes: Vec<Hole>,
}
impl LayerFile {
@@ -69,7 +67,7 @@ impl LayerFile {
}
}
pub(crate) fn parse_filename(name: &str) -> Option<LayerFile> {
fn parse_filename(name: &str) -> Option<LayerFile> {
let split: Vec<&str> = name.split("__").collect();
if split.len() != 2 {
return None;
@@ -129,9 +127,18 @@ fn get_holes(path: &Path, max_holes: usize) -> Result<Vec<Hole>> {
Ok(holes)
}
pub(crate) fn main(cmd: &AnalyzeLayerMapCmd) -> Result<()> {
let storage_path = &cmd.path;
let max_holes = cmd.max_holes.unwrap_or(DEFAULT_MAX_HOLES);
fn main() -> Result<()> {
let args: Vec<String> = env::args().collect();
if args.len() < 2 {
println!("Usage: layer_map_analyzer PAGESERVER_DATA_DIR [MAX_HOLES]");
return Ok(());
}
let storage_path = PathBuf::from_str(&args[1])?;
let max_holes = if args.len() > 2 {
args[2].parse::<usize>().unwrap()
} else {
DEFAULT_MAX_HOLES
};
    // Initialize virtual_file (file descriptor cache) and page cache, which are needed to access the layers' persistent B-Tree.
pageserver::virtual_file::init(10);

View File

@@ -9,7 +9,6 @@ use clap::{Arg, ArgAction, Command};
use fail::FailScenario;
use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
use pageserver::task_mgr::WALRECEIVER_RUNTIME;
use remote_storage::GenericRemoteStorage;
use tracing::*;
@@ -19,7 +18,9 @@ use pageserver::{
context::{DownloadBehavior, RequestContext},
http, page_cache, page_service, task_mgr,
task_mgr::TaskKind,
task_mgr::{BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME},
task_mgr::{
BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
},
tenant::mgr,
virtual_file,
};
@@ -275,18 +276,7 @@ fn start_pageserver(
let pageserver_listener = tcp_listener::bind(pg_addr)?;
// Launch broker client
// The storage_broker::connect call needs to happen inside a tokio runtime thread.
let broker_client = WALRECEIVER_RUNTIME
.block_on(async {
// Note: we do not attempt connecting here (but validate endpoints sanity).
storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)
})
.with_context(|| {
format!(
"create broker client for uri={:?} keepalive_interval={:?}",
&conf.broker_endpoint, conf.broker_keepalive_interval,
)
})?;
WALRECEIVER_RUNTIME.block_on(pageserver::broker_client::init_broker_client(conf))?;
// Initialize authentication for incoming connections
let http_auth;
@@ -335,33 +325,8 @@ fn start_pageserver(
// Set up remote storage client
let remote_storage = create_remote_storage_client(conf)?;
    // All tenant load operations carry this while they are ongoing; it will be dropped once those
    // operations finish, either successfully or otherwise. At that point the initial load is done
    // and we can start the global background tasks.
let (init_done_tx, init_done_rx) = utils::completion::channel();
// Scan the local 'tenants/' directory and start loading the tenants
let init_started_at = std::time::Instant::now();
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(
conf,
broker_client.clone(),
remote_storage.clone(),
(init_done_tx, init_done_rx.clone()),
))?;
BACKGROUND_RUNTIME.spawn({
let init_done_rx = init_done_rx.clone();
async move {
init_done_rx.wait().await;
let elapsed = init_started_at.elapsed();
tracing::info!(
elapsed_millis = elapsed.as_millis(),
"Initial load completed."
);
}
});
BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(conf, remote_storage.clone()))?;
// shared state between the disk-usage backed eviction background task and the http endpoint
// that allows triggering disk-usage based eviction manually. note that the http endpoint
@@ -374,7 +339,6 @@ fn start_pageserver(
conf,
remote_storage.clone(),
disk_usage_eviction_state.clone(),
init_done_rx.clone(),
)?;
}
@@ -387,7 +351,6 @@ fn start_pageserver(
conf,
launch_ts,
http_auth,
broker_client.clone(),
remote_storage,
disk_usage_eviction_state,
)?
@@ -412,7 +375,6 @@ fn start_pageserver(
);
if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
let init_done_rx = init_done_rx;
let metrics_ctx = RequestContext::todo_child(
TaskKind::MetricsCollection,
// This task itself shouldn't download anything.
@@ -428,13 +390,6 @@ fn start_pageserver(
"consumption metrics collection",
true,
async move {
// first wait for initial load to complete before first iteration.
//
// this is because we only process active tenants and timelines, and the
// Timeline::get_current_logical_size will spawn the logical size calculation,
// which will not be rate-limited.
init_done_rx.wait().await;
pageserver::consumption_metrics::collect_metrics(
metric_collection_endpoint,
conf.metric_collection_interval,
@@ -472,7 +427,6 @@ fn start_pageserver(
async move {
page_service::libpq_listener_main(
conf,
broker_client,
pg_auth,
pageserver_listener,
conf.pg_auth_type,

Some files were not shown because too many files have changed in this diff.