Merge pull request #4803 from neondatabase/releases/2023-07-25

Release 2023-07-25
fix: count broken tenant more correct (#4800 )
2026-03-19 08:10:37 +00:00 · 2023-07-25 14:21:07 +03:00 · 2023-07-25 12:31:24 +03:00 · 2023-07-25 11:15:54 +03:00 · 2023-07-25 10:19:04 +03:00 · 2023-07-25 00:43:27 +03:00
391 changed files with 28605 additions and 12774 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -12,5 +12,11 @@ opt-level = 3
 # Turn on a small amount of optimization in Development mode.
 opt-level = 1

+[build]
+# This is only present for local builds, as it will be overridden
+# by the RUSTDOCFLAGS env var in CI.
+rustdocflags = ["-Arustdoc::private_intra_doc_links"]
+
 [alias]
 build_testing = ["build", "--features", "testing"]
+neon = ["run", "--bin", "neon_local"]
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -0,0 +1,186 @@
+name: 'Create Allure report'
+description: 'Generate Allure report from uploaded by actions/allure-report-store tests results'
+
+outputs:
+  report-url:
+    description: 'Allure report URL'
+    value: ${{ steps.generate-report.outputs.report-url }}
+  report-json-url:
+    description: 'Allure report JSON URL'
+    value: ${{ steps.generate-report.outputs.report-json-url }}
+
+runs:
+  using: "composite"
+
+  steps:
+    # We're using some of env variables quite offen, so let's set them once.
+    #
+    # It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
+    #
+    # - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
+    # - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
+    #
+    - name: Set variables
+      shell: bash -euxo pipefail {0}
+      run: |
+        PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
+        if [ "${PR_NUMBER}" != "null" ]; then
+          BRANCH_OR_PR=pr-${PR_NUMBER}
+        elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
+          # Shortcut for special branches
+          BRANCH_OR_PR=${GITHUB_REF_NAME}
+        else
+          BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
+        fi
+
+        LOCK_FILE=reports/${BRANCH_OR_PR}/lock.txt
+
+        WORKDIR=/tmp/${BRANCH_OR_PR}-$(date +%s)
+        mkdir -p ${WORKDIR}
+
+        echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
+        echo "LOCK_FILE=${LOCK_FILE}"       >> $GITHUB_ENV
+        echo "WORKDIR=${WORKDIR}"           >> $GITHUB_ENV
+        echo "BUCKET=${BUCKET}"             >> $GITHUB_ENV
+      env:
+        BUCKET: neon-github-public-dev
+
+    # TODO: We can replace with a special docker image with Java and Allure pre-installed
+    - uses: actions/setup-java@v3
+      with:
+        distribution: 'temurin'
+        java-version: '17'
+
+    - name: Install Allure
+      shell: bash -euxo pipefail {0}
+      run: |
+        if ! which allure; then
+          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
+          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
+          echo "${ALLURE_ZIP_SHA256} ${ALLURE_ZIP}" | sha256sum --check
+          unzip -q ${ALLURE_ZIP}
+          echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
+          rm -f ${ALLURE_ZIP}
+        fi
+      env:
+        ALLURE_VERSION: 2.22.1
+        ALLURE_ZIP_SHA256: fdc7a62d94b14c5e0bf25198ae1feded6b005fdbed864b4d3cb4e5e901720b0b
+
+    # Potentially we could have several running build for the same key (for example, for the main branch), so we use improvised lock for this
+    - name: Acquire lock
+      shell: bash -euxo pipefail {0}
+      run: |
+        LOCK_TIMEOUT=300 # seconds
+
+        LOCK_CONTENT="${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+        echo ${LOCK_CONTENT} > ${WORKDIR}/lock.txt
+
+        # Do it up to 5 times to avoid race condition
+        for _ in $(seq 1 5); do
+          for i in $(seq 1 ${LOCK_TIMEOUT}); do
+            LOCK_ACQUIRED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
+            # `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
+            if [ -z "${LOCK_ACQUIRED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ACQUIRED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
+              break
+            fi
+            sleep 1
+          done
+
+          aws s3 mv --only-show-errors ${WORKDIR}/lock.txt "s3://${BUCKET}/${LOCK_FILE}"
+
+          # Double-check that exactly THIS run has acquired the lock
+          aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
+          if [ "$(cat lock.txt)" = "${LOCK_CONTENT}" ]; then
+            break
+          fi
+        done
+
+    - name: Generate and publish final Allure report
+      id: generate-report
+      shell: bash -euxo pipefail {0}
+      run: |
+        REPORT_PREFIX=reports/${BRANCH_OR_PR}
+        RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
+
+        # Get previously uploaded data for this run
+        ZSTD_NBTHREADS=0
+
+        S3_FILEPATHS=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/ | jq --raw-output '.Contents[]?.Key')
+        if [ -z "$S3_FILEPATHS" ]; then
+          # There's no previously uploaded data for this $GITHUB_RUN_ID
+          exit 0
+        fi
+        for S3_FILEPATH in ${S3_FILEPATHS}; do
+          time aws s3 cp --only-show-errors "s3://${BUCKET}/${S3_FILEPATH}" "${WORKDIR}"
+
+          archive=${WORKDIR}/$(basename $S3_FILEPATH)
+          mkdir -p ${archive%.tar.zst}
+          time tar -xf ${archive} -C ${archive%.tar.zst}
+          rm -f ${archive}
+        done
+
+        # Get history trend
+        time aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${WORKDIR}/latest/history" || true
+
+        # Generate report
+        time allure generate --clean --output ${WORKDIR}/report ${WORKDIR}/*
+
+        # Replace a logo link with a redirect to the latest version of the report
+        sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html?nocache='"'+Date.now()+'"'" class=|g' ${WORKDIR}/report/app.js
+
+        # Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
+        time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
+        time aws s3 mv --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
+
+        REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
+
+        # Generate redirect
+        cat <<EOF > ${WORKDIR}/index.html
+          <!DOCTYPE html>
+
+          <meta charset="utf-8">
+          <title>Redirecting to ${REPORT_URL}</title>
+          <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
+        EOF
+        time aws s3 cp --only-show-errors ${WORKDIR}/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
+
+        echo "report-url=${REPORT_URL}"                                   >> $GITHUB_OUTPUT
+        echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
+
+        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
+
+    - name: Release lock
+      if: always()
+      shell: bash -euxo pipefail {0}
+      run: |
+        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
+
+        if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" ]; then
+          aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
+        fi
+
+    - name: Cleanup
+      if: always()
+      shell: bash -euxo pipefail {0}
+      run: |
+        if [ -d "${WORKDIR}" ]; then
+          rm -rf ${WORKDIR}
+        fi
+
+    - uses: actions/github-script@v6
+      if: always()
+      env:
+        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
+        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+      with:
+        script: |
+          const { REPORT_URL, COMMIT_SHA } = process.env
+
+          await github.rest.repos.createCommitStatus({
+            owner: context.repo.owner,
+            repo: context.repo.repo,
+            sha: `${COMMIT_SHA}`,
+            state: 'success',
+            target_url: `${REPORT_URL}`,
+            context: 'Allure report',
+          })
--- a/.github/actions/allure-report-store/action.yml
+++ b/.github/actions/allure-report-store/action.yml
@@ -0,0 +1,72 @@
+name: 'Store Allure results'
+description: 'Upload test results to be used by actions/allure-report-generate'
+
+inputs:
+  report-dir:
+    description: 'directory with test results generated by tests'
+    required: true
+  unique-key:
+    description: 'string to distinguish different results in the same run'
+    required: true
+
+runs:
+  using: "composite"
+
+  steps:
+    - name: Set variables
+      shell: bash -euxo pipefail {0}
+      run: |
+        PR_NUMBER=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
+        if [ "${PR_NUMBER}" != "null" ]; then
+          BRANCH_OR_PR=pr-${PR_NUMBER}
+        elif [ "${GITHUB_REF_NAME}" = "main" ] || [ "${GITHUB_REF_NAME}" = "release" ]; then
+          # Shortcut for special branches
+          BRANCH_OR_PR=${GITHUB_REF_NAME}
+        else
+          BRANCH_OR_PR=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
+        fi
+
+        echo "BRANCH_OR_PR=${BRANCH_OR_PR}" >> $GITHUB_ENV
+        echo "REPORT_DIR=${REPORT_DIR}"     >> $GITHUB_ENV
+      env:
+        REPORT_DIR: ${{ inputs.report-dir }}
+
+    - name: Upload test results
+      shell: bash -euxo pipefail {0}
+      run: |
+        REPORT_PREFIX=reports/${BRANCH_OR_PR}
+        RAW_PREFIX=reports-raw/${BRANCH_OR_PR}/${GITHUB_RUN_ID}
+
+        # Add metadata
+        cat <<EOF > ${REPORT_DIR}/executor.json
+          {
+            "name": "GitHub Actions",
+            "type": "github",
+            "url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
+            "buildOrder": ${GITHUB_RUN_ID},
+            "buildName": "GitHub Actions Run #${GITHUB_RUN_NUMBER}/${GITHUB_RUN_ATTEMPT}",
+            "buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
+            "reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
+            "reportName": "Allure Report"
+          }
+        EOF
+
+        cat <<EOF > ${REPORT_DIR}/environment.properties
+          COMMIT_SHA=${COMMIT_SHA}
+        EOF
+
+        ARCHIVE="${UNIQUE_KEY}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
+        ZSTD_NBTHREADS=0
+
+        time tar -C ${REPORT_DIR} -cf ${ARCHIVE} --zstd .
+        time aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
+      env:
+        UNIQUE_KEY: ${{ inputs.unique-key }}
+        COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        BUCKET: neon-github-public-dev
+
+    - name: Cleanup
+      if: always()
+      shell: bash -euxo pipefail {0}
+      run: |
+        rm -rf ${REPORT_DIR}
--- a/.github/actions/allure-report/action.yml
+++ b/.github/actions/allure-report/action.yml
@@ -1,254 +0,0 @@
-name: 'Create Allure report'
-description: 'Create and publish Allure report'
-
-inputs:
-  action:
-    desctiption: 'generate or store'
-    required: true
-  build_type:
-    description: '`build_type` from run-python-test-set action'
-    required: true
-  test_selection:
-    description: '`test_selector` from run-python-test-set action'
-    required: false
-outputs:
-  report-url:
-    description: 'Allure report URL'
-    value: ${{ steps.generate-report.outputs.report-url }}
-  report-json-url:
-    description: 'Allure report JSON URL'
-    value: ${{ steps.generate-report.outputs.report-json-url }}
-
-runs:
-  using: "composite"
-
-  steps:
-    # We're using some of env variables quite offen, so let's set them once.
-    #
-    # It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
-    #
-    # - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
-    # - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
-    #
-    - name: Set common environment variables
-      shell: bash -euxo pipefail {0}
-      run: |
-        echo "BUILD_TYPE=${BUILD_TYPE}"   >> $GITHUB_ENV
-        echo "BUCKET=${BUCKET}"           >> $GITHUB_ENV
-        echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
-      env:
-        BUILD_TYPE: ${{ inputs.build_type }}
-        BUCKET: neon-github-public-dev
-        TEST_OUTPUT: /tmp/test_output
-
-    - name: Validate input parameters
-      shell: bash -euxo pipefail {0}
-      run: |
-        if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
-          echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
-          exit 1
-        fi
-
-        if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
-          echo >&2 "inputs.test_selection must be set for 'store' action"
-          exit 2
-        fi
-
-    - name: Calculate variables
-      id: calculate-vars
-      shell: bash -euxo pipefail {0}
-      run: |
-        # TODO: for manually triggered workflows (via workflow_dispatch) we need to have a separate key
-
-        pr_number=$(jq --raw-output .pull_request.number "$GITHUB_EVENT_PATH" || true)
-        if [ "${pr_number}" != "null" ]; then
-          key=pr-${pr_number}
-        elif [ "${GITHUB_REF_NAME}" = "main" ]; then
-          # Shortcut for a special branch
-          key=main
-        elif [ "${GITHUB_REF_NAME}" = "release" ]; then
-          # Shortcut for a special branch
-          key=release
-        else
-          key=branch-$(printf "${GITHUB_REF_NAME}" | tr -c "[:alnum:]._-" "-")
-        fi
-        echo "KEY=${key}" >> $GITHUB_OUTPUT
-
-        # Sanitize test selection to remove `/` and any other special characters
-        # Use printf instead of echo to avoid having `\n` at the end of the string
-        test_selection=$(printf "${{ inputs.test_selection }}" | tr -c "[:alnum:]._-" "-" )
-        echo "TEST_SELECTION=${test_selection}" >> $GITHUB_OUTPUT
-
-    - uses: actions/setup-java@v3
-      if: ${{ inputs.action == 'generate' }}
-      with:
-        distribution: 'temurin'
-        java-version: '17'
-
-    - name: Install Allure
-      if: ${{ inputs.action == 'generate' }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        if ! which allure; then
-          ALLURE_ZIP=allure-${ALLURE_VERSION}.zip
-          wget -q https://github.com/allure-framework/allure2/releases/download/${ALLURE_VERSION}/${ALLURE_ZIP}
-          echo "${ALLURE_ZIP_MD5}  ${ALLURE_ZIP}" | md5sum -c
-          unzip -q ${ALLURE_ZIP}
-          echo "$(pwd)/allure-${ALLURE_VERSION}/bin" >> $GITHUB_PATH
-          rm -f ${ALLURE_ZIP}
-        fi
-      env:
-        ALLURE_VERSION: 2.21.0
-        ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
-
-    - name: Upload Allure results
-      if: ${{ inputs.action == 'store' }}
-      env:
-        REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-        RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        # Add metadata
-        cat <<EOF > $TEST_OUTPUT/allure/results/executor.json
-          {
-            "name": "GitHub Actions",
-            "type": "github",
-            "url": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/latest/index.html",
-            "buildOrder": ${GITHUB_RUN_ID},
-            "buildName": "GitHub Actions Run #${{ github.run_number }}/${GITHUB_RUN_ATTEMPT}",
-            "buildUrl": "${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/attempts/${GITHUB_RUN_ATTEMPT}",
-            "reportUrl": "https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html",
-            "reportName": "Allure Report"
-          }
-        EOF
-        cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
-          TEST_SELECTION=${{ inputs.test_selection }}
-          BUILD_TYPE=${BUILD_TYPE}
-        EOF
-
-        ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
-        ZSTD_NBTHREADS=0
-
-        tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
-        aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"
-
-    # Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
-    - name: Acquire Allure lock
-      if: ${{ inputs.action == 'generate' }}
-      shell: bash -euxo pipefail {0}
-      env:
-        LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
-        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
-      run: |
-        LOCK_TIMEOUT=300 # seconds
-
-        for _ in $(seq 1 5); do
-          for i in $(seq 1 ${LOCK_TIMEOUT}); do
-            LOCK_ADDED=$(aws s3api head-object --bucket neon-github-public-dev --key ${LOCK_FILE} | jq --raw-output '.LastModified' || true)
-            # `date --date="..."` is supported only by gnu date (i.e. it doesn't work on BSD/macOS)
-            if [ -z "${LOCK_ADDED}" ] || [ "$(( $(date +%s) - $(date --date="${LOCK_ADDED}" +%s) ))" -gt "${LOCK_TIMEOUT}" ]; then
-              break
-            fi
-            sleep 1
-          done
-          echo "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" > lock.txt
-          aws s3 mv --only-show-errors lock.txt "s3://${BUCKET}/${LOCK_FILE}"
-
-          # A double-check that exactly WE have acquired the lock
-          aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt
-          if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
-            break
-          fi
-        done
-
-    - name: Generate and publish final Allure report
-      if: ${{ inputs.action == 'generate' }}
-      id: generate-report
-      env:
-        REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-        RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
-      shell: bash -euxo pipefail {0}
-      run: |
-        # Get previously uploaded data for this run
-        ZSTD_NBTHREADS=0
-
-        s3_filepaths=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${RAW_PREFIX}/${GITHUB_RUN_ID}- | jq --raw-output  '.Contents[].Key')
-        if [ -z "$s3_filepaths" ]; then
-          # There's no previously uploaded data for this run
-          exit 0
-        fi
-        for s3_filepath in ${s3_filepaths}; do
-          aws s3 cp --only-show-errors "s3://${BUCKET}/${s3_filepath}" "${TEST_OUTPUT}/allure/"
-
-          archive=${TEST_OUTPUT}/allure/$(basename $s3_filepath)
-          mkdir -p ${archive%.tar.zst}
-          tar -xf ${archive} -C ${archive%.tar.zst}
-          rm -f ${archive}
-        done
-
-        # Get history trend
-        aws s3 cp --recursive --only-show-errors "s3://${BUCKET}/${REPORT_PREFIX}/latest/history" "${TEST_OUTPUT}/allure/latest/history" || true
-
-        # Generate report
-        allure generate --clean --output $TEST_OUTPUT/allure/report $TEST_OUTPUT/allure/*
-
-        # Replace a logo link with a redirect to the latest version of the report
-        sed -i 's|<a href="." class=|<a href="https://'${BUCKET}'.s3.amazonaws.com/'${REPORT_PREFIX}'/latest/index.html" class=|g' $TEST_OUTPUT/allure/report/app.js
-
-        # Upload a history and the final report (in this particular order to not to have duplicated history in 2 places)
-        aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report/history" "s3://${BUCKET}/${REPORT_PREFIX}/latest/history"
-        aws s3 mv --recursive --only-show-errors "${TEST_OUTPUT}/allure/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
-
-        REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html
-
-        # Generate redirect
-        cat <<EOF > ${TEST_OUTPUT}/allure/index.html
-          <!DOCTYPE html>
-
-          <meta charset="utf-8">
-          <title>Redirecting to ${REPORT_URL}</title>
-          <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
-        EOF
-        aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
-
-        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
-        echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
-        echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT
-
-    - name: Release Allure lock
-      if: ${{ inputs.action == 'generate' && always() }}
-      shell: bash -euxo pipefail {0}
-      env:
-        LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
-        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
-      run: |
-        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
-
-        if [ "$(cat lock.txt)" = "${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}-${TEST_SELECTION}" ]; then
-          aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
-        fi
-
-    - name: Cleanup
-      if: always()
-      shell: bash -euxo pipefail {0}
-      run: |
-        rm -rf ${TEST_OUTPUT}/allure
-
-    - uses: actions/github-script@v6
-      if: ${{ inputs.action == 'generate' && always() }}
-      env:
-        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
-        SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-      with:
-        script: |
-          const { REPORT_URL, BUILD_TYPE, SHA } = process.env
-
-          await github.rest.repos.createCommitStatus({
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            sha: `${SHA}`,
-            state: 'success',
-            target_url: `${REPORT_URL}`,
-            context: `Allure report / ${BUILD_TYPE}`,
-          })
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -36,18 +36,14 @@ inputs:
    description: 'Region name for real s3 tests'
    required: false
    default: ''
-  real_s3_access_key_id:
-    description: 'Access key id'
-    required: false
-    default: ''
-  real_s3_secret_access_key:
-    description: 'Secret access key'
-    required: false
-    default: ''
  rerun_flaky:
    description: 'Whether to rerun flaky tests'
    required: false
    default: 'false'
+  pg_version:
+    description: 'Postgres version to use for tests'
+    required: false
+    default: 'v14'

 runs:
  using: "composite"
@@ -67,12 +63,12 @@ runs:
        path: /tmp/neon-previous
        prefix: latest

-    - name: Download compatibility snapshot for Postgres 14
+    - name: Download compatibility snapshot
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: compatibility-snapshot-${{ inputs.build_type }}-pg14
-        path: /tmp/compatibility_snapshot_pg14
+        name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}
+        path: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
        prefix: latest

    - name: Checkout
@@ -100,19 +96,18 @@ runs:
        COMPATIBILITY_POSTGRES_DISTRIB_DIR: /tmp/neon-previous/pg_install
        TEST_OUTPUT: /tmp/test_output
        BUILD_TYPE: ${{ inputs.build_type }}
-        AWS_ACCESS_KEY_ID: ${{ inputs.real_s3_access_key_id }}
-        AWS_SECRET_ACCESS_KEY: ${{ inputs.real_s3_secret_access_key }}
-        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
+        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg${{ inputs.pg_version }}
        ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
        RERUN_FLAKY: ${{ inputs.rerun_flaky }}
+        PG_VERSION: ${{ inputs.pg_version }}
      shell: bash -euxo pipefail {0}
      run: |
        # PLATFORM will be embedded in the perf test report
        # and it is needed to distinguish different environments
        export PLATFORM=${PLATFORM:-github-actions-selfhosted}
        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
-        export DEFAULT_PG_VERSION=${DEFAULT_PG_VERSION:-14}
+        export DEFAULT_PG_VERSION=${PG_VERSION#v}

        if [ "${BUILD_TYPE}" = "remote" ]; then
          export REMOTE_ENV=1
@@ -155,6 +150,14 @@ runs:
          EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
        fi

+        # We use pytest-split plugin to run benchmarks in parallel on different CI runners
+        if [ "${TEST_SELECTION}" = "test_runner/performance" ] && [ "${{ inputs.build_type }}" != "remote" ]; then
+          mkdir -p $TEST_OUTPUT
+          poetry run ./scripts/benchmark_durations.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/benchmark_durations.json"
+
+          EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
+        fi
+
        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
@@ -192,19 +195,18 @@ runs:
          scripts/generate_and_push_perf_report.sh
        fi

-    - name: Upload compatibility snapshot for Postgres 14
+    - name: Upload compatibility snapshot
      if: github.ref_name == 'release'
      uses: ./.github/actions/upload
      with:
-        name: compatibility-snapshot-${{ inputs.build_type }}-pg14-${{ github.run_id }}
-        # The path includes a test name (test_create_snapshot) and directory that the test creates (compatibility_snapshot_pg14), keep the path in sync with the test
-        path: /tmp/test_output/test_create_snapshot/compatibility_snapshot_pg14/
+        name: compatibility-snapshot-${{ inputs.build_type }}-pg${{ inputs.pg_version }}-${{ github.run_id }}
+        # Directory is created by test_compatibility.py::test_create_snapshot, keep the path in sync with the test
+        path: /tmp/test_output/compatibility_snapshot_pg${{ inputs.pg_version }}/
        prefix: latest

-    - name: Create Allure report
+    - name: Upload test results
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
+      uses: ./.github/actions/allure-report-store
      with:
-        action: store
-        build_type: ${{ inputs.build_type }}
-        test_selection: ${{ inputs.test_selection }}
+        report-dir: /tmp/test_output/allure/results
+        unique-key: ${{ inputs.build_type }}
--- a/.github/ansible/.gitignore
+++ b/.github/ansible/.gitignore
@@ -1,5 +0,0 @@
-neon_install.tar.gz
-.neon_current_version
-
-collections/*
-!collections/.keep
--- a/.github/ansible/ansible.cfg
+++ b/.github/ansible/ansible.cfg
@@ -1,12 +0,0 @@
-[defaults]
-
-localhost_warning = False
-host_key_checking = False
-timeout = 30
-
-[ssh_connection]
-ssh_args   = -F ./ansible.ssh.cfg
-# teleport doesn't support sftp yet https://github.com/gravitational/teleport/issues/7127
-# and scp neither worked for me
-transfer_method = piped
-pipelining = True
--- a/.github/ansible/ansible.ssh.cfg
+++ b/.github/ansible/ansible.ssh.cfg
@@ -1,15 +0,0 @@
-# Remove this once https://github.com/gravitational/teleport/issues/10918 is fixed
-# (use pre 8.5 option name to cope with old ssh in CI)
-PubkeyAcceptedKeyTypes +ssh-rsa-cert-v01@openssh.com
-
-Host tele.zenith.tech
-    User admin
-    Port 3023
-    StrictHostKeyChecking no
-    UserKnownHostsFile /dev/null
-
-Host * !tele.zenith.tech
-    User admin
-    StrictHostKeyChecking no
-    UserKnownHostsFile /dev/null
-    ProxyJump tele.zenith.tech
--- a/.github/ansible/collections/.keep
+++ b/.github/ansible/collections/.keep
--- a/.github/ansible/deploy.yaml
+++ b/.github/ansible/deploy.yaml
@@ -1,211 +0,0 @@
- name: Upload Neon binaries
-  hosts: storage
-  gather_facts: False
-  remote_user: "{{ remote_user }}"
-
-  tasks:
-
-    - name: get latest version of Neon binaries
-      register: current_version_file
-      set_fact:
-        current_version: "{{ lookup('file', '.neon_current_version') | trim }}"
-      tags:
-      - pageserver
-      - safekeeper
-
-    - name: inform about versions
-      debug:
-        msg: "Version to deploy - {{ current_version }}"
-      tags:
-      - pageserver
-      - safekeeper
-
-    - name: upload and extract Neon binaries to /usr/local
-      ansible.builtin.unarchive:
-        owner: root
-        group: root
-        src: neon_install.tar.gz
-        dest: /usr/local
-      become: true
-      tags:
-      - pageserver
-      - safekeeper
-      - binaries
-      - putbinaries
-
- name: Deploy pageserver
-  hosts: pageservers
-  gather_facts: False
-  remote_user: "{{ remote_user }}"
-
-  tasks:
-
-    - name: upload init script
-      when: console_mgmt_base_url is defined
-      ansible.builtin.template:
-        src: scripts/init_pageserver.sh
-        dest: /tmp/init_pageserver.sh
-        owner: root
-        group: root
-        mode: '0755'
-      become: true
-      tags:
-      - pageserver
-
-    - name: init pageserver
-      shell:
-        cmd: /tmp/init_pageserver.sh
-      args:
-        creates: "/storage/pageserver/data/tenants"
-      environment:
-        NEON_REPO_DIR: "/storage/pageserver/data"
-        LD_LIBRARY_PATH: "/usr/local/v14/lib"
-      become: true
-      tags:
-      - pageserver
-
-    - name: read the existing remote pageserver config
-      ansible.builtin.slurp:
-        src: /storage/pageserver/data/pageserver.toml
-      register: _remote_ps_config
-      tags:
-      - pageserver
-
-    - name: parse the existing pageserver configuration
-      ansible.builtin.set_fact:
-        _existing_ps_config: "{{ _remote_ps_config['content'] | b64decode | sivel.toiletwater.from_toml }}"
-      tags:
-      - pageserver
-
-    - name: construct the final pageserver configuration dict
-      ansible.builtin.set_fact:
-        pageserver_config: "{{ pageserver_config_stub | combine({'id': _existing_ps_config.id }) }}"
-      tags:
-      - pageserver
-
-    - name: template the pageserver config
-      template:
-        src: templates/pageserver.toml.j2
-        dest: /storage/pageserver/data/pageserver.toml
-      become: true
-      tags:
-      - pageserver
-
-    # used in `pageserver.service` template
-    - name: learn current availability_zone
-      shell:
-        cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
-      register: ec2_availability_zone
-
-    - set_fact: 
-        ec2_availability_zone={{ ec2_availability_zone.stdout }}
-
-    - name: upload systemd service definition
-      ansible.builtin.template:
-        src: systemd/pageserver.service
-        dest: /etc/systemd/system/pageserver.service
-        owner: root
-        group: root
-        mode: '0644'
-      become: true
-      tags:
-      - pageserver
-
-    - name: start systemd service
-      ansible.builtin.systemd:
-        daemon_reload: yes
-        name: pageserver
-        enabled: yes
-        state: restarted
-      become: true
-      tags:
-      - pageserver
-
-    - name: post version to console
-      when: console_mgmt_base_url is defined
-      shell:
-        cmd: |
-          INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/pageservers
-      tags:
-      - pageserver
-
- name: Deploy safekeeper
-  hosts: safekeepers
-  gather_facts: False
-  remote_user: "{{ remote_user }}"
-
-  tasks:
-
-    - name: upload init script
-      when: console_mgmt_base_url is defined
-      ansible.builtin.template:
-        src: scripts/init_safekeeper.sh
-        dest: /tmp/init_safekeeper.sh
-        owner: root
-        group: root
-        mode: '0755'
-      become: true
-      tags:
-      - safekeeper
-
-    - name: init safekeeper
-      shell:
-        cmd: /tmp/init_safekeeper.sh
-      args:
-        creates: "/storage/safekeeper/data/safekeeper.id"
-      environment:
-        NEON_REPO_DIR: "/storage/safekeeper/data"
-        LD_LIBRARY_PATH: "/usr/local/v14/lib"
-      become: true
-      tags:
-      - safekeeper
-
-    # used in `safekeeper.service` template
-    - name: learn current availability_zone
-      shell:
-        cmd: "curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone"
-      register: ec2_availability_zone
-
-    - set_fact: 
-        ec2_availability_zone={{ ec2_availability_zone.stdout }}
-
-    # in the future safekeepers should discover pageservers byself
-    # but currently use first pageserver that was discovered
-    - name: set first pageserver var for safekeepers
-      set_fact:
-        first_pageserver: "{{ hostvars[groups['pageservers'][0]]['inventory_hostname'] }}"
-      tags:
-      - safekeeper
-
-    - name: upload systemd service definition
-      ansible.builtin.template:
-        src: systemd/safekeeper.service
-        dest: /etc/systemd/system/safekeeper.service
-        owner: root
-        group: root
-        mode: '0644'
-      become: true
-      tags:
-      - safekeeper
-
-    - name: start systemd service
-      ansible.builtin.systemd:
-        daemon_reload: yes
-        name: safekeeper
-        enabled: yes
-        state: restarted
-      become: true
-      tags:
-      - safekeeper
-
-    - name: post version to console
-      when: console_mgmt_base_url is defined
-      shell:
-        cmd: |
-          INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/$INSTANCE_ID | jq '.version = {{ current_version }}' > /tmp/new_version
-          curl -sfS -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" -X POST -d@/tmp/new_version {{ console_mgmt_base_url }}/management/api/v2/safekeepers
-      tags:
-      - safekeeper
--- a/.github/ansible/get_binaries.sh
+++ b/.github/ansible/get_binaries.sh
@@ -1,42 +0,0 @@
-#!/bin/bash
-
-set -e
-
-if [ -n "${DOCKER_TAG}" ]; then
-  # Verson is DOCKER_TAG but without prefix
-  VERSION=$(echo $DOCKER_TAG | sed 's/^.*-//g')
-else
-  echo "Please set DOCKER_TAG environment variable"
-  exit 1
-fi
-
-
-# do initial cleanup
-rm -rf neon_install postgres_install.tar.gz neon_install.tar.gz .neon_current_version
-mkdir neon_install
-
-# retrieve binaries from docker image
-echo "getting binaries from docker image"
-docker pull --quiet neondatabase/neon:${DOCKER_TAG}
-ID=$(docker create neondatabase/neon:${DOCKER_TAG})
-docker cp ${ID}:/data/postgres_install.tar.gz .
-tar -xzf postgres_install.tar.gz -C neon_install
-mkdir neon_install/bin/
-docker cp ${ID}:/usr/local/bin/pageserver neon_install/bin/
-docker cp ${ID}:/usr/local/bin/pageserver_binutils neon_install/bin/
-docker cp ${ID}:/usr/local/bin/safekeeper neon_install/bin/
-docker cp ${ID}:/usr/local/bin/storage_broker neon_install/bin/
-docker cp ${ID}:/usr/local/bin/proxy neon_install/bin/
-docker cp ${ID}:/usr/local/v14/bin/ neon_install/v14/bin/
-docker cp ${ID}:/usr/local/v15/bin/ neon_install/v15/bin/
-docker cp ${ID}:/usr/local/v14/lib/ neon_install/v14/lib/
-docker cp ${ID}:/usr/local/v15/lib/ neon_install/v15/lib/
-docker rm -vf ${ID}
-
-# store version to file (for ansible playbooks) and create binaries tarball
-echo ${VERSION} > neon_install/.neon_current_version
-echo ${VERSION} > .neon_current_version
-tar -czf neon_install.tar.gz -C neon_install .
-
-# do final cleaup
-rm -rf neon_install postgres_install.tar.gz
--- a/.github/ansible/prod.ap-southeast-1.hosts.yaml
+++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml
@@ -1,48 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-ap-southeast-1
-    bucket_region: ap-southeast-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: ap-southeast-1
-    ansible_aws_ssm_bucket_name: neon-prod-storage-ap-southeast-1
-    console_region_id: aws-ap-southeast-1
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-064de8ea28bdb495b
-        pageserver-1.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-0b180defcaeeb6b93
-
-    safekeepers:
-      hosts:
-        safekeeper-0.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-0d6f1dc5161eef894
-        safekeeper-2.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-04fb63634e4679eb9
-        safekeeper-3.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-05481f3bc88cfc2d4
--- a/.github/ansible/prod.eu-central-1.hosts.yaml
+++ b/.github/ansible/prod.eu-central-1.hosts.yaml
@@ -1,50 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-eu-central-1
-    bucket_region: eu-central-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: eu-central-1
-    ansible_aws_ssm_bucket_name: neon-prod-storage-eu-central-1
-    console_region_id: aws-eu-central-1
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.eu-central-1.aws.neon.tech:
-          ansible_host:  i-0cd8d316ecbb715be
-        pageserver-1.eu-central-1.aws.neon.tech:
-          ansible_host:  i-090044ed3d383fef0
-        pageserver-2.eu-central-1.aws.neon.tech:
-          ansible_host:  i-033584edf3f4b6742
-
-    safekeepers:
-      hosts:
-        safekeeper-0.eu-central-1.aws.neon.tech:
-          ansible_host:  i-0b238612d2318a050
-        safekeeper-1.eu-central-1.aws.neon.tech:
-          ansible_host:  i-07b9c45e5c2637cd4
-        safekeeper-2.eu-central-1.aws.neon.tech:
-          ansible_host:  i-020257302c3c93d88
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -1,51 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-us-east-2
-    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-east-2
-    ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-2
-    console_region_id: aws-us-east-2
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-east-2.aws.neon.tech:
-          ansible_host:  i-062227ba7f119eb8c
-        pageserver-1.us-east-2.aws.neon.tech:
-          ansible_host:  i-0b3ec0afab5968938
-        pageserver-2.us-east-2.aws.neon.tech:
-          ansible_host:  i-0d7a1c4325e71421d
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-east-2.aws.neon.tech:
-          ansible_host:  i-0e94224750c57d346
-        safekeeper-1.us-east-2.aws.neon.tech:
-          ansible_host:  i-06d113fb73bfddeb0
-        safekeeper-2.us-east-2.aws.neon.tech:
-          ansible_host:  i-09f66c8e04afff2e8
-
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -1,53 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-prod-storage-us-west-2
-    bucket_region: us-west-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
-    broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-west-2
-    ansible_aws_ssm_bucket_name: neon-prod-storage-us-west-2
-    console_region_id: aws-us-west-2-new
-    sentry_environment: production
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-west-2.aws.neon.tech:
-          ansible_host: i-0d9f6dfae0e1c780d
-        pageserver-1.us-west-2.aws.neon.tech:
-          ansible_host: i-0c834be1dddba8b3f
-        pageserver-2.us-west-2.aws.neon.tech:
-          ansible_host: i-051642d372c0a4f32
-        pageserver-3.us-west-2.aws.neon.tech:
-          ansible_host: i-00c3844beb9ad1c6b
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-west-2.aws.neon.tech:
-          ansible_host: i-00719d8a74986fda6
-        safekeeper-1.us-west-2.aws.neon.tech:
-          ansible_host: i-074682f9d3c712e7c
-        safekeeper-2.us-west-2.aws.neon.tech:
-          ansible_host: i-042b7efb1729d7966
-
--- a/.github/ansible/scripts/init_pageserver.sh
+++ b/.github/ansible/scripts/init_pageserver.sh
@@ -1,37 +0,0 @@
-#!/bin/sh
-
-# fetch params from meta-data service
-INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
-INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
-DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')
-
-# store fqdn hostname in var
-HOST=$(hostname -f)
-
-
-cat <<EOF | tee /tmp/payload
-{
-  "version": 1,
-  "host": "${HOST}",
-  "port": 6400,
-  "region_id": "{{ console_region_id }}",
-  "instance_id": "${INSTANCE_ID}",
-  "http_host": "${HOST}",
-  "http_port": 9898,
-  "active": false,
-  "availability_zone_id": "${AZ_ID}",
-  "disk_size": ${DISK_SIZE},
-  "instance_type": "${INSTANCE_TYPE}"
-}
-EOF
-
-# check if pageserver already registered or not
-if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/pageservers/${INSTANCE_ID} -o /dev/null; then
-
-    # not registered, so register it now
-    ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/pageservers -d@/tmp/payload | jq -r '.id')
-
-    # init pageserver
-    sudo -u pageserver /usr/local/bin/pageserver -c "id=${ID}" -c "pg_distrib_dir='/usr/local'" --init -D /storage/pageserver/data
-fi
--- a/.github/ansible/scripts/init_safekeeper.sh
+++ b/.github/ansible/scripts/init_safekeeper.sh
@@ -1,31 +0,0 @@
-#!/bin/sh
-
-# fetch params from meta-data service
-INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
-AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
-
-# store fqdn hostname in var
-HOST=$(hostname -f)
-
-
-cat <<EOF | tee /tmp/payload
-{
-  "version": 1,
-  "host": "${HOST}",
-  "port": 6500,
-  "http_port": 7676,
-  "region_id": "{{ console_region_id }}",
-  "instance_id": "${INSTANCE_ID}",
-  "availability_zone_id": "${AZ_ID}",
-  "active": false
-}
-EOF
-
-# check if safekeeper already registered or not
-if ! curl -sf -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" {{ console_mgmt_base_url }}/management/api/v2/safekeepers/${INSTANCE_ID} -o /dev/null; then
-
-    # not registered, so register it now
-    ID=$(curl -sf -X POST -H "Authorization: Bearer {{ CONSOLE_API_TOKEN }}" -H "Content-Type: application/json" {{ console_mgmt_base_url }}/management/api/v2/safekeepers -d@/tmp/payload | jq -r '.id')
-    # init safekeeper
-    sudo -u safekeeper /usr/local/bin/safekeeper --id ${ID} --init -D /storage/safekeeper/data
-fi
--- a/.github/ansible/ssm_config
+++ b/.github/ansible/ssm_config
@@ -1,2 +0,0 @@
-ansible_connection: aws_ssm
-ansible_python_interpreter: /usr/bin/python3
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -1,46 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-dev-storage-eu-west-1
-    bucket_region: eu-west-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
-    broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: eu-west-1
-    ansible_aws_ssm_bucket_name: neon-dev-storage-eu-west-1
-    console_region_id: aws-eu-west-1
-    sentry_environment: staging
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.eu-west-1.aws.neon.build:
-          ansible_host: i-01d496c5041c7f34c
-
-    safekeepers:
-      hosts:
-        safekeeper-0.eu-west-1.aws.neon.build:
-          ansible_host: i-05226ef85722831bf
-        safekeeper-1.eu-west-1.aws.neon.build:
-          ansible_host: i-06969ee1bf2958bfc
-        safekeeper-2.eu-west-1.aws.neon.build:
-          ansible_host: i-087892e9625984a0b
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -1,56 +0,0 @@
-storage:
-  vars:
-    bucket_name: neon-staging-storage-us-east-2
-    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
-    broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
-    pageserver_config_stub:
-      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
-      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-        evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
-      remote_storage:
-        bucket_name: "{{ bucket_name }}"
-        bucket_region: "{{ bucket_region }}"
-        prefix_in_bucket: "pageserver/v1"
-    safekeeper_s3_prefix: safekeeper/v1/wal
-    hostname_suffix: ""
-    remote_user: ssm-user
-    ansible_aws_ssm_region: us-east-2
-    ansible_aws_ssm_bucket_name: neon-staging-storage-us-east-2
-    console_region_id: aws-us-east-2
-    sentry_environment: staging
-
-  children:
-    pageservers:
-      hosts:
-        pageserver-0.us-east-2.aws.neon.build:
-          ansible_host: i-0c3e70929edb5d691
-        pageserver-1.us-east-2.aws.neon.build:
-          ansible_host: i-0565a8b4008aa3f40
-        pageserver-2.us-east-2.aws.neon.build:
-          ansible_host: i-01e31cdf7e970586a
-        pageserver-3.us-east-2.aws.neon.build:
-          ansible_host: i-0602a0291365ef7cc
-        pageserver-99.us-east-2.aws.neon.build:
-          ansible_host: i-0c39491109bb88824
-
-    safekeepers:
-      hosts:
-        safekeeper-0.us-east-2.aws.neon.build:
-          ansible_host: i-027662bd552bf5db0
-        safekeeper-1.us-east-2.aws.neon.build:
-          ansible_host: i-0171efc3604a7b907
-        safekeeper-2.us-east-2.aws.neon.build:
-          ansible_host: i-0de0b03a51676a6ce
-        safekeeper-99.us-east-2.aws.neon.build:
-          ansible_host: i-0d61b6a2ea32028d5
--- a/.github/ansible/systemd/pageserver.service
+++ b/.github/ansible/systemd/pageserver.service
@@ -1,18 +0,0 @@
-[Unit]
-Description=Neon pageserver
-After=network.target auditd.service
-
-[Service]
-Type=simple
-User=pageserver
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/pageserver LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_PAGESERVER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
-ExecStart=/usr/local/bin/pageserver -c "pg_distrib_dir='/usr/local'" -c "listen_pg_addr='0.0.0.0:6400'" -c "listen_http_addr='0.0.0.0:9898'" -c "broker_endpoint='{{ broker_endpoint }}'" -c "availability_zone='{{ ec2_availability_zone }}'" -D /storage/pageserver/data
-ExecReload=/bin/kill -HUP $MAINPID
-KillMode=mixed
-KillSignal=SIGINT
-Restart=on-failure
-TimeoutSec=10
-LimitNOFILE=30000000
-
-[Install]
-WantedBy=multi-user.target
--- a/.github/ansible/systemd/safekeeper.service
+++ b/.github/ansible/systemd/safekeeper.service
@@ -1,18 +0,0 @@
-[Unit]
-Description=Neon safekeeper
-After=network.target auditd.service
-
-[Service]
-Type=simple
-User=safekeeper
-Environment=RUST_BACKTRACE=1 NEON_REPO_DIR=/storage/safekeeper/data LD_LIBRARY_PATH=/usr/local/v14/lib SENTRY_DSN={{ SENTRY_URL_SAFEKEEPER }} SENTRY_ENVIRONMENT={{ sentry_environment }}
-ExecStart=/usr/local/bin/safekeeper -l {{ inventory_hostname }}{{ hostname_suffix }}:6500 --listen-http {{ inventory_hostname }}{{ hostname_suffix }}:7676 -D /storage/safekeeper/data --broker-endpoint={{ broker_endpoint }} --remote-storage='{bucket_name="{{bucket_name}}", bucket_region="{{bucket_region}}", prefix_in_bucket="{{ safekeeper_s3_prefix }}"}' --availability-zone={{ ec2_availability_zone }}
-ExecReload=/bin/kill -HUP $MAINPID
-KillMode=mixed
-KillSignal=SIGINT
-Restart=on-failure
-TimeoutSec=10
-LimitNOFILE=30000000
-
-[Install]
-WantedBy=multi-user.target
--- a/.github/ansible/templates/pageserver.toml.j2
+++ b/.github/ansible/templates/pageserver.toml.j2
@@ -1 +0,0 @@
-{{ pageserver_config | sivel.toiletwater.to_toml }}
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -1,75 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
-  domain: "*.eu-west-1.aws.neon.build"
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: dev
-  neon_region: eu-west-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-storage-broker.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: staging
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "staging"
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
@@ -1,67 +0,0 @@
-# Helm chart values for neon-proxy-link.
-# This is a YAML-formatted file.
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "link"
-  authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.stage.neon.tech/psql_session/"
-  domain: "pg.neon.build"
-  sentryEnvironment: "staging"
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy-link pods
-podLabels:
-  neon_service: proxy
-  neon_env: dev
-  neon_region: us-east-2
-
-service:
-  type: LoadBalancer
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.beta.us-east-2.aws.neon.build
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.beta.us-east-2.aws.neon.build
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
@@ -1,76 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
-  domain: "*.cloud.stage.neon.tech"
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram-legacy
-  neon_env: dev
-  neon_region: us-east-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
-  domain: "*.us-east-2.aws.neon.build"
-  extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
-  sentryEnvironment: "staging"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
-  metricCollectionInterval: "1min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: dev
-  neon_region: us-east-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/dev-us-east-2-beta.neon-storage-broker.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: staging
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.beta.us-east-2.internal.aws.neon.build
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "staging"
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.ap-southeast-1.aws.neon.tech"
-  extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: ap-southeast-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.eu-central-1.aws.neon.tech"
-  extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: eu-central-1
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-link.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-link.yaml
@@ -1,58 +0,0 @@
-# Helm chart values for neon-proxy-link.
-# This is a YAML-formatted file.
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "link"
-  authEndpoint: "https://console.neon.tech/authenticate_proxy_request/"
-  uri: "https://console.neon.tech/psql_session/"
-  domain: "pg.neon.tech"
-  sentryEnvironment: "production"
-
-# -- Additional labels for zenith-proxy pods
-podLabels:
-  neon_service: proxy
-  neon_env: production
-  neon_region: us-east-2
-
-service:
-  type: LoadBalancer
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link-mgmt.delta.us-east-2.aws.neon.tech
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-link.delta.us-east-2.aws.neon.tech
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.us-east-2.aws.neon.tech"
-  extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-east-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-east-2-delta.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.delta.us-east-2.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
@@ -1,76 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.cloud.neon.tech"
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-west-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.eta.us-west-2.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -1,77 +0,0 @@
-# Helm chart values for neon-proxy-scram.
-# This is a YAML-formatted file.
-
-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 5 minutes (5 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 300"]
-terminationGracePeriodSeconds: 604800
-
-
-image:
-  repository: neondatabase/neon
-
-settings:
-  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
-  domain: "*.us-west-2.aws.neon.tech"
-  extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"]
-  sentryEnvironment: "production"
-  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
-  metricCollectionInterval: "10min"
-
-# -- Additional labels for neon-proxy pods
-podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-west-2
-
-exposedService:
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
-    external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
-  httpsPort: 443
-
-#metrics:
-#  enabled: true
-#  serviceMonitor:
-#    enabled: true
-#    selector:
-#      release: kube-prometheus-stack
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-proxy.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-proxy
-        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-proxy"
-      endpoints:
-        - port: http
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
--- a/.github/helm-values/prod-us-west-2-eta.neon-storage-broker.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-storage-broker.yaml
@@ -1,52 +0,0 @@
-# Helm chart values for neon-storage-broker
-podLabels:
-  neon_env: production
-  neon_service: storage-broker
-
-# Use L4 LB
-service:
-  # service.annotations -- Annotations to add to the service
-  annotations:
-    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
-    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
-    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
-    # assign service to this name at external-dns
-    external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.eta.us-west-2.internal.aws.neon.tech
-  # service.type -- Service type
-  type: LoadBalancer
-  # service.port -- broker listen port
-  port: 50051
-
-ingress:
-  enabled: false
-
-metrics:
-  enabled: false
-
-extraManifests:
-  - apiVersion: operator.victoriametrics.com/v1beta1
-    kind: VMServiceScrape
-    metadata:
-      name: "{{ include \"neon-storage-broker.fullname\" . }}"
-      labels:
-        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
-        app.kubernetes.io/name: neon-storage-broker
-        app.kubernetes.io/instance: neon-storage-broker
-        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
-        app.kubernetes.io/managed-by: Helm
-      namespace: "{{ .Release.Namespace }}"
-    spec:
-      selector:
-        matchLabels:
-          app.kubernetes.io/name: "neon-storage-broker"
-      endpoints:
-        - port: broker
-          path: /metrics
-          interval: 10s
-          scrapeTimeout: 10s
-      namespaceSelector:
-        matchNames:
-          - "{{ .Release.Namespace }}"
-
-settings:
-  sentryEnvironment: "production"
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -1,6 +1,6 @@
-## Describe your changes
+## Problem

-## Issue ticket number and link
+## Summary of changes

 ## Checklist before requesting a review

--- a/.github/workflows/approved-for-ci-run.yml
+++ b/.github/workflows/approved-for-ci-run.yml
@@ -0,0 +1,55 @@
+name: Handle `approved-for-ci-run` label
+# This workflow helps to run CI pipeline for PRs made by external contributors (from forks).
+
+on:
+  pull_request:
+    types:
+      # Default types that triggers a workflow ([1]):
+      # - [1] https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request
+      - opened
+      - synchronize
+      - reopened
+      # Types that we wand to handle in addition to keep labels tidy:
+      - closed
+      # Actual magic happens here:
+      - labeled
+
+env:
+  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+  PR_NUMBER: ${{ github.event.pull_request.number }}
+
+jobs:
+  remove-label:
+    # Remove `approved-for-ci-run` label if the workflow is triggered by changes in a PR.
+    # The PR should be reviewed and labelled manually again.
+
+    runs-on: [ ubuntu-latest ]
+
+    if: |
+      contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
+      contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
+
+    steps:
+      - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
+
+  create-branch:
+    # Create a local branch for an `approved-for-ci-run` labelled PR to run CI pipeline in it.
+
+    runs-on: [ ubuntu-latest ]
+
+    if: |
+      github.event.action == 'labeled' &&
+      contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
+
+    steps:
+      - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
+
+      - uses: actions/checkout@v3
+        with:
+          ref: main
+
+      - run: gh pr checkout "${PR_NUMBER}"
+
+      - run: git checkout -b "ci-run/pr-${PR_NUMBER}"
+
+      - run: git push --force origin "ci-run/pr-${PR_NUMBER}"
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -16,12 +16,12 @@ on:
  workflow_dispatch: # adds ability to run this manually
    inputs:
      region_id:
-        description: 'Use a particular region. If not set the default region will be used'
+        description: 'Project region id. If not set, the default region will be used'
        required: false
        default: 'aws-us-east-2'
      save_perf_report:
        type: boolean
-        description: 'Publish perf report or not. If not set, the report is published only for the main branch'
+        description: 'Publish perf report. If not set, the report will be published only for the main branch'
        required: false

 defaults:
@@ -93,10 +93,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -128,13 +125,14 @@ jobs:
        matrix='{
          "platform": [
            "neon-captest-new",
-            "neon-captest-reuse"
+            "neon-captest-reuse",
+            "neonvm-captest-new"
          ],
          "db_size": [ "10gb" ],
-          "include": [
-            { "platform": "neon-captest-freetier", "db_size": "3gb"  },
-            { "platform": "neon-captest-new",      "db_size": "50gb" }
-          ]
+          "include": [{ "platform": "neon-captest-freetier",   "db_size": "3gb"  },
+                      { "platform": "neon-captest-new",        "db_size": "50gb" },
+                      { "platform": "neonvm-captest-freetier", "db_size": "3gb"  },
+                      { "platform": "neonvm-captest-new",      "db_size": "50gb" }]
        }'

        if [ "$(date +%A)" = "Saturday" ]; then
@@ -182,7 +180,8 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

-    timeout-minutes: 360 # 6h
+    # Increase timeout to 8h, default timeout is 6h
+    timeout-minutes: 480

    steps:
    - uses: actions/checkout@v3
@@ -200,7 +199,7 @@ jobs:
        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH

    - name: Create Neon Project
-      if: contains(fromJson('["neon-captest-new", "neon-captest-freetier"]'), matrix.platform)
+      if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
@@ -208,6 +207,7 @@ jobs:
        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
        compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
+        provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -216,7 +216,7 @@ jobs:
          neon-captest-reuse)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
            ;;
-          neon-captest-new | neon-captest-freetier)
+          neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
            CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
            ;;
          rds-aurora)
@@ -226,7 +226,7 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
            ;;
          *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}"
            exit 1
            ;;
        esac
@@ -283,10 +283,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -325,8 +322,6 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

-    timeout-minutes: 360 # 6h
-
    steps:
    - uses: actions/checkout@v3

@@ -380,10 +375,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -421,8 +413,6 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

-    timeout-minutes: 360 # 6h
-
    steps:
    - uses: actions/checkout@v3

@@ -476,10 +466,7 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
@@ -511,8 +498,6 @@ jobs:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init

-    timeout-minutes: 360 # 6h
-
    steps:
    - uses: actions/checkout@v3

@@ -566,16 +551,13 @@ jobs:

    - name: Create Allure report
      if: ${{ !cancelled() }}
-      uses: ./.github/actions/allure-report
-      with:
-        action: generate
-        build_type: ${{ env.BUILD_TYPE }}
+      uses: ./.github/actions/allure-report-generate

    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
+        slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -5,6 +5,7 @@ on:
    branches:
      - main
      - release
+      - ci-run/pr-*
  pull_request:

 defaults:
@@ -111,8 +112,26 @@ jobs:
      - name: Get postgres headers
        run: make postgres-headers -j$(nproc)

-      - name: Run cargo clippy
-        run: ./run_clippy.sh
+      # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
+      # This will catch compiler & clippy warnings in all feature combinations.
+      # TODO: use cargo hack for build and test as well, but, that's quite expensive.
+      # NB: keep clippy args in sync with ./run_clippy.sh
+      - run: |
+          CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
+          if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
+            echo "No clippy args found in .neon_clippy_args"
+            exit 1
+          fi
+          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
+      - name: Run cargo clippy (debug)
+        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
+      - name: Run cargo clippy (release)
+        run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
+
+      - name: Check documentation generation
+        run: cargo doc --workspace --no-deps --document-private-items
+        env:
+            RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"

      # Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
      - name: Check formatting
@@ -142,7 +161,7 @@ jobs:
        build_type: [ debug, release ]
    env:
      BUILD_TYPE: ${{ matrix.build_type }}
-      GIT_VERSION: ${{ github.sha }}
+      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}

    steps:
      - name: Fix git ownership
@@ -161,6 +180,27 @@ jobs:
          submodules: true
          fetch-depth: 1

+      - name: Check Postgres submodules revision
+        shell: bash -euo pipefail {0}
+        run: |
+          # This is a temporary solution to ensure that the Postgres submodules revision is correct (i.e. the updated intentionally).
+          # Eventually it will be replaced by a regression test https://github.com/neondatabase/neon/pull/4603
+
+          FAILED=false
+          for postgres in postgres-v14 postgres-v15; do
+            expected=$(cat vendor/revisions.json | jq --raw-output '."'"${postgres}"'"')
+            actual=$(git rev-parse "HEAD:vendor/${postgres}")
+            if [ "${expected}" != "${actual}" ]; then
+              echo >&2 "Expected ${postgres} rev to be at '${expected}', but it is at '${actual}'"
+              FAILED=true
+            fi
+          done
+
+          if [ "${FAILED}" = "true" ]; then
+            echo >&2 "Please update vendors/revisions.json if these changes are intentional"
+            exit 1
+          fi
+
      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
@@ -251,7 +291,7 @@ jobs:
          export REMOTE_STORAGE_S3_BUCKET=neon-github-public-dev
          export REMOTE_STORAGE_S3_REGION=eu-central-1
          # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
-          ${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test pagination_tests -- s3_pagination_should_work --exact
+          ${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test test_real_s3

      - name: Install rust binaries
        run: |
@@ -311,12 +351,14 @@ jobs:
    runs-on: [ self-hosted, gen3, large ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+      # Default shared memory is 64mb
+      options: --init --shm-size=512mb
    needs: [ build-neon ]
    strategy:
      fail-fast: false
      matrix:
        build_type: [ debug, release ]
+        pg_version: [ v14, v15 ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -331,36 +373,34 @@ jobs:
          test_selection: regress
          needs_postgres_source: true
          run_with_real_s3: true
-          real_s3_bucket: ci-tests-s3
-          real_s3_region: us-west-2
-          real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
-          real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
+          real_s3_bucket: neon-github-ci-tests
+          real_s3_region: eu-central-1
          rerun_flaky: true
+          pg_version: ${{ matrix.pg_version }}
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty

      - name: Merge and upload coverage data
-        if: matrix.build_type == 'debug'
+        if: matrix.build_type == 'debug' && matrix.pg_version == 'v14'
        uses: ./.github/actions/save-coverage-data

  benchmarks:
    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+      # Default shared memory is 64mb
+      options: --init --shm-size=512mb
    needs: [ build-neon ]
    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-benchmarks')
    strategy:
      fail-fast: false
      matrix:
+        pytest_split_group: [ 1, 2, 3, 4 ]
        build_type: [ release ]
    steps:
      - name: Checkout
        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 1

      - name: Pytest benchmarks
        uses: ./.github/actions/run-python-test-set
@@ -369,9 +409,11 @@ jobs:
          test_selection: performance
          run_in_parallel: false
          save_perf_report: ${{ github.ref_name == 'main' }}
+          extra_params: --splits ${{ strategy.job-total }} --group ${{ matrix.pytest_split_group }}
        env:
          VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
          PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+          TEST_RESULT_CONNSTR: "${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}"
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones

@@ -386,79 +428,48 @@ jobs:
    steps:
      - uses: actions/checkout@v3

-      - name: Create Allure report (debug)
+      - name: Create Allure report
        if: ${{ !cancelled() }}
-        id: create-allure-report-debug
-        uses: ./.github/actions/allure-report
-        with:
-          action: generate
-          build_type: debug
-
-      - name: Create Allure report (release)
-        if: ${{ !cancelled() }}
-        id: create-allure-report-release
-        uses: ./.github/actions/allure-report
-        with:
-          action: generate
-          build_type: release
+        id: create-allure-report
+        uses: ./.github/actions/allure-report-generate

      - uses: actions/github-script@v6
-        if: >
-          !cancelled() &&
-          github.event_name == 'pull_request' && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
+        if: ${{ !cancelled() }}
        with:
          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
          retries: 5
          script: |
-            const reports = [{
-              buildType: "debug",
-              reportUrl: "${{ steps.create-allure-report-debug.outputs.report-url }}",
-              jsonUrl:   "${{ steps.create-allure-report-debug.outputs.report-json-url }}",
-            }, {
-              buildType: "release",
-              reportUrl: "${{ steps.create-allure-report-release.outputs.report-url }}",
-              jsonUrl:   "${{ steps.create-allure-report-release.outputs.report-json-url }}",
-            }]
+            const report = {
+              reportUrl:     "${{ steps.create-allure-report.outputs.report-url }}",
+              reportJsonUrl: "${{ steps.create-allure-report.outputs.report-json-url }}",
+            }

-            const script = require("./scripts/pr-comment-test-report.js")
+            const script = require("./scripts/comment-test-report.js")
            await script({
              github,
              context,
              fetch,
-              reports,
+              report,
            })

      - name: Store Allure test stat in the DB
-        if: >
-          !cancelled() && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
+        if: ${{ !cancelled() && steps.create-allure-report.outputs.report-json-url }}
        env:
-          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-          REPORT_JSON_URL_DEBUG: ${{ steps.create-allure-report-debug.outputs.report-json-url }}
-          REPORT_JSON_URL_RELEASE: ${{ steps.create-allure-report-release.outputs.report-json-url }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+          REPORT_JSON_URL: ${{ steps.create-allure-report.outputs.report-json-url }}
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
        run: |
          ./scripts/pysync

-          for report_url in $REPORT_JSON_URL_DEBUG $REPORT_JSON_URL_RELEASE; do
-            if [ -z "$report_url" ]; then
-              continue
-            fi
+          curl --fail --output suites.json "${REPORT_JSON_URL}"
+          export BUILD_TYPE=unified
+          export DATABASE_URL="$TEST_RESULT_CONNSTR"

-            if [[ "$report_url" == "$REPORT_JSON_URL_DEBUG" ]]; then
-              BUILD_TYPE=debug
-            else
-              BUILD_TYPE=release
-            fi
-
-            curl --fail --output suites.json "${report_url}"
-            DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
-          done
+          poetry run python3 scripts/ingest_regress_test_result.py \
+            --revision ${COMMIT_SHA} \
+            --reference ${GITHUB_REF} \
+            --build-type ${BUILD_TYPE} \
+            --ingest suites.json

  coverage-report:
    runs-on: [ self-hosted, gen3, small ]
@@ -504,44 +515,55 @@ jobs:
      - name: Merge coverage data
        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge

-      - name: Build and upload coverage report
+      - name: Build coverage report
+        env:
+          COMMIT_URL: ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.event.pull_request.head.sha || github.sha }}
        run: |
-          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
-          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
-          COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA
-
-          scripts/coverage \
-            --dir=/tmp/coverage report \
+          scripts/coverage --dir=/tmp/coverage \
+            report \
            --input-objects=/tmp/coverage/binaries.list \
-            --commit-url=$COMMIT_URL \
+            --commit-url=${COMMIT_URL} \
            --format=github

-          REPORT_URL=https://${{ github.repository_owner }}.github.io/zenith-coverage-data/$COMMIT_SHA
+          scripts/coverage --dir=/tmp/coverage \
+            report \
+            --input-objects=/tmp/coverage/binaries.list \
+            --format=lcov

-          scripts/git-upload \
-            --repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
-            --message="Add code coverage for $COMMIT_URL" \
-            copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
+      - name: Upload coverage report
+        id: upload-coverage-report
+        env:
+          BUCKET: neon-github-public-dev
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        run: |
+          aws s3 cp --only-show-errors --recursive /tmp/coverage/report s3://${BUCKET}/code-coverage/${COMMIT_SHA}

-          # Add link to the coverage report to the commit
-          curl -f -X POST \
-          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-          -H "Accept: application/vnd.github.v3+json" \
-          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-          --data \
-            "{
-              \"state\": \"success\",
-              \"context\": \"neon-coverage\",
-              \"description\": \"Coverage report is ready\",
-              \"target_url\": \"$REPORT_URL\"
-            }"
+          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/code-coverage/${COMMIT_SHA}/index.html
+          echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
+
+      - uses: actions/github-script@v6
+        env:
+          REPORT_URL: ${{ steps.upload-coverage-report.outputs.report-url }}
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        with:
+          script: |
+            const { REPORT_URL, COMMIT_SHA } = process.env
+
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: `${COMMIT_SHA}`,
+              state: 'success',
+              target_url: `${REPORT_URL}`,
+              context: 'Code coverage report',
+            })

  trigger-e2e-tests:
    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
      options: --init
-    needs: [ push-docker-hub, tag ]
+    needs: [ promote-images, tag ]
    steps:
      - name: Set PR's status to pending and request a remote CI test
        run: |
@@ -584,8 +606,7 @@ jobs:
  neon-image:
    runs-on: [ self-hosted, gen3, large ]
    needs: [ tag ]
-    # https://github.com/GoogleContainerTools/kaniko/issues/2005
-    container: gcr.io/kaniko-project/executor:v1.7.0-debug
+    container: gcr.io/kaniko-project/executor:v1.9.2-debug
    defaults:
      run:
        shell: sh -eu {0}
@@ -597,62 +618,42 @@ jobs:
          submodules: true
          fetch-depth: 0

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
+      - name: Configure ECR and Docker Hub login
+        run: |
+          DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64)
+          echo "::add-mask::${DOCKERHUB_AUTH}"
+
+          cat <<-EOF > /kaniko/.docker/config.json
+            {
+              "auths": {
+                "https://index.docker.io/v1/": {
+                  "auth": "${DOCKERHUB_AUTH}"
+                }
+              },
+              "credHelpers": {
+                "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login"
+              }
+            }
+          EOF

      - name: Kaniko build neon
-        run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
+        run:
+          /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true
+                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
+                           --context .
+                           --build-arg GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
+                           --destination neondatabase/neon:${{needs.tag.outputs.build-tag}}

      # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
      - name: Cleanup ECR folder
        run: rm -rf ~/.ecr

-
-  neon-image-depot:
-    # For testing this will run side-by-side for a few merges.
-    # This action is not really optimized yet, but gets the job done
-    runs-on: [ self-hosted, gen3, large ]
-    needs: [ tag ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
-    permissions:
-      contents: read
-      id-token: write
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-
-      - name: Setup go
-        uses: actions/setup-go@v3
-        with:
-          go-version: '1.19'
-
-      - name: Set up Depot CLI
-        uses: depot/setup-action@v1
-
-      - name: Install Crane & ECR helper
-        run: go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
-
-      - name: Configure ECR login
-        run: |
-          mkdir /github/home/.docker/
-          echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
-
-      - name: Build and push
-        uses: depot/build-push-action@v1
-        with:
-          # if no depot.json file is at the root of your repo, you must specify the project id
-          project: nrdv0s4kcs
-          push: true
-          tags: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:depot-${{needs.tag.outputs.build-tag}}
-
  compute-tools-image:
    runs-on: [ self-hosted, gen3, large ]
    needs: [ tag ]
-    container: gcr.io/kaniko-project/executor:v1.7.0-debug
+    container: gcr.io/kaniko-project/executor:v1.9.2-debug
    defaults:
      run:
        shell: sh -eu {0}
@@ -661,18 +662,47 @@ jobs:
      - name: Checkout
        uses: actions/checkout@v1 # v3 won't work with kaniko

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
+      - name: Configure ECR and Docker Hub login
+        run: |
+          DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64)
+          echo "::add-mask::${DOCKERHUB_AUTH}"
+
+          cat <<-EOF > /kaniko/.docker/config.json
+            {
+              "auths": {
+                "https://index.docker.io/v1/": {
+                  "auth": "${DOCKERHUB_AUTH}"
+                }
+              },
+              "credHelpers": {
+                "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login"
+              }
+            }
+          EOF

      - name: Kaniko build compute tools
-        run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
+        run:
+          /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true
+                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
+                           --context .
+                           --build-arg GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
+                           --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+                           --dockerfile Dockerfile.compute-tools
+                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
+                           --destination neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}

+      # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
      - name: Cleanup ECR folder
        run: rm -rf ~/.ecr

  compute-node-image:
    runs-on: [ self-hosted, gen3, large ]
-    container: gcr.io/kaniko-project/executor:v1.7.0-debug
+    container:
+      image: gcr.io/kaniko-project/executor:v1.9.2-debug
+      # Workaround for "Resolving download.osgeo.org (download.osgeo.org)... failed: Temporary failure in name resolution.""
+      # Should be prevented by https://github.com/neondatabase/neon/issues/4281
+      options: --add-host=download.osgeo.org:140.211.15.30
    needs: [ tag ]
    strategy:
      fail-fast: false
@@ -689,12 +719,67 @@ jobs:
          submodules: true
          fetch-depth: 0

-      - name: Configure ECR login
-        run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
+      - name: Configure ECR and Docker Hub login
+        run: |
+          DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64)
+          echo "::add-mask::${DOCKERHUB_AUTH}"
+
+          cat <<-EOF > /kaniko/.docker/config.json
+            {
+              "auths": {
+                "https://index.docker.io/v1/": {
+                  "auth": "${DOCKERHUB_AUTH}"
+                }
+              },
+              "credHelpers": {
+                "369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login"
+              }
+            }
+          EOF

      - name: Kaniko build compute node with extensions
-        run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache  --context . --build-arg GIT_VERSION=${{ github.sha }} --build-arg PG_VERSION=${{ matrix.version }} --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+        run:
+          /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true
+                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
+                           --context .
+                           --build-arg GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }}
+                           --build-arg PG_VERSION=${{ matrix.version }}
+                           --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}}
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+                           --dockerfile Dockerfile.compute-node
+                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+                           --destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+                           --cleanup

+      # Due to a kaniko bug, we can't use cache for extensions image, thus it takes about the same amount of time as compute-node image to build (~10 min)
+      # During the transition period we need to have extensions in both places (in S3 and in compute-node image),
+      # so we won't build extension twice, but extract them from compute-node.
+      #
+      # For now we use extensions image only for new custom extensitons
+      - name: Kaniko build extensions only
+        run: |
+          # Kaniko is suposed to clean up after itself if --cleanup flag is set, but it doesn't.
+          # Despite some fixes were made in https://github.com/GoogleContainerTools/kaniko/pull/2504 (in kaniko v1.11.0),
+          # it still fails with error:
+          #   error building image: could not save file: copying file: symlink postgres /kaniko/1/usr/local/pgsql/bin/postmaster: file exists
+          #
+          # Ref https://github.com/GoogleContainerTools/kaniko/issues/1406
+          find /kaniko -maxdepth 1 -mindepth 1 -type d -regex "/kaniko/[0-9]*" -exec rm -rv {} \;
+
+          /kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true \
+                           --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache \
+                           --context . \
+                           --build-arg GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} \
+                           --build-arg PG_VERSION=${{ matrix.version }} \
+                           --build-arg BUILD_TAG=${{needs.tag.outputs.build-tag}} \
+                           --build-arg REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com \
+                           --dockerfile Dockerfile.compute-node \
+                           --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
+                           --destination neondatabase/extensions-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} \
+                           --cleanup \
+                           --target postgres-extensions
+
+      # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
      - name: Cleanup ECR folder
        run: rm -rf ~/.ecr

@@ -709,7 +794,7 @@ jobs:
      run:
        shell: sh -eu {0}
    env:
-      VM_BUILDER_VERSION: v0.4.6
+      VM_BUILDER_VERSION: v0.13.1

    steps:
      - name: Checkout
@@ -719,21 +804,18 @@ jobs:

      - name: Downloading vm-builder
        run: |
-          curl -L https://github.com/neondatabase/neonvm/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
+          curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
          chmod +x vm-builder

+      # Note: we need a separate pull step here because otherwise vm-builder will try to pull, and
+      # it won't have the proper authentication (written at v0.6.0)
      - name: Pulling compute-node image
        run: |
          docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

-      - name: Building VM compute-node rootfs
-        run: |
-          docker build -t temp-vm-compute-node --build-arg SRC_IMAGE=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -f Dockerfile.vm-compute-node .
-
      - name: Build vm image
        run: |
-          # note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
-          ./vm-builder -use-inittab -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+          ./vm-builder -enable-file-cache -src=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

      - name: Pushing vm-compute-node image
        run: |
@@ -786,13 +868,11 @@ jobs:
    runs-on: [ self-hosted, gen3, small ]
    needs: [ tag, test-images, vm-compute-node-image ]
    container: golang:1.19-bullseye
-    if: github.event_name != 'workflow_dispatch'
+    # Don't add if-condition here.
+    # The job should always be run because we have dependant other jobs that shouldn't be skipped

    steps:
      - name: Install Crane & ECR helper
-        if: |
-          (github.ref_name == 'main' || github.ref_name == 'release') &&
-          github.event_name != 'workflow_dispatch'
        run: |
          go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
          go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
@@ -802,69 +882,38 @@ jobs:
          mkdir /github/home/.docker/
          echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json

+      - name: Copy vm-compute-node images to Docker Hub
+        run: |
+          crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
+          crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
+
      - name: Add latest tag to images
        if: |
          (github.ref_name == 'main' || github.ref_name == 'release') &&
-          github.event_name != 'workflow_dispatch'
+           github.event_name != 'workflow_dispatch'
        run: |
          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} latest
          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
+          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:${{needs.tag.outputs.build-tag}} latest
          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
-
-      - name: Cleanup ECR folder
-        run: rm -rf ~/.ecr
-
-  push-docker-hub:
-    runs-on: [ self-hosted, dev, x64 ]
-    needs: [ promote-images, tag ]
-    container: golang:1.19-bullseye
-
-    steps:
-      - name: Install Crane & ECR helper
-        run: |
-          go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
-          go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
-
-      - name: Configure ECR login
-        run: |
-          mkdir /github/home/.docker/
-          echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
-
-      - name: Pull neon image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon
-
-      - name: Pull compute tools image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
-
-      - name: Pull compute node v14 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
-
-      - name: Pull vm compute node v14 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
-
-      - name: Pull compute node v15 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15
-
-      - name: Pull vm compute node v15 image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
-
-      - name: Pull rust image from ECR
-        run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust
+          crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:${{needs.tag.outputs.build-tag}} latest

      - name: Push images to production ECR
        if: |
          (github.ref_name == 'main' || github.ref_name == 'release') &&
-          github.event_name != 'workflow_dispatch'
+           github.event_name != 'workflow_dispatch'
        run: |
          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest
          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:latest
          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:latest
+          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/extensions-v14:latest
          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:latest
          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:latest
+          crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/extensions-v15:latest

      - name: Configure Docker Hub login
        run: |
@@ -872,28 +921,12 @@ jobs:
          echo "" > /github/home/.docker/config.json
          crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io

-      - name: Push neon image to Docker Hub
-        run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
+      - name: Push vm-compute-node to Docker Hub
+        run: |
+          crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
+          crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}

-      - name: Push compute tools image to Docker Hub
-        run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
-
-      - name: Push compute node v14 image to Docker Hub
-        run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}
-
-      - name: Push vm compute node v14 image to Docker Hub
-        run: crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
-
-      - name: Push compute node v15 image to Docker Hub
-        run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}}
-
-      - name: Push vm compute node v15 image to Docker Hub
-        run: crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}
-
-      - name: Push rust image to Docker Hub
-        run: crane push rust neondatabase/rust:pinned
-
-      - name: Add latest tag to images in Docker Hub
+      - name: Push latest tags to Docker Hub
        if: |
          (github.ref_name == 'main' || github.ref_name == 'release') &&
          github.event_name != 'workflow_dispatch'
@@ -902,52 +935,95 @@ jobs:
          crane tag neondatabase/compute-tools:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} latest
+          crane tag neondatabase/extensions-v14:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
          crane tag neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
+          crane tag neondatabase/extensions-v15:${{needs.tag.outputs.build-tag}} latest

      - name: Cleanup ECR folder
        run: rm -rf ~/.ecr

-  deploy-pr-test-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    # We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
-    # If it notices a fresh storage it may bump the compute version. And if compute image failed to build it may break things badly
-    needs: [ push-docker-hub, tag, regress-tests ]
+  upload-postgres-extensions-to-s3:
    if: |
-      contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
-      github.event_name != 'workflow_dispatch'
-    defaults:
-      run:
-        shell: bash
+      (github.ref_name == 'main' || github.ref_name == 'release') &&
+       github.event_name != 'workflow_dispatch'
+    runs-on: ${{ github.ref_name == 'release' && fromJSON('["self-hosted", "prod", "x64"]') || fromJSON('["self-hosted", "gen3", "small"]') }}
+    needs: [ tag, promote-images ]
    strategy:
+      fail-fast: false
      matrix:
-        target_region: [ eu-west-1 ]
+        version: [ v14, v15 ]
+
+    env:
+      # While on transition period we extract public extensions from compute-node image and custom extensions from extensions image.
+      # Later all the extensions will be moved to extensions image.
+      EXTENSIONS_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/extensions-${{ matrix.version }}:latest
+      COMPUTE_NODE_IMAGE: ${{ github.ref_name == 'release' && '093970136003' || '369495373322'}}.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:latest
+      AWS_ACCESS_KEY_ID: ${{ github.ref_name == 'release' && secrets.AWS_ACCESS_KEY_PROD || secrets.AWS_ACCESS_KEY_DEV }}
+      AWS_SECRET_ACCESS_KEY: ${{ github.ref_name == 'release' && secrets.AWS_SECRET_KEY_PROD || secrets.AWS_SECRET_KEY_DEV }}
+      S3_BUCKETS: |
+        ${{ github.ref_name == 'release' &&
+          'neon-prod-extensions-ap-southeast-1 neon-prod-extensions-eu-central-1 neon-prod-extensions-us-east-1 neon-prod-extensions-us-east-2 neon-prod-extensions-us-west-2' ||
+          'neon-dev-extensions-eu-central-1 neon-dev-extensions-eu-west-1 neon-dev-extensions-us-east-2' }}
+
    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-
-      - name: Redeploy
+      - name: Pull postgres-extensions image
        run: |
-          export DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          cd "$(pwd)/.github/ansible"
+          docker pull ${EXTENSIONS_IMAGE}
+          docker pull ${COMPUTE_NODE_IMAGE}

-          ./get_binaries.sh
+      - name: Create postgres-extensions container
+        id: create-container
+        run: |
+          EID=$(docker create ${EXTENSIONS_IMAGE} true)
+          echo "EID=${EID}" >> $GITHUB_OUTPUT

-          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
-          rm -f neon_install.tar.gz .neon_current_version
+          CID=$(docker create ${COMPUTE_NODE_IMAGE} true)
+          echo "CID=${CID}" >> $GITHUB_OUTPUT

-      - name: Cleanup ansible folder
-        run: rm -rf ~/.ansible
+      - name: Extract postgres-extensions from container
+        run: |
+          rm -rf ./extensions-to-upload ./custom-extensions # Just in case
+
+          # In compute image we have a bit different directory layout
+          mkdir -p extensions-to-upload/share
+          docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/share/extension ./extensions-to-upload/share/extension
+          docker cp ${{ steps.create-container.outputs.CID }}:/usr/local/lib             ./extensions-to-upload/lib
+
+          # Delete Neon extensitons (they always present on compute-node image)
+          rm -rf ./extensions-to-upload/share/extension/neon*
+          rm -rf ./extensions-to-upload/lib/neon*
+
+          # Delete leftovers from the extension build step
+          rm -rf ./extensions-to-upload/lib/pgxs
+          rm -rf ./extensions-to-upload/lib/pkgconfig
+
+          docker cp ${{ steps.create-container.outputs.EID }}:/extensions ./custom-extensions
+          for EXT_NAME in $(ls ./custom-extensions); do
+            mkdir -p ./extensions-to-upload/${EXT_NAME}/share
+
+            mv ./custom-extensions/${EXT_NAME}/share/extension ./extensions-to-upload/${EXT_NAME}/share/extension
+            mv ./custom-extensions/${EXT_NAME}/lib             ./extensions-to-upload/${EXT_NAME}/lib
+          done
+
+      - name: Upload postgres-extensions to S3
+        # TODO: Reenable step after switching to the new extensions format (tar-gzipped + index.json)
+        if: false
+        run: |
+          for BUCKET in $(echo ${S3_BUCKETS}); do
+            aws s3 cp --recursive --only-show-errors ./extensions-to-upload s3://${BUCKET}/${{ needs.tag.outputs.build-tag }}/${{ matrix.version }}
+          done
+
+      - name: Cleanup
+        if: ${{ always() && (steps.create-container.outputs.CID || steps.create-container.outputs.EID) }}
+        run: |
+          docker rm ${{ steps.create-container.outputs.CID }} || true
+          docker rm ${{ steps.create-container.outputs.EID }} || true

  deploy:
    runs-on: [ self-hosted, gen3, small ]
    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    needs: [ push-docker-hub, tag, regress-tests ]
+    needs: [ upload-postgres-extensions-to-s3, promote-images, tag, regress-tests ]
    if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
    steps:
      - name: Fix git ownership
@@ -968,23 +1044,37 @@ jobs:

      - name: Trigger deploy workflow
        env:
-          GH_TOKEN: ${{ github.token }}
+          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        run: |
          if [[ "$GITHUB_REF_NAME" == "main" ]]; then
-            gh workflow run deploy-dev.yml --ref main -f branch=${{ github.sha }} -f dockerTag=${{needs.tag.outputs.build-tag}}
+            gh workflow --repo neondatabase/aws run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}}
          elif [[ "$GITHUB_REF_NAME" == "release" ]]; then
-            gh workflow run deploy-prod.yml --ref release -f branch=${{ github.sha }} -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
+            gh workflow --repo neondatabase/aws run deploy-prod.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f disclamerAcknowledged=true
          else
            echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'"
            exit 1
          fi

+      - name: Create git tag
+        if: github.ref_name == 'release'
+        uses: actions/github-script@v6
+        with:
+          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
+          retries: 5
+          script: |
+            github.rest.git.createRef({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              ref: "refs/tags/${{ needs.tag.outputs.build-tag }}",
+              sha: context.sha,
+            })
+
  promote-compatibility-data:
    runs-on: [ self-hosted, gen3, small ]
    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
+      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
      options: --init
-    needs: [ push-docker-hub, tag, regress-tests ]
+    needs: [ promote-images, tag, regress-tests ]
    if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
    steps:
      - name: Promote compatibility snapshot for the release
@@ -993,11 +1083,13 @@ jobs:
          PREFIX: artifacts/latest
        run: |
          # Update compatibility snapshot for the release
-          for build_type in debug release; do
-            OLD_FILENAME=compatibility-snapshot-${build_type}-pg14-${GITHUB_RUN_ID}.tar.zst
-            NEW_FILENAME=compatibility-snapshot-${build_type}-pg14.tar.zst
+          for pg_version in v14 v15; do
+            for build_type in debug release; do
+              OLD_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}-${GITHUB_RUN_ID}.tar.zst
+              NEW_FILENAME=compatibility-snapshot-${build_type}-pg${pg_version}.tar.zst

-            time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
+              time aws s3 mv --only-show-errors s3://${BUCKET}/${PREFIX}/${OLD_FILENAME} s3://${BUCKET}/${PREFIX}/${NEW_FILENAME}
+            done
          done

          # Update Neon artifact for the release (reuse already uploaded artifact)
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -1,179 +0,0 @@
-name: Neon Deploy dev
-
-on:
-  workflow_dispatch:
-    inputs:
-      dockerTag:
-        description: 'Docker tag to deploy'
-        required: true
-        type: string
-      branch:
-        description: 'Branch or commit used for deploy scripts and configs'
-        required: true
-        type: string
-        default: 'main'
-      deployStorage:
-        description: 'Deploy storage'
-        required: true
-        type: boolean
-        default: true
-      deployProxy:
-        description: 'Deploy proxy'
-        required: true
-        type: boolean
-        default: true
-      deployStorageBroker:
-        description: 'Deploy storage-broker'
-        required: true
-        type: boolean
-        default: true
-
-env:
-  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
-
-concurrency:
-  group: deploy-dev
-  cancel-in-progress: false
-
-jobs:
-  deploy-storage-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-      options: --user root --privileged
-    if: inputs.deployStorage
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        target_region: [ eu-west-1, us-east-2 ]
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Redeploy
-        run: |
-          export DOCKER_TAG=${{ inputs.dockerTag }}
-          cd "$(pwd)/.github/ansible"
-
-          ./get_binaries.sh
-
-          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
-          rm -f neon_install.tar.gz .neon_current_version
-
-      - name: Cleanup ansible folder
-        run: rm -rf ~/.ansible
-
-  deploy-proxy-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    if: inputs.deployProxy
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: dev-us-east-2-beta
-            deploy_link_proxy: true
-            deploy_legacy_scram_proxy: true
-          - target_region:  eu-west-1
-            target_cluster: dev-eu-west-1-zeta
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1-node16
-        with:
-          role-to-assume: arn:aws:iam::369495373322:role/github-runner
-          aws-region: eu-central-1
-          role-skip-session-tagging: true
-          role-duration-seconds: 1800
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Re-deploy scram proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-  
-      - name: Re-deploy link proxy
-        if: matrix.deploy_link_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-  
-      - name: Re-deploy legacy scram proxy
-        if: matrix.deploy_legacy_scram_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-  
-      - name: Cleanup helm folder
-        run: rm -rf ~/.cache
-  
-  deploy-storage-broker-new:
-    runs-on: [ self-hosted, gen3, small ]
-    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
-    if: inputs.deployStorageBroker
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: dev-us-east-2-beta
-          - target_region:  eu-west-1
-            target_cluster: dev-eu-west-1-zeta
-    environment:
-      name: dev-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-  
-      - name: Configure AWS Credentials
-        uses: aws-actions/configure-aws-credentials@v1-node16
-        with:
-          role-to-assume: arn:aws:iam::369495373322:role/github-runner
-          aws-region: eu-central-1
-          role-skip-session-tagging: true
-          role-duration-seconds: 1800
-  
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-  
-      - name: Deploy storage-broker
-        run:
-          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
-  
-      - name: Cleanup helm folder
-        run: rm -rf ~/.cache
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -1,167 +0,0 @@
-name: Neon Deploy prod
-
-on:
-  workflow_dispatch:
-    inputs:
-      dockerTag:
-        description: 'Docker tag to deploy'
-        required: true
-        type: string
-      branch:
-        description: 'Branch or commit used for deploy scripts and configs'
-        required: true
-        type: string
-        default: 'release'
-      deployStorage:
-        description: 'Deploy storage'
-        required: true
-        type: boolean
-        default: true
-      deployProxy:
-        description: 'Deploy proxy'
-        required: true
-        type: boolean
-        default: true
-      deployStorageBroker:
-        description: 'Deploy storage-broker'
-        required: true
-        type: boolean
-        default: true
-      disclamerAcknowledged:
-        description: 'I confirm that there is an emergency and I can not use regular release workflow'
-        required: true
-        type: boolean
-        default: false
-
-concurrency:
-  group: deploy-prod
-  cancel-in-progress: false
-
-jobs:
-  deploy-prod-new:
-    runs-on: prod
-    container:
-      image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-      options: --user root --privileged
-    if: inputs.deployStorage && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ]
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Redeploy
-        run: |
-          export DOCKER_TAG=${{ inputs.dockerTag }}
-          cd "$(pwd)/.github/ansible"
-
-          ./get_binaries.sh
-
-          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
-          rm -f neon_install.tar.gz .neon_current_version
-
-  deploy-proxy-prod-new:
-    runs-on: prod
-    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    if: inputs.deployProxy && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: prod-us-east-2-delta
-            deploy_link_proxy: true
-            deploy_legacy_scram_proxy: false
-          - target_region:  us-west-2
-            target_cluster: prod-us-west-2-eta
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: true
-          - target_region: eu-central-1
-            target_cluster: prod-eu-central-1-gamma
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-          - target_region: ap-southeast-1
-            target_cluster: prod-ap-southeast-1-epsilon
-            deploy_link_proxy: false
-            deploy_legacy_scram_proxy: false
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-
-      - name: Re-deploy scram proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-
-      - name: Re-deploy link proxy
-        if: matrix.deploy_link_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-
-      - name: Re-deploy legacy scram proxy
-        if: matrix.deploy_legacy_scram_proxy
-        run: |
-          DOCKER_TAG=${{ inputs.dockerTag }}
-          helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-
-  deploy-storage-broker-prod-new:
-    runs-on: prod
-    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
-    defaults:
-      run:
-        shell: bash
-    strategy:
-      matrix:
-        include:
-          - target_region:  us-east-2
-            target_cluster: prod-us-east-2-delta
-          - target_region:  us-west-2
-            target_cluster: prod-us-west-2-eta
-          - target_region: eu-central-1
-            target_cluster: prod-eu-central-1-gamma
-          - target_region: ap-southeast-1
-            target_cluster: prod-ap-southeast-1-epsilon
-    environment:
-      name: prod-${{ matrix.target_region }}
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-          fetch-depth: 0
-          ref: ${{ inputs.branch }}
-
-      - name: Configure environment
-        run: |
-          helm repo add neondatabase https://neondatabase.github.io/helm-charts
-          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name  ${{ matrix.target_cluster }}
-
-      - name: Deploy storage-broker
-        run:
-          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -3,7 +3,8 @@ name: Check neon with extra platform builds
 on:
  push:
    branches:
-    - main
+      - main
+      - ci-run/pr-*
  pull_request:

 defaults:
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -3,6 +3,7 @@ name: Create Release Branch
 on:
  schedule:
    - cron: '0 10 * * 2'
+  workflow_dispatch:

 jobs:
  create_release_branch:
--- a/.neon_clippy_args
+++ b/.neon_clippy_args
@@ -0,0 +1,4 @@
+# * `-A unknown_lints` – do not warn about unknown lint suppressions
+#                        that people with newer toolchains might use
+# * `-D warnings`      - fail on any warnings (`cargo` returns non-zero exit status)
+export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings"
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -2,7 +2,7 @@

 Howdy! Usual good software engineering practices apply. Write
 tests. Write comments. Follow standard Rust coding practices where
-possible. Use 'cargo fmt' and 'clippy' to tidy up formatting.
+possible. Use `cargo fmt` and `cargo clippy` to tidy up formatting.

 There are soft spots in the code, which could use cleanup,
 refactoring, additional comments, and so forth. Let's try to raise the
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,12 +3,26 @@ members = [
    "compute_tools",
    "control_plane",
    "pageserver",
+    "pageserver/ctl",
    "proxy",
    "safekeeper",
    "storage_broker",
    "workspace_hack",
    "trace",
-    "libs/*",
+    "libs/compute_api",
+    "libs/pageserver_api",
+    "libs/postgres_ffi",
+    "libs/safekeeper_api",
+    "libs/utils",
+    "libs/consumption_metrics",
+    "libs/postgres_backend",
+    "libs/pq_proto",
+    "libs/tenant_size_model",
+    "libs/metrics",
+    "libs/postgres_connection",
+    "libs/remote_storage",
+    "libs/tracing-utils",
+    "libs/postgres_ffi/wal_craft",
 ]

 [workspace.package]
@@ -18,12 +32,14 @@ license = "Apache-2.0"
 ## All dependency versions, used in the project
 [workspace.dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
+async-compression = { version = "0.4.0", features = ["tokio", "gzip"] }
+flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
-atty = "0.2.14"
-aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "0.21.0"
-aws-smithy-http = "0.51.0"
+aws-config = { version = "0.55", default-features = false, features=["rustls"] }
+aws-sdk-s3 = "0.27"
+aws-smithy-http = "0.55"
+aws-credential-types = "0.55"
 aws-types = "0.55"
 base64 = "0.13.0"
 bincode = "1.3"
@@ -62,23 +78,26 @@ jsonwebtoken = "8"
 libc = "0.2"
 md5 = "0.7.0"
 memoffset = "0.8"
+native-tls = "0.2"
 nix = "0.26"
 notify = "5.0.0"
 num_cpus = "1.15"
 num-traits = "0.2.15"
 once_cell = "1.13"
-opentelemetry = "0.18.0"
-opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
-opentelemetry-semantic-conventions = "0.10.0"
+opentelemetry = "0.19.0"
+opentelemetry-otlp = { version = "0.12.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
+opentelemetry-semantic-conventions = "0.11.0"
 parking_lot = "0.12"
+pbkdf2 = "0.12.1"
 pin-project-lite = "0.2"
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.11"
 rand = "0.8"
 regex = "1.4"
 reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
+reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_19"] }
 reqwest-middleware = "0.2.0"
+reqwest-retry = "0.2.2"
 routerify = "3"
 rpds = "0.13"
 rustls = "0.20"
@@ -105,13 +124,15 @@ tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.9.0"
 tokio-rustls = "0.23"
 tokio-stream = "0.1"
+tokio-tar = "0.3"
 tokio-util = { version = "0.7", features = ["io"] }
 toml = "0.7"
 toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
 tracing = "0.1"
-tracing-opentelemetry = "0.18.0"
-tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+tracing-error = "0.2.0"
+tracing-opentelemetry = "0.19.0"
+tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter"] }
 url = "2.2"
 uuid = { version = "1.2", features = ["v4", "serde"] }
 walkdir = "2.3.2"
@@ -123,11 +144,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }

 ## Other git libraries
 heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
@@ -152,16 +173,17 @@ utils = { version = "0.1", path = "./libs/utils/" }
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }

 ## Build dependencies
-criterion = "0.4"
+criterion = "0.5.1"
 rcgen = "0.10"
 rstest = "0.17"
 tempfile = "3.4"
 tonic-build = "0.9"

+[patch.crates-io]
+
 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-[patch.crates-io]
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="1aaedab101b23f7612042850d8f2036810fa7c7f" }

 ################# Binary contents sections

--- a/15
+++ b/15
@@ -2,7 +2,7 @@
 ### The image itself is mainly used as a container for the binaries and for starting e2e tests with custom parameters.
 ### By default, the binaries inside the image have some mock parameters and can start, but are not intended to be used
 ### inside this image in the real deployments.
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned

@@ -44,7 +44,14 @@ COPY --chown=nonroot . .
 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
+    && mold -run cargo build  \
+      --bin pg_sni_router  \
+      --bin pageserver  \
+      --bin pagectl  \
+      --bin safekeeper  \
+      --bin storage_broker  \
+      --bin proxy  \
+      --locked --release \
    && cachepot -s

 # Build final image
@@ -63,9 +70,9 @@ RUN set -e \
    && useradd -d /data neon \
    && chown -R neon:neon /data

+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router       /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver          /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
-COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir   /usr/local/bin
+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pagectl             /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/safekeeper          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker         /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy               /usr/local/bin
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -1,7 +1,8 @@
 ARG PG_VERSION
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned
+ARG BUILD_TAG

 #########################################################################################
 #
@@ -67,7 +68,7 @@ RUN apt update && \
 RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
    echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
    mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
-    cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
+    cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
    make clean && cp -R /sfcgal/* /

@@ -95,7 +96,7 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
    mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
    mkdir build && \
    cd build && \
-    cmake .. && \
+    cmake -DCMAKE_BUILD_TYPE=Release .. && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
@@ -131,10 +132,20 @@ RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.ta
 FROM build-deps AS h3-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-# packaged cmake is too old
-RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
+RUN case "$(uname -m)" in \
+      "x86_64") \
+        export CMAKE_CHECKSUM=739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 \
+        ;; \
+      "aarch64") \
+        export CMAKE_CHECKSUM=281b42627c9a1beed03e29706574d04c6c53fae4994472e90985ef018dd29c02 \
+        ;; \
+      *) \
+        echo "Unsupported architecture '$(uname -m)'. Supported are x86_64 and aarch64" && exit 1 \
+        ;; \
+    esac && \
+    wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-$(uname -m).sh \
      -q -O /tmp/cmake-install.sh \
-      && echo "739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 /tmp/cmake-install.sh" | sha256sum --check \
+      && echo "${CMAKE_CHECKSUM} /tmp/cmake-install.sh" | sha256sum --check \
      && chmod u+x /tmp/cmake-install.sh \
      && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
      && rm /tmp/cmake-install.sh
@@ -188,8 +199,8 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
 FROM build-deps AS vector-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
-    echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \
+RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.4.tar.gz -O pgvector.tar.gz && \
+    echo "1cb70a63f8928e396474796c22a20be9f7285a8a013009deb8152445b61b72e6 pgvector.tar.gz" | sha256sum --check && \
    mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -355,7 +366,7 @@ RUN apt-get update && \
    wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
    echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
    mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
-    ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \
+    ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
    cd build && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make install -j $(getconf _NPROCESSORS_ONLN) && \
@@ -393,6 +404,166 @@ RUN case "${PG_VERSION}" in \
    make install -j $(getconf _NPROCESSORS_ONLN) && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control

+#########################################################################################
+#
+# Layer "kq-imcx-pg-build"
+# compile kq_imcx extension
+#
+#########################################################################################
+FROM build-deps AS kq-imcx-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN apt-get update && \
+    apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
+    wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
+    echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
+    mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
+    mkdir build && \
+    cd build && \
+    cmake -DCMAKE_BUILD_TYPE=Release .. && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control
+
+#########################################################################################
+#
+# Layer "pg-cron-pg-build"
+# compile pg_cron extension
+#
+#########################################################################################
+FROM build-deps AS pg-cron-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.5.2.tar.gz -O pg_cron.tar.gz && \
+    echo "6f7f0980c03f1e2a6a747060e67bf4a303ca2a50e941e2c19daeed2b44dec744 pg_cron.tar.gz" | sha256sum --check && \
+    mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
+
+#########################################################################################
+#
+# Layer "rdkit-pg-build"
+# compile rdkit extension
+#
+#########################################################################################
+FROM build-deps AS rdkit-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+RUN apt-get update && \
+    apt-get install -y \
+        cmake \
+        libboost-iostreams1.74-dev \
+        libboost-regex1.74-dev \
+        libboost-serialization1.74-dev \
+        libboost-system1.74-dev \
+        libeigen3-dev \
+        libfreetype6-dev
+
+ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
+RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_1.tar.gz -O rdkit.tar.gz && \
+    echo "db346afbd0ba52c843926a2a62f8a38c7b774ffab37eaf382d789a824f21996c rdkit.tar.gz" | sha256sum --check && \
+    mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
+    cmake \
+        -D RDK_BUILD_CAIRO_SUPPORT=OFF \
+        -D RDK_BUILD_INCHI_SUPPORT=ON \
+        -D RDK_BUILD_AVALON_SUPPORT=ON \
+        -D RDK_BUILD_PYTHON_WRAPPERS=OFF \
+        -D RDK_BUILD_DESCRIPTORS3D=OFF \
+        -D RDK_BUILD_FREESASA_SUPPORT=OFF \
+        -D RDK_BUILD_COORDGEN_SUPPORT=ON \
+        -D RDK_BUILD_MOLINTERCHANGE_SUPPORT=OFF \
+        -D RDK_BUILD_YAEHMOP_SUPPORT=OFF \
+        -D RDK_BUILD_STRUCTCHECKER_SUPPORT=OFF \
+        -D RDK_USE_URF=OFF \
+        -D RDK_BUILD_PGSQL=ON \
+        -D RDK_PGSQL_STATIC=ON \
+        -D PostgreSQL_CONFIG=pg_config \
+        -D PostgreSQL_INCLUDE_DIR=`pg_config --includedir` \
+        -D PostgreSQL_TYPE_INCLUDE_DIR=`pg_config --includedir-server` \
+        -D PostgreSQL_LIBRARY_DIR=`pg_config --libdir` \
+        -D RDK_INSTALL_INTREE=OFF \
+        -D CMAKE_BUILD_TYPE=Release \
+        . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rdkit.control
+
+#########################################################################################
+#
+# Layer "pg-uuidv7-pg-build"
+# compile pg_uuidv7 extension
+#
+#########################################################################################
+FROM build-deps AS pg-uuidv7-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
+    echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
+    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
+
+#########################################################################################
+#
+# Layer "pg-roaringbitmap-pg-build"
+# compile pg_roaringbitmap extension
+#
+#########################################################################################
+FROM build-deps AS pg-roaringbitmap-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
+    echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
+    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
+
+#########################################################################################
+#
+# Layer "pg-embedding-pg-build"
+# compile pg_embedding extension
+#
+#########################################################################################
+FROM build-deps AS pg-embedding-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+# eeb3ba7c3a60c95b2604dd543c64b2f1bb4a3703 made on 15/07/2023
+# There is no release tag yet
+RUN wget https://github.com/neondatabase/pg_embedding/archive/eeb3ba7c3a60c95b2604dd543c64b2f1bb4a3703.tar.gz -O pg_embedding.tar.gz && \
+    echo "030846df723652f99a8689ce63b66fa0c23477a7fd723533ab8a6b28ab70730f pg_embedding.tar.gz" | sha256sum --check && \
+    mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/embedding.control
+
+#########################################################################################
+#
+# Layer "pg-anon-pg-build"
+# compile anon extension
+#
+#########################################################################################
+FROM build-deps AS pg-anon-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+# Kaniko doesn't allow to do `${from#/usr/local/pgsql/}`, so we use `${from:17}` instead
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://gitlab.com/dalibo/postgresql_anonymizer/-/archive/1.1.0/postgresql_anonymizer-1.1.0.tar.gz -O pg_anon.tar.gz && \
+    echo "08b09d2ff9b962f96c60db7e6f8e79cf7253eb8772516998fc35ece08633d3ad pg_anon.tar.gz" | sha256sum --check && \
+    mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
+    find /usr/local/pgsql -type f | sort  > /before.txt && \
+    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
+    find /usr/local/pgsql -type f | sort  > /after.txt && \
+    /bin/bash -c 'for from in $(comm -13 /before.txt /after.txt); do to=/extensions/anon/${from:17} && mkdir -p $(dirname ${to}) && cp -a ${from} ${to}; done'
+
 #########################################################################################
 #
 # Layer "rust extensions"
@@ -478,6 +649,22 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405
    cargo pgx install --release && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control

+#########################################################################################
+#
+# Layer "pg-pgx-ulid-build"
+# Compile "pgx_ulid" extension
+#
+#########################################################################################
+
+FROM rust-extensions-build AS pg-pgx-ulid-build
+
+RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.0.tar.gz -O pgx_ulid.tar.gz && \
+    echo "908b7358e6f846e87db508ae5349fb56a88ee6305519074b12f3d5b0ff09f791 pgx_ulid.tar.gz" | sha256sum --check && \
+    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
+    sed -i 's/pgx        = "=0.7.3"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
+    cargo pgx install --release && \
+    echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
+
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -485,6 +672,7 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405
 #
 #########################################################################################
 FROM build-deps AS neon-pg-ext-build
+# Public extensions
 COPY --from=postgis-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=postgis-build /sfcgal/* /
 COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -506,6 +694,13 @@ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -515,6 +710,10 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
    make -j $(getconf _NPROCESSORS_ONLN) \
        PG_CONFIG=/usr/local/pgsql/bin/pg_config \
        -C pgxn/neon_utils \
+        -s install && \
+    make -j $(getconf _NPROCESSORS_ONLN) \
+        PG_CONFIG=/usr/local/pgsql/bin/pg_config \
+        -C pgxn/hnsw \
        -s install

 #########################################################################################
@@ -523,6 +722,9 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) \
 #
 #########################################################################################
 FROM $REPOSITORY/$IMAGE:$TAG AS compute-tools
+ARG BUILD_TAG
+ENV BUILD_TAG=$BUILD_TAG
+
 USER nonroot
 # Copy entire project to get Cargo.* files with proper dependencies for the whole project
 COPY --chown=nonroot . .
@@ -547,6 +749,22 @@ RUN rm -r /usr/local/pgsql/include
 # if they were to be used by other libraries.
 RUN rm /usr/local/pgsql/lib/lib*.a

+#########################################################################################
+#
+# Extenstion only
+#
+#########################################################################################
+FROM scratch AS postgres-extensions
+# After the transition this layer will include all extensitons.
+# As for now, it's only for new custom ones
+#
+# # Default extensions
+# COPY --from=postgres-cleanup-layer /usr/local/pgsql/share/extension /usr/local/pgsql/share/extension
+# COPY --from=postgres-cleanup-layer /usr/local/pgsql/lib             /usr/local/pgsql/lib
+# Custom extensions
+COPY --from=pg-anon-pg-build /extensions/anon/lib/ /extensions/anon/lib
+COPY --from=pg-anon-pg-build /extensions/anon/share/extension /extensions/anon/share/extension
+
 #########################################################################################
 #
 # Final layer
@@ -575,14 +793,19 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
 # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
 # libxml2, libxslt1.1 for xml2
 # libzstd1 for zstd
+# libboost*, libfreetype6, and zlib1g for rdkit
 RUN apt update &&  \
    apt install --no-install-recommends -y \
        gdb \
-        locales \
        libicu67 \
        liblz4-1 \
        libreadline8 \
+        libboost-iostreams1.74.0 \
+        libboost-regex1.74.0 \
+        libboost-serialization1.74.0 \
+        libboost-system1.74.0 \
        libossp-uuid16 \
+        libfreetype6 \
        libgeos-c1v5 \
        libgdal28 \
        libproj19 \
@@ -591,7 +814,10 @@ RUN apt update &&  \
        libxml2 \
        libxslt1.1 \
        libzstd1 \
-        procps && \
+        libcurl4-openssl-dev \
+        locales \
+        procps \
+        zlib1g && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

--- a/Dockerfile.compute-tools
+++ b/Dockerfile.compute-tools
@@ -1,8 +1,9 @@
 # First transient image to build compute_tools binaries
 # NB: keep in sync with rust image version in .github/workflows/build_and_test.yml
-ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
+ARG REPOSITORY=neondatabase
 ARG IMAGE=rust
 ARG TAG=pinned
+ARG BUILD_TAG

 FROM $REPOSITORY/$IMAGE:$TAG AS rust-build
 WORKDIR /home/nonroot
@@ -16,6 +17,8 @@ ENV CACHEPOT_S3_KEY_PREFIX=cachepot
 ARG CACHEPOT_BUCKET=neon-github-dev
 #ARG AWS_ACCESS_KEY_ID
 #ARG AWS_SECRET_ACCESS_KEY
+ARG BUILD_TAG
+ENV BUILD_TAG=$BUILD_TAG

 COPY . .

--- a/Dockerfile.vm-compute-node
+++ b/Dockerfile.vm-compute-node
@@ -1,70 +0,0 @@
-# Note: this file *mostly* just builds on Dockerfile.compute-node
-
-ARG SRC_IMAGE
-ARG VM_INFORMANT_VERSION=v0.1.14
-# on libcgroup update, make sure to check bootstrap.sh for changes
-ARG LIBCGROUP_VERSION=v2.0.3
-
-# Pull VM informant, to copy from later
-FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
-
-# Build cgroup-tools
-#
-# At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
-# libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-informant
-# requires cgroup v2, so we'll build cgroup-tools ourselves.
-FROM debian:bullseye-slim as libcgroup-builder
-ARG LIBCGROUP_VERSION
-
-RUN set -exu \
-	&& apt update \
-	&& apt install --no-install-recommends -y \
-		git \
-		ca-certificates \
-		automake \
-		cmake \
-		make \
-		gcc \
-		byacc \
-		flex \
-		libtool \
-		libpam0g-dev \
-	&& git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
-	&& INSTALL_DIR="/libcgroup-install" \
-	&& mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
-	&& cd libcgroup \
-	# extracted from bootstrap.sh, with modified flags:
-	&& (test -d m4 || mkdir m4) \
-	&& autoreconf -fi \
-	&& rm -rf autom4te.cache \
-	&& CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
-	# actually build the thing...
-	&& make install
-
-# Combine, starting from non-VM compute node image.
-FROM $SRC_IMAGE as base
-
-# Temporarily set user back to root so we can run adduser, set inittab
-USER root
-RUN adduser vm-informant --disabled-password --no-create-home
-
-RUN set -e \
-	&& rm -f /etc/inittab \
-	&& touch /etc/inittab
-
-RUN set -e \
-	&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
-	&& CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \
-	&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
-	&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab
-
-USER postgres
-
-ADD vm-cgconfig.conf /etc/cgconfig.conf
-COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
-
-COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/
-COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/
-COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
-
-ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]
--- a/8
+++ b/8
@@ -138,6 +138,11 @@ neon-pg-ext-%: postgres-%
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile install
+	+@echo "Compiling hnsw $*"
+	mkdir -p $(POSTGRES_INSTALL_DIR)/build/hnsw-$*
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
+		-C $(POSTGRES_INSTALL_DIR)/build/hnsw-$* \
+		-f $(ROOT_PROJECT_DIR)/pgxn/hnsw/Makefile install

 .PHONY: neon-pg-ext-clean-%
 neon-pg-ext-clean-%:
@@ -153,6 +158,9 @@ neon-pg-ext-clean-%:
 	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
 	-C $(POSTGRES_INSTALL_DIR)/build/neon-utils-$* \
 	-f $(ROOT_PROJECT_DIR)/pgxn/neon_utils/Makefile clean
+	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
+	-C $(POSTGRES_INSTALL_DIR)/build/hnsw-$* \
+	-f $(ROOT_PROJECT_DIR)/pgxn/hnsw/Makefile clean

 .PHONY: neon-pg-ext
 neon-pg-ext: \
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
+[![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
+
 # Neon

 Neon is a serverless open-source alternative to AWS Aurora Postgres. It separates storage and compute and substitutes the PostgreSQL storage layer by redistributing data across a cluster of nodes.
@@ -15,7 +17,7 @@ The Neon storage engine consists of two major components:
 - Pageserver. Scalable storage backend for the compute nodes.
 - Safekeepers. The safekeepers form a redundant WAL service that received WAL from the compute node, and stores it durably until it has been processed by the pageserver and uploaded to cloud storage.

-See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more information.
+See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more information.

 ## Running local installation

@@ -26,18 +28,19 @@ See developer documentation in [/docs/SUMMARY.md](/docs/SUMMARY.md) for more inf
 * On Ubuntu or Debian, this set of packages should be sufficient to build the code:
 ```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
-libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler
+libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \
+libcurl4-openssl-dev
 ```
 * On Fedora, these packages are needed:
 ```bash
 dnf install flex bison readline-devel zlib-devel openssl-devel \
  libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
-  protobuf-devel
+  protobuf-devel libcurl-devel
 ```
 * On Arch based systems, these packages are needed:
 ```bash
 pacman -S base-devel readline zlib libseccomp openssl clang \
-postgresql-libs cmake postgresql protobuf
+postgresql-libs cmake postgresql protobuf curl
 ```

 Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).
@@ -128,32 +131,31 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
 ```sh
 # Create repository in .neon with proper paths to binaries and data
 # Later that would be responsibility of a package install script
-> ./target/debug/neon_local init
-Starting pageserver at '127.0.0.1:64000' in '.neon'.
+> cargo neon init
+Initializing pageserver node 1 at '127.0.0.1:64000' in ".neon"

 # start pageserver, safekeeper, and broker for their intercommunication
-> ./target/debug/neon_local start
-Starting neon broker at 127.0.0.1:50051
+> cargo neon start
+Starting neon broker at 127.0.0.1:50051.
 storage_broker started, pid: 2918372
-Starting pageserver at '127.0.0.1:64000' in '.neon'.
+Starting pageserver node 1 at '127.0.0.1:64000' in ".neon".
 pageserver started, pid: 2918386
 Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
 safekeeper 1 started, pid: 2918437

 # create initial tenant and use it as a default for every future neon_local invocation
-> ./target/debug/neon_local tenant create --set-default
+> cargo neon tenant create --set-default
 tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
 Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one

 # start postgres compute node
-> ./target/debug/neon_local endpoint start main
+> cargo neon endpoint start main
 Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
-Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
-Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
+Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55432/postgres'

 # check list of running postgres instances
-> ./target/debug/neon_local endpoint list
+> cargo neon endpoint list
 ENDPOINT  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS
 main      127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
 ```
@@ -175,29 +177,28 @@ postgres=# select * from t;
 3. And create branches and run postgres on them:
 ```sh
 # create branch named migration_check
-> ./target/debug/neon_local timeline branch --branch-name migration_check
+> cargo neon timeline branch --branch-name migration_check
 Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c. Ancestor timeline: 'main'

 # check branches tree
-> ./target/debug/neon_local timeline list
+> cargo neon timeline list
 (L) main [de200bd42b49cc1814412c7e592dd6e9]
 (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]

 # start postgres on that branch
-> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
+> cargo neon endpoint start migration_check --branch-name migration_check
 Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
-Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
-Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
+Starting postgres at 'postgresql://cloud_admin@127.0.0.1:55434/postgres'

 # check the new list of running postgres instances
-> ./target/debug/neon_local endpoint list
+> cargo neon endpoint list
 ENDPOINT         ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS
 main             127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main             0/16F9A38  running
- migration_check  127.0.0.1:55433  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running
+ migration_check  127.0.0.1:55434  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running

 # this new postgres instance will have all the data from 'main' postgres,
 # but all modifications would not affect data in original postgres
-> psql -p55433 -h 127.0.0.1 -U cloud_admin postgres
+> psql -p55434 -h 127.0.0.1 -U cloud_admin postgres
 postgres=# select * from t;
 key | value
 -----+-------
@@ -219,7 +220,7 @@ postgres=# select * from t;
 4. If you want to run tests afterward (see below), you must stop all the running of the pageserver, safekeeper, and postgres instances
   you have just started. You can terminate them all with one command:
 ```sh
-> ./target/debug/neon_local stop
+> cargo neon stop
 ```

 ## Running tests
@@ -236,9 +237,9 @@ CARGO_BUILD_FLAGS="--features=testing" make

 ## Documentation

-[/docs/](/docs/) Contains a top-level overview of all available markdown documentation.
+[docs](/docs) Contains a top-level overview of all available markdown documentation.

- [/docs/sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.
+- [sourcetree.md](/docs/sourcetree.md) contains overview of source tree layout.

 To view your `rustdoc` documentation in a browser, try running `cargo doc --no-deps --open`

@@ -263,6 +264,6 @@ To get more familiar with this aspect, refer to:

 ## Join the development

- Read `CONTRIBUTING.md` to learn about project code style and practices.
- To get familiar with a source tree layout, use [/docs/sourcetree.md](/docs/sourcetree.md).
+- Read [CONTRIBUTING.md](/CONTRIBUTING.md) to learn about project code style and practices.
+- To get familiar with a source tree layout, use [sourcetree.md](/docs/sourcetree.md).
 - To learn more about PostgreSQL internals, check http://www.interdb.jp/pg/index.html
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -6,8 +6,10 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
+async-compression.workspace = true
 chrono.workspace = true
 clap.workspace = true
+flate2.workspace = true
 futures.workspace = true
 hyper = { workspace = true, features = ["full"] }
 notify.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -30,6 +30,7 @@
 //!             -b /usr/local/bin/postgres
 //! ```
 //!
+use std::collections::HashMap;
 use std::fs::File;
 use std::panic;
 use std::path::Path;
@@ -53,11 +54,20 @@ use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;

+const BUILD_TAG_DEFAULT: &str = "local";
+
 fn main() -> Result<()> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

+    let build_tag = option_env!("BUILD_TAG").unwrap_or(BUILD_TAG_DEFAULT);
+
+    info!("build_tag: {build_tag}");
+
    let matches = cli().get_matches();

+    let http_port = *matches
+        .get_one::<u16>("http-port")
+        .expect("http-port is required");
    let pgdata = matches
        .get_one::<String>("pgdata")
        .expect("PGDATA path is required");
@@ -67,13 +77,61 @@ fn main() -> Result<()> {
    let spec_json = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");

+    // Extract OpenTelemetry context for the startup actions from the
+    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
+    // tracing context.
+    //
+    // This is used to propagate the context for the 'start_compute' operation
+    // from the neon control plane. This allows linking together the wider
+    // 'start_compute' operation that creates the compute container, with the
+    // startup actions here within the container.
+    //
+    // There is no standard for passing context in env variables, but a lot of
+    // tools use TRACEPARENT/TRACESTATE, so we use that convention too. See
+    // https://github.com/open-telemetry/opentelemetry-specification/issues/740
+    //
+    // Switch to the startup context here, and exit it once the startup has
+    // completed and Postgres is up and running.
+    //
+    // If this pod is pre-created without binding it to any particular endpoint
+    // yet, this isn't the right place to enter the startup context. In that
+    // case, the control plane should pass the tracing context as part of the
+    // /configure API call.
+    //
+    // NOTE: This is supposed to only cover the *startup* actions. Once
+    // postgres is configured and up-and-running, we exit this span. Any other
+    // actions that are performed on incoming HTTP requests, for example, are
+    // performed in separate spans.
+    //
+    // XXX: If the pod is restarted, we perform the startup actions in the same
+    // context as the original startup actions, which probably doesn't make
+    // sense.
+    let mut startup_tracing_carrier: HashMap<String, String> = HashMap::new();
+    if let Ok(val) = std::env::var("TRACEPARENT") {
+        startup_tracing_carrier.insert("traceparent".to_string(), val);
+    }
+    if let Ok(val) = std::env::var("TRACESTATE") {
+        startup_tracing_carrier.insert("tracestate".to_string(), val);
+    }
+    let startup_context_guard = if !startup_tracing_carrier.is_empty() {
+        use opentelemetry::propagation::TextMapPropagator;
+        use opentelemetry::sdk::propagation::TraceContextPropagator;
+        let guard = TraceContextPropagator::new()
+            .extract(&startup_tracing_carrier)
+            .attach();
+        info!("startup tracing context attached");
+        Some(guard)
+    } else {
+        None
+    };
+
    let compute_id = matches.get_one::<String>("compute-id");
    let control_plane_uri = matches.get_one::<String>("control-plane-uri");

    // Try to use just 'postgres' if no path is provided
    let pgbin = matches.get_one::<String>("pgbin").unwrap();

-    let mut spec = None;
+    let spec;
    let mut live_config_allowed = false;
    match spec_json {
        // First, try to get cluster spec from the cli argument
@@ -89,9 +147,13 @@ fn main() -> Result<()> {
            } else if let Some(id) = compute_id {
                if let Some(cp_base) = control_plane_uri {
                    live_config_allowed = true;
-                    if let Ok(s) = get_spec_from_control_plane(cp_base, id) {
-                        spec = Some(s);
-                    }
+                    spec = match get_spec_from_control_plane(cp_base, id) {
+                        Ok(s) => s,
+                        Err(e) => {
+                            error!("cannot get response from control plane: {}", e);
+                            panic!("neither spec nor confirmation that compute is in the Empty state was received");
+                        }
+                    };
                } else {
                    panic!("must specify both --control-plane-uri and --compute-id or none");
                }
@@ -114,7 +176,6 @@ fn main() -> Result<()> {
        spec_set = false;
    }
    let compute_node = ComputeNode {
-        start_time: Utc::now(),
        connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
        pgdata: pgdata.to_string(),
        pgbin: pgbin.to_string(),
@@ -126,7 +187,8 @@ fn main() -> Result<()> {

    // Launch http service first, so we were able to serve control-plane
    // requests, while configuration is still in progress.
-    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
+    let _http_handle =
+        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");

    if !spec_set {
        // No spec provided, hang waiting for it.
@@ -145,39 +207,24 @@ fn main() -> Result<()> {

    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
-    let pspec = state.pspec.as_ref().expect("spec must be set");
-    let startup_tracing_context = pspec.spec.startup_tracing_context.clone();
+
+    // Record for how long we slept waiting for the spec.
+    state.metrics.wait_for_spec_ms = Utc::now()
+        .signed_duration_since(state.start_time)
+        .to_std()
+        .unwrap()
+        .as_millis() as u64;
+    // Reset start time to the actual start of the configuration, so that
+    // total startup time was properly measured at the end.
+    state.start_time = Utc::now();
+
    state.status = ComputeStatus::Init;
    compute.state_changed.notify_all();
    drop(state);

-    // Extract OpenTelemetry context for the startup actions from the spec, and
-    // attach it to the current tracing context.
-    //
-    // This is used to propagate the context for the 'start_compute' operation
-    // from the neon control plane. This allows linking together the wider
-    // 'start_compute' operation that creates the compute container, with the
-    // startup actions here within the container.
-    //
-    // Switch to the startup context here, and exit it once the startup has
-    // completed and Postgres is up and running.
-    //
-    // NOTE: This is supposed to only cover the *startup* actions. Once
-    // postgres is configured and up-and-running, we exit this span. Any other
-    // actions that are performed on incoming HTTP requests, for example, are
-    // performed in separate spans.
-    let startup_context_guard = if let Some(ref carrier) = startup_tracing_context {
-        use opentelemetry::propagation::TextMapPropagator;
-        use opentelemetry::sdk::propagation::TraceContextPropagator;
-        Some(TraceContextPropagator::new().extract(carrier).attach())
-    } else {
-        None
-    };
-
    // Launch remaining service threads
-    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
-    let _configurator_handle =
-        launch_configurator(&compute).expect("cannot launch configurator thread");
+    let _monitor_handle = launch_monitor(&compute);
+    let _configurator_handle = launch_configurator(&compute);

    // Start Postgres
    let mut delay_exit = false;
@@ -208,6 +255,16 @@ fn main() -> Result<()> {
        exit_code = ecode.code()
    }

+    // Maybe sync safekeepers again, to speed up next startup
+    let compute_state = compute.state.lock().unwrap().clone();
+    let pspec = compute_state.pspec.as_ref().expect("spec must be set");
+    if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) {
+        info!("syncing safekeepers on shutdown");
+        let storage_auth_token = pspec.storage_auth_token.clone();
+        let lsn = compute.sync_safekeepers(storage_auth_token)?;
+        info!("synced safekeepers at lsn {lsn}");
+    }
+
    if let Err(err) = compute.check_for_core_dumps() {
        error!("error while checking for core dumps: {err:?}");
    }
@@ -248,6 +305,14 @@ fn cli() -> clap::Command {
    let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
    clap::Command::new("compute_ctl")
        .version(version)
+        .arg(
+            Arg::new("http-port")
+                .long("http-port")
+                .value_name("HTTP_PORT")
+                .default_value("3080")
+                .value_parser(clap::value_parser!(u16))
+                .required(false),
+        )
        .arg(
            Arg::new("connstr")
                .short('C')
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -1,20 +1,5 @@
-//
-// XXX: This starts to be scarry similar to the `PostgresNode` from `control_plane`,
-// but there are several things that makes `PostgresNode` usage inconvenient in the
-// cloud:
-// - it inherits from `LocalEnv`, which contains **all-all** the information about
-//   a complete service running
-// - it uses `PageServerNode` with information about http endpoint, which we do not
-//   need in the cloud again
-// - many tiny pieces like, for example, we do not use `pg_ctl` in the cloud
-//
-// Thus, to use `PostgresNode` in the cloud, we need to 'mock' a bunch of required
-// attributes (not required for the cloud). Yet, it is still tempting to unify these
-// `PostgresNode` and `ComputeNode` and use one in both places.
-//
-// TODO: stabilize `ComputeNode` and think about using it in the `control_plane`.
-//
 use std::fs;
+use std::io::BufRead;
 use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::{Command, Stdio};
@@ -30,7 +15,8 @@ use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

 use compute_api::responses::{ComputeMetrics, ComputeStatus};
-use compute_api::spec::ComputeSpec;
+use compute_api::spec::{ComputeMode, ComputeSpec};
+use utils::measured_stream::MeasuredReader;

 use crate::config;
 use crate::pg_helpers::*;
@@ -38,7 +24,6 @@ use crate::spec::*;

 /// Compute node info shared across several `compute_ctl` threads.
 pub struct ComputeNode {
-    pub start_time: DateTime<Utc>,
    // Url type maintains proper escaping
    pub connstr: url::Url,
    pub pgdata: String,
@@ -66,9 +51,11 @@ pub struct ComputeNode {

 #[derive(Clone, Debug)]
 pub struct ComputeState {
+    pub start_time: DateTime<Utc>,
    pub status: ComputeStatus,
-    /// Timestamp of the last Postgres activity
-    pub last_active: DateTime<Utc>,
+    /// Timestamp of the last Postgres activity. It could be `None` if
+    /// compute wasn't used since start.
+    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
    pub pspec: Option<ParsedSpec>,
    pub metrics: ComputeMetrics,
@@ -77,8 +64,9 @@ pub struct ComputeState {
 impl ComputeState {
    pub fn new() -> Self {
        Self {
+            start_time: Utc::now(),
            status: ComputeStatus::Empty,
-            last_active: Utc::now(),
+            last_active: None,
            error: None,
            pspec: None,
            metrics: ComputeMetrics::default(),
@@ -104,26 +92,38 @@ pub struct ParsedSpec {
 impl TryFrom<ComputeSpec> for ParsedSpec {
    type Error = String;
    fn try_from(spec: ComputeSpec) -> Result<Self, String> {
+        // Extract the options from the spec file that are needed to connect to
+        // the storage system.
+        //
+        // For backwards-compatibility, the top-level fields in the spec file
+        // may be empty. In that case, we need to dig them from the GUCs in the
+        // cluster.settings field.
        let pageserver_connstr = spec
-            .cluster
-            .settings
-            .find("neon.pageserver_connstring")
+            .pageserver_connstring
+            .clone()
+            .or_else(|| spec.cluster.settings.find("neon.pageserver_connstring"))
            .ok_or("pageserver connstr should be provided")?;
        let storage_auth_token = spec.storage_auth_token.clone();
-        let tenant_id: TenantId = spec
-            .cluster
-            .settings
-            .find("neon.tenant_id")
-            .ok_or("tenant id should be provided")
-            .map(|s| TenantId::from_str(&s))?
-            .or(Err("invalid tenant id"))?;
-        let timeline_id: TimelineId = spec
-            .cluster
-            .settings
-            .find("neon.timeline_id")
-            .ok_or("timeline id should be provided")
-            .map(|s| TimelineId::from_str(&s))?
-            .or(Err("invalid timeline id"))?;
+        let tenant_id: TenantId = if let Some(tenant_id) = spec.tenant_id {
+            tenant_id
+        } else {
+            spec.cluster
+                .settings
+                .find("neon.tenant_id")
+                .ok_or("tenant id should be provided")
+                .map(|s| TenantId::from_str(&s))?
+                .or(Err("invalid tenant id"))?
+        };
+        let timeline_id: TimelineId = if let Some(timeline_id) = spec.timeline_id {
+            timeline_id
+        } else {
+            spec.cluster
+                .settings
+                .find("neon.timeline_id")
+                .ok_or("timeline id should be provided")
+                .map(|s| TimelineId::from_str(&s))?
+                .or(Err("invalid timeline id"))?
+        };

        Ok(ParsedSpec {
            spec,
@@ -135,6 +135,84 @@ impl TryFrom<ComputeSpec> for ParsedSpec {
    }
 }

+/// Create special neon_superuser role, that's a slightly nerfed version of a real superuser
+/// that we give to customers
+fn create_neon_superuser(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
+    let roles = spec
+        .cluster
+        .roles
+        .iter()
+        .map(|r| escape_literal(&r.name))
+        .collect::<Vec<_>>();
+
+    let dbs = spec
+        .cluster
+        .databases
+        .iter()
+        .map(|db| escape_literal(&db.name))
+        .collect::<Vec<_>>();
+
+    let roles_decl = if roles.is_empty() {
+        String::from("roles text[] := NULL;")
+    } else {
+        format!(
+            r#"
+               roles text[] := ARRAY(SELECT rolname
+                                     FROM pg_catalog.pg_roles
+                                     WHERE rolname IN ({}));"#,
+            roles.join(", ")
+        )
+    };
+
+    let database_decl = if dbs.is_empty() {
+        String::from("dbs text[] := NULL;")
+    } else {
+        format!(
+            r#"
+               dbs text[] := ARRAY(SELECT datname
+                                   FROM pg_catalog.pg_database
+                                   WHERE datname IN ({}));"#,
+            dbs.join(", ")
+        )
+    };
+
+    // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases
+    // (see https://www.postgresql.org/docs/current/ddl-priv.html)
+    let query = format!(
+        r#"
+            DO $$
+                DECLARE
+                    r text;
+                    {}
+                    {}
+                BEGIN
+                    IF NOT EXISTS (
+                        SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser')
+                    THEN
+                        CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN IN ROLE pg_read_all_data, pg_write_all_data;
+                        IF array_length(roles, 1) IS NOT NULL THEN
+                            EXECUTE format('GRANT neon_superuser TO %s',
+                                           array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', '));
+                            FOREACH r IN ARRAY roles LOOP
+                                EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', quote_ident(r));
+                            END LOOP;
+                        END IF;
+                        IF array_length(dbs, 1) IS NOT NULL THEN
+                            EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser',
+                                           array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(dbs) as x), ', '));
+                        END IF;
+                    END IF;
+                END
+            $$;"#,
+        roles_decl, database_decl,
+    );
+    info!("Neon superuser created:\n{}", &query);
+    client
+        .simple_query(&query)
+        .map_err(|e| anyhow::anyhow!(e).context(query))?;
+    Ok(())
+}
+
 impl ComputeNode {
    pub fn set_status(&self, status: ComputeStatus) {
        let mut state = self.state.lock().unwrap();
@@ -159,7 +237,7 @@ impl ComputeNode {

    // Get basebackup from the libpq connection to pageserver using `connstr` and
    // unarchive it to `pgdata` directory overriding all its previous content.
-    #[instrument(skip(self, compute_state))]
+    #[instrument(skip_all, fields(%lsn))]
    fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let spec = compute_state.pspec.as_ref().expect("spec must be set");
        let start_time = Utc::now();
@@ -177,20 +255,52 @@ impl ComputeNode {

        let mut client = config.connect(NoTls)?;
        let basebackup_cmd = match lsn {
-            Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute
-            _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn),
+            // HACK We don't use compression on first start (Lsn(0)) because there's no API for it
+            Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id),
+            _ => format!(
+                "basebackup {} {} {} --gzip",
+                spec.tenant_id, spec.timeline_id, lsn
+            ),
        };
+
        let copyreader = client.copy_out(basebackup_cmd.as_str())?;
+        let mut measured_reader = MeasuredReader::new(copyreader);
+
+        // Check the magic number to see if it's a gzip or not. Even though
+        // we might explicitly ask for gzip, an old pageserver with no implementation
+        // of gzip compression might send us uncompressed data. After some time
+        // passes we can assume all pageservers know how to compress and we can
+        // delete this check.
+        //
+        // If the data is not gzip, it will be tar. It will not be mistakenly
+        // recognized as gzip because tar starts with an ascii encoding of a filename,
+        // and 0x1f and 0x8b are unlikely first characters for any filename. Moreover,
+        // we send the "global" directory first from the pageserver, so it definitely
+        // won't be recognized as gzip.
+        let mut bufreader = std::io::BufReader::new(&mut measured_reader);
+        let gzip = {
+            let peek = bufreader.fill_buf().unwrap();
+            peek[0] == 0x1f && peek[1] == 0x8b
+        };

        // Read the archive directly from the `CopyOutReader`
        //
        // Set `ignore_zeros` so that unpack() reads all the Copy data and
        // doesn't stop at the end-of-archive marker. Otherwise, if the server
        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(copyreader);
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.pgdata)?;
+        if gzip {
+            let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader));
+            ar.set_ignore_zeros(true);
+            ar.unpack(&self.pgdata)?;
+        } else {
+            let mut ar = tar::Archive::new(&mut bufreader);
+            ar.set_ignore_zeros(true);
+            ar.unpack(&self.pgdata)?;
+        };

+        // Report metrics
+        self.state.lock().unwrap().metrics.basebackup_bytes =
+            measured_reader.get_byte_count() as u64;
        self.state.lock().unwrap().metrics.basebackup_ms = Utc::now()
            .signed_duration_since(start_time)
            .to_std()
@@ -201,8 +311,8 @@ impl ComputeNode {

    // Run `postgres` in a special mode with `--sync-safekeepers` argument
    // and return the reported LSN back to the caller.
-    #[instrument(skip(self, storage_auth_token))]
-    fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
+    #[instrument(skip_all)]
+    pub fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
        let start_time = Utc::now();

        let sync_handle = Command::new(&self.pgbin)
@@ -246,20 +356,37 @@ impl ComputeNode {

    /// Do all the preparations like PGDATA directory creation, configuration,
    /// safekeepers sync, basebackup, etc.
-    #[instrument(skip(self, compute_state))]
+    #[instrument(skip_all)]
    pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
        let pspec = compute_state.pspec.as_ref().expect("spec must be set");
+        let spec = &pspec.spec;
        let pgdata_path = Path::new(&self.pgdata);

        // Remove/create an empty pgdata directory and put configuration there.
        self.create_pgdata()?;
        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?;

-        info!("starting safekeepers syncing");
-        let lsn = self
-            .sync_safekeepers(pspec.storage_auth_token.clone())
-            .with_context(|| "failed to sync safekeepers")?;
-        info!("safekeepers synced at LSN {}", lsn);
+        // Syncing safekeepers is only safe with primary nodes: if a primary
+        // is already connected it will be kicked out, so a secondary (standby)
+        // cannot sync safekeepers.
+        let lsn = match spec.mode {
+            ComputeMode::Primary => {
+                info!("starting safekeepers syncing");
+                let lsn = self
+                    .sync_safekeepers(pspec.storage_auth_token.clone())
+                    .with_context(|| "failed to sync safekeepers")?;
+                info!("safekeepers synced at LSN {}", lsn);
+                lsn
+            }
+            ComputeMode::Static(lsn) => {
+                info!("Starting read-only node at static LSN {}", lsn);
+                lsn
+            }
+            ComputeMode::Replica => {
+                info!("Initializing standby from latest Pageserver LSN");
+                Lsn(0)
+            }
+        };

        info!(
            "getting basebackup@{} from pageserver {}",
@@ -275,12 +402,19 @@ impl ComputeNode {
        // Update pg_hba.conf received with basebackup.
        update_pg_hba(pgdata_path)?;

+        match spec.mode {
+            ComputeMode::Primary => {}
+            ComputeMode::Replica | ComputeMode::Static(..) => {
+                add_standby_signal(pgdata_path)?;
+            }
+        }
+
        Ok(())
    }

    /// Start Postgres as a child process and manage DBs/roles.
    /// After that this will hang waiting on the postmaster process to exit.
-    #[instrument(skip(self))]
+    #[instrument(skip_all)]
    pub fn start_postgres(
        &self,
        storage_auth_token: Option<String>,
@@ -304,7 +438,7 @@ impl ComputeNode {
    }

    /// Do initial configuration of the already started Postgres.
-    #[instrument(skip(self, compute_state))]
+    #[instrument(skip_all)]
    pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
        // If connection fails,
        // it may be the old node with `zenith_admin` superuser.
@@ -325,6 +459,8 @@ impl ComputeNode {
                    .map_err(|_| anyhow::anyhow!("invalid connstr"))?;

                let mut client = Client::connect(zenith_admin_connstr.as_str(), NoTls)?;
+                // Disable forwarding so that users don't get a cloud_admin role
+                client.simple_query("SET neon.forward_ddl = false")?;
                client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
                client.simple_query("GRANT zenith_admin TO cloud_admin")?;
                drop(client);
@@ -335,29 +471,28 @@ impl ComputeNode {
            Ok(client) => client,
        };

+        // Disable DDL forwarding because control plane already knows about these roles/databases.
+        client.simple_query("SET neon.forward_ddl = false")?;
+
        // Proceed with post-startup configuration. Note, that order of operations is important.
        let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
+        create_neon_superuser(spec, &mut client)?;
        handle_roles(spec, &mut client)?;
        handle_databases(spec, &mut client)?;
        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
-        handle_grants(spec, self.connstr.as_str(), &mut client)?;
+        handle_grants(spec, self.connstr.as_str())?;
        handle_extensions(spec, &mut client)?;

        // 'Close' connection
        drop(client);

-        info!(
-            "finished configuration of compute for project {}",
-            spec.cluster.cluster_id
-        );
-
        Ok(())
    }

    // We could've wrapped this around `pg_ctl reload`, but right now we don't use
    // `pg_ctl` for start / stop, so this just seems much easier to do as we already
    // have opened connection to Postgres and superuser access.
-    #[instrument(skip(self, client))]
+    #[instrument(skip_all)]
    fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
        client.simple_query("SELECT pg_reload_conf()")?;
        Ok(())
@@ -365,7 +500,7 @@ impl ComputeNode {

    /// Similar to `apply_config()`, but does a bit different sequence of operations,
    /// as it's used to reconfigure a previously started and configured Postgres node.
-    #[instrument(skip(self))]
+    #[instrument(skip_all)]
    pub fn reconfigure(&self) -> Result<()> {
        let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;

@@ -377,11 +512,15 @@ impl ComputeNode {
        self.pg_reload_conf(&mut client)?;

        // Proceed with post-startup configuration. Note, that order of operations is important.
-        handle_roles(&spec, &mut client)?;
-        handle_databases(&spec, &mut client)?;
-        handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
-        handle_grants(&spec, self.connstr.as_str(), &mut client)?;
-        handle_extensions(&spec, &mut client)?;
+        // Disable DDL forwarding because control plane already knows about these roles/databases.
+        if spec.mode == ComputeMode::Primary {
+            client.simple_query("SET neon.forward_ddl = false")?;
+            handle_roles(&spec, &mut client)?;
+            handle_databases(&spec, &mut client)?;
+            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
+            handle_grants(&spec, self.connstr.as_str())?;
+            handle_extensions(&spec, &mut client)?;
+        }

        // 'Close' connection
        drop(client);
@@ -396,42 +535,61 @@ impl ComputeNode {
        Ok(())
    }

-    #[instrument(skip(self))]
+    #[instrument(skip_all)]
    pub fn start_compute(&self) -> Result<std::process::Child> {
        let compute_state = self.state.lock().unwrap().clone();
-        let spec = compute_state.pspec.as_ref().expect("spec must be set");
+        let pspec = compute_state.pspec.as_ref().expect("spec must be set");
        info!(
            "starting compute for project {}, operation {}, tenant {}, timeline {}",
-            spec.spec.cluster.cluster_id,
-            spec.spec.operation_uuid.as_deref().unwrap_or("None"),
-            spec.tenant_id,
-            spec.timeline_id,
+            pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None"),
+            pspec.spec.operation_uuid.as_deref().unwrap_or("None"),
+            pspec.tenant_id,
+            pspec.timeline_id,
        );

        self.prepare_pgdata(&compute_state)?;

        let start_time = Utc::now();
+        let pg = self.start_postgres(pspec.storage_auth_token.clone())?;

-        let pg = self.start_postgres(spec.storage_auth_token.clone())?;
-
-        self.apply_config(&compute_state)?;
+        let config_time = Utc::now();
+        if pspec.spec.mode == ComputeMode::Primary && !pspec.spec.skip_pg_catalog_updates {
+            self.apply_config(&compute_state)?;
+        }

        let startup_end_time = Utc::now();
        {
            let mut state = self.state.lock().unwrap();
-            state.metrics.config_ms = startup_end_time
+            state.metrics.start_postgres_ms = config_time
                .signed_duration_since(start_time)
                .to_std()
                .unwrap()
                .as_millis() as u64;
+            state.metrics.config_ms = startup_end_time
+                .signed_duration_since(config_time)
+                .to_std()
+                .unwrap()
+                .as_millis() as u64;
            state.metrics.total_startup_ms = startup_end_time
-                .signed_duration_since(self.start_time)
+                .signed_duration_since(compute_state.start_time)
                .to_std()
                .unwrap()
                .as_millis() as u64;
        }
        self.set_status(ComputeStatus::Running);

+        info!(
+            "finished configuration of compute for project {}",
+            pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None")
+        );
+
+        // Log metrics so that we can search for slow operations in logs
+        let metrics = {
+            let state = self.state.lock().unwrap();
+            state.metrics.clone()
+        };
+        info!(?metrics, "compute start finished");
+
        Ok(pg)
    }

--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -5,8 +5,9 @@ use std::path::Path;

 use anyhow::Result;

+use crate::pg_helpers::escape_conf_value;
 use crate::pg_helpers::PgOptionsSerialize;
-use compute_api::spec::ComputeSpec;
+use compute_api::spec::{ComputeMode, ComputeSpec};

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -34,18 +35,57 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 /// Create or completely rewrite configuration file specified by `path`
 pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
    // File::create() destroys the file content if it exists.
-    let mut postgres_conf = File::create(path)?;
+    let mut file = File::create(path)?;

-    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
-
-    Ok(())
-}
-
-// Write Postgres config block wrapped with generated comment section
-fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
-    writeln!(file, "# Managed by compute_ctl: begin")?;
-    writeln!(file, "{}", buf)?;
-    writeln!(file, "# Managed by compute_ctl: end")?;
+    // Write the postgresql.conf content from the spec file as is.
+    if let Some(conf) = &spec.cluster.postgresql_conf {
+        writeln!(file, "{}", conf)?;
+    }
+
+    write!(file, "{}", &spec.cluster.settings.as_pg_settings())?;
+
+    // Add options for connecting to storage
+    writeln!(file, "# Neon storage settings")?;
+    if let Some(s) = &spec.pageserver_connstring {
+        writeln!(file, "neon.pageserver_connstring={}", escape_conf_value(s))?;
+    }
+    if !spec.safekeeper_connstrings.is_empty() {
+        writeln!(
+            file,
+            "neon.safekeepers={}",
+            escape_conf_value(&spec.safekeeper_connstrings.join(","))
+        )?;
+    }
+    if let Some(s) = &spec.tenant_id {
+        writeln!(file, "neon.tenant_id={}", escape_conf_value(&s.to_string()))?;
+    }
+    if let Some(s) = &spec.timeline_id {
+        writeln!(
+            file,
+            "neon.timeline_id={}",
+            escape_conf_value(&s.to_string())
+        )?;
+    }
+
+    match spec.mode {
+        ComputeMode::Primary => {}
+        ComputeMode::Static(lsn) => {
+            // hot_standby is 'on' by default, but let's be explicit
+            writeln!(file, "hot_standby=on")?;
+            writeln!(file, "recovery_target_lsn='{lsn}'")?;
+        }
+        ComputeMode::Replica => {
+            // hot_standby is 'on' by default, but let's be explicit
+            writeln!(file, "hot_standby=on")?;
+        }
+    }
+
+    // If there are any extra options in the 'settings' field, append those
+    if spec.cluster.settings.is_some() {
+        writeln!(file, "# Managed by compute_ctl: begin")?;
+        write!(file, "{}", spec.cluster.settings.as_pg_settings())?;
+        writeln!(file, "# Managed by compute_ctl: end")?;
+    }

    Ok(())
 }
--- a/compute_tools/src/configurator.rs
+++ b/compute_tools/src/configurator.rs
@@ -1,14 +1,13 @@
 use std::sync::Arc;
 use std::thread;

-use anyhow::Result;
 use tracing::{error, info, instrument};

 use compute_api::responses::ComputeStatus;

 use crate::compute::ComputeNode;

-#[instrument(skip(compute))]
+#[instrument(skip_all)]
 fn configurator_main_loop(compute: &Arc<ComputeNode>) {
    info!("waiting for reconfiguration requests");
    loop {
@@ -42,13 +41,14 @@ fn configurator_main_loop(compute: &Arc<ComputeNode>) {
    }
 }

-pub fn launch_configurator(compute: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_configurator(compute: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
    let compute = Arc::clone(compute);

-    Ok(thread::Builder::new()
+    thread::Builder::new()
        .name("compute-configurator".into())
        .spawn(move || {
            configurator_main_loop(&compute);
            info!("configurator thread is exited");
-        })?)
+        })
+        .expect("cannot launch configurator thread")
 }
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -18,6 +18,7 @@ use tracing_utils::http::OtelName;

 fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
    ComputeStatusResponse {
+        start_time: state.start_time,
        tenant: state
            .pspec
            .as_ref()
@@ -219,8 +220,8 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {

 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
-async fn serve(state: Arc<ComputeNode>) {
-    let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
+async fn serve(port: u16, state: Arc<ComputeNode>) {
+    let addr = SocketAddr::from(([0, 0, 0, 0], port));

    let make_service = make_service_fn(move |_conn| {
        let state = state.clone();
@@ -255,10 +256,10 @@ async fn serve(state: Arc<ComputeNode>) {
 }

 /// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
-pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
        .name("http-endpoint".into())
-        .spawn(move || serve(state))?)
+        .spawn(move || serve(port, state))?)
 }
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -152,11 +152,14 @@ components:
      type: object
      description: Compute startup metrics.
      required:
+        - wait_for_spec_ms
        - sync_safekeepers_ms
        - basebackup_ms
        - config_ms
        - total_startup_ms
      properties:
+        wait_for_spec_ms:
+          type: integer
        sync_safekeepers_ms:
          type: integer
        basebackup_ms:
@@ -178,18 +181,27 @@ components:
    ComputeState:
      type: object
      required:
+        - start_time
        - status
-        - last_active
      properties:
+        start_time:
+          type: string
+          description: |
+            Time when compute was started. If initially compute was started in the `empty`
+            state and then provided with valid spec, `start_time` will be reset to the
+            moment, when spec was received.
+          example: "2022-10-12T07:20:50.52Z"
        status:
          $ref: '#/components/schemas/ComputeStatus'
        last_active:
          type: string
-          description: The last detected compute activity timestamp in UTC and RFC3339 format.
+          description: |
+            The last detected compute activity timestamp in UTC and RFC3339 format.
+            It could be empty if compute was never used by user since start.
          example: "2022-10-12T07:20:50.52Z"
        error:
          type: string
-          description: Text of the error during compute startup, if any.
+          description: Text of the error during compute startup or reconfiguration, if any.
          example: ""
        tenant:
          type: string
@@ -212,9 +224,12 @@ components:
    ComputeStatus:
      type: string
      enum:
+        - empty
        - init
        - failed
        - running
+        - configuration_pending
+        - configuration
      example: running

    #
--- a/compute_tools/src/logger.rs
+++ b/compute_tools/src/logger.rs
@@ -18,6 +18,7 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
        .unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_log_level));

    let fmt_layer = tracing_subscriber::fmt::layer()
+        .with_ansi(false)
        .with_target(false)
        .with_writer(std::io::stderr);

@@ -33,5 +34,7 @@ pub fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result<()> {
        .init();
    tracing::info!("logging and tracing started");

+    utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
+
    Ok(())
 }
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -1,7 +1,6 @@
 use std::sync::Arc;
 use std::{thread, time};

-use anyhow::Result;
 use chrono::{DateTime, Utc};
 use postgres::{Client, NoTls};
 use tracing::{debug, info};
@@ -74,7 +73,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
                            // Found non-idle backend, so the last activity is NOW.
                            // Save it and exit the for loop. Also clear the idle backend
                            // `state_change` timestamps array as it doesn't matter now.
-                            last_active = Utc::now();
+                            last_active = Some(Utc::now());
                            idle_backs.clear();
                            break;
                        }
@@ -82,15 +81,16 @@ fn watch_compute_activity(compute: &ComputeNode) {

                    // Get idle backend `state_change` with the max timestamp.
                    if let Some(last) = idle_backs.iter().max() {
-                        last_active = *last;
+                        last_active = Some(*last);
                    }
                }

                // Update the last activity in the shared state if we got a more recent one.
                let mut state = compute.state.lock().unwrap();
+                // NB: `Some(<DateTime>)` is always greater than `None`.
                if last_active > state.last_active {
                    state.last_active = last_active;
-                    debug!("set the last compute activity time to: {}", last_active);
+                    debug!("set the last compute activity time to: {:?}", last_active);
                }
            }
            Err(e) => {
@@ -104,10 +104,11 @@ fn watch_compute_activity(compute: &ComputeNode) {
 }

 /// Launch a separate compute monitor thread and return its `JoinHandle`.
-pub fn launch_monitor(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_monitor(state: &Arc<ComputeNode>) -> thread::JoinHandle<()> {
    let state = Arc::clone(state);

-    Ok(thread::Builder::new()
+    thread::Builder::new()
        .name("compute-monitor".into())
-        .spawn(move || watch_compute_activity(&state))?)
+        .spawn(move || watch_compute_activity(&state))
+        .expect("cannot launch compute monitor thread")
 }
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -16,15 +16,26 @@ use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};

 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

-/// Escape a string for including it in a SQL literal
-fn escape_literal(s: &str) -> String {
-    s.replace('\'', "''").replace('\\', "\\\\")
+/// Escape a string for including it in a SQL literal. Wrapping the result
+/// with `E'{}'` or `'{}'` is not required, as it returns a ready-to-use
+/// SQL string literal, e.g. `'db'''` or `E'db\\'`.
+/// See <https://github.com/postgres/postgres/blob/da98d005cdbcd45af563d0c4ac86d0e9772cd15f/src/backend/utils/adt/quote.c#L47>
+/// for the original implementation.
+pub fn escape_literal(s: &str) -> String {
+    let res = s.replace('\'', "''").replace('\\', "\\\\");
+
+    if res.contains('\\') {
+        format!("E'{}'", res)
+    } else {
+        format!("'{}'", res)
+    }
 }

-/// Escape a string so that it can be used in postgresql.conf.
-/// Same as escape_literal, currently.
-fn escape_conf_value(s: &str) -> String {
-    s.replace('\'', "''").replace('\\', "\\\\")
+/// Escape a string so that it can be used in postgresql.conf. Wrapping the result
+/// with `'{}'` is not required, as it returns a ready-to-use config string.
+pub fn escape_conf_value(s: &str) -> String {
+    let res = s.replace('\'', "''").replace('\\', "\\\\");
+    format!("'{}'", res)
 }

 trait GenericOptionExt {
@@ -37,7 +48,7 @@ impl GenericOptionExt for GenericOption {
    fn to_pg_option(&self) -> String {
        if let Some(val) = &self.value {
            match self.vartype.as_ref() {
-                "string" => format!("{} '{}'", self.name, escape_literal(val)),
+                "string" => format!("{} {}", self.name, escape_literal(val)),
                _ => format!("{} {}", self.name, val),
            }
        } else {
@@ -49,7 +60,7 @@ impl GenericOptionExt for GenericOption {
    fn to_pg_setting(&self) -> String {
        if let Some(val) = &self.value {
            match self.vartype.as_ref() {
-                "string" => format!("{} = '{}'", self.name, escape_conf_value(val)),
+                "string" => format!("{} = {}", self.name, escape_conf_value(val)),
                _ => format!("{} = {}", self.name, val),
            }
        } else {
@@ -94,6 +105,7 @@ impl PgOptionsSerialize for GenericOptions {

 pub trait GenericOptionsSearch {
    fn find(&self, name: &str) -> Option<String>;
+    fn find_ref(&self, name: &str) -> Option<&GenericOption>;
 }

 impl GenericOptionsSearch for GenericOptions {
@@ -103,6 +115,12 @@ impl GenericOptionsSearch for GenericOptions {
        let op = ops.iter().find(|s| s.name == name)?;
        op.value.clone()
    }
+
+    /// Lookup option by name, returning ref
+    fn find_ref(&self, name: &str) -> Option<&GenericOption> {
+        let ops = self.as_ref()?;
+        ops.iter().find(|s| s.name == name)
+    }
 }

 pub trait RoleExt {
@@ -114,9 +132,8 @@ impl RoleExt for Role {
    /// string of arguments.
    fn to_pg_options(&self) -> String {
        // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
-        // For now, we do not use generic `options` for roles. Once used, add
-        // `self.options.as_pg_options()` somewhere here.
-        let mut params: String = "LOGIN".to_string();
+        let mut params: String = self.options.as_pg_options();
+        params.push_str(" LOGIN");

        if let Some(pass) = &self.encrypted_password {
            // Some time ago we supported only md5 and treated all encrypted_password as md5.
@@ -209,7 +226,7 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
 /// Wait for Postgres to become ready to accept connections. It's ready to
 /// accept connections when the state-field in `pgdata/postmaster.pid` says
 /// 'ready'.
-#[instrument(skip(pg))]
+#[instrument(skip_all, fields(pgdata = %pgdata.display()))]
 pub fn wait_for_postgres(pg: &mut Child, pgdata: &Path) -> Result<()> {
    let pid_path = pgdata.join("postmaster.pid");

--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,45 +1,121 @@
+use std::fs::File;
 use std::path::Path;
 use std::str::FromStr;

 use anyhow::{anyhow, bail, Result};
 use postgres::config::Config;
 use postgres::{Client, NoTls};
-use tracing::{info, info_span, instrument, span_enabled, warn, Level};
+use reqwest::StatusCode;
+use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};

 use crate::config;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;

-use compute_api::responses::ControlPlaneSpecResponse;
+use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse};
 use compute_api::spec::{ComputeSpec, Database, PgIdent, Role};

-/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT`
+// Do control plane request and return response if any. In case of error it
+// returns a bool flag indicating whether it makes sense to retry the request
+// and a string with error message.
+fn do_control_plane_request(
+    uri: &str,
+    jwt: &str,
+) -> Result<ControlPlaneSpecResponse, (bool, String)> {
+    let resp = reqwest::blocking::Client::new()
+        .get(uri)
+        .header("Authorization", jwt)
+        .send()
+        .map_err(|e| {
+            (
+                true,
+                format!("could not perform spec request to control plane: {}", e),
+            )
+        })?;
+
+    match resp.status() {
+        StatusCode::OK => match resp.json::<ControlPlaneSpecResponse>() {
+            Ok(spec_resp) => Ok(spec_resp),
+            Err(e) => Err((
+                true,
+                format!("could not deserialize control plane response: {}", e),
+            )),
+        },
+        StatusCode::SERVICE_UNAVAILABLE => {
+            Err((true, "control plane is temporarily unavailable".to_string()))
+        }
+        StatusCode::BAD_GATEWAY => {
+            // We have a problem with intermittent 502 errors now
+            // https://github.com/neondatabase/cloud/issues/2353
+            // It's fine to retry GET request in this case.
+            Err((true, "control plane request failed with 502".to_string()))
+        }
+        // Another code, likely 500 or 404, means that compute is unknown to the control plane
+        // or some internal failure happened. Doesn't make much sense to retry in this case.
+        _ => Err((
+            false,
+            format!(
+                "unexpected control plane response status code: {}",
+                resp.status()
+            ),
+        )),
+    }
+}
+
+/// Request spec from the control-plane by compute_id. If `NEON_CONTROL_PLANE_TOKEN`
 /// env variable is set, it will be used for authorization.
-pub fn get_spec_from_control_plane(base_uri: &str, compute_id: &str) -> Result<ComputeSpec> {
+pub fn get_spec_from_control_plane(
+    base_uri: &str,
+    compute_id: &str,
+) -> Result<Option<ComputeSpec>> {
    let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
-    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
+    let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") {
        Ok(v) => v,
        Err(_) => "".to_string(),
    };
+    let mut attempt = 1;
+    let mut spec: Result<Option<ComputeSpec>> = Ok(None);
+
    info!("getting spec from control plane: {}", cp_uri);

-    // TODO: check the response. We should distinguish cases when it's
-    // - network error, then retry
-    // - no spec for compute yet, then wait
-    // - compute id is unknown or any other error, then bail out
-    let resp: ControlPlaneSpecResponse = reqwest::blocking::Client::new()
-        .get(cp_uri)
-        .header("Authorization", jwt)
-        .send()
-        .map_err(|e| anyhow!("could not send spec request to control plane: {}", e))?
-        .json()
-        .map_err(|e| anyhow!("could not get compute spec from control plane: {}", e))?;
+    // Do 3 attempts to get spec from the control plane using the following logic:
+    // - network error -> then retry
+    // - compute id is unknown or any other error -> bail out
+    // - no spec for compute yet (Empty state) -> return Ok(None)
+    // - got spec -> return Ok(Some(spec))
+    while attempt < 4 {
+        spec = match do_control_plane_request(&cp_uri, &jwt) {
+            Ok(spec_resp) => match spec_resp.status {
+                ControlPlaneComputeStatus::Empty => Ok(None),
+                ControlPlaneComputeStatus::Attached => {
+                    if let Some(spec) = spec_resp.spec {
+                        Ok(Some(spec))
+                    } else {
+                        bail!("compute is attached, but spec is empty")
+                    }
+                }
+            },
+            Err((retry, msg)) => {
+                if retry {
+                    Err(anyhow!(msg))
+                } else {
+                    bail!(msg);
+                }
+            }
+        };

-    if let Some(spec) = resp.spec {
-        Ok(spec)
-    } else {
-        bail!("could not get compute spec from control plane")
+        if let Err(e) = &spec {
+            error!("attempt {} to get spec failed with: {}", attempt, e);
+        } else {
+            return spec;
+        }
+
+        attempt += 1;
+        std::thread::sleep(std::time::Duration::from_millis(100));
    }
+
+    // All attempts failed, return error.
+    spec
 }

 /// It takes cluster specification and does the following:
@@ -70,6 +146,21 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
    Ok(())
 }

+/// Create a standby.signal file
+pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> {
+    // XXX: consider making it a part of spec.json
+    info!("adding standby.signal");
+    let signalfile = pgdata_path.join("standby.signal");
+
+    if !signalfile.exists() {
+        info!("created standby.signal");
+        File::create(signalfile)?;
+    } else {
+        info!("reused pre-existing standby.signal");
+    }
+    Ok(())
+}
+
 /// Given a cluster spec json and open transaction it handles roles creation,
 /// deletion and update.
 #[instrument(skip_all)]
@@ -178,17 +269,13 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                xact.execute(query.as_str(), &[])?;
            }
            RoleAction::Create => {
-                let mut query: String = format!("CREATE ROLE {} ", name.pg_quote());
+                let mut query: String = format!(
+                    "CREATE ROLE {} CREATEROLE CREATEDB IN ROLE neon_superuser",
+                    name.pg_quote()
+                );
                info!("role create query: '{}'", &query);
                query.push_str(&role.to_pg_options());
                xact.execute(query.as_str(), &[])?;
-
-                let grant_query = format!(
-                    "GRANT pg_read_all_data, pg_write_all_data TO {}",
-                    name.pg_quote()
-                );
-                xact.execute(grant_query.as_str(), &[])?;
-                info!("role grant query: '{}'", &grant_query);
            }
        }

@@ -310,10 +397,44 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                // We do not check either DB exists or not,
                // Postgres will take care of it for us
                "delete_db" => {
-                    let query: String = format!("DROP DATABASE IF EXISTS {}", &op.name.pg_quote());
+                    // In Postgres we can't drop a database if it is a template.
+                    // So we need to unset the template flag first, but it could
+                    // be a retry, so we could've already dropped the database.
+                    // Check that database exists first to make it idempotent.
+                    let unset_template_query: String = format!(
+                        "
+                        DO $$
+                        BEGIN
+                            IF EXISTS(
+                                SELECT 1
+                                FROM pg_catalog.pg_database
+                                WHERE datname = {}
+                            )
+                            THEN
+                            ALTER DATABASE {} is_template false;
+                            END IF;
+                        END
+                        $$;",
+                        escape_literal(&op.name),
+                        &op.name.pg_quote()
+                    );
+                    // Use FORCE to drop database even if there are active connections.
+                    // We run this from `cloud_admin`, so it should have enough privileges.
+                    // NB: there could be other db states, which prevent us from dropping
+                    // the database. For example, if db is used by any active subscription
+                    // or replication slot.
+                    // TODO: deal with it once we allow logical replication. Proper fix should
+                    // involve returning an error code to the control plane, so it could
+                    // figure out that this is a non-retryable error, return it to the user
+                    // and fail operation permanently.
+                    let drop_db_query: String = format!(
+                        "DROP DATABASE IF EXISTS {} WITH (FORCE)",
+                        &op.name.pg_quote()
+                    );

                    warn!("deleting database '{}'", &op.name);
-                    client.execute(query.as_str(), &[])?;
+                    client.execute(unset_template_query.as_str(), &[])?;
+                    client.execute(drop_db_query.as_str(), &[])?;
                }
                "rename_db" => {
                    let new_name = op.new_name.as_ref().unwrap();
@@ -385,6 +506,11 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                query.push_str(&db.to_pg_options());
                let _guard = info_span!("executing", query).entered();
                client.execute(query.as_str(), &[])?;
+                let grant_query: String = format!(
+                    "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser",
+                    name.pg_quote()
+                );
+                client.execute(grant_query.as_str(), &[])?;
            }
        };

@@ -404,35 +530,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
 /// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
 /// to allow users creating trusted extensions and re-creating `public` schema, for example.
 #[instrument(skip_all)]
-pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
+pub fn handle_grants(spec: &ComputeSpec, connstr: &str) -> Result<()> {
    info!("cluster spec grants:");

-    // We now have a separate `web_access` role to connect to the database
-    // via the web interface and proxy link auth. And also we grant a
-    // read / write all data privilege to every role. So also grant
-    // create to everyone.
-    // XXX: later we should stop messing with Postgres ACL in such horrible
-    // ways.
-    let roles = spec
-        .cluster
-        .roles
-        .iter()
-        .map(|r| r.name.pg_quote())
-        .collect::<Vec<_>>();
-
-    for db in &spec.cluster.databases {
-        let dbname = &db.name;
-
-        let query: String = format!(
-            "GRANT CREATE ON DATABASE {} TO {}",
-            dbname.pg_quote(),
-            roles.join(", ")
-        );
-        info!("grant query {}", &query);
-
-        client.execute(query.as_str(), &[])?;
-    }
-
    // Do some per-database access adjustments. We'd better do this at db creation time,
    // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
    // atomically.
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -16,7 +16,7 @@ mod pg_helpers_tests {
        );
        assert_eq!(
            spec.cluster.roles.first().unwrap().to_pg_options(),
-            "LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
+            " LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
        );
    }

@@ -89,4 +89,12 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor
        assert_eq!(none_generic_options.find("missed_value"), None);
        assert_eq!(none_generic_options.find("invalid_value"), None);
    }
+
+    #[test]
+    fn test_escape_literal() {
+        assert_eq!(escape_literal("test"), "'test'");
+        assert_eq!(escape_literal("test'"), "'test'''");
+        assert_eq!(escape_literal("test\\'"), "E'test\\\\'''");
+        assert_eq!(escape_literal("test\\'\\'"), "E'test\\\\''\\\\'''");
+    }
 }
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -30,4 +30,5 @@ postgres_connection.workspace = true
 storage_broker.workspace = true
 utils.workspace = true

+compute_api.workspace = true
 workspace_hack.workspace = true
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -10,7 +10,7 @@
 //! (non-Neon binaries don't necessarily follow our pidfile conventions).
 //! The pid stored in the file is later used to stop the service.
 //!
-//! See [`lock_file`] module for more info.
+//! See the [`lock_file`](utils::lock_file) module for more info.

 use std::ffi::OsStr;
 use std::io::Write;
@@ -180,6 +180,11 @@ pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> any
    }

    // Wait until process is gone
+    wait_until_stopped(process_name, pid)?;
+    Ok(())
+}
+
+pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
    for retries in 0..RETRIES {
        match process_has_stopped(pid) {
            Ok(true) => {
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -7,6 +7,7 @@
 //!
 use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
+use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
 use control_plane::local_env::LocalEnv;
 use control_plane::pageserver::PageServerNode;
@@ -40,7 +41,7 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);

-const DEFAULT_PG_VERSION: &str = "14";
+const DEFAULT_PG_VERSION: &str = "15";

 fn default_conf() -> String {
    format!(
@@ -307,7 +308,8 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {

    let mut env =
        LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
-    env.init(pg_version)
+    let force = init_match.get_flag("force");
+    env.init(pg_version, force)
        .context("Failed to initialize neon repository")?;

    // Initialize pageserver, create initial tenant and timeline.
@@ -474,7 +476,15 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;

            println!("Creating endpoint for imported timeline ...");
-            cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?;
+            cplane.new_endpoint(
+                name,
+                tenant_id,
+                timeline_id,
+                None,
+                None,
+                pg_version,
+                ComputeMode::Primary,
+            )?;
            println!("Done");
        }
        Some(("branch", branch_match)) => {
@@ -560,20 +570,20 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .iter()
                .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
            {
-                let lsn_str = match endpoint.lsn {
-                    None => {
-                        // -> primary endpoint
+                let lsn_str = match endpoint.mode {
+                    ComputeMode::Static(lsn) => {
+                        // -> read-only endpoint
+                        // Use the node's LSN.
+                        lsn.to_string()
+                    }
+                    _ => {
+                        // -> primary endpoint or hot replica
                        // Use the LSN at the end of the timeline.
                        timeline_infos
                            .get(&endpoint.timeline_id)
                            .map(|bi| bi.last_record_lsn.to_string())
                            .unwrap_or_else(|| "?".to_string())
                    }
-                    Some(lsn) => {
-                        // -> read-only endpoint
-                        // Use the endpoint's LSN.
-                        lsn.to_string()
-                    }
                };

                let branch_name = timeline_name_mappings
@@ -583,7 +593,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(

                table.add_row([
                    endpoint_id.as_str(),
-                    &endpoint.address.to_string(),
+                    &endpoint.pg_address.to_string(),
                    &endpoint.timeline_id.to_string(),
                    branch_name,
                    lsn_str.as_str(),
@@ -612,21 +622,57 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .get_branch_timeline_id(branch_name, tenant_id)
                .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;

-            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
-
+            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
+            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
            let pg_version = sub_args
                .get_one::<u32>("pg-version")
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

-            cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?;
+            let hot_standby = sub_args
+                .get_one::<bool>("hot-standby")
+                .copied()
+                .unwrap_or(false);
+
+            let mode = match (lsn, hot_standby) {
+                (Some(lsn), false) => ComputeMode::Static(lsn),
+                (None, true) => ComputeMode::Replica,
+                (None, false) => ComputeMode::Primary,
+                (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
+            };
+
+            cplane.new_endpoint(
+                &endpoint_id,
+                tenant_id,
+                timeline_id,
+                pg_port,
+                http_port,
+                pg_version,
+                mode,
+            )?;
        }
        "start" => {
-            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
+            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
+            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
            let endpoint_id = sub_args
                .get_one::<String>("endpoint_id")
                .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;

+            // If --safekeepers argument is given, use only the listed safekeeper nodes.
+            let safekeepers =
+                if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
+                    let mut safekeepers: Vec<NodeId> = Vec::new();
+                    for sk_id in safekeepers_str.split(',').map(str::trim) {
+                        let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
+                            anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
+                        })?);
+                        safekeepers.push(sk_id);
+                    }
+                    safekeepers
+                } else {
+                    env.safekeepers.iter().map(|sk| sk.id).collect()
+                };
+
            let endpoint = cplane.endpoints.get(endpoint_id.as_str());

            let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) {
@@ -637,9 +683,23 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                None
            };

+            let hot_standby = sub_args
+                .get_one::<bool>("hot-standby")
+                .copied()
+                .unwrap_or(false);
+
            if let Some(endpoint) = endpoint {
+                match (&endpoint.mode, hot_standby) {
+                    (ComputeMode::Static(_), true) => {
+                        bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
+                    }
+                    (ComputeMode::Primary, true) => {
+                        bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
+                    }
+                    _ => {}
+                }
                println!("Starting existing endpoint {endpoint_id}...");
-                endpoint.start(&auth_token)?;
+                endpoint.start(&auth_token, safekeepers)?;
            } else {
                let branch_name = sub_args
                    .get_one::<String>("branch-name")
@@ -659,6 +719,14 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                    .get_one::<u32>("pg-version")
                    .copied()
                    .context("Failed to `pg-version` from the argument string")?;
+
+                let mode = match (lsn, hot_standby) {
+                    (Some(lsn), false) => ComputeMode::Static(lsn),
+                    (None, true) => ComputeMode::Replica,
+                    (None, false) => ComputeMode::Primary,
+                    (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
+                };
+
                // when used with custom port this results in non obvious behaviour
                // port is remembered from first start command, i e
                // start --port X
@@ -667,14 +735,15 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ...");

                let ep = cplane.new_endpoint(
-                    tenant_id,
                    endpoint_id,
+                    tenant_id,
                    timeline_id,
-                    lsn,
-                    port,
+                    pg_port,
+                    http_port,
                    pg_version,
+                    mode,
                )?;
-                ep.start(&auth_token)?;
+                ep.start(&auth_token, safekeepers)?;
            }
        }
        "stop" => {
@@ -902,11 +971,22 @@ fn cli() -> Command {
        .value_parser(value_parser!(u32))
        .default_value(DEFAULT_PG_VERSION);

-    let port_arg = Arg::new("port")
-        .long("port")
+    let pg_port_arg = Arg::new("pg-port")
+        .long("pg-port")
        .required(false)
        .value_parser(value_parser!(u16))
-        .value_name("port");
+        .value_name("pg-port");
+
+    let http_port_arg = Arg::new("http-port")
+        .long("http-port")
+        .required(false)
+        .value_parser(value_parser!(u16))
+        .value_name("http-port");
+
+    let safekeepers_arg = Arg::new("safekeepers")
+        .long("safekeepers")
+        .required(false)
+        .value_name("safekeepers");

    let stop_mode_arg = Arg::new("stop-mode")
        .short('m')
@@ -928,6 +1008,19 @@ fn cli() -> Command {
        .help("Specify Lsn on the timeline to start from. By default, end of the timeline would be used.")
        .required(false);

+    let hot_standby_arg = Arg::new("hot-standby")
+        .value_parser(value_parser!(bool))
+        .long("hot-standby")
+        .help("If set, the node will be a hot replica on the specified timeline")
+        .required(false);
+
+    let force_arg = Arg::new("force")
+        .value_parser(value_parser!(bool))
+        .long("force")
+        .action(ArgAction::SetTrue)
+        .help("Force initialization even if the repository is not empty")
+        .required(false);
+
    Command::new("Neon CLI")
        .arg_required_else_help(true)
        .version(GIT_VERSION)
@@ -943,6 +1036,7 @@ fn cli() -> Command {
                        .value_name("config"),
                )
                .arg(pg_version_arg.clone())
+                .arg(force_arg)
        )
        .subcommand(
            Command::new("timeline")
@@ -1045,13 +1139,15 @@ fn cli() -> Command {
                    .arg(branch_name_arg.clone())
                    .arg(tenant_id_arg.clone())
                    .arg(lsn_arg.clone())
-                    .arg(port_arg.clone())
+                    .arg(pg_port_arg.clone())
+                    .arg(http_port_arg.clone())
                    .arg(
                        Arg::new("config-only")
                            .help("Don't do basebackup, create endpoint directory with only config files")
                            .long("config-only")
                            .required(false))
                    .arg(pg_version_arg.clone())
+                    .arg(hot_standby_arg.clone())
                )
                .subcommand(Command::new("start")
                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
@@ -1060,8 +1156,11 @@ fn cli() -> Command {
                    .arg(branch_name_arg)
                    .arg(timeline_id_arg)
                    .arg(lsn_arg)
-                    .arg(port_arg)
+                    .arg(pg_port_arg)
+                    .arg(http_port_arg)
                    .arg(pg_version_arg)
+                    .arg(hot_standby_arg)
+                    .arg(safekeepers_arg)
                )
                .subcommand(
                    Command::new("stop")
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -1,3 +1,10 @@
+//! Code to manage the storage broker
+//!
+//! In the local test environment, the data for each safekeeper is stored in
+//!
+//! ```text
+//!   .neon/safekeepers/<safekeeper id>
+//! ```
 use anyhow::Context;

 use std::path::PathBuf;
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -1,25 +1,77 @@
+//! Code to manage compute endpoints
+//!
+//! In the local test environment, the data for each endpoint is stored in
+//!
+//! ```text
+//!   .neon/endpoints/<endpoint id>
+//! ```
+//!
+//! Some basic information about the endpoint, like the tenant and timeline IDs,
+//! are stored in the `endpoint.json` file. The `endpoint.json` file is created
+//! when the endpoint is created, and doesn't change afterwards.
+//!
+//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is
+//! started, we launch `compute_ctl` It synchronizes the safekeepers, downloads
+//! the basebackup from the pageserver to initialize the the data directory, and
+//! finally launches the PostgreSQL process. It watches the PostgreSQL process
+//! until it exits.
+//!
+//! When an endpoint is created, a `postgresql.conf` file is also created in
+//! the endpoint's directory. The file can be modified before starting PostgreSQL.
+//! However, the `postgresql.conf` file in the endpoint directory is not used directly
+//! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another
+//! copy of it in the data directory.
+//!
+//! Directory contents:
+//!
+//! ```text
+//! .neon/endpoints/main/
+//!     compute.log               - log output of `compute_ctl` and `postgres`
+//!     endpoint.json             - serialized `EndpointConf` struct
+//!     postgresql.conf           - postgresql settings
+//!     spec.json                 - passed to `compute_ctl`
+//!     pgdata/
+//!         postgresql.conf       - copy of postgresql.conf created by `compute_ctl`
+//!         zenith.signal
+//!         <other PostgreSQL files>
+//! ```
+//!
 use std::collections::BTreeMap;
-use std::fs::{self, File};
-use std::io::Write;
 use std::net::SocketAddr;
 use std::net::TcpStream;
-use std::os::unix::fs::PermissionsExt;
 use std::path::PathBuf;
-use std::process::{Command, Stdio};
-use std::str::FromStr;
+use std::process::Command;
 use std::sync::Arc;
 use std::time::Duration;

-use anyhow::{Context, Result};
-use utils::{
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-};
+use anyhow::{anyhow, bail, Context, Result};
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+use utils::id::{NodeId, TenantId, TimelineId};

-use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
+use crate::local_env::LocalEnv;
 use crate::pageserver::PageServerNode;
 use crate::postgresql_conf::PostgresConf;

+use compute_api::responses::{ComputeState, ComputeStatus};
+use compute_api::spec::{Cluster, ComputeMode, ComputeSpec};
+
+// contents of a endpoint.json file
+#[serde_as]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
+pub struct EndpointConf {
+    endpoint_id: String,
+    #[serde_as(as = "DisplayFromStr")]
+    tenant_id: TenantId,
+    #[serde_as(as = "DisplayFromStr")]
+    timeline_id: TimelineId,
+    mode: ComputeMode,
+    pg_port: u16,
+    http_port: u16,
+    pg_version: u32,
+    skip_pg_catalog_updates: bool,
+}
+
 //
 // ComputeControlPlane
 //
@@ -39,11 +91,11 @@ impl ComputeControlPlane {
        let pageserver = Arc::new(PageServerNode::from_env(&env));

        let mut endpoints = BTreeMap::default();
-        for endpoint_dir in fs::read_dir(env.endpoints_path())
+        for endpoint_dir in std::fs::read_dir(env.endpoints_path())
            .with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
        {
            let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?;
-            endpoints.insert(ep.name.clone(), Arc::new(ep));
+            endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
        }

        Ok(ComputeControlPlane {
@@ -58,36 +110,58 @@ impl ComputeControlPlane {
        1 + self
            .endpoints
            .values()
-            .map(|ep| ep.address.port())
+            .map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port()))
            .max()
            .unwrap_or(self.base_port)
    }

+    #[allow(clippy::too_many_arguments)]
    pub fn new_endpoint(
        &mut self,
+        endpoint_id: &str,
        tenant_id: TenantId,
-        name: &str,
        timeline_id: TimelineId,
-        lsn: Option<Lsn>,
-        port: Option<u16>,
+        pg_port: Option<u16>,
+        http_port: Option<u16>,
        pg_version: u32,
+        mode: ComputeMode,
    ) -> Result<Arc<Endpoint>> {
-        let port = port.unwrap_or_else(|| self.get_port());
+        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
+        let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
        let ep = Arc::new(Endpoint {
-            name: name.to_owned(),
-            address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
+            endpoint_id: endpoint_id.to_owned(),
+            pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
+            http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
            env: self.env.clone(),
            pageserver: Arc::clone(&self.pageserver),
            timeline_id,
-            lsn,
+            mode,
            tenant_id,
            pg_version,
+            skip_pg_catalog_updates: false,
        });

-        ep.create_pgdata()?;
-        ep.setup_pg_conf()?;
+        ep.create_endpoint_dir()?;
+        std::fs::write(
+            ep.endpoint_path().join("endpoint.json"),
+            serde_json::to_string_pretty(&EndpointConf {
+                endpoint_id: endpoint_id.to_string(),
+                tenant_id,
+                timeline_id,
+                mode,
+                http_port,
+                pg_port,
+                pg_version,
+                skip_pg_catalog_updates: false,
+            })?,
+        )?;
+        std::fs::write(
+            ep.endpoint_path().join("postgresql.conf"),
+            ep.setup_pg_conf()?.to_string(),
+        )?;

-        self.endpoints.insert(ep.name.clone(), Arc::clone(&ep));
+        self.endpoints
+            .insert(ep.endpoint_id.clone(), Arc::clone(&ep));

        Ok(ep)
    }
@@ -98,20 +172,25 @@ impl ComputeControlPlane {
 #[derive(Debug)]
 pub struct Endpoint {
    /// used as the directory name
-    name: String,
+    endpoint_id: String,
    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
-    // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
-    pub lsn: Option<Lsn>,
+    pub mode: ComputeMode,

-    // port and address of the Postgres server
-    pub address: SocketAddr,
+    // port and address of the Postgres server and `compute_ctl`'s HTTP API
+    pub pg_address: SocketAddr,
+    pub http_address: SocketAddr,
+
+    // postgres major version in the format: 14, 15, etc.
    pg_version: u32,

    // These are not part of the endpoint as such, but the environment
    // the endpoint runs in.
    pub env: LocalEnv,
    pageserver: Arc<PageServerNode>,
+
+    // Optimizations
+    skip_pg_catalog_updates: bool,
 }

 impl Endpoint {
@@ -129,145 +208,37 @@ impl Endpoint {

        // parse data directory name
        let fname = entry.file_name();
-        let name = fname.to_str().unwrap().to_string();
+        let endpoint_id = fname.to_str().unwrap().to_string();

-        // Read config file into memory
-        let cfg_path = entry.path().join("pgdata").join("postgresql.conf");
-        let cfg_path_str = cfg_path.to_string_lossy();
-        let mut conf_file = File::open(&cfg_path)
-            .with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
-        let conf = PostgresConf::read(&mut conf_file)
-            .with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
+        // Read the endpoint.json file
+        let conf: EndpointConf =
+            serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;

-        // Read a few options from the config file
-        let context = format!("in config file {}", cfg_path_str);
-        let port: u16 = conf.parse_field("port", &context)?;
-        let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
-        let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
-
-        // Read postgres version from PG_VERSION file to determine which postgres version binary to use.
-        // If it doesn't exist, assume broken data directory and use default pg version.
-        let pg_version_path = entry.path().join("PG_VERSION");
-
-        let pg_version_str =
-            fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
-        let pg_version = u32::from_str(&pg_version_str)?;
-
-        // parse recovery_target_lsn, if any
-        let recovery_target_lsn: Option<Lsn> =
-            conf.parse_field_optional("recovery_target_lsn", &context)?;
-
-        // ok now
        Ok(Endpoint {
-            address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
-            name,
+            pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
+            http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
+            endpoint_id,
            env: env.clone(),
            pageserver: Arc::clone(pageserver),
-            timeline_id,
-            lsn: recovery_target_lsn,
-            tenant_id,
-            pg_version,
+            timeline_id: conf.timeline_id,
+            mode: conf.mode,
+            tenant_id: conf.tenant_id,
+            pg_version: conf.pg_version,
+            skip_pg_catalog_updates: conf.skip_pg_catalog_updates,
        })
    }

-    fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
-        let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
-        let mut cmd = Command::new(pg_path);
-
-        cmd.arg("--sync-safekeepers")
-            .env_clear()
-            .env(
-                "LD_LIBRARY_PATH",
-                self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
-            )
-            .env(
-                "DYLD_LIBRARY_PATH",
-                self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
-            )
-            .env("PGDATA", self.pgdata().to_str().unwrap())
-            .stdout(Stdio::piped())
-            // Comment this to avoid capturing stderr (useful if command hangs)
-            .stderr(Stdio::piped());
-
-        if let Some(token) = auth_token {
-            cmd.env("NEON_AUTH_TOKEN", token);
-        }
-
-        let sync_handle = cmd
-            .spawn()
-            .expect("postgres --sync-safekeepers failed to start");
-
-        let sync_output = sync_handle
-            .wait_with_output()
-            .expect("postgres --sync-safekeepers failed");
-        if !sync_output.status.success() {
-            anyhow::bail!(
-                "sync-safekeepers failed: '{}'",
-                String::from_utf8_lossy(&sync_output.stderr)
-            );
-        }
-
-        let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
-        println!("Safekeepers synced on {}", lsn);
-        Ok(lsn)
-    }
-
-    /// Get basebackup from the pageserver as a tar archive and extract it
-    /// to the `self.pgdata()` directory.
-    fn do_basebackup(&self, lsn: Option<Lsn>) -> Result<()> {
-        println!(
-            "Extracting base backup to create postgres instance: path={} port={}",
-            self.pgdata().display(),
-            self.address.port()
-        );
-
-        let sql = if let Some(lsn) = lsn {
-            format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn)
-        } else {
-            format!("basebackup {} {}", self.tenant_id, self.timeline_id)
-        };
-
-        let mut client = self
-            .pageserver
-            .page_server_psql_client()
-            .context("connecting to page server failed")?;
-
-        let copyreader = client
-            .copy_out(sql.as_str())
-            .context("page server 'basebackup' command failed")?;
-
-        // Read the archive directly from the `CopyOutReader`
-        //
-        // Set `ignore_zeros` so that unpack() reads all the Copy data and
-        // doesn't stop at the end-of-archive marker. Otherwise, if the server
-        // sends an Error after finishing the tarball, we will not notice it.
-        let mut ar = tar::Archive::new(copyreader);
-        ar.set_ignore_zeros(true);
-        ar.unpack(&self.pgdata())
-            .context("extracting base backup failed")?;
-
-        Ok(())
-    }
-
-    fn create_pgdata(&self) -> Result<()> {
-        fs::create_dir_all(self.pgdata()).with_context(|| {
+    fn create_endpoint_dir(&self) -> Result<()> {
+        std::fs::create_dir_all(self.endpoint_path()).with_context(|| {
            format!(
-                "could not create data directory {}",
-                self.pgdata().display()
+                "could not create endpoint directory {}",
+                self.endpoint_path().display()
            )
-        })?;
-        fs::set_permissions(self.pgdata().as_path(), fs::Permissions::from_mode(0o700))
-            .with_context(|| {
-                format!(
-                    "could not set permissions in data directory {}",
-                    self.pgdata().display()
-                )
-            })
+        })
    }

-    // Write postgresql.conf with default configuration
-    // and PG_VERSION file to the data directory of a new endpoint.
-    fn setup_pg_conf(&self) -> Result<()> {
+    // Generate postgresql.conf with default configuration
+    fn setup_pg_conf(&self) -> Result<PostgresConf> {
        let mut conf = PostgresConf::new();
        conf.append("max_wal_senders", "10");
        conf.append("wal_log_hints", "off");
@@ -280,105 +251,103 @@ impl Endpoint {
        // wal_sender_timeout is the maximum time to wait for WAL replication.
        // It also defines how often the walreciever will send a feedback message to the wal sender.
        conf.append("wal_sender_timeout", "5s");
-        conf.append("listen_addresses", &self.address.ip().to_string());
-        conf.append("port", &self.address.port().to_string());
+        conf.append("listen_addresses", &self.pg_address.ip().to_string());
+        conf.append("port", &self.pg_address.port().to_string());
        conf.append("wal_keep_size", "0");
        // walproposer panics when basebackup is invalid, it is pointless to restart in this case.
        conf.append("restart_after_crash", "off");

-        // Configure the Neon Postgres extension to fetch pages from pageserver
-        let pageserver_connstr = {
-            let config = &self.pageserver.pg_connection_config;
-            let (host, port) = (config.host(), config.port());
-
-            // NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
-            format!("postgresql://no_user@{host}:{port}")
-        };
+        // Load the 'neon' extension
        conf.append("shared_preload_libraries", "neon");
-        conf.append_line("");
-        conf.append("neon.pageserver_connstring", &pageserver_connstr);
-        conf.append("neon.tenant_id", &self.tenant_id.to_string());
-        conf.append("neon.timeline_id", &self.timeline_id.to_string());
-        if let Some(lsn) = self.lsn {
-            conf.append("recovery_target_lsn", &lsn.to_string());
-        }

        conf.append_line("");
-        // Configure backpressure
-        // - Replication write lag depends on how fast the walreceiver can process incoming WAL.
-        //   This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
-        //   so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
-        //   Actually latency should be much smaller (better if < 1sec). But we assume that recently
-        //   updates pages are not requested from pageserver.
-        // - Replication flush lag depends on speed of persisting data by checkpointer (creation of
-        //   delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
-        //   remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
-        //   recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
-        // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
-        //   To be able to restore database in case of pageserver node crash, safekeeper should not
-        //   remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
-        //   (if they are not able to upload WAL to S3).
-        conf.append("max_replication_write_lag", "15MB");
-        conf.append("max_replication_flush_lag", "10GB");
+        // Replication-related configurations, such as WAL sending
+        match &self.mode {
+            ComputeMode::Primary => {
+                // Configure backpressure
+                // - Replication write lag depends on how fast the walreceiver can process incoming WAL.
+                //   This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
+                //   so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
+                //   Actually latency should be much smaller (better if < 1sec). But we assume that recently
+                //   updates pages are not requested from pageserver.
+                // - Replication flush lag depends on speed of persisting data by checkpointer (creation of
+                //   delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
+                //   remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
+                //   recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
+                // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
+                //   To be able to restore database in case of pageserver node crash, safekeeper should not
+                //   remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
+                //   (if they are not able to upload WAL to S3).
+                conf.append("max_replication_write_lag", "15MB");
+                conf.append("max_replication_flush_lag", "10GB");

-        if !self.env.safekeepers.is_empty() {
-            // Configure Postgres to connect to the safekeepers
-            conf.append("synchronous_standby_names", "walproposer");
+                if !self.env.safekeepers.is_empty() {
+                    // Configure Postgres to connect to the safekeepers
+                    conf.append("synchronous_standby_names", "walproposer");

-            let safekeepers = self
-                .env
-                .safekeepers
-                .iter()
-                .map(|sk| format!("localhost:{}", sk.pg_port))
-                .collect::<Vec<String>>()
-                .join(",");
-            conf.append("neon.safekeepers", &safekeepers);
-        } else {
-            // We only use setup without safekeepers for tests,
-            // and don't care about data durability on pageserver,
-            // so set more relaxed synchronous_commit.
-            conf.append("synchronous_commit", "remote_write");
+                    let safekeepers = self
+                        .env
+                        .safekeepers
+                        .iter()
+                        .map(|sk| format!("localhost:{}", sk.get_compute_port()))
+                        .collect::<Vec<String>>()
+                        .join(",");
+                    conf.append("neon.safekeepers", &safekeepers);
+                } else {
+                    // We only use setup without safekeepers for tests,
+                    // and don't care about data durability on pageserver,
+                    // so set more relaxed synchronous_commit.
+                    conf.append("synchronous_commit", "remote_write");

-            // Configure the node to stream WAL directly to the pageserver
-            // This isn't really a supported configuration, but can be useful for
-            // testing.
-            conf.append("synchronous_standby_names", "pageserver");
-        }
-
-        let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
-        file.write_all(conf.to_string().as_bytes())?;
-
-        let mut file = File::create(self.pgdata().join("PG_VERSION"))?;
-        file.write_all(self.pg_version.to_string().as_bytes())?;
-
-        Ok(())
-    }
-
-    fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
-        let backup_lsn = if let Some(lsn) = self.lsn {
-            Some(lsn)
-        } else if !self.env.safekeepers.is_empty() {
-            // LSN 0 means that it is bootstrap and we need to download just
-            // latest data from the pageserver. That is a bit clumsy but whole bootstrap
-            // procedure evolves quite actively right now, so let's think about it again
-            // when things would be more stable (TODO).
-            let lsn = self.sync_safekeepers(auth_token, self.pg_version)?;
-            if lsn == Lsn(0) {
-                None
-            } else {
-                Some(lsn)
+                    // Configure the node to stream WAL directly to the pageserver
+                    // This isn't really a supported configuration, but can be useful for
+                    // testing.
+                    conf.append("synchronous_standby_names", "pageserver");
+                }
            }
-        } else {
-            None
-        };
+            ComputeMode::Static(lsn) => {
+                conf.append("recovery_target_lsn", &lsn.to_string());
+            }
+            ComputeMode::Replica => {
+                assert!(!self.env.safekeepers.is_empty());

-        self.do_basebackup(backup_lsn)?;
+                // TODO: use future host field from safekeeper spec
+                // Pass the list of safekeepers to the replica so that it can connect to any of them,
+                // whichever is availiable.
+                let sk_ports = self
+                    .env
+                    .safekeepers
+                    .iter()
+                    .map(|x| x.get_compute_port().to_string())
+                    .collect::<Vec<_>>()
+                    .join(",");
+                let sk_hosts = vec!["localhost"; self.env.safekeepers.len()].join(",");

-        Ok(())
+                let connstr = format!(
+                    "host={} port={} options='-c timeline_id={} tenant_id={}' application_name=replica replication=true",
+                    sk_hosts,
+                    sk_ports,
+                    &self.timeline_id.to_string(),
+                    &self.tenant_id.to_string(),
+                );
+
+                let slot_name = format!("repl_{}_", self.timeline_id);
+                conf.append("primary_conninfo", connstr.as_str());
+                conf.append("primary_slot_name", slot_name.as_str());
+                conf.append("hot_standby", "on");
+                // prefetching of blocks referenced in WAL doesn't make sense for us
+                // Neon hot standby ignores pages that are not in the shared_buffers
+                if self.pg_version >= 15 {
+                    conf.append("recovery_prefetch", "off");
+                }
+            }
+        }
+
+        Ok(conf)
    }

    pub fn endpoint_path(&self) -> PathBuf {
-        self.env.endpoints_path().join(&self.name)
+        self.env.endpoints_path().join(&self.endpoint_id)
    }

    pub fn pgdata(&self) -> PathBuf {
@@ -388,7 +357,7 @@ impl Endpoint {
    pub fn status(&self) -> &str {
        let timeout = Duration::from_millis(300);
        let has_pidfile = self.pgdata().join("postmaster.pid").exists();
-        let can_connect = TcpStream::connect_timeout(&self.address, timeout).is_ok();
+        let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok();

        match (has_pidfile, can_connect) {
            (true, true) => "running",
@@ -406,8 +375,6 @@ impl Endpoint {
                &[
                    "-D",
                    self.pgdata().to_str().unwrap(),
-                    "-l",
-                    self.pgdata().join("pg.log").to_str().unwrap(),
                    "-w", //wait till pg_ctl actually does what was asked
                ],
                args,
@@ -440,39 +407,203 @@ impl Endpoint {
                String::from_utf8_lossy(&pg_ctl.stderr),
            );
        }
+
+        // Also wait for the compute_ctl process to die. It might have some cleanup
+        // work to do after postgres stops, like syncing safekeepers, etc.
+        //
+        // TODO use background_process::stop_process instead
+        let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
+        let pid: u32 = std::fs::read_to_string(pidfile_path)?.parse()?;
+        let pid = nix::unistd::Pid::from_raw(pid as i32);
+        crate::background_process::wait_until_stopped("compute_ctl", pid)?;
+
        Ok(())
    }

-    pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
+    pub fn start(&self, auth_token: &Option<String>, safekeepers: Vec<NodeId>) -> Result<()> {
        if self.status() == "running" {
            anyhow::bail!("The endpoint is already running");
        }

-        // 1. We always start Postgres from scratch, so
-        // if old dir exists, preserve 'postgresql.conf' and drop the directory
-        let postgresql_conf_path = self.pgdata().join("postgresql.conf");
-        let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
-            format!(
-                "failed to read config file in {}",
-                postgresql_conf_path.to_str().unwrap()
-            )
-        })?;
-        fs::remove_dir_all(self.pgdata())?;
-        self.create_pgdata()?;
+        // Slurp the endpoints/<endpoint id>/postgresql.conf file into
+        // memory. We will include it in the spec file that we pass to
+        // `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf
+        // in the data directory.
+        let postgresql_conf_path = self.endpoint_path().join("postgresql.conf");
+        let postgresql_conf = match std::fs::read(&postgresql_conf_path) {
+            Ok(content) => String::from_utf8(content)?,
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => "".to_string(),
+            Err(e) => {
+                return Err(anyhow::Error::new(e).context(format!(
+                    "failed to read config file in {}",
+                    postgresql_conf_path.to_str().unwrap()
+                )))
+            }
+        };

-        // 2. Bring back config files
-        fs::write(&postgresql_conf_path, postgresql_conf)?;
-
-        // 3. Load basebackup
-        self.load_basebackup(auth_token)?;
-
-        if self.lsn.is_some() {
-            File::create(self.pgdata().join("standby.signal"))?;
+        // We always start the compute node from scratch, so if the Postgres
+        // data dir exists from a previous launch, remove it first.
+        if self.pgdata().exists() {
+            std::fs::remove_dir_all(self.pgdata())?;
        }

-        // 4. Finally start postgres
-        println!("Starting postgres at '{}'", self.connstr());
-        self.pg_ctl(&["start"], auth_token)
+        let pageserver_connstring = {
+            let config = &self.pageserver.pg_connection_config;
+            let (host, port) = (config.host(), config.port());
+
+            // NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
+            format!("postgresql://no_user@{host}:{port}")
+        };
+        let mut safekeeper_connstrings = Vec::new();
+        if self.mode == ComputeMode::Primary {
+            for sk_id in safekeepers {
+                let sk = self
+                    .env
+                    .safekeepers
+                    .iter()
+                    .find(|node| node.id == sk_id)
+                    .ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
+                safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
+            }
+        }
+
+        // Create spec file
+        let spec = ComputeSpec {
+            skip_pg_catalog_updates: self.skip_pg_catalog_updates,
+            format_version: 1.0,
+            operation_uuid: None,
+            cluster: Cluster {
+                cluster_id: None, // project ID: not used
+                name: None,       // project name: not used
+                state: None,
+                roles: vec![],
+                databases: vec![],
+                settings: None,
+                postgresql_conf: Some(postgresql_conf),
+            },
+            delta_operations: None,
+            tenant_id: Some(self.tenant_id),
+            timeline_id: Some(self.timeline_id),
+            mode: self.mode,
+            pageserver_connstring: Some(pageserver_connstring),
+            safekeeper_connstrings,
+            storage_auth_token: auth_token.clone(),
+        };
+        let spec_path = self.endpoint_path().join("spec.json");
+        std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
+
+        // Open log file. We'll redirect the stdout and stderr of `compute_ctl` to it.
+        let logfile = std::fs::OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(self.endpoint_path().join("compute.log"))?;
+
+        // Launch compute_ctl
+        println!("Starting postgres node at '{}'", self.connstr());
+        let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
+        cmd.args(["--http-port", &self.http_address.port().to_string()])
+            .args(["--pgdata", self.pgdata().to_str().unwrap()])
+            .args(["--connstr", &self.connstr()])
+            .args([
+                "--spec-path",
+                self.endpoint_path().join("spec.json").to_str().unwrap(),
+            ])
+            .args([
+                "--pgbin",
+                self.env
+                    .pg_bin_dir(self.pg_version)?
+                    .join("postgres")
+                    .to_str()
+                    .unwrap(),
+            ])
+            .stdin(std::process::Stdio::null())
+            .stderr(logfile.try_clone()?)
+            .stdout(logfile);
+        let child = cmd.spawn()?;
+
+        // Write down the pid so we can wait for it when we want to stop
+        // TODO use background_process::start_process instead
+        let pid = child.id();
+        let pidfile_path = self.endpoint_path().join("compute_ctl.pid");
+        std::fs::write(pidfile_path, pid.to_string())?;
+
+        // Wait for it to start
+        let mut attempt = 0;
+        const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
+        const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s
+        loop {
+            attempt += 1;
+            match self.get_status() {
+                Ok(state) => {
+                    match state.status {
+                        ComputeStatus::Init => {
+                            if attempt == MAX_ATTEMPTS {
+                                bail!("compute startup timed out; still in Init state");
+                            }
+                            // keep retrying
+                        }
+                        ComputeStatus::Running => {
+                            // All good!
+                            break;
+                        }
+                        ComputeStatus::Failed => {
+                            bail!(
+                                "compute startup failed: {}",
+                                state
+                                    .error
+                                    .as_deref()
+                                    .unwrap_or("<no error from compute_ctl>")
+                            );
+                        }
+                        ComputeStatus::Empty
+                        | ComputeStatus::ConfigurationPending
+                        | ComputeStatus::Configuration => {
+                            bail!("unexpected compute status: {:?}", state.status)
+                        }
+                    }
+                }
+                Err(e) => {
+                    if attempt == MAX_ATTEMPTS {
+                        return Err(e).context(
+                            "timed out waiting to connect to compute_ctl HTTP; last error: {e}",
+                        );
+                    }
+                }
+            }
+            std::thread::sleep(ATTEMPT_INTERVAL);
+        }
+
+        Ok(())
+    }
+
+    // Call the /status HTTP API
+    pub fn get_status(&self) -> Result<ComputeState> {
+        let client = reqwest::blocking::Client::new();
+
+        let response = client
+            .request(
+                reqwest::Method::GET,
+                format!(
+                    "http://{}:{}/status",
+                    self.http_address.ip(),
+                    self.http_address.port()
+                ),
+            )
+            .send()?;
+
+        // Interpret the response
+        let status = response.status();
+        if !(status.is_client_error() || status.is_server_error()) {
+            Ok(response.json()?)
+        } else {
+            // reqwest does not export its error construction utility functions, so let's craft the message ourselves
+            let url = response.url().to_owned();
+            let msg = match response.text() {
+                Ok(err_body) => format!("Error: {}", err_body),
+                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
+            };
+            Err(anyhow::anyhow!(msg))
+        }
    }

    pub fn stop(&self, destroy: bool) -> Result<()> {
@@ -489,7 +620,7 @@ impl Endpoint {
                "Destroying postgres data directory '{}'",
                self.pgdata().to_str().unwrap()
            );
-            fs::remove_dir_all(self.endpoint_path())?;
+            std::fs::remove_dir_all(self.endpoint_path())?;
        } else {
            self.pg_ctl(&["stop"], &None)?;
        }
@@ -498,10 +629,10 @@ impl Endpoint {

    pub fn connstr(&self) -> String {
        format!(
-            "host={} port={} user={} dbname={}",
-            self.address.ip(),
-            self.address.port(),
+            "postgresql://{}@{}:{}/{}",
            "cloud_admin",
+            self.pg_address.ip(),
+            self.pg_address.port(),
            "postgres"
        )
    }
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -24,7 +24,7 @@ use utils::{

 use crate::safekeeper::SafekeeperNode;

-pub const DEFAULT_PG_VERSION: u32 = 14;
+pub const DEFAULT_PG_VERSION: u32 = 15;

 //
 // This data structures represents neon_local CLI config
@@ -37,7 +37,7 @@ pub const DEFAULT_PG_VERSION: u32 = 14;
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 pub struct LocalEnv {
    // Base directory for all the nodes (the pageserver, safekeepers and
-    // compute nodes).
+    // compute endpoints).
    //
    // This is not stored in the config file. Rather, this is the path where the
    // config file itself is. It is read from the NEON_REPO_DIR env variable or
@@ -137,6 +137,7 @@ impl Default for PageServerConf {
 pub struct SafekeeperConf {
    pub id: NodeId,
    pub pg_port: u16,
+    pub pg_tenant_only_port: Option<u16>,
    pub http_port: u16,
    pub sync: bool,
    pub remote_storage: Option<String>,
@@ -149,6 +150,7 @@ impl Default for SafekeeperConf {
        Self {
            id: NodeId(0),
            pg_port: 0,
+            pg_tenant_only_port: None,
            http_port: 0,
            sync: true,
            remote_storage: None,
@@ -158,6 +160,14 @@ impl Default for SafekeeperConf {
    }
 }

+impl SafekeeperConf {
+    /// Compute is served by port on which only tenant scoped tokens allowed, if
+    /// it is configured.
+    pub fn get_compute_port(&self) -> u16 {
+        self.pg_tenant_only_port.unwrap_or(self.pg_port)
+    }
+}
+
 impl LocalEnv {
    pub fn pg_distrib_dir_raw(&self) -> PathBuf {
        self.pg_distrib_dir.clone()
@@ -364,7 +374,7 @@ impl LocalEnv {
    //
    // Initialize a new Neon repository
    //
-    pub fn init(&mut self, pg_version: u32) -> anyhow::Result<()> {
+    pub fn init(&mut self, pg_version: u32, force: bool) -> anyhow::Result<()> {
        // check if config already exists
        let base_path = &self.base_data_dir;
        ensure!(
@@ -372,11 +382,29 @@ impl LocalEnv {
            "repository base path is missing"
        );

-        ensure!(
-            !base_path.exists(),
-            "directory '{}' already exists. Perhaps already initialized?",
-            base_path.display()
-        );
+        if base_path.exists() {
+            if force {
+                println!("removing all contents of '{}'", base_path.display());
+                // instead of directly calling `remove_dir_all`, we keep the original dir but removing
+                // all contents inside. This helps if the developer symbol links another directory (i.e.,
+                // S3 local SSD) to the `.neon` base directory.
+                for entry in std::fs::read_dir(base_path)? {
+                    let entry = entry?;
+                    let path = entry.path();
+                    if path.is_dir() {
+                        fs::remove_dir_all(&path)?;
+                    } else {
+                        fs::remove_file(&path)?;
+                    }
+                }
+            } else {
+                bail!(
+                    "directory '{}' already exists. Perhaps already initialized? (Hint: use --force to remove all contents)",
+                    base_path.display()
+                );
+            }
+        }
+
        if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
            bail!(
                "Can't find postgres binary at {}",
@@ -392,7 +420,9 @@ impl LocalEnv {
            }
        }

-        fs::create_dir(base_path)?;
+        if !base_path.exists() {
+            fs::create_dir(base_path)?;
+        }

        // Generate keypair for JWT.
        //
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -1,3 +1,9 @@
+//! Code to manage pageservers
+//!
+//! In the local test environment, the pageserver stores its data directly in
+//!
+//!   .neon/
+//!
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fs::File;
@@ -8,9 +14,7 @@ use std::process::{Child, Command};
 use std::{io, result};

 use anyhow::{bail, Context};
-use pageserver_api::models::{
-    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
-};
+use pageserver_api::models::{self, TenantInfo, TimelineInfo};
 use postgres_backend::AuthType;
 use postgres_connection::{parse_host_port, PgConnectionConfig};
 use reqwest::blocking::{Client, RequestBuilder, Response};
@@ -316,8 +320,8 @@ impl PageServerNode {
        settings: HashMap<&str, &str>,
    ) -> anyhow::Result<TenantId> {
        let mut settings = settings.clone();
-        let request = TenantCreateRequest {
-            new_tenant_id,
+
+        let config = models::TenantConfig {
            checkpoint_distance: settings
                .remove("checkpoint_distance")
                .map(|x| x.parse::<u64>())
@@ -359,8 +363,8 @@ impl PageServerNode {
                .transpose()
                .context("Failed to parse 'trace_read_requests' as bool")?,
            eviction_policy: settings
-                .get("eviction_policy")
-                .map(|x| serde_json::from_str(x))
+                .remove("eviction_policy")
+                .map(serde_json::from_str)
                .transpose()
                .context("Failed to parse 'eviction_policy' json")?,
            min_resident_size_override: settings
@@ -371,6 +375,19 @@ impl PageServerNode {
            evictions_low_residence_duration_metric_threshold: settings
                .remove("evictions_low_residence_duration_metric_threshold")
                .map(|x| x.to_string()),
+            gc_feedback: settings
+                .remove("gc_feedback")
+                .map(|x| x.parse::<bool>())
+                .transpose()
+                .context("Failed to parse 'gc_feedback' as bool")?,
+        };
+
+        // If tenant ID was not specified, generate one
+        let new_tenant_id = new_tenant_id.unwrap_or(TenantId::generate());
+
+        let request = models::TenantCreateRequest {
+            new_tenant_id,
+            config,
        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
@@ -391,67 +408,86 @@ impl PageServerNode {
            })
    }

-    pub fn tenant_config(&self, tenant_id: TenantId, settings: HashMap<&str, &str>) -> Result<()> {
-        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
-            .json(&TenantConfigRequest {
-                tenant_id,
+    pub fn tenant_config(
+        &self,
+        tenant_id: TenantId,
+        mut settings: HashMap<&str, &str>,
+    ) -> anyhow::Result<()> {
+        let config = {
+            // Braces to make the diff easier to read
+            models::TenantConfig {
                checkpoint_distance: settings
-                    .get("checkpoint_distance")
+                    .remove("checkpoint_distance")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'checkpoint_distance' as an integer")?,
-                checkpoint_timeout: settings.get("checkpoint_timeout").map(|x| x.to_string()),
+                checkpoint_timeout: settings.remove("checkpoint_timeout").map(|x| x.to_string()),
                compaction_target_size: settings
-                    .get("compaction_target_size")
+                    .remove("compaction_target_size")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'compaction_target_size' as an integer")?,
-                compaction_period: settings.get("compaction_period").map(|x| x.to_string()),
+                compaction_period: settings.remove("compaction_period").map(|x| x.to_string()),
                compaction_threshold: settings
-                    .get("compaction_threshold")
+                    .remove("compaction_threshold")
                    .map(|x| x.parse::<usize>())
                    .transpose()
                    .context("Failed to parse 'compaction_threshold' as an integer")?,
                gc_horizon: settings
-                    .get("gc_horizon")
+                    .remove("gc_horizon")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'gc_horizon' as an integer")?,
-                gc_period: settings.get("gc_period").map(|x| x.to_string()),
+                gc_period: settings.remove("gc_period").map(|x| x.to_string()),
                image_creation_threshold: settings
-                    .get("image_creation_threshold")
+                    .remove("image_creation_threshold")
                    .map(|x| x.parse::<usize>())
                    .transpose()
                    .context("Failed to parse 'image_creation_threshold' as non zero integer")?,
-                pitr_interval: settings.get("pitr_interval").map(|x| x.to_string()),
+                pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
                walreceiver_connect_timeout: settings
-                    .get("walreceiver_connect_timeout")
+                    .remove("walreceiver_connect_timeout")
+                    .map(|x| x.to_string()),
+                lagging_wal_timeout: settings
+                    .remove("lagging_wal_timeout")
                    .map(|x| x.to_string()),
-                lagging_wal_timeout: settings.get("lagging_wal_timeout").map(|x| x.to_string()),
                max_lsn_wal_lag: settings
-                    .get("max_lsn_wal_lag")
+                    .remove("max_lsn_wal_lag")
                    .map(|x| x.parse::<NonZeroU64>())
                    .transpose()
                    .context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
                trace_read_requests: settings
-                    .get("trace_read_requests")
+                    .remove("trace_read_requests")
                    .map(|x| x.parse::<bool>())
                    .transpose()
                    .context("Failed to parse 'trace_read_requests' as bool")?,
                eviction_policy: settings
-                    .get("eviction_policy")
-                    .map(|x| serde_json::from_str(x))
+                    .remove("eviction_policy")
+                    .map(serde_json::from_str)
                    .transpose()
                    .context("Failed to parse 'eviction_policy' json")?,
                min_resident_size_override: settings
-                    .get("min_resident_size_override")
+                    .remove("min_resident_size_override")
                    .map(|x| x.parse::<u64>())
                    .transpose()
                    .context("Failed to parse 'min_resident_size_override' as an integer")?,
                evictions_low_residence_duration_metric_threshold: settings
-                    .get("evictions_low_residence_duration_metric_threshold")
+                    .remove("evictions_low_residence_duration_metric_threshold")
                    .map(|x| x.to_string()),
-            })
+                gc_feedback: settings
+                    .remove("gc_feedback")
+                    .map(|x| x.parse::<bool>())
+                    .transpose()
+                    .context("Failed to parse 'gc_feedback' as bool")?,
+            }
+        };
+
+        if !settings.is_empty() {
+            bail!("Unrecognized tenant settings: {settings:?}")
+        }
+
+        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
+            .json(&models::TenantConfigRequest { tenant_id, config })
            .send()?
            .error_from_body()?;

@@ -479,11 +515,14 @@ impl PageServerNode {
        ancestor_timeline_id: Option<TimelineId>,
        pg_version: Option<u32>,
    ) -> anyhow::Result<TimelineInfo> {
+        // If timeline ID was not specified, generate one
+        let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate());
+
        self.http_request(
            Method::POST,
            format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
        )?
-        .json(&TimelineCreateRequest {
+        .json(&models::TimelineCreateRequest {
            new_timeline_id,
            ancestor_start_lsn,
            ancestor_timeline_id,
--- a/control_plane/src/postgresql_conf.rs
+++ b/control_plane/src/postgresql_conf.rs
@@ -13,7 +13,7 @@ use std::io::BufRead;
 use std::str::FromStr;

 /// In-memory representation of a postgresql.conf file
-#[derive(Default)]
+#[derive(Default, Debug)]
 pub struct PostgresConf {
    lines: Vec<String>,
    hash: HashMap<String, String>,
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -1,3 +1,10 @@
+//! Code to manage safekeepers
+//!
+//! In the local test environment, the data for each safekeeper is stored in
+//!
+//! ```text
+//!   .neon/safekeepers/<safekeeper id>
+//! ```
 use std::io::Write;
 use std::path::PathBuf;
 use std::process::Child;
@@ -113,45 +120,55 @@ impl SafekeeperNode {
        let availability_zone = format!("sk-{}", id_string);

        let mut args = vec![
-            "-D",
-            datadir.to_str().with_context(|| {
-                format!("Datadir path {datadir:?} cannot be represented as a unicode string")
-            })?,
-            "--id",
-            &id_string,
-            "--listen-pg",
-            &listen_pg,
-            "--listen-http",
-            &listen_http,
-            "--availability-zone",
-            &availability_zone,
+            "-D".to_owned(),
+            datadir
+                .to_str()
+                .with_context(|| {
+                    format!("Datadir path {datadir:?} cannot be represented as a unicode string")
+                })?
+                .to_owned(),
+            "--id".to_owned(),
+            id_string,
+            "--listen-pg".to_owned(),
+            listen_pg,
+            "--listen-http".to_owned(),
+            listen_http,
+            "--availability-zone".to_owned(),
+            availability_zone,
        ];
+        if let Some(pg_tenant_only_port) = self.conf.pg_tenant_only_port {
+            let listen_pg_tenant_only = format!("127.0.0.1:{}", pg_tenant_only_port);
+            args.extend(["--listen-pg-tenant-only".to_owned(), listen_pg_tenant_only]);
+        }
        if !self.conf.sync {
-            args.push("--no-sync");
+            args.push("--no-sync".to_owned());
        }

        let broker_endpoint = format!("{}", self.env.broker.client_url());
-        args.extend(["--broker-endpoint", &broker_endpoint]);
+        args.extend(["--broker-endpoint".to_owned(), broker_endpoint]);

        let mut backup_threads = String::new();
        if let Some(threads) = self.conf.backup_threads {
            backup_threads = threads.to_string();
-            args.extend(["--backup-threads", &backup_threads]);
+            args.extend(["--backup-threads".to_owned(), backup_threads]);
        } else {
            drop(backup_threads);
        }

        if let Some(ref remote_storage) = self.conf.remote_storage {
-            args.extend(["--remote-storage", remote_storage]);
+            args.extend(["--remote-storage".to_owned(), remote_storage.clone()]);
        }

        let key_path = self.env.base_data_dir.join("auth_public_key.pem");
        if self.conf.auth_enabled {
            args.extend([
-                "--auth-validation-public-key-path",
-                key_path.to_str().with_context(|| {
-                    format!("Key path {key_path:?} cannot be represented as a unicode string")
-                })?,
+                "--auth-validation-public-key-path".to_owned(),
+                key_path
+                    .to_str()
+                    .with_context(|| {
+                        format!("Key path {key_path:?} cannot be represented as a unicode string")
+                    })?
+                    .to_owned(),
            ]);
        }

--- a/docker-compose/compute_wrapper/shell/compute.sh
+++ b/docker-compose/compute_wrapper/shell/compute.sh
@@ -1,6 +1,14 @@
 #!/bin/bash
 set -eux

+# Generate a random tenant or timeline ID
+#
+# Takes a variable name as argument. The result is stored in that variable.
+generate_id() {
+    local -n resvar=$1
+    printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM
+}
+
 PG_VERSION=${PG_VERSION:-14}

 SPEC_FILE_ORG=/var/db/postgres/specs/spec.json
@@ -13,29 +21,29 @@ done
 echo "Page server is ready."

 echo "Create a tenant and timeline"
+generate_id tenant_id
 PARAMS=(
     -sb 
     -X POST
     -H "Content-Type: application/json"
-     -d "{}"
+     -d "{\"new_tenant_id\": \"${tenant_id}\"}"
     http://pageserver:9898/v1/tenant/
 )
-tenant_id=$(curl "${PARAMS[@]}" | sed 's/"//g')
+result=$(curl "${PARAMS[@]}")
+echo $result | jq .

+generate_id timeline_id
 PARAMS=(
     -sb 
     -X POST
     -H "Content-Type: application/json"
-     -d "{\"tenant_id\":\"${tenant_id}\", \"pg_version\": ${PG_VERSION}}"
+     -d "{\"new_timeline_id\": \"${timeline_id}\", \"pg_version\": ${PG_VERSION}}"
     "http://pageserver:9898/v1/tenant/${tenant_id}/timeline/"
 )
 result=$(curl "${PARAMS[@]}")
 echo $result | jq .

 echo "Overwrite tenant id and timeline id in spec file"
-tenant_id=$(echo ${result} | jq -r .tenant_id)
-timeline_id=$(echo ${result} | jq -r .timeline_id)
-
 sed "s/TENANT_ID/${tenant_id}/" ${SPEC_FILE_ORG} > ${SPEC_FILE}
 sed -i "s/TIMELINE_ID/${timeline_id}/" ${SPEC_FILE}

--- a/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
+++ b/docker-compose/compute_wrapper/var/db/postgres/specs/spec.json
@@ -28,11 +28,6 @@
                "value": "replica",
                "vartype": "enum"
            },
-            {
-                "name": "hot_standby",
-                "value": "on",
-                "vartype": "bool"
-            },
            {
                "name": "wal_log_hints",
                "value": "on",
--- a/docker-compose/docker-compose.yml
+++ b/docker-compose/docker-compose.yml
@@ -189,7 +189,7 @@ services:
      - "/bin/bash"
      - "-c"
    command:
-      - "until pg_isready -h compute -p 55433 ; do
+      - "until pg_isready -h compute -p 55433 -U cloud_admin ; do
            echo 'Waiting to start compute...' && sleep 1;
         done"
    depends_on:
--- a/docs/docker.md
+++ b/docs/docker.md
@@ -48,6 +48,7 @@ Creating docker-compose_storage_broker_1       ... done
 2. connect compute node
 ```
 $ echo "localhost:55433:postgres:cloud_admin:cloud_admin" >> ~/.pgpass
+$ chmod 600 ~/.pgpass
 $ psql -h localhost -p 55433 -U cloud_admin
 postgres=# CREATE TABLE t(key int primary key, value text);
 CREATE TABLE
--- a/docs/pageserver-thread-mgmt.md
+++ b/docs/pageserver-thread-mgmt.md
@@ -4,6 +4,11 @@ The pageserver uses Tokio for handling concurrency. Everything runs in
 Tokio tasks, although some parts are written in blocking style and use
 spawn_blocking().

+We currently use std blocking functions for disk I/O, however.  The
+current model is that we consider disk I/Os to be short enough that we
+perform them while running in a Tokio task. Changing all the disk I/O
+calls to async is a TODO.
+
 Each Tokio task is tracked by the `task_mgr` module. It maintains a
 registry of tasks, and which tenant or timeline they are operating
 on.
@@ -21,19 +26,84 @@ also a `shudown_watcher()` Future that can be used with `tokio::select!`
 or similar, to wake up on shutdown.


-### Sync vs async
+### Async cancellation safety

-We use async to wait for incoming data on network connections, and to
-perform other long-running operations. For example, each WAL receiver
-connection is handled by a tokio Task. Once a piece of WAL has been
-received from the network, the task calls the blocking functions in
-the Repository to process the WAL.
+In async Rust, futures can be "cancelled" at any await point, by
+dropping the Future. For example, `tokio::select!` returns as soon as
+one of the Futures returns, and drops the others. `tokio::time::timeout`
+is another example. In the Rust ecosystem, some functions are
+cancellation-safe, meaning they can be safely dropped without
+side-effects, while others are not. See documentation of
+`tokio::select!` for examples.

-The core storage code in `layered_repository/` is synchronous, with
-blocking locks and I/O calls. The current model is that we consider
-disk I/Os to be short enough that we perform them while running in a
-Tokio task. If that becomes a problem, we should use `spawn_blocking`
-before entering the synchronous parts of the code, or switch to using
-tokio I/O functions.
+In the pageserver and safekeeper, async code is *not*
+cancellation-safe by default. Unless otherwise marked, any async
+function that you call cannot be assumed to be async
+cancellation-safe, and must be polled to completion.

-Be very careful when mixing sync and async code!
+The downside of non-cancellation safe code is that you have to be very
+careful when using `tokio::select!`, `tokio::time::timeout`, and other
+such functions that can cause a Future to be dropped. They can only be
+used with functions that are explicitly documented to be cancellation-safe,
+or you need to spawn a separate task to shield from the cancellation.
+
+At the entry points to the code, we also take care to poll futures to
+completion, or shield the rest of the code from surprise cancellations
+by spawning a separate task. The code that handles incoming HTTP
+requests, for example, spawns a separate task for each request,
+because Hyper will drop the request-handling Future if the HTTP
+connection is lost.
+
+
+#### How to cancel, then?
+
+If our code is not cancellation-safe, how do you cancel long-running
+tasks? Use CancellationTokens.
+
+TODO: More details on that. And we have an ongoing discussion on what
+to do if cancellations might come from multiple sources.
+
+#### Exceptions
+Some library functions are cancellation-safe, and are explicitly marked
+as such. For example, `utils::seqwait`.
+
+#### Rationale
+
+The alternative would be to make all async code cancellation-safe,
+unless otherwise marked. That way, you could use `tokio::select!` more
+liberally. The reasons we didn't choose that are explained in this
+section.
+
+Writing code in a cancellation-safe manner is tedious, as you need to
+scrutinize every `.await` and ensure that if the `.await` call never
+returns, the system is in a safe, consistent state. In some ways, you
+need to do that with `?` and early `returns`, too, but `.await`s are
+easier to miss. It is also easier to perform cleanup tasks when a
+function returns an `Err` than when an `.await` simply never
+returns. You can use `scopeguard` and Drop guards to perform cleanup
+tasks, but it is more tedious. An `.await` that never returns is more
+similar to a panic.
+
+Note that even if you only use building blocks that themselves are
+cancellation-safe, it doesn't mean that the code as whole is
+cancellation-safe. For example, consider the following code:
+
+```
+while let Some(i) = work_inbox.recv().await {
+	if let Err(_) = results_outbox.send(i).await {
+		println!("receiver dropped");
+		return;
+		}
+	}
+}
+```
+
+It reads messages from one channel, sends them to another channel. If
+this code is cancelled at the `results_outbox.send(i).await`, the
+message read from the receiver is lost. That may or may not be OK,
+depending on the context.
+
+Another reason to not require cancellation-safety is historical: we
+already had a lot of async code that was not scrutinized for
+cancellation-safety when this issue was raised. Scrutinizing all
+existing code is no fun.
--- a/docs/rfcs/023-the-state-of-pageserver-tenant-relocation.md
+++ b/docs/rfcs/023-the-state-of-pageserver-tenant-relocation.md
@@ -0,0 +1,232 @@
+# The state of pageserver tenant relocation
+
+Created on 17.03.23
+
+## Motivation
+
+There were previous write ups on the subject. The design of tenant relocation was planned at the time when we had quite different landscape. I e there was no on-demand download/eviction. They were on the horizon but we still planned for cases when they were not available. Some other things have changed. Now safekeepers offload wal to s3 so we're not risking overflowing their disks. Having all of the above, it makes sense to recap and take a look at the options we have now, which adjustments we'd like to make to original process, etc.
+
+Related (in chronological order):
+
+- Tracking issue with initial discussion: [#886](https://github.com/neondatabase/neon/issues/886)
+- [015. Storage Messaging](015-storage-messaging.md)
+- [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md)
+
+## Summary
+
+The RFC consists of a walkthrough of prior art on tenant relocation and corresponding problems. It describes 3 approaches.
+
+1. Simplistic approach that uses ignore and is the fastest to implement. The main downside is a requirement of short downtime.
+2. More complicated approach that avoids even short downtime.
+3. Even more complicated approach that will allow multiple pageservers to operate concurrently on the same tenant possibly allowing for HA cluster topologies and horizontal scaling of reads (i e compute talks to multiple pageservers).
+
+The order in which solutions are described is a bit different. We start from 2, then move to possible compromises (aka simplistic approach) and then move to discussing directions for solving HA/Pageserver replica case with 3.
+
+## Components
+
+pageserver, control-plane, safekeepers (a bit)
+
+## Requirements
+
+Relocation procedure should move tenant from one pageserver to another without downtime introduced by storage side. For now restarting compute for applying new configuration is fine.
+
+- component restarts
+- component outage
+- pageserver loss
+
+## The original proposed implementation
+
+The starting point is this sequence:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS2: Attach tenant X
+    PS2->>S3: Fetch timelines, indexes for them
+    PS2->>CP: Accepted
+    CP->>CP: Change pageserver id in project
+    CP->>PS1: Detach
+```
+
+Which problems do we have with naive approach?
+
+### Concurrent GC and Compaction
+
+The problem is that they can run on both, PS1 and PS2. Consider this example from [Pageserver S3 Coordination RFC](020-pageserver-s3-coordination.md)
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant PS1
+    participant S3
+    participant PS2
+
+    PS1->>S3: Uploads L1, L2 <br/> Index contains L1 L2
+    PS2->>S3: Attach called, sees L1, L2
+    PS1->>S3: Compaction comes <br/> Removes L1, adds L3
+    note over S3: Index now L2, L3
+    PS2->>S3: Uploads new layer L4 <br/> (added to previous view of the index)
+    note over S3: Index now L1, L2, L4
+```
+
+At this point it is not possible to restore the state from index, it contains L2 which
+is no longer available in s3 and doesnt contain L3 added by compaction by the
+first pageserver. So if any of the pageservers restart, initial sync will fail
+(or in on-demand world it will fail a bit later during page request from
+missing layer)
+
+The problem lies in shared index_part.json. Having intersecting layers from append only edits is expected to work, though this is an uncharted territory without tests.
+
+#### Options
+
+There are several options on how to restrict concurrent access to index file.
+
+First and the simplest one is external orchestration. Control plane which runs migration can use special api call on pageserver to stop background processes (gc, compaction), and even possibly all uploads.
+
+So the sequence becomes:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS1: Pause background jobs, pause uploading new layers.
+    CP->>PS2: Attach tenant X.
+    PS2->>S3: Fetch timelines, index, start background operations
+    PS2->>CP: Accepted
+    CP->>CP: Monitor PS2 last record lsn, ensure OK lag
+    CP->>CP: Change pageserver id in project
+    CP->>PS1: Detach
+```
+
+The downside of this sequence is the potential rollback process. What if something goes wrong on new pageserver? Can we safely roll back to source pageserver?
+
+There are two questions:
+
+#### How can we detect that something went wrong?
+
+We can run usual availability check (consists of compute startup and an update of one row).
+Note that we cant run separate compute for that before touching compute that client runs actual workload on, because we cant have two simultaneous computes running in read-write mode on the same timeline (enforced by safekeepers consensus algorithm). So we can either run some readonly check first (basebackup) and then change pageserver id and run availability check. If it failed we can roll it back to the old one.
+
+#### What can go wrong? And how we can safely roll-back?
+
+In the sequence above during attach we start background processes/uploads. They change state in remote storage so it is possible that after rollback remote state will be different from one that was observed by source pageserver. So if target pageserver goes wild then source pageserver may fail to start with changed remote state.
+
+Proposed option would be to implement a barrier (read-only) mode when pageserver does not update remote state.
+
+So the sequence for happy path becomes this one:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS1: Pause background jobs, pause uploading new layers.
+    CP->>PS2: Attach tenant X in remote readonly mode.
+    PS2->>S3: Fetch timelines, index
+    PS2->>CP: Accepted
+    CP->>CP: Monitor PS2 last record lsn, ensure OK lag
+    CP->>CP: Change pageserver id in project
+    CP->>CP: Run successful availability check
+    CP->>PS2: Start uploads, background tasks
+    CP->>PS1: Detach
+```
+
+With this sequence we restrict any changes to remote storage to one pageserver. So there is no concurrent access at all, not only for index_part.json, but for everything else too. This approach makes it possible to roll back after failure on new pageserver.
+
+The sequence with roll back process:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant S3
+
+    CP->>PS1: Pause background jobs, pause uploading new layers.
+    CP->>PS2: Attach tenant X in remote readonly mode.
+    PS2->>S3: Fetch timelines, index
+    PS2->>CP: Accepted
+    CP->>CP: Monitor PS2 last record lsn, ensure OK lag
+    CP->>CP: Change pageserver id in project
+    CP->>CP: Availability check Failed
+    CP->>CP: Change pageserver id back
+    CP->>PS1: Resume remote operations
+    CP->>PS2: Ignore (instead of detach for investigation purposes)
+```
+
+## Concurrent branch creation
+
+Another problem is a possibility of concurrent branch creation calls.
+
+I e during migration create_branch can be called on old pageserver and newly created branch wont be seen on new pageserver. Prior art includes prototyping an approach of trying to mirror such branches, but currently it lost its importance, because now attach is fast because we dont need to download all data, and additionally to the best of my knowledge of control plane internals (cc @ololobus to confirm) operations on one project are executed sequentially, so it is not possible to have such case. So branch create operation will be executed only when relocation is completed. As a safety measure we can forbid branch creation for tenants that are in readonly remote state.
+
+## Simplistic approach
+
+The difference of simplistic approach from one described above is that it calls ignore on source tenant first and then calls attach on target pageserver. Approach above does it in opposite order thus opening a possibility for race conditions we strive to avoid.
+
+The approach largely follows this guide: <https://github.com/neondatabase/cloud/wiki/Cloud:-Ad-hoc-tenant-relocation>
+
+The happy path sequence:
+
+```mermaid
+sequenceDiagram
+    autonumber
+    participant CP as Control Plane
+    participant PS1 as Pageserver 1
+    participant PS2 as Pageserver 2
+    participant SK as Safekeeper
+    participant S3
+
+    CP->>CP: Enable maintenance mode
+    CP->>PS1: Ignore
+    CP->>PS2: Attach
+    PS2->>CP: Accepted
+    loop Delete layers for each timeline
+        CP->>PS2: Get last record lsn
+        CP->>SK: Get commit lsn
+        CP->>CP: OK? Timed out?
+    end
+    CP->>CP: Change pageserver id in project
+    CP->>CP: Run successful availability check
+    CP->>CP: Disable maintenance mode
+    CP->>PS1: Detach ignored
+```
+
+The sequence contains exactly the same rollback problems as in previous approach described above. They can be resolved the same way.
+
+Most probably we'd like to move forward without this safety measure and implement it on top of this approach to make progress towards the downtime-less one.
+
+## Lease based approach
+
+In order to allow for concurrent operation on the same data on remote storage for multiple pageservers we need to go further than external orchestration.
+
+NOTE: [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md) discusses one more approach that relies on duplication of index_part.json for each pageserver operating on the timeline. This approach still requires external coordination which makes certain things easier but requires additional bookkeeping to account for multiple index_part.json files. Discussion/comparison with proposed lease based approach
+
+The problems are outlined in [020. Pageserver S3 Coordination](020-pageserver-s3-coordination.md) and suggested solution includes [Coordination based approach](020-pageserver-s3-coordination.md#coordination-based-approach). This way it will allow to do basic leader election for pageservers so they can decide which node will be responsible for running GC and compaction. The process is based on extensive communication via storage broker and consists of a lease that is taken by one of the pageservers that extends it to continue serving a leader role.
+
+There are two options for ingesting new data into pageserver in follower role. One option is to avoid WAL ingestion at all and rely on notifications from leader to discover new layers on s3. Main downside of this approach is that follower will always lag behind the primary node because it wont have the last layer until it is uploaded to remote storage. In case of a primary failure follower will be required to reingest last segment (up to 256Mb of WAL currently) which slows down recovery. Additionally if compute is connected to follower pageserver it will observe latest data with a delay. Queries from compute will likely experience bigger delays when recent lsn is required.
+
+The second option is to consume WAL stream on both pageservers. In this case the only problem is non deterministic layer generation. Additional bookkeeping will be required to deduplicate layers from primary with local ones. Some process needs to somehow merge them to remove duplicated data. Additionally we need to have good testing coverage to ensure that our implementation of `get_page@lsn` properly handles intersecting layers.
+
+There is another tradeoff. Approaches may be different in amount of traffic between system components. With first approach there can be increased traffic between follower and remote storage. But only in case follower has some activity that actually requests pages (!). With other approach traffic increase will be permanent and will be caused by two WAL streams instead of one.
+
+## Summary
+
+Proposed implementation strategy:
+
+Go with the simplest approach for now. Then work on tech debt, increase test coverage. Then gradually move forward to second approach by implementing safety measures first, finishing with switch of order between ignore and attach operation.
+
+And only then go to lease based approach to solve HA/Pageserver replica use cases.
--- a/docs/rfcs/024-user-mgmt.md
+++ b/docs/rfcs/024-user-mgmt.md
@@ -0,0 +1,84 @@
+# Postgres user and database management
+
+(This supersedes the previous proposal that looked too complicated and desynchronization-prone)
+
+We've accumulated a bunch of problems with our approach to role and database management, namely:
+
+1. we don't allow role and database creation from Postgres, and users are complaining about that
+2. fine-grained role management is not possible both from Postgres and console
+
+Right now, we do store users and databases both in console and Postgres, and there are two main reasons for
+that:
+
+* we want to be able to authenticate users in proxy against the console without Postgres' involvement. Otherwise,
+malicious brute force attempts will wake up Postgres (expensive) and may exhaust the Postgres connections limit (deny of service).
+* it is handy when we can render console UI without waking up compute (e.g., show database list)
+
+This RFC doesn't talk about giving root access to the database, which is blocked by a secure runtime setup.
+
+## Overview
+
+* Add Postgres extension that sends an HTTP request each time transaction that modifies users/databases is about to commit.
+* Add user management API to internal console API. Also, the console should put a JWT token into the compute so that it can access management API.
+
+## Postgres behavior
+
+The default user role (@username) should have `CREATE ROLE`, `CREATE DB`, and `BYPASSRLS` privileges. We expose the Postgres port
+to the open internet, so we need to check password strength. Now console generates strong passwords, so there is no risk of having dumb passwords. With user-provided passwords, such risks exist.
+
+Since we store passwords in the console we should also send unencrypted password when role is created/changed. Hence communication with the console must be encrypted. Postgres also supports creating roles using hashes, in that case, we will not be able to get a raw password. So I can see the following options here:
+  * roles created via SQL will *not* have raw passwords in the console
+  * roles created via SQL will have raw passwords in the console, except ones that were created using hashes
+
+I'm leaning towards the second option here as it is a bit more consistent one -- if raw password storage is enabled then we store passwords in all cases where we can store them.
+
+To send data about roles and databases from Postgres to the console we can create the following Postgres extension:
+
+  * Intercept role/database changes in `ProcessUtility_hook`. Here we have access to the query statement with the raw password. The hook handler itself should not dial the console immediately and rather stash info in some hashmap for later use.
+  * When the transaction is about to commit we execute collected role modifications (all as one -- console should either accept all or reject all, and hence API shouldn't be REST-like). If the console request fails we can roll back the transaction. This way if the transaction is committed we know for sure that console has this information. We can use `XACT_EVENT_PRE_COMMIT` and `XACT_EVENT_PARALLEL_PRE_COMMIT` for that.
+  * Extension should be mindful of the fact that it is possible to create and delete roles within the transaction.
+  * We also need to track who is database owner, some coding around may be needed to get the current user when the database is created.
+
+## Console user management API
+
+The current public API has REST API for role management. We need to have some analog for the internal API (called mgmt API in the console code). But unlike public API here we want to have an atomic way to create several roles/databases (in cases when several roles were created in the same transaction). So something like that may work:
+
+```
+curl -X PATCH /api/v1/roles_and_databases -d '
+[
+    {"op":"create", "type":"role", "name": "kurt", "password":"lYgT3BlbkFJ2vBZrqv"},
+    {"op":"drop", "type":"role", "name": "trout"},
+    {"op":"alter", "type":"role", "name": "kilgore", "password":"3BlbkFJ2vB"},
+    {"op":"create", "type":"database", "name": "db2", "owner": "eliot"},
+]
+'
+```
+
+Makes sense not to error out on duplicated create/delete operations (see failure modes)
+
+## Managing users from the console
+
+Now console puts a spec file with the list of databases/roles and delta operations in all the compute pods. `compute_ctl` then picks up that file and stubbornly executes deltas and checks data in the spec file is the same as in the Postgres. This way if the user creates a role in the UI we restart compute with a new spec file and during the start databases/roles are created. So if Postgres send an HTTP call each time role is created we need to break recursion in that case. We can do that based on application_name or some GUC or user (local == no HTTP hook).
+
+Generally, we have several options when we are creating users via console:
+
+1. restart compute with a new spec file, execute local SQL command; cut recursion in the extension
+2. "push" spec files into running compute, execute local SQL command; cut recursion in the extension
+3. "push" spec files into running compute, execute local SQL command; let extension create those roles in the console
+4. avoid managing roles via spec files, send SQL commands to compute; let extension create those roles in the console
+
+The last option is the most straightforward one, but with the raw password storage opt-out, we will not have the password to establish an SQL connection. Also, we need a spec for provisioning purposes and to address potential desync (but that is quite unlikely). So I think the easiest approach would be:
+
+1. keep role management like it is now and cut the recursion in the extension when SQL is executed by compute_ctl
+2. add "push" endpoint to the compute_ctl to avoid compute restart during the `apply_config` operation -- that can be done as a follow up to avoid increasing scope too much
+
+## Failure modes
+
+* during role creation via SQL role was created in the console but the connection was dropped before Postgres got acknowledgment or some error happened after acknowledgment (out of disk space, deadlock, etc):
+
+  in that case, Postgres won't have a role that exists in the console. Compute restart will heal it (due to the spec file). Also if the console allows repeated creation/deletion user can repeat the transaction.
+
+
+# Scalability
+
+On my laptop, I can create 4200 roles per second. That corresponds to 363 million roles per day. Since each role creation ends up in the console database we can add some limit to the number of roles (could be reasonably big to not run into it often -- like 1k or 10k).
--- a/docs/tools.md
+++ b/docs/tools.md
@@ -0,0 +1,22 @@
+# Useful development tools
+
+This readme contains some hints on how to set up some optional development tools.
+
+## ccls
+
+[ccls](https://github.com/MaskRay/ccls) is a c/c++ language server. It requires some setup
+to work well. There are different ways to do it but here's what works for me:
+1. Make a common parent directory for all your common neon projects. (for example, `~/src/neondatabase/`)
+2. Go to `vendor/postgres-v15`
+3. Run `make clean && ./configure`
+4. Install [bear](https://github.com/rizsotto/Bear), and run `bear -- make -j4`
+5. Copy the generated `compile_commands.json` to `~/src/neondatabase` (or equivalent)
+6. Run `touch ~/src/neondatabase/.ccls-root` this will make the `compile_commands.json` file discoverable in all subdirectories
+
+With this setup you will get decent lsp mileage inside the postgres repo, and also any postgres extensions that you put in `~/src/neondatabase/`, like `pg_embedding`, or inside `~/src/neondatabase/neon/pgxn` as well.
+
+Some additional tips for various IDEs:
+
+### Emacs
+
+To improve performance: `(setq lsp-lens-enable nil)`
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -11,4 +11,5 @@ serde.workspace = true
 serde_with.workspace = true
 serde_json.workspace = true

+utils = { path = "../utils" }
 workspace_hack.workspace = true
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -5,34 +5,35 @@ use serde::{Deserialize, Serialize, Serializer};

 use crate::spec::ComputeSpec;

-#[derive(Serialize, Debug)]
+#[derive(Serialize, Debug, Deserialize)]
 pub struct GenericAPIError {
    pub error: String,
 }

 /// Response of the /status API
-#[derive(Serialize, Debug)]
+#[derive(Serialize, Debug, Deserialize)]
 #[serde(rename_all = "snake_case")]
 pub struct ComputeStatusResponse {
+    pub start_time: DateTime<Utc>,
    pub tenant: Option<String>,
    pub timeline: Option<String>,
    pub status: ComputeStatus,
    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
+    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
 }

-#[derive(Serialize)]
+#[derive(Deserialize, Serialize)]
 #[serde(rename_all = "snake_case")]
 pub struct ComputeState {
    pub status: ComputeStatus,
    /// Timestamp of the last Postgres activity
    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
+    pub last_active: Option<DateTime<Utc>>,
    pub error: Option<String>,
 }

-#[derive(Serialize, Clone, Copy, Debug, PartialEq, Eq)]
+#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "snake_case")]
 pub enum ComputeStatus {
    // Spec wasn't provided at start, waiting for it to be
@@ -53,18 +54,25 @@ pub enum ComputeStatus {
    Failed,
 }

-fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
+fn rfc3339_serialize<S>(x: &Option<DateTime<Utc>>, s: S) -> Result<S::Ok, S::Error>
 where
    S: Serializer,
 {
-    x.to_rfc3339().serialize(s)
+    if let Some(x) = x {
+        x.to_rfc3339().serialize(s)
+    } else {
+        s.serialize_none()
+    }
 }

 /// Response of the /metrics.json API
 #[derive(Clone, Debug, Default, Serialize)]
 pub struct ComputeMetrics {
+    pub wait_for_spec_ms: u64,
    pub sync_safekeepers_ms: u64,
    pub basebackup_ms: u64,
+    pub basebackup_bytes: u64,
+    pub start_postgres_ms: u64,
    pub config_ms: u64,
    pub total_startup_ms: u64,
 }
@@ -75,4 +83,16 @@ pub struct ComputeMetrics {
 #[derive(Deserialize, Debug)]
 pub struct ControlPlaneSpecResponse {
    pub spec: Option<ComputeSpec>,
+    pub status: ControlPlaneComputeStatus,
+}
+
+#[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ControlPlaneComputeStatus {
+    // Compute is known to control-plane, but it's not
+    // yet attached to any timeline / endpoint.
+    Empty,
+    // Compute is attached to some timeline / endpoint and
+    // should be able to start with provided spec.
+    Attached,
 }
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -3,8 +3,10 @@
 //! The spec.json file is used to pass information to 'compute_ctl'. It contains
 //! all the information needed to start up the right version of PostgreSQL,
 //! and connect it to the storage nodes.
-use serde::Deserialize;
-use std::collections::HashMap;
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;

 /// String type alias representing Postgres identifier and
 /// intended to be used for DB / role names.
@@ -12,7 +14,8 @@ pub type PgIdent = String;

 /// Cluster spec or configuration represented as an optional number of
 /// delta operations + final cluster state description.
-#[derive(Clone, Debug, Default, Deserialize)]
+#[serde_as]
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct ComputeSpec {
    pub format_version: f32,

@@ -24,18 +27,69 @@ pub struct ComputeSpec {
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,

-    pub storage_auth_token: Option<String>,
+    /// An optinal hint that can be passed to speed up startup time if we know
+    /// that no pg catalog mutations (like role creation, database creation,
+    /// extension creation) need to be done on the actual database to start.
+    #[serde(default)] // Default false
+    pub skip_pg_catalog_updates: bool,

-    pub startup_tracing_context: Option<HashMap<String, String>>,
+    // Information needed to connect to the storage layer.
+    //
+    // `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
+    //
+    // Depending on `mode`, this can be a primary read-write node, a read-only
+    // replica, or a read-only node pinned at an older LSN.
+    // `safekeeper_connstrings` must be set for a primary.
+    //
+    // For backwards compatibility, the control plane may leave out all of
+    // these, and instead set the "neon.tenant_id", "neon.timeline_id",
+    // etc. GUCs in cluster.settings. TODO: Once the control plane has been
+    // updated to fill these fields, we can make these non optional.
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub tenant_id: Option<TenantId>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub timeline_id: Option<TimelineId>,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    pub pageserver_connstring: Option<String>,
+    #[serde(default)]
+    pub safekeeper_connstrings: Vec<String>,
+
+    #[serde(default)]
+    pub mode: ComputeMode,
+
+    /// If set, 'storage_auth_token' is used as the password to authenticate to
+    /// the pageserver and safekeepers.
+    pub storage_auth_token: Option<String>,
 }

-#[derive(Clone, Debug, Default, Deserialize)]
+#[serde_as]
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Deserialize, Serialize)]
+pub enum ComputeMode {
+    /// A read-write node
+    #[default]
+    Primary,
+    /// A read-only node, pinned at a particular LSN
+    Static(#[serde_as(as = "DisplayFromStr")] Lsn),
+    /// A read-only node that follows the tip of the branch in hot standby mode
+    ///
+    /// Future versions may want to distinguish between replicas with hot standby
+    /// feedback and other kinds of replication configurations.
+    Replica,
+}
+
+#[derive(Clone, Debug, Default, Deserialize, Serialize)]
 pub struct Cluster {
-    pub cluster_id: String,
-    pub name: String,
+    pub cluster_id: Option<String>,
+    pub name: Option<String>,
    pub state: Option<String>,
    pub roles: Vec<Role>,
    pub databases: Vec<Database>,
+
+    /// Desired contents of 'postgresql.conf' file. (The 'compute_ctl'
+    /// tool may add additional settings to the final file.)
+    pub postgresql_conf: Option<String>,
+
+    /// Additional settings that will be appended to the 'postgresql.conf' file.
    pub settings: GenericOptions,
 }

@@ -45,7 +99,7 @@ pub struct Cluster {
 /// - DROP ROLE
 /// - ALTER ROLE name RENAME TO new_name
 /// - ALTER DATABASE name RENAME TO new_name
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct DeltaOp {
    pub action: String,
    pub name: PgIdent,
@@ -54,7 +108,7 @@ pub struct DeltaOp {

 /// Rust representation of Postgres role info with only those fields
 /// that matter for us.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct Role {
    pub name: PgIdent,
    pub encrypted_password: Option<String>,
@@ -63,7 +117,7 @@ pub struct Role {

 /// Rust representation of Postgres database info with only those fields
 /// that matter for us.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct Database {
    pub name: PgIdent,
    pub owner: PgIdent,
@@ -73,7 +127,7 @@ pub struct Database {
 /// Common type representing both SQL statement params with or without value,
 /// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
 /// options like `wal_level = logical`.
-#[derive(Clone, Debug, Deserialize)]
+#[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct GenericOption {
    pub name: String,
    pub value: Option<String>,
@@ -94,4 +148,14 @@ mod tests {
        let file = File::open("tests/cluster_spec.json").unwrap();
        let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
    }
+
+    #[test]
+    fn parse_unknown_fields() {
+        // Forward compatibility test
+        let file = File::open("tests/cluster_spec.json").unwrap();
+        let mut json: serde_json::Value = serde_json::from_reader(file).unwrap();
+        let ob = json.as_object_mut().unwrap();
+        ob.insert("unknown_field_123123123".into(), "hello".into());
+        let _spec: ComputeSpec = serde_json::from_value(json).unwrap();
+    }
 }
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -6,6 +6,7 @@ use once_cell::sync::Lazy;
 use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
 pub use prometheus::opts;
 pub use prometheus::register;
+pub use prometheus::Error;
 pub use prometheus::{core, default_registry, proto};
 pub use prometheus::{exponential_buckets, linear_buckets};
 pub use prometheus::{register_counter_vec, Counter, CounterVec};
@@ -23,6 +24,7 @@ use prometheus::{Registry, Result};
 pub mod launch_timestamp;
 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};
+pub mod metric_vec_duration;

 pub type UIntGauge = GenericGauge<AtomicU64>;
 pub type UIntGaugeVec = GenericGaugeVec<AtomicU64>;
--- a/libs/metrics/src/metric_vec_duration.rs
+++ b/libs/metrics/src/metric_vec_duration.rs
@@ -0,0 +1,23 @@
+//! Helpers for observing duration on `HistogramVec` / `CounterVec` / `GaugeVec` / `MetricVec<T>`.
+
+use std::{future::Future, time::Instant};
+
+pub trait DurationResultObserver {
+    fn observe_result<T, E>(&self, res: &Result<T, E>, duration: std::time::Duration);
+}
+
+pub async fn observe_async_block_duration_by_result<
+    T,
+    E,
+    F: Future<Output = Result<T, E>>,
+    O: DurationResultObserver,
+>(
+    observer: &O,
+    block: F,
+) -> Result<T, E> {
+    let start = Instant::now();
+    let result = block.await;
+    let duration = start.elapsed();
+    observer.observe_result(&result, duration);
+    result
+}
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use strum_macros;
 use utils::{
+    completion,
    history_buffer::HistoryBufferWithDropCounter,
    id::{NodeId, TenantId, TimelineId},
    lsn::Lsn,
@@ -18,7 +19,29 @@ use crate::reltag::RelTag;
 use anyhow::bail;
 use bytes::{BufMut, Bytes, BytesMut};

-/// A state of a tenant in pageserver's memory.
+/// The state of a tenant in this pageserver.
+///
+/// ```mermaid
+/// stateDiagram-v2
+///
+///     [*] --> Loading: spawn_load()
+///     [*] --> Attaching: spawn_attach()
+///
+///     Loading --> Activating: activate()
+///     Attaching --> Activating: activate()
+///     Activating --> Active: infallible
+///
+///     Loading --> Broken: load() failure
+///     Attaching --> Broken: attach() failure
+///
+///     Active --> Stopping: set_stopping(), part of shutdown & detach
+///     Stopping --> Broken: late error in remove_tenant_from_memory
+///
+///     Broken --> [*]: ignore / detach / shutdown
+///     Stopping --> [*]: remove_from_memory complete
+///
+///     Active --> Broken: cfg(testing)-only tenant break point
+/// ```
 #[derive(
    Clone,
    PartialEq,
@@ -26,35 +49,82 @@ use bytes::{BufMut, Bytes, BytesMut};
    serde::Serialize,
    serde::Deserialize,
    strum_macros::Display,
-    strum_macros::EnumString,
    strum_macros::EnumVariantNames,
    strum_macros::AsRefStr,
    strum_macros::IntoStaticStr,
 )]
 #[serde(tag = "slug", content = "data")]
 pub enum TenantState {
-    /// This tenant is being loaded from local disk
+    /// This tenant is being loaded from local disk.
+    ///
+    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Loading,
-    /// This tenant is being downloaded from cloud storage.
+    /// This tenant is being attached to the pageserver.
+    ///
+    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
    Attaching,
-    /// Tenant is fully operational
+    /// The tenant is transitioning from Loading/Attaching to Active.
+    ///
+    /// While in this state, the individual timelines are being activated.
+    ///
+    /// `set_stopping()` and `set_broken()` do not work in this state and wait for it to pass.
+    Activating(ActivatingFrom),
+    /// The tenant has finished activating and is open for business.
+    ///
+    /// Transitions out of this state are possible through `set_stopping()` and `set_broken()`.
    Active,
-    /// A tenant is recognized by pageserver, but it is being detached or the
+    /// The tenant is recognized by pageserver, but it is being detached or the
    /// system is being shut down.
-    Stopping,
-    /// A tenant is recognized by the pageserver, but can no longer be used for
-    /// any operations, because it failed to be activated.
+    ///
+    /// Transitions out of this state are possible through `set_broken()`.
+    Stopping {
+        // Because of https://github.com/serde-rs/serde/issues/2105 this has to be a named field,
+        // otherwise it will not be skipped during deserialization
+        #[serde(skip)]
+        progress: completion::Barrier,
+    },
+    /// The tenant is recognized by the pageserver, but can no longer be used for
+    /// any operations.
+    ///
+    /// If the tenant fails to load or attach, it will transition to this state
+    /// and it is guaranteed that no background tasks are running in its name.
+    ///
+    /// The other way to transition into this state is from `Stopping` state
+    /// through `set_broken()` called from `remove_tenant_from_memory()`. That happens
+    /// if the cleanup future executed by `remove_tenant_from_memory()` fails.
    Broken { reason: String, backtrace: String },
 }

 impl TenantState {
-    pub fn has_in_progress_downloads(&self) -> bool {
+    pub fn attachment_status(&self) -> TenantAttachmentStatus {
+        use TenantAttachmentStatus::*;
+
+        // Below TenantState::Activating is used as "transient" or "transparent" state for
+        // attachment_status determining.
        match self {
-            Self::Loading => true,
-            Self::Attaching => true,
-            Self::Active => false,
-            Self::Stopping => false,
-            Self::Broken { .. } => false,
+            // The attach procedure writes the marker file before adding the Attaching tenant to the tenants map.
+            // So, technically, we can return Attached here.
+            // However, as soon as Console observes Attached, it will proceed with the Postgres-level health check.
+            // But, our attach task might still be fetching the remote timelines, etc.
+            // So, return `Maybe` while Attaching, making Console wait for the attach task to finish.
+            Self::Attaching | Self::Activating(ActivatingFrom::Attaching) => Maybe,
+            // tenant mgr startup distinguishes attaching from loading via marker file.
+            // If it's loading, there is no attach marker file, i.e., attach had finished in the past.
+            Self::Loading | Self::Activating(ActivatingFrom::Loading) => Attached,
+            // We only reach Active after successful load / attach.
+            // So, call atttachment status Attached.
+            Self::Active => Attached,
+            // If the (initial or resumed) attach procedure fails, the tenant becomes Broken.
+            // However, it also becomes Broken if the regular load fails.
+            // From Console's perspective there's no practical difference
+            // because attachment_status is polled by console only during attach operation execution.
+            Self::Broken { reason, .. } => Failed {
+                reason: reason.to_owned(),
+            },
+            // Why is Stopping a Maybe case? Because, during pageserver shutdown,
+            // we set the Stopping state irrespective of whether the tenant
+            // has finished attaching or not.
+            Self::Stopping { .. } => Maybe,
        }
    }

@@ -78,8 +148,17 @@ impl std::fmt::Debug for TenantState {
    }
 }

+/// The only [`TenantState`] variants we could be `TenantState::Activating` from.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+pub enum ActivatingFrom {
+    /// Arrived to [`TenantState::Activating`] from [`TenantState::Loading`]
+    Loading,
+    /// Arrived to [`TenantState::Activating`] from [`TenantState::Attaching`]
+    Attaching,
+}
+
 /// A state of a timeline in pageserver's memory.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
+#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
 pub enum TimelineState {
    /// The timeline is recognized by the pageserver but is not yet operational.
    /// In particular, the walreceiver connection loop is not running for this timeline.
@@ -92,15 +171,14 @@ pub enum TimelineState {
    /// It cannot transition back into any other state.
    Stopping,
    /// The timeline is broken and not operational (previous states: Loading or Active).
-    Broken,
+    Broken { reason: String, backtrace: String },
 }

 #[serde_as]
 #[derive(Serialize, Deserialize)]
 pub struct TimelineCreateRequest {
-    #[serde(default)]
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub new_timeline_id: Option<TimelineId>,
+    #[serde_as(as = "DisplayFromStr")]
+    pub new_timeline_id: TimelineId,
    #[serde(default)]
    #[serde_as(as = "Option<DisplayFromStr>")]
    pub ancestor_timeline_id: Option<TimelineId>,
@@ -111,11 +189,25 @@ pub struct TimelineCreateRequest {
 }

 #[serde_as]
-#[derive(Serialize, Deserialize, Default)]
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(deny_unknown_fields)]
 pub struct TenantCreateRequest {
-    #[serde(default)]
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub new_tenant_id: Option<TenantId>,
+    #[serde_as(as = "DisplayFromStr")]
+    pub new_tenant_id: TenantId,
+    #[serde(flatten)]
+    pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
+}
+
+impl std::ops::Deref for TenantCreateRequest {
+    type Target = TenantConfig;
+
+    fn deref(&self) -> &Self::Target {
+        &self.config
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug, Default)]
+pub struct TenantConfig {
    pub checkpoint_distance: Option<u64>,
    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
@@ -136,6 +228,7 @@ pub struct TenantCreateRequest {
    pub eviction_policy: Option<serde_json::Value>,
    pub min_resident_size_override: Option<u64>,
    pub evictions_low_residence_duration_metric_threshold: Option<String>,
+    pub gc_feedback: Option<bool>,
 }

 #[serde_as]
@@ -149,46 +242,35 @@ pub struct StatusResponse {
 }

 impl TenantCreateRequest {
-    pub fn new(new_tenant_id: Option<TenantId>) -> TenantCreateRequest {
+    pub fn new(new_tenant_id: TenantId) -> TenantCreateRequest {
        TenantCreateRequest {
            new_tenant_id,
-            ..Default::default()
+            config: TenantConfig::default(),
        }
    }
 }

 #[serde_as]
-#[derive(Serialize, Deserialize)]
+#[derive(Serialize, Deserialize, Debug)]
+#[serde(deny_unknown_fields)]
 pub struct TenantConfigRequest {
    #[serde_as(as = "DisplayFromStr")]
    pub tenant_id: TenantId,
-    #[serde(default)]
-    pub checkpoint_distance: Option<u64>,
-    pub checkpoint_timeout: Option<String>,
-    pub compaction_target_size: Option<u64>,
-    pub compaction_period: Option<String>,
-    pub compaction_threshold: Option<usize>,
-    pub gc_horizon: Option<u64>,
-    pub gc_period: Option<String>,
-    pub image_creation_threshold: Option<usize>,
-    pub pitr_interval: Option<String>,
-    pub walreceiver_connect_timeout: Option<String>,
-    pub lagging_wal_timeout: Option<String>,
-    pub max_lsn_wal_lag: Option<NonZeroU64>,
-    pub trace_read_requests: Option<bool>,
-    // We defer the parsing of the eviction_policy field to the request handler.
-    // Otherwise we'd have to move the types for eviction policy into this package.
-    // We might do that once the eviction feature has stabilizied.
-    // For now, this field is not even documented in the openapi_spec.yml.
-    pub eviction_policy: Option<serde_json::Value>,
-    pub min_resident_size_override: Option<u64>,
-    pub evictions_low_residence_duration_metric_threshold: Option<String>,
+    #[serde(flatten)]
+    pub config: TenantConfig, // as we have a flattened field, we should reject all unknown fields in it
+}
+
+impl std::ops::Deref for TenantConfigRequest {
+    type Target = TenantConfig;
+
+    fn deref(&self) -> &Self::Target {
+        &self.config
+    }
 }

 impl TenantConfigRequest {
    pub fn new(tenant_id: TenantId) -> TenantConfigRequest {
-        TenantConfigRequest {
-            tenant_id,
+        let config = TenantConfig {
            checkpoint_distance: None,
            checkpoint_timeout: None,
            compaction_target_size: None,
@@ -205,20 +287,54 @@ impl TenantConfigRequest {
            eviction_policy: None,
            min_resident_size_override: None,
            evictions_low_residence_duration_metric_threshold: None,
-        }
+            gc_feedback: None,
+        };
+        TenantConfigRequest { tenant_id, config }
    }
 }

+#[derive(Debug, Serialize, Deserialize)]
+pub struct TenantAttachRequest {
+    pub config: TenantAttachConfig,
+}
+
+/// Newtype to enforce deny_unknown_fields on TenantConfig for
+/// its usage inside `TenantAttachRequest`.
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct TenantAttachConfig {
+    #[serde(flatten)]
+    allowing_unknown_fields: TenantConfig,
+}
+
+impl std::ops::Deref for TenantAttachConfig {
+    type Target = TenantConfig;
+
+    fn deref(&self) -> &Self::Target {
+        &self.allowing_unknown_fields
+    }
+}
+
+/// See [`TenantState::attachment_status`] and the OpenAPI docs for context.
+#[derive(Serialize, Deserialize, Clone)]
+#[serde(tag = "slug", content = "data", rename_all = "snake_case")]
+pub enum TenantAttachmentStatus {
+    Maybe,
+    Attached,
+    Failed { reason: String },
+}
+
 #[serde_as]
 #[derive(Serialize, Deserialize, Clone)]
 pub struct TenantInfo {
    #[serde_as(as = "DisplayFromStr")]
    pub id: TenantId,
+    // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
    pub state: TenantState,
    /// Sum of the size of all layer files.
    /// If a layer is present in both local FS and S3, it counts only once.
    pub current_physical_size: Option<u64>, // physical size is only included in `tenant_status` endpoint
-    pub has_in_progress_downloads: Option<bool>,
+    pub attachment_status: TenantAttachmentStatus,
 }

 /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
@@ -301,12 +417,16 @@ pub struct LayerResidenceEvent {
    pub reason: LayerResidenceEventReason,
 }

-/// The reason for recording a given [`ResidenceEvent`].
+/// The reason for recording a given [`LayerResidenceEvent`].
 #[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 pub enum LayerResidenceEventReason {
    /// The layer map is being populated, e.g. during timeline load or attach.
    /// This includes [`RemoteLayer`] objects created in [`reconcile_with_remote`].
    /// We need to record such events because there is no persistent storage for the events.
+    ///
+    // https://github.com/rust-lang/rust/issues/74481
+    /// [`RemoteLayer`]: ../../tenant/storage_layer/struct.RemoteLayer.html
+    /// [`reconcile_with_remote`]: ../../tenant/struct.Timeline.html#method.reconcile_with_remote
    LayerLoad,
    /// We just created the layer (e.g., freeze_and_flush or compaction).
    /// Such layers are always [`LayerResidenceStatus::Resident`].
@@ -691,7 +811,7 @@ mod tests {
            id: TenantId::generate(),
            state: TenantState::Active,
            current_physical_size: Some(42),
-            has_in_progress_downloads: Some(false),
+            attachment_status: TenantAttachmentStatus::Attached,
        };
        let expected_active = json!({
            "id": original_active.id.to_string(),
@@ -699,7 +819,9 @@ mod tests {
                "slug": "Active",
            },
            "current_physical_size": 42,
-            "has_in_progress_downloads": false,
+            "attachment_status": {
+                "slug":"attached",
+            }
        });

        let original_broken = TenantInfo {
@@ -709,7 +831,7 @@ mod tests {
                backtrace: "backtrace info".into(),
            },
            current_physical_size: Some(42),
-            has_in_progress_downloads: Some(false),
+            attachment_status: TenantAttachmentStatus::Attached,
        };
        let expected_broken = json!({
            "id": original_broken.id.to_string(),
@@ -721,7 +843,9 @@ mod tests {
                }
            },
            "current_physical_size": 42,
-            "has_in_progress_downloads": false,
+            "attachment_status": {
+                "slug":"attached",
+            }
        });

        assert_eq!(
@@ -736,4 +860,100 @@ mod tests {
        assert!(format!("{:?}", &original_broken.state).contains("reason"));
        assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
    }
+
+    #[test]
+    fn test_reject_unknown_field() {
+        let id = TenantId::generate();
+        let create_request = json!({
+            "new_tenant_id": id.to_string(),
+            "unknown_field": "unknown_value".to_string(),
+        });
+        let err = serde_json::from_value::<TenantCreateRequest>(create_request).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown field `unknown_field`"),
+            "expect unknown field `unknown_field` error, got: {}",
+            err
+        );
+
+        let id = TenantId::generate();
+        let config_request = json!({
+            "tenant_id": id.to_string(),
+            "unknown_field": "unknown_value".to_string(),
+        });
+        let err = serde_json::from_value::<TenantConfigRequest>(config_request).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown field `unknown_field`"),
+            "expect unknown field `unknown_field` error, got: {}",
+            err
+        );
+
+        let attach_request = json!({
+            "config": {
+                "unknown_field": "unknown_value".to_string(),
+            },
+        });
+        let err = serde_json::from_value::<TenantAttachRequest>(attach_request).unwrap_err();
+        assert!(
+            err.to_string().contains("unknown field `unknown_field`"),
+            "expect unknown field `unknown_field` error, got: {}",
+            err
+        );
+    }
+
+    #[test]
+    fn tenantstatus_activating_serde() {
+        let states = [
+            TenantState::Activating(ActivatingFrom::Loading),
+            TenantState::Activating(ActivatingFrom::Attaching),
+        ];
+        let expected = "[{\"slug\":\"Activating\",\"data\":\"Loading\"},{\"slug\":\"Activating\",\"data\":\"Attaching\"}]";
+
+        let actual = serde_json::to_string(&states).unwrap();
+
+        assert_eq!(actual, expected);
+
+        let parsed = serde_json::from_str::<Vec<TenantState>>(&actual).unwrap();
+
+        assert_eq!(states.as_slice(), &parsed);
+    }
+
+    #[test]
+    fn tenantstatus_activating_strum() {
+        // tests added, because we use these for metrics
+        let examples = [
+            (line!(), TenantState::Loading, "Loading"),
+            (line!(), TenantState::Attaching, "Attaching"),
+            (
+                line!(),
+                TenantState::Activating(ActivatingFrom::Loading),
+                "Activating",
+            ),
+            (
+                line!(),
+                TenantState::Activating(ActivatingFrom::Attaching),
+                "Activating",
+            ),
+            (line!(), TenantState::Active, "Active"),
+            (
+                line!(),
+                TenantState::Stopping {
+                    progress: utils::completion::Barrier::default(),
+                },
+                "Stopping",
+            ),
+            (
+                line!(),
+                TenantState::Broken {
+                    reason: "Example".into(),
+                    backtrace: "Looooong backtrace".into(),
+                },
+                "Broken",
+            ),
+        ];
+
+        for (line, rendered, expected) in examples {
+            let actual: &'static str = rendered.into();
+            assert_eq!(actual, expected, "example on {line}");
+        }
+    }
 }
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -60,8 +60,9 @@ impl Ord for RelTag {

 /// Display RelTag in the same format that's used in most PostgreSQL debug messages:
 ///
+/// ```text
 /// <spcnode>/<dbnode>/<relnode>[_fsm|_vm|_init]
-///
+/// ```
 impl fmt::Display for RelTag {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Some(forkname) = forknumber_to_name(self.forknum) {
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -50,11 +50,14 @@ impl QueryError {
    }
 }

+/// Returns true if the given error is a normal consequence of a network issue,
+/// or the client closing the connection. These errors can happen during normal
+/// operations, and don't indicate a bug in our code.
 pub fn is_expected_io_error(e: &io::Error) -> bool {
    use io::ErrorKind::*;
    matches!(
        e.kind(),
-        ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
+        BrokenPipe | ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
    )
 }

--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -24,7 +24,6 @@ workspace_hack.workspace = true
 [dev-dependencies]
 env_logger.workspace = true
 postgres.workspace = true
-wal_craft = { path = "wal_craft" }

 [build-dependencies]
 anyhow.workspace = true
--- a/libs/postgres_ffi/src/lib.rs
+++ b/libs/postgres_ffi/src/lib.rs
@@ -33,6 +33,7 @@ macro_rules! postgres_ffi {
            }
            pub mod controlfile_utils;
            pub mod nonrelfile_utils;
+            pub mod wal_craft_test_export;
            pub mod waldecoder_handler;
            pub mod xlog_utils;

@@ -45,8 +46,15 @@ macro_rules! postgres_ffi {
    };
 }

-postgres_ffi!(v14);
-postgres_ffi!(v15);
+#[macro_export]
+macro_rules! for_all_postgres_versions {
+    ($macro:tt) => {
+        $macro!(v14);
+        $macro!(v15);
+    };
+}
+
+for_all_postgres_versions! { postgres_ffi }

 pub mod pg_constants;
 pub mod relfile_utils;
@@ -95,10 +103,13 @@ pub fn generate_wal_segment(
    segno: u64,
    system_id: u64,
    pg_version: u32,
+    lsn: Lsn,
 ) -> Result<Bytes, SerializeError> {
+    assert_eq!(segno, lsn.segment_number(WAL_SEGMENT_SIZE));
+
    match pg_version {
-        14 => v14::xlog_utils::generate_wal_segment(segno, system_id),
-        15 => v15::xlog_utils::generate_wal_segment(segno, system_id),
+        14 => v14::xlog_utils::generate_wal_segment(segno, system_id, lsn),
+        15 => v15::xlog_utils::generate_wal_segment(segno, system_id, lsn),
        _ => Err(SerializeError::BadInput),
    }
 }
--- a/libs/postgres_ffi/src/nonrelfile_utils.rs
+++ b/libs/postgres_ffi/src/nonrelfile_utils.rs
@@ -57,9 +57,9 @@ pub fn slru_may_delete_clogsegment(segpage: u32, cutoff_page: u32) -> bool {
 // Multixact utils

 pub fn mx_offset_to_flags_offset(xid: MultiXactId) -> usize {
-    ((xid / pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP as u32) as u16
-        % pg_constants::MULTIXACT_MEMBERGROUPS_PER_PAGE
-        * pg_constants::MULTIXACT_MEMBERGROUP_SIZE) as usize
+    ((xid / pg_constants::MULTIXACT_MEMBERS_PER_MEMBERGROUP as u32)
+        % pg_constants::MULTIXACT_MEMBERGROUPS_PER_PAGE as u32
+        * pg_constants::MULTIXACT_MEMBERGROUP_SIZE as u32) as usize
 }

 pub fn mx_offset_to_flags_bitshift(xid: MultiXactId) -> u16 {
@@ -81,3 +81,41 @@ fn mx_offset_to_member_page(xid: u32) -> u32 {
 pub fn mx_offset_to_member_segment(xid: u32) -> i32 {
    (mx_offset_to_member_page(xid) / pg_constants::SLRU_PAGES_PER_SEGMENT) as i32
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_multixid_calc() {
+        // Check that the mx_offset_* functions produce the same values as the
+        // corresponding PostgreSQL C macros (MXOffsetTo*). These test values
+        // were generated by calling the PostgreSQL macros with a little C
+        // program.
+        assert_eq!(mx_offset_to_member_segment(0), 0);
+        assert_eq!(mx_offset_to_member_page(0), 0);
+        assert_eq!(mx_offset_to_flags_offset(0), 0);
+        assert_eq!(mx_offset_to_flags_bitshift(0), 0);
+        assert_eq!(mx_offset_to_member_offset(0), 4);
+        assert_eq!(mx_offset_to_member_segment(1), 0);
+        assert_eq!(mx_offset_to_member_page(1), 0);
+        assert_eq!(mx_offset_to_flags_offset(1), 0);
+        assert_eq!(mx_offset_to_flags_bitshift(1), 8);
+        assert_eq!(mx_offset_to_member_offset(1), 8);
+        assert_eq!(mx_offset_to_member_segment(123456789), 2358);
+        assert_eq!(mx_offset_to_member_page(123456789), 75462);
+        assert_eq!(mx_offset_to_flags_offset(123456789), 4780);
+        assert_eq!(mx_offset_to_flags_bitshift(123456789), 8);
+        assert_eq!(mx_offset_to_member_offset(123456789), 4788);
+        assert_eq!(mx_offset_to_member_segment(u32::MAX - 1), 82040);
+        assert_eq!(mx_offset_to_member_page(u32::MAX - 1), 2625285);
+        assert_eq!(mx_offset_to_flags_offset(u32::MAX - 1), 5160);
+        assert_eq!(mx_offset_to_flags_bitshift(u32::MAX - 1), 16);
+        assert_eq!(mx_offset_to_member_offset(u32::MAX - 1), 5172);
+        assert_eq!(mx_offset_to_member_segment(u32::MAX), 82040);
+        assert_eq!(mx_offset_to_member_page(u32::MAX), 2625285);
+        assert_eq!(mx_offset_to_flags_offset(u32::MAX), 5160);
+        assert_eq!(mx_offset_to_flags_bitshift(u32::MAX), 24);
+        assert_eq!(mx_offset_to_member_offset(u32::MAX), 5176);
+    }
+}
--- a/libs/postgres_ffi/src/pg_constants.rs
+++ b/libs/postgres_ffi/src/pg_constants.rs
@@ -146,6 +146,10 @@ pub const XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;
 pub const XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED: u8 = (1 << 1) as u8;
 pub const XLH_DELETE_ALL_VISIBLE_CLEARED: u8 = (1 << 0) as u8;

+// From replication/message.h
+pub const XLOG_LOGICAL_MESSAGE: u8 = 0x00;
+
+// From rmgrlist.h
 pub const RM_XLOG_ID: u8 = 0;
 pub const RM_XACT_ID: u8 = 1;
 pub const RM_SMGR_ID: u8 = 2;
@@ -157,6 +161,7 @@ pub const RM_RELMAP_ID: u8 = 7;
 pub const RM_STANDBY_ID: u8 = 8;
 pub const RM_HEAP2_ID: u8 = 9;
 pub const RM_HEAP_ID: u8 = 10;
+pub const RM_LOGICALMSG_ID: u8 = 21;

 // from xlogreader.h
 pub const XLR_INFO_MASK: u8 = 0x0F;
@@ -195,6 +200,7 @@ pub const FIRST_NORMAL_OBJECT_ID: u32 = 16384;

 pub const XLOG_CHECKPOINT_SHUTDOWN: u8 = 0x00;
 pub const XLOG_CHECKPOINT_ONLINE: u8 = 0x10;
+pub const XLP_FIRST_IS_CONTRECORD: u16 = 0x0001;
 pub const XLP_LONG_HEADER: u16 = 0x0002;

 /* From fsm_internals.h */
--- a/libs/postgres_ffi/src/relfile_utils.rs
+++ b/libs/postgres_ffi/src/relfile_utils.rs
@@ -49,14 +49,16 @@ pub fn forknumber_to_name(forknum: u8) -> Option<&'static str> {
    }
 }

-///
 /// Parse a filename of a relation file. Returns (relfilenode, forknum, segno) tuple.
 ///
 /// Formats:
+///
+/// ```text
 /// <oid>
 /// <oid>_<fork name>
 /// <oid>.<segment number>
 /// <oid>_<fork name>.<segment number>
+/// ```
 ///
 /// See functions relpath() and _mdfd_segpath() in PostgreSQL sources.
 ///
--- a/libs/postgres_ffi/src/wal_craft_test_export.rs
+++ b/libs/postgres_ffi/src/wal_craft_test_export.rs
@@ -0,0 +1,6 @@
+//! This module is for WAL craft to test with postgres_ffi. Should not import any thing in normal usage.
+
+pub use super::PG_MAJORVERSION;
+pub use super::xlog_utils::*;
+pub use super::bindings::*;
+pub use crate::WAL_SEGMENT_SIZE;
--- a/Show More
+++ b/Show More
				`@@ -1 +0,0 @@`
				`{{ pageserver_config \| sivel.toiletwater.to_toml }}`