Test

2026-03-04 08:50:38 +00:00 · 2024-03-27 13:42:33 +01:00
717 changed files with 32625 additions and 88528 deletions
--- a/.config/hakari.toml
+++ b/.config/hakari.toml
@@ -23,30 +23,10 @@ platforms = [
 ]
 [final-excludes]
-workspace-members = [
+# vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
-    # vm_monitor benefits from the same Cargo.lock as the rest of our artifacts, but
+# it is built primarily in separate repo neondatabase/autoscaling and thus is excluded
-    # it is built primarily in separate repo neondatabase/autoscaling and thus is excluded
+# from depending on workspace-hack because most of the dependencies are not used.
-    # from depending on workspace-hack because most of the dependencies are not used.
+workspace-members = ["vm_monitor"]
    "vm_monitor",
    # All of these exist in libs and are not usually built independently.
    # Putting workspace hack there adds a bottleneck for cargo builds.
    "compute_api",
    "consumption_metrics",
    "desim",
    "metrics",
    "pageserver_api",
    "postgres_backend",
    "postgres_connection",
    "postgres_ffi",
    "pq_proto",
    "remote_storage",
    "safekeeper_api",
    "tenant_size_model",
    "tracing-utils",
    "utils",
    "wal_craft",
    "walproposer",
 ]
 # Write out exact versions rather than a semver range. (Defaults to false.)
 # exact-versions = true
--- a/.config/nextest.toml
+++ b/.config/nextest.toml
@@ -1,2 +1,2 @@
 [profile.default]
-slow-timeout = { period = "60s", terminate-after = 3 }
+slow-timeout = { period = "20s", terminate-after = 3 }
--- a/.dockerignore
+++ b/.dockerignore
@@ -8,7 +8,6 @@
 !scripts/combine_control_files.py
 !scripts/ninstall.sh
 !vm-cgconfig.conf
 !docker-compose/run-tests.sh
 # Directories
 !.cargo/
@@ -18,13 +17,11 @@
 !libs/
 !neon_local/
 !pageserver/
 !patches/
 !pgxn/
 !proxy/
-!storage_scrubber/
+!s3_scrubber/
 !safekeeper/
 !storage_broker/
 !storage_controller/
 !trace/
 !vendor/postgres-*/
 !workspace_hack/
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +0,0 @@
 # allows for nicer hunk headers with git show
 *.rs diff=rust
--- a/.github/actionlint.yml
+++ b/.github/actionlint.yml
@@ -1,15 +1,14 @@
 self-hosted-runner:
  labels:
    - arm64
    - dev
    - gen3
    - large
-    - large-arm64
+    # Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
    - macos-14
    - small
    - small-arm64
    - us-east-2
 config-variables:
  - BENCHMARK_PROJECT_ID_PUB
  - BENCHMARK_PROJECT_ID_SUB
  - REMOTE_STORAGE_AZURE_CONTAINER
  - REMOTE_STORAGE_AZURE_REGION
  - SLACK_UPCOMING_RELEASE_CHANNEL_ID
  - DEV_AWS_OIDC_ROLE_ARN
--- a/.github/actions/allure-report-generate/action.yml
+++ b/.github/actions/allure-report-generate/action.yml
@@ -150,7 +150,7 @@ runs:
        # Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,
        # and to keep files on the host to upload them to the database
-        time s5cmd --log error cp "${WORKDIR}/report/*" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/"
+        time aws s3 cp --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
        # Generate redirect
        cat <<EOF > ${WORKDIR}/index.html
@@ -183,7 +183,7 @@ runs:
      uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry/virtualenvs
-        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
+        key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
    - name: Store Allure test stat in the DB (new)
      if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}
--- a/.github/actions/download/action.yml
+++ b/.github/actions/download/action.yml
@@ -26,7 +26,7 @@ runs:
        TARGET: ${{ inputs.path }}
        ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
        SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
-        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id, github.run_attempt) }}
+        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
      run: |
        BUCKET=neon-github-public-dev
        FILENAME=$(basename $ARCHIVE)
--- a/.github/actions/neon-branch-create/action.yml
+++ b/.github/actions/neon-branch-create/action.yml
@@ -3,14 +3,14 @@ description: 'Create Branch using API'
 inputs:
  api_key:
-    description: 'Neon API key'
+    desctiption: 'Neon API key'
    required: true
  project_id:
-    description: 'ID of the Project to create Branch in'
+    desctiption: 'ID of the Project to create Branch in'
    required: true
  api_host:
-    description: 'Neon API host'
+    desctiption: 'Neon API host'
-    default: console-stage.neon.build
+    default: console.stage.neon.tech
 outputs:
  dsn:
    description: 'Created Branch DSN (for main database)'
--- a/.github/actions/neon-branch-delete/action.yml
+++ b/.github/actions/neon-branch-delete/action.yml
@@ -3,17 +3,17 @@ description: 'Delete Branch using API'
 inputs:
  api_key:
-    description: 'Neon API key'
+    desctiption: 'Neon API key'
    required: true
  project_id:
-    description: 'ID of the Project which should be deleted'
+    desctiption: 'ID of the Project which should be deleted'
    required: true
  branch_id:
-    description: 'ID of the branch to delete'
+    desctiption: 'ID of the branch to delete'
    required: true
  api_host:
-    description: 'Neon API host'
+    desctiption: 'Neon API host'
-    default: console-stage.neon.build
+    default: console.stage.neon.tech
 runs:
  using: "composite"
--- a/.github/actions/neon-project-create/action.yml
+++ b/.github/actions/neon-project-create/action.yml
@@ -3,19 +3,22 @@ description: 'Create Neon Project using API'
 inputs:
  api_key:
-    description: 'Neon API key'
+    desctiption: 'Neon API key'
    required: true
  region_id:
-    description: 'Region ID, if not set the project will be created in the default region'
+    desctiption: 'Region ID, if not set the project will be created in the default region'
    default: aws-us-east-2
  postgres_version:
-    description: 'Postgres version; default is 16'
+    desctiption: 'Postgres version; default is 15'
-    default: '16'
+    default: 15
  api_host:
-    description: 'Neon API host'
+    desctiption: 'Neon API host'
-    default: console-stage.neon.build
+    default: console.stage.neon.tech
  provisioner:
    desctiption: 'k8s-pod or k8s-neonvm'
    default: 'k8s-pod'
  compute_units:
-    description: '[Min, Max] compute units'
+    desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
    default: '[1, 1]'
 outputs:
@@ -34,6 +37,10 @@ runs:
      # A shell without `set -x` so as not to expose password/dsn in logs
      shell: bash -euo pipefail {0}
      run: |
        if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
          echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
        fi
        project=$(curl \
          "https://${API_HOST}/api/v2/projects" \
          --fail \
@@ -45,7 +52,7 @@ runs:
              \"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
              \"pg_version\": ${POSTGRES_VERSION},
              \"region_id\": \"${REGION_ID}\",
-              \"provisioner\": \"k8s-neonvm\",
+              \"provisioner\": \"${PROVISIONER}\",
              \"autoscaling_limit_min_cu\": ${MIN_CU},
              \"autoscaling_limit_max_cu\": ${MAX_CU},
              \"settings\": { }
@@ -68,5 +75,6 @@ runs:
        API_KEY: ${{ inputs.api_key }}
        REGION_ID: ${{ inputs.region_id }}
        POSTGRES_VERSION: ${{ inputs.postgres_version }}
        PROVISIONER: ${{ inputs.provisioner }}
        MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
        MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
--- a/.github/actions/neon-project-delete/action.yml
+++ b/.github/actions/neon-project-delete/action.yml
@@ -3,14 +3,14 @@ description: 'Delete Neon Project using API'
 inputs:
  api_key:
-    description: 'Neon API key'
+    desctiption: 'Neon API key'
    required: true
  project_id:
-    description: 'ID of the Project to delete'
+    desctiption: 'ID of the Project to delete'
    required: true
  api_host:
-    description: 'Neon API host'
+    desctiption: 'Neon API host'
-    default: console-stage.neon.build
+    default: console.stage.neon.tech
 runs:
  using: "composite"
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -43,7 +43,7 @@ inputs:
  pg_version:
    description: 'Postgres version to use for tests'
    required: false
-    default: 'v16'
+    default: 'v14'
  benchmark_durations:
    description: 'benchmark durations JSON'
    required: false
@@ -56,14 +56,14 @@ runs:
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
+        name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
        path: /tmp/neon
    - name: Download Neon binaries for the previous release
      if: inputs.build_type != 'remote'
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
+        name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
        path: /tmp/neon-previous
        prefix: latest
@@ -83,12 +83,13 @@ runs:
      uses: actions/checkout@v4
      with:
        submodules: true
        fetch-depth: 1
    - name: Cache poetry deps
      uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry/virtualenvs
-        key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
+        key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
    - name: Install Python deps
      shell: bash -euxo pipefail {0}
@@ -113,8 +114,6 @@ runs:
        export PLATFORM=${PLATFORM:-github-actions-selfhosted}
        export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
        export DEFAULT_PG_VERSION=${PG_VERSION#v}
        export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
        export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}
        if [ "${BUILD_TYPE}" = "remote" ]; then
          export REMOTE_ENV=1
@@ -130,8 +129,8 @@ runs:
          exit 1
        fi
        if [[ "${{ inputs.run_in_parallel }}" == "true" ]]; then
-          # -n sets the number of parallel processes that pytest-xdist will run
+          # -n16 uses sixteen processes to run tests via pytest-xdist
-          EXTRA_PARAMS="-n12 $EXTRA_PARAMS"
+          EXTRA_PARAMS="-n16 $EXTRA_PARAMS"
          # --dist=loadgroup points tests marked with @pytest.mark.xdist_group
          # to the same worker to make @pytest.mark.order work with xdist
@@ -169,28 +168,23 @@ runs:
          EXTRA_PARAMS="--durations-path $TEST_OUTPUT/benchmark_durations.json $EXTRA_PARAMS"
        fi
-        if [[ $BUILD_TYPE == "debug" && $RUNNER_ARCH == 'X64' ]]; then
+        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
          cov_prefix=()
        else
          cov_prefix=()
        fi
        # Wake up the cluster if we use remote neon instance
        if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
-          QUERIES=("SELECT version()")
+          ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
          if [[ "${PLATFORM}" = "neon"* ]]; then
            QUERIES+=("SHOW neon.tenant_id")
            QUERIES+=("SHOW neon.timeline_id")
          fi
          for q in "${QUERIES[@]}"; do
            ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "${q}"
          done
        fi
        # Run the tests.
        #
-        # --alluredir saves test results in Allure format (in a specified directory)
+        # The junit.xml file allows CI tools to display more fine-grained test information
        # in its "Tests" tab in the results page.
        # --verbose prints name of each test (helpful when there are
        # multiple tests in one file)
        # -rA prints summary in the end
@@ -199,6 +193,7 @@ runs:
        #
        mkdir -p $TEST_OUTPUT/allure/results
        "${cov_prefix[@]}" ./scripts/pytest \
          --junitxml=$TEST_OUTPUT/junit.xml \
          --alluredir=$TEST_OUTPUT/allure/results \
          --tb=short \
          --verbose \
--- a/.github/actions/set-docker-config-dir/action.yml
+++ b/.github/actions/set-docker-config-dir/action.yml
@@ -1,36 +0,0 @@
 name: "Set custom docker config directory"
 description: "Create a directory for docker config and set DOCKER_CONFIG"
 # Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
 runs:
  using: "composite"
  steps:
  - name: Show warning on GitHub-hosted runners
    if: runner.environment == 'github-hosted'
    shell: bash -euo pipefail {0}
    run: |
      # Using the following environment variables to find a path to the workflow file
      # ${GITHUB_WORKFLOW_REF} - octocat/hello-world/.github/workflows/my-workflow.yml@refs/heads/my_branch
      # ${GITHUB_REPOSITORY}   - octocat/hello-world
      # ${GITHUB_REF}          - refs/heads/my_branch
      # From https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/variables
      filename_with_ref=${GITHUB_WORKFLOW_REF#"$GITHUB_REPOSITORY/"}
      filename=${filename_with_ref%"@$GITHUB_REF"}
      # https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#setting-a-warning-message
      title='Unnecessary usage of `.github/actions/set-docker-config-dir`'
      message='No need to use `.github/actions/set-docker-config-dir` action on GitHub-hosted runners'
      echo "::warning file=${filename},title=${title}::${message}"
  - uses: pyTooling/Actions/with-post-step@74afc5a42a17a046c90c68cb5cfa627e5c6c5b6b # v1.0.7
    env:
      DOCKER_CONFIG: .docker-custom-${{ github.run_id }}-${{ github.run_attempt }}
    with:
      main: |
        mkdir -p "${DOCKER_CONFIG}"
        echo DOCKER_CONFIG=${DOCKER_CONFIG} | tee -a $GITHUB_ENV
      post: |
        if [ -d "${DOCKER_CONFIG}" ]; then
          rm -r "${DOCKER_CONFIG}"
        fi
--- a/.github/actions/upload/action.yml
+++ b/.github/actions/upload/action.yml
@@ -8,7 +8,7 @@ inputs:
    description: "A directory or file to upload"
    required: true
  prefix:
-    description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
+    description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
    required: false
 runs:
@@ -45,7 +45,7 @@ runs:
      env:
        SOURCE: ${{ inputs.path }}
        ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
-        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id , github.run_attempt) }}
+        PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
      run: |
        BUCKET=neon-github-public-dev
        FILENAME=$(basename $ARCHIVE)
--- a/.github/workflows/_benchmarking_preparation.yml
+++ b/.github/workflows/_benchmarking_preparation.yml
@@ -1,154 +0,0 @@
 name: Prepare benchmarking databases by restoring dumps
 on:
  workflow_call:
    # no inputs needed
 defaults:
  run:
    shell: bash -euxo pipefail {0}
 jobs:
  setup-databases:
    strategy:
      fail-fast: false
      matrix:
        platform: [ aws-rds-postgres, aws-aurora-serverless-v2-postgres, neon ] 
        database: [ clickbench, tpch, userexample ]
    env:
      LD_LIBRARY_PATH: /tmp/neon/pg_install/v16/lib
      PLATFORM: ${{ matrix.platform }}
      PG_BINARIES: /tmp/neon/pg_install/v16/bin
    runs-on: [ self-hosted, us-east-2, x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
      options: --init
    steps:
    - name: Set up Connection String
      id: set-up-prep-connstr
      run: |
        case "${PLATFORM}" in
          neon)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }} 
            ;;
          aws-rds-postgres)
            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }} 
            ;;
          aws-aurora-serverless-v2-postgres)
            CONNSTR=${{ secrets.BENCHMARK_RDS_AURORA_CONNSTR }} 
            ;;
          *)
            echo >&2 "Unknown PLATFORM=${PLATFORM}"
            exit 1
            ;;
        esac
        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT  
    - uses: actions/checkout@v4
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    # we create a table that has one row for each database that we want to restore with the status whether the restore is done    
    - name: Create benchmark_restore_status table if it does not exist
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
        DATABASE_NAME: ${{ matrix.database }}
      # to avoid a race condition of multiple jobs trying to create the table at the same time, 
      # we use an advisory lock
      run: |
        ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
        SELECT pg_advisory_lock(4711);  
        CREATE TABLE IF NOT EXISTS benchmark_restore_status (
        databasename text primary key,
        restore_done boolean
        );
        SELECT pg_advisory_unlock(4711);
        "
    - name: Check if restore is already done
      id: check-restore-done
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
        DATABASE_NAME: ${{ matrix.database }}
      run: |
        skip=false
        if ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM benchmark_restore_status WHERE databasename='${{ env.DATABASE_NAME }}' AND restore_done=true;" | grep -q 1; then
          echo "Restore already done for database ${{ env.DATABASE_NAME }} on platform ${{ env.PLATFORM }}. Skipping this database."
          skip=true
        fi
        echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
    - name: Check and create database if it does not exist
      if: steps.check-restore-done.outputs.skip != 'true'
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
        DATABASE_NAME: ${{ matrix.database }}
      run: |
        DB_EXISTS=$(${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -tAc "SELECT 1 FROM pg_database WHERE datname='${{ env.DATABASE_NAME }}'")
        if [ "$DB_EXISTS" != "1" ]; then
          echo "Database ${{ env.DATABASE_NAME }} does not exist. Creating it..."
          ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "CREATE DATABASE \"${{ env.DATABASE_NAME }}\";"
        else
          echo "Database ${{ env.DATABASE_NAME }} already exists."
        fi
    - name: Download dump from S3 to /tmp/dumps
      if: steps.check-restore-done.outputs.skip != 'true'
      env:
        DATABASE_NAME: ${{ matrix.database }}
      run: |
        mkdir -p /tmp/dumps
        aws s3 cp s3://neon-github-dev/performance/pgdumps/$DATABASE_NAME/$DATABASE_NAME.pg_dump /tmp/dumps/ 
    - name: Replace database name in connection string
      if: steps.check-restore-done.outputs.skip != 'true'
      id: replace-dbname
      env:
        DATABASE_NAME: ${{ matrix.database }}
        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
      run: |
        # Extract the part before the database name
        base_connstr="${BENCHMARK_CONNSTR%/*}"
        # Extract the query parameters (if any) after the database name
        query_params="${BENCHMARK_CONNSTR#*\?}"
        # Reconstruct the new connection string
        if [ "$query_params" != "$BENCHMARK_CONNSTR" ]; then
          new_connstr="${base_connstr}/${DATABASE_NAME}?${query_params}"
        else
          new_connstr="${base_connstr}/${DATABASE_NAME}"
        fi
        echo "database_connstr=${new_connstr}" >> $GITHUB_OUTPUT  
    - name: Restore dump
      if: steps.check-restore-done.outputs.skip != 'true'
      env:
        DATABASE_NAME: ${{ matrix.database }}
        DATABASE_CONNSTR: ${{ steps.replace-dbname.outputs.database_connstr }}
        # the following works only with larger computes: 
        # PGOPTIONS: "-c maintenance_work_mem=8388608 -c max_parallel_maintenance_workers=7"
        # we add the || true because:
        # the dumps were created with Neon and contain neon extensions that are not 
        # available in RDS, so we will always report an error, but we can ignore it
      run: |
        ${PG_BINARIES}/pg_restore --clean --if-exists --no-owner --jobs=4 \
        -d "${DATABASE_CONNSTR}" /tmp/dumps/${DATABASE_NAME}.pg_dump || true
    - name: Update benchmark_restore_status table
      if: steps.check-restore-done.outputs.skip != 'true'
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-prep-connstr.outputs.connstr }}
        DATABASE_NAME: ${{ matrix.database }}
      run: |
        ${PG_BINARIES}/psql "${{ env.BENCHMARK_CONNSTR }}" -c "
        INSERT INTO benchmark_restore_status (databasename, restore_done) VALUES ('${{ env.DATABASE_NAME }}', true)
        ON CONFLICT (databasename) DO UPDATE SET restore_done = true;
        "
--- a/.github/workflows/_build-and-test-locally.yml
+++ b/.github/workflows/_build-and-test-locally.yml
@@ -1,297 +0,0 @@
 name: Build and Test Locally
 on:
  workflow_call:
    inputs:
      arch:
        description: 'x64 or arm64'
        required: true
        type: string
      build-tag:
        description: 'build tag'
        required: true
        type: string
      build-tools-image:
        description: 'build-tools image'
        required: true
        type: string
      build-type:
        description: 'debug or release'
        required: true
        type: string
      pg-versions:
        description: 'a json array of postgres versions to run regression tests on'
        required: true
        type: string
 defaults:
  run:
    shell: bash -euxo pipefail {0}
 env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
 jobs:
  build-neon:
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      # Raise locked memory limit for tokio-epoll-uring.
      # On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
      # io_uring will account the memory of the CQ and SQ as locked.
      # More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    env:
      BUILD_TYPE: ${{ inputs.build-type }}
      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
      BUILD_TAG: ${{ inputs.build-tag }}
    steps:
      - name: Fix git ownership
        run: |
          # Workaround for `fatal: detected dubious ownership in repository at ...`
          #
          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
          #   Ref https://github.com/actions/checkout/issues/785
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
          for r in 14 15 16; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
      - name: Set pg 15 revision for caching
        id: pg_v15_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
      - name: Set pg 16 revision for caching
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
      # Set some environment variables used by all the steps.
      #
      # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
      #   It also includes --features, if any
      #
      # CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
      #   because "cargo metadata" doesn't accept --release or --debug options
      #
      # We run tests with additional features, that are turned off by default (e.g. in release builds), see
      # corresponding Cargo.toml files for their descriptions.
      - name: Set env variables
        env:
          ARCH: ${{ inputs.arch }}
        run: |
          CARGO_FEATURES="--features testing"
          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
            CARGO_FLAGS="--locked"
          elif [[ $BUILD_TYPE == "debug" ]]; then
            cov_prefix=""
            CARGO_FLAGS="--locked"
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=""
            CARGO_FLAGS="--locked --release"
          fi
          {
            echo "cov_prefix=${cov_prefix}"
            echo "CARGO_FEATURES=${CARGO_FEATURES}"
            echo "CARGO_FLAGS=${CARGO_FLAGS}"
            echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
          } >> $GITHUB_ENV
      - name: Cache postgres v14 build
        id: cache_pg_14
        uses: actions/cache@v4
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
      - name: Cache postgres v15 build
        id: cache_pg_15
        uses: actions/cache@v4
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
      - name: Cache postgres v16 build
        id: cache_pg_16
        uses: actions/cache@v4
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
      - name: Build postgres v14
        if: steps.cache_pg_14.outputs.cache-hit != 'true'
        run: mold -run make postgres-v14 -j$(nproc)
      - name: Build postgres v15
        if: steps.cache_pg_15.outputs.cache-hit != 'true'
        run: mold -run make postgres-v15 -j$(nproc)
      - name: Build postgres v16
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: mold -run make postgres-v16 -j$(nproc)
      - name: Build neon extensions
        run: mold -run make neon-pg-ext -j$(nproc)
      - name: Build walproposer-lib
        run: mold -run make walproposer-lib -j$(nproc)
      - name: Run cargo build
        run: |
          PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
          export PQ_LIB_DIR
          ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
      # Do install *before* running rust tests because they might recompile the
      # binaries with different features/flags.
      - name: Install rust binaries
        env:
          ARCH: ${{ inputs.arch }}
        run: |
          # Install target binaries
          mkdir -p /tmp/neon/bin/
          binaries=$(
            ${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
            jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
          )
          for bin in $binaries; do
            SRC=target/$BUILD_TYPE/$bin
            DST=/tmp/neon/bin/$bin
            cp "$SRC" "$DST"
          done
          # Install test executables and write list of all binaries (for code coverage)
          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
            # Keep bloated coverage data files away from the rest of the artifact
            mkdir -p /tmp/coverage/
            mkdir -p /tmp/neon/test_bin/
            test_exe_paths=$(
              ${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
            )
            for bin in $test_exe_paths; do
              SRC=$bin
              DST=/tmp/neon/test_bin/$(basename $bin)
              # We don't need debug symbols for code coverage, so strip them out to make
              # the artifact smaller.
              strip "$SRC" -o "$DST"
              echo "$DST" >> /tmp/coverage/binaries.list
            done
            for bin in $binaries; do
              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
            done
          fi
      - name: Run rust tests
        env:
          NEXTEST_RETRIES: 3
        run: |
          PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
          export PQ_LIB_DIR
          LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
          export LD_LIBRARY_PATH
          #nextest does not yet support running doctests
          ${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
          for io_engine in std-fs tokio-epoll-uring ; do
            NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
          done
          # Run separate tests for real S3
          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
          export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
          export REMOTE_STORAGE_S3_REGION=eu-central-1
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
          # Run separate tests for real Azure Blob Storage
          # XXX: replace region with `eu-central-1`-like region
          export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
          export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
          export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
          export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
          export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
      - name: Install postgres binaries
        run: cp -a pg_install /tmp/neon/pg_install
      - name: Upload Neon artifact
        uses: ./.github/actions/upload
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
          path: /tmp/neon
      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
      - name: Merge and upload coverage data
        if: inputs.build-type == 'debug'
        uses: ./.github/actions/save-coverage-data
  regress-tests:
    # Don't run regression tests on debug arm64 builds
    if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
    needs: [ build-neon ]
    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      # for changed limits, see comments on `options:` earlier in this file
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    strategy:
      fail-fast: false
      matrix:
        pg_version: ${{ fromJson(inputs.pg-versions) }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - name: Pytest regression tests
        uses: ./.github/actions/run-python-test-set
        timeout-minutes: 60
        with:
          build_type: ${{ inputs.build-type }}
          test_selection: regress
          needs_postgres_source: true
          run_with_real_s3: true
          real_s3_bucket: neon-github-ci-tests
          real_s3_region: eu-central-1
          rerun_flaky: true
          pg_version: ${{ matrix.pg_version }}
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ inputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
      # Temporarily disable this step until we figure out why it's so flaky
      # Ref https://github.com/neondatabase/neon/issues/4540
      - name: Merge and upload coverage data
        if: |
          false &&
          inputs.build-type == 'debug' && matrix.pg_version == 'v16'
        uses: ./.github/actions/save-coverage-data
--- a/.github/workflows/actionlint.yml
+++ b/.github/workflows/actionlint.yml
@@ -24,7 +24,7 @@ jobs:
  actionlint:
    needs: [ check-permissions ]
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: reviewdog/action-actionlint@v1
@@ -36,16 +36,3 @@ jobs:
          fail_on_error: true
          filter_mode: nofilter
          level: error
      - name: Disallow 'ubuntu-latest' runners
        run: |
          PAT='^\s*runs-on:.*-latest'
          if grep -ERq $PAT .github/workflows; then
            grep -ERl $PAT .github/workflows |\
            while read -r f
            do
              l=$(grep -nE $PAT $f | awk -F: '{print $1}' | head -1)
              echo "::error file=$f,line=$l::Please use 'ubuntu-22.04' instead of 'ubuntu-latest'"
            done
            exit 1
          fi
--- a/.github/workflows/approved-for-ci-run.yml
+++ b/.github/workflows/approved-for-ci-run.yml
@@ -18,7 +18,6 @@ on:
 concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
  cancel-in-progress: false
 env:
  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -44,7 +43,7 @@ jobs:
      contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
      contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -60,7 +59,7 @@ jobs:
      github.event.action == 'labeled' &&
      contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -69,41 +68,15 @@ jobs:
        with:
          ref: main
          token: ${{ secrets.CI_ACCESS_TOKEN }}
      - name: Look for existing PR
        id: get-pr
        env:
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        run: |
          ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
          echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}
      - name: Get changed labels
        id: get-labels
        if: steps.get-pr.outputs.ALREADY_CREATED != ''
        env:
          ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        run: |
          LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
          <(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
          ( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
          LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
          <(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' |  ( grep -E '^run' || true ) | sort ) |\
          paste -sd , -)
          echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
          echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
      - run: gh pr checkout "${PR_NUMBER}"
      - run: git checkout -b "${BRANCH}"
      - run: git push --force origin "${BRANCH}"
        if: steps.get-pr.outputs.ALREADY_CREATED == ''
      - name: Create a Pull Request for CI run (if required)
-        if: steps.get-pr.outputs.ALREADY_CREATED == ''
+        env:
        env: 
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        run: |
          cat << EOF > body.md
@@ -114,33 +87,16 @@ jobs:
            Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
          EOF
-          LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER}  --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft  )| \
+          ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
-          grep -E '^run' | paste -sd , -)
+          if [ -z "${ALREADY_CREATED}" ]; then
-          gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
+            gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
                                                       --body-file "body.md" \
                                                       --head "${BRANCH}" \
                                                       --base "main" \
-                                                       --label ${LABELS} \
+                                                       --label "run-e2e-tests-in-draft" \
                                                       --draft
      - name: Modify the existing pull request (if required)
        if: steps.get-pr.outputs.ALREADY_CREATED != ''
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
          LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
          ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
        run: |
          ADD_CMD=
          REMOVE_CMD=
          [ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
          [ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
          if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
            gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
          fi
      - run: git push --force origin "${BRANCH}"
        if: steps.get-pr.outputs.ALREADY_CREATED != ''
  cleanup:
    # Close PRs and delete branches if the original PR is closed.
@@ -152,7 +108,7 @@ jobs:
      github.event.action == 'closed' &&
      github.event.pull_request.head.repo.full_name != github.repository
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -38,11 +38,6 @@ on:
        description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
        required: false
        default: false
      run_only_pgvector_tests:
        type: boolean
        description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
        required: false
        default: false
 defaults:
  run:
@@ -55,118 +50,11 @@ concurrency:
 jobs:
  bench:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    permissions:
      contents: write
      statuses: write
      id-token: write # Required for OIDC authentication in azure runners
    strategy:
      fail-fast: false
      matrix:
        include:
          - DEFAULT_PG_VERSION: 16
            PLATFORM: "neon-staging"
            region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
            RUNNER: [ self-hosted, us-east-2, x64 ]
            IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
          - DEFAULT_PG_VERSION: 16
            PLATFORM: "azure-staging"
            region_id: 'azure-eastus2'
            RUNNER: [ self-hosted, eastus2, x64 ]
            IMAGE: neondatabase/build-tools:pinned
    env:
      TEST_PG_BENCH_DURATIONS_MATRIX: "300"
      TEST_PG_BENCH_SCALES_MATRIX: "10,100"
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
+      DEFAULT_PG_VERSION: 14
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
      PLATFORM: ${{ matrix.PLATFORM }}
    runs-on: ${{ matrix.RUNNER }}
    container:
      image: ${{ matrix.IMAGE }}
      options: --init
    steps:
    - uses: actions/checkout@v4
    - name: Configure AWS credentials # necessary on Azure runners
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: eu-central-1
        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        role-duration-seconds: 18000 # 5 hours
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    - name: Create Neon Project
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
        region_id: ${{ matrix.region_id }}
        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
    - name: Run benchmark
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
        test_selection: performance
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
        # Set --sparse-ordering option of pytest-order plugin
        # to ensure tests run in the order they appear in the file.
        # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
        extra_params:
          -m remote_cluster
          --sparse-ordering
          --timeout 14400
          --ignore test_runner/performance/test_perf_olap.py
          --ignore test_runner/performance/test_perf_pgvector_queries.py
          --ignore test_runner/performance/test_logical_replication.py
          --ignore test_runner/performance/test_physical_replication.py
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
    - name: Delete Neon Project
      if: ${{ always() }}
      uses: ./.github/actions/neon-project-delete
      with:
        project_id: ${{ steps.create-neon-project.outputs.project_id }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
        slack-message: |
          Periodic perf testing: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  replication-tests:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      DEFAULT_PG_VERSION: 16
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -180,81 +68,69 @@ jobs:
    steps:
    - uses: actions/checkout@v4
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        name: neon-${{ runner.os }}-release-artifact
        path: /tmp/neon/
        prefix: latest
-    - name: Run Logical Replication benchmarks
+    - name: Create Neon Project
-      uses: ./.github/actions/run-python-test-set
+      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
-        build_type: ${{ env.BUILD_TYPE }}
+        region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
-        test_selection: performance/test_logical_replication.py
+        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
-        run_in_parallel: false
+        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 5400
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
        BENCHMARK_PROJECT_ID_PUB: ${{ vars.BENCHMARK_PROJECT_ID_PUB }}
        BENCHMARK_PROJECT_ID_SUB: ${{ vars.BENCHMARK_PROJECT_ID_SUB }}
-    - name: Run Physical Replication benchmarks
+    - name: Run benchmark
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
-        test_selection: performance/test_physical_replication.py
+        test_selection: performance
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
-        extra_params: -m remote_cluster --timeout 5400
+        # Set --sparse-ordering option of pytest-order plugin
-        pg_version: ${{ env.DEFAULT_PG_VERSION }}
+        # to ensure tests run in the order they appear in the file.
        # It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
        extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
-        NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
+
    - name: Delete Neon Project
      if: ${{ always() }}
      uses: ./.github/actions/neon-project-delete
      with:
        project_id: ${{ steps.create-neon-project.outputs.project_id }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
      with:
        store-test-results-into-db: true
      env:
        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
-        channel-id: "C06T9AMNDQQ" # on-call-compute-staging-stream
+        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: |
+        slack-message: "Periodic perf testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          Periodic replication testing: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  generate-matrices:
    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
    # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
    #
    # Available platforms:
-    # - neonvm-captest-new: Freshly created project (1 CU)
+    # - neon-captest-new: Freshly created project (1 CU)
-    # - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU)
+    # - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
-    # - neonvm-azure-captest-new: Freshly created project (1 CU) in azure region
+    # - neon-captest-reuse: Reusing existing project
    # - neonvm-azure-captest-freetier: Use freetier-sized compute (0.25 CU) in azure region
    # - neonvm-captest-reuse: Reusing existing project
    # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
    # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
    env:
      RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
-      DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
+    runs-on: ubuntu-latest
    runs-on: ubuntu-22.04
    outputs:
      pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
      olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
@@ -264,37 +140,22 @@ jobs:
    - name: Generate matrix for pgbench benchmark
      id: pgbench-compare-matrix
      run: |
        region_id_default=${{ env.DEFAULT_REGION_ID }}
        runner_default='["self-hosted", "us-east-2", "x64"]'
        runner_azure='["self-hosted", "eastus2", "x64"]'
        image_default="369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned"
        matrix='{
          "pg_version" : [
            16
          ],
          "region_id" : [
            "'"$region_id_default"'"
            ],
          "platform": [
-            "neonvm-captest-new",
+            "neon-captest-new",
-            "neonvm-captest-reuse",
+            "neon-captest-reuse",
            "neonvm-captest-new"
          ],
          "db_size": [ "10gb" ],
-          "runner": ['"$runner_default"'],
+          "include": [{ "platform": "neon-captest-freetier",   "db_size": "3gb"  },
-          "image": [ "'"$image_default"'" ],
+                      { "platform": "neon-captest-new",        "db_size": "50gb" },
-          "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier",       "db_size": "3gb" ,"runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "platform": "neonvm-captest-freetier", "db_size": "3gb"  },
-                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
+                      { "platform": "neonvm-captest-new",      "db_size": "50gb" }]
                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new",            "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" ,"runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-new",      "db_size": "10gb","runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned" },
                      { "pg_version": 16, "region_id": "azure-eastus2",          "platform": "neonvm-azure-captest-new",      "db_size": "50gb","runner": '"$runner_azure"',   "image": "neondatabase/build-tools:pinned" },
                      { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]
        }'
-        if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
+        if [ "$(date +%A)" = "Saturday" ]; then
-          matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" },
+          matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
-                                                     { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "rds-aurora", "db_size": "10gb","runner": '"$runner_default"', "image": "'"$image_default"'" }]')
+                                                   { "platform": "rds-aurora",   "db_size": "50gb"}]')
        fi
        echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -304,13 +165,13 @@ jobs:
      run: |
        matrix='{
          "platform": [
-            "neonvm-captest-reuse"
+            "neon-captest-reuse"
          ]
        }'
        if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
          matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
-                                                     { "platform": "rds-aurora"   }]')
+                                                   { "platform": "rds-aurora"   }]')
        fi
        echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -320,7 +181,7 @@ jobs:
      run: |
        matrix='{
          "platform": [
-            "neonvm-captest-reuse"
+            "neon-captest-reuse"
          ],
          "scale": [
            "10"
@@ -329,22 +190,13 @@ jobs:
        if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
          matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
-                                                     { "platform": "rds-aurora",   "scale": "10" }]')
+                                                    { "platform": "rds-aurora",   "scale": "10" }]')
        fi
        echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
  prepare_AWS_RDS_databases:
    uses: ./.github/workflows/_benchmarking_preparation.yml
    secrets: inherit
  pgbench-compare:
-    if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
+    needs: [ generate-matrices ]
    needs: [ generate-matrices, prepare_AWS_RDS_databases ]
    permissions:
      contents: write
      statuses: write
      id-token: write # Required for OIDC authentication in azure runners
    strategy:
      fail-fast: false
@@ -354,15 +206,15 @@ jobs:
      TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
      TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
+      DEFAULT_PG_VERSION: 14
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
      PLATFORM: ${{ matrix.platform }}
-    runs-on: ${{ matrix.runner }}
+    runs-on: [ self-hosted, us-east-2, x64 ]
    container:
-      image: ${{ matrix.image }}
+      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
      options: --init
    # Increase timeout to 8h, default timeout is 6h
@@ -371,41 +223,37 @@ jobs:
    steps:
    - uses: actions/checkout@v4
    - name: Configure AWS credentials # necessary on Azure runners
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: eu-central-1
        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        role-duration-seconds: 18000 # 5 hours
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        name: neon-${{ runner.os }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    - name: Add Postgres binaries to PATH
      run: |
        ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
    - name: Create Neon Project
-      if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
+      if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
-        region_id: ${{ matrix.region_id }}
+        region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
-        compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}
+        compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
        provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
    - name: Set up Connection String
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
-          neonvm-captest-reuse)
+          neon-captest-reuse)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
            ;;
-          neonvm-captest-sharding-reuse)
+          neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
            ;;
          neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
            CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
            ;;
          rds-aurora)
@@ -422,6 +270,12 @@ jobs:
        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
        QUERY="SELECT version();"
        if [[ "${PLATFORM}" = "neon"* ]]; then
          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
        fi
        psql ${CONNSTR} -c "${QUERY}"
    - name: Benchmark init
      uses: ./.github/actions/run-python-test-set
      with:
@@ -430,7 +284,6 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -444,7 +297,6 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -458,7 +310,6 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -472,7 +323,6 @@ jobs:
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
@@ -481,133 +331,7 @@ jobs:
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: |
+        slack-message: "Periodic perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          Periodic perf testing on ${{ matrix.platform }}: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  pgbench-pgvector:
    permissions:
      contents: write
      statuses: write
      id-token: write # Required for OIDC authentication in azure runners
    strategy:
      fail-fast: false
      matrix:
        include:
          - PLATFORM: "neonvm-captest-pgvector"
            RUNNER: [ self-hosted, us-east-2, x64 ]
            IMAGE: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
          - PLATFORM: "azure-captest-pgvector"
            RUNNER: [ self-hosted, eastus2, x64 ]
            IMAGE: neondatabase/build-tools:pinned
    env:
      TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
      TEST_PG_BENCH_SCALES_MATRIX: "1"
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      DEFAULT_PG_VERSION: 16
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      LD_LIBRARY_PATH: /home/nonroot/pg/usr/lib/x86_64-linux-gnu
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
      PLATFORM: ${{ matrix.PLATFORM }}
    runs-on: ${{ matrix.RUNNER }}
    container:
      image: ${{ matrix.IMAGE }}
      options: --init
    steps:
    - uses: actions/checkout@v4
    # until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
    # instead of using Neon artifacts containing pgbench
    - name: Install postgresql-16 where pytest expects it
      run: |
        cd /home/nonroot
        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.4-1.pgdg110%2B1_amd64.deb
        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.4-1.pgdg110%2B1_amd64.deb
        wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.4-1.pgdg110%2B1_amd64.deb 
        dpkg -x libpq5_16.4-1.pgdg110+1_amd64.deb pg
        dpkg -x postgresql-client-16_16.4-1.pgdg110+1_amd64.deb pg
        dpkg -x postgresql-16_16.4-1.pgdg110+1_amd64.deb pg
        mkdir -p /tmp/neon/pg_install/v16/bin
        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench  
        ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql  
        ln -s /home/nonroot/pg/usr/lib/x86_64-linux-gnu /tmp/neon/pg_install/v16/lib 
        /tmp/neon/pg_install/v16/bin/pgbench --version
        /tmp/neon/pg_install/v16/bin/psql --version
    - name: Set up Connection String
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
          neonvm-captest-pgvector)
            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
            ;;
          azure-captest-pgvector)
            CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
            ;;
          *)
            echo >&2 "Unknown PLATFORM=${PLATFORM}"
            exit 1
            ;;
        esac
        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
    - name: Configure AWS credentials # necessary on Azure runners to read/write from/to S3
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-region: eu-central-1
        role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
        role-duration-seconds: 18000 # 5 hours
    - name: Benchmark pgvector hnsw indexing
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
        test_selection: performance/test_perf_olap.py
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
    - name: Benchmark pgvector queries
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: ${{ env.BUILD_TYPE }}
        test_selection: performance/test_perf_pgvector_queries.py
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
        slack-message: |
          Periodic perf testing on ${{ env.PLATFORM }}: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -619,8 +343,8 @@ jobs:
    #
    # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
    # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
-    if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
+    if: ${{ !cancelled() }}
-    needs: [ generate-matrices, pgbench-compare, prepare_AWS_RDS_databases ]
+    needs: [ generate-matrices, pgbench-compare ]
    strategy:
      fail-fast: false
@@ -628,7 +352,7 @@ jobs:
    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 16
+      DEFAULT_PG_VERSION: 14
      TEST_OUTPUT: /tmp/test_output
      TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
      TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}
@@ -647,15 +371,20 @@ jobs:
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        name: neon-${{ runner.os }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    - name: Add Postgres binaries to PATH
      run: |
        ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
    - name: Set up Connection String
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
-          neonvm-captest-reuse)
+          neon-captest-reuse)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
            ;;
          rds-aurora)
@@ -665,13 +394,19 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
            ;;
          *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
            exit 1
            ;;
        esac
        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
        QUERY="SELECT version();"
        if [[ "${PLATFORM}" = "neon"* ]]; then
          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
        fi
        psql ${CONNSTR} -c "${QUERY}"
    - name: ClickBench benchmark
      uses: ./.github/actions/run-python-test-set
      with:
@@ -680,7 +415,6 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_clickbench
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -690,7 +424,6 @@ jobs:
        TEST_OLAP_SCALE: 10
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
@@ -699,10 +432,7 @@ jobs:
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: |
+        slack-message: "Periodic OLAP perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          Periodic OLAP perf testing on ${{ matrix.platform }}: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
@@ -713,8 +443,8 @@ jobs:
    # We might change it after https://github.com/neondatabase/neon/issues/2900.
    #
    # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
-    if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
+    if: ${{ !cancelled() }}
-    needs: [ generate-matrices, clickbench-compare, prepare_AWS_RDS_databases ]
+    needs: [ generate-matrices, clickbench-compare ]
    strategy:
      fail-fast: false
@@ -722,7 +452,7 @@ jobs:
    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 16
+      DEFAULT_PG_VERSION: 14
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -740,24 +470,29 @@ jobs:
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        name: neon-${{ runner.os }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    - name: Add Postgres binaries to PATH
      run: |
        ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
    - name: Get Connstring Secret Name
      run: |
        case "${PLATFORM}" in
-          neonvm-captest-reuse)
+          neon-captest-reuse)
            ENV_PLATFORM=CAPTEST_TPCH
            ;;
          rds-aurora)
            ENV_PLATFORM=RDS_AURORA_TPCH
            ;;
          rds-postgres)
-            ENV_PLATFORM=RDS_POSTGRES_TPCH
+            ENV_PLATFORM=RDS_AURORA_TPCH
            ;;
          *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
            exit 1
            ;;
        esac
@@ -772,6 +507,12 @@ jobs:
        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
        QUERY="SELECT version();"
        if [[ "${PLATFORM}" = "neon"* ]]; then
          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
        fi
        psql ${CONNSTR} -c "${QUERY}"
    - name: Run TPC-H benchmark
      uses: ./.github/actions/run-python-test-set
      with:
@@ -780,7 +521,6 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_tpch
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -788,7 +528,6 @@ jobs:
        TEST_OLAP_SCALE: ${{ matrix.scale }}
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
@@ -797,16 +536,13 @@ jobs:
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: |
+        slack-message: "Periodic TPC-H perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  user-examples-compare:
-    if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
+    if: ${{ !cancelled() }}
-    needs: [ generate-matrices, tpch-compare, prepare_AWS_RDS_databases ]
+    needs: [ generate-matrices, tpch-compare ]
    strategy:
      fail-fast: false
@@ -814,7 +550,7 @@ jobs:
    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 16
+      DEFAULT_PG_VERSION: 14
      TEST_OUTPUT: /tmp/test_output
      BUILD_TYPE: remote
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -831,15 +567,20 @@ jobs:
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
-        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
+        name: neon-${{ runner.os }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    - name: Add Postgres binaries to PATH
      run: |
        ${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
    - name: Set up Connection String
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
-          neonvm-captest-reuse)
+          neon-captest-reuse)
            CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
            ;;
          rds-aurora)
@@ -849,13 +590,19 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
            ;;
          *)
-            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
            exit 1
            ;;
        esac
        echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
        QUERY="SELECT version();"
        if [[ "${PLATFORM}" = "neon"* ]]; then
          QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
        fi
        psql ${CONNSTR} -c "${QUERY}"
    - name: Run user examples
      uses: ./.github/actions/run-python-test-set
      with:
@@ -864,14 +611,12 @@ jobs:
        run_in_parallel: false
        save_perf_report: ${{ env.SAVE_PERF_REPORT }}
        extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
        PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
    - name: Create Allure report
      id: create-allure-report
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
@@ -880,10 +625,6 @@ jobs:
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
-        slack-message: |
+        slack-message: "Periodic User example perf testing ${{ matrix.platform }}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          Periodic TPC-H perf testing on ${{ matrix.platform }}: ${{ job.status }}
          <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
          <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/build-build-tools-image.yml
+++ b/.github/workflows/build-build-tools-image.yml
@@ -21,7 +21,6 @@ defaults:
 concurrency:
  group: build-build-tools-image-${{ inputs.image-tag }}
  cancel-in-progress: false
 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
@@ -30,6 +29,7 @@ jobs:
  check-image:
    uses: ./.github/workflows/check-build-tools-image.yml
  # This job uses older versions of GitHub Actions because it runs on gen2 runners, which don't support Node 20 (needed by newer versions)
  build-image:
    needs: [ check-image ]
    if: needs.check-image.outputs.found == 'false'
@@ -38,7 +38,7 @@ jobs:
      matrix:
        arch: [ x64, arm64 ]
-    runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
+    runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}
    env:
      IMAGE_TAG: ${{ inputs.image-tag }}
@@ -54,38 +54,40 @@ jobs:
            exit 1
          fi
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
-      - uses: ./.github/actions/set-docker-config-dir
+      # Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
-      - uses: docker/setup-buildx-action@v3
+      # The default value is ~/.docker
-        with:
+      - name: Set custom docker config directory
-          cache-binary: false
+        run: |
          mkdir -p /tmp/.docker-custom
          echo DOCKER_CONFIG=/tmp/.docker-custom >> $GITHUB_ENV
-      - uses: docker/login-action@v3
+      - uses: docker/setup-buildx-action@v2
      - uses: docker/login-action@v2
        with:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
-      - uses: docker/login-action@v3
+      - uses: docker/build-push-action@v4
        with:
          registry: cache.neon.build
          username: ${{ secrets.NEON_CI_DOCKERCACHE_USERNAME }}
          password: ${{ secrets.NEON_CI_DOCKERCACHE_PASSWORD }}
      - uses: docker/build-push-action@v6
        with:
          context: .
          provenance: false
          push: true
          pull: true
          file: Dockerfile.build-tools
-          cache-from: type=registry,ref=cache.neon.build/build-tools:cache-${{ matrix.arch }}
+          cache-from: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }}
-          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
+          cache-to: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }},mode=max
          tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}
      - name: Remove custom docker config directory
        run: |
          rm -rf /tmp/.docker-custom
  merge-images:
    needs: [ build-image ]
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    env:
      IMAGE_TAG: ${{ inputs.image-tag }}
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
--- a/.github/workflows/check-build-tools-image.yml
+++ b/.github/workflows/check-build-tools-image.yml
@@ -19,23 +19,30 @@ permissions: {}
 jobs:
  check-image:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    outputs:
      tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
      found: ${{ steps.check-image.outputs.found }}
    steps:
      - uses: actions/checkout@v4
      - name: Get build-tools image tag for the current commit
        id: get-build-tools-tag
        env:
-          IMAGE_TAG: |
+          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-            ${{ hashFiles('Dockerfile.build-tools',
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
                          '.github/workflows/check-build-tools-image.yml',
                          '.github/workflows/build-build-tools-image.yml') }}
        run: |
-          echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT
+          LAST_BUILD_TOOLS_SHA=$(
            gh api \
              -H "Accept: application/vnd.github+json" \
              -H "X-GitHub-Api-Version: 2022-11-28" \
              --method GET \
              --field path=Dockerfile.build-tools \
              --field sha=${COMMIT_SHA} \
              --field per_page=1 \
              --jq ".[0].sha" \
              "/repos/${GITHUB_REPOSITORY}/commits"
          )
          echo "image-tag=${LAST_BUILD_TOOLS_SHA}" | tee -a $GITHUB_OUTPUT
      - name: Check if such tag found in the registry
        id: check-image
--- a/.github/workflows/check-permissions.yml
+++ b/.github/workflows/check-permissions.yml
@@ -16,7 +16,7 @@ permissions: {}
 jobs:
  check-permissions:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
    - name: Disallow CI runs on PRs from forks
      if: |
--- a/.github/workflows/cleanup-caches-by-a-branch.yml
+++ b/.github/workflows/cleanup-caches-by-a-branch.yml
@@ -9,7 +9,7 @@ on:
 jobs:
  cleanup:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - name: Cleanup
        run: |
--- a/.github/workflows/label-for-external-users.yml
+++ b/.github/workflows/label-for-external-users.yml
@@ -1,54 +0,0 @@
 name: Add `external` label to issues and PRs created by external users
 on:
  issues:
    types:
      - opened
  pull_request_target:
    types:
      - opened
 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
 env:
  LABEL: external
 jobs:
  check-user:
    runs-on: ubuntu-22.04
    outputs:
      is-member: ${{ steps.check-user.outputs.is-member }}
    steps:
    - name: Check whether `${{ github.actor }}` is a member of `${{ github.repository_owner }}`
      id: check-user
      env:
        GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
      run: |
        if gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/orgs/${GITHUB_REPOSITORY_OWNER}/members/${GITHUB_ACTOR}"; then
          is_member=true
        else
          is_member=false
        fi
        echo "is-member=${is_member}" | tee -a ${GITHUB_OUTPUT}
  add-label:
    if: needs.check-user.outputs.is-member == 'false'
    needs: [ check-user ]
    runs-on: ubuntu-22.04
    permissions:
      pull-requests: write # for `gh pr edit`
      issues: write        # for `gh issue edit`
    steps:
    - name: Add `${{ env.LABEL }}` label
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        ITEM_NUMBER: ${{ github.event[github.event_name == 'pull_request_target' && 'pull_request' || 'issue'].number }}
        GH_CLI_COMMAND: ${{ github.event_name == 'pull_request_target' && 'pr' || 'issue' }}
      run: |
        gh ${GH_CLI_COMMAND} --repo ${GITHUB_REPOSITORY} edit --add-label=${LABEL} ${ITEM_NUMBER}
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -56,6 +56,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1
      - name: Install macOS postgres dependencies
        run: brew install flex bison openssl protobuf icu4c pkg-config
@@ -132,13 +133,212 @@ jobs:
      - name: Check that no warnings are produced
        run: ./run_clippy.sh
  check-linux-arm-build:
    needs: [ check-permissions, build-build-tools-image ]
    timeout-minutes: 90
    runs-on: [ self-hosted, dev, arm64 ]
    env:
      # Use release build only, to have less debug info around
      # Hence keeping target/ (and general cache size) smaller
      BUILD_TYPE: release
      CARGO_FEATURES: --features testing
      CARGO_FLAGS: --release
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init
    steps:
      - name: Fix git ownership
        run: |
          # Workaround for `fatal: detected dubious ownership in repository at ...`
          #
          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
          #   Ref https://github.com/actions/checkout/issues/785
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
          for r in 14 15 16; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1
      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
      - name: Set pg 15 revision for caching
        id: pg_v15_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
      - name: Set pg 16 revision for caching
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
      - name: Set env variables
        run: |
          echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV
      - name: Cache postgres v14 build
        id: cache_pg_14
        uses: actions/cache@v4
        with:
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
      - name: Cache postgres v15 build
        id: cache_pg_15
        uses: actions/cache@v4
        with:
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
      - name: Cache postgres v16 build
        id: cache_pg_16
        uses: actions/cache@v4
        with:
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
      - name: Build postgres v14
        if: steps.cache_pg_14.outputs.cache-hit != 'true'
        run: mold -run make postgres-v14 -j$(nproc)
      - name: Build postgres v15
        if: steps.cache_pg_15.outputs.cache-hit != 'true'
        run: mold -run make postgres-v15 -j$(nproc)
      - name: Build postgres v16
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: mold -run make postgres-v16 -j$(nproc)
      - name: Build neon extensions
        run: mold -run make neon-pg-ext -j$(nproc)
      - name: Build walproposer-lib
        run: mold -run make walproposer-lib -j$(nproc)
      - name: Run cargo build
        run: |
          mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
      - name: Run cargo test
        env:
          NEXTEST_RETRIES: 3
        run: |
          cargo nextest run $CARGO_FEATURES
          # Run separate tests for real S3
          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
          export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
          export REMOTE_STORAGE_S3_REGION=eu-central-1
          # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
          cargo nextest run --package remote_storage --test test_real_s3
          # Run separate tests for real Azure Blob Storage
          # XXX: replace region with `eu-central-1`-like region
          export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
          export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
          export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
          export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
          export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
          # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
          cargo nextest run --package remote_storage --test test_real_azure
  check-codestyle-rust-arm:
    needs: [ check-permissions, build-build-tools-image ]
    timeout-minutes: 90
    runs-on: [ self-hosted, dev, arm64 ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init
    steps:
      - name: Fix git ownership
        run: |
          # Workaround for `fatal: detected dubious ownership in repository at ...`
          #
          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
          #   Ref https://github.com/actions/checkout/issues/785
          #
          git config --global --add safe.directory ${{ github.workspace }}
          git config --global --add safe.directory ${GITHUB_WORKSPACE}
          for r in 14 15 16; do
            git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
            git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
          done
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1
      # Some of our rust modules use FFI and need those to be checked
      - name: Get postgres headers
        run: make postgres-headers -j$(nproc)
      # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
      # This will catch compiler & clippy warnings in all feature combinations.
      # TODO: use cargo hack for build and test as well, but, that's quite expensive.
      # NB: keep clippy args in sync with ./run_clippy.sh
      - run: |
          CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
          if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
            echo "No clippy args found in .neon_clippy_args"
            exit 1
          fi
          echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
      - name: Run cargo clippy (debug)
        run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
      - name: Run cargo clippy (release)
        run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
      - name: Check documentation generation
        run: cargo doc --workspace --no-deps --document-private-items
        env:
            RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
      # Use `${{ !cancelled() }}` to run quick tests after the longer clippy run
      - name: Check formatting
        if: ${{ !cancelled() }}
        run: cargo fmt --all -- --check
      # https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
      - name: Check rust dependencies
        if: ${{ !cancelled() }}
        run: |
          cargo hakari generate --diff  # workspace-hack Cargo.toml is up-to-date
          cargo hakari manage-deps --dry-run  # all workspace crates depend on workspace-hack
      # https://github.com/EmbarkStudios/cargo-deny
      - name: Check rust licenses/bans/advisories/sources
        if: ${{ !cancelled() }}
        run: cargo deny check
  gather-rust-build-stats:
    needs: [ check-permissions, build-build-tools-image ]
    if: |
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
      contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
      github.ref_name == 'main'
-    runs-on: [ self-hosted, large ]
+    runs-on: [ self-hosted, gen3, large ]
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
@@ -148,6 +348,8 @@ jobs:
    env:
      BUILD_TYPE: release
      # remove the cachepot wrapper and build without crate caches
      RUSTC_WRAPPER: ""
      # builds with incremental compilation produce partial results,
      # so do not attempt to cache this build; also disable incremental compilation
      CARGO_INCREMENTAL: 0
@@ -157,6 +359,7 @@ jobs:
        uses: actions/checkout@v4
        with:
          submodules: true
          fetch-depth: 1
      # Some of our rust modules use FFI and need those to be checked
      - name: Get postgres headers
@@ -166,7 +369,7 @@ jobs:
        run: make walproposer-lib -j$(nproc)
      - name: Produce the build stats
-        run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release --timings -j$(nproc)
+        run: cargo build --all --release --timings
      - name: Upload the build stats
        id: upload-stats
--- a/.github/workflows/periodic_pagebench.yml
+++ b/.github/workflows/periodic_pagebench.yml
@@ -1,155 +0,0 @@
 name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
 on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    #          ┌───────────── minute (0 - 59)
    #          │ ┌───────────── hour (0 - 23)
    #          │ │ ┌───────────── day of the month (1 - 31)
    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron:  '0 18 * * *' # Runs at 6 PM UTC every day
  workflow_dispatch: # Allows manual triggering of the workflow
    inputs:
      commit_hash:
        type: string
        description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
        required: false
        default: ''
 defaults:
  run:
    shell: bash -euo pipefail {0}
 concurrency:
  group: ${{ github.workflow }}
  cancel-in-progress: false
 jobs:
  trigger_bench_on_ec2_machine_in_eu_central_1:
    runs-on: [ self-hosted, small ]
    container:
      image: neondatabase/build-tools:pinned
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init
    timeout-minutes: 360  # Set the timeout to 6 hours
    env:
      API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
      RUN_ID: ${{ github.run_id }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
      AWS_DEFAULT_REGION : "eu-central-1"
      AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
    steps:
    # we don't need the neon source code because we run everything remotely
    # however we still need the local github actions to run the allure step below
    - uses: actions/checkout@v4
    - name: Show my own (github runner) external IP address - usefull for IP allowlisting
      run: curl https://ifconfig.me
    - name: Start EC2 instance and wait for the instance to boot up
      run: |
        aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
        aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
        sleep 60 # sleep some time to allow cloudinit and our API server to start up
    - name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
      run: |
        public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
        echo "Public IP of the EC2 instance: $public_ip"
        echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
    - name: Determine commit hash
      env:
        INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
      run: |
        if [ -z "$INPUT_COMMIT_HASH" ]; then
          echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
        else
          echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
        fi
    - name: Start Bench with run_id   
      run: |
        curl -k -X 'POST' \
        "${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
        -H 'accept: application/json' \
        -H 'Content-Type: application/json' \
        -H "Authorization: Bearer $API_KEY" \
        -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"
    - name: Poll Test Status
      id: poll_step
      run: |
        status=""
        while [[ "$status" != "failure" && "$status" != "success" ]]; do
          response=$(curl -k -X 'GET' \
          "${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
          -H 'accept: application/json' \
          -H "Authorization: Bearer $API_KEY")
          echo "Response: $response"
          set +x
          status=$(echo $response | jq -r '.status')
          echo "Test status: $status"
          if [[ "$status" == "failure" ]]; then
            echo "Test failed"
            exit 1 # Fail the job step if status is failure
          elif [[ "$status" == "success" || "$status" == "null" ]]; then
            break
          elif [[ "$status" == "too_many_runs" ]]; then
            echo "Too many runs already running"
            echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
            exit 1
          fi
          sleep 60 # Poll every 60 seconds
        done
    - name: Retrieve Test Logs
      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
        curl -k -X 'GET' \
        "${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
        -H 'accept: application/gzip' \
        -H "Authorization: Bearer $API_KEY" \
        --output "test_log_${GITHUB_RUN_ID}.gz"
    - name: Unzip Test Log and Print it into this job's log
      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
        gzip -d "test_log_${GITHUB_RUN_ID}.gz"
        cat "test_log_${GITHUB_RUN_ID}"
    - name: Create Allure report
      env:
        AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
        AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
      if: ${{ !cancelled() }}
      uses: ./.github/actions/allure-report-generate
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
        slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
    - name: Cleanup Test Resources
      if: always() 
      run: |
        curl -k -X 'POST' \
        "${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
        -H 'accept: application/json' \
        -H "Authorization: Bearer $API_KEY" \
        -d ''
    - name: Stop EC2 instance and wait for the instance to be stopped
      if: always() && steps.poll_step.outputs.too_many_runs != 'true'
      run: |
        aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
        aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID
--- a/.github/workflows/pg-clients.yml
+++ b/.github/workflows/pg-clients.yml
@@ -1,211 +0,0 @@
 name: Test Postgres client libraries
 on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    #          ┌───────────── minute (0 - 59)
    #          │ ┌───────────── hour (0 - 23)
    #          │ │ ┌───────────── day of the month (1 - 31)
    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron:  '23 02 * * *' # run once a day, timezone is utc
  pull_request:
    paths:
      - '.github/workflows/pg-clients.yml'
      - 'test_runner/pg_clients/**'
      - 'test_runner/logical_repl/**'
      - 'poetry.lock'
  workflow_dispatch:
 concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
 defaults:
  run:
    shell: bash -euxo pipefail {0}
 env:
  DEFAULT_PG_VERSION: 16
  PLATFORM: neon-captest-new
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
  AWS_DEFAULT_REGION: eu-central-1
 jobs:
  check-permissions:
    if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
    uses: ./.github/workflows/check-permissions.yml
    with:
      github-event-name: ${{ github.event_name }}
  check-build-tools-image:
    needs: [ check-permissions ]
    uses: ./.github/workflows/check-build-tools-image.yml
  build-build-tools-image:
    needs: [ check-build-tools-image ]
    uses: ./.github/workflows/build-build-tools-image.yml
    with:
      image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
    secrets: inherit
  test-logical-replication:
    needs: [ build-build-tools-image ]
    runs-on: ubuntu-22.04
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init --user root
    services:
      clickhouse:
        image: clickhouse/clickhouse-server:24.6.3.64
        ports:
          - 9000:9000
          - 8123:8123
      zookeeper:
        image: quay.io/debezium/zookeeper:2.7
        ports:
          - 2181:2181
      kafka:
        image: quay.io/debezium/kafka:2.7
        env:
          ZOOKEEPER_CONNECT: "zookeeper:2181"
          KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092
          KAFKA_BROKER_ID: 1
          KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
          KAFKA_JMX_PORT: 9991
        ports:
          - 9092:9092
      debezium:
        image: quay.io/debezium/connect:2.7
        env:
          BOOTSTRAP_SERVERS: kafka:9092
          GROUP_ID: 1
          CONFIG_STORAGE_TOPIC: debezium-config
          OFFSET_STORAGE_TOPIC: debezium-offset
          STATUS_STORAGE_TOPIC: debezium-status
          DEBEZIUM_CONFIG_CONNECTOR_CLASS: io.debezium.connector.postgresql.PostgresConnector
        ports:
          - 8083:8083
    steps:
      - uses: actions/checkout@v4
      - name: Download Neon artifact
        uses: ./.github/actions/download
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
          path: /tmp/neon/
          prefix: latest
      - name: Create Neon Project
        id: create-neon-project
        uses: ./.github/actions/neon-project-create
        with:
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
          postgres_version: ${{ env.DEFAULT_PG_VERSION }}
      - name: Run tests
        uses: ./.github/actions/run-python-test-set
        with:
          build_type: remote
          test_selection: logical_repl
          run_in_parallel: false
          extra_params: -m remote_cluster
          pg_version: ${{ env.DEFAULT_PG_VERSION }}
        env:
          BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
      - name: Delete Neon Project
        if: always()
        uses: ./.github/actions/neon-project-delete
        with:
          project_id: ${{ steps.create-neon-project.outputs.project_id }}
          api_key: ${{ secrets.NEON_STAGING_API_KEY }}
      - name: Create Allure report
        if: ${{ !cancelled() }}
        id: create-allure-report
        uses: ./.github/actions/allure-report-generate
        with:
          store-test-results-into-db: true
        env:
          REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
      - name: Post to a Slack channel
        if: github.event.schedule && failure()
        uses: slackapi/slack-github-action@v1
        with:
          channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
          slack-message: |
            Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
  test-postgres-client-libs:
    needs: [ build-build-tools-image ]
    runs-on: ubuntu-22.04
    container:
      image: ${{ needs.build-build-tools-image.outputs.image }}
      credentials:
        username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
        password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      options: --init --user root
    steps:
    - uses: actions/checkout@v4
    - name: Download Neon artifact
      uses: ./.github/actions/download
      with:
        name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
        path: /tmp/neon/
        prefix: latest
    - name: Create Neon Project
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
    - name: Run tests
      uses: ./.github/actions/run-python-test-set
      with:
        build_type: remote
        test_selection: pg_clients
        run_in_parallel: false
        extra_params: -m remote_cluster
        pg_version: ${{ env.DEFAULT_PG_VERSION }}
      env:
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
    - name: Delete Neon Project
      if: always()
      uses: ./.github/actions/neon-project-delete
      with:
        project_id: ${{ steps.create-neon-project.outputs.project_id }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
    - name: Create Allure report
      if: ${{ !cancelled() }}
      id: create-allure-report
      uses: ./.github/actions/allure-report-generate
      with:
        store-test-results-into-db: true
      env:
        REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
    - name: Post to a Slack channel
      if: github.event.schedule && failure()
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
        slack-message: |
          Testing Postgres clients: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/pg_clients.yml
+++ b/.github/workflows/pg_clients.yml
@@ -0,0 +1,98 @@
 name: Test Postgres client libraries
 on:
  schedule:
    # * is a special character in YAML so you have to quote this string
    #          ┌───────────── minute (0 - 59)
    #          │ ┌───────────── hour (0 - 23)
    #          │ │ ┌───────────── day of the month (1 - 31)
    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
    - cron:  '23 02 * * *' # run once a day, timezone is utc
  workflow_dispatch:
 concurrency:
  # Allow only one workflow run at a time per non-`main` branch.
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
  cancel-in-progress: true
 jobs:
  test-postgres-client-libs:
    # TODO: switch to gen2 runner, requires docker
    runs-on: [ ubuntu-latest ]
    env:
      DEFAULT_PG_VERSION: 14
      TEST_OUTPUT: /tmp/test_output
    steps:
    - name: Checkout
      uses: actions/checkout@v4
    - uses: actions/setup-python@v4
      with:
        python-version: 3.9
    - name: Install Poetry
      uses: snok/install-poetry@v1
    - name: Cache poetry deps
      uses: actions/cache@v4
      with:
        path: ~/.cache/pypoetry/virtualenvs
        key: v2-${{ runner.os }}-python-deps-ubunutu-latest-${{ hashFiles('poetry.lock') }}
    - name: Install Python deps
      shell: bash -euxo pipefail {0}
      run: ./scripts/pysync
    - name: Create Neon Project
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
    - name: Run pytest
      env:
        REMOTE_ENV: 1
        BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
        POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
      shell: bash -euxo pipefail {0}
      run: |
        # The test framework expects a psql binary to exist,
        # but since this test doesn't really need it, mock it with an empty file.
        mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
        ./scripts/pytest \
          --junitxml=$TEST_OUTPUT/junit.xml \
          --tb=short \
          --verbose \
          -m "remote_cluster" \
          -rA "test_runner/pg_clients"
    - name: Delete Neon Project
      if: ${{ always() }}
      uses: ./.github/actions/neon-project-delete
      with:
        project_id: ${{ steps.create-neon-project.outputs.project_id }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
    # We use GitHub's upload-artifact action because `ubuntu-latest` doesn't have the AWS CLI configured.
    # This will be fixed after switching to the gen2 runner.
    - name: Upload python test logs
      if: always()
      uses: actions/upload-artifact@v4
      with:
        retention-days: 7
        name: python-test-pg_clients-${{ runner.os }}-stage-logs
        path: ${{ env.TEST_OUTPUT }}
    - name: Post to a Slack channel
      if: ${{ github.event.schedule && failure() }}
      uses: slackapi/slack-github-action@v1
      with:
        channel-id: "C033QLM5P7D" # dev-staging-stream
        slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
--- a/.github/workflows/pin-build-tools-image.yml
+++ b/.github/workflows/pin-build-tools-image.yml
@@ -7,20 +7,12 @@ on:
        description: 'Source tag'
        required: true
        type: string
      force:
        description: 'Force the image to be pinned'
        default: false
        type: boolean
  workflow_call:
    inputs:
      from-tag:
        description: 'Source tag'
        required: true
        type: string
      force:
        description: 'Force the image to be pinned'
        default: false
        type: boolean
 defaults:
  run:
@@ -28,20 +20,16 @@ defaults:
 concurrency:
  group: pin-build-tools-image-${{ inputs.from-tag }}
  cancel-in-progress: false
 # No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
 permissions: {}
 env:
  FROM_TAG: ${{ inputs.from-tag }}
  TO_TAG: pinned
 jobs:
-  check-manifests:
+  tag-image:
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
-    outputs:
+
-      skip: ${{ steps.check-manifests.outputs.skip }}
+    env:
      FROM_TAG: ${{ inputs.from-tag }}
      TO_TAG: pinned
    steps:
      - name: Check if we really need to pin the image
@@ -58,44 +46,27 @@ jobs:
          echo "skip=${skip}" | tee -a $GITHUB_OUTPUT
  tag-image:
    needs: check-manifests
    # use format(..) to catch both inputs.force = true AND inputs.force = 'true'
    if: needs.check-manifests.outputs.skip == 'false' || format('{0}', inputs.force) == 'true'
    runs-on: ubuntu-22.04
    permissions:
      id-token: write # for `azure/login`
    steps:
      - uses: docker/login-action@v3
-
+        if: steps.check-manifests.outputs.skip == 'false'
        with:
          username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
          password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
      - name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub
        if: steps.check-manifests.outputs.skip == 'false'
        run: |
          docker buildx imagetools create -t neondatabase/build-tools:${TO_TAG} \
                                             neondatabase/build-tools:${FROM_TAG}
      - uses: docker/login-action@v3
        if: steps.check-manifests.outputs.skip == 'false'
        with:
          registry: 369495373322.dkr.ecr.eu-central-1.amazonaws.com
          username: ${{ secrets.AWS_ACCESS_KEY_DEV }}
          password: ${{ secrets.AWS_SECRET_KEY_DEV }}
-      - name: Azure login
+      - name: Tag build-tools with `${{ env.TO_TAG }}` in ECR
-        uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a  # @v2.1.1
+        if: steps.check-manifests.outputs.skip == 'false'
        with:
          client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }}
          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
          subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }}
      - name: Login to ACR
        run: |
          az acr login --name=neoneastus2
      - name: Tag build-tools with `${{ env.TO_TAG }}` in Docker Hub, ECR, and ACR
        run: |
          docker buildx imagetools create -t 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:${TO_TAG} \
                                          -t neoneastus2.azurecr.io/neondatabase/build-tools:${TO_TAG} \
                                          -t neondatabase/build-tools:${TO_TAG} \
                                             neondatabase/build-tools:${FROM_TAG}
--- a/.github/workflows/release-notify.yml
+++ b/.github/workflows/release-notify.yml
@@ -19,7 +19,7 @@ on:
 jobs:
  notify:
-    runs-on: ubuntu-22.04
+    runs-on: [ ubuntu-latest ]
    steps:
      - uses: neondatabase/dev-actions/release-pr-notify@main
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -26,7 +26,7 @@ defaults:
 jobs:
  create-storage-release-branch:
    if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    permissions:
      contents: write # for `git push`
@@ -52,22 +52,20 @@ jobs:
      env:
        GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
      run: |
        TITLE="Storage & Compute release ${RELEASE_DATE}"
        cat << EOF > body.md
-          ## ${TITLE}
+          ## Release ${RELEASE_DATE}
          **Please merge this Pull Request using 'Create a merge commit' button**
        EOF
-        gh pr create --title "${TITLE}" \
+        gh pr create --title "Release ${RELEASE_DATE}" \
                     --body-file "body.md" \
                     --head "${RELEASE_BRANCH}" \
                     --base "release"
  create-proxy-release-branch:
    if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    permissions:
      contents: write # for `git push`
@@ -93,15 +91,13 @@ jobs:
      env:
        GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
      run: |
        TITLE="Proxy release ${RELEASE_DATE}"
        cat << EOF > body.md
-          ## ${TITLE}
+          ## Proxy release ${RELEASE_DATE}
          **Please merge this Pull Request using 'Create a merge commit' button**
        EOF
-        gh pr create --title "${TITLE}" \
+        gh pr create --title "Proxy release ${RELEASE_DATE}" \
                     --body-file "body.md" \
                     --head "${RELEASE_BRANCH}" \
                     --base "release-proxy"
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -13,11 +13,13 @@ defaults:
 env:
  # A concurrency group that we use for e2e-tests runs, matches `concurrency.group` above with `github.repository` as a prefix
  E2E_CONCURRENCY_GROUP: ${{ github.repository }}-e2e-tests-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
  AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
 jobs:
  cancel-previous-e2e-tests:
    if: github.event_name == 'pull_request'
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
    steps:
      - name: Cancel previous e2e-tests runs for this PR
@@ -29,7 +31,7 @@ jobs:
              --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
  tag:
-    runs-on: ubuntu-22.04
+    runs-on: [ ubuntu-latest ]
    outputs:
      build-tag: ${{ steps.build-tag.outputs.tag }}
@@ -60,88 +62,58 @@ jobs:
  trigger-e2e-tests:
    needs: [ tag ]
-    runs-on: ubuntu-22.04
+    runs-on: [ self-hosted, gen3, small ]
    env:
      EVENT_ACTION: ${{ github.event.action }}
      GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
      TAG: ${{ needs.tag.outputs.build-tag }}
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
      options: --init
    steps:
-      - name: Wait for `promote-images` job to finish
+      - name: check if ecr image are present
        # It's important to have a timeout here: the script in this step can otherwise run indefinitely.
        timeout-minutes: 60
        run: |
-          if [ "${GITHUB_EVENT_NAME}" != "pull_request" ] || [ "${EVENT_ACTION}" != "ready_for_review" ]; then
+          for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
-            exit 0
+            OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
-          fi
+            if [ "$OUTPUT" == "" ]; then
-
+              echo "$REPO with image tag $TAG not found" >> $GITHUB_OUTPUT
-          # For PRs we use the run id as the tag
+              exit 1
-          BUILD_AND_TEST_RUN_ID=${TAG}
+            fi
          while true; do
            conclusion=$(gh run --repo ${GITHUB_REPOSITORY} view ${BUILD_AND_TEST_RUN_ID} --json jobs --jq '.jobs[] | select(.name == "promote-images") | .conclusion')
            case "$conclusion" in
              success)
                break
                ;;
              failure | cancelled | skipped)
                echo "The 'promote-images' job didn't succeed: '${conclusion}'. Exiting..."
                exit 1
                ;;
              *)
                echo "The 'promote-images' hasn't succeed yet. Waiting..."
                sleep 60
                ;;
            esac
          done
      - name: Set e2e-platforms
        id: e2e-platforms
        env:
          PR_NUMBER: ${{ github.event.pull_request.number }}
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Default set of platforms to run e2e tests on
          platforms='["docker", "k8s"]'
          # If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
          # If the workflow run is not a pull request, add k8s-neonvm to the list.
          if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
            for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
              case "$f" in
                vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
                  platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
                  ;;
                *)
                  # no-op
                  ;;
              esac
            done
          else
            platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
          fi
          echo "e2e-platforms=${platforms}" | tee -a $GITHUB_OUTPUT
      - name: Set PR's status to pending and request a remote CI test
        env:
          E2E_PLATFORMS: ${{ steps.e2e-platforms.outputs.e2e-platforms }}
          COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
        run: |
-          REMOTE_REPO="${GITHUB_REPOSITORY_OWNER}/cloud"
+          # For pull requests, GH Actions sets the "github.sha" variable to point at a fake merge commit,
          # but we need to use the real sha of the latest commit in the PR's branch for the e2e job,
          # to place a job run status update later.
          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
          # For non-PR kinds of runs, the above will produce an empty variable, pick the original sha value for those
          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
-          gh api "/repos/${GITHUB_REPOSITORY}/statuses/${COMMIT_SHA}" \
+          REMOTE_REPO="${{ github.repository_owner }}/cloud"
            --method POST \
            --raw-field "state=pending" \
            --raw-field "description=[$REMOTE_REPO] Remote CI job is about to start" \
            --raw-field "context=neon-cloud-e2e"
-          gh workflow --repo ${REMOTE_REPO} \
+          curl -f -X POST \
-            run testing.yml \
+          https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-              --ref "main" \
+          -H "Accept: application/vnd.github.v3+json" \
-              --raw-field "ci_job_name=neon-cloud-e2e" \
+          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
-              --raw-field "commit_hash=$COMMIT_SHA" \
+          --data \
-              --raw-field "remote_repo=${GITHUB_REPOSITORY}" \
+            "{
-              --raw-field "storage_image_tag=${TAG}" \
+              \"state\": \"pending\",
-              --raw-field "compute_image_tag=${TAG}" \
+              \"context\": \"neon-cloud-e2e\",
-              --raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
+              \"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
-              --raw-field "e2e-platforms=${E2E_PLATFORMS}"
+            }"
          curl -f -X POST \
          https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
          -H "Accept: application/vnd.github.v3+json" \
          --user "${{ secrets.CI_ACCESS_TOKEN }}" \
          --data \
            "{
              \"ref\": \"main\",
              \"inputs\": {
                \"ci_job_name\": \"neon-cloud-e2e\",
                \"commit_hash\": \"$COMMIT_SHA\",
                \"remote_repo\": \"${{ github.repository }}\",
                \"storage_image_tag\": \"${TAG}\",
                \"compute_image_tag\": \"${TAG}\",
                \"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
              }
            }"
--- a/.neon_clippy_args
+++ b/.neon_clippy_args
@@ -1,5 +1,4 @@
 # * `-A unknown_lints` – do not warn about unknown lint suppressions
 #                        that people with newer toolchains might use
 # * `-D warnings`      - fail on any warnings (`cargo` returns non-zero exit status)
-# * `-D clippy::todo`  - don't let `todo!()` slip into `main`
+export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings"
 export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings -D clippy::todo"
--- a/10
+++ b/10
@@ -1,13 +1,13 @@
 /compute_tools/ @neondatabase/control-plane @neondatabase/compute
-/storage_controller @neondatabase/storage
+/control_plane/attachment_service @neondatabase/storage
 /libs/pageserver_api/ @neondatabase/storage
-/libs/postgres_ffi/ @neondatabase/compute @neondatabase/storage
+/libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers
 /libs/remote_storage/ @neondatabase/storage
-/libs/safekeeper_api/ @neondatabase/storage
+/libs/safekeeper_api/ @neondatabase/safekeepers
 /libs/vm_monitor/ @neondatabase/autoscaling
 /pageserver/ @neondatabase/storage
 /pgxn/ @neondatabase/compute
-/pgxn/neon/ @neondatabase/compute @neondatabase/storage
+/pgxn/neon/ @neondatabase/compute @neondatabase/safekeepers
 /proxy/ @neondatabase/proxy
-/safekeeper/ @neondatabase/storage
+/safekeeper/ @neondatabase/safekeepers
 /vendor/ @neondatabase/compute
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ resolver = "2"
 members = [
    "compute_tools",
    "control_plane",
-    "control_plane/storcon_cli",
+    "control_plane/attachment_service",
    "pageserver",
    "pageserver/compaction",
    "pageserver/ctl",
@@ -12,10 +12,9 @@ members = [
    "proxy",
    "safekeeper",
    "storage_broker",
-    "storage_controller",
+    "s3_scrubber",
    "storage_controller/client",
    "storage_scrubber",
    "workspace_hack",
    "trace",
    "libs/compute_api",
    "libs/pageserver_api",
    "libs/postgres_ffi",
@@ -41,26 +40,24 @@ license = "Apache-2.0"
 ## All dependency versions, used in the project
 [workspace.dependencies]
 ahash = "0.8"
 anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
 async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
-atomic-take = "1.1.0"
+azure_core = "0.18"
-azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
+azure_identity = "0.18"
-azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
+azure_storage = "0.18"
-azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
+azure_storage_blobs = "0.18"
 azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
-aws-config = { version = "1.3", default-features = false, features=["rustls"] }
+aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
-aws-sdk-s3 = "1.26"
+aws-sdk-s3 = "1.14"
 aws-sdk-iam = "1.15.0"
-aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
+aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
-aws-smithy-types = "1.1.9"
+aws-smithy-types = "1.1.4"
-aws-credential-types = "1.2.0"
+aws-credential-types = "1.1.4"
-aws-sigv4 = { version = "1.2.1", features = ["sign-http"] }
+aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
-aws-types = "1.2.0"
+aws-types = "1.1.7"
 axum = { version = "0.6.20", features = ["ws"] }
 base64 = "0.13.0"
 bincode = "1.3"
@@ -75,7 +72,6 @@ clap = { version = "4.0", features = ["derive"] }
 comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-deque = "0.8.5"
 crossbeam-utils = "0.8.5"
 dashmap = { version = "5.5.0", features = ["raw-api"] }
 either = "1.8"
@@ -83,13 +79,13 @@ enum-map = "2.4.2"
 enumset = "1.0.12"
 fail = "0.5.0"
 fallible-iterator = "0.2"
-framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
+fs2 = "0.4.3"
 futures = "0.3"
 futures-core = "0.3"
 futures-util = "0.3"
 git-version = "0.3"
-hashbrown = "0.14"
+hashbrown = "0.13"
-hashlink = "0.9.1"
+hashlink = "0.8.4"
 hdrhistogram = "7.5.2"
 hex = "0.4"
 hex-literal = "0.4"
@@ -100,8 +96,7 @@ http-types = { version = "2", default-features = false }
 humantime = "2.1"
 humantime-serde = "1.1.1"
 hyper = "0.14"
-tokio-tungstenite = "0.20.0"
+hyper-tungstenite = "0.11"
 indexmap = "2"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
@@ -110,32 +105,32 @@ lasso = "0.7"
 leaky-bucket = "1.0.1"
 libc = "0.2"
 md5 = "0.7.0"
-measured = { version = "0.0.22", features=["lasso"] }
+measured = { version = "0.0.13", features=["default", "lasso"] }
 measured-process = { version = "0.0.22" }
 memoffset = "0.8"
 native-tls = "0.2"
 nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
 notify = "6.0.0"
 num_cpus = "1.15"
 num-traits = "0.2.15"
 once_cell = "1.13"
 opentelemetry = "0.20.0"
-opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
+opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions = "0.12.0"
 parking_lot = "0.12"
-parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
+parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
-parquet_derive = "51.0.0"
+parquet_derive = "49.0.0"
 pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
 pin-project-lite = "0.2"
-procfs = "0.16"
+procfs = "0.14"
-prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
+prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
 prost = "0.11"
 rand = "0.8"
 redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
-reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
+reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_20"] }
+reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
-reqwest-middleware = "0.3.0"
+reqwest-middleware = "0.2.0"
-reqwest-retry = "0.5"
+reqwest-retry = "0.2.2"
 routerify = "3"
 rpds = "0.13"
 rustc-hash = "1.1.0"
@@ -145,7 +140,7 @@ rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
-sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
+sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_path_to_error = "0.1"
@@ -159,12 +154,11 @@ socket2 = "0.5"
 strum = "0.24"
 strum_macros = "0.24"
 "subtle"  = "2.5.0"
-# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
+svg_fmt = "0.4.1"
 svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
 sync_wrapper = "0.1.2"
 tar = "0.4"
 task-local-extensions = "0.1.4"
-test-context = "0.3"
+test-context = "0.1"
 thiserror = "1.0"
 tikv-jemallocator = "0.5"
 tikv-jemalloc-ctl = "0.5"
@@ -179,21 +173,17 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.7"
 toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
 tower-service = "0.3.2"
 tracing = "0.1"
 tracing-error = "0.2.0"
-tracing-opentelemetry = "0.21.0"
+tracing-opentelemetry = "0.20.0"
-tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
+tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
 try-lock = "0.2.5"
 twox-hash = { version = "1.6.3", default-features = false }
 typed-json = "0.1"
 url = "2.2"
 urlencoding = "2.1"
 uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
 walkdir = "2.3.2"
-rustls-native-certs = "0.7"
+webpki-roots = "0.25"
 x509-parser = "0.15"
 whoami = "1.5.1"
 ## TODO replace this with tracing
 env_logger = "0.10"
@@ -201,10 +191,14 @@ log = "0.4"
 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 ## Other git libraries
 heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
 ## Local libraries
 compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
@@ -220,7 +214,6 @@ remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
 desim = { version = "0.1", path = "./libs/desim" }
 storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
 storage_controller_client = { path = "./storage_controller/client" }
 tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
 tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
 utils = { version = "0.1", path = "./libs/utils/" }
@@ -239,12 +232,13 @@ tonic-build = "0.9"
 [patch.crates-io]
-# Needed to get `tokio-postgres-rustls` to depend on our fork.
+# This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
 # bug fixes for UUID
-parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
+parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
-parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
+parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
 ################# Binary contents sections
--- a/45
+++ b/45
@@ -17,7 +17,7 @@ COPY --chown=nonroot pgxn pgxn
 COPY --chown=nonroot Makefile Makefile
 COPY --chown=nonroot scripts/ninstall.sh scripts/ninstall.sh
-ENV BUILD_TYPE=release
+ENV BUILD_TYPE release
 RUN set -e \
    && mold -run make -j $(nproc) -s neon-pg-ext \
    && rm -rf pg_install/build \
@@ -29,15 +29,25 @@ WORKDIR /home/nonroot
 ARG GIT_VERSION=local
 ARG BUILD_TAG
 # Enable https://github.com/paritytech/cachepot to cache Rust crates' compilation results in Docker builds.
 # Set up cachepot to use an AWS S3 bucket for cache results, to reuse it between `docker build` invocations.
 # cachepot falls back to local filesystem if S3 is misconfigured, not failing the build
 ARG RUSTC_WRAPPER=cachepot
 ENV AWS_REGION=eu-central-1
 ENV CACHEPOT_S3_KEY_PREFIX=cachepot
 ARG CACHEPOT_BUCKET=neon-github-dev
 #ARG AWS_ACCESS_KEY_ID
 #ARG AWS_SECRET_ACCESS_KEY
 COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
 COPY --from=pg-build /home/nonroot/pg_install/v16/lib                       pg_install/v16/lib
 COPY --chown=nonroot . .
-ARG ADDITIONAL_RUSTFLAGS
+# Show build caching stats to check if it was used in the end.
 # Has to be part of the same RUN since the cachepot daemon is killed at the end of this RUN, losing the compilation stats.
 RUN set -e \
-    && PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment ${ADDITIONAL_RUSTFLAGS}" cargo build \
+    && RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build  \
      --bin pg_sni_router  \
      --bin pageserver  \
      --bin pagectl  \
@@ -46,8 +56,8 @@ RUN set -e \
      --bin storage_controller  \
      --bin proxy  \
      --bin neon_local \
-      --bin storage_scrubber \
+      --locked --release \
-      --locked --release
+    && cachepot -s
 # Build final image
 #
@@ -59,6 +69,8 @@ RUN set -e \
    && apt install -y \
        libreadline-dev \
        libseccomp-dev \
        libicu67 \
        openssl \
        ca-certificates \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
    && useradd -d /data neon \
@@ -72,7 +84,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller  /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy               /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber    /usr/local/bin
 COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
 COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
@@ -81,24 +92,20 @@ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
 # By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
 # Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
-RUN mkdir -p /data/.neon/ && \
+RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
-  echo "id=1234" > "/data/.neon/identity.toml" && \
+    && /usr/local/bin/pageserver -D /data/.neon/ --init \
-  echo "broker_endpoint='http://storage_broker:50051'\n" \
+       -c "id=1234" \
-       "pg_distrib_dir='/usr/local/'\n" \
+       -c "broker_endpoint='http://storage_broker:50051'" \
-       "listen_pg_addr='0.0.0.0:6400'\n" \
+       -c "pg_distrib_dir='/usr/local/'" \
-       "listen_http_addr='0.0.0.0:9898'\n" \
+       -c "listen_pg_addr='0.0.0.0:6400'" \
-  > /data/.neon/pageserver.toml && \
+       -c "listen_http_addr='0.0.0.0:9898'"
  chown -R neon:neon /data/.neon
 # When running a binary that links with libpq, default to using our most recent postgres version.  Binaries
 # that want a particular postgres version will select it explicitly: this is just a default.
-ENV LD_LIBRARY_PATH=/usr/local/v16/lib
+ENV LD_LIBRARY_PATH /usr/local/v16/lib
 VOLUME ["/data"]
 USER neon
 EXPOSE 6400
 EXPOSE 9898
 CMD ["/usr/local/bin/pageserver", "-D", "/data/.neon"]
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -1,13 +1,5 @@
 FROM debian:bullseye-slim
 # Use ARG as a build-time variable here;
 # it's not supposed to be set from outside.
 # Alternatively it can be obtained using the following command
 # ```
 # . /etc/os-release && echo "${VERSION_CODENAME}"
 # ```
 ARG DEBIAN_VERSION_CODENAME=bullseye
 # Add nonroot user
 RUN useradd -ms /bin/bash nonroot -b /home
 SHELL ["/bin/bash", "-c"]
@@ -34,6 +26,7 @@ RUN set -e \
        liblzma-dev \
        libncurses5-dev \
        libncursesw5-dev \
        libpq-dev \
        libreadline-dev \
        libseccomp-dev \
        libsqlite3-dev \
@@ -58,40 +51,29 @@ RUN set -e \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 # protobuf-compiler (protoc)
-ENV PROTOC_VERSION=25.1
+ENV PROTOC_VERSION 25.1
 RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v${PROTOC_VERSION}/protoc-${PROTOC_VERSION}-linux-$(uname -m | sed 's/aarch64/aarch_64/g').zip" -o "protoc.zip" \
    && unzip -q protoc.zip -d protoc \
    && mv protoc/bin/protoc /usr/local/bin/protoc \
    && mv protoc/include/google /usr/local/include/google \
    && rm -rf protoc.zip protoc
 # s5cmd
 ENV S5CMD_VERSION=2.2.2
 RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
    && chmod +x s5cmd \
    && mv s5cmd /usr/local/bin/s5cmd
 # LLVM
-ENV LLVM_VERSION=18
+ENV LLVM_VERSION=17
 RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
-    && echo "deb http://apt.llvm.org/${DEBIAN_VERSION_CODENAME}/ llvm-toolchain-${DEBIAN_VERSION_CODENAME}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
+    && echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
    && apt update \
    && apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
    && bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-# Install docker
+# PostgreSQL 14
-RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
+RUN curl -fsSL 'https://www.postgresql.org/media/keys/ACCC4CF8.asc' | apt-key add - \
-    && echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION_CODENAME} stable" > /etc/apt/sources.list.d/docker.list \
+    && echo 'deb http://apt.postgresql.org/pub/repos/apt bullseye-pgdg main' > /etc/apt/sources.list.d/pgdg.list \
    && apt update \
-    && apt install -y docker-ce docker-ce-cli \
+    && apt install -y postgresql-client-14 \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 # Configure sudo & docker
 RUN usermod -aG sudo nonroot && \
    echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
    usermod -aG docker nonroot
 # AWS CLI
 RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \
    && unzip -q awscliv2.zip \
@@ -99,7 +81,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
    && rm awscliv2.zip
 # Mold: A Modern Linker
-ENV MOLD_VERSION=v2.33.0
+ENV MOLD_VERSION v2.4.0
 RUN set -e \
    && git clone https://github.com/rui314/mold.git \
    && mkdir mold/build \
@@ -124,51 +106,12 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
    && make install \
    && rm -rf ../lcov.tar.gz
 # Compile and install the static OpenSSL library
 ENV OPENSSL_VERSION=1.1.1w
 ENV OPENSSL_PREFIX=/usr/local/openssl
 RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
    echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
    cd /tmp && \
    tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
    rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
    cd /tmp/openssl-${OPENSSL_VERSION} && \
    ./config --prefix=${OPENSSL_PREFIX}  -static --static no-shared -fPIC && \
    make -j "$(nproc)" && \
    make install && \
    cd /tmp && \
    rm -rf /tmp/openssl-${OPENSSL_VERSION}
 # Use the same version of libicu as the compute nodes so that
 # clusters created using initdb on pageserver can be used by computes.
 #
 # TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
 # package, which is 67.1. We're duplicating that knowledge here, and also, technically,
 # Debian has a few patches on top of 67.1 that we're not adding here.
 ENV ICU_VERSION=67.1
 ENV ICU_PREFIX=/usr/local/icu
 # Download and build static ICU
 RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
    echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
    mkdir /tmp/icu && \
    pushd /tmp/icu && \
    tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \
    pushd icu/source && \
    ./configure --prefix=${ICU_PREFIX}  --enable-static --enable-shared=no CXXFLAGS="-fPIC" CFLAGS="-fPIC" && \
    make -j "$(nproc)" && \
    make install && \
    popd && \
    rm -rf icu && \
    rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
    popd
 # Switch to nonroot user
 USER nonroot:nonroot
 WORKDIR /home/nonroot
 # Python
-ENV PYTHON_VERSION=3.9.19 \
+ENV PYTHON_VERSION=3.9.18 \
    PYENV_ROOT=/home/nonroot/.pyenv \
    PATH=/home/nonroot/.pyenv/shims:/home/nonroot/.pyenv/bin:/home/nonroot/.poetry/bin:$PATH
 RUN set -e \
@@ -192,14 +135,9 @@ WORKDIR /home/nonroot
 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.80.1
+ENV RUSTC_VERSION=1.77.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 ARG RUSTFILT_VERSION=0.2.1
 ARG CARGO_HAKARI_VERSION=0.9.30
 ARG CARGO_DENY_VERSION=0.16.1
 ARG CARGO_HACK_VERSION=0.6.31
 ARG CARGO_NEXTEST_VERSION=0.9.72
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
 	chmod +x rustup-init && \
 	./rustup-init -y --default-toolchain ${RUSTC_VERSION} && \
@@ -208,13 +146,15 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    . "$HOME/.cargo/env" && \
    cargo --version && rustup --version && \
    rustup component add llvm-tools-preview rustfmt clippy && \
-    cargo install rustfilt            --version ${RUSTFILT_VERSION} && \
+    cargo install --git https://github.com/paritytech/cachepot && \
-    cargo install cargo-hakari        --version ${CARGO_HAKARI_VERSION} && \
+    cargo install rustfilt && \
-    cargo install cargo-deny --locked --version ${CARGO_DENY_VERSION} && \
+    cargo install cargo-hakari && \
-    cargo install cargo-hack          --version ${CARGO_HACK_VERSION} && \
+    cargo install cargo-deny --locked && \
-    cargo install cargo-nextest       --version ${CARGO_NEXTEST_VERSION} && \
+    cargo install cargo-hack && \
    cargo install cargo-nextest && \
    rm -rf /home/nonroot/.cargo/registry && \
    rm -rf /home/nonroot/.cargo/git
 ENV RUSTC_WRAPPER=cachepot
 # Show versions
 RUN whoami \
@@ -224,6 +164,3 @@ RUN whoami \
    && rustup --version --verbose \
    && rustc --version --verbose \
    && clang --version
 # Set the following flag to check in Makefile if it's running in Docker
 RUN touch /home/nonroot/.docker_build
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -89,16 +89,16 @@ RUN apt update && \
 # SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
 RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
    echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
-    mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
+    mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
    cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
    make clean && cp -R /sfcgal/* /
-ENV PATH="/usr/local/pgsql/bin:$PATH"
+ENV PATH "/usr/local/pgsql/bin:$PATH"
 RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
    echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
-    mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
+    mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
    ./autogen.sh && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
@@ -124,7 +124,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
 RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
    echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
-    mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
+    mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release .. && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -149,7 +149,7 @@ RUN apt update && \
 RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
    echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
-    mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
+    mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
    # generate and copy upgrade scripts
    mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
    cp upgrade/* /usr/local/pgsql/share/extension/ && \
@@ -194,7 +194,7 @@ RUN case "$(uname -m)" in \
 RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
    echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
-    mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
+    mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
    mkdir build && cd build && \
    cmake .. -DCMAKE_BUILD_TYPE=Release && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -204,7 +204,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
 RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
    echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
-    mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
+    mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -222,7 +222,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
    echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
-    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
+    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    # unit extension's "create extension" script relies on absolute install path to fill some reference tables.
@@ -241,17 +241,11 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
 FROM build-deps AS vector-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY patches/pgvector.patch /pgvector.patch
+RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.5.1.tar.gz -O pgvector.tar.gz && \
-
+    echo "cc7a8e034a96e30a819911ac79d32f6bc47bdd1aa2de4d7d4904e26b83209dc8 pgvector.tar.gz" | sha256sum --check && \
-# By default, pgvector Makefile uses `-march=native`. We don't want that,
+    mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
-# because we build the images on different machines than where we run them.
+    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-# Pass OPTFLAGS="" to remove it.
+    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
 RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
    echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
    mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
    patch -p1 < /pgvector.patch && \
    make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
 #########################################################################################
@@ -266,7 +260,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 # 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
 RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
    echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
-    mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
+    mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
@@ -281,7 +275,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
    echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
-    mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
+    mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
@@ -297,7 +291,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
    echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
-    mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
+    mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
@@ -311,12 +305,9 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
 FROM build-deps AS rum-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY patches/rum.patch /rum.patch
 RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
    echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
-    mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
+    mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
    patch -p1 < /rum.patch && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
@@ -332,7 +323,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
    echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
-    mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
+    mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
@@ -348,7 +339,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
    echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
-    mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
+    mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
@@ -364,7 +355,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
    echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
-    mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
+    mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
@@ -380,7 +371,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
    echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
-    mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
+    mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
@@ -396,7 +387,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
    echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
-    mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
+    mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
@@ -411,7 +402,7 @@ FROM build-deps AS timescaledb-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
-ENV PATH="/usr/local/pgsql/bin:$PATH"
+ENV PATH "/usr/local/pgsql/bin:$PATH"
 RUN case "${PG_VERSION}" in \
      "v14" | "v15") \
@@ -427,7 +418,7 @@ RUN case "${PG_VERSION}" in \
    apt-get install -y cmake && \
    wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
    echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
-    mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . && \
+    mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
    ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
    cd build && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -444,7 +435,7 @@ FROM build-deps AS pg-hint-plan-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
-ENV PATH="/usr/local/pgsql/bin:$PATH"
+ENV PATH "/usr/local/pgsql/bin:$PATH"
 RUN case "${PG_VERSION}" in \
      "v14") \
@@ -465,11 +456,36 @@ RUN case "${PG_VERSION}" in \
    esac && \
    wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
    echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
-    mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
+    mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make install -j $(getconf _NPROCESSORS_ONLN) && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
 #########################################################################################
 #
 # Layer "kq-imcx-pg-build"
 # compile kq_imcx extension
 #
 #########################################################################################
 FROM build-deps AS kq-imcx-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN apt-get update && \
    apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
    wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
    echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
    mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
    mkdir build && cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release .. && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
    mkdir -p /extensions/kq_imcx && cp /usr/local/pgsql/share/extension/kq_imcx.control /extensions/kq_imcx && \
    sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
    comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/kq_imcx.tar.zst -T -
 #########################################################################################
 #
@@ -480,10 +496,10 @@ RUN case "${PG_VERSION}" in \
 FROM build-deps AS pg-cron-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
    echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
-    mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
+    mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
@@ -506,10 +522,10 @@ RUN apt-get update && \
        libboost-system1.74-dev \
        libeigen3-dev
-ENV PATH="/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
 RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
    echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
-    mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
+    mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
    cmake \
        -D RDK_BUILD_CAIRO_SUPPORT=OFF \
        -D RDK_BUILD_INCHI_SUPPORT=ON \
@@ -546,10 +562,10 @@ RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.
 FROM build-deps AS pg-uuidv7-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
    echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
-    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
+    mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
@@ -563,10 +579,10 @@ RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz
 FROM build-deps AS pg-roaringbitmap-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
    echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
-    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
+    mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
@@ -580,10 +596,10 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
 FROM build-deps AS pg-semver-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
    echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
-    mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
+    mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
@@ -598,7 +614,7 @@ FROM build-deps AS pg-embedding-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 ARG PG_VERSION
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN case "${PG_VERSION}" in \
      "v14" | "v15") \
        export PG_EMBEDDING_VERSION=0.3.5 \
@@ -609,7 +625,7 @@ RUN case "${PG_VERSION}" in \
    esac && \
    wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
    echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
-    mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
+    mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install
@@ -622,10 +638,10 @@ RUN case "${PG_VERSION}" in \
 FROM build-deps AS pg-anon-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget  https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
    echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9  pg_anon.tar.gz" | sha256sum --check && \
-    mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
+    mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
    find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
@@ -657,7 +673,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
    chmod +x rustup-init && \
    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
    rm rustup-init && \
-    cargo install --locked --version 0.11.3 cargo-pgrx && \
+    cargo install --locked --version 0.10.2 cargo-pgrx && \
    /bin/bash -c 'cargo pgrx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
 USER root
@@ -672,15 +688,10 @@ USER root
 FROM rust-extensions-build AS pg-jsonschema-pg-build
 ARG PG_VERSION
-RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.gz -O pg_jsonschema.tar.gz && \
+RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
-    echo "61df3db1ed83cf24f6aa39c826f8818bfa4f0bd33b587fd6b2b1747985642297 pg_jsonschema.tar.gz" | sha256sum --check && \
+    echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
-    mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
+    mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
-    # see commit 252b3685a27a0f4c31a0f91e983c6314838e89e8
+    sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    # The `unsafe-postgres` feature allows building pgx extensions
    # against postgres forks that decided to change their ABI name (like us).
    # With that we can build extensions without forking them, while using stock
    # pgx. As this feature is new, a few manual version bumps were required.
    sed -i 's/pgrx = "0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    cargo pgrx install --release && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
@@ -694,10 +705,10 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.3.1.tar.
 FROM rust-extensions-build AS pg-graphql-pg-build
 ARG PG_VERSION
-RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.5.7.tar.gz -O pg_graphql.tar.gz && \
+RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
-    echo "2b3e567a5b31019cb97ae0e33263c1bcc28580be5a444ac4c8ece5c4be2aea41 pg_graphql.tar.gz" | sha256sum --check && \
+    echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
-    mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
+    mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
-    sed -i 's/pgrx = "=0.11.3"/pgrx = { version = "0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
+    sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    cargo pgrx install --release && \
    # superuser is needed to enable the extension because it uses the untrusted C language
    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
@@ -716,10 +727,7 @@ ARG PG_VERSION
 # 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
 RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
    echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
-    mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
+    mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
    # TODO update pgrx version in the pg_tiktoken repo and remove this line
    sed -i 's/pgrx = { version = "=0.10.2",/pgrx = { version = "0.11.3",/g' Cargo.toml && \
    sed -i 's/pgrx-tests = "=0.10.2"/pgrx-tests = "0.11.3"/g' Cargo.toml && \
    cargo pgrx install --release && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
@@ -733,10 +741,14 @@ RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6
 FROM rust-extensions-build AS pg-pgx-ulid-build
 ARG PG_VERSION
-RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -O pgx_ulid.tar.gz && \
+RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
-    echo "9d1659a2da65af0133d5451c454de31b37364e3502087dadf579f790bc8bef17 pgx_ulid.tar.gz" | sha256sum --check && \
+    echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
-    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
+    mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
-    sed -i 's/pgrx       = "^0.11.2"/pgrx = { version = "=0.11.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
+    echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
    wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
    patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
    echo "********************************************************************************************************" && \
    sed -i 's/pgrx       = "=0.10.2"/pgrx = { version = "=0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
    cargo pgrx install --release && \
    echo "trusted = true" >> /usr/local/pgsql/share/extension/ulid.control
@@ -750,10 +762,10 @@ RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.5.tar.gz -
 FROM build-deps AS wal2json-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
    echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
-    mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
+    mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install
@@ -766,10 +778,10 @@ RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.
 FROM build-deps AS pg-ivm-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
    echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
-    mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
+    mkdir pg_ivm-src && cd pg_ivm-src && tar xvzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control
@@ -783,10 +795,10 @@ RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_iv
 FROM build-deps AS pg-partman-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-ENV PATH="/usr/local/pgsql/bin/:$PATH"
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
 RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
    echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
-    mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
+    mkdir pg_partman-src && cd pg_partman-src && tar xvzf ../pg_partman.tar.gz --strip-components=1 -C . && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control
@@ -822,6 +834,7 @@ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -909,70 +922,6 @@ RUN rm -r /usr/local/pgsql/include
 # if they were to be used by other libraries.
 RUN rm /usr/local/pgsql/lib/lib*.a
 #########################################################################################
 #
 # Layer neon-pg-ext-test
 #
 #########################################################################################
 FROM neon-pg-ext-build AS neon-pg-ext-test
 ARG PG_VERSION
 # Staging directory that collects the extension source tarballs and patches copied below.
 RUN mkdir /ext-src
 #COPY --from=postgis-build /postgis.tar.gz /ext-src/
 #COPY --from=postgis-build /sfcgal/* /usr
 COPY --from=plv8-build /plv8.tar.gz /ext-src/
 COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
 COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
 COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
 COPY --from=vector-pg-build /pgvector.patch /ext-src/
 COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
 #COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
 #COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
 #COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
 COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
 COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
 COPY --from=rum-pg-build /rum.tar.gz /ext-src
 COPY patches/rum.patch /ext-src
 #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
 COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
 COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
 COPY --from=hll-pg-build /hll.tar.gz /ext-src
 COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
 #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
 COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
 COPY patches/pg_hintplan.patch /ext-src
 COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
 COPY patches/pg_cron.patch /ext-src
 #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
 #COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
 COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
 COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
 COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
 #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
 #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
 COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
 COPY patches/pg_anon.patch /ext-src
 COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
 COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
 # Unpack every tarball into a sibling directory named after the archive
 # (the file name up to ".tar", plus "-src", e.g. pgvector.tar.gz -> pgvector-src),
 # abort the build if extraction fails, and delete the tarball afterwards.
 RUN cd /ext-src/ && for f in *.tar.gz; \
    do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
    rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
    || exit 1; rm -f $f; done
 RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
 RUN cd /ext-src/rum-src && patch -p1 <../rum.patch
 # cmake is required for the h3 test
 RUN apt-get update && apt-get install -y cmake
 # NOTE(review): unlike the pgvector/rum patches above, the pg_hintplan, pg_anon and
 # pg_cron patches are applied without a `cd`, i.e. relative to whatever the working
 # directory of this stage is (not set here) - confirm this is the intended target tree.
 RUN patch -p1 < /ext-src/pg_hintplan.patch
 COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
 RUN patch -p1 </ext-src/pg_anon.patch
 RUN patch -p1 </ext-src/pg_cron.patch
 ENV PATH=/usr/local/pgsql/bin:$PATH
 # Default connection parameters (host, port, user, database); presumably picked up
 # by the libpq-based clients used in /run-tests.sh - confirm.
 ENV PGHOST=compute
 ENV PGPORT=55433
 ENV PGUSER=cloud_admin
 ENV PGDATABASE=postgres
 #########################################################################################
 #
 # Final layer
@@ -995,9 +944,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
 # Create remote extension download directory
 RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
 # Install:
 # libreadline8 for psql
 # libicu67, locales for collations (including ICU and plpgsql_check)
@@ -1034,6 +980,6 @@ RUN apt update &&  \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
-ENV LANG=en_US.utf8
+ENV LANG en_US.utf8
 USER postgres
 ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/57
+++ b/57
@@ -3,9 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
 # Where to install Postgres; the default is ./pg_install. Overriding it may be useful for package managers.
 POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
 OPENSSL_PREFIX_DIR := /usr/local/openssl
 ICU_PREFIX_DIR := /usr/local/icu
 #
 # We differentiate between release / debug build types using the BUILD_TYPE
 # environment variable.
@@ -23,31 +20,19 @@ else
 	$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
 endif
 ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
 	# Exclude static build openssl, icu for local build (MacOS, Linux)
 	# Only keep for build type release and debug
 	PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
 	PG_CONFIGURE_OPTS += --with-icu
 	PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
 	PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
 	PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
 endif
 UNAME_S := $(shell uname -s)
 ifeq ($(UNAME_S),Linux)
 	# Seccomp BPF is only available for Linux
 	PG_CONFIGURE_OPTS += --with-libseccomp
 else ifeq ($(UNAME_S),Darwin)
-	ifndef DISABLE_HOMEBREW
+	# macOS with brew-installed openssl requires explicit paths
-		# macOS with brew-installed openssl requires explicit paths
+	# It can be configured with OPENSSL_PREFIX variable
-		# It can be configured with OPENSSL_PREFIX variable
+	OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
-		OPENSSL_PREFIX := $(shell brew --prefix openssl@3)
+	PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
-		PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
+	PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
-		PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
+	# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
-		# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
+	# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
-		# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
+	EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
 		EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
 	endif
 endif
 # Use -C option so that when PostgreSQL "make install" installs the
@@ -69,8 +54,6 @@ CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
 # Set PQ_LIB_DIR to make sure `storage_controller` gets linked with the bundled libpq (through diesel)
 CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
 CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
 #
 # Top level Makefile to build Neon and PostgreSQL
 #
@@ -81,38 +64,26 @@ all: neon postgres neon-pg-ext
 #
 # The 'postgres_ffi' depends on the Postgres headers.
 .PHONY: neon
-neon: postgres-headers walproposer-lib cargo-target-dir
+neon: postgres-headers walproposer-lib
 	+@echo "Compiling Neon"
 	$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
 .PHONY: cargo-target-dir
 cargo-target-dir:
 	# https://github.com/rust-lang/cargo/issues/14281
 	mkdir -p target
 	test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
 ### PostgreSQL parts
 # Some rules are duplicated for Postgres v14 and 15. We may want to refactor
 # to avoid the duplication in the future, but it's tolerable for now.
 #
 $(POSTGRES_INSTALL_DIR)/build/%/config.status:
 	mkdir -p $(POSTGRES_INSTALL_DIR)
 	test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
 	+@echo "Configuring Postgres $* build"
 	@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
 		echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
 		echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
 		exit 1; }
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
-
+	(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
-	VERSION=$*; \
+	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
 	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
 	(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
 	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
 		CFLAGS='$(PG_CFLAGS)' \
-		$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
+		$(PG_CONFIGURE_OPTS) \
-		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
+		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
 # nicer alias to run 'configure'
 # Note: I've been unable to use templates for this part of our configuration.
@@ -148,8 +119,6 @@ postgres-%: postgres-configure-% \
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
 	+@echo "Compiling amcheck $*"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
 	+@echo "Compiling test_decoding $*"
 	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
 .PHONY: postgres-clean-%
 postgres-clean-%:
--- a/README.md
+++ b/README.md
@@ -1,6 +1,4 @@
-[![Neon](https://github.com/neondatabase/neon/assets/11527560/f15a17f0-836e-40c5-b35d-030606a6b660)](https://neon.tech)
+[![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
 # Neon
@@ -126,7 +124,7 @@ make -j`sysctl -n hw.logicalcpu` -s
 To run the `psql` client, install the `postgresql-client` package or modify `PATH` and `LD_LIBRARY_PATH` to include `pg_install/bin` and `pg_install/lib`, respectively.
 To run the integration tests or Python scripts (not required to use the code), install
-Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.8](https://python-poetry.org/)) in the project directory.
+Python (3.9 or higher), and install the python3 packages using `./scripts/pysync` (requires [poetry>=1.3](https://python-poetry.org/)) in the project directory.
 #### Running neon database
@@ -262,7 +260,7 @@ By default, this runs both debug and release modes, and all supported postgres v
 testing locally, it is convenient to run just one set of permutations, like this:
 ```sh
-DEFAULT_PG_VERSION=16 BUILD_TYPE=release ./scripts/pytest
+DEFAULT_PG_VERSION=15 BUILD_TYPE=release ./scripts/pytest
 ```
 ## Flamegraphs
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -4,11 +4,6 @@ version = "0.1.0"
 edition.workspace = true
 license.workspace = true
 [features]
 default = []
 # Enables test specific features.
 testing = []
 [dependencies]
 anyhow.workspace = true
 async-compression.workspace = true
@@ -32,12 +27,10 @@ reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 tokio-stream.workspace = true
 tracing.workspace = true
 tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 thiserror.workspace = true
 url.workspace = true
 compute_api.workspace = true
@@ -49,4 +42,3 @@ vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
 zstd = "0.13"
 bytes = "1.0"
 rust-ini = "0.20.0"
 rlimit = "0.10.1"
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -6,7 +6,7 @@
 //! - Every start is a fresh start, so the data directory is removed and
 //!   initialized again on each run.
 //! - If remote_extension_config is provided, it will be used to fetch extensions list
-//!   and download `shared_preload_libraries` from the remote storage.
+//!  and download `shared_preload_libraries` from the remote storage.
 //! - Next it will put configuration files into the `PGDATA` directory.
 //! - Sync safekeepers and get commit LSN.
 //! - Get `basebackup` from pageserver using the returned on the previous step LSN.
@@ -33,6 +33,7 @@
 //!             -b /usr/local/bin/postgres \
 //!             -r http://pg-ext-s3-gateway \
 //! ```
 //!
 use std::collections::HashMap;
 use std::fs::File;
 use std::path::Path;
@@ -46,11 +47,10 @@ use chrono::Utc;
 use clap::Arg;
 use signal_hook::consts::{SIGQUIT, SIGTERM};
 use signal_hook::{consts::SIGINT, iterator::Signals};
-use tracing::{error, info, warn};
+use tracing::{error, info};
 use url::Url;
 use compute_api::responses::ComputeStatus;
 use compute_api::spec::ComputeSpec;
 use compute_tools::compute::{
    forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
@@ -62,45 +62,12 @@ use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
 use compute_tools::swap::resize_swap;
 use rlimit::{setrlimit, Resource};
 // This is an arbitrary build tag. It is fine as a default / for testing purposes
 // in case the environment variable is not set.
 const BUILD_TAG_DEFAULT: &str = "latest";
 /// Entry point of `compute_ctl`.
 ///
 /// Runs the phases strictly in order: `init`, raising the core-dump limit,
 /// the startup phase (CLI processing, spec acquisition, Postgres start)
 /// inside the startup tracing context, waiting for Postgres to exit, and
 /// finally cleanup and process exit.
 ///
 /// # Errors
 ///
 /// Propagates the first error returned by any phase via `?`.
 fn main() -> Result<()> {
    let (build_tag, clap_args) = init()?;
    // enable core dumping for all child processes
    setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
    let (pg_handle, start_pg_result) = {
        // Enter startup tracing context
        // The guard lives until the end of this block, so everything below
        // up to and including `start_postgres` runs under that context.
        let _startup_context_guard = startup_context_from_env();
        let cli_args = process_cli(&clap_args)?;
        let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
        let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
        start_postgres(&clap_args, wait_spec_result)?
        // Startup is finished, exit the startup tracing span
    };
    // PostgreSQL is now running, if startup was successful. Wait until it exits.
    let wait_pg_result = wait_postgres(pg_handle)?;
    let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
    maybe_delay_exit(delay_exit);
    // NOTE(review): nothing follows this call although `main` returns
    // `Result<()>`, so `deinit_and_exit` presumably never returns - confirm.
    deinit_and_exit(wait_pg_result);
 }
 fn init() -> Result<(String, clap::ArgMatches)> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -115,15 +82,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
        .to_string();
    info!("build_tag: {build_tag}");
-    Ok((build_tag, cli().get_matches()))
+    let matches = cli().get_matches();
-}
+    let pgbin_default = String::from("postgres");
-
+    let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
 fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
    let pgbin_default = "postgres";
    let pgbin = matches
        .get_one::<String>("pgbin")
        .map(|s| s.as_str())
        .unwrap_or(pgbin_default);
    let ext_remote_storage = matches
        .get_one::<String>("remote-ext-config")
@@ -149,32 +110,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
        .expect("Postgres connection string is required");
    let spec_json = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");
    let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
    Ok(ProcessCliResult {
        connstr,
        pgdata,
        pgbin,
        ext_remote_storage,
        http_port,
        spec_json,
        spec_path,
        resize_swap_on_bind,
    })
 }
 /// Command-line values extracted by `process_cli`.
 ///
 /// The reference fields borrow from the `clap::ArgMatches` they were read
 /// from (lifetime `'clap`), so the matches must outlive this struct.
 struct ProcessCliResult<'clap> {
    connstr: &'clap str,
    pgdata: &'clap str,
    /// Value of the `pgbin` argument; `process_cli` falls back to `"postgres"`.
    pgbin: &'clap str,
    /// Value of the `remote-ext-config` argument, if given.
    ext_remote_storage: Option<&'clap str>,
    http_port: u16,
    /// Value of the `spec` argument, if given.
    spec_json: Option<&'clap String>,
    /// Value of the `spec-path` argument, if given.
    spec_path: Option<&'clap String>,
    /// State of the `resize-swap-on-bind` flag.
    resize_swap_on_bind: bool,
 }
 fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
    // Extract OpenTelemetry context for the startup actions from the
    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
    // tracing context.
@@ -211,7 +147,7 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
    if let Ok(val) = std::env::var("TRACESTATE") {
        startup_tracing_carrier.insert("tracestate".to_string(), val);
    }
-    if !startup_tracing_carrier.is_empty() {
+    let startup_context_guard = if !startup_tracing_carrier.is_empty() {
        use opentelemetry::propagation::TextMapPropagator;
        use opentelemetry::sdk::propagation::TraceContextPropagator;
        let guard = TraceContextPropagator::new()
@@ -221,17 +157,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
        Some(guard)
    } else {
        None
-    }
+    };
 }
 fn try_spec_from_cli(
    matches: &clap::ArgMatches,
    ProcessCliResult {
        spec_json,
        spec_path,
        ..
    }: &ProcessCliResult,
 ) -> Result<CliSpecParams> {
    let compute_id = matches.get_one::<String>("compute-id");
    let control_plane_uri = matches.get_one::<String>("control-plane-uri");
@@ -272,34 +199,6 @@ fn try_spec_from_cli(
        }
    };
    Ok(CliSpecParams {
        spec,
        live_config_allowed,
    })
 }
 /// Result of `try_spec_from_cli`, handed on to `wait_spec`.
 struct CliSpecParams {
    /// If a spec was provided via CLI or file, the [`ComputeSpec`]
    spec: Option<ComputeSpec>,
    // NOTE(review): presumably controls whether the spec may still be changed
    // at runtime - confirm against `try_spec_from_cli`.
    live_config_allowed: bool,
 }
 fn wait_spec(
    build_tag: String,
    ProcessCliResult {
        connstr,
        pgdata,
        pgbin,
        ext_remote_storage,
        resize_swap_on_bind,
        http_port,
        ..
    }: ProcessCliResult,
    CliSpecParams {
        spec,
        live_config_allowed,
    }: CliSpecParams,
 ) -> Result<WaitSpecResult> {
    let mut new_state = ComputeState::new();
    let spec_set;
@@ -327,17 +226,19 @@ fn wait_spec(
    // If this is a pooled VM, prewarm before starting HTTP server and becoming
    // available for binding. Prewarming helps Postgres start quicker later,
-    // because QEMU will already have its memory allocated from the host, and
+    // because QEMU will already have its memory allocated from the host, and
    // the necessary binaries will already be cached.
    if !spec_set {
        compute.prewarm_postgres()?;
    }
-    // Launch http service first, so that we can serve control-plane requests
+    // Launch http service first, so we were able to serve control-plane
-    // while configuration is still in progress.
+    // requests, while configuration is still in progress.
    let _http_handle =
        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
    let extension_server_port: u16 = http_port;
    if !spec_set {
        // No spec provided, hang waiting for it.
        info!("no compute spec provided, waiting");
@@ -352,45 +253,21 @@ fn wait_spec(
                break;
            }
        }
        // Record for how long we slept waiting for the spec.
        let now = Utc::now();
        state.metrics.wait_for_spec_ms = now
            .signed_duration_since(state.start_time)
            .to_std()
            .unwrap()
            .as_millis() as u64;
        // Reset start time, so that the total startup time that is calculated later will
        // not include the time that we waited for the spec.
        state.start_time = now;
    }
    Ok(WaitSpecResult {
        compute,
        http_port,
        resize_swap_on_bind,
    })
 }
 /// Result of `wait_spec`, handed on to `start_postgres`.
 struct WaitSpecResult {
    /// Shared handle to the compute node that `start_postgres` will start.
    compute: Arc<ComputeNode>,
    // passed through from ProcessCliResult
    http_port: u16,
    resize_swap_on_bind: bool,
 }
 fn start_postgres(
    // need to allow unused because `matches` is only used if target_os = "linux"
    #[allow(unused_variables)] matches: &clap::ArgMatches,
    WaitSpecResult {
        compute,
        http_port,
        resize_swap_on_bind,
    }: WaitSpecResult,
 ) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
    // Record for how long we slept waiting for the spec.
    state.metrics.wait_for_spec_ms = Utc::now()
        .signed_duration_since(state.start_time)
        .to_std()
        .unwrap()
        .as_millis() as u64;
    // Reset start time to the actual start of the configuration, so that
    // total startup time was properly measured at the end.
    state.start_time = Utc::now();
    state.status = ComputeStatus::Init;
    compute.state_changed.notify_all();
@@ -398,72 +275,33 @@ fn start_postgres(
        "running compute with features: {:?}",
        state.pspec.as_ref().unwrap().spec.features
    );
    // before we release the mutex, fetch the swap size (if any) for later.
    let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
    drop(state);
    // Launch remaining service threads
    let _monitor_handle = launch_monitor(&compute);
    let _configurator_handle = launch_configurator(&compute);
    let mut prestartup_failed = false;
    let mut delay_exit = false;
    // Resize swap to the desired size if the compute spec says so
    if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
        // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
        // *before* starting postgres.
        //
        // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
        // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
        // OOM-killed during startup because swap wasn't available yet.
        match resize_swap(size_bytes) {
            Ok(()) => {
                let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
                info!(%size_bytes, %size_gib, "resized swap");
            }
            Err(err) => {
                let err = err.context("failed to resize swap");
                error!("{err:#}");
                // Mark compute startup as failed; don't try to start postgres, and report this
                // error to the control plane when it next asks.
                prestartup_failed = true;
                let mut state = compute.state.lock().unwrap();
                state.error = Some(format!("{err:?}"));
                state.status = ComputeStatus::Failed;
                compute.state_changed.notify_all();
                delay_exit = true;
            }
        }
    }
    let extension_server_port: u16 = http_port;
    // Start Postgres
-    let mut pg = None;
+    let mut delay_exit = false;
-    if !prestartup_failed {
+    let mut exit_code = None;
-        pg = match compute.start_compute(extension_server_port) {
+    let pg = match compute.start_compute(extension_server_port) {
-            Ok(pg) => Some(pg),
+        Ok(pg) => Some(pg),
-            Err(err) => {
+        Err(err) => {
-                error!("could not start the compute node: {:#}", err);
+            error!("could not start the compute node: {:#}", err);
-                let mut state = compute.state.lock().unwrap();
+            let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{:?}", err));
+            state.error = Some(format!("{:?}", err));
-                state.status = ComputeStatus::Failed;
+            state.status = ComputeStatus::Failed;
-                // Notify others that Postgres failed to start. In case of configuring the
+            // Notify others that Postgres failed to start. In case of configuring the
-                // empty compute, it's likely that API handler is still waiting for compute
+            // empty compute, it's likely that API handler is still waiting for compute
-                // state change. With this we will notify it that compute is in Failed state,
+            // state change. With this we will notify it that compute is in Failed state,
-                // so control plane will know about it earlier and record proper error instead
+            // so control plane will know about it earlier and record proper error instead
-                // of timeout.
+            // of timeout.
-                compute.state_changed.notify_all();
+            compute.state_changed.notify_all();
-                drop(state); // unlock
+            drop(state); // unlock
-                delay_exit = true;
+            delay_exit = true;
-                None
+            None
-            }
+        }
-        };
+    };
    } else {
        warn!("skipping postgres startup because pre-startup step failed");
    }
    // Start the vm-monitor if directed to. The vm-monitor only runs on linux
    // because it requires cgroups.
@@ -496,7 +334,7 @@ fn start_postgres(
            // This token is used internally by the monitor to clean up all threads
            let token = CancellationToken::new();
-            let vm_monitor = rt.as_ref().map(|rt| {
+            let vm_monitor = &rt.as_ref().map(|rt| {
                rt.spawn(vm_monitor::start(
                    Box::leak(Box::new(vm_monitor::Args {
                        cgroup: cgroup.cloned(),
@@ -509,41 +347,12 @@ fn start_postgres(
        }
    }
    Ok((
        pg,
        StartPostgresResult {
            delay_exit,
            compute,
            #[cfg(target_os = "linux")]
            rt,
            #[cfg(target_os = "linux")]
            token,
            #[cfg(target_os = "linux")]
            vm_monitor,
        },
    ))
 }
 /// Handle to a started Postgres: the child process that `wait_postgres` waits on,
 /// plus a thread join handle (bound as `logs_handle` in `wait_postgres`).
 type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
 /// State produced by `start_postgres` (alongside the optional `PostgresHandle`)
 /// and consumed by `cleanup_after_postgres_exit`.
 struct StartPostgresResult {
    // set by `start_postgres` when a startup step failed; ultimately decides
    // whether `maybe_delay_exit` sleeps before shutdown
    delay_exit: bool,
    // passed through from WaitSpecResult
    compute: Arc<ComputeNode>,
    // The remaining fields only exist on linux, where the vm-monitor can run.
    #[cfg(target_os = "linux")]
    rt: Option<tokio::runtime::Runtime>,
    // cancellation token used by the monitor to clean up its threads
    #[cfg(target_os = "linux")]
    token: tokio_util::sync::CancellationToken,
    // join handle of the spawned vm-monitor task, if one was started
    #[cfg(target_os = "linux")]
    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
 }
 fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
    // Wait for the child Postgres process forever. In this state Ctrl+C will
    // propagate to Postgres and it will be shut down as well.
    let mut exit_code = None;
    if let Some((mut pg, logs_handle)) = pg {
        // Startup is finished, exit the startup tracing span
        drop(startup_context_guard);
        let ecode = pg
            .wait()
            .expect("failed to start waiting on Postgres process");
@@ -558,25 +367,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
        exit_code = ecode.code()
    }
    Ok(WaitPostgresResult { exit_code })
 }
 /// Result of `wait_postgres`, handed on to `deinit_and_exit`.
 struct WaitPostgresResult {
    /// Exit code reported by the Postgres process; `None` when Postgres was never
    /// started or the exit status carried no code.
    exit_code: Option<i32>,
 }
 fn cleanup_after_postgres_exit(
    StartPostgresResult {
        mut delay_exit,
        compute,
        #[cfg(target_os = "linux")]
        vm_monitor,
        #[cfg(target_os = "linux")]
        token,
        #[cfg(target_os = "linux")]
        rt,
    }: StartPostgresResult,
 ) -> Result<bool> {
    // Terminate the vm_monitor so it releases the file watcher on
    // /sys/fs/cgroup/neon-postgres.
    // Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -618,19 +408,13 @@ fn cleanup_after_postgres_exit(
        error!("error while checking for core dumps: {err:?}");
    }
    Ok(delay_exit)
 }
 /// Optionally hold the process open for 30 seconds before shutdown.
 ///
 /// When `delay_exit` is set (the launch failed), the HTTP endpoint keeps
 /// serving for a while so the cloud control plane can fetch the actual error.
 fn maybe_delay_exit(delay_exit: bool) {
    // Successful runs shut down immediately.
    if !delay_exit {
        return;
    }

    let grace_period = Duration::from_secs(30);
    info!("giving control plane 30s to collect the error before shutdown");
    thread::sleep(grace_period);
 }
 fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
    // Shutdown trace pipeline gracefully, so that it has a chance to send any
    // pending traces before we exit. Shutting down OTEL tracing provider may
    // hang for quite some time, see, for example:
@@ -738,15 +522,10 @@ fn cli() -> clap::Command {
            Arg::new("filecache-connstr")
                .long("filecache-connstr")
                .default_value(
-                    "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
+                    "host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
                )
                .value_name("FILECACHE_CONNSTR"),
        )
        .arg(
            Arg::new("resize-swap-on-bind")
                .long("resize-swap-on-bind")
                .action(clap::ArgAction::SetTrue),
        )
 }
 /// When compute_ctl is killed, send also termination signal to sync-safekeepers
--- a/compute_tools/src/catalog.rs
+++ b/compute_tools/src/catalog.rs
@@ -1,116 +0,0 @@
 use compute_api::{
    responses::CatalogObjects,
    spec::{Database, Role},
 };
 use futures::Stream;
 use postgres::{Client, NoTls};
 use std::{path::Path, process::Stdio, result::Result, sync::Arc};
 use tokio::{
    io::{AsyncBufReadExt, BufReader},
    process::Command,
    task,
 };
 use tokio_stream::{self as stream, StreamExt};
 use tokio_util::codec::{BytesCodec, FramedRead};
 use tracing::warn;
 use crate::{
    compute::ComputeNode,
    pg_helpers::{get_existing_dbs, get_existing_roles},
 };
 /// Collect the roles and databases that currently exist in the compute's Postgres.
 ///
 /// The synchronous `postgres` client is driven inside `spawn_blocking`, so the
 /// async executor is not blocked while the catalog queries run.
 ///
 /// # Errors
 /// Fails if the connection or any catalog query fails, or if the blocking task
 /// could not be joined.
 pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
    let connstr = compute.connstr.clone();
    task::spawn_blocking(move || {
        let mut client = Client::connect(connstr.as_str(), NoTls)?;

        // Roles are read inside a transaction that is dropped without commit
        // before the client is used again.
        let mut xact = client.transaction()?;
        let roles: Vec<Role> = get_existing_roles(&mut xact)?;
        drop(xact);

        let existing_dbs = get_existing_dbs(&mut client)?;
        let databases: Vec<Database> = existing_dbs.values().cloned().collect();

        Ok(CatalogObjects { roles, databases })
    })
    .await?
 }
 /// Errors returned by `get_database_schema`.
 #[derive(Debug, thiserror::Error)]
 pub enum SchemaDumpError {
    /// pg_dump produced no output and its first stderr line said the requested
    /// database does not exist.
    #[error("Database does not exist.")]
    DatabaseDoesNotExist,
    /// Spawning pg_dump or reading its output failed, or it produced no output
    /// for any other reason.
    #[error("Failed to execute pg_dump.")]
    IO(#[from] std::io::Error),
 }
 // It uses the pg_dump utility to dump the schema of the specified database.
 // The output is streamed back to the caller and supposed to be streamed via HTTP.
 //
 // Before return the result with the output, it checks that pg_dump produced any output.
 // If not, it tries to parse the stderr output to determine if the database does not exist
 // and special error is returned.
 //
 // To make sure that the process is killed when the caller drops the stream, we use tokio kill_on_drop feature.
 pub async fn get_database_schema(
    compute: &Arc<ComputeNode>,
    dbname: &str,
 ) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
    let pgbin = &compute.pgbin;
    let basepath = Path::new(pgbin).parent().unwrap();
    let pgdump = basepath.join("pg_dump");
    let mut connstr = compute.connstr.clone();
    connstr.set_path(dbname);
    let mut cmd = Command::new(pgdump)
        .arg("--schema-only")
        .arg(connstr.as_str())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .kill_on_drop(true)
        .spawn()?;
    let stdout = cmd.stdout.take().ok_or_else(|| {
        std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stdout.")
    })?;
    let stderr = cmd.stderr.take().ok_or_else(|| {
        std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stderr.")
    })?;
    let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());
    let stderr_reader = BufReader::new(stderr);
    let first_chunk = match stdout_reader.next().await {
        Some(Ok(bytes)) if !bytes.is_empty() => bytes,
        Some(Err(e)) => {
            return Err(SchemaDumpError::IO(e));
        }
        _ => {
            let mut lines = stderr_reader.lines();
            if let Some(line) = lines.next_line().await? {
                if line.contains(&format!("FATAL:  database \"{}\" does not exist", dbname)) {
                    return Err(SchemaDumpError::DatabaseDoesNotExist);
                }
                warn!("pg_dump stderr: {}", line)
            }
            tokio::spawn(async move {
                while let Ok(Some(line)) = lines.next_line().await {
                    warn!("pg_dump stderr: {}", line)
                }
            });
            return Err(SchemaDumpError::IO(std::io::Error::new(
                std::io::ErrorKind::Other,
                "failed to start pg_dump",
            )));
        }
    };
    let initial_stream = stream::once(Ok(first_chunk.freeze()));
    // Consume stderr and log warnings
    tokio::spawn(async move {
        let mut lines = stderr_reader.lines();
        while let Ok(Some(line)) = lines.next_line().await {
            warn!("pg_dump stderr: {}", line)
        }
    });
    Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))))
 }
--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -56,7 +56,6 @@ pub struct ComputeNode {
    /// - we push new spec and it does reconfiguration
    /// - but then something happens and compute pod / VM is destroyed,
    ///   so k8s controller starts it again with the **old** spec
    ///
    /// and the same for empty computes:
    /// - we started compute without any spec
    /// - we push spec and it does configuration
@@ -400,15 +399,7 @@ impl ComputeNode {
    pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
        let mut retry_period_ms = 500.0;
        let mut attempts = 0;
-        const DEFAULT_ATTEMPTS: u16 = 10;
+        let max_attempts = 10;
        #[cfg(feature = "testing")]
        let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") {
            u16::from_str(&v).unwrap()
        } else {
            DEFAULT_ATTEMPTS
        };
        #[cfg(not(feature = "testing"))]
        let max_attempts = DEFAULT_ATTEMPTS;
        loop {
            let result = self.try_get_basebackup(compute_state, lsn);
            match result {
@@ -807,11 +798,7 @@ impl ComputeNode {
        // In this case we need to connect with old `zenith_admin` name
        // and create new user. We cannot simply rename connected user,
        // but we can create a new one and grant it all privileges.
-        let mut connstr = self.connstr.clone();
+        let connstr = self.connstr.clone();
        connstr
            .query_pairs_mut()
            .append_pair("application_name", "apply_config");
        let mut client = match Client::connect(connstr.as_str(), NoTls) {
            Err(e) => match e.code() {
                Some(&SqlState::INVALID_PASSWORD)
@@ -831,15 +818,9 @@ impl ComputeNode {
                        Client::connect(zenith_admin_connstr.as_str(), NoTls)
                            .context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
                    // Disable forwarding so that users don't get a cloud_admin role
-
+                    client.simple_query("SET neon.forward_ddl = false")?;
-                    let mut func = || {
+                    client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
-                        client.simple_query("SET neon.forward_ddl = false")?;
+                    client.simple_query("GRANT zenith_admin TO cloud_admin")?;
                        client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
                        client.simple_query("GRANT zenith_admin TO cloud_admin")?;
                        Ok::<_, anyhow::Error>(())
                    };
                    func().context("apply_config setup cloud_admin")?;
                    drop(client);
                    // reconnect with connstring with expected name
@@ -851,48 +832,39 @@ impl ComputeNode {
        };
        // Disable DDL forwarding because control plane already knows about these roles/databases.
-        client
+        client.simple_query("SET neon.forward_ddl = false")?;
            .simple_query("SET neon.forward_ddl = false")
            .context("apply_config SET neon.forward_ddl = false")?;
        // Proceed with post-startup configuration. Note, that order of operations is important.
        let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
-        create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
+        create_neon_superuser(spec, &mut client)?;
-        cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
+        cleanup_instance(&mut client)?;
-        handle_roles(spec, &mut client).context("apply_config handle_roles")?;
+        handle_roles(spec, &mut client)?;
-        handle_databases(spec, &mut client).context("apply_config handle_databases")?;
+        handle_databases(spec, &mut client)?;
-        handle_role_deletions(spec, connstr.as_str(), &mut client)
+        handle_role_deletions(spec, connstr.as_str(), &mut client)?;
            .context("apply_config handle_role_deletions")?;
        handle_grants(
            spec,
            &mut client,
            connstr.as_str(),
            self.has_feature(ComputeFeature::AnonExtension),
-        )
+        )?;
-        .context("apply_config handle_grants")?;
+        handle_extensions(spec, &mut client)?;
-        handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
+        handle_extension_neon(&mut client)?;
-        handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
+        create_availability_check_data(&mut client)?;
        create_availability_check_data(&mut client)
            .context("apply_config create_availability_check_data")?;
        // 'Close' connection
        drop(client);
        // Run migrations separately to not hold up cold starts
        thread::spawn(move || {
            let mut connstr = connstr.clone();
            connstr
                .query_pairs_mut()
                .append_pair("application_name", "migrations");
            let mut client = Client::connect(connstr.as_str(), NoTls)?;
-            handle_migrations(&mut client).context("apply_config handle_migrations")
+            handle_migrations(&mut client)
        });
        Ok(())
    }
-    // Wrapped this around `pg_ctl reload`, but right now we don't use
+    // We could've wrapped this around `pg_ctl reload`, but right now we don't use
-    // `pg_ctl` for start / stop.
+    // `pg_ctl` for start / stop, so this just seems much easier to do as we already
    // have opened connection to Postgres and superuser access.
    #[instrument(skip_all)]
    fn pg_reload_conf(&self) -> Result<()> {
        let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl");
@@ -935,39 +907,38 @@ impl ComputeNode {
        // temporarily reset max_cluster_size in config
        // to avoid the possibility of hitting the limit, while we are reconfiguring:
        // creating new extensions, roles, etc...
-        config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
+        config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
-            self.pg_reload_conf()?;
+        self.pg_reload_conf()?;
-            let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
+        let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
-            // Proceed with post-startup configuration. Note, that order of operations is important.
+        // Proceed with post-startup configuration. Note, that order of operations is important.
-            // Disable DDL forwarding because control plane already knows about these roles/databases.
+        // Disable DDL forwarding because control plane already knows about these roles/databases.
-            if spec.mode == ComputeMode::Primary {
+        if spec.mode == ComputeMode::Primary {
-                client.simple_query("SET neon.forward_ddl = false")?;
+            client.simple_query("SET neon.forward_ddl = false")?;
-                cleanup_instance(&mut client)?;
+            cleanup_instance(&mut client)?;
-                handle_roles(&spec, &mut client)?;
+            handle_roles(&spec, &mut client)?;
-                handle_databases(&spec, &mut client)?;
+            handle_databases(&spec, &mut client)?;
-                handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
+            handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
-                handle_grants(
+            handle_grants(
-                    &spec,
+                &spec,
-                    &mut client,
+                &mut client,
-                    self.connstr.as_str(),
+                self.connstr.as_str(),
-                    self.has_feature(ComputeFeature::AnonExtension),
+                self.has_feature(ComputeFeature::AnonExtension),
-                )?;
+            )?;
-                handle_extensions(&spec, &mut client)?;
+            handle_extensions(&spec, &mut client)?;
-                handle_extension_neon(&mut client)?;
+            handle_extension_neon(&mut client)?;
-                // We can skip handle_migrations here because a new migration can only appear
+            // We can skip handle_migrations here because a new migration can only appear
-                // if we have a new version of the compute_ctl binary, which can only happen
+            // if we have a new version of the compute_ctl binary, which can only happen
-                // if compute got restarted, in which case we'll end up inside of apply_config
+            // if compute got restarted, in which case we'll end up inside of apply_config
-                // instead of reconfigure.
+            // instead of reconfigure.
-            }
+        }
-            // 'Close' connection
+        // 'Close' connection
-            drop(client);
+        drop(client);
            Ok(())
        })?;
        // reset max_cluster_size in config back to original value and reload config
        config::compute_ctl_temp_override_remove(pgdata_path)?;
        self.pg_reload_conf()?;
        let unknown_op = "unknown".to_string();
@@ -1058,17 +1029,12 @@ impl ComputeNode {
                // temporarily reset max_cluster_size in config
                // to avoid the possibility of hitting the limit, while we are applying config:
                // creating new extensions, roles, etc...
-                config::with_compute_ctl_tmp_override(
+                config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
-                    pgdata_path,
+                self.pg_reload_conf()?;
                    "neon.max_cluster_size=-1",
                    || {
                        self.pg_reload_conf()?;
-                        self.apply_config(&compute_state)?;
+                self.apply_config(&compute_state)?;
-                        Ok(())
+                config::compute_ctl_temp_override_remove(pgdata_path)?;
                    },
                )?;
                self.pg_reload_conf()?;
            }
            self.post_apply_config()?;
@@ -1125,7 +1091,7 @@ impl ComputeNode {
    // EKS worker nodes have following core dump settings:
    //   /proc/sys/kernel/core_pattern -> core
    //   /proc/sys/kernel/core_uses_pid -> 1
-    //   ulimit -c -> unlimited
+    //   ulimit -c -> unlimited
    // which results in core dumps being written to postgres data directory as core.<pid>.
    //
    // Use that as a default location and pattern, except macos where core dumps are written
@@ -1296,12 +1262,10 @@ LIMIT 100",
        .await
        .map_err(DownloadError::Other);
-        if download_size.is_ok() {
+        self.ext_download_progress
-            self.ext_download_progress
+            .write()
-                .write()
+            .expect("bad lock")
-                .expect("bad lock")
+            .insert(ext_archive_name.to_string(), (download_start, true));
                .insert(ext_archive_name.to_string(), (download_start, true));
        }
        download_size
    }
@@ -1404,9 +1368,7 @@ pub fn forward_termination_signal() {
    let pg_pid = PG_PID.load(Ordering::SeqCst);
    if pg_pid != 0 {
        let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
-        // Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for
+        // use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html
-        // ROs to get a list of running xacts faster instead of going through the CLOG.
+        kill(pg_pid, Signal::SIGQUIT).ok();
        // See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.
        kill(pg_pid, Signal::SIGINT).ok();
    }
 }
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -6,8 +6,8 @@ use std::path::Path;
 use anyhow::Result;
 use crate::pg_helpers::escape_conf_value;
-use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
+use crate::pg_helpers::PgOptionsSerialize;
-use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
+use compute_api::spec::{ComputeMode, ComputeSpec};
 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -83,27 +83,12 @@ pub fn write_postgres_conf(
        ComputeMode::Replica => {
            // hot_standby is 'on' by default, but let's be explicit
            writeln!(file, "hot_standby=on")?;
        }
    }
-    if cfg!(target_os = "linux") {
+            // Inform the replica about the primary state
-        // Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is
+            // Default is 'false'
-        // disabled), then the control plane has enabled swap and we should set
+            if let Some(primary_is_running) = spec.primary_is_running {
-        // dynamic_shared_memory_type = 'mmap'.
+                writeln!(file, "neon.primary_is_running={}", primary_is_running)?;
-        //
+            }
        // This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.
        let overcommit_memory_contents = std::fs::read_to_string("/proc/sys/vm/overcommit_memory")
            // ignore any errors - they may be expected to occur under certain situations (e.g. when
            // not running in Linux).
            .unwrap_or_else(|_| String::new());
        if overcommit_memory_contents.trim() == "2" {
            let opt = GenericOption {
                name: "dynamic_shared_memory_type".to_owned(),
                value: Some("mmap".to_owned()),
                vartype: "enum".to_owned(),
            };
            write!(file, "{}", opt.to_pg_setting())?;
        }
    }
@@ -125,17 +110,18 @@ pub fn write_postgres_conf(
    Ok(())
 }
-pub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()>
+/// create file compute_ctl_temp_override.conf in pgdata_dir
-where
+/// add provided options to this file
-    F: FnOnce() -> Result<()>,
+pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
 {
    let path = pgdata_path.join("compute_ctl_temp_override.conf");
    let mut file = File::create(path)?;
    write!(file, "{}", options)?;
-
+    Ok(())
-    let res = exec();
+}
-
+
-    file.set_len(0)?;
+/// remove file compute_ctl_temp_override.conf in pgdata_dir
-
+pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
-    res
+    let path = pgdata_path.join("compute_ctl_temp_override.conf");
    std::fs::remove_file(path)?;
    Ok(())
 }
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -5,21 +5,17 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::thread;
 use crate::catalog::SchemaDumpError;
 use crate::catalog::{get_database_schema, get_dbs_and_roles};
 use crate::compute::forward_termination_signal;
 use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
 use compute_api::requests::ConfigurationRequest;
 use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
 use anyhow::Result;
 use hyper::header::CONTENT_TYPE;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
 use tokio::task;
-use tracing::{debug, error, info, warn};
+use tracing::{error, info, warn};
 use tracing_utils::http::OtelName;
 use utils::http::request::must_get_query_param;
 fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
    ComputeStatusResponse {
@@ -48,7 +44,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
    match (req.method(), req.uri().path()) {
        // Serialized compute state.
        (&Method::GET, "/status") => {
-            debug!("serving /status GET request");
+            info!("serving /status GET request");
            let state = compute.state.lock().unwrap();
            let status_response = status_response_from_state(&state);
            Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
@@ -137,34 +133,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            }
        }
        (&Method::GET, "/dbs_and_roles") => {
            info!("serving /dbs_and_roles GET request",);
            match get_dbs_and_roles(compute).await {
                Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
                Err(_) => {
                    render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
                }
            }
        }
        (&Method::GET, "/database_schema") => {
            let database = match must_get_query_param(&req, "database") {
                Err(e) => return e.into_response(),
                Ok(database) => database,
            };
            info!("serving /database_schema GET request with database: {database}",);
            match get_database_schema(compute, &database).await {
                Ok(res) => render_plain(Body::wrap_stream(res)),
                Err(SchemaDumpError::DatabaseDoesNotExist) => {
                    render_json_error("database does not exist", StatusCode::NOT_FOUND)
                }
                Err(e) => {
                    error!("can't get schema dump: {}", e);
                    render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
                }
            }
        }
        // download extension files from remote extension storage on demand
        (&Method::POST, route) if route.starts_with("/extension_server/") => {
            info!("serving {:?} POST request", route);
@@ -335,25 +303,10 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
    };
    Response::builder()
        .status(status)
        .header(CONTENT_TYPE, "application/json")
        .body(Body::from(serde_json::to_string(&error).unwrap()))
        .unwrap()
 }
 /// Wrap `body` in a response whose `Content-Type` header is `application/json`.
 ///
 /// Panics if the response builder reports an error.
 fn render_json(body: Body) -> Response<Body> {
    let json_response = Response::builder().header(CONTENT_TYPE, "application/json");
    json_response.body(body).unwrap()
 }
 /// Wrap `body` in a response whose `Content-Type` header is `text/plain`.
 ///
 /// Panics if the response builder reports an error.
 fn render_plain(body: Body) -> Response<Body> {
    let plain_response = Response::builder().header(CONTENT_TYPE, "text/plain");
    plain_response.body(body).unwrap()
 }
 async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
    {
        let mut state = compute.state.lock().unwrap();
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -68,51 +68,6 @@ paths:
              schema:
                $ref: "#/components/schemas/Info"
  /dbs_and_roles:
    get:
      tags:
        - Info
      summary: Get databases and roles in the catalog.
      description: ""
      operationId: getDbsAndRoles
      responses:
        200:
          description: Compute schema objects
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DbsAndRoles"
  /database_schema:
    get:
      tags:
        - Info
      summary: Get schema dump
      parameters:
        - name: database
          in: query
          description: Database name to dump.
          required: true
          schema:
            type: string
          example: "postgres"
      description: Get schema dump in SQL format.
      operationId: getDatabaseSchema
      responses:
        200:
          description: Schema dump
          content:
            text/plain:
              schema:
                type: string
                description: Schema dump in SQL format.
        404:
          description: Database does not exist.
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/GenericError"
  /check_writability:
    post:
      tags:
@@ -274,73 +229,6 @@ components:
        num_cpus:
          type: integer
    DbsAndRoles:
      type: object
      description: Databases and Roles
      required:
        - roles
        - databases
      properties:
        roles:
          type: array
          items:
            $ref: "#/components/schemas/Role"
        databases:
          type: array
          items:
            $ref: "#/components/schemas/Database"
    Database:
      type: object
      description: Database
      required:
        - name
        - owner
        - restrict_conn
        - invalid
      properties:
        name:
          type: string
        owner:
          type: string
        options:
          type: array
          items:
            $ref: "#/components/schemas/GenericOption"
        restrict_conn:
          type: boolean
        invalid:
          type: boolean
    Role:
      type: object
      description: Role
      required:
        - name
      properties:
        name:
          type: string
        encrypted_password:
          type: string
        options:
          type: array
          items:
            $ref: "#/components/schemas/GenericOption"
    GenericOption:
      type: object
      description: Schema Generic option
      required:
        - name
        - vartype
      properties:
        name:
          type: string
        value:
          type: string
        vartype:
          type: string
    ComputeState:
      type: object
      required:
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -8,13 +8,10 @@ pub mod configurator;
 pub mod http;
 #[macro_use]
 pub mod logger;
 pub mod catalog;
 pub mod compute;
 pub mod extension_server;
 mod migration;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
 pub mod spec;
 pub mod swap;
 pub mod sync_sk;
--- a/compute_tools/src/migration.rs
+++ b/compute_tools/src/migration.rs
@@ -1,105 +0,0 @@
 use anyhow::{Context, Result};
 use postgres::Client;
 use tracing::info;
 /// Applies an ordered list of SQL migrations through a borrowed Postgres client.
 pub(crate) struct MigrationRunner<'m> {
    /// Client on which all migration and bookkeeping queries are executed.
    client: &'m mut Client,
    /// SQL text of each migration, in order; the entry at index `i` has migration ID `i + 1`.
    migrations: &'m [&'m str],
 }
 impl<'m> MigrationRunner<'m> {
    /// Create a runner that applies `migrations`, in order, over `client`.
    ///
    /// # Panics
    ///
    /// Panics if there are so many migrations that their 1-based IDs would not fit
    /// the `bigint` (`i64`) column `neon_migration.migration_id.id`.
    pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
        // The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
        assert!(migrations.len() + 1 < i64::MAX as usize);
        Self { client, migrations }
    }
    /// Read the ID of the last applied migration from `neon_migration.migration_id`.
    ///
    /// `prepare_migrations` seeds the row with 0, so 0 means "nothing applied yet".
    fn get_migration_id(&mut self) -> Result<i64> {
        let query = "SELECT id FROM neon_migration.migration_id";
        let row = self
            .client
            .query_one(query, &[])
            .context("run_migrations get migration_id")?;
        Ok(row.get::<&str, i64>("id"))
    }
    /// Record `migration_id` as the last applied migration.
    fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
        // `migration_id` is an integer, so formatting it into the statement is safe.
        let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
        self.client
            .simple_query(&setval)
            .context("run_migrations update id")?;
        Ok(())
    }
    /// Idempotently create the `neon_migration` schema and its single-row bookkeeping
    /// table, hand the schema to `cloud_admin`, and revoke access from `PUBLIC`.
    fn prepare_migrations(&mut self) -> Result<()> {
        let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
        self.client.simple_query(query)?;
        let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
        self.client.simple_query(query)?;
        let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
        self.client.simple_query(query)?;
        let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
        self.client.simple_query(query)?;
        let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
        self.client.simple_query(query)?;
        Ok(())
    }
    /// Execute one migration and record its ID. The caller is expected to have opened
    /// a transaction, so that both statements commit or roll back together.
    fn apply_migration(&mut self, migration_id: i64, migration: &str) -> Result<()> {
        self.client
            .simple_query(migration)
            .with_context(|| format!("run_migrations migration id={}", migration_id))?;
        // Migration IDs start at 1
        self.update_migration_id(migration_id)
    }
    /// Apply every migration that has not been applied yet, each in its own transaction.
    ///
    /// Migrations whose text starts with `-- SKIP` are logged and not executed.
    ///
    /// # Errors
    ///
    /// Returns the first error encountered. A stored migration ID that cannot be
    /// converted to `usize` (for example a negative one) is reported as an error
    /// instead of being treated as "everything already applied". When a migration
    /// fails, its transaction is rolled back before returning, so the client is not
    /// left inside an aborted transaction.
    pub fn run_migrations(mut self) -> Result<()> {
        self.prepare_migrations()?;
        let stored_id = self.get_migration_id()?;
        let first_pending = usize::try_from(stored_id)
            .with_context(|| format!("run_migrations invalid stored migration id={}", stored_id))?;
        // Copy the slice reference out so that `self` can be borrowed mutably in the loop.
        let migrations = self.migrations;
        for (index, &migration) in migrations.iter().enumerate().skip(first_pending) {
            // Cannot overflow or truncate: `new` asserted that `len() + 1` fits in an i64.
            let migration_id = (index + 1) as i64;
            if migration.starts_with("-- SKIP") {
                info!("Skipping migration id={}", migration_id);
                continue;
            }
            info!(
                "Running migration id={}:\n{}\n",
                migration_id,
                migration
            );
            self.client
                .simple_query("BEGIN")
                .context("begin migration")?;
            if let Err(err) = self.apply_migration(migration_id, migration) {
                // Best-effort rollback; the migration error stays in the error chain
                // even when the rollback itself fails.
                return Err(match self.client.simple_query("ROLLBACK") {
                    Ok(_) => err,
                    Err(rollback_err) => err.context(format!(
                        "migration id={} failed and ROLLBACK also failed: {}",
                        migration_id, rollback_err
                    )),
                });
            }
            self.client
                .simple_query("COMMIT")
                .context("commit migration")?;
            info!("Finished migration id={}", migration_id);
        }
        Ok(())
    }
 }
--- a/compute_tools/src/migrations/0001-neon_superuser_bypass_rls.sql
+++ b/compute_tools/src/migrations/0001-neon_superuser_bypass_rls.sql
@@ -1 +0,0 @@
 ALTER ROLE neon_superuser BYPASSRLS;
--- a/compute_tools/src/migrations/0002-alter_roles.sql
+++ b/compute_tools/src/migrations/0002-alter_roles.sql
@@ -1,18 +0,0 @@
 -- Align role attributes with neon_superuser membership:
 --   * every role that is a member of neon_superuser gets INHERIT;
 --   * every other role whose name does not start with 'pg_' gets NOBYPASSRLS.
 DO $$
 DECLARE
    role_name text;
 BEGIN
    -- Members of neon_superuser.
    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
    END LOOP;
    -- Non-members, excluding roles named pg_*.
    FOR role_name IN SELECT rolname FROM pg_roles
        WHERE
            NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
    END LOOP;
 END $$;
--- a/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql
@@ -1,6 +0,0 @@
 -- Grant pg_create_subscription to neon_superuser, but only when
 -- server_version_num is at least 160000.
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
        -- Run through EXECUTE so the GRANT is only parsed when the version check passes.
        EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
    END IF;
 END $$;
--- a/compute_tools/src/migrations/0004-grant_pg_monitor_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0004-grant_pg_monitor_to_neon_superuser.sql
@@ -1 +0,0 @@
 GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;
--- a/compute_tools/src/migrations/0005-grant_all_on_tables_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0005-grant_all_on_tables_to_neon_superuser.sql
@@ -1,4 +0,0 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
 --       interacted with by neon_superuser without permission issues.
 ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;
--- a/compute_tools/src/migrations/0006-grant_all_on_sequences_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0006-grant_all_on_sequences_to_neon_superuser.sql
@@ -1,4 +0,0 @@
 -- SKIP: Deemed insufficient for allowing relations created by extensions to be
 --       interacted with by neon_superuser without permission issues.
 ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;
--- a/compute_tools/src/migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
+++ b/compute_tools/src/migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql
@@ -1,3 +0,0 @@
 -- SKIP: Moved inline to the handle_grants() functions.
 ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;
--- a/compute_tools/src/migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
+++ b/compute_tools/src/migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql
@@ -1,3 +0,0 @@
 -- SKIP: Moved inline to the handle_grants() functions.
 ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;
--- a/compute_tools/src/migrations/0009-revoke_replication_for_previously_allowed_roles.sql
+++ b/compute_tools/src/migrations/0009-revoke_replication_for_previously_allowed_roles.sql
@@ -1,13 +0,0 @@
 -- SKIP: The original goal of this migration was to prevent creating
 --       subscriptions, but this migration was insufficient.
 DO $$
 DECLARE
    role_name TEXT;
 BEGIN
    FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
    END LOOP;
 END $$;
--- a/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
+++ b/compute_tools/src/migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql
@@ -1,7 +0,0 @@
 -- Let neon_superuser execute pg_export_snapshot and pg_log_standby_snapshot,
 -- but only when server_version_num is at least 160000.
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
       EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
    END IF;
 END $$;
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -17,11 +17,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
 // should be handled gracefully.
 fn watch_compute_activity(compute: &ComputeNode) {
    // Suppose that `connstr` doesn't change
-    let mut connstr = compute.connstr.clone();
+    let connstr = compute.connstr.as_str();
    connstr
        .query_pairs_mut()
        .append_pair("application_name", "compute_activity_monitor");
    let connstr = connstr.as_str();
    // During startup and configuration we connect to every Postgres database,
    // but we don't want to count this as some user activity. So wait until
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -44,7 +44,7 @@ pub fn escape_conf_value(s: &str) -> String {
    format!("'{}'", res)
 }
-pub trait GenericOptionExt {
+trait GenericOptionExt {
    fn to_pg_option(&self) -> String;
    fn to_pg_setting(&self) -> String;
 }
@@ -489,7 +489,7 @@ pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()>
 /// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
 /// - next line starts with timestamp
 /// - EOF
-/// - no new lines were written for the last 100 milliseconds
+/// - no new lines were written for the last second
 async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
    let mut lines = tokio::io::BufReader::new(stderr).lines();
    let timeout_duration = Duration::from_millis(100);
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -2,7 +2,7 @@ use std::fs::File;
 use std::path::Path;
 use std::str::FromStr;
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{anyhow, bail, Result};
 use postgres::config::Config;
 use postgres::{Client, NoTls};
 use reqwest::StatusCode;
@@ -10,7 +10,6 @@ use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
 use crate::config;
 use crate::logger::inlinify;
 use crate::migration::MigrationRunner;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;
@@ -303,9 +302,9 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
            RoleAction::Create => {
                // This branch only runs when roles are created through the console, so it is
                // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
-                // from neon_superuser.
+                // from neon_superuser. (NOTE: REPLICATION has been removed from here for now).
                let mut query: String = format!(
-                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
+                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser",
                    name.pg_quote()
                );
                info!("running role create query: '{}'", &query);
@@ -491,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                "rename_db" => {
                    let new_name = op.new_name.as_ref().unwrap();
-                    if existing_dbs.contains_key(&op.name) {
+                    if existing_dbs.get(&op.name).is_some() {
                        let query: String = format!(
                            "ALTER DATABASE {} RENAME TO {}",
                            op.name.pg_quote(),
@@ -699,8 +698,7 @@ pub fn handle_grants(
        // it is important to run this after all grants
        if enable_anon_extension {
-            handle_extension_anon(spec, &db.owner, &mut db_client, false)
+            handle_extension_anon(spec, &db.owner, &mut db_client, false)?;
                .context("handle_grants handle_extension_anon")?;
        }
    }
@@ -745,24 +743,21 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
    // which may happen in two cases:
    // - extension was just installed
    // - extension was already installed and is up to date
-    let query = "ALTER EXTENSION neon UPDATE";
+    // DISABLED due to compute node unpinning epic
-    info!("update neon extension version with query: {}", query);
+    // let query = "ALTER EXTENSION neon UPDATE";
-    if let Err(e) = client.simple_query(query) {
+    // info!("update neon extension version with query: {}", query);
-        error!(
+    // client.simple_query(query)?;
            "failed to upgrade neon extension during `handle_extension_neon`: {}",
            e
        );
    }
    Ok(())
 }
 #[instrument(skip_all)]
-pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
+pub fn handle_neon_extension_upgrade(_client: &mut Client) -> Result<()> {
-    info!("handle neon extension upgrade");
+    info!("handle neon extension upgrade (not really)");
-    let query = "ALTER EXTENSION neon UPDATE";
+    // DISABLED due to compute node unpinning epic
-    info!("update neon extension version with query: {}", query);
+    // let query = "ALTER EXTENSION neon UPDATE";
-    client.simple_query(query)?;
+    // info!("update neon extension version with query: {}", query);
    // client.simple_query(query)?;
    Ok(())
 }
@@ -775,27 +770,103 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
    // !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
    // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    // Add new migrations in numerical order.
    let migrations = [
-        include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
+        "ALTER ROLE neon_superuser BYPASSRLS",
-        include_str!("./migrations/0002-alter_roles.sql"),
+        r#"
-        include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
+DO $$
-        include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
+DECLARE
-        include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
+    role_name text;
-        include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
+BEGIN
-        include_str!(
+    FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
-            "./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
+    LOOP
-        ),
+        RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
-        include_str!(
+        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
-            "./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
+    END LOOP;
-        ),
+
-        include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
+    FOR role_name IN SELECT rolname FROM pg_roles
-        include_str!(
+        WHERE
-            "./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
+            NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
-        ),
+    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
    END LOOP;
 END $$;
 "#,
        r#"
 DO $$
 BEGIN
    IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
        EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
    END IF;
 END
 $$;"#,
        "GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
        // Don't remove: these are some SQLs that we originally applied in migrations but turned out to execute somewhere else.
        "",
        "",
        "",
        "",
        // Add new migrations below.
        r#"
 DO $$
 DECLARE
    role_name TEXT;
 BEGIN
    FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
    LOOP
        RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
        EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
    END LOOP;
 END
 $$;"#,
    ];
-    MigrationRunner::new(client, &migrations).run_migrations()?;
+    let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
    client.simple_query(query)?;
    query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
    client.simple_query(query)?;
    query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
    client.simple_query(query)?;
    query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
    client.simple_query(query)?;
    query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
    client.simple_query(query)?;
    query = "SELECT id FROM neon_migration.migration_id";
    let row = client.query_one(query, &[])?;
    let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
    let starting_migration_id = current_migration;
    query = "BEGIN";
    client.simple_query(query)?;
    while current_migration < migrations.len() {
        let migration = &migrations[current_migration];
        if migration.is_empty() {
            info!("Skip migration id={}", current_migration);
        } else {
            info!("Running migration:\n{}\n", migration);
            client.simple_query(migration)?;
        }
        current_migration += 1;
    }
    let setval = format!(
        "UPDATE neon_migration.migration_id SET id={}",
        migrations.len()
    );
    client.simple_query(&setval)?;
    query = "COMMIT";
    client.simple_query(query)?;
    info!(
        "Ran {} migrations",
        (migrations.len() - starting_migration_id)
    );
    Ok(())
 }
--- a/compute_tools/src/swap.rs
+++ b/compute_tools/src/swap.rs
@@ -1,45 +0,0 @@
 use std::path::Path;
 use anyhow::{anyhow, Context};
 use tracing::warn;
 pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
 pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
    // run `/neonvm/bin/resize-swap --once {size_bytes}`
    //
    // Passing '--once' causes resize-swap to delete itself after successful completion, which
    // means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
    // postgres is running.
    //
    // NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
    let child_result = std::process::Command::new("/usr/bin/sudo")
        .arg(RESIZE_SWAP_BIN)
        .arg("--once")
        .arg(size_bytes.to_string())
        .spawn();
    child_result
        .context("spawn() failed")
        .and_then(|mut child| child.wait().context("wait() failed"))
        .and_then(|status| match status.success() {
            true => Ok(()),
            false => {
                // The command failed. Maybe it was because the resize-swap file doesn't exist?
                // The --once flag causes it to delete itself on success so we don't disable swap
                // while postgres is running; maybe this is fine.
                match Path::new(RESIZE_SWAP_BIN).try_exists() {
                    Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
                    // The path doesn't exist; we're actually ok 
                    Ok(false) => {
                        warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
                        Ok(())
                    },
                }
            }
        })
        // wrap any prior error with the overall context that we couldn't run the command
        .with_context(|| {
            format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
        })
 }
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -17,7 +17,6 @@ nix.workspace = true
 once_cell.workspace = true
 postgres.workspace = true
 hex.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["blocking", "json"] }
@@ -28,7 +27,6 @@ serde_with.workspace = true
 tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 toml_edit.workspace = true
 tokio.workspace = true
 tokio-postgres.workspace = true
 tokio-util.workspace = true
@@ -40,7 +38,6 @@ safekeeper_api.workspace = true
 postgres_connection.workspace = true
 storage_broker.workspace = true
 utils.workspace = true
 whoami.workspace = true
 compute_api.workspace = true
 workspace_hack.workspace = true
--- a/control_plane/attachment_service/Cargo.toml
+++ b/control_plane/attachment_service/Cargo.toml
@@ -1,5 +1,5 @@
 [package]
-name = "storage_controller"
+name = "attachment_service"
 version = "0.1.0"
 edition.workspace = true
 license.workspace = true
@@ -18,7 +18,6 @@ anyhow.workspace = true
 aws-config.workspace = true
 bytes.workspace = true
 camino.workspace = true
 chrono.workspace = true
 clap.workspace = true
 fail.workspace = true
 futures.workspace = true
@@ -26,14 +25,12 @@ git-version.workspace = true
 hex.workspace = true
 hyper.workspace = true
 humantime.workspace = true
 itertools.workspace = true
 lasso.workspace = true
 once_cell.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 postgres_connection.workspace = true
-rand.workspace = true
+reqwest.workspace = true
 reqwest = { workspace = true, features = ["stream"] }
 routerify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -42,20 +39,13 @@ tokio.workspace = true
 tokio-util.workspace = true
 tracing.workspace = true
 measured.workspace = true
 scopeguard.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
-diesel = { version = "2.1.4", features = [
+diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] }
    "serde_json",
    "postgres",
    "r2d2",
    "chrono",
 ] }
 diesel_migrations = { version = "2.1.0" }
 r2d2 = { version = "0.8.10" }
-utils = { path = "../libs/utils/" }
+utils = { path = "../../libs/utils/" }
-metrics = { path = "../libs/metrics/" }
+metrics = { path = "../../libs/metrics/" }
-control_plane = { path = "../control_plane" }
+control_plane = { path = ".." }
-workspace_hack = { version = "0.1", path = "../workspace_hack" }
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/control_plane/attachment_service/migrations/.keep
+++ b/control_plane/attachment_service/migrations/.keep
--- a/control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/down.sql
+++ b/control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/down.sql
--- a/control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/up.sql
+++ b/control_plane/attachment_service/migrations/00000000000000_diesel_initial_setup/up.sql
--- a/control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/down.sql
+++ b/control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/down.sql
--- a/control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/up.sql
+++ b/control_plane/attachment_service/migrations/2024-01-07-211257_create_tenant_shards/up.sql
--- a/control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/down.sql
+++ b/control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/down.sql
--- a/control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/up.sql
+++ b/control_plane/attachment_service/migrations/2024-01-07-212945_create_nodes/up.sql
--- a/control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/down.sql
+++ b/control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/down.sql
--- a/control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/up.sql
+++ b/control_plane/attachment_service/migrations/2024-02-29-094122_generations_null/up.sql
--- a/control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/down.sql
+++ b/control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/down.sql
--- a/control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/up.sql
+++ b/control_plane/attachment_service/migrations/2024-03-18-184429_rename_policy/up.sql
--- a/control_plane/attachment_service/src/auth.rs
+++ b/control_plane/attachment_service/src/auth.rs
--- a/control_plane/attachment_service/src/compute_hook.rs
+++ b/control_plane/attachment_service/src/compute_hook.rs
@@ -0,0 +1,462 @@
 use std::{collections::HashMap, time::Duration};
 use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
 use control_plane::local_env::LocalEnv;
 use hyper::{Method, StatusCode};
 use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId};
 use postgres_connection::parse_host_port;
 use serde::{Deserialize, Serialize};
 use tokio_util::sync::CancellationToken;
 use utils::{
    backoff::{self},
    id::{NodeId, TenantId},
 };
 use crate::service::Config;
 const BUSY_DELAY: Duration = Duration::from_secs(1);
 const SLOWDOWN_DELAY: Duration = Duration::from_secs(5);
 pub(crate) const API_CONCURRENCY: usize = 32;
 /// Location state accumulated for a tenant that has more than one shard.
 struct ShardedComputeHookTenant {
    /// Stripe size that every entry in `shards` was reported with
    stripe_size: ShardStripeSize,
    /// Shard count that every entry in `shards` was reported with
    shard_count: ShardCount,
    /// `(shard number, node)` pairs learned so far; `ComputeHookTenant::update` re-sorts this by
    /// shard number whenever it appends an entry
    shards: Vec<(ShardNumber, NodeId)>,
 }
 /// What the compute hook currently knows about where one tenant's shards are located.
 enum ComputeHookTenant {
    /// Tenant tracked as a single location: the node it was last reported on
    Unsharded(NodeId),
    /// Tenant with several shards, whose locations are collected one shard at a time
    Sharded(ShardedComputeHookTenant),
 }
 impl ComputeHookTenant {
    /// Build the state for a tenant from the first shard location we learn about.
    ///
    /// A shard id whose shard count is greater than one yields the `Sharded` variant seeded with
    /// that single shard; anything else is tracked as `Unsharded`.
    fn new(tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize, node_id: NodeId) -> Self {
        if tenant_shard_id.shard_count.count() <= 1 {
            return Self::Unsharded(node_id);
        }

        Self::Sharded(ShardedComputeHookTenant {
            stripe_size,
            shard_count: tenant_shard_id.shard_count,
            shards: vec![(tenant_shard_id.shard_number, node_id)],
        })
    }

    /// Record the location of one shard.
    ///
    /// When the update does not fit the current state (the tenant switched between unsharded and
    /// sharded, or the stripe size or shard count differ), everything known so far is discarded
    /// and the state restarts from this shard alone.
    fn update(
        &mut self,
        tenant_shard_id: TenantShardId,
        stripe_size: ShardStripeSize,
        node_id: NodeId,
    ) {
        match self {
            Self::Unsharded(current) if tenant_shard_id.shard_count.count() == 1 => {
                *current = node_id;
            }
            Self::Sharded(sharded)
                if sharded.stripe_size == stripe_size
                    && sharded.shard_count == tenant_shard_id.shard_count =>
            {
                let shard_number = tenant_shard_id.shard_number;
                if let Some(entry) = sharded
                    .shards
                    .iter_mut()
                    .find(|entry| entry.0 == shard_number)
                {
                    // Shard already known: just move it to the new node.
                    entry.1 = node_id;
                } else {
                    // First time we see this shard: add it and keep the list ordered.
                    sharded.shards.push((shard_number, node_id));
                    sharded.shards.sort_by_key(|entry| entry.0);
                }
            }
            _ => {
                // Layout changed: start over from this shard.
                *self = Self::new(tenant_shard_id, stripe_size, node_id);
            }
        }
    }
 }
 /// One shard's entry in a [`ComputeHookNotifyRequest`]: which node serves which shard number
 #[derive(Serialize, Deserialize, Debug)]
 struct ComputeHookNotifyRequestShard {
    /// Node the shard is located on
    node_id: NodeId,
    /// Number of the shard within its tenant
    shard_number: ShardNumber,
 }
 /// Request body that we send to the control plane to notify it of where a tenant is attached
 #[derive(Serialize, Deserialize, Debug)]
 struct ComputeHookNotifyRequest {
    /// Tenant whose locations are being reported
    tenant_id: TenantId,
    /// `None` when built for an unsharded tenant, `Some` when built for a sharded one
    stripe_size: Option<ShardStripeSize>,
    /// One entry per shard of the tenant
    shards: Vec<ComputeHookNotifyRequestShard>,
 }
 /// Error type for attempts to call into the control plane compute notification hook
 #[derive(thiserror::Error, Debug)]
 pub(crate) enum NotifyError {
    /// The request was not sent successfully, e.g. a transport error
    #[error("Sending request: {0}")]
    Request(#[from] reqwest::Error),
    /// The request could not be serviced right now due to an ongoing operation in the control
    /// plane, but should be possible soon (423 response)
    #[error("Control plane tenant busy")]
    Busy,
    /// Explicit 429 response asking us to retry less frequently
    #[error("Control plane overloaded")]
    SlowDown,
    /// A 502, 503 or 504 response: the control plane can't handle the request right now
    #[error("Control plane unavailable (status {0})")]
    Unavailable(StatusCode),
    /// The API returned an unexpected non-success status.
    ///
    /// NOTE(review): this used to say "we will retry", but `do_notify` groups this variant with
    /// `Fatal` in the predicate it hands to `backoff::retry` — confirm whether it is retried.
    #[error("Control plane returned unexpected status {0}")]
    Unexpected(StatusCode),
    /// We shut down while sending
    #[error("Shutting down")]
    ShuttingDown,
    /// A response that indicates we will never succeed, such as 400, 401 or 403
    #[error("Non-retryable error {0}")]
    Fatal(StatusCode),
 }
 impl ComputeHookTenant {
    /// Produce the notification body for this tenant, if one can be sent yet.
    ///
    /// An unsharded tenant always yields a request with a single shard 0 entry and no stripe
    /// size.  A sharded tenant yields a request only once a location is known for every one of
    /// its shards; before that, `None` is returned and the shortfall is logged.
    fn maybe_reconfigure(&self, tenant_id: TenantId) -> Option<ComputeHookNotifyRequest> {
        let sharded = match self {
            Self::Unsharded(node_id) => {
                return Some(ComputeHookNotifyRequest {
                    tenant_id,
                    stripe_size: None,
                    shards: vec![ComputeHookNotifyRequestShard {
                        node_id: *node_id,
                        shard_number: ShardNumber(0),
                    }],
                });
            }
            Self::Sharded(sharded) => sharded,
        };

        if sharded.shards.len() != sharded.shard_count.count() as usize {
            // Sharded tenant doesn't yet have information for all its shards
            tracing::info!(
                "ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
                sharded.shards.len(),
                sharded.shard_count.count()
            );
            return None;
        }

        let shards = sharded
            .shards
            .iter()
            .map(|&(shard_number, node_id)| ComputeHookNotifyRequestShard {
                node_id,
                shard_number,
            })
            .collect();

        Some(ComputeHookNotifyRequest {
            tenant_id,
            stripe_size: Some(sharded.stripe_size),
            shards,
        })
    }
 }
 /// The compute hook is a destination for notifications about changes to tenant:pageserver
 /// mapping.  It aggregates updates for the shards in a tenant, and when appropriate reconfigures
 /// the compute connection string.
 pub(super) struct ComputeHook {
    /// Service configuration; `notify` reads `compute_hook_url` from it to choose between the
    /// control plane and the neon_local notification paths
    config: Config,
    /// Shard locations learned so far, keyed by tenant.  This is an async mutex, and `notify`
    /// keeps it locked until it returns.
    state: tokio::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
    /// Pre-formatted `Bearer <token>` header value, present when the config carries a
    /// `control_plane_jwt_token`
    authorization_header: Option<String>,
 }
 impl ComputeHook {
    /// Create a hook with no tenant state, pre-computing the `Authorization` header value from
    /// the configured control plane JWT token (if there is one).
    pub(super) fn new(config: Config) -> Self {
        let authorization_header = config
            .control_plane_jwt_token
            .as_ref()
            .map(|token| format!("Bearer {token}"));

        Self {
            config,
            authorization_header,
            state: Default::default(),
        }
    }
    /// For test environments: use neon_local's LocalEnv to update compute
    async fn do_notify_local(
        &self,
        reconfigure_request: ComputeHookNotifyRequest,
    ) -> anyhow::Result<()> {
        let env = match LocalEnv::load_config() {
            Ok(e) => e,
            Err(e) => {
                tracing::warn!("Couldn't load neon_local config, skipping compute update ({e})");
                return Ok(());
            }
        };
        let cplane =
            ComputeControlPlane::load(env.clone()).expect("Error loading compute control plane");
        let ComputeHookNotifyRequest {
            tenant_id,
            shards,
            stripe_size,
        } = reconfigure_request;
        let compute_pageservers = shards
            .into_iter()
            .map(|shard| {
                let ps_conf = env
                    .get_pageserver_conf(shard.node_id)
                    .expect("Unknown pageserver");
                let (pg_host, pg_port) = parse_host_port(&ps_conf.listen_pg_addr)
                    .expect("Unable to parse listen_pg_addr");
                (pg_host, pg_port.unwrap_or(5432))
            })
            .collect::<Vec<_>>();
        for (endpoint_name, endpoint) in &cplane.endpoints {
            if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running {
                tracing::info!("Reconfiguring endpoint {}", endpoint_name,);
                endpoint
                    .reconfigure(compute_pageservers.clone(), stripe_size)
                    .await?;
            }
        }
        Ok(())
    }
    /// Make a single attempt to PUT `reconfigure_request` to the control plane at `url`.
    ///
    /// The stored authorization header, if any, is attached to the request.  Any 2xx status
    /// counts as success (a warning is logged when it is not exactly 200).  Every other status is
    /// mapped onto a [`NotifyError`]:
    /// - 429: wait up to `SLOWDOWN_DELAY` (ending early if `cancel` fires), then `SlowDown`
    /// - 423: wait up to `BUSY_DELAY` (ending early if `cancel` fires), then `Busy`
    /// - 502, 503, 504: `Unavailable`
    /// - 400, 401, 403: `Fatal`
    /// - anything else: `Unexpected`
    ///
    /// A failure to send the request at all is returned as `NotifyError::Request`.
    async fn do_notify_iteration(
        &self,
        client: &reqwest::Client,
        url: &str,
        reconfigure_request: &ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
        let req = client.request(Method::PUT, url);
        let req = if let Some(value) = &self.authorization_header {
            req.header(reqwest::header::AUTHORIZATION, value)
        } else {
            req
        };

        tracing::info!(
            "Sending notify request to {} ({:?})",
            url,
            reconfigure_request
        );
        let response = req.json(reconfigure_request).send().await?;
        let status = response.status();

        // Treat all 2xx responses as success
        if status.is_success() {
            if status != StatusCode::OK {
                // Non-200 2xx response: it doesn't make sense to retry, but this is unexpected, so
                // log a warning.
                tracing::warn!(
                    "Unexpected 2xx response code {} from control plane",
                    status
                );
            }
            return Ok(());
        }

        // Error response codes
        match status {
            StatusCode::TOO_MANY_REQUESTS => {
                // TODO: 429 handling should be global: set some state visible to other requests
                // so that they will delay before starting, rather than all notifications trying
                // once before backing off.
                tokio::time::timeout(SLOWDOWN_DELAY, cancel.cancelled())
                    .await
                    .ok();
                Err(NotifyError::SlowDown)
            }
            StatusCode::LOCKED => {
                // Delay our retry if busy: the usual fast exponential backoff in backoff::retry
                // is not appropriate
                tokio::time::timeout(BUSY_DELAY, cancel.cancelled())
                    .await
                    .ok();
                Err(NotifyError::Busy)
            }
            StatusCode::SERVICE_UNAVAILABLE
            | StatusCode::GATEWAY_TIMEOUT
            | StatusCode::BAD_GATEWAY => Err(NotifyError::Unavailable(status)),
            StatusCode::BAD_REQUEST | StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
                Err(NotifyError::Fatal(status))
            }
            _ => Err(NotifyError::Unexpected(status)),
        }
    }
    /// Send `reconfigure_request` to the control plane at `url`, repeating attempts through
    /// `backoff::retry`.
    ///
    /// `Fatal` and `Unexpected` errors are singled out by the predicate handed to the retry
    /// helper.  If the helper yields no result at all, `NotifyError::ShuttingDown` is returned
    /// (presumably because `cancel` fired — confirm against `backoff::retry`).
    ///
    /// NOTE(review): a fresh `reqwest::Client` is built on every call; consider whether one
    /// client could be shared.
    async fn do_notify(
        &self,
        url: &String,
        reconfigure_request: ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
        let client = reqwest::Client::new();
        let outcome = backoff::retry(
            || self.do_notify_iteration(&client, url, &reconfigure_request, cancel),
            |e| matches!(e, NotifyError::Fatal(_) | NotifyError::Unexpected(_)),
            3,
            10,
            "Send compute notification",
            cancel,
        )
        .await;

        match outcome {
            Some(result) => result,
            None => Err(NotifyError::ShuttingDown),
        }
    }
    /// Call this to notify the compute (postgres) tier of new pageservers to use
    /// for a tenant.  notify() is called by each shard individually, and this function
    /// will decide whether an update to the tenant is sent.  An update is sent on the
    /// condition that:
    /// - We know a pageserver for every shard.
    /// - All the shards have the same shard_count (i.e. we are not mid-split)
    ///
    /// Cancellation token enables callers to drop out, e.g. if calling from a Reconciler
    /// that is cancelled.
    ///
    /// This function is fallible, including in the case that the control plane is transiently
    /// unavailable.  A limited number of retries are done internally to efficiently hide short unavailability
    /// periods, but we don't retry forever.  The **caller** is responsible for handling failures and
    /// ensuring that they eventually call again to ensure that the compute is eventually notified of
    /// the proper pageserver nodes for a tenant.
    #[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))]
    pub(super) async fn notify(
        &self,
        tenant_shard_id: TenantShardId,
        node_id: NodeId,
        stripe_size: ShardStripeSize,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
        let mut locked = self.state.lock().await;
        use std::collections::hash_map::Entry;
        let tenant = match locked.entry(tenant_shard_id.tenant_id) {
            Entry::Vacant(e) => e.insert(ComputeHookTenant::new(
                tenant_shard_id,
                stripe_size,
                node_id,
            )),
            Entry::Occupied(e) => {
                let tenant = e.into_mut();
                tenant.update(tenant_shard_id, stripe_size, node_id);
                tenant
            }
        };
        let reconfigure_request = tenant.maybe_reconfigure(tenant_shard_id.tenant_id);
        let Some(reconfigure_request) = reconfigure_request else {
            // The tenant doesn't yet have pageservers for all its shards: we won't notify anything
            // until it does.
            tracing::info!("Tenant isn't yet ready to emit a notification");
            return Ok(());
        };
        if let Some(notify_url) = &self.config.compute_hook_url {
            self.do_notify(notify_url, reconfigure_request, cancel)
                .await
        } else {
            self.do_notify_local(reconfigure_request)
                .await
                .map_err(|e| {
                    // This path is for testing only, so munge the error into our prod-style error type.
                    tracing::error!("Local notification hook failed: {e}");
                    NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR)
                })
        }
    }
 }
 #[cfg(test)]
 pub(crate) mod tests {
    use pageserver_api::shard::{ShardCount, ShardNumber};
    use utils::id::TenantId;

    use super::*;

    /// Walks a tenant from unsharded, through the first shard of a two-shard layout, to having
    /// both shards known, checking after each step whether a notification would be emitted.
    #[test]
    fn tenant_updates() -> anyhow::Result<()> {
        let tenant_id = TenantId::generate();
        // Shorthand for a shard id of this tenant.
        let shard = |count, number| TenantShardId {
            tenant_id,
            shard_count: ShardCount::new(count),
            shard_number: ShardNumber(number),
        };

        let mut tenant_state =
            ComputeHookTenant::new(shard(0, 0), ShardStripeSize(12345), NodeId(1));

        // An unsharded tenant is always ready to emit a notification
        let request = tenant_state
            .maybe_reconfigure(tenant_id)
            .expect("unsharded tenant should be ready to notify");
        assert_eq!(request.shards.len(), 1);
        assert!(request.stripe_size.is_none());

        // Writing the first shard of a multi-sharded situation (i.e. in a split)
        // resets the tenant state and puts it in a non-notifying state (need to
        // see all shards)
        tenant_state.update(shard(2, 1), ShardStripeSize(32768), NodeId(1));
        assert!(tenant_state.maybe_reconfigure(tenant_id).is_none());

        // Writing the second shard makes it ready to notify
        tenant_state.update(shard(2, 0), ShardStripeSize(32768), NodeId(1));
        let request = tenant_state
            .maybe_reconfigure(tenant_id)
            .expect("tenant with all shards known should be ready to notify");
        assert_eq!(request.shards.len(), 2);
        assert_eq!(request.stripe_size, Some(ShardStripeSize(32768)));

        Ok(())
    }
 }
--- a/control_plane/attachment_service/src/heartbeater.rs
+++ b/control_plane/attachment_service/src/heartbeater.rs
@@ -6,7 +6,10 @@ use std::{
 };
 use tokio_util::sync::CancellationToken;
-use pageserver_api::{controller_api::NodeAvailability, models::PageserverUtilization};
+use pageserver_api::{
    controller_api::{NodeAvailability, UtilizationScore},
    models::PageserverUtilization,
 };
 use thiserror::Error;
 use utils::id::NodeId;
@@ -19,8 +22,7 @@ struct HeartbeaterTask {
    state: HashMap<NodeId, PageserverState>,
-    max_offline_interval: Duration,
+    max_unavailable_interval: Duration,
    max_warming_up_interval: Duration,
    jwt_token: Option<String>,
 }
@@ -30,9 +32,6 @@ pub(crate) enum PageserverState {
        last_seen_at: Instant,
        utilization: PageserverUtilization,
    },
    WarmingUp {
        started_at: Instant,
    },
    Offline,
 }
@@ -57,18 +56,12 @@ pub(crate) struct Heartbeater {
 impl Heartbeater {
    pub(crate) fn new(
        jwt_token: Option<String>,
-        max_offline_interval: Duration,
+        max_unavailable_interval: Duration,
        max_warming_up_interval: Duration,
        cancel: CancellationToken,
    ) -> Self {
        let (sender, receiver) = tokio::sync::mpsc::unbounded_channel::<HeartbeatRequest>();
-        let mut heartbeater = HeartbeaterTask::new(
+        let mut heartbeater =
-            receiver,
+            HeartbeaterTask::new(receiver, jwt_token, max_unavailable_interval, cancel);
            jwt_token,
            max_offline_interval,
            max_warming_up_interval,
            cancel,
        );
        tokio::task::spawn(async move { heartbeater.run().await });
        Self { sender }
@@ -84,12 +77,9 @@ impl Heartbeater {
                pageservers,
                reply: sender,
            })
-            .map_err(|_| HeartbeaterError::Cancel)?;
+            .unwrap();
-        receiver
+        receiver.await.unwrap()
            .await
            .map_err(|_| HeartbeaterError::Cancel)
            .and_then(|x| x)
    }
 }
@@ -97,16 +87,14 @@ impl HeartbeaterTask {
    fn new(
        receiver: tokio::sync::mpsc::UnboundedReceiver<HeartbeatRequest>,
        jwt_token: Option<String>,
-        max_offline_interval: Duration,
+        max_unavailable_interval: Duration,
        max_warming_up_interval: Duration,
        cancel: CancellationToken,
    ) -> Self {
        Self {
            receiver,
            cancel,
            state: HashMap::new(),
-            max_offline_interval,
+            max_unavailable_interval,
            max_warming_up_interval,
            jwt_token,
        }
    }
@@ -143,12 +131,11 @@ impl HeartbeaterTask {
                // Clone the node and mark it as available such that the request
                // goes through to the pageserver even when the node is marked offline.
                // This doesn't impact the availability observed by [`crate::service::Service`].
-                let mut node_clone = node.clone();
+                let mut node = node.clone();
-                node_clone
+                node.set_availability(NodeAvailability::Active(UtilizationScore::worst()));
                    .set_availability(NodeAvailability::Active(PageserverUtilization::full()));
                async move {
-                    let response = node_clone
+                    let response = node
                        .with_client_retries(
                            |client| async move { client.get_utilization().await },
                            &jwt_token,
@@ -173,12 +160,6 @@ impl HeartbeaterTask {
                            last_seen_at: Instant::now(),
                            utilization,
                        }
                    } else if let NodeAvailability::WarmingUp(last_seen_at) =
                        node.get_availability()
                    {
                        PageserverState::WarmingUp {
                            started_at: *last_seen_at,
                        }
                    } else {
                        PageserverState::Offline
                    };
@@ -204,66 +185,38 @@ impl HeartbeaterTask {
            }
        }
        let mut warming_up = 0;
        let mut offline = 0;
        for state in new_state.values() {
            match state {
                PageserverState::WarmingUp { .. } => {
                    warming_up += 1;
                }
                PageserverState::Offline { .. } => offline += 1,
                PageserverState::Available { .. } => {}
            }
        }
        tracing::info!(
            "Heartbeat round complete for {} nodes, {} warming-up, {} offline",
            new_state.len(),
            warming_up,
            offline
        );
        let mut deltas = Vec::new();
        let now = Instant::now();
-        for (node_id, ps_state) in new_state.iter_mut() {
+        for (node_id, ps_state) in new_state {
            use std::collections::hash_map::Entry::*;
-            let entry = self.state.entry(*node_id);
+            let entry = self.state.entry(node_id);
            let mut needs_update = false;
            match entry {
                Occupied(ref occ) => match (occ.get(), &ps_state) {
                    (PageserverState::Offline, PageserverState::Offline) => {}
                    (PageserverState::Available { last_seen_at, .. }, PageserverState::Offline) => {
-                        if now - *last_seen_at >= self.max_offline_interval {
+                        if now - *last_seen_at >= self.max_unavailable_interval {
-                            deltas.push((*node_id, ps_state.clone()));
+                            deltas.push((node_id, ps_state.clone()));
                            needs_update = true;
                        }
                    }
                    (_, PageserverState::WarmingUp { started_at }) => {
                        if now - *started_at >= self.max_warming_up_interval {
                            *ps_state = PageserverState::Offline;
                        }
                        deltas.push((*node_id, ps_state.clone()));
                        needs_update = true;
                    }
                    _ => {
-                        deltas.push((*node_id, ps_state.clone()));
+                        deltas.push((node_id, ps_state.clone()));
                        needs_update = true;
                    }
                },
                Vacant(_) => {
-                    // This is a new node. Don't generate a delta for it.
+                    deltas.push((node_id, ps_state.clone()));
                    deltas.push((*node_id, ps_state.clone()));
                }
            }
            match entry {
                Occupied(mut occ) if needs_update => {
-                    (*occ.get_mut()) = ps_state.clone();
+                    (*occ.get_mut()) = ps_state;
                }
                Vacant(vac) => {
-                    vac.insert(ps_state.clone());
+                    vac.insert(ps_state);
                }
                _ => {}
            }
--- a/control_plane/attachment_service/src/http.rs
+++ b/control_plane/attachment_service/src/http.rs
@@ -3,20 +3,13 @@ use crate::metrics::{
    METRICS_REGISTRY,
 };
 use crate::reconciler::ReconcileError;
-use crate::service::{LeadershipStatus, Service, STARTUP_RECONCILE_TIMEOUT};
+use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
 use anyhow::Context;
 use futures::Future;
 use hyper::header::CONTENT_TYPE;
 use hyper::{Body, Request, Response};
 use hyper::{StatusCode, Uri};
 use metrics::{BuildInfo, NeonMetrics};
 use pageserver_api::controller_api::{
    MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse,
    MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse,
    TenantCreateRequest,
 };
 use pageserver_api::models::{
-    TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
+    TenantConfigRequest, TenantCreateRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
    TenantTimeTravelRequest, TimelineCreateRequest,
 };
 use pageserver_api::shard::TenantShardId;
@@ -41,8 +34,7 @@ use utils::{
 };
 use pageserver_api::controller_api::{
-    NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest,
+    NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantShardMigrateRequest,
    TenantShardMigrateRequest,
 };
 use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
@@ -51,19 +43,15 @@ use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
 use routerify::Middleware;
 /// State available to HTTP request handlers
 #[derive(Clone)]
 pub struct HttpState {
    service: Arc<crate::service::Service>,
    auth: Option<Arc<SwappableJwtAuth>>,
    neon_metrics: NeonMetrics,
    allowlist_routes: Vec<Uri>,
 }
 impl HttpState {
-    pub fn new(
+    pub fn new(service: Arc<crate::service::Service>, auth: Option<Arc<SwappableJwtAuth>>) -> Self {
        service: Arc<crate::service::Service>,
        auth: Option<Arc<SwappableJwtAuth>>,
        build_info: BuildInfo,
    ) -> Self {
        let allowlist_routes = ["/status", "/ready", "/metrics"]
            .iter()
            .map(|v| v.parse().unwrap())
@@ -71,7 +59,6 @@ impl HttpState {
        Self {
            service,
            auth,
            neon_metrics: NeonMetrics::new(build_info),
            allowlist_routes,
        }
    }
@@ -147,6 +134,52 @@ async fn handle_tenant_create(
    )
 }
 /// Drive a deletion to completion on behalf of the caller.
 ///
 /// Tenant and timeline deletions both implement an "initially return 202, then 404 once we're
 /// done" semantic.  This wrapper calls `f` in a retry loop to expose a simpler API upstream,
 /// which avoids needing to track a "deleting" state for tenants:
 /// - 404 from `f` becomes a 200 response (deletion complete)
 /// - 202 from `f` leads to a wait followed by another attempt
 /// - any other status from `f` is passed through as the response status
 /// - 409 is returned once one more wait would exceed the overall time budget
 ///
 /// An error from `f` is returned immediately.
 async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
 where
    R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
    F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
 {
    // To keep deletion reasonably snappy for small tenants, the first re-check happens after
    // 1 second.
    const INITIAL_RETRY_PERIOD: Duration = Duration::from_secs(1);
    // On subsequent retries, wait longer.
    const MAX_RETRY_PERIOD: Duration = Duration::from_secs(5);
    // Enable callers with a 30 second request timeout to reliably get a response
    const MAX_WAIT: Duration = Duration::from_secs(25);

    let deadline = Instant::now() + MAX_WAIT;
    let mut retry_period = INITIAL_RETRY_PERIOD;

    loop {
        let status = f(service.clone()).await?;

        if status == StatusCode::NOT_FOUND {
            tracing::info!("Deletion complete");
            return json_response(StatusCode::OK, ());
        }
        if status != StatusCode::ACCEPTED {
            tracing::warn!("Unexpected status {status}");
            return json_response(status, ());
        }

        tracing::info!("Deletion accepted, waiting to try again...");
        tokio::time::sleep(retry_period).await;
        retry_period = MAX_RETRY_PERIOD;

        if Instant::now() + retry_period > deadline {
            tracing::info!("Deletion timed out waiting for 404");
            // REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
            // the pageserver's swagger definition for this endpoint, and has the same desired
            // effect of causing the control plane to retry later.
            return json_response(StatusCode::CONFLICT, ());
        }
    }
 }
 async fn handle_tenant_location_config(
    service: Arc<Service>,
    mut req: Request<Body>,
@@ -218,12 +251,6 @@ async fn handle_tenant_time_travel_remote_storage(
    json_response(StatusCode::OK, ())
 }
 /// Convert a `reqwest` status code into the `hyper` equivalent by way of its numeric value.
 ///
 /// A value that `hyper` rejects is reported as `ApiError::InternalServerError`.
 fn map_reqwest_hyper_status(status: reqwest::StatusCode) -> Result<hyper::StatusCode, ApiError> {
    let numeric = status.as_u16();
    let converted = hyper::StatusCode::from_u16(numeric).context("invalid status code");
    converted.map_err(ApiError::InternalServerError)
 }
 async fn handle_tenant_secondary_download(
    service: Arc<Service>,
    req: Request<Body>,
@@ -232,7 +259,7 @@ async fn handle_tenant_secondary_download(
    let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);
    let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?;
-    json_response(map_reqwest_hyper_status(status)?, progress)
+    json_response(status, progress)
 }
 async fn handle_tenant_delete(
@@ -242,17 +269,10 @@ async fn handle_tenant_delete(
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;
-    let status_code = service
+    deletion_wrapper(service, move |service| async move {
-        .tenant_delete(tenant_id)
+        service.tenant_delete(tenant_id).await
-        .await
+    })
-        .and_then(map_reqwest_hyper_status)?;
+    .await
    if status_code == StatusCode::NOT_FOUND {
        // The pageserver uses 404 for successful deletion, but we use 200
        json_response(StatusCode::OK, ())
    } else {
        json_response(status_code, ())
    }
 }
 async fn handle_tenant_timeline_create(
@@ -280,76 +300,12 @@ async fn handle_tenant_timeline_delete(
    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
    // For timeline deletions, which both implement an "initially return 202, then 404 once
    // we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
    async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
    where
        R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
        F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
    {
        let started_at = Instant::now();
        // To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
        // completed.
        let mut retry_period = Duration::from_secs(1);
        // On subsequent retries, wait longer.
        let max_retry_period = Duration::from_secs(5);
        // Enable callers with a 30 second request timeout to reliably get a response
        let max_wait = Duration::from_secs(25);
        loop {
            let status = f(service.clone()).await?;
            match status {
                StatusCode::ACCEPTED => {
                    tracing::info!("Deletion accepted, waiting to try again...");
                    tokio::time::sleep(retry_period).await;
                    retry_period = max_retry_period;
                }
                StatusCode::NOT_FOUND => {
                    tracing::info!("Deletion complete");
                    return json_response(StatusCode::OK, ());
                }
                _ => {
                    tracing::warn!("Unexpected status {status}");
                    return json_response(status, ());
                }
            }
            let now = Instant::now();
            if now + retry_period > started_at + max_wait {
                tracing::info!("Deletion timed out waiting for 404");
                // REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
                // the pageserver's swagger definition for this endpoint, and has the same desired
                // effect of causing the control plane to retry later.
                return json_response(StatusCode::CONFLICT, ());
            }
        }
    }
    deletion_wrapper(service, move |service| async move {
-        service
+        service.tenant_timeline_delete(tenant_id, timeline_id).await
            .tenant_timeline_delete(tenant_id, timeline_id)
            .await
            .and_then(map_reqwest_hyper_status)
    })
    .await
 }
 /// HTTP handler: detach a timeline from its ancestor.
 ///
 /// Reads `tenant_id` and `timeline_id` from the request path, requires `Scope::PageServerApi`,
 /// and responds 200 with whatever `Service::tenant_timeline_detach_ancestor` returns.
 async fn handle_tenant_timeline_detach_ancestor(
    service: Arc<Service>,
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    // The permission check sits between the two parameter parses, so a malformed tenant id is
    // reported before a permission failure, and a malformed timeline id after it.
    check_permissions(&req, Scope::PageServerApi)?;
    let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
    let res = service
        .tenant_timeline_detach_ancestor(tenant_id, timeline_id)
        .await?;
    json_response(StatusCode::OK, res)
 }
 async fn handle_tenant_timeline_passthrough(
    service: Arc<Service>,
    req: Request<Body>,
@@ -408,9 +364,11 @@ async fn handle_tenant_timeline_passthrough(
    }
    // We have a reqwest::Response, would like a http::Response
-    let mut builder = hyper::Response::builder().status(map_reqwest_hyper_status(resp.status())?);
+    let mut builder = hyper::Response::builder()
        .status(resp.status())
        .version(resp.version());
    for (k, v) in resp.headers() {
-        builder = builder.header(k.as_str(), v.as_bytes());
+        builder = builder.header(k, v);
    }
    let response = builder
@@ -434,21 +392,12 @@ async fn handle_tenant_describe(
    service: Arc<Service>,
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
-    check_permissions(&req, Scope::Scrubber)?;
+    check_permissions(&req, Scope::Admin)?;
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
 }
 /// List tenants known to the service and return them as a JSON `200 OK` response.
 ///
 /// Requires [`Scope::Admin`].
 async fn handle_tenant_list(
    service: Arc<Service>,
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let tenants = service.tenant_list();
    json_response(StatusCode::OK, tenants)
 }
 async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
@@ -462,10 +411,7 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError
    check_permissions(&req, Scope::Admin)?;
    let state = get_state(&req);
-    let nodes = state.service.node_list().await?;
+    json_response(StatusCode::OK, state.service.node_list().await?)
    let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
    json_response(StatusCode::OK, api_nodes)
 }
 async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -476,14 +422,6 @@ async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError
    json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
 }
 /// Delete the node named by the `node_id` path parameter.
 ///
 /// Requires [`Scope::Admin`]. The value returned by `node_delete` is sent back as the
 /// JSON payload of a `200 OK` response.
 async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;
    let outcome = http_state.service.node_delete(target).await?;
    json_response(StatusCode::OK, outcome)
 }
 async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
@@ -500,7 +438,7 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
        StatusCode::OK,
        state
            .service
-            .external_node_configure(
+            .node_configure(
                config_req.node_id,
                config_req.availability.map(NodeAvailability::from),
                config_req.scheduling,
@@ -509,119 +447,6 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
    )
 }
 /// Look up the node named by the `node_id` path parameter and return it as JSON.
 ///
 /// Requires [`Scope::Admin`].
 async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;
    json_response(StatusCode::OK, http_state.service.get_node(target).await?)
 }
 /// Return the current leader, as reported by `Service::get_leader`, as JSON.
 ///
 /// Requires [`Scope::Admin`]. A failure to read the leader is reported as
 /// [`ApiError::InternalServerError`] carrying the underlying error's message.
 async fn handle_get_leader(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let leader = match http_state.service.get_leader().await {
        Ok(leader) => leader,
        Err(err) => {
            return Err(ApiError::InternalServerError(anyhow::anyhow!(
                "Failed to read leader from database: {err}"
            )));
        }
    };
    json_response(StatusCode::OK, leader)
 }
 /// Start a drain of the node named by the `node_id` path parameter.
 ///
 /// Requires [`Scope::Admin`]. Once `start_node_drain` returns successfully the
 /// response is `202 Accepted` with `()` as its JSON payload.
 async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;
    let service = &http_state.service;
    service.start_node_drain(target).await?;
    json_response(StatusCode::ACCEPTED, ())
 }
 /// Cancel a drain of the node named by the `node_id` path parameter.
 ///
 /// Requires [`Scope::Admin`]. Once `cancel_node_drain` returns successfully the
 /// response is `202 Accepted` with `()` as its JSON payload.
 async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;
    let service = &http_state.service;
    service.cancel_node_drain(target).await?;
    json_response(StatusCode::ACCEPTED, ())
 }
 /// Start a fill of the node named by the `node_id` path parameter.
 ///
 /// Requires [`Scope::Admin`]. Once `start_node_fill` returns successfully the
 /// response is `202 Accepted` with `()` as its JSON payload.
 async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;
    let service = &http_state.service;
    service.start_node_fill(target).await?;
    json_response(StatusCode::ACCEPTED, ())
 }
 /// Cancel a fill of the node named by the `node_id` path parameter.
 ///
 /// Requires [`Scope::Admin`]. Once `cancel_node_fill` returns successfully the
 /// response is `202 Accepted` with `()` as its JSON payload.
 async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let target: NodeId = parse_request_param(&req, "node_id")?;
    let service = &http_state.service;
    service.cancel_node_fill(target).await?;
    json_response(StatusCode::ACCEPTED, ())
 }
 /// Apply a metadata health update supplied in the JSON request body.
 ///
 /// Requires [`Scope::Scrubber`]. The body is decoded as a
 /// [`MetadataHealthUpdateRequest`] and handed to `Service::metadata_health_update`;
 /// on success an empty [`MetadataHealthUpdateResponse`] is returned with `200 OK`.
 async fn handle_metadata_health_update(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Scrubber)?;
    let payload: MetadataHealthUpdateRequest = json_request(&mut req).await?;
    let http_state = get_state(&req);
    let service = &http_state.service;
    service.metadata_health_update(payload).await?;
    json_response(StatusCode::OK, MetadataHealthUpdateResponse {})
 }
 /// Return the tenant shards reported by `Service::metadata_health_list_unhealthy`.
 ///
 /// Requires [`Scope::Admin`]. The result is wrapped in a
 /// [`MetadataHealthListUnhealthyResponse`] and sent with `200 OK`.
 async fn handle_metadata_health_list_unhealthy(
    req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let response = MetadataHealthListUnhealthyResponse {
        unhealthy_tenant_shards: http_state.service.metadata_health_list_unhealthy().await?,
    };
    json_response(StatusCode::OK, response)
 }
 /// Return the health records reported by `Service::metadata_health_list_outdated`.
 ///
 /// Requires [`Scope::Admin`]. The JSON request body is decoded as a
 /// [`MetadataHealthListOutdatedRequest`] and its `not_scrubbed_for` field is passed to
 /// the service; the records come back in a [`MetadataHealthListOutdatedResponse`].
 async fn handle_metadata_health_list_outdated(
    mut req: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let query: MetadataHealthListOutdatedRequest = json_request(&mut req).await?;
    let http_state = get_state(&req);
    let service = &http_state.service;
    let health_records = service
        .metadata_health_list_outdated(query.not_scrubbed_for)
        .await?;
    let response = MetadataHealthListOutdatedResponse { health_records };
    json_response(StatusCode::OK, response)
 }
 async fn handle_tenant_shard_split(
    service: Arc<Service>,
    mut req: Request<Body>,
@@ -653,29 +478,6 @@ async fn handle_tenant_shard_migrate(
    )
 }
 /// Update the policy of the tenant named by the `tenant_id` path parameter.
 ///
 /// Requires [`Scope::Admin`]. The JSON request body is decoded as a
 /// [`TenantPolicyRequest`]; whatever `Service::tenant_update_policy` returns becomes
 /// the JSON payload of the `200 OK` response.
 async fn handle_tenant_update_policy(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    let policy: TenantPolicyRequest = json_request(&mut req).await?;
    let http_state = get_state(&req);
    let outcome = http_state
        .service
        .tenant_update_policy(tenant_id, policy)
        .await?;
    json_response(StatusCode::OK, outcome)
 }
 /// Ask the service to step down and return whatever `Service::step_down` yields as JSON.
 ///
 /// Requires [`Scope::Admin`].
 async fn handle_step_down(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let outcome = http_state.service.step_down().await;
    json_response(StatusCode::OK, outcome)
 }
 async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;
@@ -685,18 +487,6 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
    json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
 }
 /// Import the tenant named by the `tenant_id` path parameter.
 ///
 /// The path parameter is parsed before the [`Scope::PageServerApi`] permission check.
 /// The value returned by `Service::tenant_import` is sent back as JSON with `200 OK`.
 async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
    check_permissions(&req, Scope::PageServerApi)?;
    let http_state = get_state(&req);
    let imported = http_state.service.tenant_import(tenant_id).await?;
    json_response(StatusCode::OK, imported)
 }
 async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
@@ -719,14 +509,6 @@ async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>,
    json_response(StatusCode::OK, state.service.consistency_check().await?)
 }
 /// Invoke `Service::reconcile_all_now` and return its result as JSON with `200 OK`.
 ///
 /// Requires [`Scope::Admin`].
 async fn handle_reconcile_all(req: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permissions(&req, Scope::Admin)?;
    let http_state = get_state(&req);
    let outcome = http_state.service.reconcile_all_now().await?;
    json_response(StatusCode::OK, outcome)
 }
 /// Status endpoint is just used for checking that our HTTP listener is up
 async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(StatusCode::OK, ())
@@ -783,17 +565,9 @@ where
    .await
 }
 /// Check if the required scope is held in the request's token, or if the request has
 /// a token with 'admin' scope then always permit it.
 fn check_permissions(request: &Request<Body>, required_scope: Scope) -> Result<(), ApiError> {
    check_permission_with(request, |claims| {
-        match crate::auth::check_permission(claims, required_scope) {
+        crate::auth::check_permission(claims, required_scope)
            Err(e) => match crate::auth::check_permission(claims, Scope::Admin) {
                Ok(()) => Ok(()),
                Err(_) => Err(e),
            },
            Ok(()) => Ok(()),
        }
    })
 }
@@ -803,47 +577,6 @@ struct RequestMeta {
    at: Instant,
 }
 /// Pre-request middleware that gates requests on the service's leadership status.
 ///
 /// * `Leader`: every request is passed through.
 /// * `SteppedDown`: only `/control/v1/step_down`, `/status` and `/metrics` pass.
 /// * `Candidate`: only `/ready`, `/status` and `/metrics` pass.
 ///
 /// The comparison is an exact match against the request URI rendered as a string.
 /// Rejected requests are logged and answered with [`ApiError::ResourceUnavailable`].
 pub fn prologue_leadership_status_check_middleware<
    B: hyper::body::HttpBody + Send + Sync + 'static,
 >() -> Middleware<B, ApiError> {
    Middleware::pre(move |req| async move {
        // TODO: does it make sense to allow /status here?
        const STEPPED_DOWN_ROUTES: &[&str] = &["/control/v1/step_down", "/status", "/metrics"];
        const CANDIDATE_ROUTES: &[&str] = &["/ready", "/status", "/metrics"];
        let leadership_status = get_state(&req).service.get_leadership_status();
        // `None` means "no restriction"; `Some(list)` restricts to the listed URIs.
        let allowlist: Option<&[&str]> = match leadership_status {
            LeadershipStatus::Leader => None,
            LeadershipStatus::SteppedDown => Some(STEPPED_DOWN_ROUTES),
            LeadershipStatus::Candidate => Some(CANDIDATE_ROUTES),
        };
        let uri = req.uri().to_string();
        let permitted = match allowlist {
            None => true,
            Some(routes) => routes.contains(&uri.as_str()),
        };
        if permitted {
            Ok(req)
        } else {
            tracing::info!(
                "Request {} not allowed due to current leadership state",
                req.uri()
            );
            Err(ApiError::ResourceUnavailable(
                format!("Current leadership status is {leadership_status}").into(),
            ))
        }
    })
 }
 fn prologue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
 ) -> Middleware<B, ApiError> {
    Middleware::pre(move |req| async move {
@@ -894,11 +627,10 @@ fn epilogue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>
    })
 }
-pub async fn measured_metrics_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
+pub async fn measured_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
    pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4";
-    let state = get_state(&req);
+    let payload = crate::metrics::METRICS_REGISTRY.encode();
    let payload = crate::metrics::METRICS_REGISTRY.encode(&state.neon_metrics);
    let response = Response::builder()
        .status(200)
        .header(CONTENT_TYPE, TEXT_FORMAT)
@@ -927,10 +659,8 @@ where
 pub fn make_router(
    service: Arc<Service>,
    auth: Option<Arc<SwappableJwtAuth>>,
    build_info: BuildInfo,
 ) -> RouterBuilder<hyper::Body, ApiError> {
    let mut router = endpoint::make_router()
        .middleware(prologue_leadership_status_check_middleware())
        .middleware(prologue_metrics_middleware())
        .middleware(epilogue_metrics_middleware());
    if auth.is_some() {
@@ -945,7 +675,7 @@ pub fn make_router(
    }
    router
-        .data(Arc::new(HttpState::new(service, auth, build_info)))
+        .data(Arc::new(HttpState::new(service, auth)))
        .get("/metrics", |r| {
            named_request_span(r, measured_metrics_handler, RequestName("metrics"))
        })
@@ -976,13 +706,6 @@ pub fn make_router(
        .post("/debug/v1/node/:node_id/drop", |r| {
            named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop"))
        })
        .post("/debug/v1/tenant/:tenant_id/import", |r| {
            named_request_span(
                r,
                handle_tenant_import,
                RequestName("debug_v1_tenant_import"),
            )
        })
        .get("/debug/v1/tenant", |r| {
            named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant"))
        })
@@ -1003,9 +726,6 @@ pub fn make_router(
                RequestName("debug_v1_consistency_check"),
            )
        })
        .post("/debug/v1/reconcile_all", |r| {
            request_span(r, handle_reconcile_all)
        })
        .put("/debug/v1/failpoints", |r| {
            request_span(r, |r| failpoints_handler(r, CancellationToken::new()))
        })
@@ -1013,9 +733,6 @@ pub fn make_router(
        .post("/control/v1/node", |r| {
            named_request_span(r, handle_node_register, RequestName("control_v1_node"))
        })
        .delete("/control/v1/node/:node_id", |r| {
            named_request_span(r, handle_node_delete, RequestName("control_v1_node_delete"))
        })
        .get("/control/v1/node", |r| {
            named_request_span(r, handle_node_list, RequestName("control_v1_node"))
        })
@@ -1026,54 +743,6 @@ pub fn make_router(
                RequestName("control_v1_node_config"),
            )
        })
        .get("/control/v1/node/:node_id", |r| {
            named_request_span(r, handle_node_status, RequestName("control_v1_node_status"))
        })
        .get("/control/v1/leader", |r| {
            named_request_span(r, handle_get_leader, RequestName("control_v1_get_leader"))
        })
        .put("/control/v1/node/:node_id/drain", |r| {
            named_request_span(r, handle_node_drain, RequestName("control_v1_node_drain"))
        })
        .delete("/control/v1/node/:node_id/drain", |r| {
            named_request_span(
                r,
                handle_cancel_node_drain,
                RequestName("control_v1_cancel_node_drain"),
            )
        })
        .put("/control/v1/node/:node_id/fill", |r| {
            named_request_span(r, handle_node_fill, RequestName("control_v1_node_fill"))
        })
        .delete("/control/v1/node/:node_id/fill", |r| {
            named_request_span(
                r,
                handle_cancel_node_fill,
                RequestName("control_v1_cancel_node_fill"),
            )
        })
        // Metadata health operations
        .post("/control/v1/metadata_health/update", |r| {
            named_request_span(
                r,
                handle_metadata_health_update,
                RequestName("control_v1_metadata_health_update"),
            )
        })
        .get("/control/v1/metadata_health/unhealthy", |r| {
            named_request_span(
                r,
                handle_metadata_health_list_unhealthy,
                RequestName("control_v1_metadata_health_list_unhealthy"),
            )
        })
        .post("/control/v1/metadata_health/outdated", |r| {
            named_request_span(
                r,
                handle_metadata_health_list_outdated,
                RequestName("control_v1_metadata_health_list_outdated"),
            )
        })
        // Tenant Shard operations
        .put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {
            tenant_service_handler(
@@ -1096,19 +765,6 @@ pub fn make_router(
                RequestName("control_v1_tenant_describe"),
            )
        })
        .get("/control/v1/tenant", |r| {
            tenant_service_handler(r, handle_tenant_list, RequestName("control_v1_tenant_list"))
        })
        .put("/control/v1/tenant/:tenant_id/policy", |r| {
            named_request_span(
                r,
                handle_tenant_update_policy,
                RequestName("control_v1_tenant_policy"),
            )
        })
        .put("/control/v1/step_down", |r| {
            named_request_span(r, handle_step_down, RequestName("control_v1_step_down"))
        })
        // Tenant operations
        // The ^/v1/ endpoints act as a "Virtual Pageserver", enabling shard-naive clients to call into
        // this service to manage tenants that actually consist of many tenant shards, as if they are a single entity.
@@ -1160,17 +816,7 @@ pub fn make_router(
                RequestName("v1_tenant_timeline"),
            )
        })
-        .put(
+        // Tenant detail GET passthrough to shard zero
            "/v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor",
            |r| {
                tenant_service_handler(
                    r,
                    handle_tenant_timeline_detach_ancestor,
                    RequestName("v1_tenant_timeline_detach_ancestor"),
                )
            },
        )
        // Tenant detail GET passthrough to shard zero:
        .get("/v1/tenant/:tenant_id", |r| {
            tenant_service_handler(
                r,
@@ -1178,14 +824,13 @@ pub fn make_router(
                RequestName("v1_tenant_passthrough"),
            )
        })
-        // The `*` in the  URL is a wildcard: any tenant/timeline GET APIs on the pageserver
+        // Timeline GET passthrough to shard zero.  Note that the `*` in the URL is a wildcard: any future
-        // are implicitly exposed here.  This must be last in the list to avoid
+        // timeline GET APIs will be implicitly included.
-        // taking precedence over other GET methods we might implement by hand.
+        .get("/v1/tenant/:tenant_id/timeline*", |r| {
        .get("/v1/tenant/:tenant_id/*", |r| {
            tenant_service_handler(
                r,
                handle_tenant_timeline_passthrough,
-                RequestName("v1_tenant_passthrough"),
+                RequestName("v1_tenant_timeline_passthrough"),
            )
        })
 }
--- a/control_plane/attachment_service/src/id_lock_map.rs
+++ b/control_plane/attachment_service/src/id_lock_map.rs
@@ -0,0 +1,54 @@
 use std::{collections::HashMap, sync::Arc};
 /// A map of locks covering some arbitrary identifiers. Useful if you have a collection of objects but don't
 /// want to embed a lock in each one, or if your locking granularity is different to your object granularity.
 /// For example, used in the storage controller where the objects are tenant shards, but sometimes locking
 /// is needed at a tenant-wide granularity.
 pub(crate) struct IdLockMap<T>
 where
    T: Eq + PartialEq + std::hash::Hash,
 {
    /// A synchronous lock for getting/setting the async locks that our callers will wait on.
    entities: std::sync::Mutex<std::collections::HashMap<T, Arc<tokio::sync::RwLock<()>>>>,
 }
 impl<T> IdLockMap<T>
 where
    T: Eq + PartialEq + std::hash::Hash,
 {
    /// Returns a future that resolves to a shared (read) guard on the lock for `key`,
    /// creating that lock first if the map has no entry for `key`.
    ///
    /// The map's internal mutex is held only for the lookup. The returned future owns
    /// its own clone of the lock's `Arc`.
    ///
    /// # Panics
    /// Panics if the internal mutex is poisoned.
    pub(crate) fn shared(
        &self,
        key: T,
    ) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockReadGuard<()>> {
        let mut locked = self.entities.lock().unwrap();
        let entry = locked.entry(key).or_default();
        entry.clone().read_owned()
    }
    /// Returns a future that resolves to an exclusive (write) guard on the lock for
    /// `key`, creating that lock first if the map has no entry for `key`.
    ///
    /// The map's internal mutex is held only for the lookup. The returned future owns
    /// its own clone of the lock's `Arc`.
    ///
    /// # Panics
    /// Panics if the internal mutex is poisoned.
    pub(crate) fn exclusive(
        &self,
        key: T,
    ) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockWriteGuard<()>> {
        let mut locked = self.entities.lock().unwrap();
        let entry = locked.entry(key).or_default();
        entry.clone().write_owned()
    }
    /// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do
    /// periodic housekeeping to avoid the map growing indefinitely.
    ///
    /// Entries are dropped only when nothing outside the map references their lock.
    ///
    /// # Panics
    /// Panics if the internal mutex is poisoned.
    pub(crate) fn housekeeping(&self) {
        let mut locked = self.entities.lock().unwrap();
        // Every owned guard, and every future handed out by `shared`/`exclusive` that has
        // not resolved yet, holds its own clone of the entry's `Arc`. Probing with
        // `try_write()` only detects guards that are currently held: it would evict a lock
        // whose future exists but has not been polled, letting a later caller create a
        // second lock for the same key. New clones are only made under `self.entities`,
        // which we hold, so a strong count of 1 proves the map is the sole owner.
        locked.retain(|_k, lock| Arc::strong_count(lock) > 1)
    }
 }
 impl<T> Default for IdLockMap<T>
 where
    T: Eq + PartialEq + std::hash::Hash,
 {
    /// Creates a map with no locks in it.
    fn default() -> Self {
        let entities = std::sync::Mutex::new(HashMap::new());
        Self { entities }
    }
 }
--- a/control_plane/attachment_service/src/lib.rs
+++ b/control_plane/attachment_service/src/lib.rs
@@ -2,23 +2,19 @@ use serde::Serialize;
 use utils::seqwait::MonotonicCounter;
 mod auth;
 mod background_node_operations;
 mod compute_hook;
 mod drain_utils;
 mod heartbeater;
 pub mod http;
 mod id_lock_map;
 mod leadership;
 pub mod metrics;
 mod node;
 mod pageserver_client;
 mod peer_client;
 pub mod persistence;
 mod reconciler;
 mod scheduler;
 mod schema;
 pub mod service;
-mod tenant_shard;
+mod tenant_state;
 #[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)]
 struct Sequence(u64);
--- a/control_plane/attachment_service/src/main.rs
+++ b/control_plane/attachment_service/src/main.rs
@@ -1,31 +1,26 @@
 use anyhow::{anyhow, Context};
 use attachment_service::http::make_router;
 use attachment_service::metrics::preinitialize_metrics;
 use attachment_service::persistence::Persistence;
 use attachment_service::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT};
 use camino::Utf8PathBuf;
 use clap::Parser;
-use hyper::Uri;
+use diesel::Connection;
 use metrics::launch_timestamp::LaunchTimestamp;
 use metrics::BuildInfo;
 use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::Duration;
 use storage_controller::http::make_router;
 use storage_controller::metrics::preinitialize_metrics;
 use storage_controller::persistence::Persistence;
 use storage_controller::service::chaos_injector::ChaosInjector;
 use storage_controller::service::{
    Config, Service, MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT,
    RECONCILER_CONCURRENCY_DEFAULT,
 };
 use tokio::signal::unix::SignalKind;
 use tokio_util::sync::CancellationToken;
 use tracing::Instrument;
 use utils::auth::{JwtAuth, SwappableJwtAuth};
 use utils::logging::{self, LogFormat};
 use utils::sentry_init::init_sentry;
 use utils::{project_build_tag, project_git_version, tcp_listener};
 project_git_version!(GIT_VERSION);
 project_build_tag!(BUILD_TAG);
 use diesel_migrations::{embed_migrations, EmbeddedMigrations};
 pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");
 #[derive(Parser)]
 #[command(author, version, about, long_about = None)]
 #[command(arg_required_else_help(true))]
@@ -47,14 +42,15 @@ struct Cli {
    #[arg(long)]
    control_plane_jwt_token: Option<String>,
    #[arg(long)]
    peer_jwt_token: Option<String>,
    /// URL to control plane compute notification endpoint
    #[arg(long)]
    compute_hook_url: Option<String>,
-    /// URL to connect to postgres, like postgresql://localhost:1234/storage_controller
+    /// Path to the .json file to store state (will be created if it doesn't exist)
    #[arg(short, long)]
    path: Option<Utf8PathBuf>,
    /// URL to connect to postgres, like postgresql://localhost:1234/attachment_service
    #[arg(long)]
    database_url: Option<String>,
@@ -64,46 +60,7 @@ struct Cli {
    /// Grace period before marking unresponsive pageserver offline
    #[arg(long)]
-    max_offline_interval: Option<humantime::Duration>,
+    max_unavailable_interval: Option<humantime::Duration>,
    /// More tolerant grace period before marking unresponsive pagserver offline used
    /// around pageserver restarts
    #[arg(long)]
    max_warming_up_interval: Option<humantime::Duration>,
    /// Size threshold for automatically splitting shards (disabled by default)
    #[arg(long)]
    split_threshold: Option<u64>,
    /// Maximum number of reconcilers that may run in parallel
    #[arg(long)]
    reconciler_concurrency: Option<usize>,
    /// How long to wait for the initial database connection to be available.
    #[arg(long, default_value = "5s")]
    db_connect_timeout: humantime::Duration,
    #[arg(long, default_value = "false")]
    start_as_candidate: bool,
    // TODO: make this mandatory once the helm chart gets updated
    #[arg(long)]
    address_for_peers: Option<Uri>,
    /// `neon_local` sets this to the path of the neon_local repo dir.
    /// Only relevant for testing.
    // TODO: make `cfg(feature = "testing")`
    #[arg(long)]
    neon_local_repo_dir: Option<PathBuf>,
    /// Chaos testing
    #[arg(long)]
    chaos_interval: Option<humantime::Duration>,
    // Maximum acceptable lag for the secondary location while draining
    // a pageserver
    #[arg(long)]
    max_secondary_lag_bytes: Option<u64>,
 }
 enum StrictMode {
@@ -129,28 +86,28 @@ struct Secrets {
    public_key: Option<JwtAuth>,
    jwt_token: Option<String>,
    control_plane_jwt_token: Option<String>,
    peer_jwt_token: Option<String>,
 }
 impl Secrets {
    const DATABASE_URL_ENV: &'static str = "DATABASE_URL";
    const PAGESERVER_JWT_TOKEN_ENV: &'static str = "PAGESERVER_JWT_TOKEN";
    const CONTROL_PLANE_JWT_TOKEN_ENV: &'static str = "CONTROL_PLANE_JWT_TOKEN";
    const PEER_JWT_TOKEN_ENV: &'static str = "PEER_JWT_TOKEN";
    const PUBLIC_KEY_ENV: &'static str = "PUBLIC_KEY";
    /// Load secrets from, in order of preference:
    /// - CLI args if database URL is provided on the CLI
    /// - Environment variables if DATABASE_URL is set.
    /// - AWS Secrets Manager secrets
    async fn load(args: &Cli) -> anyhow::Result<Self> {
-        let Some(database_url) = Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV)
+        let Some(database_url) =
            Self::load_secret(&args.database_url, Self::DATABASE_URL_ENV).await
        else {
            anyhow::bail!(
                "Database URL is not set (set `--database-url`, or `DATABASE_URL` environment)"
            )
        };
-        let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV) {
+        let public_key = match Self::load_secret(&args.public_key, Self::PUBLIC_KEY_ENV).await {
            Some(v) => Some(JwtAuth::from_key(v).context("Loading public key")?),
            None => None,
        };
@@ -158,18 +115,18 @@ impl Secrets {
        let this = Self {
            database_url,
            public_key,
-            jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV),
+            jwt_token: Self::load_secret(&args.jwt_token, Self::PAGESERVER_JWT_TOKEN_ENV).await,
            control_plane_jwt_token: Self::load_secret(
                &args.control_plane_jwt_token,
                Self::CONTROL_PLANE_JWT_TOKEN_ENV,
-            ),
+            )
-            peer_jwt_token: Self::load_secret(&args.peer_jwt_token, Self::PEER_JWT_TOKEN_ENV),
+            .await,
        };
        Ok(this)
    }
-    fn load_secret(cli: &Option<String>, env_name: &str) -> Option<String> {
+    async fn load_secret(cli: &Option<String>, env_name: &str) -> Option<String> {
        if let Some(v) = cli {
            Some(v.clone())
        } else if let Ok(v) = std::env::var(env_name) {
@@ -180,24 +137,24 @@ impl Secrets {
    }
 }
 /// Apply any pending diesel migrations embedded in this binary (`MIGRATIONS`).
 ///
 /// Opens a `PgConnection` to `database_url` and runs the pending migrations through a
 /// harness that writes its output to stdout. Connection and migration failures are
 /// returned as `anyhow` errors.
 async fn migration_run(database_url: &str) -> anyhow::Result<()> {
    use diesel::PgConnection;
    use diesel_migrations::{HarnessWithOutput, MigrationHarness};
    let mut conn = PgConnection::establish(database_url)?;
    let mut harness = HarnessWithOutput::write_to_stdout(&mut conn);
    match harness.run_pending_migrations(MIGRATIONS) {
        Ok(_applied) => Ok(()),
        Err(e) => Err(anyhow::anyhow!(e)),
    }
 }
 fn main() -> anyhow::Result<()> {
-    logging::init(
+    let default_panic = std::panic::take_hook();
        LogFormat::Plain,
        logging::TracingErrorLayerEnablement::Disabled,
        logging::Output::Stdout,
    )?;
    // log using tracing so we don't get confused output by default hook writing to stderr
    utils::logging::replace_panic_hook_with_tracing_panic_hook().forget();
    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
    let hook = std::panic::take_hook();
    std::panic::set_hook(Box::new(move |info| {
-        // let sentry send a message (and flush)
+        default_panic(info);
        // and trace the error
        hook(info);
        std::process::exit(1);
    }));
@@ -214,22 +171,24 @@ fn main() -> anyhow::Result<()> {
 async fn async_main() -> anyhow::Result<()> {
    let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate()));
    logging::init(
        LogFormat::Plain,
        logging::TracingErrorLayerEnablement::Disabled,
        logging::Output::Stdout,
    )?;
    preinitialize_metrics();
    let args = Cli::parse();
    tracing::info!(
-        "version: {}, launch_timestamp: {}, build_tag {}, listening on {}",
+        "version: {}, launch_timestamp: {}, build_tag {}, state at {}, listening on {}",
        GIT_VERSION,
        launch_ts.to_string(),
        BUILD_TAG,
        args.path.as_ref().unwrap_or(&Utf8PathBuf::from("<none>")),
        args.listen
    );
    let build_info = BuildInfo {
        revision: GIT_VERSION,
        build_tag: BUILD_TAG,
    };
    let strict_mode = if args.dev {
        StrictMode::Dev
    } else {
@@ -269,31 +228,20 @@ async fn async_main() -> anyhow::Result<()> {
    let config = Config {
        jwt_token: secrets.jwt_token,
        control_plane_jwt_token: secrets.control_plane_jwt_token,
        peer_jwt_token: secrets.peer_jwt_token,
        compute_hook_url: args.compute_hook_url,
-        max_offline_interval: args
+        max_unavailable_interval: args
-            .max_offline_interval
+            .max_unavailable_interval
            .map(humantime::Duration::into)
-            .unwrap_or(MAX_OFFLINE_INTERVAL_DEFAULT),
+            .unwrap_or(MAX_UNAVAILABLE_INTERVAL_DEFAULT),
        max_warming_up_interval: args
            .max_warming_up_interval
            .map(humantime::Duration::into)
            .unwrap_or(MAX_WARMING_UP_INTERVAL_DEFAULT),
        reconciler_concurrency: args
            .reconciler_concurrency
            .unwrap_or(RECONCILER_CONCURRENCY_DEFAULT),
        split_threshold: args.split_threshold,
        neon_local_repo_dir: args.neon_local_repo_dir,
        max_secondary_lag_bytes: args.max_secondary_lag_bytes,
        address_for_peers: args.address_for_peers,
        start_as_candidate: args.start_as_candidate,
        http_service_port: args.listen.port() as i32,
    };
-    // Validate that we can connect to the database
+    // After loading secrets & config, but before starting anything else, apply database migrations
-    Persistence::await_connection(&secrets.database_url, args.db_connect_timeout.into()).await?;
+    migration_run(&secrets.database_url)
        .await
        .context("Running database migrations")?;
-    let persistence = Arc::new(Persistence::new(secrets.database_url));
+    let json_path = args.path;
    let persistence = Arc::new(Persistence::new(secrets.database_url, json_path.clone()));
    let service = Service::spawn(config, persistence.clone()).await?;
@@ -302,7 +250,7 @@ async fn async_main() -> anyhow::Result<()> {
    let auth = secrets
        .public_key
        .map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
-    let router = make_router(service.clone(), auth, build_info)
+    let router = make_router(service.clone(), auth)
        .build()
        .map_err(|err| anyhow!(err))?;
    let router_service = utils::http::RouterService::new(router).unwrap();
@@ -320,22 +268,6 @@ async fn async_main() -> anyhow::Result<()> {
    tracing::info!("Serving on {0}", args.listen);
    let server_task = tokio::task::spawn(server);
    let chaos_task = args.chaos_interval.map(|interval| {
        let service = service.clone();
        let cancel = CancellationToken::new();
        let cancel_bg = cancel.clone();
        (
            tokio::task::spawn(
                async move {
                    let mut chaos_injector = ChaosInjector::new(service, interval.into());
                    chaos_injector.run(cancel_bg).await
                }
                .instrument(tracing::info_span!("chaos_injector")),
            ),
            cancel,
        )
    });
    // Wait until we receive a signal
    let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?;
    let mut sigquit = tokio::signal::unix::signal(SignalKind::quit())?;
@@ -347,28 +279,21 @@ async fn async_main() -> anyhow::Result<()> {
    }
    tracing::info!("Terminating on signal");
-    // Stop HTTP server first, so that we don't have to service requests
+    if json_path.is_some() {
-    // while shutting down Service.
+        // Write out a JSON dump on shutdown: this is used in compat tests to avoid passing
-    server_shutdown.cancel();
+        // full postgres dumps around.
-    match tokio::time::timeout(Duration::from_secs(5), server_task).await {
+        if let Err(e) = persistence.write_tenants_json().await {
-        Ok(Ok(_)) => {
+            tracing::error!("Failed to write JSON on shutdown: {e}")
            tracing::info!("Joined HTTP server task");
        }
        Ok(Err(e)) => {
            tracing::error!("Error joining HTTP server task: {e}")
        }
        Err(_) => {
            tracing::warn!("Timed out joining HTTP server task");
            // We will fall through and shut down the service anyway, any request handlers
            // in flight will experience cancellation & their clients will see a torn connection.
        }
    }
-    // If we were injecting chaos, stop that so that we're not calling into Service while it shuts down
+    // Stop HTTP server first, so that we don't have to service requests
-    if let Some((chaos_jh, chaos_cancel)) = chaos_task {
+    // while shutting down Service
-        chaos_cancel.cancel();
+    server_shutdown.cancel();
-        chaos_jh.await.ok();
+    if let Err(e) = server_task.await {
        tracing::error!("Error joining HTTP server task: {e}")
    }
    tracing::info!("Joined HTTP server task");
    service.shutdown().await;
    tracing::info!("Service shutdown complete");
--- a/control_plane/attachment_service/src/metrics.rs
+++ b/control_plane/attachment_service/src/metrics.rs
@@ -8,16 +8,14 @@
 //! The rest of the code defines label group types and deals with converting outer types to labels.
 //!
 use bytes::Bytes;
-use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup};
+use measured::{
-use metrics::NeonMetrics;
+    label::{LabelValue, StaticLabelSet},
    FixedCardinalityLabel, MetricGroup,
 };
 use once_cell::sync::Lazy;
 use std::sync::Mutex;
 use strum::IntoEnumIterator;
-use crate::{
+use crate::persistence::{DatabaseError, DatabaseOperation};
    persistence::{DatabaseError, DatabaseOperation},
    service::LeadershipStatus,
 };
 pub(crate) static METRICS_REGISTRY: Lazy<StorageControllerMetrics> =
    Lazy::new(StorageControllerMetrics::default);
@@ -28,28 +26,21 @@ pub fn preinitialize_metrics() {
 pub(crate) struct StorageControllerMetrics {
    pub(crate) metrics_group: StorageControllerMetricGroup,
-    encoder: Mutex<measured::text::BufferedTextEncoder>,
+    encoder: Mutex<measured::text::TextEncoder>,
 }
 #[derive(measured::MetricGroup)]
 #[metric(new())]
 pub(crate) struct StorageControllerMetricGroup {
    /// Count of how many times we spawn a reconcile task
    pub(crate) storage_controller_reconcile_spawn: measured::Counter,
    /// Reconciler tasks completed, broken down by success/failure/cancelled
    pub(crate) storage_controller_reconcile_complete:
        measured::CounterVec<ReconcileCompleteLabelGroupSet>,
    /// Count of how many times we make an optimization change to a tenant's scheduling
    pub(crate) storage_controller_schedule_optimization: measured::Counter,
    /// HTTP request status counters for handled requests
    pub(crate) storage_controller_http_request_status:
        measured::CounterVec<HttpRequestStatusLabelGroupSet>,
    /// HTTP request handler latency across all status codes
    #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
    pub(crate) storage_controller_http_request_latency:
        measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
@@ -61,7 +52,6 @@ pub(crate) struct StorageControllerMetricGroup {
    /// Latency of HTTP requests to the pageserver, broken down by pageserver
    /// node id, request name and method. This includes both successful and unsuccessful
    /// requests.
    #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
    pub(crate) storage_controller_pageserver_request_latency:
        measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
@@ -73,7 +63,6 @@ pub(crate) struct StorageControllerMetricGroup {
    /// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
    /// node id, request name and method. This includes both successful and unsuccessful
    /// requests.
    #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
    pub(crate) storage_controller_passthrough_request_latency:
        measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
@@ -82,36 +71,75 @@ pub(crate) struct StorageControllerMetricGroup {
        measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
    /// Latency of database queries, broken down by operation.
    #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
    pub(crate) storage_controller_database_query_latency:
        measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
    pub(crate) storage_controller_leadership_status: measured::GaugeVec<LeadershipStatusGroupSet>,
 }
 impl StorageControllerMetrics {
-    pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes {
+    pub(crate) fn encode(&self) -> Bytes {
        let mut encoder = self.encoder.lock().unwrap();
-        neon_metrics
+        self.metrics_group.collect_into(&mut *encoder);
            .collect_group_into(&mut *encoder)
            .unwrap_or_else(|infallible| match infallible {});
        self.metrics_group
            .collect_group_into(&mut *encoder)
            .unwrap_or_else(|infallible| match infallible {});
        encoder.finish()
    }
 }
 impl Default for StorageControllerMetrics {
    fn default() -> Self {
        let mut metrics_group = StorageControllerMetricGroup::new();
        metrics_group
            .storage_controller_reconcile_complete
            .init_all_dense();
        Self {
-            metrics_group,
+            metrics_group: StorageControllerMetricGroup::new(),
-            encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
+            encoder: Mutex::new(measured::text::TextEncoder::new()),
        }
    }
 }
 impl StorageControllerMetricGroup {
    pub(crate) fn new() -> Self {
        Self {
            storage_controller_reconcile_spawn: measured::Counter::new(),
            storage_controller_reconcile_complete: measured::CounterVec::new(
                ReconcileCompleteLabelGroupSet {
                    status: StaticLabelSet::new(),
                },
            ),
            storage_controller_http_request_status: measured::CounterVec::new(
                HttpRequestStatusLabelGroupSet {
                    path: lasso::ThreadedRodeo::new(),
                    method: StaticLabelSet::new(),
                    status: StaticLabelSet::new(),
                },
            ),
            storage_controller_http_request_latency: measured::HistogramVec::new(
                measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
            ),
            storage_controller_pageserver_request_error: measured::CounterVec::new(
                PageserverRequestLabelGroupSet {
                    pageserver_id: lasso::ThreadedRodeo::new(),
                    path: lasso::ThreadedRodeo::new(),
                    method: StaticLabelSet::new(),
                },
            ),
            storage_controller_pageserver_request_latency: measured::HistogramVec::new(
                measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
            ),
            storage_controller_passthrough_request_error: measured::CounterVec::new(
                PageserverRequestLabelGroupSet {
                    pageserver_id: lasso::ThreadedRodeo::new(),
                    path: lasso::ThreadedRodeo::new(),
                    method: StaticLabelSet::new(),
                },
            ),
            storage_controller_passthrough_request_latency: measured::HistogramVec::new(
                measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
            ),
            storage_controller_database_query_error: measured::CounterVec::new(
                DatabaseQueryErrorLabelGroupSet {
                    operation: StaticLabelSet::new(),
                    error_type: StaticLabelSet::new(),
                },
            ),
            storage_controller_database_query_latency: measured::HistogramVec::new(
                measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
            ),
        }
    }
 }
@@ -125,7 +153,7 @@ pub(crate) struct ReconcileCompleteLabelGroup {
 #[derive(measured::LabelGroup)]
 #[label(set = HttpRequestStatusLabelGroupSet)]
 pub(crate) struct HttpRequestStatusLabelGroup<'a> {
-    #[label(dynamic_with = lasso::ThreadedRodeo, default)]
+    #[label(dynamic_with = lasso::ThreadedRodeo)]
    pub(crate) path: &'a str,
    pub(crate) method: Method,
    pub(crate) status: StatusCode,
@@ -134,21 +162,40 @@ pub(crate) struct HttpRequestStatusLabelGroup<'a> {
 #[derive(measured::LabelGroup)]
 #[label(set = HttpRequestLatencyLabelGroupSet)]
 pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
-    #[label(dynamic_with = lasso::ThreadedRodeo, default)]
+    #[label(dynamic_with = lasso::ThreadedRodeo)]
    pub(crate) path: &'a str,
    pub(crate) method: Method,
 }
 /// Default label set for HTTP request latency metrics: a new `lasso::ThreadedRodeo`
 /// for the dynamic `path` label and a new static label set for `method`.
 impl Default for HttpRequestLatencyLabelGroupSet {
    fn default() -> Self {
        Self {
            path: lasso::ThreadedRodeo::new(),
            method: StaticLabelSet::new(),
        }
    }
 }
 #[derive(measured::LabelGroup, Clone)]
 #[label(set = PageserverRequestLabelGroupSet)]
 pub(crate) struct PageserverRequestLabelGroup<'a> {
-    #[label(dynamic_with = lasso::ThreadedRodeo, default)]
+    #[label(dynamic_with = lasso::ThreadedRodeo)]
    pub(crate) pageserver_id: &'a str,
-    #[label(dynamic_with = lasso::ThreadedRodeo, default)]
+    #[label(dynamic_with = lasso::ThreadedRodeo)]
    pub(crate) path: &'a str,
    pub(crate) method: Method,
 }
 /// Default label set for pageserver request metrics: new `lasso::ThreadedRodeo`s for the
 /// dynamic `pageserver_id` and `path` labels, and a new static label set for `method`.
 impl Default for PageserverRequestLabelGroupSet {
    fn default() -> Self {
        Self {
            pageserver_id: lasso::ThreadedRodeo::new(),
            path: lasso::ThreadedRodeo::new(),
            method: StaticLabelSet::new(),
        }
    }
 }
 #[derive(measured::LabelGroup)]
 #[label(set = DatabaseQueryErrorLabelGroupSet)]
 pub(crate) struct DatabaseQueryErrorLabelGroup {
@@ -162,13 +209,7 @@ pub(crate) struct DatabaseQueryLatencyLabelGroup {
    pub(crate) operation: DatabaseOperation,
 }
-#[derive(measured::LabelGroup)]
+#[derive(FixedCardinalityLabel)]
 #[label(set = LeadershipStatusGroupSet)]
 pub(crate) struct LeadershipStatusGroup {
    pub(crate) status: LeadershipStatus,
 }
 #[derive(FixedCardinalityLabel, Clone, Copy)]
 pub(crate) enum ReconcileOutcome {
    #[label(rename = "ok")]
    Success,
@@ -176,7 +217,7 @@ pub(crate) enum ReconcileOutcome {
    Cancel,
 }
-#[derive(FixedCardinalityLabel, Copy, Clone)]
+#[derive(FixedCardinalityLabel, Clone)]
 pub(crate) enum Method {
    Get,
    Put,
@@ -201,12 +242,11 @@ impl From<hyper::Method> for Method {
    }
 }
 #[derive(Clone, Copy)]
 pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
 impl LabelValue for StatusCode {
    fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
-        v.write_int(self.0.as_u16() as i64)
+        v.write_int(self.0.as_u16() as u64)
    }
 }
@@ -224,13 +264,12 @@ impl FixedCardinalityLabel for StatusCode {
    }
 }
-#[derive(FixedCardinalityLabel, Clone, Copy)]
+#[derive(FixedCardinalityLabel)]
 pub(crate) enum DatabaseErrorLabel {
    Query,
    Connection,
    ConnectionPool,
    Logical,
    Migration,
 }
 impl DatabaseError {
@@ -240,22 +279,6 @@ impl DatabaseError {
            Self::Connection(_) => DatabaseErrorLabel::Connection,
            Self::ConnectionPool(_) => DatabaseErrorLabel::ConnectionPool,
            Self::Logical(_) => DatabaseErrorLabel::Logical,
            Self::Migration(_) => DatabaseErrorLabel::Migration,
        }
    }
 }
 /// Refresh the leadership status gauges: the gauge labelled with `status` is set to 1 and
 /// the gauge for every other `LeadershipStatus` variant is set to 0.
 pub(crate) fn update_leadership_status(status: LeadershipStatus) {
    let gauges = &METRICS_REGISTRY
        .metrics_group
        .storage_controller_leadership_status;
    for candidate in LeadershipStatus::iter() {
        // Exactly one variant (the requested one) is flagged as current.
        let value = if candidate == status { 1 } else { 0 };
        gauges.set(LeadershipStatusGroup { status: candidate }, value);
    }
 }
--- a/control_plane/attachment_service/src/node.rs
+++ b/control_plane/attachment_service/src/node.rs
@@ -1,14 +1,13 @@
 use std::{str::FromStr, time::Duration};
 use hyper::StatusCode;
 use pageserver_api::{
    controller_api::{
-        NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
+        NodeAvailability, NodeRegisterRequest, NodeSchedulingPolicy, TenantLocateResponseShard,
        TenantLocateResponseShard,
    },
    shard::TenantShardId,
 };
 use pageserver_client::mgmt_api;
 use reqwest::StatusCode;
 use serde::Serialize;
 use tokio_util::sync::CancellationToken;
 use utils::{backoff, id::NodeId};
@@ -46,8 +45,6 @@ pub(crate) struct Node {
 /// whether/how they changed it.
 pub(crate) enum AvailabilityTransition {
    /// The node becomes `Active`, coming from `Offline` or `WarmingUp`.
    ToActive,
    /// The node moves from `Active` to `WarmingUp`.
    ToWarmingUpFromActive,
    /// The node moves from `Offline` to `WarmingUp`.
    ToWarmingUpFromOffline,
    /// The node becomes `Offline`, coming from `Active` or `WarmingUp`.
    ToOffline,
    /// Any other combination of current and requested availability.
    Unchanged,
 }
@@ -61,10 +58,6 @@ impl Node {
        self.id
    }
    /// Return the node's current scheduling policy.
    pub(crate) fn get_scheduling(&self) -> NodeSchedulingPolicy {
        self.scheduling
    }
    /// Overwrite the node's scheduling policy with `scheduling`.
    pub(crate) fn set_scheduling(&mut self, scheduling: NodeSchedulingPolicy) {
        self.scheduling = scheduling
    }
@@ -92,52 +85,36 @@ impl Node {
        }
    }
    /// Borrow the node's current availability state.
    pub(crate) fn get_availability(&self) -> &NodeAvailability {
        &self.availability
    }
    pub(crate) fn set_availability(&mut self, availability: NodeAvailability) {
-        use AvailabilityTransition::*;
+        match self.get_availability_transition(availability) {
-        use NodeAvailability::WarmingUp;
+            AvailabilityTransition::ToActive => {
        match self.get_availability_transition(&availability) {
            ToActive => {
                // Give the node a new cancellation token, effectively resetting it to un-cancelled.  Any
                // users of previously-cloned copies of the node will still see the old cancellation
                // state.  For example, Reconcilers in flight will have to complete and be spawned
                // again to realize that the node has become available.
                self.cancel = CancellationToken::new();
            }
-            ToOffline | ToWarmingUpFromActive => {
+            AvailabilityTransition::ToOffline => {
                // Fire the node's cancellation token to cancel any in-flight API requests to it
                self.cancel.cancel();
            }
-            Unchanged | ToWarmingUpFromOffline => {}
+            AvailabilityTransition::Unchanged => {}
        }
        if let (WarmingUp(crnt), WarmingUp(proposed)) = (&self.availability, &availability) {
            self.availability = WarmingUp(std::cmp::max(*crnt, *proposed));
        } else {
            self.availability = availability;
        }
        self.availability = availability;
    }
    /// Without modifying the availability of the node, convert the intended availability
    /// into a description of the transition.
    pub(crate) fn get_availability_transition(
        &self,
-        availability: &NodeAvailability,
+        availability: NodeAvailability,
    ) -> AvailabilityTransition {
        use AvailabilityTransition::*;
        use NodeAvailability::*;
-        match (&self.availability, availability) {
+        match (self.availability, availability) {
            (Offline, Active(_)) => ToActive,
            (Active(_), Offline) => ToOffline,
            (Active(_), WarmingUp(_)) => ToWarmingUpFromActive,
            (WarmingUp(_), Offline) => ToOffline,
            (WarmingUp(_), Active(_)) => ToActive,
            (Offline, WarmingUp(_)) => ToWarmingUpFromOffline,
            _ => Unchanged,
        }
    }
@@ -153,17 +130,16 @@ impl Node {
    /// Is this node eligible to have work scheduled onto it?
    pub(crate) fn may_schedule(&self) -> MaySchedule {
-        let utilization = match &self.availability {
+        let score = match self.availability {
-            NodeAvailability::Active(u) => u.clone(),
+            NodeAvailability::Active(score) => score,
-            NodeAvailability::Offline | NodeAvailability::WarmingUp(_) => return MaySchedule::No,
+            NodeAvailability::Offline => return MaySchedule::No,
        };
        match self.scheduling {
-            NodeSchedulingPolicy::Active => MaySchedule::Yes(utilization),
+            NodeSchedulingPolicy::Active => MaySchedule::Yes(score),
            NodeSchedulingPolicy::Draining => MaySchedule::No,
-            NodeSchedulingPolicy::Filling => MaySchedule::Yes(utilization),
+            NodeSchedulingPolicy::Filling => MaySchedule::Yes(score),
            NodeSchedulingPolicy::Pause => MaySchedule::No,
            NodeSchedulingPolicy::PauseForRestart => MaySchedule::No,
        }
    }
@@ -180,7 +156,7 @@ impl Node {
            listen_http_port,
            listen_pg_addr,
            listen_pg_port,
-            scheduling: NodeSchedulingPolicy::Active,
+            scheduling: NodeSchedulingPolicy::Filling,
            availability: NodeAvailability::Offline,
            cancel: CancellationToken::new(),
        }
@@ -234,7 +210,7 @@ impl Node {
        fn is_fatal(e: &mgmt_api::Error) -> bool {
            use mgmt_api::Error::*;
            match e {
-                SendRequest(_) | ReceiveBody(_) | ReceiveErrorBody(_) => false,
+                ReceiveBody(_) | ReceiveErrorBody(_) => false,
                ApiError(StatusCode::SERVICE_UNAVAILABLE, _)
                | ApiError(StatusCode::GATEWAY_TIMEOUT, _)
                | ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
@@ -280,19 +256,6 @@ impl Node {
        )
        .await
    }
    /// Build the simplified, API-facing summary of this node: its id, availability,
    /// scheduling policy and HTTP/Postgres listen addresses and ports.
    pub(crate) fn describe(&self) -> NodeDescribeResponse {
        // Clone what the response needs to own; `self` is left untouched.
        let availability = self.availability.clone().into();
        let listen_http_addr = self.listen_http_addr.clone();
        let listen_pg_addr = self.listen_pg_addr.clone();
        NodeDescribeResponse {
            id: self.id,
            availability,
            scheduling: self.scheduling,
            listen_http_addr,
            listen_http_port: self.listen_http_port,
            listen_pg_addr,
            listen_pg_port: self.listen_pg_port,
        }
    }
 }
 impl std::fmt::Display for Node {
--- a/control_plane/attachment_service/src/pageserver_client.rs
+++ b/control_plane/attachment_service/src/pageserver_client.rs
@@ -1,15 +1,13 @@
 use pageserver_api::{
    models::{
-        detach_ancestor::AncestorDetached, LocationConfig, LocationConfigListResponse,
+        LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
        PageserverUtilization, SecondaryProgress, TenantScanRemoteStorageResponse,
        TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
        TopTenantShardsRequest, TopTenantShardsResponse,
    },
    shard::TenantShardId,
 };
 use pageserver_client::mgmt_api::{Client, Result};
 use reqwest::StatusCode;
-use utils::id::{NodeId, TenantId, TimelineId};
+use utils::id::{NodeId, TimelineId};
 /// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
 /// controller to collect metrics in a non-intrusive manner.
@@ -90,18 +88,6 @@ impl PageserverClient {
        )
    }
    /// Forward to the inner client's `tenant_scan_remote_storage` for `tenant_id`.
    ///
    /// The call goes through `measured_request!` with the request name
    /// "tenant_scan_remote_storage", method `Get` and this client's node id label.
    // NOTE(review): `measured_request!` is defined outside this view; presumably it records
    // per-request latency/error metrics -- confirm against the macro definition.
    pub(crate) async fn tenant_scan_remote_storage(
        &self,
        tenant_id: TenantId,
    ) -> Result<TenantScanRemoteStorageResponse> {
        measured_request!(
            "tenant_scan_remote_storage",
            crate::metrics::Method::Get,
            &self.node_id_label,
            self.inner.tenant_scan_remote_storage(tenant_id).await
        )
    }
    pub(crate) async fn tenant_secondary_download(
        &self,
        tenant_id: TenantShardId,
@@ -115,27 +101,6 @@ impl PageserverClient {
        )
    }
    /// Forward to the inner client's `tenant_secondary_status` for `tenant_shard_id`.
    ///
    /// The call goes through `measured_request!` with the request name
    /// "tenant_secondary_status", method `Get` and this client's node id label.
    pub(crate) async fn tenant_secondary_status(
        &self,
        tenant_shard_id: TenantShardId,
    ) -> Result<SecondaryProgress> {
        measured_request!(
            "tenant_secondary_status",
            crate::metrics::Method::Get,
            &self.node_id_label,
            self.inner.tenant_secondary_status(tenant_shard_id).await
        )
    }
    /// Forward to the inner client's `tenant_heatmap_upload` for the given shard.
    ///
    /// The call goes through `measured_request!` with the request name
    /// "tenant_heatmap_upload", method `Post` and this client's node id label.
    pub(crate) async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
        measured_request!(
            "tenant_heatmap_upload",
            crate::metrics::Method::Post,
            &self.node_id_label,
            self.inner.tenant_heatmap_upload(tenant_id).await
        )
    }
    pub(crate) async fn location_config(
        &self,
        tenant_shard_id: TenantShardId,
@@ -227,21 +192,6 @@ impl PageserverClient {
        )
    }
    /// Forward to the inner client's `timeline_detach_ancestor` for the given shard and timeline.
    ///
    /// The call goes through `measured_request!` with the request name
    /// "timeline_detach_ancestor", method `Put` and this client's node id label.
    pub(crate) async fn timeline_detach_ancestor(
        &self,
        tenant_shard_id: TenantShardId,
        timeline_id: TimelineId,
    ) -> Result<AncestorDetached> {
        measured_request!(
            "timeline_detach_ancestor",
            crate::metrics::Method::Put,
            &self.node_id_label,
            self.inner
                .timeline_detach_ancestor(tenant_shard_id, timeline_id)
                .await
        )
    }
    pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
        measured_request!(
            "utilization",
@@ -250,16 +200,4 @@ impl PageserverClient {
            self.inner.get_utilization().await
        )
    }
    /// Forward `request` to the inner client's `top_tenant_shards`.
    ///
    /// The call goes through `measured_request!` with the request name "top_tenants"
    /// (note: not the method name), method `Post` and this client's node id label.
    pub(crate) async fn top_tenant_shards(
        &self,
        request: TopTenantShardsRequest,
    ) -> Result<TopTenantShardsResponse> {
        measured_request!(
            "top_tenants",
            crate::metrics::Method::Post,
            &self.node_id_label,
            self.inner.top_tenant_shards(request).await
        )
    }
 }
--- a/control_plane/attachment_service/src/persistence.rs
+++ b/control_plane/attachment_service/src/persistence.rs
@@ -2,14 +2,13 @@ pub(crate) mod split_state;
 use std::collections::HashMap;
 use std::str::FromStr;
 use std::time::Duration;
 use std::time::Instant;
 use self::split_state::SplitState;
 use camino::Utf8Path;
 use camino::Utf8PathBuf;
 use diesel::pg::PgConnection;
 use diesel::prelude::*;
 use diesel::Connection;
 use pageserver_api::controller_api::MetadataHealthRecord;
 use pageserver_api::controller_api::ShardSchedulingPolicy;
 use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy};
 use pageserver_api::models::TenantConfig;
 use pageserver_api::shard::ShardConfigError;
@@ -25,9 +24,6 @@ use crate::metrics::{
 };
 use crate::node::Node;
 use diesel_migrations::{embed_migrations, EmbeddedMigrations};
 const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");
 /// ## What do we store?
 ///
 /// The storage controller service does not store most of its state durably.
@@ -57,6 +53,11 @@ const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations");
 /// we can UPDATE a node's scheduling mode reasonably quickly to mark a bad node offline.
 pub struct Persistence {
    // Pool of diesel Postgres connections (managed by r2d2) that queries are run on.
    connection_pool: diesel::r2d2::Pool<diesel::r2d2::ConnectionManager<PgConnection>>,
    // In test environments, we support loading+saving a JSON file.  This is temporary, for the benefit of
    // test_compatibility.py, so that we don't have to commit to making the database contents fully backward/forward
    // compatible just yet.
    json_path: Option<Utf8PathBuf>,
 }
 /// Legacy format, for use in JSON compat objects in test environment
@@ -75,11 +76,9 @@ pub(crate) enum DatabaseError {
    ConnectionPool(#[from] r2d2::Error),
    #[error("Logical error: {0}")]
    Logical(String),
    #[error("Migration error: {0}")]
    Migration(String),
 }
-#[derive(measured::FixedCardinalityLabel, Copy, Clone)]
+#[derive(measured::FixedCardinalityLabel, Clone)]
 pub(crate) enum DatabaseOperation {
    InsertNode,
    UpdateNode,
@@ -91,18 +90,11 @@ pub(crate) enum DatabaseOperation {
    Detach,
    ReAttach,
    IncrementGeneration,
    PeekGenerations,
    ListTenantShards,
    InsertTenantShards,
    UpdateTenantShard,
    DeleteTenant,
    UpdateTenantConfig,
    UpdateMetadataHealth,
    ListMetadataHealth,
    ListMetadataHealthUnhealthy,
    ListMetadataHealthOutdated,
    GetLeader,
    UpdateLeader,
 }
 #[must_use]
@@ -115,12 +107,6 @@ pub(crate) enum AbortShardSplitStatus {
 pub(crate) type DatabaseResult<T> = Result<T, DatabaseError>;
 /// Some methods can operate on either a whole tenant or a single shard
 pub(crate) enum TenantFilter {
    /// Select a whole tenant, identified by its tenant id.
    Tenant(TenantId),
    /// Select a single shard, identified by its tenant shard id.
    Shard(TenantShardId),
 }
 impl Persistence {
    // The default postgres connection limit is 100.  We use up to 99, to leave one free for a human admin under
    // normal circumstances.  This assumes we have exclusive use of the database cluster to which we connect.
@@ -130,7 +116,7 @@ impl Persistence {
    const IDLE_CONNECTION_TIMEOUT: Duration = Duration::from_secs(10);
    const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(60);
-    pub fn new(database_url: String) -> Self {
+    pub fn new(database_url: String, json_path: Option<Utf8PathBuf>) -> Self {
        let manager = diesel::r2d2::ConnectionManager::<PgConnection>::new(database_url);
        // We will use a connection pool: this is primarily to _limit_ our connection count, rather than to optimize time
@@ -145,47 +131,12 @@ impl Persistence {
            .build(manager)
            .expect("Could not build connection pool");
-        Self { connection_pool }
+        Self {
-    }
+            connection_pool,
-
+            json_path,
    /// Startup helper: keep trying to connect to the database until it succeeds.
    ///
    /// This tolerates the database and the storage controller being restarted concurrently,
    /// in which case the database might not be available right away.  Failed attempts are
    /// retried after a 100ms pause; once more than `timeout` has elapsed since the first
    /// attempt, the most recent connection error is returned instead.
    pub async fn await_connection(
        database_url: &str,
        timeout: Duration,
    ) -> Result<(), diesel::ConnectionError> {
        let first_attempt = Instant::now();
        let retry_pause = Duration::from_millis(100);
        loop {
            match PgConnection::establish(database_url) {
                Ok(_) => {
                    tracing::info!("Connected to database.");
                    return Ok(());
                }
                // Out of time: hand the last connection error back to the caller.
                Err(e) if first_attempt.elapsed() > timeout => return Err(e),
                Err(e) => {
                    tracing::info!("Database not yet available, waiting... ({e})");
                    tokio::time::sleep(retry_pause).await;
                }
            }
        }
    }
    /// Execute the diesel migrations that are built into this binary
    ///
    /// Pending migrations from the embedded `MIGRATIONS` set are run on a connection obtained
    /// via `with_conn`, with the harness output written to stdout.  A failure is reported as
    /// `DatabaseError::Migration` carrying the error's string form.
    pub(crate) async fn migration_run(&self) -> DatabaseResult<()> {
        use diesel_migrations::{HarnessWithOutput, MigrationHarness};
        self.with_conn(move |conn| -> DatabaseResult<()> {
            HarnessWithOutput::write_to_stdout(conn)
                .run_pending_migrations(MIGRATIONS)
                // The list of applied migration versions is not needed by callers.
                .map(|_| ())
                .map_err(|e| DatabaseError::Migration(e.to_string()))
        })
        .await
    }
    /// Wraps `with_conn` in order to collect latency and error metrics
    async fn with_measured_conn<F, R>(&self, op: DatabaseOperation, func: F) -> DatabaseResult<R>
    where
@@ -195,7 +146,9 @@ impl Persistence {
        let latency = &METRICS_REGISTRY
            .metrics_group
            .storage_controller_database_query_latency;
-        let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup { operation: op });
+        let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup {
            operation: op.clone(),
        });
        let res = self.with_conn(func).await;
@@ -218,45 +171,10 @@ impl Persistence {
        F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
        R: Send + 'static,
    {
        // A generous allowance for how many times we may retry serializable transactions
        // before giving up.  This is not expected to be hit: it is a defensive measure in case we
        // somehow engineer a situation where duelling transactions might otherwise live-lock.
        const MAX_RETRIES: usize = 128;
        let mut conn = self.connection_pool.get()?;
-        tokio::task::spawn_blocking(move || -> DatabaseResult<R> {
+        tokio::task::spawn_blocking(move || -> DatabaseResult<R> { func(&mut conn) })
-            let mut retry_count = 0;
+            .await
-            loop {
+            .expect("Task panic")
                match conn.build_transaction().serializable().run(|c| func(c)) {
                    Ok(r) => break Ok(r),
                    Err(
                        err @ DatabaseError::Query(diesel::result::Error::DatabaseError(
                            diesel::result::DatabaseErrorKind::SerializationFailure,
                            _,
                        )),
                    ) => {
                        retry_count += 1;
                        if retry_count > MAX_RETRIES {
                            tracing::error!(
                                "Exceeded max retries on SerializationFailure errors: {err:?}"
                            );
                            break Err(err);
                        } else {
                            // Retry on serialization errors: these are expected, because even though our
                            // transactions don't fight for the same rows, they will occasionally collide
                            // on index pages (e.g. increment_generation for unrelated shards can collide)
                            tracing::debug!(
                                "Retrying transaction on serialization failure {err:?}"
                            );
                            continue;
                        }
                    }
                    Err(e) => break Err(e),
                }
            }
        })
        .await
        .expect("Task panic")
    }
    /// When a node is first registered, persist it before using it for anything
@@ -318,13 +236,80 @@ impl Persistence {
    /// At startup, load the high level state for shards, such as their config + policy.  This will
    /// be enriched at runtime with state discovered on pageservers.
    pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> {
-        self.with_measured_conn(
+        let loaded = self
-            DatabaseOperation::ListTenantShards,
+            .with_measured_conn(
-            move |conn| -> DatabaseResult<_> {
+                DatabaseOperation::ListTenantShards,
-                Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
+                move |conn| -> DatabaseResult<_> {
-            },
+                    Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
-        )
+                },
-        .await
+            )
            .await?;
        if loaded.is_empty() {
            if let Some(path) = &self.json_path {
                if tokio::fs::try_exists(path)
                    .await
                    .map_err(|e| DatabaseError::Logical(format!("Error stat'ing JSON file: {e}")))?
                {
                    tracing::info!("Importing from legacy JSON format at {path}");
                    return self.list_tenant_shards_json(path).await;
                }
            }
        }
        Ok(loaded)
    }
    /// Test-compatibility shim: read tenant shards from the JSON file at `path` instead of
    /// the database.
    ///
    /// Shards whose placement policy is the legacy `"Single"` encoding are rewritten to
    /// `{"Attached":0}`. The decoded shards are then written to the database via
    /// `insert_tenant_shards` before being returned.
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Logical`] if the file cannot be read or deserialized, and
    /// propagates any error from the database insert.
    pub(crate) async fn list_tenant_shards_json(
        &self,
        path: &Utf8Path,
    ) -> DatabaseResult<Vec<TenantShardPersistence>> {
        let raw = tokio::fs::read(path)
            .await
            .map_err(|e| DatabaseError::Logical(format!("Failed to load JSON: {e}")))?;
        let mut persisted: JsonPersistence = serde_json::from_slice(&raw)
            .map_err(|e| DatabaseError::Logical(format!("Deserialization error: {e}")))?;
        // Backward compat for test data after PR https://github.com/neondatabase/neon/pull/7165
        persisted
            .tenants
            .values_mut()
            .filter(|shard| shard.placement_policy == "\"Single\"")
            .for_each(|shard| shard.placement_policy = "{\"Attached\":0}".to_string());
        let tenants = persisted
            .tenants
            .into_values()
            .collect::<Vec<TenantShardPersistence>>();
        // Bring the database in line with the contents of the JSON file.
        self.insert_tenant_shards(tenants.clone()).await?;
        Ok(tenants)
    }
    /// For use in testing environments, where we dump out JSON on shutdown.
    pub async fn write_tenants_json(&self) -> anyhow::Result<()> {
        let Some(path) = &self.json_path else {
            anyhow::bail!("Cannot write JSON if path isn't set (test environment bug)");
        };
        tracing::info!("Writing state to {path}...");
        let tenants = self.list_tenant_shards().await?;
        let mut tenants_map = HashMap::new();
        for tsp in tenants {
            let tenant_shard_id = TenantShardId {
                tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
                shard_number: ShardNumber(tsp.shard_number as u8),
                shard_count: ShardCount::new(tsp.shard_count as u8),
            };
            tenants_map.insert(tenant_shard_id, tsp);
        }
        let json = serde_json::to_string(&JsonPersistence {
            tenants: tenants_map,
        })?;
        tokio::fs::write(path, &json).await?;
        tracing::info!("Wrote {} bytes to {path}...", json.len());
        Ok(())
    }
    /// Tenants must be persisted before we schedule them for the first time.  This enables us
@@ -333,32 +318,18 @@ impl Persistence {
        &self,
        shards: Vec<TenantShardPersistence>,
    ) -> DatabaseResult<()> {
-        use crate::schema::metadata_health;
+        use crate::schema::tenant_shards::dsl::*;
        use crate::schema::tenant_shards;
        let now = chrono::Utc::now();
        let metadata_health_records = shards
            .iter()
            .map(|t| MetadataHealthPersistence {
                tenant_id: t.tenant_id.clone(),
                shard_number: t.shard_number,
                shard_count: t.shard_count,
                healthy: true,
                last_scrubbed_at: now,
            })
            .collect::<Vec<_>>();
        self.with_measured_conn(
            DatabaseOperation::InsertTenantShards,
            move |conn| -> DatabaseResult<()> {
-                diesel::insert_into(tenant_shards::table)
+                conn.transaction(|conn| -> QueryResult<()> {
-                    .values(&shards)
+                    for tenant in &shards {
-                    .execute(conn)?;
+                        diesel::insert_into(tenant_shards)
-
+                            .values(tenant)
-                diesel::insert_into(metadata_health::table)
+                            .execute(conn)?;
-                    .values(&metadata_health_records)
+                    }
-                    .execute(conn)?;
+                    Ok(())
                })?;
                Ok(())
            },
        )
@@ -372,10 +343,10 @@ impl Persistence {
        self.with_measured_conn(
            DatabaseOperation::DeleteTenant,
            move |conn| -> DatabaseResult<()> {
                // `metadata_health` status (if exists) is also deleted based on the cascade behavior.
                diesel::delete(tenant_shards)
                    .filter(tenant_id.eq(del_tenant_id.to_string()))
                    .execute(conn)?;
                Ok(())
            },
        )
@@ -403,15 +374,13 @@ impl Persistence {
    #[tracing::instrument(skip_all, fields(node_id))]
    pub(crate) async fn re_attach(
        &self,
-        input_node_id: NodeId,
+        node_id: NodeId,
    ) -> DatabaseResult<HashMap<TenantShardId, Generation>> {
        use crate::schema::nodes::dsl::scheduling_policy;
        use crate::schema::nodes::dsl::*;
        use crate::schema::tenant_shards::dsl::*;
        let updated = self
            .with_measured_conn(DatabaseOperation::ReAttach, move |conn| {
                let rows_updated = diesel::update(tenant_shards)
-                    .filter(generation_pageserver.eq(input_node_id.0 as i64))
+                    .filter(generation_pageserver.eq(node_id.0 as i64))
                    .set(generation.eq(generation + 1))
                    .execute(conn)?;
@@ -420,23 +389,9 @@ impl Persistence {
                // TODO: UPDATE+SELECT in one query
                let updated = tenant_shards
-                    .filter(generation_pageserver.eq(input_node_id.0 as i64))
+                    .filter(generation_pageserver.eq(node_id.0 as i64))
                    .select(TenantShardPersistence::as_select())
                    .load(conn)?;
                // If the node went through a drain and restart phase before re-attaching,
                // then reset its node scheduling policy to active.
                diesel::update(nodes)
                    .filter(node_id.eq(input_node_id.0 as i64))
                    .filter(
                        scheduling_policy
                            .eq(String::from(NodeSchedulingPolicy::PauseForRestart))
                            .or(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Draining)))
                            .or(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Filling))),
                    )
                    .set(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Active)))
                    .execute(conn)?;
                Ok(updated)
            })
            .await?;
@@ -503,44 +458,6 @@ impl Persistence {
        Ok(Generation::new(g as u32))
    }
    /// Look up, for every shard of a tenant, its id, its generation and the pageserver that
    /// generation is associated with.
    ///
    /// Callers that talk to the running shards of a tenant (e.g. timeline CRUD operations) need
    /// to know where each shard is attached _and_ in which generation, so that they can re-check
    /// the generation afterwards and confirm the operation happened in the latest generation.
    ///
    /// An empty vector is returned when the tenant does not exist. The output is sorted by
    /// shard number.
    ///
    /// # Panics
    ///
    /// Panics if a stored row does not contain a parseable tenant shard id.
    pub(crate) async fn peek_generations(
        &self,
        filter_tenant_id: TenantId,
    ) -> Result<Vec<(TenantShardId, Option<Generation>, Option<NodeId>)>, DatabaseError> {
        use crate::schema::tenant_shards::dsl::*;
        let rows = self
            .with_measured_conn(DatabaseOperation::PeekGenerations, move |conn| {
                Ok(tenant_shards
                    .filter(tenant_id.eq(filter_tenant_id.to_string()))
                    .select(TenantShardPersistence::as_select())
                    .order(shard_number)
                    .load(conn)?)
            })
            .await?;
        // NB: locals deliberately avoid the names of the DSL columns imported above.
        let mut peeked = Vec::with_capacity(rows.len());
        for row in rows {
            let tsid = row
                .get_tenant_shard_id()
                .expect("Corrupt tenant shard id in database");
            let attached_generation = row.generation.map(|g| Generation::new(g as u32));
            let attached_node = row.generation_pageserver.map(|n| NodeId(n as u64));
            peeked.push((tsid, attached_generation, attached_node));
        }
        Ok(peeked)
    }
    #[allow(non_local_definitions)]
    /// For use when updating a persistent property of a tenant, such as its config or placement_policy.
    ///
    /// Do not use this for setting generation, unless in the special onboarding code path (/location_config)
@@ -548,48 +465,59 @@ impl Persistence {
    /// that we only do the first time a tenant is set to an attached policy via /location_config.
    pub(crate) async fn update_tenant_shard(
        &self,
-        tenant: TenantFilter,
+        tenant_shard_id: TenantShardId,
-        input_placement_policy: Option<PlacementPolicy>,
+        input_placement_policy: PlacementPolicy,
-        input_config: Option<TenantConfig>,
+        input_config: TenantConfig,
        input_generation: Option<Generation>,
        input_scheduling_policy: Option<ShardSchedulingPolicy>,
    ) -> DatabaseResult<()> {
        use crate::schema::tenant_shards::dsl::*;
        self.with_measured_conn(DatabaseOperation::UpdateTenantShard, move |conn| {
-            let query = match tenant {
+            let query = diesel::update(tenant_shards)
-                TenantFilter::Shard(tenant_shard_id) => diesel::update(tenant_shards)
+                .filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
-                    .filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
+                .filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
-                    .filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
+                .filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32));
                    .filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
                    .into_boxed(),
                TenantFilter::Tenant(input_tenant_id) => diesel::update(tenant_shards)
                    .filter(tenant_id.eq(input_tenant_id.to_string()))
                    .into_boxed(),
            };
-            #[derive(AsChangeset)]
+            if let Some(input_generation) = input_generation {
-            #[diesel(table_name = crate::schema::tenant_shards)]
+                // Update includes generation column
-            struct ShardUpdate {
+                query
-                generation: Option<i32>,
+                    .set((
-                placement_policy: Option<String>,
+                        generation.eq(Some(input_generation.into().unwrap() as i32)),
-                config: Option<String>,
+                        placement_policy
-                scheduling_policy: Option<String>,
+                            .eq(serde_json::to_string(&input_placement_policy).unwrap()),
                        config.eq(serde_json::to_string(&input_config).unwrap()),
                    ))
                    .execute(conn)?;
            } else {
                // Update does not include generation column
                query
                    .set((
                        placement_policy
                            .eq(serde_json::to_string(&input_placement_policy).unwrap()),
                        config.eq(serde_json::to_string(&input_config).unwrap()),
                    ))
                    .execute(conn)?;
            }
-            let update = ShardUpdate {
+            Ok(())
-                generation: input_generation.map(|g| g.into().unwrap() as i32),
+        })
-                placement_policy: input_placement_policy
+        .await?;
                    .as_ref()
                    .map(|p| serde_json::to_string(&p).unwrap()),
                config: input_config
                    .as_ref()
                    .map(|c| serde_json::to_string(&c).unwrap()),
                scheduling_policy: input_scheduling_policy
                    .map(|p| serde_json::to_string(&p).unwrap()),
            };
-            query.set(update).execute(conn)?;
+        Ok(())
    }
    pub(crate) async fn update_tenant_config(
        &self,
        input_tenant_id: TenantId,
        input_config: TenantConfig,
    ) -> DatabaseResult<()> {
        use crate::schema::tenant_shards::dsl::*;
        self.with_measured_conn(DatabaseOperation::UpdateTenantConfig, move |conn| {
            diesel::update(tenant_shards)
                .filter(tenant_id.eq(input_tenant_id.to_string()))
                .set((config.eq(serde_json::to_string(&input_config).unwrap()),))
                .execute(conn)?;
            Ok(())
        })
@@ -631,51 +559,55 @@ impl Persistence {
    ) -> DatabaseResult<()> {
        use crate::schema::tenant_shards::dsl::*;
        self.with_measured_conn(DatabaseOperation::BeginShardSplit, move |conn| -> DatabaseResult<()> {
-            // Mark parent shards as splitting
+            conn.transaction(|conn| -> DatabaseResult<()> {
                // Mark parent shards as splitting
-            let updated = diesel::update(tenant_shards)
+                let updated = diesel::update(tenant_shards)
-                .filter(tenant_id.eq(split_tenant_id.to_string()))
+                    .filter(tenant_id.eq(split_tenant_id.to_string()))
-                .filter(shard_count.eq(old_shard_count.literal() as i32))
+                    .filter(shard_count.eq(old_shard_count.literal() as i32))
-                .set((splitting.eq(1),))
+                    .set((splitting.eq(1),))
-                .execute(conn)?;
+                    .execute(conn)?;
-            if u8::try_from(updated)
+                if u8::try_from(updated)
-                .map_err(|_| DatabaseError::Logical(
+                    .map_err(|_| DatabaseError::Logical(
-                    format!("Overflow existing shard count {} while splitting", updated))
+                        format!("Overflow existing shard count {} while splitting", updated))
-                )? != old_shard_count.count() {
+                    )? != old_shard_count.count() {
-                // Perhaps a deletion or another split raced with this attempt to split, mutating
+                    // Perhaps a deletion or another split raced with this attempt to split, mutating
-                // the parent shards that we intend to split. In this case the split request should fail.
+                    // the parent shards that we intend to split. In this case the split request should fail.
-                return Err(DatabaseError::Logical(
+                    return Err(DatabaseError::Logical(
-                    format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
+                        format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
-                ));
+                    ));
            }
            // FIXME: spurious clone to sidestep closure move rules
            let parent_to_children = parent_to_children.clone();
            // Insert child shards
            for (parent_shard_id, children) in parent_to_children {
                let mut parent = crate::schema::tenant_shards::table
                    .filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
                    .filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
                    .filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
                    .load::<TenantShardPersistence>(conn)?;
                let parent = if parent.len() != 1 {
                    return Err(DatabaseError::Logical(format!(
                        "Parent shard {parent_shard_id} not found"
                    )));
                } else {
                    parent.pop().unwrap()
                };
                for mut shard in children {
                    // Carry the parent's generation into the child
                    shard.generation = parent.generation;
                    debug_assert!(shard.splitting == SplitState::Splitting);
                    diesel::insert_into(tenant_shards)
                        .values(shard)
                        .execute(conn)?;
                }
-            }
+
                // FIXME: spurious clone to sidestep closure move rules
                let parent_to_children = parent_to_children.clone();
                // Insert child shards
                for (parent_shard_id, children) in parent_to_children {
                    let mut parent = crate::schema::tenant_shards::table
                        .filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
                        .filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
                        .filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
                        .load::<TenantShardPersistence>(conn)?;
                    let parent = if parent.len() != 1 {
                        return Err(DatabaseError::Logical(format!(
                            "Parent shard {parent_shard_id} not found"
                        )));
                    } else {
                        parent.pop().unwrap()
                    };
                    for mut shard in children {
                        // Carry the parent's generation into the child
                        shard.generation = parent.generation;
                        debug_assert!(shard.splitting == SplitState::Splitting);
                        diesel::insert_into(tenant_shards)
                            .values(shard)
                            .execute(conn)?;
                    }
                }
                Ok(())
            })?;
            Ok(())
        })
@@ -693,18 +625,22 @@ impl Persistence {
        self.with_measured_conn(
            DatabaseOperation::CompleteShardSplit,
            move |conn| -> DatabaseResult<()> {
-                // Drop parent shards
+                conn.transaction(|conn| -> QueryResult<()> {
-                diesel::delete(tenant_shards)
+                    // Drop parent shards
-                    .filter(tenant_id.eq(split_tenant_id.to_string()))
+                    diesel::delete(tenant_shards)
-                    .filter(shard_count.eq(old_shard_count.literal() as i32))
+                        .filter(tenant_id.eq(split_tenant_id.to_string()))
-                    .execute(conn)?;
+                        .filter(shard_count.eq(old_shard_count.literal() as i32))
                        .execute(conn)?;
-                // Clear sharding flag
+                    // Clear sharding flag
-                let updated = diesel::update(tenant_shards)
+                    let updated = diesel::update(tenant_shards)
-                    .filter(tenant_id.eq(split_tenant_id.to_string()))
+                        .filter(tenant_id.eq(split_tenant_id.to_string()))
-                    .set((splitting.eq(0),))
+                        .set((splitting.eq(0),))
-                    .execute(conn)?;
+                        .execute(conn)?;
-                debug_assert!(updated > 0);
+                    debug_assert!(updated > 0);
                    Ok(())
                })?;
                Ok(())
            },
@@ -723,192 +659,46 @@ impl Persistence {
        self.with_measured_conn(
            DatabaseOperation::AbortShardSplit,
            move |conn| -> DatabaseResult<AbortShardSplitStatus> {
-                // Clear the splitting state on parent shards
+                let aborted =
-                let updated = diesel::update(tenant_shards)
+                    conn.transaction(|conn| -> DatabaseResult<AbortShardSplitStatus> {
-                    .filter(tenant_id.eq(split_tenant_id.to_string()))
+                        // Clear the splitting state on parent shards
-                    .filter(shard_count.ne(new_shard_count.literal() as i32))
+                        let updated = diesel::update(tenant_shards)
-                    .set((splitting.eq(0),))
+                            .filter(tenant_id.eq(split_tenant_id.to_string()))
-                    .execute(conn)?;
+                            .filter(shard_count.ne(new_shard_count.literal() as i32))
                            .set((splitting.eq(0),))
                            .execute(conn)?;
-                // Parent shards are already gone: we cannot abort.
+                        // Parent shards are already gone: we cannot abort.
-                if updated == 0 {
+                        if updated == 0 {
-                    return Ok(AbortShardSplitStatus::Complete);
+                            return Ok(AbortShardSplitStatus::Complete);
-                }
+                        }
-                // Sanity check: if parent shards were present, their cardinality should
+                        // Sanity check: if parent shards were present, their cardinality should
-                // be less than the number of child shards.
+                        // be less than the number of child shards.
-                if updated >= new_shard_count.count() as usize {
+                        if updated >= new_shard_count.count() as usize {
-                    return Err(DatabaseError::Logical(format!(
+                            return Err(DatabaseError::Logical(format!(
-                        "Unexpected parent shard count {updated} while aborting split to \
+                                "Unexpected parent shard count {updated} while aborting split to \
                            count {new_shard_count:?} on tenant {split_tenant_id}"
-                    )));
+                            )));
-                }
+                        }
-                // Erase child shards
+                        // Erase child shards
-                diesel::delete(tenant_shards)
+                        diesel::delete(tenant_shards)
-                    .filter(tenant_id.eq(split_tenant_id.to_string()))
+                            .filter(tenant_id.eq(split_tenant_id.to_string()))
-                    .filter(shard_count.eq(new_shard_count.literal() as i32))
+                            .filter(shard_count.eq(new_shard_count.literal() as i32))
-                    .execute(conn)?;
+                            .execute(conn)?;
-                Ok(AbortShardSplitStatus::Aborted)
+                        Ok(AbortShardSplitStatus::Aborted)
                    })?;
                Ok(aborted)
            },
        )
        .await
    }
    /// Stores all the latest metadata health updates durably. Updates the existing entry on conflict.
    ///
    /// Records in `healthy_records` are upserted first, then records in `unhealthy_records`.
    /// When a row with the same `(tenant_id, shard_number, shard_count)` already exists, only its
    /// `healthy` flag and `last_scrubbed_at` (set to `now`) are overwritten.
    ///
    /// **Correctness:** every record in `healthy_records` and `unhealthy_records` should belong to
    /// the tenant shards managed by the storage controller.
    ///
    /// # Errors
    ///
    /// Propagates any database error from either upsert statement.
    #[allow(dead_code)]
    pub(crate) async fn update_metadata_health_records(
        &self,
        healthy_records: Vec<MetadataHealthPersistence>,
        unhealthy_records: Vec<MetadataHealthPersistence>,
        now: chrono::DateTime<chrono::Utc>,
    ) -> DatabaseResult<()> {
        use crate::schema::metadata_health::dsl::*;
        self.with_measured_conn(
            DatabaseOperation::UpdateMetadataHealth,
            move |conn| -> DatabaseResult<_> {
                // Upsert the healthy records: an existing row for the same shard is marked
                // healthy and stamped with `now`.
                diesel::insert_into(metadata_health)
                    .values(&healthy_records)
                    .on_conflict((tenant_id, shard_number, shard_count))
                    .do_update()
                    .set((healthy.eq(true), last_scrubbed_at.eq(now)))
                    .execute(conn)?;
                // Upsert the unhealthy records: an existing row for the same shard is marked
                // unhealthy and stamped with `now`.
                diesel::insert_into(metadata_health)
                    .values(&unhealthy_records)
                    .on_conflict((tenant_id, shard_number, shard_count))
                    .do_update()
                    .set((healthy.eq(false), last_scrubbed_at.eq(now)))
                    .execute(conn)?;
                Ok(())
            },
        )
        .await
    }
    /// Loads every row of the `metadata_health` table.
    ///
    /// # Errors
    ///
    /// Propagates any database error from the query.
    #[allow(dead_code)]
    pub(crate) async fn list_metadata_health_records(
        &self,
    ) -> DatabaseResult<Vec<MetadataHealthPersistence>> {
        self.with_measured_conn(
            DatabaseOperation::ListMetadataHealth,
            move |conn| -> DatabaseResult<_> {
                let records = crate::schema::metadata_health::table
                    .load::<MetadataHealthPersistence>(conn)?;
                Ok(records)
            },
        )
        .await
    }
    /// Lists all the metadata health records that are unhealthy, i.e. rows whose `healthy`
    /// column is `false`.
    ///
    /// # Errors
    ///
    /// Propagates any database error from the query.
    #[allow(dead_code)]
    pub(crate) async fn list_unhealthy_metadata_health_records(
        &self,
    ) -> DatabaseResult<Vec<MetadataHealthPersistence>> {
        use crate::schema::metadata_health::dsl::*;
        self.with_measured_conn(
            DatabaseOperation::ListMetadataHealthUnhealthy,
            move |conn| -> DatabaseResult<_> {
                let unhealthy = crate::schema::metadata_health::table
                    .filter(healthy.eq(false))
                    .load::<MetadataHealthPersistence>(conn)?;
                Ok(unhealthy)
            },
        )
        .await
    }
    /// Lists all the metadata health records whose `last_scrubbed_at` is strictly before
    /// `earlier`, i.e. records that have not been updated since that time.
    ///
    /// # Errors
    ///
    /// Propagates any database error from the query.
    #[allow(dead_code)]
    pub(crate) async fn list_outdated_metadata_health_records(
        &self,
        earlier: chrono::DateTime<chrono::Utc>,
    ) -> DatabaseResult<Vec<MetadataHealthPersistence>> {
        use crate::schema::metadata_health::dsl::*;
        self.with_measured_conn(
            DatabaseOperation::ListMetadataHealthOutdated,
            move |conn| -> DatabaseResult<_> {
                Ok(metadata_health
                    .filter(last_scrubbed_at.lt(earlier))
                    .load::<MetadataHealthPersistence>(conn)?)
            },
        )
        .await
    }
    /// Fetch the current leader entry, if any.
    ///
    /// All rows of the `controllers` table are loaded; zero rows yields `Ok(None)` and a single
    /// row yields `Ok(Some(..))`.
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Logical`] when the table holds more than one entry, and
    /// propagates any database error from the query.
    pub(crate) async fn get_leader(&self) -> DatabaseResult<Option<ControllerPersistence>> {
        let mut leader: Vec<ControllerPersistence> = self
            .with_measured_conn(
                DatabaseOperation::GetLeader,
                move |conn| -> DatabaseResult<_> {
                    let entries =
                        crate::schema::controllers::table.load::<ControllerPersistence>(conn)?;
                    Ok(entries)
                },
            )
            .await?;
        match leader.len() {
            0 | 1 => Ok(leader.pop()),
            _ => Err(DatabaseError::Logical(format!(
                "More than one entry present in the leader table: {leader:?}"
            ))),
        }
    }
    /// Record `new` as the leader, with compare-exchange semantics.
    ///
    /// When `prev` is `Some`, the row whose `address` and `started_at` both match `prev` is
    /// overwritten with the values from `new`. When `prev` is `None`, `new` is inserted as a
    /// fresh row without any comparison.
    ///
    /// # Errors
    ///
    /// Returns [`DatabaseError::Logical`] if no row was affected (e.g. `prev` did not match the
    /// stored entry), and propagates any database error from the statement.
    pub(crate) async fn update_leader(
        &self,
        prev: Option<ControllerPersistence>,
        new: ControllerPersistence,
    ) -> DatabaseResult<()> {
        use crate::schema::controllers::dsl::*;
        let rows_affected = self
            .with_measured_conn(
                DatabaseOperation::UpdateLeader,
                move |conn| -> DatabaseResult<usize> {
                    let count = if let Some(expected) = &prev {
                        // Compare-exchange: only touch the row that still matches `prev`.
                        diesel::update(controllers)
                            .filter(address.eq(expected.address.clone()))
                            .filter(started_at.eq(expected.started_at))
                            .set((
                                address.eq(new.address.clone()),
                                started_at.eq(new.started_at),
                            ))
                            .execute(conn)?
                    } else {
                        // No previous entry given: insert unconditionally.
                        diesel::insert_into(controllers)
                            .values(new.clone())
                            .execute(conn)?
                    };
                    Ok(count)
                },
            )
            .await?;
        if rows_affected > 0 {
            Ok(())
        } else {
            Err(DatabaseError::Logical(
                "Leader table update failed".to_string(),
            ))
        }
    }
 }
-/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably
+/// Parts of [`crate::tenant_state::TenantState`] that are stored durably
 #[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
 #[diesel(table_name = crate::schema::tenant_shards)]
 pub(crate) struct TenantShardPersistence {
@@ -938,8 +728,6 @@ pub(crate) struct TenantShardPersistence {
    pub(crate) splitting: SplitState,
    #[serde(default)]
    pub(crate) config: String,
    #[serde(default)]
    pub(crate) scheduling_policy: String,
 }
 impl TenantShardPersistence {
@@ -975,68 +763,3 @@ pub(crate) struct NodePersistence {
    pub(crate) listen_pg_addr: String,
    pub(crate) listen_pg_port: i32,
 }
 /// Tenant shard metadata health status that is stored durably.
 ///
 /// Maps to a row of the `metadata_health` table. The three identity fields hold the
 /// components of a `TenantShardId` in their database representation (see
 /// `MetadataHealthPersistence::new` and `get_tenant_shard_id`).
 #[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
 #[diesel(table_name = crate::schema::metadata_health)]
 pub(crate) struct MetadataHealthPersistence {
    // String form of the tenant id.
    #[serde(default)]
    pub(crate) tenant_id: String,
    // Shard number, widened to i32 for storage.
    #[serde(default)]
    pub(crate) shard_number: i32,
    // Literal shard count, widened to i32 for storage.
    #[serde(default)]
    pub(crate) shard_count: i32,
    // Whether the shard's metadata is recorded as healthy.
    pub(crate) healthy: bool,
    // Timestamp associated with this health record; presumably the time of the last
    // scrub -- confirm against the producer of these records.
    pub(crate) last_scrubbed_at: chrono::DateTime<chrono::Utc>,
 }
 impl MetadataHealthPersistence {
    /// Build a record for `tenant_shard_id`, converting the id's components into their
    /// database representation (string tenant id, `i32` shard number and shard count).
    pub fn new(
        tenant_shard_id: TenantShardId,
        healthy: bool,
        last_scrubbed_at: chrono::DateTime<chrono::Utc>,
    ) -> Self {
        Self {
            tenant_id: tenant_shard_id.tenant_id.to_string(),
            shard_number: tenant_shard_id.shard_number.0 as i32,
            shard_count: tenant_shard_id.shard_count.literal() as i32,
            healthy,
            last_scrubbed_at,
        }
    }
    /// Reassemble the [`TenantShardId`] from the stored columns.
    ///
    /// # Errors
    ///
    /// Returns the parse error if the stored tenant id string is not a valid tenant id.
    #[allow(dead_code)]
    pub(crate) fn get_tenant_shard_id(&self) -> Result<TenantShardId, hex::FromHexError> {
        let tenant_id = TenantId::from_str(self.tenant_id.as_str())?;
        let shard_number = ShardNumber(self.shard_number as u8);
        let shard_count = ShardCount::new(self.shard_count as u8);
        Ok(TenantShardId {
            tenant_id,
            shard_number,
            shard_count,
        })
    }
 }
 impl From<MetadataHealthPersistence> for MetadataHealthRecord {
    fn from(value: MetadataHealthPersistence) -> Self {
        MetadataHealthRecord {
            tenant_shard_id: value
                .get_tenant_shard_id()
                .expect("stored tenant id should be valid"),
            healthy: value.healthy,
            last_scrubbed_at: value.last_scrubbed_at,
        }
    }
 }
 /// A row of the `controllers` table.
 ///
 /// `get_leader` loads rows of this type as the current leader entry, and `update_leader`
 /// matches on both fields when performing its compare-exchange update.
 #[derive(
    Serialize, Deserialize, Queryable, Selectable, Insertable, Eq, PartialEq, Debug, Clone,
 )]
 #[diesel(table_name = crate::schema::controllers)]
 pub(crate) struct ControllerPersistence {
    // NOTE(review): presumably the address at which this controller instance is
    // reachable -- confirm against the code that constructs these entries.
    pub(crate) address: String,
    // NOTE(review): presumably the time this controller instance started -- confirm.
    pub(crate) started_at: chrono::DateTime<chrono::Utc>,
 }
--- a/control_plane/attachment_service/src/persistence/split_state.rs
+++ b/control_plane/attachment_service/src/persistence/split_state.rs
--- a/control_plane/attachment_service/src/reconciler.rs
+++ b/control_plane/attachment_service/src/reconciler.rs
@@ -1,18 +1,16 @@
 use crate::pageserver_client::PageserverClient;
 use crate::persistence::Persistence;
 use crate::service;
-use pageserver_api::controller_api::PlacementPolicy;
+use hyper::StatusCode;
 use pageserver_api::models::{
    LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig,
 };
 use pageserver_api::shard::{ShardIdentity, TenantShardId};
 use pageserver_client::mgmt_api;
 use reqwest::StatusCode;
 use std::collections::HashMap;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
 use tokio_util::sync::CancellationToken;
 use utils::failpoint_support;
 use utils::generation::Generation;
 use utils::id::{NodeId, TimelineId};
 use utils::lsn::Lsn;
@@ -20,18 +18,17 @@ use utils::sync::gate::GateGuard;
 use crate::compute_hook::{ComputeHook, NotifyError};
 use crate::node::Node;
-use crate::tenant_shard::{IntentState, ObservedState, ObservedStateLocation};
+use crate::tenant_state::{IntentState, ObservedState, ObservedStateLocation};
 const DEFAULT_HEATMAP_PERIOD: &str = "60s";
 /// Object with the lifetime of the background reconcile task that is created
 /// for tenants which have a difference between their intent and observed states.
 pub(super) struct Reconciler {
-    /// See [`crate::tenant_shard::TenantShard`] for the meanings of these fields: they are a snapshot
+    /// See [`crate::tenant_state::TenantState`] for the meanings of these fields: they are a snapshot
    /// of a tenant's state from when we spawned a reconcile task.
    pub(super) tenant_shard_id: TenantShardId,
    pub(crate) shard: ShardIdentity,
    pub(crate) placement_policy: PlacementPolicy,
    pub(crate) generation: Option<Generation>,
    pub(crate) intent: TargetState,
@@ -39,9 +36,6 @@ pub(super) struct Reconciler {
    /// to detach this tenant shard.
    pub(crate) detach: Vec<Node>,
    /// Configuration specific to this reconciler
    pub(crate) reconciler_config: ReconcilerConfig,
    pub(crate) config: TenantConfig,
    pub(crate) observed: ObservedState,
@@ -54,15 +48,11 @@ pub(super) struct Reconciler {
    /// To avoid stalling if the cloud control plane is unavailable, we may proceed
    /// past failures in [`ComputeHook::notify`], but we _must_ remember that we failed
-    /// so that we can set [`crate::tenant_shard::TenantShard::pending_compute_notification`] to ensure a later retry.
+    /// so that we can set [`crate::tenant_state::TenantState::pending_compute_notification`] to ensure a later retry.
    pub(crate) compute_notify_failure: bool,
    /// Reconciler is responsible for keeping alive semaphore units that limit concurrency on how many
    /// we will spawn.
    pub(crate) _resource_units: ReconcileUnits,
    /// A means to abort background reconciliation: it is essential to
-    /// call this when something changes in the original TenantShard that
+    /// call this when something changes in the original TenantState that
    /// will make this reconciliation impossible or unnecessary, for
    /// example when a pageserver node goes offline, or the PlacementPolicy for
    /// the tenant is changed.
@@ -76,79 +66,7 @@ pub(super) struct Reconciler {
    pub(crate) persistence: Arc<Persistence>,
 }
-pub(crate) struct ReconcilerConfigBuilder {
+/// This is a snapshot of [`crate::tenant_state::IntentState`], but it does not do any
    config: ReconcilerConfig,
 }
 impl ReconcilerConfigBuilder {
    /// Start from the default configuration, in which no timeout is overridden.
    pub(crate) fn new() -> Self {
        let config = ReconcilerConfig::default();
        Self { config }
    }
    /// Override the secondary warm-up timeout.
    pub(crate) fn secondary_warmup_timeout(mut self, value: Duration) -> Self {
        self.config.secondary_warmup_timeout = Some(value);
        self
    }
    /// Override the timeout of individual secondary download requests.
    pub(crate) fn secondary_download_request_timeout(mut self, value: Duration) -> Self {
        self.config.secondary_download_request_timeout = Some(value);
        self
    }
    /// Finish building and hand back the configuration.
    pub(crate) fn build(self) -> ReconcilerConfig {
        let Self { config } = self;
        config
    }
 }
 /// Tunables for a reconciler.  A field left as `None` falls back to the default
 /// returned by the matching getter.
 #[derive(Default, Debug, Copy, Clone)]
 pub(crate) struct ReconcilerConfig {
    /// During live migration, give up on warming up the secondary after this timeout.
    secondary_warmup_timeout: Option<Duration>,
    /// During live migrations, this is the amount of time that the pageserver will
    /// hold our poll.
    secondary_download_request_timeout: Option<Duration>,
 }
 impl ReconcilerConfig {
    /// Used by [`Self::get_secondary_warmup_timeout`] when no override is set.
    const SECONDARY_WARMUP_TIMEOUT_DEFAULT: Duration = Duration::from_secs(300);
    /// Used by [`Self::get_secondary_download_request_timeout`] when no override is set.
    const SECONDARY_DOWNLOAD_REQUEST_TIMEOUT_DEFAULT: Duration = Duration::from_secs(20);
    /// The configured secondary warm-up timeout, or 300 seconds if unset.
    pub(crate) fn get_secondary_warmup_timeout(&self) -> Duration {
        match self.secondary_warmup_timeout {
            Some(timeout) => timeout,
            None => Self::SECONDARY_WARMUP_TIMEOUT_DEFAULT,
        }
    }
    /// The configured secondary download request timeout, or 20 seconds if unset.
    pub(crate) fn get_secondary_download_request_timeout(&self) -> Duration {
        match self.secondary_download_request_timeout {
            Some(timeout) => timeout,
            None => Self::SECONDARY_DOWNLOAD_REQUEST_TIMEOUT_DEFAULT,
        }
    }
 }
 /// Resource units granted to a Reconciler.  The Reconciler should hold on to this value
 /// until it has finished doing I/O; the wrapped semaphore permit lives as long as this does.
 pub(crate) struct ReconcileUnits {
    _sem_units: tokio::sync::OwnedSemaphorePermit,
 }
 impl ReconcileUnits {
    /// Wrap an already-acquired semaphore permit.
    pub(crate) fn new(sem_units: tokio::sync::OwnedSemaphorePermit) -> Self {
        let _sem_units = sem_units;
        Self { _sem_units }
    }
 }
 /// This is a snapshot of [`crate::tenant_shard::IntentState`], but it does not do any
 /// reference counting for Scheduler.  The IntentState is what the scheduler works with,
 /// and the TargetState is just the instruction for a particular Reconciler run.
 #[derive(Debug)]
@@ -362,13 +280,11 @@ impl Reconciler {
    ) -> Result<(), ReconcileError> {
        // This is not the timeout for a request, but the total amount of time we're willing to wait
        // for a secondary location to get up to date before
-        let total_download_timeout = self.reconciler_config.get_secondary_warmup_timeout();
+        const TOTAL_DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(300);
        // This is the long-polling interval for the secondary download requests we send to the destination pageserver
        // during a migration.
-        let request_download_timeout = self
+        const REQUEST_DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(20);
            .reconciler_config
            .get_secondary_download_request_timeout();
        let started_at = Instant::now();
@@ -379,14 +295,14 @@ impl Reconciler {
                        client
                            .tenant_secondary_download(
                                tenant_shard_id,
-                                Some(request_download_timeout),
+                                Some(REQUEST_DOWNLOAD_TIMEOUT),
                            )
                            .await
                    },
                    &self.service_config.jwt_token,
                    1,
                    3,
-                    request_download_timeout * 2,
+                    REQUEST_DOWNLOAD_TIMEOUT * 2,
                    &self.cancel,
                )
                .await
@@ -414,7 +330,7 @@ impl Reconciler {
                return Ok(());
            } else if status == StatusCode::ACCEPTED {
                let total_runtime = started_at.elapsed();
-                if total_runtime > total_download_timeout {
+                if total_runtime > TOTAL_DOWNLOAD_TIMEOUT {
                    tracing::warn!("Timed out after {}ms downloading layers to {node}.  Progress so far: {}/{} layers, {}/{} bytes",
                        total_runtime.as_millis(),
                        progress.layers_downloaded,
@@ -571,7 +487,6 @@ impl Reconciler {
        while let Err(e) = self.compute_notify().await {
            match e {
                NotifyError::Fatal(_) => return Err(ReconcileError::Notify(e)),
                NotifyError::ShuttingDown => return Err(ReconcileError::Cancel),
                _ => {
                    tracing::warn!(
                        "Live migration blocked by compute notification error, retrying: {e}"
@@ -708,7 +623,7 @@ impl Reconciler {
                generation,
                &self.shard,
                &self.config,
-                &self.placement_policy,
+                !self.intent.secondary.is_empty(),
            );
            match self.observed.locations.get(&node.get_id()) {
                Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
@@ -720,8 +635,11 @@ impl Reconciler {
                    // reconcile this location.  This includes locations with different configurations, as well
                    // as locations with unknown (None) observed state.
-                    // Incrementing generation is the safe general case, but is inefficient for changes that only
+                    // The general case is to increment the generation.  However, there are cases
-                    // modify some details (e.g. the tenant's config).
+                    // where this is not necessary:
                    // - if we are only updating the TenantConf part of the location
                    // - if we are only changing the attachment mode (e.g. going to attachedmulti or attachedstale)
                    //   and the location was already in the correct generation
                    let increment_generation = match observed {
                        None => true,
                        Some(ObservedStateLocation { conf: None }) => true,
@@ -730,11 +648,18 @@ impl Reconciler {
                        }) => {
                            let generations_match = observed.generation == wanted_conf.generation;
-                            // We may skip incrementing the generation if the location is already in the expected mode and
+                            use LocationConfigMode::*;
-                            // generation.  In principle it would also be safe to skip from certain other modes (e.g. AttachedStale),
+                            let mode_transition_requires_gen_inc =
-                            // but such states are handled inside `live_migrate`, and if we see that state here we're cleaning up
+                                match (observed.mode, wanted_conf.mode) {
-                            // after a restart/crash, so fall back to the universally safe path of incrementing generation.
+                                    // Usually the short-lived attachment modes (multi and stale) are only used
-                            !generations_match || (observed.mode != wanted_conf.mode)
+                                    // in the case of [`Self::live_migrate`], but it is simple to handle them correctly
                                    // here too.  Locations are allowed to go Single->Stale and Multi->Single within the same generation.
                                    (AttachedSingle, AttachedStale) => false,
                                    (AttachedMulti, AttachedSingle) => false,
                                    (lhs, rhs) => lhs != rhs,
                                };
                            !generations_match || mode_transition_requires_gen_inc
                        }
                    };
@@ -804,8 +729,6 @@ impl Reconciler {
            self.location_config(&node, conf, None, false).await?;
        }
        failpoint_support::sleep_millis_async!("sleep-on-reconcile-epilogue");
        Ok(())
    }
@@ -826,10 +749,7 @@ impl Reconciler {
                // It is up to the caller whether they want to drop out on this error, but they don't have to:
                // in general we should avoid letting unavailability of the cloud control plane stop us from
                // making progress.
-                if !matches!(e, NotifyError::ShuttingDown) {
+                tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
                    tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
                }
                // Set this flag so that in our ReconcileResult we will set the flag on the shard that it
                // needs to retry at some point.
                self.compute_notify_failure = true;
@@ -860,15 +780,8 @@ pub(crate) fn attached_location_conf(
    generation: Generation,
    shard: &ShardIdentity,
    config: &TenantConfig,
-    policy: &PlacementPolicy,
+    has_secondaries: bool,
 ) -> LocationConfig {
    let has_secondaries = match policy {
        PlacementPolicy::Attached(0) | PlacementPolicy::Detached | PlacementPolicy::Secondary => {
            false
        }
        PlacementPolicy::Attached(_) => true,
    };
    LocationConfig {
        mode: LocationConfigMode::AttachedSingle,
        generation: generation.into(),
--- a/control_plane/attachment_service/src/scheduler.rs
+++ b/control_plane/attachment_service/src/scheduler.rs
@@ -0,0 +1,352 @@
 use crate::{node::Node, tenant_state::TenantState};
 use pageserver_api::controller_api::UtilizationScore;
 use serde::Serialize;
 use std::collections::HashMap;
 use utils::{http::error::ApiError, id::NodeId};
 /// Scenarios in which we cannot find a suitable location for a tenant shard
 #[derive(thiserror::Error, Debug)]
 pub enum ScheduleError {
    /// The scheduler does not know about any nodes at all.
    #[error("No pageservers found")]
    NoPageservers,
    /// Nodes exist, but each one was either excluded by the caller or is not
    /// currently schedulable.
    #[error("No pageserver found matching constraint")]
    ImpossibleConstraint,
 }
 impl From<ScheduleError> for ApiError {
    fn from(value: ScheduleError) -> Self {
        ApiError::Conflict(format!("Scheduling error: {}", value))
    }
 }
 /// Whether a node may currently have new shards scheduled onto it.
 #[derive(Serialize, Eq, PartialEq)]
 pub enum MaySchedule {
    /// The node may be scheduled onto; carries a utilization score for the node.
    Yes(UtilizationScore),
    /// The node must not receive new shards.
    No,
 }
 /// Per-node bookkeeping held by the [`Scheduler`].
 #[derive(Serialize)]
 struct SchedulerNode {
    /// How many shards are currently scheduled on this node, via their [`crate::tenant_state::IntentState`].
    shard_count: usize,
    /// Whether this node is currently eligible to have new shards scheduled (this is derived
    /// from a node's availability state and scheduling policy).
    may_schedule: MaySchedule,
 }
 impl PartialEq for SchedulerNode {
    /// Two nodes are equal when they have the same shard count and agree on whether
    /// scheduling is allowed.  The utilization score inside [`MaySchedule::Yes`] is
    /// deliberately not compared.
    fn eq(&self, other: &Self) -> bool {
        let same_schedulability = match (&self.may_schedule, &other.may_schedule) {
            (MaySchedule::Yes(_), MaySchedule::Yes(_)) => true,
            (MaySchedule::No, MaySchedule::No) => true,
            (MaySchedule::Yes(_), MaySchedule::No) | (MaySchedule::No, MaySchedule::Yes(_)) => {
                false
            }
        };
        self.shard_count == other.shard_count && same_schedulability
    }
 }
 impl Eq for SchedulerNode {}
 /// This type is responsible for selecting which node is used when a tenant shard needs to choose a pageserver
 /// on which to run.
 ///
 /// The type has no persistent state of its own: this is all populated at startup.  The Serialize
 /// impl is only for debug dumps.
 #[derive(Serialize)]
 pub(crate) struct Scheduler {
    /// Per-node scheduling state, keyed by node ID.
    nodes: HashMap<NodeId, SchedulerNode>,
 }
 impl Scheduler {
    /// Build a scheduler that knows about the given nodes.  Every node starts with a
    /// shard count of zero; its schedulability is taken from the node itself.
    pub(crate) fn new<'a>(nodes: impl Iterator<Item = &'a Node>) -> Self {
        let nodes = nodes
            .map(|node| {
                let id = node.get_id();
                let state = SchedulerNode {
                    shard_count: 0,
                    may_schedule: node.may_schedule(),
                };
                (id, state)
            })
            .collect::<HashMap<_, _>>();
        Self { nodes }
    }
    /// For debug/support: check that our internal statistics are in sync with the state of
    /// the nodes & tenant shards.
    ///
    /// If anything is inconsistent, log details and return an error.
    pub(crate) fn consistency_check<'a>(
        &self,
        nodes: impl Iterator<Item = &'a Node>,
        shards: impl Iterator<Item = &'a TenantState>,
    ) -> anyhow::Result<()> {
        let mut expect_nodes: HashMap<NodeId, SchedulerNode> = HashMap::new();
        for node in nodes {
            expect_nodes.insert(
                node.get_id(),
                SchedulerNode {
                    shard_count: 0,
                    may_schedule: node.may_schedule(),
                },
            );
        }
        for shard in shards {
            if let Some(node_id) = shard.intent.get_attached() {
                match expect_nodes.get_mut(node_id) {
                    Some(node) => node.shard_count += 1,
                    None => anyhow::bail!(
                        "Tenant {} references nonexistent node {}",
                        shard.tenant_shard_id,
                        node_id
                    ),
                }
            }
            for node_id in shard.intent.get_secondary() {
                match expect_nodes.get_mut(node_id) {
                    Some(node) => node.shard_count += 1,
                    None => anyhow::bail!(
                        "Tenant {} references nonexistent node {}",
                        shard.tenant_shard_id,
                        node_id
                    ),
                }
            }
        }
        for (node_id, expect_node) in &expect_nodes {
            let Some(self_node) = self.nodes.get(node_id) else {
                anyhow::bail!("Node {node_id} not found in Self")
            };
            if self_node != expect_node {
                tracing::error!("Inconsistency detected in scheduling state for node {node_id}");
                tracing::error!("Expected state: {}", serde_json::to_string(expect_node)?);
                tracing::error!("Self state: {}", serde_json::to_string(self_node)?);
                anyhow::bail!("Inconsistent state on {node_id}");
            }
        }
        if expect_nodes.len() != self.nodes.len() {
            // We just checked that all the expected nodes are present.  If the lengths don't match,
            // it means that we have nodes in Self that are unexpected.
            for node_id in self.nodes.keys() {
                if !expect_nodes.contains_key(node_id) {
                    anyhow::bail!("Node {node_id} found in Self but not in expected nodes");
                }
            }
        }
        Ok(())
    }
    /// Add one to a node's reference count.  The count guides scheduling decisions (it is
    /// not about memory management): each unit stands for one tenant shard whose IntentState
    /// targets this node.
    ///
    /// Calling this for a node the scheduler does not know about (i.e. one that was never passed
    /// to [`Self::new`] or [`Self::node_upsert`]) is an error: it is logged and trips a
    /// `debug_assert!`.
    pub(crate) fn node_inc_ref(&mut self, node_id: NodeId) {
        match self.nodes.get_mut(&node_id) {
            Some(node) => node.shard_count += 1,
            None => {
                tracing::error!("Scheduler missing node {node_id}");
                debug_assert!(false);
            }
        }
    }
    /// Decrement a node's reference count.  Inverse of [`Self::node_inc_ref`].
    pub(crate) fn node_dec_ref(&mut self, node_id: NodeId) {
        let Some(node) = self.nodes.get_mut(&node_id) else {
            debug_assert!(false);
            tracing::error!("Scheduler missing node {node_id}");
            return;
        };
        node.shard_count -= 1;
    }
    /// Register a node, or refresh the schedulability of one that is already known.
    ///
    /// An existing node keeps its shard count; a new node starts at zero.
    pub(crate) fn node_upsert(&mut self, node: &Node) {
        self.nodes
            .entry(node.get_id())
            .and_modify(|existing| existing.may_schedule = node.may_schedule())
            .or_insert_with(|| SchedulerNode {
                shard_count: 0,
                may_schedule: node.may_schedule(),
            });
    }
    /// Forget a node.  A warning is logged if the scheduler did not know about it.
    pub(crate) fn node_remove(&mut self, node_id: NodeId) {
        let removed = self.nodes.remove(&node_id);
        if removed.is_none() {
            tracing::warn!(node_id=%node_id, "Removed non-existent node from scheduler");
        }
    }
    /// Given several candidate nodes (for example the secondary locations that could be
    /// promoted to attached), pick the one the scheduler prefers.
    ///
    /// Currently the only criterion is eligibility: of the candidates that may be scheduled
    /// onto, the one appearing last in `nodes` is returned.  Candidates unknown to the
    /// scheduler count as ineligible.
    ///
    /// Returns None if the input is empty or no candidate is eligible for scheduling: the
    /// caller can pick a node some other way.
    pub(crate) fn node_preferred(&self, nodes: &[NodeId]) -> Option<NodeId> {
        // TODO: When the utilization score returned by the pageserver becomes meaningful,
        // schedule based on that instead of the shard count.
        let is_schedulable = |node_id: &NodeId| match self.nodes.get(node_id) {
            Some(known) => !matches!(known.may_schedule, MaySchedule::No),
            None => false,
        };
        // Search from the back: among equally eligible candidates the last one wins.
        nodes
            .iter()
            .copied()
            .rev()
            .find(|node_id| is_schedulable(node_id))
    }
    /// Choose a node for a new shard location.
    ///
    /// Candidates are all nodes that may be scheduled onto and are not listed in
    /// `hard_exclude`.  The candidate with the fewest shards wins; ties go to the node
    /// that sorts first by ID.
    ///
    /// # Errors
    ///
    /// - [`ScheduleError::NoPageservers`] if the scheduler knows no nodes at all.
    /// - [`ScheduleError::ImpossibleConstraint`] if no candidate remains after filtering.
    pub(crate) fn schedule_shard(&self, hard_exclude: &[NodeId]) -> Result<NodeId, ScheduleError> {
        if self.nodes.is_empty() {
            return Err(ScheduleError::NoPageservers);
        }
        let mut tenant_counts: Vec<(NodeId, usize)> = self
            .nodes
            .iter()
            .filter(|&(node_id, node)| {
                !hard_exclude.contains(node_id) && node.may_schedule != MaySchedule::No
            })
            .map(|(node_id, node)| (*node_id, node.shard_count))
            .collect();
        // Sort by tenant count.  Nodes with the same tenant count are sorted by ID.
        tenant_counts.sort_by_key(|&(node_id, shard_count)| (shard_count, node_id));
        let Some(&(node_id, _)) = tenant_counts.first() else {
            // After applying constraints, no pageservers were left.  We log some detail about
            // the state of nodes to help understand why this happened.  This is not logged as an error because
            // it is legitimately possible for enough nodes to be Offline to prevent scheduling a shard.
            tracing::info!("Scheduling failure, while excluding {hard_exclude:?}, node states:");
            for (node_id, node) in &self.nodes {
                tracing::info!(
                    "Node {node_id}: may_schedule={} shards={}",
                    node.may_schedule != MaySchedule::No,
                    node.shard_count
                );
            }
            return Err(ScheduleError::ImpossibleConstraint);
        };
        tracing::info!(
            "scheduler selected node {node_id} (elegible nodes {:?}, exclude: {hard_exclude:?})",
            tenant_counts.iter().map(|i| i.0 .0).collect::<Vec<_>>()
        );
        // Note that we do not update shard count here to reflect the scheduling: that
        // is IntentState's job when the scheduled location is used.
        Ok(node_id)
    }
 }
 #[cfg(test)]
 pub(crate) mod test_utils {
    use crate::node::Node;
    use pageserver_api::controller_api::{NodeAvailability, UtilizationScore};
    use std::collections::HashMap;
    use utils::id::NodeId;
    /// Test helper: create `n` nodes with IDs `1..=n`, each marked active.
    ///
    /// Host names and ports are derived from the node ID.
    pub(crate) fn make_test_nodes(n: u64) -> HashMap<NodeId, Node> {
        let mut nodes = HashMap::new();
        for i in 1..n + 1 {
            let mut node = Node::new(
                NodeId(i),
                format!("httphost-{i}"),
                80 + i as u16,
                format!("pghost-{i}"),
                5432 + i as u16,
            );
            node.set_availability(NodeAvailability::Active(UtilizationScore::worst()));
            assert!(node.is_available());
            nodes.insert(NodeId(i), node);
        }
        nodes
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    use crate::tenant_state::IntentState;
    /// Walk two tenants' intents through scheduling on a two-node scheduler and check
    /// that the per-node shard counts follow every change.
    #[test]
    fn scheduler_basic() -> anyhow::Result<()> {
        let nodes = test_utils::make_test_nodes(2);
        let mut scheduler = Scheduler::new(nodes.values());
        let mut t1_intent = IntentState::new();
        let mut t2_intent = IntentState::new();
        // Attach both tenants with no exclusions: the asserts below expect one shard on each node.
        let scheduled = scheduler.schedule_shard(&[])?;
        t1_intent.set_attached(&mut scheduler, Some(scheduled));
        let scheduled = scheduler.schedule_shard(&[])?;
        t2_intent.set_attached(&mut scheduler, Some(scheduled));
        assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
        assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
        // Add a secondary for tenant 1, excluding the pageservers it already uses: the
        // asserts below expect it to land on node 2.
        let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers())?;
        t1_intent.push_secondary(&mut scheduler, scheduled);
        assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
        assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 2);
        // Clearing tenant 1's intent must release both of its references.
        t1_intent.clear(&mut scheduler);
        assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
        assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
        if cfg!(debug_assertions) {
            // Dropping an IntentState without clearing it causes a panic in debug mode,
            // because we have failed to properly update scheduler shard counts.
            let result = std::panic::catch_unwind(move || {
                drop(t2_intent);
            });
            assert!(result.is_err());
        } else {
            // Without debug assertions, clear tenant 2 properly and check that all
            // counts return to zero.
            t2_intent.clear(&mut scheduler);
            assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
            assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 0);
        }
        Ok(())
    }
 }
--- a/control_plane/attachment_service/src/schema.rs
+++ b/control_plane/attachment_service/src/schema.rs
@@ -1,22 +1,5 @@
 // @generated automatically by Diesel CLI.
 diesel::table! {
    controllers (address, started_at) {
        address -> Varchar,
        started_at -> Timestamptz,
    }
 }
 diesel::table! {
    metadata_health (tenant_id, shard_number, shard_count) {
        tenant_id -> Varchar,
        shard_number -> Int4,
        shard_count -> Int4,
        healthy -> Bool,
        last_scrubbed_at -> Timestamptz,
    }
 }
 diesel::table! {
    nodes (node_id) {
        node_id -> Int8,
@@ -39,8 +22,7 @@ diesel::table! {
        placement_policy -> Varchar,
        splitting -> Int2,
        config -> Text,
        scheduling_policy -> Varchar,
    }
 }
-diesel::allow_tables_to_appear_in_same_query!(controllers, metadata_health, nodes, tenant_shards,);
+diesel::allow_tables_to_appear_in_same_query!(nodes, tenant_shards,);
--- a/control_plane/attachment_service/src/service.rs
+++ b/control_plane/attachment_service/src/service.rs
--- a/control_plane/attachment_service/src/tenant_state.rs
+++ b/control_plane/attachment_service/src/tenant_state.rs
@@ -0,0 +1,983 @@
 use std::{
    collections::{HashMap, HashSet},
    sync::Arc,
    time::Duration,
 };
 use crate::{
    metrics::{self, ReconcileCompleteLabelGroup, ReconcileOutcome},
    persistence::TenantShardPersistence,
 };
 use pageserver_api::controller_api::PlacementPolicy;
 use pageserver_api::{
    models::{LocationConfig, LocationConfigMode, TenantConfig},
    shard::{ShardIdentity, TenantShardId},
 };
 use serde::Serialize;
 use tokio::task::JoinHandle;
 use tokio_util::sync::CancellationToken;
 use tracing::{instrument, Instrument};
 use utils::{
    generation::Generation,
    id::NodeId,
    seqwait::{SeqWait, SeqWaitError},
    sync::gate::Gate,
 };
 use crate::{
    compute_hook::ComputeHook,
    node::Node,
    persistence::{split_state::SplitState, Persistence},
    reconciler::{
        attached_location_conf, secondary_location_conf, ReconcileError, Reconciler, TargetState,
    },
    scheduler::{ScheduleError, Scheduler},
    service, Sequence,
 };
 /// Serialization helper: emits the value guarded by a mutex as a string, using the
 /// value's `Display` implementation.
 ///
 /// Meant to be referenced from `#[serde(serialize_with = "...")]`.  Panics if the
 /// mutex is poisoned.
 fn read_mutex_content<S, T>(v: &std::sync::Mutex<T>, serializer: S) -> Result<S::Ok, S::Error>
 where
    S: serde::ser::Serializer,
    T: Clone + std::fmt::Display,
 {
    // Hold the lock only for the duration of the formatting call.
    let guard = v.lock().unwrap();
    serializer.collect_str(&*guard)
 }
 /// In-memory state for a particular tenant shard.
 ///
 /// This struct implements Serialize for debugging purposes, but is _not_ persisted
 /// itself: see [`crate::persistence`] for the subset of tenant shard state that is persisted.
 #[derive(Serialize)]
 pub(crate) struct TenantState {
    pub(crate) tenant_shard_id: TenantShardId,
    pub(crate) shard: ShardIdentity,
    // Runtime only: sequence used to coordinate updates to this object while
    // background reconcilers may be running.  A reconciler runs to a particular
    // sequence.
    pub(crate) sequence: Sequence,
    // Latest generation number: next time we attach, increment this
    // and use the incremented number when attaching.
    //
    // None represents an incompletely onboarded tenant via the [`Service::location_config`]
    // API, where this tenant may only run in PlacementPolicy::Secondary.
    pub(crate) generation: Option<Generation>,
    // High level description of how the tenant should be set up.  Provided
    // externally.
    pub(crate) policy: PlacementPolicy,
    // Low level description of exactly which pageservers should fulfil
    // which role.  Generated by `Self::schedule`.
    pub(crate) intent: IntentState,
    // Low level description of how the tenant is configured on pageservers:
    // if this does not match `Self::intent` then the tenant needs reconciliation
    // with `Self::maybe_reconcile`.
    pub(crate) observed: ObservedState,
    // Tenant configuration, passed through opaquely to the pageserver.  Identical
    // for all shards in a tenant.
    pub(crate) config: TenantConfig,
    /// If a reconcile task is currently in flight, it may be joined here (it is
    /// only safe to join if either the result has been received or the reconciler's
    /// cancellation token has been fired)
    #[serde(skip)]
    pub(crate) reconciler: Option<ReconcilerHandle>,
    /// If a tenant is being split, then all shards with that TenantId will have a
    /// SplitState set; this acts as a guard against other operations such as background
    /// reconciliation and timeline creation.
    pub(crate) splitting: SplitState,
    /// Optionally wait for reconciliation to complete up to a particular
    /// sequence number.
    #[serde(skip)]
    pub(crate) waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
    /// Indicates sequence number for which we have encountered an error reconciling.  If
    /// this advances ahead of [`Self::waiter`] then a reconciliation error has occurred,
    /// and callers should stop waiting for `waiter` and propagate the error.
    #[serde(skip)]
    pub(crate) error_waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
    /// The most recent error from a reconcile on this tenant
    /// TODO: generalize to an array of recent events
    /// TODO: use an ArcSwap instead of a mutex for faster reads?
    #[serde(serialize_with = "read_mutex_content")]
    pub(crate) last_error: std::sync::Arc<std::sync::Mutex<String>>,
    /// If we have a pending compute notification that for some reason we weren't able to send,
    /// set this to true. If this is set, calls to [`Self::maybe_reconcile`] will run a task to retry
    /// sending it.  This is the mechanism by which compute notifications are included in the scope
    /// of state that we publish externally in an eventually consistent way.
    pub(crate) pending_compute_notification: bool,
 }
 /// The set of nodes a tenant shard is intended to occupy: at most one attached
 /// location plus any number of secondary locations.
 ///
 /// Fields are private: the mutating methods on this type take a `Scheduler` and
 /// adjust its per-node reference counts alongside each change.
 #[derive(Default, Clone, Debug, Serialize)]
 pub(crate) struct IntentState {
    // Node intended to hold the attached location, if any.
    attached: Option<NodeId>,
    // Nodes intended to hold secondary locations.
    secondary: Vec<NodeId>,
 }
 impl IntentState {
    /// An intent that references no nodes at all.
    pub(crate) fn new() -> Self {
        Self {
            attached: None,
            secondary: Vec::new(),
        }
    }
    /// An intent with an optional attached node and no secondaries.  The scheduler's
    /// reference count for the node (if given) is incremented.
    pub(crate) fn single(scheduler: &mut Scheduler, node_id: Option<NodeId>) -> Self {
        match node_id {
            Some(attached) => {
                scheduler.node_inc_ref(attached);
            }
            None => {}
        }
        Self {
            attached: node_id,
            secondary: Vec::new(),
        }
    }
    /// Replace the attached node, releasing the scheduler reference on the previous
    /// node and taking one on the new node.  No-op if the value is unchanged.
    pub(crate) fn set_attached(&mut self, scheduler: &mut Scheduler, new_attached: Option<NodeId>) {
        if self.attached == new_attached {
            return;
        }
        if let Some(previous) = self.attached.take() {
            scheduler.node_dec_ref(previous);
        }
        if let Some(next) = new_attached {
            scheduler.node_inc_ref(next);
        }
        self.attached = new_attached;
    }
    /// Like set_attached, but the node is from [`Self::secondary`].  This swaps the node from
    /// secondary to attached while maintaining the scheduler's reference counts.
    pub(crate) fn promote_attached(
        &mut self,
        _scheduler: &mut Scheduler,
        promote_secondary: NodeId,
    ) {
        // The node is assumed to already hold a reference as a secondary: promoting a node
        // that is not in the secondary list would leave the scheduler's counts wrong.
        debug_assert!(self.secondary.contains(&promote_secondary));
        // TODO: when scheduler starts tracking attached + secondary counts separately, we will
        // need to call into it here.
        self.secondary
            .retain(|candidate| *candidate != promote_secondary);
        self.attached = Some(promote_secondary);
    }
    /// Add a secondary location, taking a scheduler reference on the node.
    pub(crate) fn push_secondary(&mut self, scheduler: &mut Scheduler, new_secondary: NodeId) {
        debug_assert!(!self.secondary.contains(&new_secondary));
        scheduler.node_inc_ref(new_secondary);
        self.secondary.push(new_secondary);
    }
    /// It is legal to call this with a node that is not currently a secondary: that is a no-op
    pub(crate) fn remove_secondary(&mut self, scheduler: &mut Scheduler, node_id: NodeId) {
        if let Some(index) = self.secondary.iter().position(|n| *n == node_id) {
            scheduler.node_dec_ref(node_id);
            self.secondary.remove(index);
        }
    }
    /// Drop every secondary location, releasing each node's scheduler reference.
    pub(crate) fn clear_secondary(&mut self, scheduler: &mut Scheduler) {
        self.secondary.drain(..).for_each(|released| {
            scheduler.node_dec_ref(released);
        });
    }
    /// Remove the last secondary node from the list of secondaries
    pub(crate) fn pop_secondary(&mut self, scheduler: &mut Scheduler) {
        let Some(removed) = self.secondary.pop() else {
            return;
        };
        scheduler.node_dec_ref(removed);
    }
    /// Drop the attached location and all secondaries, releasing their scheduler references.
    pub(crate) fn clear(&mut self, scheduler: &mut Scheduler) {
        if let Some(previous) = self.attached.take() {
            scheduler.node_dec_ref(previous);
        }
        self.clear_secondary(scheduler);
    }
    /// Every node referenced by this intent: the attached node first (if any),
    /// followed by the secondaries in order.
    pub(crate) fn all_pageservers(&self) -> Vec<NodeId> {
        self.attached
            .iter()
            .chain(self.secondary.iter())
            .copied()
            .collect()
    }
    /// The node intended to hold the attached location, if any.
    pub(crate) fn get_attached(&self) -> &Option<NodeId> {
        &self.attached
    }
    /// The nodes intended to hold secondary locations.
    pub(crate) fn get_secondary(&self) -> &Vec<NodeId> {
        &self.secondary
    }
    /// If the node is in use as the attached location, demote it into
    /// the list of secondary locations.  This is used when a node goes offline,
    /// and we want to use a different node for attachment, but not permanently
    /// forget the location on the offline node.
    ///
    /// Returns true if a change was made
    pub(crate) fn demote_attached(&mut self, node_id: NodeId) -> bool {
        if self.attached != Some(node_id) {
            return false;
        }
        // TODO: when scheduler starts tracking attached + secondary counts separately, we will
        // need to call into it here.
        self.attached = None;
        self.secondary.push(node_id);
        true
    }
 }
 /// Debug-build guard: an `IntentState` must be emptied before it is dropped.
 impl Drop for IntentState {
    fn drop(&mut self) {
        // Must clear before dropping, to avoid leaving stale refcounts in the Scheduler.
        // This is a `debug_assert!`, so the check is compiled out of release builds.
        debug_assert!(self.attached.is_none() && self.secondary.is_empty());
    }
 }
 /// Per-node record of what we last observed for a tenant shard.  See
 /// [`ObservedStateLocation`] for how to interpret present and absent entries.
 #[derive(Default, Clone, Serialize)]
 pub(crate) struct ObservedState {
    // Keyed by the node the observation refers to.
    pub(crate) locations: HashMap<NodeId, ObservedStateLocation>,
 }
 /// Our latest knowledge of how this tenant is configured in the outside world.
 ///
 /// Meaning:
 ///     * No instance of this type exists for a node: we are certain that we have nothing configured on that
 ///       node for this shard.
 ///     * Instance exists with conf==None: we *might* have some state on that node, but we don't know
 ///       what it is (e.g. we failed partway through configuring it).
 ///     * Instance exists with conf==Some: this tells us what we last successfully configured on this node,
 ///       and that configuration will still be present unless something external interfered.
 #[derive(Clone, Serialize)]
 pub(crate) struct ObservedStateLocation {
    /// If None, we do not know the status of this shard's location on this node, but
    /// we know that we might have some state there.
    pub(crate) conf: Option<LocationConfig>,
 }
 /// Handle for waiting on the outcome of reconciliation of one tenant shard up to
 /// a particular sequence number.
 pub(crate) struct ReconcilerWaiter {
    // For observability purposes, remember the ID of the shard we're
    // waiting for.
    pub(crate) tenant_shard_id: TenantShardId,
    // Waited on (with a timeout) for `seq`; completion is treated as success.
    seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
    // Waited on for `seq`; completion is treated as a reconcile failure.
    error_seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
    // Error text that is cloned into `ReconcileWaitError::Failed` on failure.
    error: std::sync::Arc<std::sync::Mutex<String>>,
    // The sequence number this waiter waits for.
    seq: Sequence,
 }
 /// Reasons that waiting on a [`ReconcilerWaiter`] can end without success.
 #[derive(thiserror::Error, Debug)]
 pub enum ReconcileWaitError {
    /// The success sequence was not reached within the caller's timeout.
    #[error("Timeout waiting for shard {0}")]
    Timeout(TenantShardId),
    /// One of the underlying sequence waits reported shutdown.
    #[error("shutting down")]
    Shutdown,
    /// The error sequence reached the awaited value; carries the recorded error text.
    #[error("Reconcile error on shard {0}: {1}")]
    Failed(TenantShardId, String),
 }
 impl ReconcilerWaiter {
    /// Wait until reconciliation reaches this waiter's sequence number, or fails.
    ///
    /// Two waits race: the success sequence (bounded by `timeout`) and the error
    /// sequence (unbounded).  Whichever finishes first decides the outcome:
    /// * success sequence reached: `Ok(())`
    /// * success wait timed out: [`ReconcileWaitError::Timeout`]
    /// * error sequence reached: [`ReconcileWaitError::Failed`] carrying the recorded error text
    /// * either wait reports shutdown: [`ReconcileWaitError::Shutdown`]
    pub(crate) async fn wait_timeout(&self, timeout: Duration) -> Result<(), ReconcileWaitError> {
        tokio::select! {
            reached = self.seq_wait.wait_for_timeout(self.seq, timeout) => {
                match reached {
                    Ok(_) => Ok(()),
                    Err(SeqWaitError::Timeout) => {
                        Err(ReconcileWaitError::Timeout(self.tenant_shard_id))
                    }
                    Err(SeqWaitError::Shutdown) => Err(ReconcileWaitError::Shutdown),
                }
            },
            errored = self.error_seq_wait.wait_for(self.seq) => {
                match errored {
                    // The error sequence caught up with us: report the recorded error.
                    Ok(_) => Err(ReconcileWaitError::Failed(
                        self.tenant_shard_id,
                        self.error.lock().unwrap().clone(),
                    )),
                    Err(SeqWaitError::Shutdown) => Err(ReconcileWaitError::Shutdown),
                    // This wait has no timeout, so it cannot time out.
                    Err(SeqWaitError::Timeout) => unreachable!(),
                }
            }
        }
    }
 }
 /// Having spawned a reconciler task, the tenant shard's state will carry enough
 /// information to optionally cancel & await it later.
 pub(crate) struct ReconcilerHandle {
    // Sequence number the reconciler task was spawned for.
    sequence: Sequence,
    // Join handle of the spawned reconciler task.
    handle: JoinHandle<()>,
    // Token used to cancel the reconciler task.
    cancel: CancellationToken,
 }
 /// When a reconcile task completes, it sends this result object
 /// to be applied to the primary TenantState.
 pub(crate) struct ReconcileResult {
    /// Sequence number the reconcile task was spawned for.
    pub(crate) sequence: Sequence,
    /// On errors, `observed` should be treated as an incomplete description
    /// of state (i.e. any nodes present in the result should override nodes
    /// present in the parent tenant state, but any unmentioned nodes should
    /// not be removed from parent tenant state)
    pub(crate) result: Result<(), ReconcileError>,
    pub(crate) tenant_shard_id: TenantShardId,
    /// Generation held by the reconciler when it finished.
    pub(crate) generation: Option<Generation>,
    /// Observed state held by the reconciler when it finished.
    pub(crate) observed: ObservedState,
    /// Set [`TenantState::pending_compute_notification`] from this flag
    pub(crate) pending_compute_notification: bool,
 }
 impl ObservedState {
    /// An observed state with no known locations.
    pub(crate) fn new() -> Self {
        let locations = HashMap::new();
        Self { locations }
    }
 }
 impl TenantState {
    /// Create the in-memory state for a shard with empty intent and observed state,
    /// default tenant config, generation 0, and no reconciler in flight.
    ///
    /// `sequence` starts at 1 while both waiters start at 0.
    pub(crate) fn new(
        tenant_shard_id: TenantShardId,
        shard: ShardIdentity,
        policy: PlacementPolicy,
    ) -> Self {
        let intent = IntentState::new();
        let observed = ObservedState::new();
        let last_error = Arc::new(std::sync::Mutex::new(String::new()));
        Self {
            tenant_shard_id,
            shard,
            sequence: Sequence(1),
            generation: Some(Generation::new(0)),
            policy,
            intent,
            observed,
            config: TenantConfig::default(),
            reconciler: None,
            splitting: SplitState::Idle,
            waiter: Arc::new(SeqWait::new(Sequence(0))),
            error_waiter: Arc::new(SeqWait::new(Sequence(0))),
            last_error,
            pending_compute_notification: false,
        }
    }
    /// For use on startup when learning state from pageservers: derive my [`IntentState`] from my
    /// [`ObservedState`], even if the result violates my [`PlacementPolicy`].  Call [`Self::schedule`]
    /// afterwards to get an intent state that complies with placement policy.  The overall goal is to
    /// schedule in a way that makes use of any configured locations that already exist in the outside world.
    pub(crate) fn intent_from_observed(&mut self, scheduler: &mut Scheduler) {
        // Among observed locations that are in some attached mode, pick the one with the
        // highest generation as our attached intent.
        let best_attached = self
            .observed
            .locations
            .iter()
            .filter_map(|(node_id, location)| {
                let conf = location.conf.as_ref()?;
                let is_attached = matches!(
                    conf.mode,
                    LocationConfigMode::AttachedMulti
                        | LocationConfigMode::AttachedSingle
                        | LocationConfigMode::AttachedStale
                );
                if is_attached {
                    Some((*node_id, conf.generation))
                } else {
                    None
                }
            })
            .max_by_key(|(_node_id, generation)| *generation);
        if let Some((node_id, _gen)) = best_attached {
            self.intent.set_attached(scheduler, Some(node_id));
        }
        // All remaining observed locations generate secondary intents.  This includes None
        // observations, as these may well have some local content on disk that is usable (this
        // is an edge case that might occur if we restarted during a migration or other change)
        //
        // We may leave intent.attached empty if we didn't find any attached locations: [`Self::schedule`]
        // will take care of promoting one of these secondaries to be attached.
        for node_id in self.observed.locations.keys() {
            if self.intent.attached != Some(*node_id) {
                self.intent.push_secondary(scheduler, *node_id);
            }
        }
    }
    /// Part of [`Self::schedule`]: ensure exactly one node is chosen as the attached
    /// pageserver for this shard.
    ///
    /// An existing attached node is kept as-is.  Otherwise a preferred secondary is
    /// promoted if the scheduler offers one, and failing that a fresh node is scheduled.
    ///
    /// Returns whether the intent was modified, and the NodeId selected.
    fn schedule_attached(
        &mut self,
        scheduler: &mut Scheduler,
    ) -> Result<(bool, NodeId), ScheduleError> {
        // No work to do if we already have an attached tenant
        if let Some(existing) = self.intent.attached {
            return Ok((false, existing));
        }
        let chosen = match scheduler.node_preferred(&self.intent.secondary) {
            Some(promote_secondary) => {
                // Promote a secondary
                tracing::debug!("Promoted secondary {} to attached", promote_secondary);
                self.intent.promote_attached(scheduler, promote_secondary);
                promote_secondary
            }
            None => {
                // Pick a fresh node: either we had no secondaries or none were schedulable
                let node_id = scheduler.schedule_shard(&self.intent.secondary)?;
                tracing::debug!("Selected {} as attached", node_id);
                self.intent.set_attached(scheduler, Some(node_id));
                node_id
            }
        };
        Ok((true, chosen))
    }
    /// Adjust [`Self::intent`] so that it satisfies [`Self::policy`], asking the scheduler
    /// for nodes where more locations are needed and releasing surplus ones.
    ///
    /// * `Attached(n)`: exactly one attached location and `n` secondaries.
    /// * `Secondary`: no attached location and exactly one secondary.
    /// * `Detached`: no locations at all.
    ///
    /// If the intent was changed, [`Self::sequence`] is incremented.
    ///
    /// # Errors
    ///
    /// Propagates any [`ScheduleError`] returned by the scheduler when it cannot provide a
    /// node.  Changes made to the intent before the error are not rolled back.
    pub(crate) fn schedule(&mut self, scheduler: &mut Scheduler) -> Result<(), ScheduleError> {
        // TODO: before scheduling new nodes, check if any existing content in
        // self.intent refers to pageservers that are offline, and pick other
        // pageservers if so.
        // TODO: respect the splitting bit on tenants: if they are currently splitting then we may not
        // change their attach location.
        let mut modified = false;
        // Add/remove nodes to fulfil policy
        use PlacementPolicy::*;
        match self.policy {
            Attached(secondary_count) => {
                let retain_secondaries = if self.intent.attached.is_none()
                    && scheduler.node_preferred(&self.intent.secondary).is_some()
                {
                    // If we have no attached, and one of the secondaries is eligible to be promoted, retain
                    // one more secondary than we usually would, as one of them will become attached further
                    // down this function.
                    secondary_count + 1
                } else {
                    secondary_count
                };
                while self.intent.secondary.len() > retain_secondaries {
                    // We have no particular preference for one secondary location over another: just
                    // arbitrarily drop from the end
                    self.intent.pop_secondary(scheduler);
                    modified = true;
                }
                // Should have exactly one attached, and N secondaries
                let (modified_attached, attached_node_id) = self.schedule_attached(scheduler)?;
                modified |= modified_attached;
                // Exclude every node this shard already uses, i.e. the attached node *and* the
                // secondaries we kept.  Excluding only the attached node would let the scheduler
                // hand back a node that is already a secondary, producing a duplicate entry.
                let mut used_pageservers = vec![attached_node_id];
                used_pageservers.extend(self.intent.secondary.iter().copied());
                while self.intent.secondary.len() < secondary_count {
                    let node_id = scheduler.schedule_shard(&used_pageservers)?;
                    self.intent.push_secondary(scheduler, node_id);
                    used_pageservers.push(node_id);
                    modified = true;
                }
            }
            Secondary => {
                if let Some(node_id) = self.intent.get_attached() {
                    // Populate secondary by demoting the attached node
                    self.intent.demote_attached(*node_id);
                    modified = true;
                } else if self.intent.secondary.is_empty() {
                    // Populate secondary by scheduling a fresh node
                    let node_id = scheduler.schedule_shard(&[])?;
                    self.intent.push_secondary(scheduler, node_id);
                    modified = true;
                }
                while self.intent.secondary.len() > 1 {
                    // We have no particular preference for one secondary location over another: just
                    // arbitrarily drop from the end
                    self.intent.pop_secondary(scheduler);
                    modified = true;
                }
            }
            Detached => {
                // Never add locations in this mode
                if self.intent.get_attached().is_some() || !self.intent.get_secondary().is_empty() {
                    self.intent.clear(scheduler);
                    modified = true;
                }
            }
        }
        if modified {
            self.sequence.0 += 1;
        }
        Ok(())
    }
    /// Query whether the tenant's observed state for attached node matches its intent state, and if so,
    /// yield the node ID.  This is appropriate for emitting compute hook notifications: we are checking that
    /// the node in question is not only where we intend to attach, but that the tenant is indeed already attached there.
    ///
    /// Reconciliation may still be needed for other aspects of state such as secondaries (see [`Self::dirty`]): this
    /// function should not be used to decide whether to reconcile.
    pub(crate) fn stably_attached(&self) -> Option<NodeId> {
        // Our intent is not to attach: nothing to report.
        let attach_intent = self.intent.attached?;
        // Bail out if we have no observed state for this node, or if the observed
        // configuration is None (i.e. in flux).
        let observed_conf = self
            .observed
            .locations
            .get(&attach_intent)?
            .conf
            .as_ref()?;
        let observed_attached = matches!(
            observed_conf.mode,
            LocationConfigMode::AttachedMulti
                | LocationConfigMode::AttachedSingle
                | LocationConfigMode::AttachedStale
        );
        if observed_attached {
            // Our intent and observed state agree that this node is in an attached state.
            Some(attach_intent)
        } else {
            // Our observed config is not an attached state
            None
        }
    }
    /// Whether any *available* node has observed state that differs from what the
    /// intent requires.
    ///
    /// A node counts as dirty if it is the intended attached or secondary location but its
    /// observed configuration is absent, unknown, or different from the wanted one, or if it
    /// has observed state while not being part of the intent.  Dirty nodes that are missing
    /// from `nodes` or not available are ignored.
    ///
    /// Panics if there is an attached intent while [`Self::generation`] is `None`.
    fn dirty(&self, nodes: &Arc<HashMap<NodeId, Node>>) -> bool {
        let mut dirty_nodes = HashSet::new();
        if let Some(node_id) = self.intent.attached {
            // Maybe panic: it is a severe bug if we try to attach while generation is null.
            let generation = self
                .generation
                .expect("Attempted to enter attached state without a generation");
            let wanted_conf = attached_location_conf(
                generation,
                &self.shard,
                &self.config,
                !self.intent.secondary.is_empty(),
            );
            let observed_conf = self
                .observed
                .locations
                .get(&node_id)
                .and_then(|location| location.conf.as_ref());
            if observed_conf != Some(&wanted_conf) {
                dirty_nodes.insert(node_id);
            }
        }
        for node_id in &self.intent.secondary {
            let wanted_conf = secondary_location_conf(&self.shard, &self.config);
            let observed_conf = self
                .observed
                .locations
                .get(node_id)
                .and_then(|location| location.conf.as_ref());
            if observed_conf != Some(&wanted_conf) {
                dirty_nodes.insert(*node_id);
            }
        }
        // We have observed state that isn't part of our intent: need to clean it up.
        dirty_nodes.extend(self.observed.locations.keys().copied().filter(|node_id| {
            self.intent.attached != Some(*node_id) && !self.intent.secondary.contains(node_id)
        }));
        // Only nodes that are known and available can be acted upon.
        dirty_nodes.retain(|node_id| nodes.get(node_id).map_or(false, |node| node.is_available()));
        !dirty_nodes.is_empty()
    }
    /// Spawn a background reconciler task for this shard if one is needed.
    ///
    /// A reconcile is needed when an available node has dirty or ambiguous observed state,
    /// or when a compute notification is pending.
    ///
    /// Returns:
    /// * `None` if no reconcile is needed, if the shard is currently splitting, or if `gate`
    ///   could not be entered (shutting down).
    /// * `Some(waiter)` otherwise: either for a reconciler already in flight for the current
    ///   sequence, or for a newly spawned one.  A new reconciler advances [`Self::sequence`] first.
    ///
    /// The spawned task sends a [`ReconcileResult`] on `result_tx` when it finishes, unless it
    /// sees its cancellation token already fired before starting work.
    ///
    /// Panics if an observed location refers to a node that is missing from `pageservers`.
    // The reconciler needs all of these collaborators, hence the long argument list.
    #[allow(clippy::too_many_arguments)]
    #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
    pub(crate) fn maybe_reconcile(
        &mut self,
        result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
        pageservers: &Arc<HashMap<NodeId, Node>>,
        compute_hook: &Arc<ComputeHook>,
        service_config: &service::Config,
        persistence: &Arc<Persistence>,
        gate: &Gate,
        cancel: &CancellationToken,
    ) -> Option<ReconcilerWaiter> {
        // If there are any ambiguous observed states, and the nodes they refer to are available,
        // we should reconcile to clean them up.
        let mut dirty_observed = false;
        for (node_id, observed_loc) in &self.observed.locations {
            let node = pageservers
                .get(node_id)
                .expect("Nodes may not be removed while referenced");
            if observed_loc.conf.is_none() && node.is_available() {
                dirty_observed = true;
                break;
            }
        }
        let active_nodes_dirty = self.dirty(pageservers);
        // Even if there is no pageserver work to be done, if we have a pending notification to computes,
        // wake up a reconciler to send it.
        let do_reconcile =
            active_nodes_dirty || dirty_observed || self.pending_compute_notification;
        if !do_reconcile {
            tracing::info!("Not dirty, no reconciliation needed.");
            return None;
        }
        // If we are currently splitting, then never start a reconciler task: the splitting logic
        // requires that shards are not interfered with while it runs. Do this check here rather than
        // up top, so that we only log this message if we would otherwise have done a reconciliation.
        if !matches!(self.splitting, SplitState::Idle) {
            tracing::info!("Refusing to reconcile, splitting in progress");
            return None;
        }
        // Reconcile already in flight for the current sequence?  Then hand out a waiter
        // for it instead of spawning another task.
        if let Some(handle) = &self.reconciler {
            if handle.sequence == self.sequence {
                tracing::info!(
                    "Reconciliation already in progress for sequence {:?}",
                    self.sequence,
                );
                return Some(ReconcilerWaiter {
                    tenant_shard_id: self.tenant_shard_id,
                    seq_wait: self.waiter.clone(),
                    error_seq_wait: self.error_waiter.clone(),
                    error: self.last_error.clone(),
                    seq: self.sequence,
                });
            }
        }
        // Build list of nodes from which the reconciler should detach: every node with
        // observed state that is neither the attached intent nor a secondary intent.
        let mut detach = Vec::new();
        for node_id in self.observed.locations.keys() {
            if self.intent.get_attached() != &Some(*node_id)
                && !self.intent.secondary.contains(node_id)
            {
                detach.push(
                    pageservers
                        .get(node_id)
                        .expect("Intent references non-existent pageserver")
                        .clone(),
                )
            }
        }
        // Reconcile in flight for a stale sequence?  Our sequence's task will wait for it before
        // doing our sequence's work.
        let old_handle = self.reconciler.take();
        let Ok(gate_guard) = gate.enter() else {
            // Shutting down, don't start a reconciler
            return None;
        };
        // Advance the sequence before spawning a reconciler, so that sequence waiters
        // can distinguish between before+after the reconcile completes.
        self.sequence = self.sequence.next();
        let reconciler_cancel = cancel.child_token();
        let reconciler_intent = TargetState::from_intent(pageservers, &self.intent);
        // The reconciler works on its own copies of our state; results come back via `result_tx`.
        let mut reconciler = Reconciler {
            tenant_shard_id: self.tenant_shard_id,
            shard: self.shard,
            generation: self.generation,
            intent: reconciler_intent,
            detach,
            config: self.config.clone(),
            observed: self.observed.clone(),
            compute_hook: compute_hook.clone(),
            service_config: service_config.clone(),
            _gate_guard: gate_guard,
            cancel: reconciler_cancel.clone(),
            persistence: persistence.clone(),
            compute_notify_failure: false,
        };
        let reconcile_seq = self.sequence;
        tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence);
        // Capture the flag now: the task cannot read `self` once spawned.
        let must_notify = self.pending_compute_notification;
        let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
                                                        tenant_id=%reconciler.tenant_shard_id.tenant_id,
                                                        shard_id=%reconciler.tenant_shard_id.shard_slug());
        metrics::METRICS_REGISTRY
            .metrics_group
            .storage_controller_reconcile_spawn
            .inc();
        let result_tx = result_tx.clone();
        let join_handle = tokio::task::spawn(
            async move {
                // Cancel any previous reconcile task and wait for it to finish before we start
                if let Some(old_handle) = old_handle {
                    old_handle.cancel.cancel();
                    if let Err(e) = old_handle.handle.await {
                        // We can't do much with this other than log it: the task is done, so
                        // we may proceed with our work.
                        tracing::error!("Unexpected join error waiting for reconcile task: {e}");
                    }
                }
                // Early check for cancellation before doing any work
                // TODO: wrap all remote API operations in cancellation check
                // as well.
                if reconciler.cancel.is_cancelled() {
                    metrics::METRICS_REGISTRY
                        .metrics_group
                        .storage_controller_reconcile_complete
                        .inc(ReconcileCompleteLabelGroup {
                            status: ReconcileOutcome::Cancel,
                        });
                    // Note: no ReconcileResult is sent on this path.
                    return;
                }
                // Attempt to make observed state match intent state
                let result = reconciler.reconcile().await;
                // If we know we had a pending compute notification from some previous action, send a notification irrespective
                // of whether the above reconcile() did any work
                if result.is_ok() && must_notify {
                    // If this fails we will send the need to retry in [`ReconcileResult::pending_compute_notification`]
                    reconciler.compute_notify().await.ok();
                }
                // Update result counter
                let outcome_label = match &result {
                    Ok(_) => ReconcileOutcome::Success,
                    Err(ReconcileError::Cancel) => ReconcileOutcome::Cancel,
                    Err(_) => ReconcileOutcome::Error,
                };
                metrics::METRICS_REGISTRY
                    .metrics_group
                    .storage_controller_reconcile_complete
                    .inc(ReconcileCompleteLabelGroup {
                        status: outcome_label,
                    });
                // A send error is deliberately ignored.
                result_tx
                    .send(ReconcileResult {
                        sequence: reconcile_seq,
                        result,
                        tenant_shard_id: reconciler.tenant_shard_id,
                        generation: reconciler.generation,
                        observed: reconciler.observed,
                        pending_compute_notification: reconciler.compute_notify_failure,
                    })
                    .ok();
            }
            .instrument(reconciler_span),
        );
        // Remember the task so that a later call can cancel and await it.
        self.reconciler = Some(ReconcilerHandle {
            sequence: self.sequence,
            handle: join_handle,
            cancel: reconciler_cancel,
        });
        Some(ReconcilerWaiter {
            tenant_shard_id: self.tenant_shard_id,
            seq_wait: self.waiter.clone(),
            error_seq_wait: self.error_waiter.clone(),
            error: self.last_error.clone(),
            seq: self.sequence,
        })
    }
    /// Invoked while the service applies an emitted ReconcileResult to our state.
    ///
    /// If the result's `sequence` is at or beyond the sequence of the reconciler
    /// handle we currently hold, that handle is dropped, which marks that no
    /// reconciliation is in progress any more.  Results older than the current
    /// handle leave it untouched.
    pub(crate) fn reconcile_complete(&mut self, sequence: Sequence) {
        let superseded = matches!(
            &self.reconciler,
            Some(in_flight) if in_flight.sequence <= sequence
        );
        if superseded {
            self.reconciler = None;
        }
    }
    /// Forget every piece of state that refers to `node_id`: its observed
    /// location, any secondary intent, and the attached intent if it points at
    /// this node.  No rescheduling is attempted here.
    pub(crate) fn deref_node(&mut self, node_id: NodeId) {
        self.observed.locations.remove(&node_id);
        self.intent.secondary.retain(|secondary| *secondary != node_id);
        if let Some(attached) = self.intent.attached {
            if attached == node_id {
                self.intent.attached = None;
            }
        }
        // After the cleanup above the intent must not mention the node at all.
        debug_assert!(!self.intent.all_pageservers().contains(&node_id));
    }
    /// Build the [`TenantShardPersistence`] record for this shard.
    ///
    /// Identifiers and sizes are narrowed to the signed integer widths of the
    /// record, the placement policy and tenant config are stored as JSON strings,
    /// and `splitting` is always written as its default value.
    ///
    /// Panics if the policy or the config cannot be serialized to JSON.
    pub(crate) fn to_persistent(&self) -> TenantShardPersistence {
        let shard_id = &self.tenant_shard_id;
        let tenant_id = shard_id.tenant_id.to_string();
        let shard_number = shard_id.shard_number.0 as i32;
        let shard_count = shard_id.shard_count.literal() as i32;
        let shard_stripe_size = self.shard.stripe_size.0 as i32;
        // A generation without a concrete value is written as 0.
        let generation = self.generation.map(|g| g.into().unwrap_or(0) as i32);
        let generation_pageserver = self.intent.get_attached().map(|n| n.0 as i64);
        let placement_policy = serde_json::to_string(&self.policy).unwrap();
        let config = serde_json::to_string(&self.config).unwrap();
        TenantShardPersistence {
            tenant_id,
            shard_number,
            shard_count,
            shard_stripe_size,
            generation,
            generation_pageserver,
            placement_policy,
            config,
            splitting: SplitState::default(),
        }
    }
 }
 #[cfg(test)]
 pub(crate) mod tests {
    use pageserver_api::{
        controller_api::NodeAvailability,
        shard::{ShardCount, ShardNumber},
    };
    use utils::id::TenantId;
    use crate::scheduler::test_utils::make_test_nodes;
    use super::*;
    /// Build an unsharded test tenant (shard 0 of 1) with a freshly generated
    /// tenant ID, a stripe size of 32768 and the given placement `policy`.
    fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantState {
        let (shard_number, shard_count) = (ShardNumber(0), ShardCount::new(1));
        let identity = ShardIdentity::new(
            shard_number,
            shard_count,
            pageserver_api::shard::ShardStripeSize(32768),
        )
        .unwrap();
        let tenant_shard_id = TenantShardId {
            tenant_id: TenantId::generate(),
            shard_number,
            shard_count,
        };
        TenantState::new(tenant_shard_id, identity, policy)
    }
    /// Exercise the scheduling path taken when the node holding the attached
    /// location of an HA tenant (`Attached(1)`) is marked offline.
    #[test]
    fn tenant_ha_scheduling() -> anyhow::Result<()> {
        // Three nodes exist, but the tenant only needs two of them; the third
        // one is expected to stay unused.
        let mut nodes = make_test_nodes(3);
        let mut scheduler = Scheduler::new(nodes.values());
        let mut shard = make_test_tenant_shard(PlacementPolicy::Attached(1));
        shard
            .schedule(&mut scheduler)
            .expect("we have enough nodes, scheduling should work");
        // Initially we expect to be scheduled onto two different nodes
        assert_eq!(shard.intent.secondary.len(), 1);
        assert!(shard.intent.attached.is_some());
        let original_attached = shard.intent.attached.unwrap();
        let original_secondary = *shard.intent.secondary.iter().last().unwrap();
        assert_ne!(original_attached, original_secondary);
        // Reporting the attached node as offline demotes it to a secondary
        let demoted = shard.intent.demote_attached(original_attached);
        assert!(demoted);
        assert!(shard.intent.attached.is_none());
        assert_eq!(shard.intent.secondary.len(), 2);
        // Tell the scheduler that the node is now offline
        let offline_node = nodes.get_mut(&original_attached).unwrap();
        offline_node.set_availability(NodeAvailability::Offline);
        scheduler.node_upsert(nodes.get(&original_attached).unwrap());
        // Rescheduling should promote the still-available secondary to attached
        shard
            .schedule(&mut scheduler)
            .expect("active nodes are available");
        assert_eq!(shard.intent.attached, Some(original_secondary));
        // ... while the previously attached node is retained as a secondary
        let retained_secondary = *shard.intent.secondary.iter().last().unwrap();
        assert_eq!(retained_secondary, original_attached);
        shard.intent.clear(&mut scheduler);
        Ok(())
    }
    /// Check that `intent_from_observed` derives the intent from observed
    /// locations: the attached location with the highest generation becomes
    /// the attached intent and the remaining one becomes a secondary.
    #[test]
    fn intent_from_observed() -> anyhow::Result<()> {
        let nodes = make_test_nodes(3);
        let mut scheduler = Scheduler::new(nodes.values());
        let mut tenant_shard = make_test_tenant_shard(PlacementPolicy::Attached(1));
        // Two observed attached locations that differ only in node, mode and generation.
        let observed = [
            (NodeId(3), LocationConfigMode::AttachedMulti, 2),
            (NodeId(2), LocationConfigMode::AttachedStale, 1),
        ];
        for (node_id, mode, generation) in observed {
            let conf = LocationConfig {
                mode,
                generation: Some(generation),
                secondary_conf: None,
                shard_number: tenant_shard.shard.number.0,
                shard_count: tenant_shard.shard.count.literal(),
                shard_stripe_size: tenant_shard.shard.stripe_size.0,
                tenant_conf: TenantConfig::default(),
            };
            tenant_shard
                .observed
                .locations
                .insert(node_id, ObservedStateLocation { conf: Some(conf) });
        }
        tenant_shard.intent_from_observed(&mut scheduler);
        // The attached location with the highest generation is used as attached
        assert_eq!(tenant_shard.intent.attached, Some(NodeId(3)));
        // The other location is used as a secondary
        assert_eq!(tenant_shard.intent.secondary, vec![NodeId(2)]);
        scheduler.consistency_check(nodes.values(), [&tenant_shard].into_iter())?;
        tenant_shard.intent.clear(&mut scheduler);
        Ok(())
    }
 }
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -36,11 +36,11 @@ use utils::pid_file::{self, PidFileRead};
 // it's waiting. If the process hasn't started/stopped after 5 seconds,
 // it prints a notice that it's taking long, but keeps waiting.
 //
-const STOP_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
+const RETRY_UNTIL_SECS: u64 = 10;
-const STOP_RETRIES: u128 = STOP_RETRY_TIMEOUT.as_millis() / RETRY_INTERVAL.as_millis();
+const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
-const RETRY_INTERVAL: Duration = Duration::from_millis(100);
+const RETRY_INTERVAL_MILLIS: u64 = 100;
-const DOT_EVERY_RETRIES: u128 = 10;
+const DOT_EVERY_RETRIES: u64 = 10;
-const NOTICE_AFTER_RETRIES: u128 = 50;
+const NOTICE_AFTER_RETRIES: u64 = 50;
 /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
 /// it itself.
@@ -52,7 +52,6 @@ pub enum InitialPidFile {
 }
 /// Start a background child process using the parameters given.
 #[allow(clippy::too_many_arguments)]
 pub async fn start_process<F, Fut, AI, A, EI>(
    process_name: &str,
    datadir: &Path,
@@ -60,7 +59,6 @@ pub async fn start_process<F, Fut, AI, A, EI>(
    args: AI,
    envs: EI,
    initial_pid_file: InitialPidFile,
    retry_timeout: &Duration,
    process_status_check: F,
 ) -> anyhow::Result<()>
 where
@@ -71,10 +69,6 @@ where
    // Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
    EI: IntoIterator<Item = (String, String)>,
 {
    let retries: u128 = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
    if !datadir.metadata().context("stat datadir")?.is_dir() {
        anyhow::bail!("`datadir` must be a directory when calling this function: {datadir:?}");
    }
    let log_path = datadir.join(format!("{process_name}.log"));
    let process_log_file = fs::OpenOptions::new()
        .create(true)
@@ -91,17 +85,8 @@ where
    let background_command = command
        .stdout(process_log_file)
        .stderr(same_file_for_stderr)
-        .args(args)
+        .args(args);
-        // spawn all child processes in their datadir, useful for all kinds of things,
+    let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
        // not least cleaning up child processes e.g. after an unclean exit from the test suite:
        // ```
        // lsof  -d cwd -a +D  Users/cs/src/neon/test_output
        // ```
        .current_dir(datadir);
    let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(
        fill_rust_env_vars(background_command),
    ));
    filled_cmd.envs(envs);
    let pid_file_to_check = match &initial_pid_file {
@@ -133,7 +118,7 @@ where
        .unwrap();
    });
-    for retries in 0..retries {
+    for retries in 0..RETRIES {
        match process_started(pid, pid_file_to_check, &process_status_check).await {
            Ok(true) => {
                println!("\n{process_name} started and passed status check, pid: {pid}");
@@ -151,7 +136,7 @@ where
                    print!(".");
                    io::stdout().flush().unwrap();
                }
-                thread::sleep(RETRY_INTERVAL);
+                thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
            }
            Err(e) => {
                println!("error starting process {process_name:?}: {e:#}");
@@ -160,10 +145,9 @@ where
        }
    }
    println!();
-    anyhow::bail!(format!(
+    anyhow::bail!(
-        "{} did not start+pass status checks within {:?} seconds",
+        "{process_name} did not start+pass status checks within {RETRY_UNTIL_SECS} seconds"
-        process_name, retry_timeout
+    );
    ));
 }
 /// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
@@ -219,7 +203,7 @@ pub fn stop_process(
 }
 pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
-    for retries in 0..STOP_RETRIES {
+    for retries in 0..RETRIES {
        match process_has_stopped(pid) {
            Ok(true) => {
                println!("\n{process_name} stopped");
@@ -235,7 +219,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
                    print!(".");
                    io::stdout().flush().unwrap();
                }
-                thread::sleep(RETRY_INTERVAL);
+                thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
            }
            Err(e) => {
                println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -244,10 +228,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
        }
    }
    println!();
-    anyhow::bail!(format!(
+    anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
        "{} with pid {} did not stop in {:?} seconds",
        process_name, pid, STOP_RETRY_TIMEOUT
    ));
 }
 fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
@@ -287,15 +268,6 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
    cmd
 }
 /// Forward every `NEON_`-prefixed variable of this process's environment to `cmd`.
 ///
 /// The environment is read with `std::env::vars_os` rather than `std::env::vars`:
 /// the latter panics as soon as *any* variable (even an unrelated one) has a name
 /// or value that is not valid Unicode.  Names and values are handed to `cmd` as
 /// raw OS strings, so nothing is re-encoded on the way.
 ///
 /// Returns the same `cmd` so that calls can be chained.
 fn fill_env_vars_prefixed_neon(cmd: &mut Command) -> &mut Command {
    let neon_vars = std::env::vars_os().filter(|(name, _)| {
        // Lossy decoding only rewrites invalid sequences, so a check for an
        // ASCII prefix on the decoded name is still exact.
        name.to_string_lossy().starts_with("NEON_")
    });
    cmd.envs(neon_vars)
 }
 /// Add a `pre_exec` to the cmd that, in between fork() and exec(),
 /// 1. Claims a pidfile with a fcntl lock on it and
 /// 2. Sets up the pidfile's file descriptor so that it (and the lock)
@@ -379,7 +351,7 @@ where
    }
 }
-pub(crate) fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
+fn process_has_stopped(pid: Pid) -> anyhow::Result<bool> {
    match kill(pid, None) {
        // Process exists, keep waiting
        Ok(_) => Ok(false),
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -9,25 +9,22 @@ use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
 use compute_api::spec::ComputeMode;
 use control_plane::endpoint::ComputeControlPlane;
-use control_plane::local_env::{
+use control_plane::local_env::{InitForceMode, LocalEnv};
-    InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
+use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
    SafekeeperConf,
 };
 use control_plane::pageserver::PageServerNode;
 use control_plane::safekeeper::SafekeeperNode;
-use control_plane::storage_controller::{
+use control_plane::storage_controller::StorageController;
    NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController,
 };
 use control_plane::{broker, local_env};
-use pageserver_api::config::{
+use pageserver_api::controller_api::{
    NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
 };
 use pageserver_api::models::{
    ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
 };
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
 use pageserver_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
 };
 use pageserver_api::controller_api::{
    NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest,
 };
 use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo};
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
 use postgres_backend::AuthType;
 use postgres_connection::parse_host_port;
 use safekeeper_api::{
@@ -38,7 +35,6 @@ use std::collections::{BTreeSet, HashMap};
 use std::path::PathBuf;
 use std::process::exit;
 use std::str::FromStr;
 use std::time::Duration;
 use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
 use url::Host;
 use utils::{
@@ -54,10 +50,48 @@ const DEFAULT_PAGESERVER_ID: NodeId = NodeId(1);
 const DEFAULT_BRANCH_NAME: &str = "main";
 project_git_version!(GIT_VERSION);
-const DEFAULT_PG_VERSION: &str = "16";
+const DEFAULT_PG_VERSION: &str = "15";
 const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
 /// Render the built-in default configuration as TOML text.
 ///
 /// The result consists of a fixed header (control plane API, broker and a single
 /// safekeeper) followed by one `[[pageservers]]` section per requested
 /// pageserver.  The i-th pageserver gets the default id and default ports, each
 /// offset by `i`, listens on 127.0.0.1 and uses trust authentication.
 fn default_conf(num_pageservers: u16) -> String {
    let header = format!(
        r#"
 # Default built-in configuration, defined in main.rs
 control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
 [broker]
 listen_addr = '{DEFAULT_BROKER_ADDR}'
 [[safekeepers]]
 id = {DEFAULT_SAFEKEEPER_ID}
 pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
 http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
 "#,
    );
    let pageserver_sections = (0..num_pageservers).map(|i| {
        let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
        let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
        let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
        format!(
            r#"
 [[pageservers]]
 id = {pageserver_id}
 listen_pg_addr = '127.0.0.1:{pg_port}'
 listen_http_addr = '127.0.0.1:{http_port}'
 pg_auth_type = '{trust_auth}'
 http_auth_type = '{trust_auth}'
 "#,
            trust_auth = AuthType::Trust,
        )
    });
    // Header first, then the per-pageserver sections in index order.
    std::iter::once(header).chain(pageserver_sections).collect()
 }
 ///
 /// Timelines tree element used as a value in the HashMap.
 ///
@@ -90,8 +124,7 @@ fn main() -> Result<()> {
        handle_init(sub_args).map(Some)
    } else {
        // all other commands need an existing config
-        let mut env =
+        let mut env = LocalEnv::load_config().context("Error loading config")?;
            LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
        let original_env = env.clone();
        let rt = tokio::runtime::Builder::new_current_thread()
@@ -102,7 +135,7 @@ fn main() -> Result<()> {
        let subcommand_result = match sub_name {
            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
-            "start" => rt.block_on(handle_start_all(&env, get_start_timeout(sub_args))),
+            "start" => rt.block_on(handle_start_all(sub_args, &env)),
            "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
@@ -121,7 +154,7 @@ fn main() -> Result<()> {
    };
    match subcommand_result {
-        Ok(Some(updated_env)) => updated_env.persist_config()?,
+        Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
        Ok(None) => (),
        Err(e) => {
            eprintln!("command failed: {e:?}");
@@ -310,66 +343,48 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
 }
 fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
-    let num_pageservers = init_match.get_one::<u16>("num-pageservers");
+    let num_pageservers = init_match
-
+        .get_one::<u16>("num-pageservers")
-    let force = init_match.get_one("force").expect("we set a default value");
+        .expect("num-pageservers arg has a default");
-
+    // Create config file
-    // Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
+    let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
    let init_conf: NeonLocalInitConf = if let Some(config_path) =
        init_match.get_one::<PathBuf>("config")
    {
        // User (likely the Python test suite) provided a description of the environment.
        if num_pageservers.is_some() {
            bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
        }
        // load and parse the file
-        let contents = std::fs::read_to_string(config_path).with_context(|| {
+        std::fs::read_to_string(config_path).with_context(|| {
            format!(
                "Could not read configuration file '{}'",
                config_path.display()
            )
-        })?;
+        })?
        toml_edit::de::from_str(&contents)?
    } else {
-        // User (likely interactive) did not provide a description of the environment, give them the default
+        // Built-in default config
-        NeonLocalInitConf {
+        default_conf(*num_pageservers)
            control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
            broker: NeonBroker {
                listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
            },
            safekeepers: vec![SafekeeperConf {
                id: DEFAULT_SAFEKEEPER_ID,
                pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
                http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
                ..Default::default()
            }],
            pageservers: (0..num_pageservers.copied().unwrap_or(1))
                .map(|i| {
                    let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
                    let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
                    let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
                    NeonLocalInitPageserverConf {
                        id: pageserver_id,
                        listen_pg_addr: format!("127.0.0.1:{pg_port}"),
                        listen_http_addr: format!("127.0.0.1:{http_port}"),
                        pg_auth_type: AuthType::Trust,
                        http_auth_type: AuthType::Trust,
                        other: Default::default(),
                    }
                })
                .collect(),
            pg_distrib_dir: None,
            neon_distrib_dir: None,
            default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
            storage_controller: None,
            control_plane_compute_hook_api: None,
        }
    };
-    LocalEnv::init(init_conf, force)
+    let pg_version = init_match
-        .context("materialize initial neon_local environment on disk")?;
+        .get_one::<u32>("pg-version")
-    Ok(LocalEnv::load_config(&local_env::base_path())
+        .copied()
-        .expect("freshly written config should be loadable"))
+        .context("Failed to parse postgres version from the argument string")?;
    let mut env =
        LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
    let force = init_match.get_one("force").expect("we set a default value");
    env.init(pg_version, force)
        .context("Failed to initialize neon repository")?;
    // Create remote storage location for default LocalFs remote storage
    std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
    // Initialize pageserver, create initial tenant and timeline.
    for ps_conf in &env.pageservers {
        PageServerNode::from_env(&env, ps_conf)
            .initialize(&pageserver_config_overrides(init_match))
            .unwrap_or_else(|e| {
                eprintln!("pageserver init failed: {e:?}");
                exit(1);
            });
    }
    Ok(env)
 }
 /// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
@@ -384,6 +399,15 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
    PageServerNode::from_env(env, ps_conf)
 }
 /// Collect all values given for `pageserver-config-override` as string slices
 /// borrowed from `init_match`; the result is empty when there are none.
 fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
    match init_match.get_many::<String>("pageserver-config-override") {
        Some(overrides) => overrides.map(String::as_str).collect(),
        None => Vec::new(),
    }
 }
 async fn handle_tenant(
    tenant_match: &ArgMatches,
    env: &mut local_env::LocalEnv,
@@ -395,54 +419,6 @@ async fn handle_tenant(
                println!("{} {:?}", t.id, t.state);
            }
        }
        Some(("import", import_match)) => {
            let tenant_id = parse_tenant_id(import_match)?.unwrap_or_else(TenantId::generate);
            let storage_controller = StorageController::from_env(env);
            let create_response = storage_controller.tenant_import(tenant_id).await?;
            let shard_zero = create_response
                .shards
                .first()
                .expect("Import response omitted shards");
            let attached_pageserver_id = shard_zero.node_id;
            let pageserver =
                PageServerNode::from_env(env, env.get_pageserver_conf(attached_pageserver_id)?);
            println!(
                "Imported tenant {tenant_id}, attached to pageserver {attached_pageserver_id}"
            );
            let timelines = pageserver
                .http_client
                .list_timelines(shard_zero.shard_id)
                .await?;
            // Pick a 'main' timeline that has no ancestors, the rest will get arbitrary names
            let main_timeline = timelines
                .iter()
                .find(|t| t.ancestor_timeline_id.is_none())
                .expect("No timelines found")
                .timeline_id;
            let mut branch_i = 0;
            for timeline in timelines.iter() {
                let branch_name = if timeline.timeline_id == main_timeline {
                    "main".to_string()
                } else {
                    branch_i += 1;
                    format!("branch_{branch_i}")
                };
                println!(
                    "Importing timeline {tenant_id}/{} as branch {branch_name}",
                    timeline.timeline_id
                );
                env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;
            }
        }
        Some(("create", create_match)) => {
            let tenant_conf: HashMap<_, _> = create_match
                .get_many::<String>("config")
@@ -602,9 +578,13 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
        Some(("import", import_match)) => {
            let tenant_id = get_tenant_id(import_match, env)?;
            let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
-            let branch_name = import_match
+            let name = import_match
-                .get_one::<String>("branch-name")
+                .get_one::<String>("node-name")
-                .ok_or_else(|| anyhow!("No branch name provided"))?;
+                .ok_or_else(|| anyhow!("No node name provided"))?;
            let update_catalog = import_match
                .get_one::<bool>("update-catalog")
                .cloned()
                .unwrap_or_default();
            // Parse base inputs
            let base_tarfile = import_match
@@ -631,11 +611,24 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
                .copied()
                .context("Failed to parse postgres version from the argument string")?;
            let mut cplane = ComputeControlPlane::load(env.clone())?;
            println!("Importing timeline into pageserver ...");
            pageserver
                .timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
                .await?;
-            env.register_branch_mapping(branch_name.to_string(), tenant_id, timeline_id)?;
+            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
            println!("Creating endpoint for imported timeline ...");
            cplane.new_endpoint(
                name,
                tenant_id,
                timeline_id,
                None,
                None,
                pg_version,
                ComputeMode::Primary,
                !update_catalog,
            )?;
            println!("Done");
        }
        Some(("branch", branch_match)) => {
@@ -798,8 +791,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                .copied()
                .unwrap_or(false);
            let allow_multiple = sub_args.get_flag("allow-multiple");
            let mode = match (lsn, hot_standby) {
                (Some(lsn), false) => ComputeMode::Static(lsn),
                (None, true) => ComputeMode::Replica,
@@ -817,9 +808,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                _ => {}
            }
-            if !allow_multiple {
+            cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
                cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
            }
            cplane.new_endpoint(
                &endpoint_id,
@@ -848,15 +837,20 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
            let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
-            let allow_multiple = sub_args.get_flag("allow-multiple");
+            // If --safekeepers argument is given, use only the listed safekeeper nodes.
-
+            let safekeepers =
-            // If --safekeepers argument is given, use only the listed
+                if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
-            // safekeeper nodes; otherwise all from the env.
+                    let mut safekeepers: Vec<NodeId> = Vec::new();
-            let safekeepers = if let Some(safekeepers) = parse_safekeepers(sub_args)? {
+                    for sk_id in safekeepers_str.split(',').map(str::trim) {
-                safekeepers
+                        let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
-            } else {
+                            anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
-                env.safekeepers.iter().map(|sk| sk.id).collect()
+                        })?);
-            };
+                        safekeepers.push(sk_id);
                    }
                    safekeepers
                } else {
                    env.safekeepers.iter().map(|sk| sk.id).collect()
                };
            let endpoint = cplane
                .endpoints
@@ -868,13 +862,11 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                .cloned()
                .unwrap_or_default();
-            if !allow_multiple {
+            cplane.check_conflicting_endpoints(
-                cplane.check_conflicting_endpoints(
+                endpoint.mode,
-                    endpoint.mode,
+                endpoint.tenant_id,
-                    endpoint.tenant_id,
+                endpoint.timeline_id,
-                    endpoint.timeline_id,
+            )?;
                )?;
            }
            let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
                let conf = env.get_pageserver_conf(pageserver_id).unwrap();
@@ -960,10 +952,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                        })
                        .collect::<Vec<_>>()
                };
-            // If --safekeepers argument is given, use only the listed
+            endpoint.reconfigure(pageservers, None).await?;
            // safekeeper nodes; otherwise all from the env.
            let safekeepers = parse_safekeepers(sub_args)?;
            endpoint.reconfigure(pageservers, None, safekeepers).await?;
        }
        "stop" => {
            let endpoint_id = sub_args
@@ -985,23 +974,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
    Ok(())
 }
 /// Parse --safekeepers as list of safekeeper ids.
 fn parse_safekeepers(sub_args: &ArgMatches) -> Result<Option<Vec<NodeId>>> {
    if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
        let mut safekeepers: Vec<NodeId> = Vec::new();
        for sk_id in safekeepers_str.split(',').map(str::trim) {
            let sk_id = NodeId(
                u64::from_str(sk_id)
                    .map_err(|_| anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list"))?,
            );
            safekeepers.push(sk_id);
        }
        Ok(Some(safekeepers))
    } else {
        Ok(None)
    }
 }
 fn handle_mappings(sub_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
    let (sub_name, sub_args) = match sub_match.subcommand() {
        Some(ep_subcommand_data) => ep_subcommand_data,
@@ -1047,48 +1019,11 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
    ))
 }
 /// Borrow the `start-timeout` argument as a plain `Duration`.
 ///
 /// # Panics
 ///
 /// Panics if `start-timeout` carries no value in `args`.
 fn get_start_timeout(args: &ArgMatches) -> &Duration {
    args.get_one::<humantime::Duration>("start-timeout")
        .expect("invalid value for start-timeout")
        .as_ref()
 }
 /// Build the storage controller start arguments from the CLI matches.
 ///
 /// `instance-id` falls back to 1 when absent, `base-port` is passed through
 /// as given, and `start-timeout` is copied out of the parsed arguments.
 ///
 /// # Panics
 ///
 /// Panics if `instance-id` is given without `base-port`, or if
 /// `start-timeout` carries no value.
 fn storage_controller_start_args(args: &ArgMatches) -> NeonStorageControllerStartArgs {
    let maybe_instance_id = args.get_one::<u8>("instance-id");
    let base_port = args.get_one::<u16>("base-port");
    // An explicit instance id is only meaningful together with an explicit port.
    if maybe_instance_id.is_some() && base_port.is_none() {
        panic!("storage-controller start specified instance-id but did not provide base-port");
    }
    let start_timeout = args
        .get_one::<humantime::Duration>("start-timeout")
        .expect("invalid value for start-timeout");
    NeonStorageControllerStartArgs {
        instance_id: maybe_instance_id.copied().unwrap_or(1),
        base_port: base_port.copied(),
        start_timeout: *start_timeout,
    }
 }
 /// Build the storage controller stop arguments from the CLI matches.
 ///
 /// `instance-id` falls back to 1 when absent; `immediate` is set only when
 /// `stop-mode` is exactly `"immediate"`.
 fn storage_controller_stop_args(args: &ArgMatches) -> NeonStorageControllerStopArgs {
    let instance_id = args.get_one::<u8>("instance-id").copied().unwrap_or(1);
    let immediate = matches!(
        args.get_one::<String>("stop-mode").map(String::as_str),
        Some("immediate")
    );
    NeonStorageControllerStopArgs {
        instance_id,
        immediate,
    }
 }
 async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    match sub_match.subcommand() {
        Some(("start", subcommand_args)) => {
            if let Err(e) = get_pageserver(env, subcommand_args)?
-                .start(get_start_timeout(subcommand_args))
+                .start(&pageserver_config_overrides(subcommand_args))
                .await
            {
                eprintln!("pageserver start failed: {e}");
@@ -1116,12 +1051,30 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
                exit(1);
            }
-            if let Err(e) = pageserver.start(get_start_timeout(sub_match)).await {
+            if let Err(e) = pageserver
                .start(&pageserver_config_overrides(subcommand_args))
                .await
            {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
        }
        Some(("set-state", subcommand_args)) => {
            let pageserver = get_pageserver(env, subcommand_args)?;
            let scheduling = subcommand_args.get_one("scheduling");
            let availability = subcommand_args.get_one("availability");
            let storage_controller = StorageController::from_env(env);
            storage_controller
                .node_configure(NodeConfigureRequest {
                    node_id: pageserver.conf.id,
                    scheduling: scheduling.cloned(),
                    availability: availability.cloned(),
                })
                .await?;
        }
        Some(("status", subcommand_args)) => {
            match get_pageserver(env, subcommand_args)?.check_status().await {
                Ok(_) => println!("Page server is up and running"),
@@ -1144,15 +1097,20 @@ async fn handle_storage_controller(
 ) -> Result<()> {
    let svc = StorageController::from_env(env);
    match sub_match.subcommand() {
-        Some(("start", start_match)) => {
+        Some(("start", _start_match)) => {
-            if let Err(e) = svc.start(storage_controller_start_args(start_match)).await {
+            if let Err(e) = svc.start().await {
                eprintln!("start failed: {e}");
                exit(1);
            }
        }
        Some(("stop", stop_match)) => {
-            if let Err(e) = svc.stop(storage_controller_stop_args(stop_match)).await {
+            let immediate = stop_match
                .get_one::<String>("stop-mode")
                .map(|s| s.as_str())
                == Some("immediate");
            if let Err(e) = svc.stop(immediate).await {
                eprintln!("stop failed: {}", e);
                exit(1);
            }
@@ -1199,10 +1157,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
        "start" => {
            let extra_opts = safekeeper_extra_opts(sub_args);
-            if let Err(e) = safekeeper
+            if let Err(e) = safekeeper.start(extra_opts).await {
                .start(extra_opts, get_start_timeout(sub_args))
                .await
            {
                eprintln!("safekeeper start failed: {}", e);
                exit(1);
            }
@@ -1228,10 +1183,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
            }
            let extra_opts = safekeeper_extra_opts(sub_args);
-            if let Err(e) = safekeeper
+            if let Err(e) = safekeeper.start(extra_opts).await {
                .start(extra_opts, get_start_timeout(sub_args))
                .await
            {
                eprintln!("safekeeper start failed: {}", e);
                exit(1);
            }
@@ -1244,23 +1196,15 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }
-async fn handle_start_all(
+async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
    env: &local_env::LocalEnv,
    retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
    // Endpoints are not started automatically
-    broker::start_broker_process(env, retry_timeout).await?;
+    broker::start_broker_process(env).await?;
    // Only start the storage controller if the pageserver is configured to need it
    if env.control_plane_api.is_some() {
        let storage_controller = StorageController::from_env(env);
-        if let Err(e) = storage_controller
+        if let Err(e) = storage_controller.start().await {
            .start(NeonStorageControllerStartArgs::with_default_instance_id(
                (*retry_timeout).into(),
            ))
            .await
        {
            eprintln!("storage_controller start failed: {:#}", e);
            try_stop_all(env, true).await;
            exit(1);
@@ -1269,7 +1213,10 @@ async fn handle_start_all(
    for ps_conf in &env.pageservers {
        let pageserver = PageServerNode::from_env(env, ps_conf);
-        if let Err(e) = pageserver.start(retry_timeout).await {
+        if let Err(e) = pageserver
            .start(&pageserver_config_overrides(sub_match))
            .await
        {
            eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
            try_stop_all(env, true).await;
            exit(1);
@@ -1278,76 +1225,15 @@ async fn handle_start_all(
    for node in env.safekeepers.iter() {
        let safekeeper = SafekeeperNode::from_env(env, node);
-        if let Err(e) = safekeeper.start(vec![], retry_timeout).await {
+        if let Err(e) = safekeeper.start(vec![]).await {
            eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
            try_stop_all(env, false).await;
            exit(1);
        }
    }
    neon_start_status_check(env, retry_timeout).await?;
    Ok(())
 }
 /// Poll the storage controller until every configured pageserver is
 /// registered and active.
 ///
 /// Returns `Ok(())` immediately when no control plane API is configured.
 /// Otherwise the storage controller's node list is fetched once per
 /// `RETRY_INTERVAL` until it matches `env.pageservers` one-to-one by id and
 /// every node is `Active`, or until `retry_timeout / RETRY_INTERVAL`
 /// attempts have been made.
 ///
 /// # Errors
 ///
 /// Fails if a node list request fails, or if the check has not passed once
 /// the attempts are exhausted.
 async fn neon_start_status_check(
    env: &local_env::LocalEnv,
    retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
    const RETRY_INTERVAL: Duration = Duration::from_millis(100);
    const NOTICE_AFTER_RETRIES: Duration = Duration::from_secs(5);
    if env.control_plane_api.is_none() {
        return Ok(());
    }
    let storcon = StorageController::from_env(env);
    let retries = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
    // Number of attempts that corresponds to NOTICE_AFTER_RETRIES of waiting.
    let notice_after_retries = NOTICE_AFTER_RETRIES.as_millis() / RETRY_INTERVAL.as_millis();
    // The expected pageservers do not change between attempts: sort them once.
    let mut pageservers = env.pageservers.clone();
    pageservers.sort_by_key(|ps| ps.id);
    println!("\nRunning neon status check");
    for retry in 0..retries {
        if retry == notice_after_retries {
            println!("\nNeon status check has not passed yet, continuing to wait")
        }
        let mut nodes = storcon.node_list().await?;
        nodes.sort_by_key(|ps| ps.id);
        // A count mismatch is just another "not yet" outcome: it must fall
        // through to the sleep below instead of retrying without any delay.
        let passed = nodes.len() == pageservers.len()
            && nodes.iter().zip(pageservers.iter()).all(|(node, pageserver)| {
                node.id == pageserver.id
                    && matches!(node.availability, NodeAvailabilityWrapper::Active)
            });
        if passed {
            println!("\nNeon started and passed status check");
            return Ok(());
        }
        tokio::time::sleep(RETRY_INTERVAL).await;
    }
    anyhow::bail!("\nNeon did not pass status check")
 }
 async fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    let immediate =
        sub_match.get_one::<String>("stop-mode").map(|s| s.as_str()) == Some("immediate");
@@ -1362,7 +1248,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
    match ComputeControlPlane::load(env.clone()) {
        Ok(cplane) => {
            for (_k, node) in cplane.endpoints {
-                if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
+                if let Err(e) = node.stop(if immediate { "immediate" } else { "fast " }, false) {
                    eprintln!("postgres stop failed: {e:#}");
                }
            }
@@ -1390,35 +1276,15 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
        eprintln!("neon broker stop failed: {e:#}");
    }
-    // Stop all storage controller instances. In the most common case there's only one,
+    if env.control_plane_api.is_some() {
    // but iterate though the base data directory in order to discover the instances.
    let storcon_instances = env
        .storage_controller_instances()
        .await
        .expect("Must inspect data dir");
    for (instance_id, _instance_dir_path) in storcon_instances {
        let storage_controller = StorageController::from_env(env);
-        let stop_args = NeonStorageControllerStopArgs {
+        if let Err(e) = storage_controller.stop(immediate).await {
-            instance_id,
+            eprintln!("storage controller stop failed: {e:#}");
            immediate,
        };
        if let Err(e) = storage_controller.stop(stop_args).await {
            eprintln!("Storage controller instance {instance_id} stop failed: {e:#}");
        }
    }
 }
 fn cli() -> Command {
    let timeout_arg = Arg::new("start-timeout")
        .long("start-timeout")
        .short('t')
        .global(true)
        .help("timeout until we fail the command, e.g. 30s")
        .value_parser(value_parser!(humantime::Duration))
        .default_value("10s")
        .required(false);
    let branch_name_arg = Arg::new("branch-name")
        .long("branch-name")
        .help("Name of the branch to be created or used as an alias for other services")
@@ -1491,6 +1357,13 @@ fn cli() -> Command {
        .required(false)
        .value_name("stop-mode");
    let pageserver_config_args = Arg::new("pageserver-config-override")
        .long("pageserver-config-override")
        .num_args(1)
        .action(ArgAction::Append)
        .help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
        .required(false);
    let remote_ext_config_args = Arg::new("remote-ext-config")
        .long("remote-ext-config")
        .num_args(1)
@@ -1524,7 +1397,9 @@ fn cli() -> Command {
    let num_pageservers_arg = Arg::new("num-pageservers")
        .value_parser(value_parser!(u16))
        .long("num-pageservers")
-        .help("How many pageservers to create (default 1)");
+        .help("How many pageservers to create (default 1)")
        .required(false)
        .default_value("1");
    let update_catalog = Arg::new("update-catalog")
        .value_parser(value_parser!(bool))
@@ -1538,37 +1413,20 @@ fn cli() -> Command {
        .help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
        .required(false);
    let allow_multiple = Arg::new("allow-multiple")
        .help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
        .long("allow-multiple")
        .action(ArgAction::SetTrue)
        .required(false);
    let instance_id = Arg::new("instance-id")
        .long("instance-id")
        .help("Identifier used to distinguish storage controller instances (default 1)")
        .value_parser(value_parser!(u8))
        .required(false);
    let base_port = Arg::new("base-port")
        .long("base-port")
        .help("Base port for the storage controller instance idenfified by instance-id (defaults to pagserver cplane api)")
        .value_parser(value_parser!(u16))
        .required(false);
    Command::new("Neon CLI")
        .arg_required_else_help(true)
        .version(GIT_VERSION)
        .subcommand(
            Command::new("init")
                .about("Initialize a new Neon repository, preparing configs for services to start with")
                .arg(pageserver_config_args.clone())
                .arg(num_pageservers_arg.clone())
                .arg(
                    Arg::new("config")
                        .long("config")
                        .required(false)
                        .value_parser(value_parser!(PathBuf))
-                        .value_name("config")
+                        .value_name("config"),
                )
                .arg(pg_version_arg.clone())
                .arg(force_arg)
@@ -1576,7 +1434,6 @@ fn cli() -> Command {
        .subcommand(
            Command::new("timeline")
            .about("Manage timelines")
            .arg_required_else_help(true)
            .subcommand(Command::new("list")
                .about("List all timelines, available to this pageserver")
                .arg(tenant_id_arg.clone()))
@@ -1599,7 +1456,8 @@ fn cli() -> Command {
                .about("Import timeline from basebackup directory")
                .arg(tenant_id_arg.clone())
                .arg(timeline_id_arg.clone())
-                .arg(branch_name_arg.clone())
+                .arg(Arg::new("node-name").long("node-name")
                    .help("Name to assign to the imported timeline"))
                .arg(Arg::new("base-tarfile")
                    .long("base-tarfile")
                    .value_parser(value_parser!(PathBuf))
@@ -1615,6 +1473,7 @@ fn cli() -> Command {
                .arg(Arg::new("end-lsn").long("end-lsn")
                    .help("Lsn the basebackup ends at"))
                .arg(pg_version_arg.clone())
                .arg(update_catalog.clone())
            )
        ).subcommand(
            Command::new("tenant")
@@ -1637,8 +1496,6 @@ fn cli() -> Command {
            .subcommand(Command::new("config")
                .arg(tenant_id_arg.clone())
                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
            .subcommand(Command::new("import").arg(tenant_id_arg.clone().required(true))
                .about("Import a tenant that is present in remote storage, and create branches for its timelines"))
        )
        .subcommand(
            Command::new("pageserver")
@@ -1648,7 +1505,7 @@ fn cli() -> Command {
                .subcommand(Command::new("status"))
                .subcommand(Command::new("start")
                    .about("Start local pageserver")
-                    .arg(timeout_arg.clone())
+                    .arg(pageserver_config_args.clone())
                )
                .subcommand(Command::new("stop")
                    .about("Stop local pageserver")
@@ -1656,20 +1513,22 @@ fn cli() -> Command {
                )
                .subcommand(Command::new("restart")
                    .about("Restart local pageserver")
-                    .arg(timeout_arg.clone())
+                    .arg(pageserver_config_args.clone())
                )
                .subcommand(Command::new("set-state")
                    .arg(Arg::new("availability").value_parser(value_parser!(NodeAvailability)).long("availability").action(ArgAction::Set).help("Availability state: offline,active"))
                    .arg(Arg::new("scheduling").value_parser(value_parser!(NodeSchedulingPolicy)).long("scheduling").action(ArgAction::Set).help("Scheduling state: draining,pause,filling,active"))
                    .about("Set scheduling or availability state of pageserver node")
                    .arg(pageserver_config_args.clone())
                )
        )
        .subcommand(
            Command::new("storage_controller")
                .arg_required_else_help(true)
                .about("Manage storage_controller")
-                .subcommand(Command::new("start").about("Start storage controller")
+                .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
-                            .arg(timeout_arg.clone())
+                .subcommand(Command::new("stop").about("Stop local pageserver")
-                            .arg(instance_id.clone())
+                            .arg(stop_mode_arg.clone()))
                            .arg(base_port))
                .subcommand(Command::new("stop").about("Stop storage controller")
                            .arg(stop_mode_arg.clone())
                            .arg(instance_id))
        )
        .subcommand(
            Command::new("safekeeper")
@@ -1679,7 +1538,6 @@ fn cli() -> Command {
                            .about("Start local safekeeper")
                            .arg(safekeeper_id_arg.clone())
                            .arg(safekeeper_extra_opt_arg.clone())
                            .arg(timeout_arg.clone())
                )
                .subcommand(Command::new("stop")
                            .about("Stop local safekeeper")
@@ -1691,7 +1549,6 @@ fn cli() -> Command {
                            .arg(safekeeper_id_arg)
                            .arg(stop_mode_arg.clone())
                            .arg(safekeeper_extra_opt_arg)
                            .arg(timeout_arg.clone())
                )
        )
        .subcommand(
@@ -1716,22 +1573,18 @@ fn cli() -> Command {
                    .arg(pg_version_arg.clone())
                    .arg(hot_standby_arg.clone())
                    .arg(update_catalog)
                    .arg(allow_multiple.clone())
                )
                .subcommand(Command::new("start")
                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
                    .arg(endpoint_id_arg.clone())
                    .arg(endpoint_pageserver_id_arg.clone())
-                    .arg(safekeepers_arg.clone())
+                    .arg(safekeepers_arg)
                    .arg(remote_ext_config_args)
                    .arg(create_test_user)
                    .arg(allow_multiple.clone())
                    .arg(timeout_arg.clone())
                )
                .subcommand(Command::new("reconfigure")
                            .about("Reconfigure the endpoint")
                            .arg(endpoint_pageserver_id_arg)
                            .arg(safekeepers_arg)
                            .arg(endpoint_id_arg.clone())
                            .arg(tenant_id_arg.clone())
                )
@@ -1779,7 +1632,7 @@ fn cli() -> Command {
        .subcommand(
            Command::new("start")
                .about("Start page server and safekeepers")
-                .arg(timeout_arg.clone())
+                .arg(pageserver_config_args)
        )
        .subcommand(
            Command::new("stop")
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -1,22 +1,17 @@
 //! Code to manage the storage broker
 //!
-//! In the local test environment, the storage broker stores its data directly in
+//! In the local test environment, the data for each safekeeper is stored in
 //!
 //! ```text
-//!   .neon
+//!   .neon/safekeepers/<safekeeper id>
 //! ```
 use std::time::Duration;
 use anyhow::Context;
 use camino::Utf8PathBuf;
 use crate::{background_process, local_env};
-pub async fn start_broker_process(
+pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
    env: &local_env::LocalEnv,
    retry_timeout: &Duration,
 ) -> anyhow::Result<()> {
    let broker = &env.broker;
    let listen_addr = &broker.listen_addr;
@@ -32,7 +27,6 @@ pub async fn start_broker_process(
        args,
        [],
        background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
        retry_timeout,
        || async {
            let url = broker.client_url();
            let status_url = url.join("status").with_context(|| {
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -499,23 +499,6 @@ impl Endpoint {
            .join(",")
    }
    /// Resolve safekeeper ids into `127.0.0.1:<port>` connection strings.
    ///
    /// Only an endpoint in `ComputeMode::Primary` gets connection strings; in
    /// any other mode the result is empty and the ids are not looked up.
    /// Fails on the first id that is not present in `self.env.safekeepers`.
    fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
        if self.mode != ComputeMode::Primary {
            return Ok(Vec::new());
        }
        sk_ids
            .into_iter()
            .map(|sk_id| {
                self.env
                    .safekeepers
                    .iter()
                    .find(|node| node.id == sk_id)
                    .map(|sk| format!("127.0.0.1:{}", sk.get_compute_port()))
                    .ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))
            })
            .collect()
    }
    pub async fn start(
        &self,
        auth_token: &Option<String>,
@@ -540,7 +523,18 @@ impl Endpoint {
        let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
        assert!(!pageserver_connstring.is_empty());
-        let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
+        let mut safekeeper_connstrings = Vec::new();
        if self.mode == ComputeMode::Primary {
            for sk_id in safekeepers {
                let sk = self
                    .env
                    .safekeepers
                    .iter()
                    .find(|node| node.id == sk_id)
                    .ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
                safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
            }
        }
        // check for file remote_extensions_spec.json
        // if it is present, read it and pass to compute_ctl
@@ -560,7 +554,6 @@ impl Endpoint {
            format_version: 1.0,
            operation_uuid: None,
            features: self.features.clone(),
            swap_size_bytes: None,
            cluster: Cluster {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
@@ -598,6 +591,7 @@ impl Endpoint {
            remote_extensions,
            pgbouncer_settings: None,
            shard_stripe_size: Some(shard_stripe_size),
            primary_is_running: None,
        };
        let spec_path = self.endpoint_path().join("spec.json");
        std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -746,7 +740,6 @@ impl Endpoint {
        &self,
        mut pageservers: Vec<(Host, u16)>,
        stripe_size: Option<ShardStripeSize>,
        safekeepers: Option<Vec<NodeId>>,
    ) -> Result<()> {
        let mut spec: ComputeSpec = {
            let spec_path = self.endpoint_path().join("spec.json");
@@ -781,12 +774,6 @@ impl Endpoint {
            spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
        }
        // If safekeepers are not specified, don't change them.
        if let Some(safekeepers) = safekeepers {
            let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
            spec.safekeeper_connstrings = safekeeper_connstrings;
        }
        let client = reqwest::Client::builder()
            .timeout(Duration::from_secs(30))
            .build()
@@ -824,12 +811,11 @@ impl Endpoint {
        // cleanup work to do after postgres stops, like syncing safekeepers,
        // etc.
        //
-        // If destroying or stop mode is immediate, send it SIGTERM before
+        // If destroying, send it SIGTERM before waiting. Sometimes we do *not*
-        // waiting. Sometimes we do *not* want this cleanup: tests intentionally
+        // want this cleanup: tests intentionally do stop when majority of
-        // do stop when majority of safekeepers is down, so sync-safekeepers
+        // safekeepers is down, so sync-safekeepers would hang otherwise. This
-        // would hang otherwise. This could be a separate flag though.
+        // could be a separate flag though.
-        let send_sigterm = destroy || mode == "immediate";
+        self.wait_for_compute_ctl_to_exit(destroy)?;
        self.wait_for_compute_ctl_to_exit(send_sigterm)?;
        if destroy {
            println!(
                "Destroying postgres data directory '{}'",
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -3,7 +3,7 @@
 //! Now it also provides init method which acts like a stub for proper installation
 //! script which will use local paths.
-use anyhow::{bail, Context};
+use anyhow::{bail, ensure, Context};
 use clap::ValueEnum;
 use postgres_backend::AuthType;
@@ -17,17 +17,14 @@ use std::net::Ipv4Addr;
 use std::net::SocketAddr;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
 use std::time::Duration;
 use utils::{
    auth::{encode_from_key_file, Claims},
    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
 };
 use crate::pageserver::PageServerNode;
 use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
 use crate::safekeeper::SafekeeperNode;
-pub const DEFAULT_PG_VERSION: u32 = 16;
+pub const DEFAULT_PG_VERSION: u32 = 15;
 //
 // This data structures represents neon_local CLI config
@@ -36,107 +33,63 @@ pub const DEFAULT_PG_VERSION: u32 = 16;
 // to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
 // an example.
 //
-#[derive(PartialEq, Eq, Clone, Debug)]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 pub struct LocalEnv {
    // Base directory for all the nodes (the pageserver, safekeepers and
    // compute endpoints).
    //
    // This is not stored in the config file. Rather, this is the path where the
-    // config file itself is. It is read from the NEON_REPO_DIR env variable which
+    // config file itself is. It is read from the NEON_REPO_DIR env variable or
-    // must be an absolute path. If the env var is not set, $PWD/.neon is used.
+    // '.neon' if not given.
    #[serde(skip)]
    pub base_data_dir: PathBuf,
    // Path to postgres distribution. It's expected that "bin", "include",
    // "lib", "share" from postgres distribution are there. If at some point
    // in time we will be able to run against vanilla postgres we may split that
    // to four separate paths and match OS-specific installation layout.
    #[serde(default)]
    pub pg_distrib_dir: PathBuf,
    // Path to pageserver binary.
    #[serde(default)]
    pub neon_distrib_dir: PathBuf,
    // Default tenant ID to use with the 'neon_local' command line utility, when
    // --tenant_id is not explicitly specified.
    #[serde(default)]
    pub default_tenant_id: Option<TenantId>,
    // used to issue tokens during e.g pg start
    #[serde(default)]
    pub private_key_path: PathBuf,
    pub broker: NeonBroker,
    // Configuration for the storage controller (1 per neon_local environment)
    pub storage_controller: NeonStorageControllerConf,
    /// This Vec must always contain at least one pageserver
    /// Populated by [`Self::load_config`] from the individual `pageserver.toml`s.
    /// NB: not used anymore except for informing users that they need to change their `.neon/config`.
    pub pageservers: Vec<PageServerConf>,
    #[serde(default)]
    pub safekeepers: Vec<SafekeeperConf>,
    // Control plane upcall API for pageserver: if None, we will not run storage_controller  If set, this will
    // be propagated into each pageserver's configuration.
    #[serde(default)]
    pub control_plane_api: Option<Url>,
    // Control plane upcall API for storage controller.  If set, this will be propagated into the
    // storage controller's configuration.
    #[serde(default)]
    pub control_plane_compute_hook_api: Option<Url>,
    /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
    #[serde(default)]
    // A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
    // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
    // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
    pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
 }
 /// On-disk state stored in `.neon/config`.
 ///
 /// Because of the container-level `serde(default)`, any field missing from
 /// the file is taken from this struct's `Default` value; because of
 /// `deny_unknown_fields`, a key that matches no field is a deserialization error.
 #[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
 #[serde(default, deny_unknown_fields)]
 pub struct OnDiskConfig {
    pub pg_distrib_dir: PathBuf,
    pub neon_distrib_dir: PathBuf,
    pub default_tenant_id: Option<TenantId>,
    pub private_key_path: PathBuf,
    pub broker: NeonBroker,
    pub storage_controller: NeonStorageControllerConf,
    // Never written out (`skip_serializing`). If the key is present when reading,
    // `fail_if_pageservers_field_specified` turns that into a deserialization error.
    #[serde(
        skip_serializing,
        deserialize_with = "fail_if_pageservers_field_specified"
    )]
    pub pageservers: Vec<PageServerConf>,
    pub safekeepers: Vec<SafekeeperConf>,
    pub control_plane_api: Option<Url>,
    pub control_plane_compute_hook_api: Option<Url>,
    // The only non-`pub` field of this struct.
    branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
 }
 fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
 where
    D: serde::Deserializer<'de>,
 {
    Err(serde::de::Error::custom(
        "The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
         Please remove the `pageservers` from your .neon/config.",
    ))
 }
 /// The description of the neon_local env to be initialized by `neon_local init --config`.
 ///
 /// Deserialize-only. Keys that match no field are rejected (`deny_unknown_fields`).
 /// No field carries a serde default, so the non-`Option` fields must be present in the input.
 #[derive(Clone, Debug, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct NeonLocalInitConf {
    // TODO: do we need this? Seems unused
    pub pg_distrib_dir: Option<PathBuf>,
    // TODO: do we need this? Seems unused
    pub neon_distrib_dir: Option<PathBuf>,
    pub default_tenant_id: TenantId,
    pub broker: NeonBroker,
    pub storage_controller: Option<NeonStorageControllerConf>,
    pub pageservers: Vec<NeonLocalInitPageserverConf>,
    pub safekeepers: Vec<SafekeeperConf>,
    // NOTE(review): the nested Option presumably separates "key not given" from
    // "explicitly no URL" — confirm against the code that consumes this struct.
    pub control_plane_api: Option<Option<Url>>,
    pub control_plane_compute_hook_api: Option<Option<Url>>,
 }
 /// Broker config for cluster internal communication.
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default)]
@@ -145,48 +98,6 @@ pub struct NeonBroker {
    pub listen_addr: SocketAddr,
 }
 /// Storage controller configuration for the neon_local environment.
 ///
 /// Any field missing from the input takes its value from the `Default` impl
 /// (container-level `serde(default)`).
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default)]
 pub struct NeonStorageControllerConf {
    /// Heartbeat timeout before marking a node offline
    #[serde(with = "humantime_serde")]
    pub max_offline: Duration,
    // (De)serialized through `humantime_serde`, like `max_offline`.
    // NOTE(review): presumably how long a node may stay in a warming-up state — confirm.
    #[serde(with = "humantime_serde")]
    pub max_warming_up: Duration,
    pub start_as_candidate: bool,
    /// Database url used when running multiple storage controller instances
    pub database_url: Option<SocketAddr>,
    /// Threshold for auto-splitting a tenant into shards
    pub split_threshold: Option<u64>,
    pub max_secondary_lag_bytes: Option<u64>,
 }
 impl NeonStorageControllerConf {
    // Use a shorter pageserver unavailability interval than the default to speed up tests.
    const DEFAULT_MAX_OFFLINE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
    // Value used for `max_warming_up` by the `Default` impl of this type.
    const DEFAULT_MAX_WARMING_UP_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30);
 }
 // Defaults: the two intervals come from the associated constants, `start_as_candidate`
 // is off, and every optional setting is unset.
 impl Default for NeonStorageControllerConf {
    fn default() -> Self {
        Self {
            max_offline: Self::DEFAULT_MAX_OFFLINE_INTERVAL,
            max_warming_up: Self::DEFAULT_MAX_WARMING_UP_INTERVAL,
            start_as_candidate: false,
            database_url: None,
            split_threshold: None,
            max_secondary_lag_bytes: None,
        }
    }
 }
 // Dummy Default impl to satisfy Deserialize derive.
 impl Default for NeonBroker {
    fn default() -> Self {
@@ -202,18 +113,22 @@ impl NeonBroker {
    }
 }
 // neon_local needs to know this subset of pageserver configuration.
 // For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
 // It can get stale if `pageserver.toml` is changed.
 // TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
 //
 // serde attributes: `default` fills absent fields from the `Default` impl;
 // `deny_unknown_fields` makes deserialization fail on any key not listed below.
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 #[serde(default, deny_unknown_fields)]
 pub struct PageServerConf {
    // node id
    pub id: NodeId,
    // Pageserver connection settings
    pub listen_pg_addr: String,
    pub listen_http_addr: String,
    // auth type used for the PG and HTTP ports
    pub pg_auth_type: AuthType,
    pub http_auth_type: AuthType,
    // NOTE(review): the two crate-private options below are carried as opaque
    // strings; presumably they are forwarded to the pageserver unchanged --
    // confirm against the code that reads them.
    pub(crate) virtual_file_io_engine: Option<String>,
    pub(crate) get_vectored_impl: Option<String>,
 }
 impl Default for PageServerConf {
@@ -224,40 +139,8 @@ impl Default for PageServerConf {
            listen_http_addr: String::new(),
            pg_auth_type: AuthType::Trust,
            http_auth_type: AuthType::Trust,
-        }
+            virtual_file_io_engine: None,
-    }
+            get_vectored_impl: None,
 }
 /// The toml that can be passed to `neon_local init --config`.
 /// This is a subset of the `pageserver.toml` configuration.
 ///
 /// The five named fields are the ones converted into [`PageServerConf`];
 /// every other key in the input is collected into `other`.
 // TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
 #[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
 pub struct NeonLocalInitPageserverConf {
    /// Node id of the pageserver.
    pub id: NodeId,
    /// Listen address for the PG port, kept as an unparsed string.
    pub listen_pg_addr: String,
    /// Listen address for the HTTP port, kept as an unparsed string.
    pub listen_http_addr: String,
    /// Auth type for the PG port.
    pub pg_auth_type: AuthType,
    /// Auth type for the HTTP port.
    pub http_auth_type: AuthType,
    /// Catch-all for every key not named above (`#[serde(flatten)]` into a map),
    /// so unknown settings survive a deserialize/serialize round trip.
    #[serde(flatten)]
    pub other: HashMap<String, toml::Value>,
 }
 /// Projects the init-time pageserver config onto the subset that `neon_local`
 /// keeps at runtime. The pass-through `other` map is dropped.
 impl From<&NeonLocalInitPageserverConf> for PageServerConf {
    fn from(conf: &NeonLocalInitPageserverConf) -> Self {
        // Destructure without `..` so that adding a field to
        // `NeonLocalInitPageserverConf` is a compile error here until this
        // conversion decides what to do with it.
        let NeonLocalInitPageserverConf {
            id,
            listen_pg_addr,
            listen_http_addr,
            pg_auth_type,
            http_auth_type,
            // Deliberately ignored: not part of `PageServerConf`.
            other: _,
        } = conf;
        // `conf` is borrowed, so the bindings are references: the values are
        // copied out with `*` and the strings are cloned.
        Self {
            id: *id,
            listen_pg_addr: listen_pg_addr.clone(),
            listen_http_addr: listen_http_addr.clone(),
            pg_auth_type: *pg_auth_type,
            http_auth_type: *http_auth_type,
        }
    }
 }
@@ -273,7 +156,6 @@ pub struct SafekeeperConf {
    pub remote_storage: Option<String>,
    pub backup_threads: Option<u32>,
    pub auth_enabled: bool,
    pub listen_addr: Option<String>,
 }
 impl Default for SafekeeperConf {
@@ -287,7 +169,6 @@ impl Default for SafekeeperConf {
            remote_storage: None,
            backup_threads: None,
            auth_enabled: false,
            listen_addr: None,
        }
    }
 }
@@ -340,16 +221,11 @@ impl LocalEnv {
        }
    }
    /// Returns `dir_name` joined onto the path produced by
    /// `pg_distrib_dir(pg_version)`, propagating that call's error unchanged.
    pub fn pg_dir(&self, pg_version: u32, dir_name: &str) -> anyhow::Result<PathBuf> {
        let distrib_dir = self.pg_distrib_dir(pg_version)?;
        Ok(distrib_dir.join(dir_name))
    }
    pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
-        self.pg_dir(pg_version, "bin")
+        Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
    }
    pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
-        self.pg_dir(pg_version, "lib")
+        Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
    }
    pub fn pageserver_bin(&self) -> PathBuf {
@@ -399,36 +275,6 @@ impl LocalEnv {
        }
    }
    /// Inspect the base data directory and extract the instance id and instance directory path
    /// for all storage controller instances.
    ///
    /// An entry counts as an instance directory when it is a directory whose name is
    /// `storage_controller_<id>` with `<id>` parseable as a `u8`. Everything else is skipped.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error if the base data directory cannot be read, if a
    /// directory entry cannot be read, or if the metadata of a matching entry cannot be
    /// obtained.
    pub async fn storage_controller_instances(&self) -> std::io::Result<Vec<(u8, PathBuf)>> {
        let mut instances = Vec::new();

        for dentry in std::fs::read_dir(&self.base_data_dir)? {
            let dentry = dentry?;

            // A name that is not valid UTF-8 can never equal the all-ASCII
            // `storage_controller_<id>` pattern, so skip it instead of panicking.
            let file_name = dentry.file_name();
            let instance_id = match file_name
                .to_str()
                .and_then(|name| name.strip_prefix("storage_controller_"))
                .and_then(|suffix| suffix.parse::<u8>().ok())
            {
                Some(id) => id,
                None => continue,
            };

            // Only stat entries whose name already matched; unrelated files in the
            // base directory no longer influence the result.
            if !dentry.metadata()?.is_dir() {
                continue;
            }

            instances.push((instance_id, dentry.path()));
        }

        Ok(instances)
    }
    pub fn register_branch_mapping(
        &mut self,
        branch_name: String,
@@ -480,8 +326,44 @@ impl LocalEnv {
            .collect()
    }
-    ///  Construct `Self` from on-disk state.
+    /// Create a LocalEnv from a config file.
-    pub fn load_config(repopath: &Path) -> anyhow::Result<Self> {
+    ///
    /// Unlike 'load_config', this function fills in any defaults that are missing
    /// from the config file.
    ///
    /// Defaults that are filled in:
    /// - `pg_distrib_dir`: `POSTGRES_DISTRIB_DIR` if set, otherwise `<cwd>/pg_install`.
    /// - `neon_distrib_dir`: the directory containing the running executable.
    /// - `base_data_dir`: always overwritten with `base_path()`.
    ///
    /// # Errors
    ///
    /// Fails if the TOML cannot be deserialized, if the current directory or the
    /// current executable path cannot be determined when needed, or if the
    /// configuration contains no pageserver.
    pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
        let mut env: LocalEnv = toml::from_str(toml)?;

        // Find postgres binaries.
        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
        // Note that later in the code we assume, that distrib dirs follow the same pattern
        // for all postgres versions.
        if env.pg_distrib_dir == Path::new("") {
            env.pg_distrib_dir = match env::var_os("POSTGRES_DISTRIB_DIR") {
                Some(distrib_dir) => distrib_dir.into(),
                None => env::current_dir()?.join("pg_install"),
            };
        }

        // Find neon binaries.
        if env.neon_distrib_dir == Path::new("") {
            let exe_path = env::current_exe()?;
            // Report a missing parent as an error rather than panicking: this
            // function already returns `anyhow::Result`.
            env.neon_distrib_dir = exe_path
                .parent()
                .ok_or_else(|| {
                    anyhow::anyhow!(
                        "executable path {} has no parent directory",
                        exe_path.display()
                    )
                })?
                .to_owned();
        }

        if env.pageservers.is_empty() {
            anyhow::bail!("Configuration must contain at least one pageserver");
        }

        env.base_data_dir = base_path();
        Ok(env)
    }
    /// Locate and load config
    pub fn load_config() -> anyhow::Result<Self> {
        let repopath = base_path();
        if !repopath.exists() {
            bail!(
                "Neon config is not found in {}. You need to run 'neon_local init' first",
@@ -492,140 +374,38 @@ impl LocalEnv {
        // TODO: check that it looks like a neon repository
        // load and parse file
-        let config_file_contents = fs::read_to_string(repopath.join("config"))?;
+        let config = fs::read_to_string(repopath.join("config"))?;
-        let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
+        let mut env: LocalEnv = toml::from_str(config.as_str())?;
        let mut env = {
            let OnDiskConfig {
                pg_distrib_dir,
                neon_distrib_dir,
                default_tenant_id,
                private_key_path,
                broker,
                storage_controller,
                pageservers,
                safekeepers,
                control_plane_api,
                control_plane_compute_hook_api,
                branch_name_mappings,
            } = on_disk_config;
            LocalEnv {
                base_data_dir: repopath.to_owned(),
                pg_distrib_dir,
                neon_distrib_dir,
                default_tenant_id,
                private_key_path,
                broker,
                storage_controller,
                pageservers,
                safekeepers,
                control_plane_api,
                control_plane_compute_hook_api,
                branch_name_mappings,
            }
        };
-        // The source of truth for pageserver configuration is the pageserver.toml.
+        env.base_data_dir = repopath;
        assert!(
            env.pageservers.is_empty(),
            "we ensure this during deserialization"
        );
        env.pageservers = {
            let iter = std::fs::read_dir(repopath).context("open dir")?;
            let mut pageservers = Vec::new();
            for res in iter {
                let dentry = res?;
                const PREFIX: &str = "pageserver_";
                let dentry_name = dentry
                    .file_name()
                    .into_string()
                    .ok()
                    .with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
                    .unwrap();
                if !dentry_name.starts_with(PREFIX) {
                    continue;
                }
                if !dentry.file_type().context("determine file type")?.is_dir() {
                    anyhow::bail!("expected a directory, got {:?}", dentry.path());
                }
                let id = dentry_name[PREFIX.len()..]
                    .parse::<NodeId>()
                    .with_context(|| format!("parse id from {:?}", dentry.path()))?;
                // TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
                #[derive(serde::Serialize, serde::Deserialize)]
                // (allow unknown fields, unlike PageServerConf)
                struct PageserverConfigTomlSubset {
                    listen_pg_addr: String,
                    listen_http_addr: String,
                    pg_auth_type: AuthType,
                    http_auth_type: AuthType,
                }
                let config_toml_path = dentry.path().join("pageserver.toml");
                let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
                    &std::fs::read_to_string(&config_toml_path)
                        .with_context(|| format!("read {:?}", config_toml_path))?,
                )
                .context("parse pageserver.toml")?;
                let identity_toml_path = dentry.path().join("identity.toml");
                #[derive(serde::Serialize, serde::Deserialize)]
                struct IdentityTomlSubset {
                    id: NodeId,
                }
                let identity_toml: IdentityTomlSubset = toml_edit::de::from_str(
                    &std::fs::read_to_string(&identity_toml_path)
                        .with_context(|| format!("read {:?}", identity_toml_path))?,
                )
                .context("parse identity.toml")?;
                let PageserverConfigTomlSubset {
                    listen_pg_addr,
                    listen_http_addr,
                    pg_auth_type,
                    http_auth_type,
                } = config_toml;
                let IdentityTomlSubset {
                    id: identity_toml_id,
                } = identity_toml;
                let conf = PageServerConf {
                    id: {
                        anyhow::ensure!(
                            identity_toml_id == id,
                            "id mismatch: identity.toml:id={identity_toml_id} pageserver_(.*) id={id}",
                        );
                        id
                    },
                    listen_pg_addr,
                    listen_http_addr,
                    pg_auth_type,
                    http_auth_type,
                };
                pageservers.push(conf);
            }
            pageservers
        };
        Ok(env)
    }
-    pub fn persist_config(&self) -> anyhow::Result<()> {
+    pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
-        Self::persist_config_impl(
+        // Currently, the user first passes a config file with 'neon_local init --config=<path>'
-            &self.base_data_dir,
+        // We read that in, in `create_config`, and fill any missing defaults. Then it's saved
-            &OnDiskConfig {
+        // to .neon/config. TODO: We lose any formatting and comments along the way, which is
-                pg_distrib_dir: self.pg_distrib_dir.clone(),
+        // a bit sad.
-                neon_distrib_dir: self.neon_distrib_dir.clone(),
+        let mut conf_content = r#"# This file describes a local deployment of the page server
-                default_tenant_id: self.default_tenant_id,
+# and safekeeeper node. It is read by the 'neon_local' command-line
-                private_key_path: self.private_key_path.clone(),
+# utility.
-                broker: self.broker.clone(),
+"#
-                storage_controller: self.storage_controller.clone(),
+        .to_string();
-                pageservers: vec![], // it's skip_serializing anyway
+
-                safekeepers: self.safekeepers.clone(),
+        // Convert the LocalEnv to a toml file.
-                control_plane_api: self.control_plane_api.clone(),
+        //
-                control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
+        // This could be as simple as this:
-                branch_name_mappings: self.branch_name_mappings.clone(),
+        //
-            },
+        // conf_content += &toml::to_string_pretty(env)?;
-        )
+        //
-    }
+        // But it results in a "values must be emitted before tables". I'm not sure
        // why, AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>' is last.
        // Maybe rust reorders the fields to avoid padding or something?
        // In any case, converting to toml::Value first, and serializing that, works.
        // See https://github.com/alexcrichton/toml-rs/issues/142
        conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
    pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
        let conf_content = &toml::to_string_pretty(config)?;
        let target_config_path = base_path.join("config");
        fs::write(&target_config_path, conf_content).with_context(|| {
            format!(
@@ -650,13 +430,17 @@ impl LocalEnv {
        }
    }
-    /// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
+    //
-    pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
+    // Initialize a new Neon repository
-        let base_path = base_path();
+    //
-        assert_ne!(base_path, Path::new(""));
+    pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
-        let base_path = &base_path;
+        // check if config already exists
        let base_path = &self.base_data_dir;
        ensure!(
            base_path != Path::new(""),
            "repository base path is missing"
        );
        // create base_path dir
        if base_path.exists() {
            match force {
                InitForceMode::MustNotExist => {
@@ -688,115 +472,74 @@ impl LocalEnv {
                }
            }
        }
        if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
            bail!(
                "Can't find postgres binary at {}",
                self.pg_bin_dir(pg_version)?.display()
            );
        }
        for binary in ["pageserver", "safekeeper"] {
            if !self.neon_distrib_dir.join(binary).exists() {
                bail!(
                    "Can't find binary '{binary}' in neon distrib dir '{}'",
                    self.neon_distrib_dir.display()
                );
            }
        }
        if !base_path.exists() {
            fs::create_dir(base_path)?;
        }
        let NeonLocalInitConf {
            pg_distrib_dir,
            neon_distrib_dir,
            default_tenant_id,
            broker,
            storage_controller,
            pageservers,
            safekeepers,
            control_plane_api,
            control_plane_compute_hook_api,
        } = conf;
        // Find postgres binaries.
        // Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
        // Note that later in the code we assume, that distrib dirs follow the same pattern
        // for all postgres versions.
        let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
            if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
                postgres_bin.into()
            } else {
                let cwd = env::current_dir().unwrap();
                cwd.join("pg_install")
            }
        });
        // Find neon binaries.
        let neon_distrib_dir = neon_distrib_dir
            .unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
        // Generate keypair for JWT.
        //
        // The keypair is only needed if authentication is enabled in any of the
        // components. For convenience, we generate the keypair even if authentication
        // is not enabled, so that you can easily enable it after the initialization
-        // step.
+        // step. However, if the key generation fails, we treat it as non-fatal if
-        generate_auth_keys(
+        // authentication was not enabled.
-            base_path.join("auth_private_key.pem").as_path(),
+        if self.private_key_path == PathBuf::new() {
-            base_path.join("auth_public_key.pem").as_path(),
+            match generate_auth_keys(
-        )
+                base_path.join("auth_private_key.pem").as_path(),
-        .context("generate auth keys")?;
+                base_path.join("auth_public_key.pem").as_path(),
-        let private_key_path = PathBuf::from("auth_private_key.pem");
+            ) {
-
+                Ok(()) => {
-        // create the runtime type because the remaining initialization code below needs
+                    self.private_key_path = PathBuf::from("auth_private_key.pem");
-        // a LocalEnv instance op operation
+                }
-        // TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
+                Err(e) => {
-        let env = LocalEnv {
+                    if !self.auth_keys_needed() {
-            base_data_dir: base_path.clone(),
+                        eprintln!("Could not generate keypair for JWT authentication: {e}");
-            pg_distrib_dir,
+                        eprintln!("Continuing anyway because authentication was not enabled");
-            neon_distrib_dir,
+                        self.private_key_path = PathBuf::from("auth_private_key.pem");
-            default_tenant_id: Some(default_tenant_id),
+                    } else {
-            private_key_path,
+                        return Err(e);
-            broker,
+                    }
-            storage_controller: storage_controller.unwrap_or_default(),
+                }
-            pageservers: pageservers.iter().map(Into::into).collect(),
+            }
            safekeepers,
            control_plane_api: control_plane_api.unwrap_or_default(),
            control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
            branch_name_mappings: Default::default(),
        };
        // create endpoints dir
        fs::create_dir_all(env.endpoints_path())?;
        // create safekeeper dirs
        for safekeeper in &env.safekeepers {
            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
        }
-        // initialize pageserver state
+        fs::create_dir_all(self.endpoints_path())?;
-        for (i, ps) in pageservers.into_iter().enumerate() {
+
-            let runtime_ps = &env.pageservers[i];
+        for safekeeper in &self.safekeepers {
-            assert_eq!(&PageServerConf::from(&ps), runtime_ps);
+            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
            fs::create_dir(env.pageserver_data_dir(ps.id))?;
            PageServerNode::from_env(&env, runtime_ps)
                .initialize(ps)
                .context("pageserver init failed")?;
        }
-        // setup remote remote location for default LocalFs remote storage
+        self.persist_config(base_path)
-        std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
+    }
-        env.persist_config()
+    fn auth_keys_needed(&self) -> bool {
        self.pageservers.iter().any(|ps| {
            ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
        }) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
    }
 }
-pub fn base_path() -> PathBuf {
+fn base_path() -> PathBuf {
-    let path = match std::env::var_os("NEON_REPO_DIR") {
+    match std::env::var_os("NEON_REPO_DIR") {
-        Some(val) => {
+        Some(val) => PathBuf::from(val),
-            let path = PathBuf::from(val);
+        None => PathBuf::from(".neon"),
-            if !path.is_absolute() {
+    }
                // repeat the env var in the error because our default is always absolute
                panic!("NEON_REPO_DIR must be an absolute path, got {path:?}");
            }
            path
        }
        None => {
            let pwd = std::env::current_dir()
                // technically this can fail but it's quite unlikely
                .expect("determine current directory");
            let pwd_abs = pwd.canonicalize().expect("canonicalize current directory");
            pwd_abs.join(".neon")
        }
    };
    assert!(path.is_absolute());
    path
 }
 /// Generate a public/private key pair for JWT authentication
@@ -835,3 +578,31 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
    }
    Ok(())
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Checks that the bundled `simple.conf` parses, and that the same config
    /// with a corrupted `listen_addr` value is rejected.
    #[test]
    fn simple_conf_parsing() {
        // The sample config is embedded at compile time, relative to this source file.
        let simple_conf_toml = include_str!("../simple.conf");
        let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
        assert!(
            simple_conf_parse_result.is_ok(),
            "failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
        );
        // Swap the valid listen address for garbage that cannot be parsed.
        // NOTE(review): the names and the failure message below say "Url", but the
        // replaced key is `listen_addr` -- confirm which wording is intended.
        let string_to_replace = "listen_addr = '127.0.0.1:50051'";
        let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
        let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
        // Guard against the sample config changing so that the replacement silently
        // becomes a no-op and the negative check below stops testing anything.
        assert!(
            spoiled_url_toml.contains(spoiled_url_str),
            "Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
        );
        let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
        assert!(
            spoiled_url_parse_result.is_err(),
            "expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
        );
    }
 }
--- a/Show More
+++ b/Show More
`@@ -1,2 +1,2 @@`
	`[profile.default]`	`[profile.default]`
	`slow-timeout = { period = "60s", terminate-after = 3 }`	`slow-timeout = { period = "20s", terminate-after = 3 }`
		`@@ -1,2 +0,0 @@`
			`# allows for nicer hunk headers with git show`
			`*.rs diff=rust`
		`@@ -1 +0,0 @@`
			`GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;`
		`@@ -1,3 +0,0 @@`
			`-- SKIP: Moved inline to the handle_grants() functions.`

			`ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;`