Compare commits


15 Commits

Author SHA1 Message Date
John Spray
4e5724d9c3 Merge pull request #7248 from neondatabase/rc/2024-03-26
Release 2024-03-26
2024-03-26 15:17:00 +00:00
John Spray
0d3e499059 Merge pull request #7219 from neondatabase/rc/2024-03-25
Release 2024-03-25
2024-03-25 12:28:09 +00:00
Arpad Müller
7b860b837c Merge pull request #7154 from neondatabase/rc/2024-03-18
Release 2024-03-18
2024-03-19 12:07:14 +01:00
Christian Schwarz
41fc96e20f fixup(#7160 / tokio_epoll_uring_ext): double-panic caused by info! in thread-local's drop() (#7164)
Manual testing of the changes in #7160 revealed that, if the
thread-local destructor ever runs (it apparently doesn't in our test
suite runs, otherwise #7160 would not have auto-merged), we can
encounter an `abort()` due to a double-panic in the tracing code.

This GitHub comment contains the stack trace:
https://github.com/neondatabase/neon/pull/7160#issuecomment-2003778176

This PR reverts #7160 and uses an atomic counter to identify the
thread-local in log messages, instead of the memory address of the
thread-local, which may be reused (see the sketch after this entry).
2024-03-18 16:28:17 +01:00
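
A minimal sketch of the counter-based identification described in the commit above, assuming made-up names (`ThreadLocalState`, `NEXT_THREAD_LOCAL_ID`) rather than the actual tokio_epoll_uring_ext code:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

// Process-wide, monotonically increasing id. Unlike a memory address, it is
// never reused, so log lines stay unambiguous across thread lifetimes.
static NEXT_THREAD_LOCAL_ID: AtomicU64 = AtomicU64::new(1);

struct ThreadLocalState {
    id: u64,
}

impl ThreadLocalState {
    fn new() -> Self {
        // Relaxed is enough: only uniqueness matters, not ordering.
        Self {
            id: NEXT_THREAD_LOCAL_ID.fetch_add(1, Ordering::Relaxed),
        }
    }
}

thread_local! {
    static STATE: ThreadLocalState = ThreadLocalState::new();
}

fn main() {
    // Each thread gets a distinct id that log messages can reference
    // instead of `&state as *const _`.
    let handles: Vec<_> = (0..3)
        .map(|_| std::thread::spawn(|| STATE.with(|s| println!("thread-local id = {}", s.id))))
        .collect();
    for handle in handles {
        handle.join().unwrap();
    }
}
```
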
Christian Schwarz
fb2b1ce57b fixup(#7141 / tokio_epoll_uring_ext): high frequency log message
PR #7141 added the log message

```
ThreadLocalState is being dropped and id might be re-used in the future
```

which was supposed to be emitted when the thread-local is destroyed.
Instead, it was emitted on _each_ call to `thread_local_system()`,
i.e., on each tokio-epoll-uring operation (see the sketch after this entry).
2024-03-18 13:01:17 +01:00
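
A minimal sketch of the intended placement versus the accidental one, with made-up names and `eprintln!` standing in for the real `tracing` call (which, per the later fixup above, is itself problematic inside a thread-local destructor):

```rust
struct ThreadLocalState {
    id: u64,
}

impl ThreadLocalState {
    // Called on every tokio-epoll-uring operation. Before this fix, the
    // "being dropped" message was accidentally emitted from here.
    fn thread_local_system(&self) -> u64 {
        self.id
    }
}

impl Drop for ThreadLocalState {
    fn drop(&mut self) {
        // Intended placement: runs exactly once, when the thread-local is destroyed.
        eprintln!(
            "ThreadLocalState {} is being dropped and id might be re-used in the future",
            self.id
        );
    }
}

thread_local! {
    static STATE: ThreadLocalState = ThreadLocalState { id: 1 };
}

fn main() {
    std::thread::spawn(|| {
        // Several operations, but the drop message is printed only once, at thread exit.
        for _ in 0..3 {
            let _ = STATE.with(|s| s.thread_local_system());
        }
    })
    .join()
    .unwrap();
}
```
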
Joonas Koivunen
464717451b build: make procfs linux only dependency (#7156)
The dependency refuses to build on macOS, so builds on `main` are currently
broken, including the `release` PR (see the sketch after this entry).
2024-03-18 09:32:49 +00:00
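
The fix itself is a Cargo.toml change (procfs becomes a target-specific dependency); as a hedged illustration of the matching code-level pattern, any use of the crate gets cfg-gated the same way. The helper below is made up and not part of neon:

```rust
#[cfg(target_os = "linux")]
fn one_minute_load_average() -> Option<f32> {
    // procfs parses /proc/loadavg, which only exists on Linux.
    procfs::LoadAverage::new().ok().map(|la| la.one)
}

#[cfg(not(target_os = "linux"))]
fn one_minute_load_average() -> Option<f32> {
    // procfs is not even compiled into the non-Linux build.
    None
}

fn main() {
    match one_minute_load_average() {
        Some(load) => println!("1-minute load average: {load}"),
        None => println!("load average not available on this platform"),
    }
}
```
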
Joonas Koivunen
c6ed86d3d0 Merge pull request #7081 from neondatabase/rc/2024-03-11
Release 2024-03-11
2024-03-11 14:41:39 +02:00
Roman Zaynetdinov
f0a9017008 Export db size, deadlocks and changed row metrics (#7050)
## Problem

We want to report metrics for the oldest user database.
2024-03-11 11:55:06 +00:00
Christian Schwarz
bb7949ba00 Merge pull request #6993 from neondatabase/rc/2024-03-04
Release 2024-03-04
2024-03-04 13:08:44 +01:00
Arthur Petukhovsky
1df0f69664 Merge pull request #6973 from neondatabase/rc/2024-02-29-manual
Release 2024-02-29
2024-02-29 17:26:33 +00:00
Vlad Lazar
970066a914 libs: fix expired token in auth decode test (#6963)
The test token expired earlier today (1709200879). I regenerated the
token, but without an expiration date this time.
2024-02-29 17:23:25 +00:00
Arthur Petukhovsky
1ebd3897c0 Merge pull request #6956 from neondatabase/rc/2024-02-28
Release 2024-02-28
2024-02-29 16:39:52 +00:00
Arthur Petukhovsky
6460beffcd Merge pull request #6901 from neondatabase/rc/2024-02-26
Release 2024-02-26
2024-02-26 17:08:19 +00:00
John Spray
6f7f8958db pageserver: only write out legacy tenant config if no generation (#6891)
## Problem

Previously we always wrote out both legacy and modern tenant config
files. The legacy write enabled rollbacks, but we are long past the
point where that is needed.

We still need the legacy format for situations where someone is running
tenants without generations (support for that will also be removed
eventually), but we can avoid writing it out at all if we do have a
generation number set. We also implicitly avoid writing the legacy config
if our mode is Secondary (secondary mode is newer than generations).

## Summary of changes

- Make writing the legacy tenant config conditional on there being no
generation number set (see the sketch after this entry).
2024-02-26 10:25:25 +00:00
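
A schematic sketch of the conditional described above, with made-up types standing in for the pageserver's real configuration code:

```rust
// Schematic only: `TenantConf` and the write helpers are illustrative stand-ins.
struct TenantConf {
    // `None` means this tenant is still running without generation numbers.
    generation: Option<u32>,
}

fn persist_tenant_conf(conf: &TenantConf) {
    // The modern config file is always written.
    write_modern_config(conf);

    // Only tenants without a generation number still need the legacy file.
    // Secondary-mode tenants are covered implicitly: secondary mode postdates
    // generations, so they always have one set.
    if conf.generation.is_none() {
        write_legacy_config(conf);
    }
}

fn write_modern_config(_conf: &TenantConf) { /* serialize and write the modern file */ }
fn write_legacy_config(_conf: &TenantConf) { /* serialize and write the legacy file */ }

fn main() {
    persist_tenant_conf(&TenantConf { generation: Some(7) }); // modern only
    persist_tenant_conf(&TenantConf { generation: None }); // modern + legacy
}
```
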
Christian Schwarz
936a00e077 pageserver: remove two obsolete/unused per-timeline metrics (#6893)
Over-compensates for the addition of a new per-timeline metric in
https://github.com/neondatabase/neon/pull/6834

Part of https://github.com/neondatabase/neon/issues/6737
2024-02-26 09:16:24 +00:00
639 changed files with 28032 additions and 70097 deletions


@@ -1,2 +1,2 @@
[profile.default]
slow-timeout = { period = "60s", terminate-after = 3 }
slow-timeout = { period = "20s", terminate-after = 3 }


@@ -8,7 +8,6 @@
!scripts/combine_control_files.py
!scripts/ninstall.sh
!vm-cgconfig.conf
!docker-compose/run-tests.sh
# Directories
!.cargo/
@@ -18,13 +17,11 @@
!libs/
!neon_local/
!pageserver/
!patches/
!pgxn/
!proxy/
!storage_scrubber/
!s3_scrubber/
!safekeeper/
!storage_broker/
!storage_controller/
!trace/
!vendor/postgres-*/
!workspace_hack/

.gitattributes (vendored), 2 changed lines

@@ -1,2 +0,0 @@
# allows for nicer hunk headers with git show
*.rs diff=rust


@@ -1,11 +1,12 @@
self-hosted-runner:
labels:
- arm64
- dev
- gen3
- large
- large-arm64
# Remove `macos-14` from the list after https://github.com/rhysd/actionlint/pull/392 is merged.
- macos-14
- small
- small-arm64
- us-east-2
config-variables:
- REMOTE_STORAGE_AZURE_CONTAINER


@@ -150,7 +150,7 @@ runs:
# Use aws s3 cp (instead of aws s3 sync) to keep files from previous runs to make old URLs work,
# and to keep files on the host to upload them to the database
time s5cmd --log error cp "${WORKDIR}/report/*" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}/"
time aws s3 cp --recursive --only-show-errors "${WORKDIR}/report" "s3://${BUCKET}/${REPORT_PREFIX}/${GITHUB_RUN_ID}"
# Generate redirect
cat <<EOF > ${WORKDIR}/index.html
@@ -183,7 +183,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Store Allure test stat in the DB (new)
if: ${{ !cancelled() && inputs.store-test-results-into-db == 'true' }}


@@ -26,7 +26,7 @@ runs:
TARGET: ${{ inputs.path }}
ARCHIVE: /tmp/downloads/${{ inputs.name }}.tar.zst
SKIP_IF_DOES_NOT_EXIST: ${{ inputs.skip-if-does-not-exist }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id, github.run_attempt) }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)


@@ -3,14 +3,14 @@ description: 'Create Branch using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project to create Branch in'
desctiption: 'ID of the Project to create Branch in'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
outputs:
dsn:
description: 'Created Branch DSN (for main database)'


@@ -3,17 +3,17 @@ description: 'Delete Branch using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project which should be deleted'
desctiption: 'ID of the Project which should be deleted'
required: true
branch_id:
description: 'ID of the branch to delete'
desctiption: 'ID of the branch to delete'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"


@@ -3,22 +3,22 @@ description: 'Create Neon Project using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
region_id:
description: 'Region ID, if not set the project will be created in the default region'
desctiption: 'Region ID, if not set the project will be created in the default region'
default: aws-us-east-2
postgres_version:
description: 'Postgres version; default is 16'
default: '16'
desctiption: 'Postgres version; default is 15'
default: 15
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
provisioner:
description: 'k8s-pod or k8s-neonvm'
desctiption: 'k8s-pod or k8s-neonvm'
default: 'k8s-pod'
compute_units:
description: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
default: '[1, 1]'
outputs:


@@ -3,14 +3,14 @@ description: 'Delete Neon Project using API'
inputs:
api_key:
description: 'Neon API key'
desctiption: 'Neon API key'
required: true
project_id:
description: 'ID of the Project to delete'
desctiption: 'ID of the Project to delete'
required: true
api_host:
description: 'Neon API host'
default: console-stage.neon.build
desctiption: 'Neon API host'
default: console.stage.neon.tech
runs:
using: "composite"


@@ -56,14 +56,14 @@ runs:
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon
- name: Download Neon binaries for the previous release
if: inputs.build_type != 'remote'
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact
name: neon-${{ runner.os }}-${{ inputs.build_type }}-artifact
path: /tmp/neon-previous
prefix: latest
@@ -89,7 +89,7 @@ runs:
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-${{ runner.arch }}-python-deps-${{ hashFiles('poetry.lock') }}
key: v2-${{ runner.os }}-python-deps-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
@@ -114,8 +114,6 @@ runs:
export PLATFORM=${PLATFORM:-github-actions-selfhosted}
export POSTGRES_DISTRIB_DIR=${POSTGRES_DISTRIB_DIR:-/tmp/neon/pg_install}
export DEFAULT_PG_VERSION=${PG_VERSION#v}
export LD_LIBRARY_PATH=${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/lib
export BENCHMARK_CONNSTR=${BENCHMARK_CONNSTR:-}
if [ "${BUILD_TYPE}" = "remote" ]; then
export REMOTE_ENV=1
@@ -180,20 +178,13 @@ runs:
# Wake up the cluster if we use remote neon instance
if [ "${{ inputs.build_type }}" = "remote" ] && [ -n "${BENCHMARK_CONNSTR}" ]; then
QUERIES=("SELECT version()")
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERIES+=("SHOW neon.tenant_id")
QUERIES+=("SHOW neon.timeline_id")
fi
for q in "${QUERIES[@]}"; do
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "${q}"
done
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/psql ${BENCHMARK_CONNSTR} -c "SELECT version();"
fi
# Run the tests.
#
# --alluredir saves test results in Allure format (in a specified directory)
# The junit.xml file allows CI tools to display more fine-grained test information
# in its "Tests" tab in the results page.
# --verbose prints name of each test (helpful when there are
# multiple tests in one file)
# -rA prints summary in the end
@@ -202,6 +193,7 @@ runs:
#
mkdir -p $TEST_OUTPUT/allure/results
"${cov_prefix[@]}" ./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--alluredir=$TEST_OUTPUT/allure/results \
--tb=short \
--verbose \


@@ -8,7 +8,7 @@ inputs:
description: "A directory or file to upload"
required: true
prefix:
description: "S3 prefix. Default is '${GITHUB_SHA}/${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
description: "S3 prefix. Default is '${GITHUB_RUN_ID}/${GITHUB_RUN_ATTEMPT}'"
required: false
runs:
@@ -45,7 +45,7 @@ runs:
env:
SOURCE: ${{ inputs.path }}
ARCHIVE: /tmp/uploads/${{ inputs.name }}.tar.zst
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}/{2}', github.event.pull_request.head.sha || github.sha, github.run_id , github.run_attempt) }}
PREFIX: artifacts/${{ inputs.prefix || format('{0}/{1}', github.run_id, github.run_attempt) }}
run: |
BUCKET=neon-github-public-dev
FILENAME=$(basename $ARCHIVE)


@@ -1,291 +0,0 @@
name: Build and Test Locally
on:
workflow_call:
inputs:
arch:
description: 'x64 or arm64'
required: true
type: string
build-tag:
description: 'build tag'
required: true
type: string
build-tools-image:
description: 'build-tools image'
required: true
type: string
build-type:
description: 'debug or release'
required: true
type: string
defaults:
run:
shell: bash -euxo pipefail {0}
env:
RUST_BACKTRACE: 1
COPT: '-Werror'
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
jobs:
build-neon:
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# Raise locked memory limit for tokio-epoll-uring.
# On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
# io_uring will account the memory of the CQ and SQ as locked.
# More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
env:
BUILD_TYPE: ${{ inputs.build-type }}
GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
BUILD_TAG: ${{ inputs.build-tag }}
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
# Set some environment variables used by all the steps.
#
# CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
# It also includes --features, if any
#
# CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
# because "cargo metadata" doesn't accept --release or --debug options
#
# We run tests with addtional features, that are turned off by default (e.g. in release builds), see
# corresponding Cargo.toml files for their descriptions.
- name: Set env variables
run: |
CARGO_FEATURES="--features testing"
if [[ $BUILD_TYPE == "debug" ]]; then
cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
CARGO_FLAGS="--locked"
elif [[ $BUILD_TYPE == "release" ]]; then
cov_prefix=""
CARGO_FLAGS="--locked --release"
fi
{
echo "cov_prefix=${cov_prefix}"
echo "CARGO_FEATURES=${CARGO_FEATURES}"
echo "CARGO_FLAGS=${CARGO_FLAGS}"
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
} >> $GITHUB_ENV
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile', 'Dockerfile.build-tools') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
- name: Run cargo build
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
# Do install *before* running rust tests because they might recompile the
# binaries with different features/flags.
- name: Install rust binaries
run: |
# Install target binaries
mkdir -p /tmp/neon/bin/
binaries=$(
${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
)
for bin in $binaries; do
SRC=target/$BUILD_TYPE/$bin
DST=/tmp/neon/bin/$bin
cp "$SRC" "$DST"
done
# Install test executables and write list of all binaries (for code coverage)
if [[ $BUILD_TYPE == "debug" ]]; then
# Keep bloated coverage data files away from the rest of the artifact
mkdir -p /tmp/coverage/
mkdir -p /tmp/neon/test_bin/
test_exe_paths=$(
${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
jq -r '.executable | select(. != null)'
)
for bin in $test_exe_paths; do
SRC=$bin
DST=/tmp/neon/test_bin/$(basename $bin)
# We don't need debug symbols for code coverage, so strip them out to make
# the artifact smaller.
strip "$SRC" -o "$DST"
echo "$DST" >> /tmp/coverage/binaries.list
done
for bin in $binaries; do
echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
done
fi
- name: Run rust tests
env:
NEXTEST_RETRIES: 3
run: |
PQ_LIB_DIR=$(pwd)/pg_install/v16/lib
export PQ_LIB_DIR
LD_LIBRARY_PATH=$(pwd)/pg_install/v16/lib
export LD_LIBRARY_PATH
#nextest does not yet support running doctests
cargo test --doc $CARGO_FLAGS $CARGO_FEATURES
for io_engine in std-fs tokio-epoll-uring ; do
NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=$io_engine ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES
done
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'
- name: Install postgres binaries
run: cp -a pg_install /tmp/neon/pg_install
- name: Upload Neon artifact
uses: ./.github/actions/upload
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact
path: /tmp/neon
# XXX: keep this after the binaries.list is formed, so the coverage can properly work later
- name: Merge and upload coverage data
if: inputs.build-type == 'debug'
uses: ./.github/actions/save-coverage-data
regress-tests:
# Run test on x64 only
if: inputs.arch == 'x64'
needs: [ build-neon ]
runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
container:
image: ${{ inputs.build-tools-image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
# for changed limits, see comments on `options:` earlier in this file
options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
strategy:
fail-fast: false
matrix:
pg_version: [ v14, v15, v16 ]
steps:
- uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Pytest regression tests
uses: ./.github/actions/run-python-test-set
timeout-minutes: 60
with:
build_type: ${{ inputs.build-type }}
test_selection: regress
needs_postgres_source: true
run_with_real_s3: true
real_s3_bucket: neon-github-ci-tests
real_s3_region: eu-central-1
rerun_flaky: true
pg_version: ${{ matrix.pg_version }}
env:
TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
BUILD_TAG: ${{ inputs.build-tag }}
PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
PAGESERVER_GET_VECTORED_IMPL: vectored
PAGESERVER_GET_IMPL: vectored
PAGESERVER_VALIDATE_VEC_GET: true
# Temporary disable this step until we figure out why it's so flaky
# Ref https://github.com/neondatabase/neon/issues/4540
- name: Merge and upload coverage data
if: |
false &&
inputs.build-type == 'debug' && matrix.pg_version == 'v14'
uses: ./.github/actions/save-coverage-data


@@ -24,7 +24,7 @@ jobs:
actionlint:
needs: [ check-permissions ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: reviewdog/action-actionlint@v1
@@ -36,16 +36,3 @@ jobs:
fail_on_error: true
filter_mode: nofilter
level: error
- name: Disallow 'ubuntu-latest' runners
run: |
PAT='^\s*runs-on:.*-latest'
if grep -ERq $PAT .github/workflows; then
grep -ERl $PAT .github/workflows |\
while read -r f
do
l=$(grep -nE $PAT .github/workflows/release.yml | awk -F: '{print $1}' | head -1)
echo "::error file=$f,line=$l::Please use 'ubuntu-22.04' instead of 'ubuntu-latest'"
done
exit 1
fi


@@ -18,7 +18,6 @@ on:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
cancel-in-progress: false
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -44,7 +43,7 @@ jobs:
contains(fromJSON('["opened", "synchronize", "reopened", "closed"]'), github.event.action) &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -60,7 +59,7 @@ jobs:
github.event.action == 'labeled' &&
contains(github.event.pull_request.labels.*.name, 'approved-for-ci-run')
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- run: gh pr --repo "${GITHUB_REPOSITORY}" edit "${PR_NUMBER}" --remove-label "approved-for-ci-run"
@@ -69,41 +68,15 @@ jobs:
with:
ref: main
token: ${{ secrets.CI_ACCESS_TOKEN }}
- name: Look for existing PR
id: get-pr
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
echo "ALREADY_CREATED=${ALREADY_CREATED}" >> ${GITHUB_OUTPUT}
- name: Get changed labels
id: get-labels
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
LABELS_TO_REMOVE=$(comm -23 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) | sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
( grep -v run-e2e-tests-in-draft || true ) | paste -sd , -)
LABELS_TO_ADD=$(comm -13 <(gh pr --repo ${GITHUB_REPOSITORY} view ${ALREADY_CREATED} --json labels --jq '.labels.[].name'| ( grep -E '^run' || true ) |sort) \
<(gh pr --repo ${GITHUB_REPOSITORY} view ${PR_NUMBER} --json labels --jq '.labels.[].name' | ( grep -E '^run' || true ) | sort ) |\
paste -sd , -)
echo "LABELS_TO_ADD=${LABELS_TO_ADD}" >> ${GITHUB_OUTPUT}
echo "LABELS_TO_REMOVE=${LABELS_TO_REMOVE}" >> ${GITHUB_OUTPUT}
- run: gh pr checkout "${PR_NUMBER}"
- run: git checkout -b "${BRANCH}"
- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED == ''
- name: Create a Pull Request for CI run (if required)
if: steps.get-pr.outputs.ALREADY_CREATED == ''
env:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
cat << EOF > body.md
@@ -114,33 +87,16 @@ jobs:
Feel free to review/comment/discuss the original PR #${PR_NUMBER}.
EOF
LABELS=$( (gh pr --repo "${GITHUB_REPOSITORY}" view ${PR_NUMBER} --json labels --jq '.labels.[].name'; echo run-e2e-tests-in-draft )| \
grep -E '^run' | paste -sd , -)
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
ALREADY_CREATED="$(gh pr --repo ${GITHUB_REPOSITORY} list --head ${BRANCH} --base main --json number --jq '.[].number')"
if [ -z "${ALREADY_CREATED}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" create --title "CI run for PR #${PR_NUMBER}" \
--body-file "body.md" \
--head "${BRANCH}" \
--base "main" \
--label ${LABELS} \
--label "run-e2e-tests-in-draft" \
--draft
- name: Modify the existing pull request (if required)
if: steps.get-pr.outputs.ALREADY_CREATED != ''
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
LABELS_TO_ADD: ${{ steps.get-labels.outputs.LABELS_TO_ADD }}
LABELS_TO_REMOVE: ${{ steps.get-labels.outputs.LABELS_TO_REMOVE }}
ALREADY_CREATED: ${{ steps.get-pr.outputs.ALREADY_CREATED }}
run: |
ADD_CMD=
REMOVE_CMD=
[ -z "${LABELS_TO_ADD}" ] || ADD_CMD="--add-label ${LABELS_TO_ADD}"
[ -z "${LABELS_TO_REMOVE}" ] || REMOVE_CMD="--remove-label ${LABELS_TO_REMOVE}"
if [ -n "${ADD_CMD}" ] || [ -n "${REMOVE_CMD}" ]; then
gh pr --repo "${GITHUB_REPOSITORY}" edit ${ALREADY_CREATED} ${ADD_CMD} ${REMOVE_CMD}
fi
- run: git push --force origin "${BRANCH}"
if: steps.get-pr.outputs.ALREADY_CREATED != ''
cleanup:
# Close PRs and delete branchs if the original PR is closed.
@@ -152,7 +108,7 @@ jobs:
github.event.action == 'closed' &&
github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Close PR and delete `ci-run/pr-${{ env.PR_NUMBER }}` branch


@@ -38,11 +38,6 @@ on:
description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
required: false
default: false
run_only_pgvector_tests:
type: boolean
description: 'Run pgvector tests but no other tests. If not set, all tests including pgvector tests will be run'
required: false
default: false
defaults:
run:
@@ -55,28 +50,15 @@ concurrency:
jobs:
bench:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
strategy:
fail-fast: false
matrix:
include:
- DEFAULT_PG_VERSION: 16
PLATFORM: "neon-staging"
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
provisioner: 'k8s-pod'
- DEFAULT_PG_VERSION: 16
PLATFORM: "azure-staging"
region_id: 'azure-eastus2'
provisioner: 'k8s-neonvm'
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "300"
TEST_PG_BENCH_SCALES_MATRIX: "10,100"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: ${{ matrix.DEFAULT_PG_VERSION }}
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }}
PLATFORM: "neon-staging"
runs-on: [ self-hosted, us-east-2, x64 ]
container:
@@ -89,7 +71,7 @@ jobs:
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
@@ -97,10 +79,9 @@ jobs:
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
provisioner: ${{ matrix.provisioner }}
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
@@ -109,18 +90,10 @@ jobs:
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
pg_version: ${{ env.DEFAULT_PG_VERSION }}
# Set --sparse-ordering option of pytest-order plugin
# to ensure tests are running in order of appears in the file.
# It's important for test_perf_pgbench.py::test_pgbench_remote_* tests
extra_params:
-m remote_cluster
--sparse-ordering
--timeout 14400
--ignore test_runner/performance/test_perf_olap.py
--ignore test_runner/performance/test_perf_pgvector_queries.py
--ignore test_runner/performance/test_logical_replication.py
--ignore test_runner/performance/test_physical_replication.py
extra_params: -m remote_cluster --sparse-ordering --timeout 5400 --ignore test_runner/performance/test_perf_olap.py
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -146,87 +119,18 @@ jobs:
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
replication-tests:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
env:
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-staging"
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init
steps:
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_logical_replication.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Run benchmark
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_physical_replication.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 5400
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic replication testing: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
generate-matrices:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
# Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
#
# Available platforms:
# - neon-captest-new: Freshly created project (1 CU)
# - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
# - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region
# - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region
# - neon-captest-reuse: Reusing existing project
# - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
# - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
env:
RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
DEFAULT_REGION_ID: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
@@ -236,33 +140,22 @@ jobs:
- name: Generate matrix for pgbench benchmark
id: pgbench-compare-matrix
run: |
region_id_default=${{ env.DEFAULT_REGION_ID }}
matrix='{
"pg_version" : [
16
],
"region_id" : [
"'"$region_id_default"'"
],
"platform": [
"neon-captest-new",
"neon-captest-reuse",
"neonvm-captest-new"
],
"db_size": [ "10gb" ],
"include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neon-captest-new", "db_size": "50gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb" },
{ "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb" },
{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb" }]
"include": [{ "platform": "neon-captest-freetier", "db_size": "3gb" },
{ "platform": "neon-captest-new", "db_size": "50gb" },
{ "platform": "neonvm-captest-freetier", "db_size": "3gb" },
{ "platform": "neonvm-captest-new", "db_size": "50gb" }]
}'
if [ "$(date +%A)" = "Saturday" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-postgres", "db_size": "10gb"},
{ "pg_version": 14, "region_id": "'"$region_id_default"'", "platform": "rds-aurora", "db_size": "50gb"}]')
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
{ "platform": "rds-aurora", "db_size": "50gb"}]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -278,7 +171,7 @@ jobs:
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
{ "platform": "rds-aurora" }]')
{ "platform": "rds-aurora" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -297,13 +190,12 @@ jobs:
if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
{ "platform": "rds-aurora", "scale": "10" }]')
{ "platform": "rds-aurora", "scale": "10" }]')
fi
echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
pgbench-compare:
if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }}
needs: [ generate-matrices ]
strategy:
@@ -314,7 +206,7 @@ jobs:
TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
TEST_PG_BENCH_SCALES_MATRIX: ${{ matrix.db_size }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: ${{ matrix.pg_version }}
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
@@ -334,19 +226,24 @@ jobs:
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Create Neon Project
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform)
if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier"]'), matrix.platform)
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
region_id: ${{ matrix.region_id }}
region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }}
compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}
provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }}
- name: Set up Connection String
@@ -356,10 +253,7 @@ jobs:
neon-captest-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
;;
neonvm-captest-sharding-reuse)
CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }}
;;
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier)
neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier)
CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
;;
rds-aurora)
@@ -376,6 +270,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Benchmark init
uses: ./.github/actions/run-python-test-set
with:
@@ -384,7 +284,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_init
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -398,7 +297,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_simple_update
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -412,7 +310,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgbench_remote_select_only
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
@@ -438,110 +335,6 @@ jobs:
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
pgbench-pgvector:
strategy:
fail-fast: false
matrix:
include:
- PLATFORM: "neon-captest-pgvector"
- PLATFORM: "azure-captest-pgvector"
env:
TEST_PG_BENCH_DURATIONS_MATRIX: "15m"
TEST_PG_BENCH_SCALES_MATRIX: "1"
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
DEFAULT_PG_VERSION: 16
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
LD_LIBRARY_PATH: /home/nonroot/pg/usr/lib/x86_64-linux-gnu
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.PLATFORM }}
runs-on: [ self-hosted, us-east-2, x64 ]
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/build-tools:pinned
options: --init
steps:
- uses: actions/checkout@v4
# until https://github.com/neondatabase/neon/issues/8275 is fixed we temporarily install postgresql-16
# instead of using Neon artifacts containing pgbench
- name: Install postgresql-16 where pytest expects it
run: |
cd /home/nonroot
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/libpq5_16.3-1.pgdg110%2B1_amd64.deb
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-client-16_16.3-1.pgdg110%2B1_amd64.deb
wget -q https://apt.postgresql.org/pub/repos/apt/pool/main/p/postgresql-16/postgresql-16_16.3-1.pgdg110%2B1_amd64.deb
dpkg -x libpq5_16.3-1.pgdg110+1_amd64.deb pg
dpkg -x postgresql-client-16_16.3-1.pgdg110+1_amd64.deb pg
dpkg -x postgresql-16_16.3-1.pgdg110+1_amd64.deb pg
mkdir -p /tmp/neon/pg_install/v16/bin
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/pgbench /tmp/neon/pg_install/v16/bin/pgbench
ln -s /home/nonroot/pg/usr/lib/postgresql/16/bin/psql /tmp/neon/pg_install/v16/bin/psql
ln -s /home/nonroot/pg/usr/lib/x86_64-linux-gnu /tmp/neon/pg_install/v16/lib
/tmp/neon/pg_install/v16/bin/pgbench --version
/tmp/neon/pg_install/v16/bin/psql --version
- name: Set up Connection String
id: set-up-connstr
run: |
case "${PLATFORM}" in
neon-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }}
;;
azure-captest-pgvector)
CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR_AZURE }}
;;
*)
echo >&2 "Unknown PLATFORM=${PLATFORM}"
exit 1
;;
esac
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
- name: Benchmark pgvector hnsw indexing
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_olap.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_pgvector_indexing
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Benchmark pgvector queries
uses: ./.github/actions/run-python-test-set
with:
build_type: ${{ env.BUILD_TYPE }}
test_selection: performance/test_perf_pgvector_queries.py
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
- name: Create Allure report
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic perf testing ${PLATFORM}: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
clickbench-compare:
# ClichBench DB for rds-aurora and rds-Postgres deployed to the same clusters
# we use for performance testing in pgbench-compare.
@@ -550,7 +343,7 @@ jobs:
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare ]
strategy:
@@ -578,10 +371,15 @@ jobs:
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
@@ -603,6 +401,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: ClickBench benchmark
uses: ./.github/actions/run-python-test-set
with:
@@ -639,7 +443,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare ]
strategy:
@@ -666,10 +470,15 @@ jobs:
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Get Connstring Secret Name
run: |
case "${PLATFORM}" in
@@ -698,6 +507,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Run TPC-H benchmark
uses: ./.github/actions/run-python-test-set
with:
@@ -726,7 +541,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
if: ${{ !cancelled() && (github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null) }}
if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare ]
strategy:
@@ -752,10 +567,15 @@ jobs:
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
name: neon-${{ runner.os }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Add Postgres binaries to PATH
run: |
${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin/pgbench --version
echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH
- name: Set up Connection String
id: set-up-connstr
run: |
@@ -777,6 +597,12 @@ jobs:
echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT
QUERY="SELECT version();"
if [[ "${PLATFORM}" = "neon"* ]]; then
QUERY="${QUERY} SHOW neon.tenant_id; SHOW neon.timeline_id;"
fi
psql ${CONNSTR} -c "${QUERY}"
- name: Run user examples
uses: ./.github/actions/run-python-test-set
with:
@@ -785,7 +611,6 @@ jobs:
run_in_parallel: false
save_perf_report: ${{ env.SAVE_PERF_REPORT }}
extra_params: -m remote_cluster --timeout 21600 -k test_user_examples
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"


@@ -21,7 +21,6 @@ defaults:
concurrency:
group: build-build-tools-image-${{ inputs.image-tag }}
cancel-in-progress: false
# No permission for GITHUB_TOKEN by default; the **minimal required** set of permissions should be granted in each job.
permissions: {}
@@ -30,6 +29,7 @@ jobs:
check-image:
uses: ./.github/workflows/check-build-tools-image.yml
# This job uses older version of GitHub Actions because it's run on gen2 runners, which don't support node 20 (for newer versions)
build-image:
needs: [ check-image ]
if: needs.check-image.outputs.found == 'false'
@@ -38,7 +38,7 @@ jobs:
matrix:
arch: [ x64, arm64 ]
runs-on: ${{ fromJson(format('["self-hosted", "gen3", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }}
runs-on: ${{ fromJson(format('["self-hosted", "dev", "{0}"]', matrix.arch)) }}
env:
IMAGE_TAG: ${{ inputs.image-tag }}
@@ -54,7 +54,7 @@ jobs:
exit 1
fi
- uses: actions/checkout@v4
- uses: actions/checkout@v3
# Use custom DOCKER_CONFIG directory to avoid conflicts with default settings
# The default value is ~/.docker
@@ -63,16 +63,14 @@ jobs:
mkdir -p /tmp/.docker-custom
echo DOCKER_CONFIG=/tmp/.docker-custom >> $GITHUB_ENV
- uses: docker/setup-buildx-action@v3
with:
cache-binary: false
- uses: docker/setup-buildx-action@v2
- uses: docker/login-action@v3
- uses: docker/login-action@v2
with:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
- uses: docker/build-push-action@v6
- uses: docker/build-push-action@v4
with:
context: .
provenance: false
@@ -80,17 +78,16 @@ jobs:
pull: true
file: Dockerfile.build-tools
cache-from: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }}
cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=neondatabase/build-tools:cache-{0},mode=max', matrix.arch) || '' }}
cache-to: type=registry,ref=neondatabase/build-tools:cache-${{ matrix.arch }},mode=max
tags: neondatabase/build-tools:${{ inputs.image-tag }}-${{ matrix.arch }}
- name: Remove custom docker config directory
if: always()
run: |
rm -rf /tmp/.docker-custom
merge-images:
needs: [ build-image ]
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
env:
IMAGE_TAG: ${{ inputs.image-tag }}

File diff suppressed because it is too large.


@@ -19,23 +19,30 @@ permissions: {}
jobs:
check-image:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.get-build-tools-tag.outputs.image-tag }}
found: ${{ steps.check-image.outputs.found }}
steps:
- uses: actions/checkout@v4
- name: Get build-tools image tag for the current commit
id: get-build-tools-tag
env:
IMAGE_TAG: |
${{ hashFiles('Dockerfile.build-tools',
'.github/workflows/check-build-tools-image.yml',
'.github/workflows/build-build-tools-image.yml') }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
echo "image-tag=${IMAGE_TAG}" | tee -a $GITHUB_OUTPUT
LAST_BUILD_TOOLS_SHA=$(
gh api \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
--method GET \
--field path=Dockerfile.build-tools \
--field sha=${COMMIT_SHA} \
--field per_page=1 \
--jq ".[0].sha" \
"/repos/${GITHUB_REPOSITORY}/commits"
)
echo "image-tag=${LAST_BUILD_TOOLS_SHA}" | tee -a $GITHUB_OUTPUT
- name: Check if such tag found in the registry
id: check-image


@@ -16,7 +16,7 @@ permissions: {}
jobs:
check-permissions:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Disallow CI runs on PRs from forks
if: |


@@ -9,7 +9,7 @@ on:
jobs:
cleanup:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Cleanup
run: |


@@ -133,13 +133,212 @@ jobs:
- name: Check that no warnings are produced
run: ./run_clippy.sh
check-linux-arm-build:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
env:
# Use release build only, to have less debug info around
# Hence keeping target/ (and general cache size) smaller
BUILD_TYPE: release
CARGO_FEATURES: --features testing
CARGO_FLAGS: --release
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
- name: Set pg 14 revision for caching
id: pg_v14_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
- name: Set pg 15 revision for caching
id: pg_v15_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT
- name: Set pg 16 revision for caching
id: pg_v16_rev
run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT
- name: Set env variables
run: |
echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" >> $GITHUB_ENV
- name: Cache postgres v14 build
id: cache_pg_14
uses: actions/cache@v4
with:
path: pg_install/v14
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v15 build
id: cache_pg_15
uses: actions/cache@v4
with:
path: pg_install/v15
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Cache postgres v16 build
id: cache_pg_16
uses: actions/cache@v4
with:
path: pg_install/v16
key: v1-${{ runner.os }}-${{ runner.arch }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
- name: Build postgres v14
if: steps.cache_pg_14.outputs.cache-hit != 'true'
run: mold -run make postgres-v14 -j$(nproc)
- name: Build postgres v15
if: steps.cache_pg_15.outputs.cache-hit != 'true'
run: mold -run make postgres-v15 -j$(nproc)
- name: Build postgres v16
if: steps.cache_pg_16.outputs.cache-hit != 'true'
run: mold -run make postgres-v16 -j$(nproc)
- name: Build neon extensions
run: mold -run make neon-pg-ext -j$(nproc)
- name: Build walproposer-lib
run: mold -run make walproposer-lib -j$(nproc)
- name: Run cargo build
run: |
mold -run cargo build --locked $CARGO_FLAGS $CARGO_FEATURES --bins --tests
- name: Run cargo test
env:
NEXTEST_RETRIES: 3
run: |
cargo nextest run $CARGO_FEATURES
# Run separate tests for real S3
export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
export REMOTE_STORAGE_S3_REGION=eu-central-1
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_s3
# Run separate tests for real Azure Blob Storage
# XXX: replace region with `eu-central-1`-like region
export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
# Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
cargo nextest run --package remote_storage --test test_real_azure
check-codestyle-rust-arm:
needs: [ check-permissions, build-build-tools-image ]
timeout-minutes: 90
runs-on: [ self-hosted, dev, arm64 ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
steps:
- name: Fix git ownership
run: |
# Workaround for `fatal: detected dubious ownership in repository at ...`
#
# Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
# Ref https://github.com/actions/checkout/issues/785
#
git config --global --add safe.directory ${{ github.workspace }}
git config --global --add safe.directory ${GITHUB_WORKSPACE}
for r in 14 15 16; do
git config --global --add safe.directory "${{ github.workspace }}/vendor/postgres-v$r"
git config --global --add safe.directory "${GITHUB_WORKSPACE}/vendor/postgres-v$r"
done
- name: Checkout
uses: actions/checkout@v4
with:
submodules: true
fetch-depth: 1
# Some of our rust modules use FFI and need those to be checked
- name: Get postgres headers
run: make postgres-headers -j$(nproc)
# cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations.
# This will catch compiler & clippy warnings in all feature combinations.
# TODO: use cargo hack for build and test as well, but, that's quite expensive.
# NB: keep clippy args in sync with ./run_clippy.sh
- run: |
CLIPPY_COMMON_ARGS="$( source .neon_clippy_args; echo "$CLIPPY_COMMON_ARGS")"
if [ "$CLIPPY_COMMON_ARGS" = "" ]; then
echo "No clippy args found in .neon_clippy_args"
exit 1
fi
echo "CLIPPY_COMMON_ARGS=${CLIPPY_COMMON_ARGS}" >> $GITHUB_ENV
- name: Run cargo clippy (debug)
run: cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
- name: Run cargo clippy (release)
run: cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS
- name: Check documentation generation
run: cargo doc --workspace --no-deps --document-private-items
env:
RUSTDOCFLAGS: "-Dwarnings -Arustdoc::private_intra_doc_links"
# Use `${{ !cancelled() }}` to run quck tests after the longer clippy run
- name: Check formatting
if: ${{ !cancelled() }}
run: cargo fmt --all -- --check
# https://github.com/facebookincubator/cargo-guppy/tree/bec4e0eb29dcd1faac70b1b5360267fc02bf830e/tools/cargo-hakari#2-keep-the-workspace-hack-up-to-date-in-ci
- name: Check rust dependencies
if: ${{ !cancelled() }}
run: |
cargo hakari generate --diff # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
# https://github.com/EmbarkStudios/cargo-deny
- name: Check rust licenses/bans/advisories/sources
if: ${{ !cancelled() }}
run: cargo deny check
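
The checks in this job can be approximated locally; a sketch, assuming `cargo-hack`, `cargo-hakari` and `cargo-deny` are installed and that `.neon_clippy_args` exports `CLIPPY_COMMON_ARGS` as shown further below:

```bash
# Approximate local equivalent of the ARM code-style job above (a sketch, not the authoritative CI definition).
source .neon_clippy_args   # exports CLIPPY_COMMON_ARGS

# Clippy over every feature combination, debug and release, as cargo hack does in the job.
cargo hack --feature-powerset clippy $CLIPPY_COMMON_ARGS
cargo hack --feature-powerset clippy --release $CLIPPY_COMMON_ARGS

# Documentation, formatting, workspace-hack and dependency-policy checks.
RUSTDOCFLAGS="-Dwarnings -Arustdoc::private_intra_doc_links" \
  cargo doc --workspace --no-deps --document-private-items
cargo fmt --all -- --check
cargo hakari generate --diff       # workspace-hack Cargo.toml is up-to-date
cargo hakari manage-deps --dry-run # all workspace crates depend on workspace-hack
cargo deny check
```
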
gather-rust-build-stats:
needs: [ check-permissions, build-build-tools-image ]
if: |
contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats') ||
contains(github.event.pull_request.labels.*.name, 'run-extra-build-*') ||
github.ref_name == 'main'
runs-on: [ self-hosted, large ]
runs-on: [ self-hosted, gen3, large ]
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
@@ -170,7 +369,7 @@ jobs:
run: make walproposer-lib -j$(nproc)
- name: Produce the build stats
run: PQ_LIB_DIR=$(pwd)/pg_install/v16/lib cargo build --all --release --timings -j$(nproc)
run: cargo build --all --release --timings
- name: Upload the build stats
id: upload-stats


@@ -1,155 +0,0 @@
name: Periodic pagebench performance test on dedicated EC2 machine in eu-central-1 region
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '0 18 * * *' # Runs at 6 PM UTC every day
workflow_dispatch: # Allows manual triggering of the workflow
inputs:
commit_hash:
type: string
description: 'The long neon repo commit hash for the system under test (pageserver) to be tested.'
required: false
default: ''
defaults:
run:
shell: bash -euo pipefail {0}
concurrency:
group: ${{ github.workflow }}
cancel-in-progress: false
jobs:
trigger_bench_on_ec2_machine_in_eu_central_1:
runs-on: [ self-hosted, gen3, small ]
container:
image: neondatabase/build-tools:pinned
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init
timeout-minutes: 360 # Set the timeout to 6 hours
env:
API_KEY: ${{ secrets.PERIODIC_PAGEBENCH_EC2_RUNNER_API_KEY }}
RUN_ID: ${{ github.run_id }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_EC2_US_TEST_RUNNER_ACCESS_KEY_SECRET }}
AWS_DEFAULT_REGION : "eu-central-1"
AWS_INSTANCE_ID : "i-02a59a3bf86bc7e74"
steps:
# we don't need the neon source code because we run everything remotely
# however we still need the local github actions to run the allure step below
- uses: actions/checkout@v4
- name: Show my own (github runner) external IP address - useful for IP allowlisting
run: curl https://ifconfig.me
- name: Start EC2 instance and wait for the instance to boot up
run: |
aws ec2 start-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-running --instance-ids $AWS_INSTANCE_ID
sleep 60 # sleep some time to allow cloudinit and our API server to start up
- name: Determine public IP of the EC2 instance and set env variable EC2_MACHINE_URL_US
run: |
public_ip=$(aws ec2 describe-instances --instance-ids $AWS_INSTANCE_ID --query 'Reservations[*].Instances[*].PublicIpAddress' --output text)
echo "Public IP of the EC2 instance: $public_ip"
echo "EC2_MACHINE_URL_US=https://${public_ip}:8443" >> $GITHUB_ENV
- name: Determine commit hash
env:
INPUT_COMMIT_HASH: ${{ github.event.inputs.commit_hash }}
run: |
if [ -z "$INPUT_COMMIT_HASH" ]; then
echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV
else
echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV
fi
- name: Start Bench with run_id
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/start_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}"
- name: Poll Test Status
id: poll_step
run: |
status=""
while [[ "$status" != "failure" && "$status" != "success" ]]; do
response=$(curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY")
echo "Response: $response"
set +x
status=$(echo $response | jq -r '.status')
echo "Test status: $status"
if [[ "$status" == "failure" ]]; then
echo "Test failed"
exit 1 # Fail the job step if status is failure
elif [[ "$status" == "success" || "$status" == "null" ]]; then
break
elif [[ "$status" == "too_many_runs" ]]; then
echo "Too many runs already running"
echo "too_many_runs=true" >> "$GITHUB_OUTPUT"
exit 1
fi
sleep 60 # Poll every 60 seconds
done
- name: Retrieve Test Logs
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
curl -k -X 'GET' \
"${EC2_MACHINE_URL_US}/test_log/${GITHUB_RUN_ID}" \
-H 'accept: application/gzip' \
-H "Authorization: Bearer $API_KEY" \
--output "test_log_${GITHUB_RUN_ID}.gz"
- name: Unzip Test Log and Print it into this job's log
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
gzip -d "test_log_${GITHUB_RUN_ID}.gz"
cat "test_log_${GITHUB_RUN_ID}"
- name: Create Allure report
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report-generate
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Periodic pagebench testing on dedicated hardware: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
- name: Cleanup Test Resources
if: always()
run: |
curl -k -X 'POST' \
"${EC2_MACHINE_URL_US}/cleanup_test/${GITHUB_RUN_ID}" \
-H 'accept: application/json' \
-H "Authorization: Bearer $API_KEY" \
-d ''
- name: Stop EC2 instance and wait for the instance to be stopped
if: always() && steps.poll_step.outputs.too_many_runs != 'true'
run: |
aws ec2 stop-instances --instance-ids $AWS_INSTANCE_ID
aws ec2 wait instance-stopped --instance-ids $AWS_INSTANCE_ID
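
The "Poll Test Status" step above boils down to querying the test runner's HTTP API until it reports a terminal state; a simplified sketch, assuming `EC2_MACHINE_URL_US`, `API_KEY` and `GITHUB_RUN_ID` are set as in the workflow:

```bash
# Simplified sketch of the poll loop in the "Poll Test Status" step above.
while true; do
  response=$(curl -k -s "${EC2_MACHINE_URL_US}/test_status/${GITHUB_RUN_ID}" \
    -H 'accept: application/json' \
    -H "Authorization: Bearer $API_KEY")
  status=$(echo "$response" | jq -r '.status')
  echo "Test status: $status"
  case "$status" in
    success|null)  break ;;                                     # finished (or no status recorded yet)
    failure)       echo "Test failed"; exit 1 ;;
    too_many_runs) echo "Too many runs already running"; exit 1 ;;
  esac
  sleep 60  # poll every 60 seconds
done
```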


@@ -1,115 +0,0 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
pull_request:
paths:
- '.github/workflows/pg-clients.yml'
- 'test_runner/pg_clients/**'
- 'poetry.lock'
workflow_dispatch:
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
defaults:
run:
shell: bash -euxo pipefail {0}
env:
DEFAULT_PG_VERSION: 16
PLATFORM: neon-captest-new
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
AWS_DEFAULT_REGION: eu-central-1
jobs:
check-permissions:
if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }}
uses: ./.github/workflows/check-permissions.yml
with:
github-event-name: ${{ github.event_name }}
check-build-tools-image:
needs: [ check-permissions ]
uses: ./.github/workflows/check-build-tools-image.yml
build-build-tools-image:
needs: [ check-build-tools-image ]
uses: ./.github/workflows/build-build-tools-image.yml
with:
image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }}
secrets: inherit
test-postgres-client-libs:
needs: [ build-build-tools-image ]
runs-on: ubuntu-22.04
container:
image: ${{ needs.build-build-tools-image.outputs.image }}
credentials:
username: ${{ secrets.NEON_DOCKERHUB_USERNAME }}
password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }}
options: --init --user root
steps:
- uses: actions/checkout@v4
- name: Download Neon artifact
uses: ./.github/actions/download
with:
name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
path: /tmp/neon/
prefix: latest
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run tests
uses: ./.github/actions/run-python-test-set
with:
build_type: remote
test_selection: pg_clients
run_in_parallel: false
extra_params: -m remote_cluster
pg_version: ${{ env.DEFAULT_PG_VERSION }}
env:
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
- name: Delete Neon Project
if: always()
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: ${{ !cancelled() }}
id: create-allure-report
uses: ./.github/actions/allure-report-generate
with:
store-test-results-into-db: true
env:
REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
- name: Post to a Slack channel
if: github.event.schedule && failure()
uses: slackapi/slack-github-action@v1
with:
channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream
slack-message: |
Testing Postgres clients: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>)
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

.github/workflows/pg_clients.yml (new file)

@@ -0,0 +1,98 @@
name: Test Postgres client libraries
on:
schedule:
# * is a special character in YAML so you have to quote this string
# ┌───────────── minute (0 - 59)
# │ ┌───────────── hour (0 - 23)
# │ │ ┌───────────── day of the month (1 - 31)
# │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
# │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
- cron: '23 02 * * *' # run once a day, timezone is utc
workflow_dispatch:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
test-postgres-client-libs:
# TODO: switch to gen2 runner, requires docker
runs-on: [ ubuntu-latest ]
env:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
steps:
- name: Checkout
uses: actions/checkout@v4
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Install Poetry
uses: snok/install-poetry@v1
- name: Cache poetry deps
uses: actions/cache@v4
with:
path: ~/.cache/pypoetry/virtualenvs
key: v2-${{ runner.os }}-python-deps-ubunutu-latest-${{ hashFiles('poetry.lock') }}
- name: Install Python deps
shell: bash -euxo pipefail {0}
run: ./scripts/pysync
- name: Create Neon Project
id: create-neon-project
uses: ./.github/actions/neon-project-create
with:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
postgres_version: ${{ env.DEFAULT_PG_VERSION }}
- name: Run pytest
env:
REMOTE_ENV: 1
BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }}
POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
shell: bash -euxo pipefail {0}
run: |
# The test framework expects a psql binary to be present,
# but since we don't really need it in this test, let's mock it
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin" && touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql";
./scripts/pytest \
--junitxml=$TEST_OUTPUT/junit.xml \
--tb=short \
--verbose \
-m "remote_cluster" \
-rA "test_runner/pg_clients"
- name: Delete Neon Project
if: ${{ always() }}
uses: ./.github/actions/neon-project-delete
with:
project_id: ${{ steps.create-neon-project.outputs.project_id }}
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
# We use GitHub's upload-artifact action because `ubuntu-latest` doesn't have the AWS CLI configured.
# This will be fixed after switching to the gen2 runner
- name: Upload python test logs
if: always()
uses: actions/upload-artifact@v4
with:
retention-days: 7
name: python-test-pg_clients-${{ runner.os }}-stage-logs
path: ${{ env.TEST_OUTPUT }}
- name: Post to a Slack channel
if: ${{ github.event.schedule && failure() }}
uses: slackapi/slack-github-action@v1
with:
channel-id: "C033QLM5P7D" # dev-staging-stream
slack-message: "Testing Postgres clients: ${{ job.status }}\n${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
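
The psql mock in the "Run pytest" step above only has to satisfy an existence check; a sketch of running the same test selection locally, assuming `BENCHMARK_CONNSTR` points at an existing cluster and the other values mirror the workflow:

```bash
# Sketch of the workflow's psql mock and test invocation; values mirror the workflow and are assumptions here.
export DEFAULT_PG_VERSION=14
export POSTGRES_DISTRIB_DIR=/tmp/neon/pg_install
export REMOTE_ENV=1
export BENCHMARK_CONNSTR="postgres://user:password@host/dbname"  # placeholder connection string

# The workflow's comment suggests the framework only needs the psql file to exist, so an empty file is enough.
mkdir -p "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin"
touch "$POSTGRES_DISTRIB_DIR/v${DEFAULT_PG_VERSION}/bin/psql"

./scripts/pytest --tb=short --verbose -m "remote_cluster" -rA "test_runner/pg_clients"
```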


@@ -20,13 +20,12 @@ defaults:
concurrency:
group: pin-build-tools-image-${{ inputs.from-tag }}
cancel-in-progress: false
permissions: {}
jobs:
tag-image:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
env:
FROM_TAG: ${{ inputs.from-tag }}


@@ -19,7 +19,7 @@ on:
jobs:
notify:
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]
steps:
- uses: neondatabase/dev-actions/release-pr-notify@main


@@ -26,7 +26,7 @@ defaults:
jobs:
create-storage-release-branch:
if: ${{ github.event.schedule == '0 6 * * MON' || format('{0}', inputs.create-storage-release-branch) == 'true' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
permissions:
contents: write # for `git push`
@@ -52,22 +52,20 @@ jobs:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Storage & Compute release ${RELEASE_DATE}"
cat << EOF > body.md
## ${TITLE}
## Release ${RELEASE_DATE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
gh pr create --title "Release ${RELEASE_DATE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release"
create-proxy-release-branch:
if: ${{ github.event.schedule == '0 6 * * THU' || format('{0}', inputs.create-proxy-release-branch) == 'true' }}
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
permissions:
contents: write # for `git push`
@@ -93,15 +91,13 @@ jobs:
env:
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
TITLE="Proxy release ${RELEASE_DATE}"
cat << EOF > body.md
## ${TITLE}
## Proxy release ${RELEASE_DATE}
**Please merge this Pull Request using 'Create a merge commit' button**
EOF
gh pr create --title "${TITLE}" \
gh pr create --title "Proxy release ${RELEASE_DATE}" \
--body-file "body.md" \
--head "${RELEASE_BRANCH}" \
--base "release-proxy"


@@ -19,7 +19,7 @@ env:
jobs:
cancel-previous-e2e-tests:
if: github.event_name == 'pull_request'
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- name: Cancel previous e2e-tests runs for this PR
@@ -31,7 +31,7 @@ jobs:
--field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}"
tag:
runs-on: ubuntu-22.04
runs-on: [ ubuntu-latest ]
outputs:
build-tag: ${{ steps.build-tag.outputs.tag }}
@@ -62,14 +62,14 @@ jobs:
trigger-e2e-tests:
needs: [ tag ]
runs-on: ubuntu-22.04
runs-on: [ self-hosted, gen3, small ]
env:
TAG: ${{ needs.tag.outputs.build-tag }}
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
steps:
- name: Check if ECR images are present
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
run: |
for REPO in neon compute-tools compute-node-v14 vm-compute-node-v14 compute-node-v15 vm-compute-node-v15 compute-node-v16 vm-compute-node-v16; do
OUTPUT=$(aws ecr describe-images --repository-name ${REPO} --region eu-central-1 --query "imageDetails[?imageTags[?contains(@, '${TAG}')]]" --output text)
@@ -79,55 +79,41 @@ jobs:
fi
done
- name: Set e2e-platforms
id: e2e-platforms
env:
PR_NUMBER: ${{ github.event.pull_request.number }}
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
# Default set of platforms to run e2e tests on
platforms='["docker", "k8s"]'
# If the PR changes vendor/, pgxn/ or libs/vm_monitor/ directories, or Dockerfile.compute-node, add k8s-neonvm to the list of platforms.
# If the workflow run is not a pull request, add k8s-neonvm to the list.
if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
case "$f" in
vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
;;
*)
# no-op
;;
esac
done
else
platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
fi
echo "e2e-platforms=${platforms}" | tee -a $GITHUB_OUTPUT
- name: Set PR's status to pending and request a remote CI test
env:
E2E_PLATFORMS: ${{ steps.e2e-platforms.outputs.e2e-platforms }}
COMMIT_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
run: |
REMOTE_REPO="${GITHUB_REPOSITORY_OWNER}/cloud"
# For pull requests, GH Actions sets the "github.sha" variable to point at a fake merge commit,
# but we need to use the real sha of the latest commit in the PR's branch for the e2e job,
# to place a job run status update later.
COMMIT_SHA=${{ github.event.pull_request.head.sha }}
# For non-PR kinds of runs, the above will produce an empty variable, pick the original sha value for those
COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
gh api "/repos/${GITHUB_REPOSITORY}/statuses/${COMMIT_SHA}" \
--method POST \
--raw-field "state=pending" \
--raw-field "description=[$REMOTE_REPO] Remote CI job is about to start" \
--raw-field "context=neon-cloud-e2e"
REMOTE_REPO="${{ github.repository_owner }}/cloud"
gh workflow --repo ${REMOTE_REPO} \
run testing.yml \
--ref "main" \
--raw-field "ci_job_name=neon-cloud-e2e" \
--raw-field "commit_hash=$COMMIT_SHA" \
--raw-field "remote_repo=${GITHUB_REPOSITORY}" \
--raw-field "storage_image_tag=${TAG}" \
--raw-field "compute_image_tag=${TAG}" \
--raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \
--raw-field "e2e-platforms=${E2E_PLATFORMS}"
curl -f -X POST \
https://api.github.com/repos/${{ github.repository }}/statuses/$COMMIT_SHA \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"state\": \"pending\",
\"context\": \"neon-cloud-e2e\",
\"description\": \"[$REMOTE_REPO] Remote CI job is about to start\"
}"
curl -f -X POST \
https://api.github.com/repos/$REMOTE_REPO/actions/workflows/testing.yml/dispatches \
-H "Accept: application/vnd.github.v3+json" \
--user "${{ secrets.CI_ACCESS_TOKEN }}" \
--data \
"{
\"ref\": \"main\",
\"inputs\": {
\"ci_job_name\": \"neon-cloud-e2e\",
\"commit_hash\": \"$COMMIT_SHA\",
\"remote_repo\": \"${{ github.repository }}\",
\"storage_image_tag\": \"${TAG}\",
\"compute_image_tag\": \"${TAG}\",
\"concurrency_group\": \"${{ env.E2E_CONCURRENCY_GROUP }}\"
}
}"


@@ -1,5 +1,4 @@
# * `-A unknown_lints` do not warn about unknown lint suppressions
# that people with newer toolchains might use
# * `-D warnings` - fail on any warnings (`cargo` returns non-zero exit status)
# * `-D clippy::todo` - don't let `todo!()` slip into `main`
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings -D clippy::todo"
export CLIPPY_COMMON_ARGS="--locked --workspace --all-targets -- -A unknown_lints -D warnings"


@@ -1,5 +1,5 @@
/compute_tools/ @neondatabase/control-plane @neondatabase/compute
/storage_controller @neondatabase/storage
/control_plane/attachment_service @neondatabase/storage
/libs/pageserver_api/ @neondatabase/storage
/libs/postgres_ffi/ @neondatabase/compute @neondatabase/safekeepers
/libs/remote_storage/ @neondatabase/storage

Cargo.lock (generated, 1570 changed lines): diff suppressed because it is too large.

@@ -3,7 +3,7 @@ resolver = "2"
members = [
"compute_tools",
"control_plane",
"control_plane/storcon_cli",
"control_plane/attachment_service",
"pageserver",
"pageserver/compaction",
"pageserver/ctl",
@@ -12,10 +12,9 @@ members = [
"proxy",
"safekeeper",
"storage_broker",
"storage_controller",
"storage_controller/client",
"storage_scrubber",
"s3_scrubber",
"workspace_hack",
"trace",
"libs/compute_api",
"libs/pageserver_api",
"libs/postgres_ffi",
@@ -41,26 +40,24 @@ license = "Apache-2.0"
## All dependency versions, used in the project
[workspace.dependencies]
ahash = "0.8"
anyhow = { version = "1.0", features = ["backtrace"] }
arc-swap = "1.6"
async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
atomic-take = "1.1.0"
azure_core = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls", "hmac_rust"] }
azure_identity = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_storage = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_storage_blobs = { version = "0.19", default-features = false, features = ["enable_reqwest_rustls"] }
azure_core = "0.18"
azure_identity = "0.18"
azure_storage = "0.18"
azure_storage_blobs = "0.18"
flate2 = "1.0.26"
async-stream = "0.3"
async-trait = "0.1"
aws-config = { version = "1.3", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.26"
aws-config = { version = "1.1.4", default-features = false, features=["rustls"] }
aws-sdk-s3 = "1.14"
aws-sdk-iam = "1.15.0"
aws-smithy-async = { version = "1.2.1", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.9"
aws-credential-types = "1.2.0"
aws-sigv4 = { version = "1.2.1", features = ["sign-http"] }
aws-types = "1.2.0"
aws-smithy-async = { version = "1.1.4", default-features = false, features=["rt-tokio"] }
aws-smithy-types = "1.1.4"
aws-credential-types = "1.1.4"
aws-sigv4 = { version = "1.2.0", features = ["sign-http"] }
aws-types = "1.1.7"
axum = { version = "0.6.20", features = ["ws"] }
base64 = "0.13.0"
bincode = "1.3"
@@ -75,7 +72,6 @@ clap = { version = "4.0", features = ["derive"] }
comfy-table = "6.1"
const_format = "0.2"
crc32c = "0.6"
crossbeam-deque = "0.8.5"
crossbeam-utils = "0.8.5"
dashmap = { version = "5.5.0", features = ["raw-api"] }
either = "1.8"
@@ -83,13 +79,13 @@ enum-map = "2.4.2"
enumset = "1.0.12"
fail = "0.5.0"
fallible-iterator = "0.2"
framed-websockets = { version = "0.1.0", git = "https://github.com/neondatabase/framed-websockets" }
fs2 = "0.4.3"
futures = "0.3"
futures-core = "0.3"
futures-util = "0.3"
git-version = "0.3"
hashbrown = "0.14"
hashlink = "0.9.1"
hashbrown = "0.13"
hashlink = "0.8.4"
hdrhistogram = "7.5.2"
hex = "0.4"
hex-literal = "0.4"
@@ -100,8 +96,7 @@ http-types = { version = "2", default-features = false }
humantime = "2.1"
humantime-serde = "1.1.1"
hyper = "0.14"
tokio-tungstenite = "0.20.0"
indexmap = "2"
hyper-tungstenite = "0.11"
inotify = "0.10.2"
ipnet = "2.9.0"
itertools = "0.10"
@@ -110,32 +105,32 @@ lasso = "0.7"
leaky-bucket = "1.0.1"
libc = "0.2"
md5 = "0.7.0"
measured = { version = "0.0.22", features=["lasso"] }
measured-process = { version = "0.0.22" }
measured = { version = "0.0.13", features=["default", "lasso"] }
memoffset = "0.8"
native-tls = "0.2"
nix = { version = "0.27", features = ["fs", "process", "socket", "signal", "poll"] }
notify = "6.0.0"
num_cpus = "1.15"
num-traits = "0.2.15"
once_cell = "1.13"
opentelemetry = "0.20.0"
opentelemetry-otlp = { version = "0.13.0", default-features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-otlp = { version = "0.13.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
opentelemetry-semantic-conventions = "0.12.0"
parking_lot = "0.12"
parquet = { version = "51.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "51.0.0"
parquet = { version = "49.0.0", default-features = false, features = ["zstd"] }
parquet_derive = "49.0.0"
pbkdf2 = { version = "0.12.1", features = ["simple", "std"] }
pin-project-lite = "0.2"
procfs = "0.14"
prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency
prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
prost = "0.11"
rand = "0.8"
redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
regex = "1.10.2"
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_20"] }
reqwest-middleware = "0.3.0"
reqwest-retry = "0.5"
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
reqwest-middleware = "0.2.0"
reqwest-retry = "0.2.2"
routerify = "3"
rpds = "0.13"
rustc-hash = "1.1.0"
@@ -145,7 +140,7 @@ rustls-split = "0.3"
scopeguard = "1.1"
sysinfo = "0.29.2"
sd-notify = "0.4.1"
sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_path_to_error = "0.1"
@@ -159,12 +154,11 @@ socket2 = "0.5"
strum = "0.24"
strum_macros = "0.24"
"subtle" = "2.5.0"
# Our PR https://github.com/nical/rust_debug/pull/4 has been merged but no new version released yet
svg_fmt = { git = "https://github.com/nical/rust_debug", rev = "28a7d96eecff2f28e75b1ea09f2d499a60d0e3b4" }
svg_fmt = "0.4.1"
sync_wrapper = "0.1.2"
tar = "0.4"
task-local-extensions = "0.1.4"
test-context = "0.3"
test-context = "0.1"
thiserror = "1.0"
tikv-jemallocator = "0.5"
tikv-jemalloc-ctl = "0.5"
@@ -179,20 +173,17 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tower-service = "0.3.2"
tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.21.0"
tracing-subscriber = { version = "0.3", default-features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
tracing-opentelemetry = "0.20.0"
tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
twox-hash = { version = "1.6.3", default-features = false }
typed-json = "0.1"
url = "2.2"
urlencoding = "2.1"
uuid = { version = "1.6.1", features = ["v4", "v7", "serde"] }
walkdir = "2.3.2"
rustls-native-certs = "0.7"
webpki-roots = "0.25"
x509-parser = "0.15"
whoami = "1.5.1"
## TODO replace this with tracing
env_logger = "0.10"
@@ -200,10 +191,14 @@ log = "0.4"
## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
## Other git libraries
heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
## Local libraries
compute_api = { version = "0.1", path = "./libs/compute_api/" }
consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
@@ -219,7 +214,6 @@ remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
desim = { version = "0.1", path = "./libs/desim" }
storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
storage_controller_client = { path = "./storage_controller/client" }
tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
utils = { version = "0.1", path = "./libs/utils/" }
@@ -238,12 +232,13 @@ tonic-build = "0.9"
[patch.crates-io]
# Needed to get `tokio-postgres-rustls` to depend on our fork.
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.
tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", branch="neon" }
# bug fixes for UUID
parquet = { git = "https://github.com/apache/arrow-rs", branch = "master" }
parquet_derive = { git = "https://github.com/apache/arrow-rs", branch = "master" }
parquet = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
parquet_derive = { git = "https://github.com/neondatabase/arrow-rs", branch = "neon-fix-bugs" }
################# Binary contents sections


@@ -42,13 +42,12 @@ ARG CACHEPOT_BUCKET=neon-github-dev
COPY --from=pg-build /home/nonroot/pg_install/v14/include/postgresql/server pg_install/v14/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v15/include/postgresql/server pg_install/v15/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/include/postgresql/server pg_install/v16/include/postgresql/server
COPY --from=pg-build /home/nonroot/pg_install/v16/lib pg_install/v16/lib
COPY --chown=nonroot . .
# Show build caching stats to check whether the cache was used in the end.
# Has to be part of the same RUN since the cachepot daemon is killed at the end of this RUN, losing the compilation stats.
RUN set -e \
&& PQ_LIB_DIR=$(pwd)/pg_install/v16/lib RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
&& RUSTFLAGS="-Clinker=clang -Clink-arg=-fuse-ld=mold -Clink-arg=-Wl,--no-rosegment" cargo build \
--bin pg_sni_router \
--bin pageserver \
--bin pagectl \
@@ -57,7 +56,6 @@ RUN set -e \
--bin storage_controller \
--bin proxy \
--bin neon_local \
--bin storage_scrubber \
--locked --release \
&& cachepot -s
@@ -71,6 +69,8 @@ RUN set -e \
&& apt install -y \
libreadline-dev \
libseccomp-dev \
libicu67 \
openssl \
ca-certificates \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& useradd -d /data neon \
@@ -84,7 +84,6 @@ COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_broker
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_controller /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/proxy /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/neon_local /usr/local/bin
COPY --from=build --chown=neon:neon /home/nonroot/target/release/storage_scrubber /usr/local/bin
COPY --from=pg-build /home/nonroot/pg_install/v14 /usr/local/v14/
COPY --from=pg-build /home/nonroot/pg_install/v15 /usr/local/v15/
@@ -93,14 +92,13 @@ COPY --from=pg-build /home/nonroot/postgres_install.tar.gz /data/
# By default, pageserver uses `.neon/` working directory in WORKDIR, so create one and fill it with the dummy config.
# Now, when `docker run ... pageserver` is run, it can start without errors, yet will have some default dummy values.
RUN mkdir -p /data/.neon/ && \
echo "id=1234" > "/data/.neon/identity.toml" && \
echo "broker_endpoint='http://storage_broker:50051'\n" \
"pg_distrib_dir='/usr/local/'\n" \
"listen_pg_addr='0.0.0.0:6400'\n" \
"listen_http_addr='0.0.0.0:9898'\n" \
> /data/.neon/pageserver.toml && \
chown -R neon:neon /data/.neon
RUN mkdir -p /data/.neon/ && chown -R neon:neon /data/.neon/ \
&& /usr/local/bin/pageserver -D /data/.neon/ --init \
-c "id=1234" \
-c "broker_endpoint='http://storage_broker:50051'" \
-c "pg_distrib_dir='/usr/local/'" \
-c "listen_pg_addr='0.0.0.0:6400'" \
-c "listen_http_addr='0.0.0.0:9898'"
# When running a binary that links with libpq, default to using our most recent postgres version. Binaries
# that want a particular postgres version will select it explicitly: this is just a default.
@@ -111,6 +109,3 @@ VOLUME ["/data"]
USER neon
EXPOSE 6400
EXPOSE 9898
CMD /usr/local/bin/pageserver -D /data/.neon
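
Because the image is initialized with a dummy pageserver config at build time, it can be started directly; a hedged example, where the image name is a placeholder for whatever tag this Dockerfile was built as:

```bash
# Hypothetical invocation; NEON_IMAGE is a placeholder for the locally built image tag.
NEON_IMAGE=neon-pageserver:local
docker run --rm -p 6400:6400 -p 9898:9898 "$NEON_IMAGE"
# CMD defaults to `/usr/local/bin/pageserver -D /data/.neon`, so the dummy config created above is used.
```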


@@ -1,13 +1,5 @@
FROM debian:bullseye-slim
# Use ARG as a build-time-only environment variable here.
# It's not supposed to be set from outside.
# Alternatively it can be obtained using the following command
# ```
# . /etc/os-release && echo "${VERSION_CODENAME}"
# ```
ARG DEBIAN_VERSION_CODENAME=bullseye
# Add nonroot user
RUN useradd -ms /bin/bash nonroot -b /home
SHELL ["/bin/bash", "-c"]
@@ -34,6 +26,7 @@ RUN set -e \
liblzma-dev \
libncurses5-dev \
libncursesw5-dev \
libpq-dev \
libreadline-dev \
libseccomp-dev \
libsqlite3-dev \
@@ -65,33 +58,22 @@ RUN curl -fsSL "https://github.com/protocolbuffers/protobuf/releases/download/v$
&& mv protoc/include/google /usr/local/include/google \
&& rm -rf protoc.zip protoc
# s5cmd
ENV S5CMD_VERSION=2.2.2
RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/s5cmd_${S5CMD_VERSION}_Linux-$(uname -m | sed 's/x86_64/64bit/g' | sed 's/aarch64/arm64/g').tar.gz" | tar zxvf - s5cmd \
&& chmod +x s5cmd \
&& mv s5cmd /usr/local/bin/s5cmd
# LLVM
ENV LLVM_VERSION=18
ENV LLVM_VERSION=17
RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
&& echo "deb http://apt.llvm.org/${DEBIAN_VERSION_CODENAME}/ llvm-toolchain-${DEBIAN_VERSION_CODENAME}-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
&& apt update \
&& apt install -y clang-${LLVM_VERSION} llvm-${LLVM_VERSION} \
&& bash -c 'for f in /usr/bin/clang*-${LLVM_VERSION} /usr/bin/llvm*-${LLVM_VERSION}; do ln -s "${f}" "${f%-${LLVM_VERSION}}"; done' \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Install docker
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | gpg --dearmor -o /usr/share/keyrings/docker-archive-keyring.gpg \
&& echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/docker-archive-keyring.gpg] https://download.docker.com/linux/debian ${DEBIAN_VERSION_CODENAME} stable" > /etc/apt/sources.list.d/docker.list \
# PostgreSQL 14
RUN curl -fsSL 'https://www.postgresql.org/media/keys/ACCC4CF8.asc' | apt-key add - \
&& echo 'deb http://apt.postgresql.org/pub/repos/apt bullseye-pgdg main' > /etc/apt/sources.list.d/pgdg.list \
&& apt update \
&& apt install -y docker-ce docker-ce-cli \
&& apt install -y postgresql-client-14 \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Configure sudo & docker
RUN usermod -aG sudo nonroot && \
echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \
usermod -aG docker nonroot
# AWS CLI
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "awscliv2.zip" \
&& unzip -q awscliv2.zip \
@@ -99,7 +81,7 @@ RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o "aws
&& rm awscliv2.zip
# Mold: A Modern Linker
ENV MOLD_VERSION v2.31.0
ENV MOLD_VERSION v2.4.0
RUN set -e \
&& git clone https://github.com/rui314/mold.git \
&& mkdir mold/build \
@@ -124,45 +106,6 @@ RUN for package in Capture::Tiny DateTime Devel::Cover Digest::MD5 File::Spec JS
&& make install \
&& rm -rf ../lcov.tar.gz
# Compile and install the static OpenSSL library
ENV OPENSSL_VERSION=1.1.1w
ENV OPENSSL_PREFIX=/usr/local/openssl
RUN wget -O /tmp/openssl-${OPENSSL_VERSION}.tar.gz https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz && \
echo "cf3098950cb4d853ad95c0841f1f9c6d3dc102dccfcacd521d93925208b76ac8 /tmp/openssl-${OPENSSL_VERSION}.tar.gz" | sha256sum --check && \
cd /tmp && \
tar xzvf /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
rm /tmp/openssl-${OPENSSL_VERSION}.tar.gz && \
cd /tmp/openssl-${OPENSSL_VERSION} && \
./config --prefix=${OPENSSL_PREFIX} -static --static no-shared -fPIC && \
make -j "$(nproc)" && \
make install && \
cd /tmp && \
rm -rf /tmp/openssl-${OPENSSL_VERSION}
# Use the same version of libicu as the compute nodes so that
# clusters created using initdb on pageserver can be used by computes.
#
# TODO: at this time, Dockerfile.compute-node uses the debian bullseye libicu
# package, which is 67.1. We're duplicating that knowledge here, and also, technically,
# Debian has a few patches on top of 67.1 that we're not adding here.
ENV ICU_VERSION=67.1
ENV ICU_PREFIX=/usr/local/icu
# Download and build static ICU
RUN wget -O /tmp/libicu-${ICU_VERSION}.tgz https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION//./-}/icu4c-${ICU_VERSION//./_}-src.tgz && \
echo "94a80cd6f251a53bd2a997f6f1b5ac6653fe791dfab66e1eb0227740fb86d5dc /tmp/libicu-${ICU_VERSION}.tgz" | sha256sum --check && \
mkdir /tmp/icu && \
pushd /tmp/icu && \
tar -xzf /tmp/libicu-${ICU_VERSION}.tgz && \
pushd icu/source && \
./configure --prefix=${ICU_PREFIX} --enable-static --enable-shared=no CXXFLAGS="-fPIC" CFLAGS="-fPIC" && \
make -j "$(nproc)" && \
make install && \
popd && \
rm -rf icu && \
rm -f /tmp/libicu-${ICU_VERSION}.tgz && \
popd
# Switch to nonroot user
USER nonroot:nonroot
WORKDIR /home/nonroot
@@ -192,7 +135,7 @@ WORKDIR /home/nonroot
# Rust
# Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
ENV RUSTC_VERSION=1.79.0
ENV RUSTC_VERSION=1.77.0
ENV RUSTUP_HOME="/home/nonroot/.rustup"
ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
@@ -221,6 +164,3 @@ RUN whoami \
&& rustup --version --verbose \
&& rustc --version --verbose \
&& clang --version
# Set following flag to check in Makefile if its running in Docker
RUN touch /home/nonroot/.docker_build


@@ -89,7 +89,7 @@ RUN apt update && \
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake -DCMAKE_BUILD_TYPE=Release . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
make clean && cp -R /sfcgal/* /
@@ -98,7 +98,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postgis.tar.gz && \
echo "74eb356e3f85f14233791013360881b6748f78081cc688ff9d6f0f673a762d13 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xzf ../postgis.tar.gz --strip-components=1 -C . && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
@@ -124,7 +124,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.3.tar.gz -O postg
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -149,7 +149,7 @@ RUN apt update && \
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.10.tar.gz -O plv8.tar.gz && \
echo "7096c3290928561f0d4901b7a52794295dc47f6303102fae3f8e42dd575ad97d plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xzf ../plv8.tar.gz --strip-components=1 -C . && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
# generate and copy upgrade scripts
mkdir -p upgrade && ./generate_upgrade.sh 3.1.10 && \
cp upgrade/* /usr/local/pgsql/share/extension/ && \
@@ -194,7 +194,7 @@ RUN case "$(uname -m)" in \
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -204,7 +204,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.3.tar.gz -O h3-pg.tar.gz && \
echo "5c17f09a820859ffe949f847bebf1be98511fb8f1bd86f94932512c00479e324 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xzf ../h3-pg.tar.gz --strip-components=1 -C . && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -222,7 +222,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
# unit extension's "create extension" script relies on absolute install path to fill some reference tables.
@@ -241,17 +241,11 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/pgvector.patch /pgvector.patch
# By default, pgvector Makefile uses `-march=native`. We don't want that,
# because we build the images on different machines than where we run them.
# Pass OPTFLAGS="" to remove it.
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.7.2.tar.gz -O pgvector.tar.gz && \
echo "617fba855c9bcb41a2a9bc78a78567fd2e147c72afd5bf9d37b31b9591632b30 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xzf ../pgvector.tar.gz --strip-components=1 -C . && \
patch -p1 < /pgvector.patch && \
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) OPTFLAGS="" install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.5.1.tar.gz -O pgvector.tar.gz && \
echo "cc7a8e034a96e30a819911ac79d32f6bc47bdd1aa2de4d7d4904e26b83209dc8 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
#########################################################################################
@@ -266,7 +260,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xzf ../pgjwt.tar.gz --strip-components=1 -C . && \
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
@@ -281,7 +275,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.4.0.tar.gz -O hypopg.tar.gz && \
echo "0821011743083226fc9b813c1f2ef5897a91901b57b6bea85a78e466187c6819 hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xzf ../hypopg.tar.gz --strip-components=1 -C . && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
@@ -297,7 +291,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
@@ -311,12 +305,9 @@ RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz
FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY patches/rum.patch /rum.patch
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xzf ../rum.tar.gz --strip-components=1 -C . && \
patch -p1 < /rum.patch && \
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
@@ -332,7 +323,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xzf ../pgtap.tar.gz --strip-components=1 -C . && \
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
@@ -348,7 +339,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.2.tar.gz -O ip4r.tar.gz && \
echo "0f7b1f159974f49a47842a8ab6751aecca1ed1142b6d5e38d81b064b2ead1b4b ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xzf ../ip4r.tar.gz --strip-components=1 -C . && \
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
@@ -364,7 +355,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.10.tar.gz -O prefix.tar.gz && \
echo "4342f251432a5f6fb05b8597139d3ccde8dcf87e8ca1498e7ee931ca057a8575 prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xzf ../prefix.tar.gz --strip-components=1 -C . && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/prefix.control
@@ -380,7 +371,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.18.tar.gz -O hll.tar.gz && \
echo "e2f55a6f4c4ab95ee4f1b4a2b73280258c5136b161fe9d059559556079694f0e hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xzf ../hll.tar.gz --strip-components=1 -C . && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/hll.control
@@ -396,7 +387,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.5.3.tar.gz -O plpgsql_check.tar.gz && \
echo "6631ec3e7fb3769eaaf56e3dfedb829aa761abf163d13dba354b4c218508e1c0 plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control
@@ -427,7 +418,7 @@ RUN case "${PG_VERSION}" in \
apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/${TIMESCALEDB_VERSION}.tar.gz -O timescaledb.tar.gz && \
echo "${TIMESCALEDB_CHECKSUM} timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xzf ../timescaledb.tar.gz --strip-components=1 -C . && \
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON -DCMAKE_BUILD_TYPE=Release && \
cd build && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -465,11 +456,36 @@ RUN case "${PG_VERSION}" in \
esac && \
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "kq-imcx-pg-build"
# compile kq_imcx extension
#
#########################################################################################
FROM build-deps AS kq-imcx-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN apt-get update && \
apt-get install -y git libgtk2.0-dev libpq-dev libpam-dev libxslt-dev libkrb5-dev cmake && \
wget https://github.com/ketteq-neon/postgres-exts/archive/e0bd1a9d9313d7120c1b9c7bb15c48c0dede4c4e.tar.gz -O kq_imcx.tar.gz && \
echo "dc93a97ff32d152d32737ba7e196d9687041cda15e58ab31344c2f2de8855336 kq_imcx.tar.gz" | sha256sum --check && \
mkdir kq_imcx-src && cd kq_imcx-src && tar xvzf ../kq_imcx.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
mkdir build && cd build && \
cmake -DCMAKE_BUILD_TYPE=Release .. && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/kq_imcx.control && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /after.txt &&\
mkdir -p /extensions/kq_imcx && cp /usr/local/pgsql/share/extension/kq_imcx.control /extensions/kq_imcx && \
sort -o /before.txt /before.txt && sort -o /after.txt /after.txt && \
comm -13 /before.txt /after.txt | tar --directory=/usr/local/pgsql --zstd -cf /extensions/kq_imcx.tar.zst -T -
#########################################################################################
#
@@ -483,7 +499,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/citusdata/pg_cron/archive/refs/tags/v1.6.0.tar.gz -O pg_cron.tar.gz && \
echo "383a627867d730222c272bfd25cd5e151c578d73f696d32910c7db8c665cc7db pg_cron.tar.gz" | sha256sum --check && \
mkdir pg_cron-src && cd pg_cron-src && tar xzf ../pg_cron.tar.gz --strip-components=1 -C . && \
mkdir pg_cron-src && cd pg_cron-src && tar xvzf ../pg_cron.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_cron.control
@@ -509,7 +525,7 @@ RUN apt-get update && \
ENV PATH "/usr/local/pgsql/bin/:/usr/local/pgsql/:$PATH"
RUN wget https://github.com/rdkit/rdkit/archive/refs/tags/Release_2023_03_3.tar.gz -O rdkit.tar.gz && \
echo "bdbf9a2e6988526bfeb8c56ce3cdfe2998d60ac289078e2215374288185e8c8d rdkit.tar.gz" | sha256sum --check && \
mkdir rdkit-src && cd rdkit-src && tar xzf ../rdkit.tar.gz --strip-components=1 -C . && \
mkdir rdkit-src && cd rdkit-src && tar xvzf ../rdkit.tar.gz --strip-components=1 -C . && \
cmake \
-D RDK_BUILD_CAIRO_SUPPORT=OFF \
-D RDK_BUILD_INCHI_SUPPORT=ON \
@@ -549,7 +565,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/fboulnois/pg_uuidv7/archive/refs/tags/v1.0.1.tar.gz -O pg_uuidv7.tar.gz && \
echo "0d0759ab01b7fb23851ecffb0bce27822e1868a4a5819bfd276101c716637a7a pg_uuidv7.tar.gz" | sha256sum --check && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
mkdir pg_uuidv7-src && cd pg_uuidv7-src && tar xvzf ../pg_uuidv7.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_uuidv7.control
@@ -566,7 +582,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4.tar.gz -O pg_roaringbitmap.tar.gz && \
echo "b75201efcb1c2d1b014ec4ae6a22769cc7a224e6e406a587f5784a37b6b5a2aa pg_roaringbitmap.tar.gz" | sha256sum --check && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
mkdir pg_roaringbitmap-src && cd pg_roaringbitmap-src && tar xvzf ../pg_roaringbitmap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
@@ -583,7 +599,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
mkdir pg_semver-src && cd pg_semver-src && tar xzf ../pg_semver.tar.gz --strip-components=1 -C . && \
mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
@@ -609,7 +625,7 @@ RUN case "${PG_VERSION}" in \
esac && \
wget https://github.com/neondatabase/pg_embedding/archive/refs/tags/${PG_EMBEDDING_VERSION}.tar.gz -O pg_embedding.tar.gz && \
echo "${PG_EMBEDDING_CHECKSUM} pg_embedding.tar.gz" | sha256sum --check && \
mkdir pg_embedding-src && cd pg_embedding-src && tar xzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
mkdir pg_embedding-src && cd pg_embedding-src && tar xvzf ../pg_embedding.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install
@@ -625,7 +641,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/neondatabase/postgresql_anonymizer/archive/refs/tags/neon_1.1.1.tar.gz -O pg_anon.tar.gz && \
echo "321ea8d5c1648880aafde850a2c576e4a9e7b9933a34ce272efc839328999fa9 pg_anon.tar.gz" | sha256sum --check && \
mkdir pg_anon-src && cd pg_anon-src && tar xzf ../pg_anon.tar.gz --strip-components=1 -C . && \
mkdir pg_anon-src && cd pg_anon-src && tar xvzf ../pg_anon.tar.gz --strip-components=1 -C . && \
find /usr/local/pgsql -type f | sed 's|^/usr/local/pgsql/||' > /before.txt &&\
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/anon.control && \
@@ -674,7 +690,7 @@ ARG PG_VERSION
RUN wget https://github.com/supabase/pg_jsonschema/archive/refs/tags/v0.2.0.tar.gz -O pg_jsonschema.tar.gz && \
echo "9118fc508a6e231e7a39acaa6f066fcd79af17a5db757b47d2eefbe14f7794f0 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
@@ -691,7 +707,7 @@ ARG PG_VERSION
RUN wget https://github.com/supabase/pg_graphql/archive/refs/tags/v1.4.0.tar.gz -O pg_graphql.tar.gz && \
echo "bd8dc7230282b3efa9ae5baf053a54151ed0e66881c7c53750e2d0c765776edc pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgrx = "=0.10.2"/pgrx = { version = "0.10.2", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgrx install --release && \
# it's needed to enable extension because it uses untrusted C language
@@ -711,7 +727,7 @@ ARG PG_VERSION
# 26806147b17b60763039c6a6878884c41a262318 made on 26/09/2023
RUN wget https://github.com/kelvich/pg_tiktoken/archive/26806147b17b60763039c6a6878884c41a262318.tar.gz -O pg_tiktoken.tar.gz && \
echo "e64e55aaa38c259512d3e27c572da22c4637418cf124caba904cd50944e5004e pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
cargo pgrx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
@@ -727,7 +743,7 @@ ARG PG_VERSION
RUN wget https://github.com/pksunkara/pgx_ulid/archive/refs/tags/v0.1.3.tar.gz -O pgx_ulid.tar.gz && \
echo "ee5db82945d2d9f2d15597a80cf32de9dca67b897f605beb830561705f12683c pgx_ulid.tar.gz" | sha256sum --check && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
mkdir pgx_ulid-src && cd pgx_ulid-src && tar xvzf ../pgx_ulid.tar.gz --strip-components=1 -C . && \
echo "******************* Apply a patch for Postgres 16 support; delete in the next release ******************" && \
wget https://github.com/pksunkara/pgx_ulid/commit/f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
patch -p1 < f84954cf63fc8c80d964ac970d9eceed3c791196.patch && \
@@ -749,7 +765,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/eulerto/wal2json/archive/refs/tags/wal2json_2_5.tar.gz && \
echo "b516653575541cf221b99cf3f8be9b6821f6dbcfc125675c85f35090f824f00e wal2json_2_5.tar.gz" | sha256sum --check && \
mkdir wal2json-src && cd wal2json-src && tar xzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
mkdir wal2json-src && cd wal2json-src && tar xvzf ../wal2json_2_5.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install
@@ -765,7 +781,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/sraoss/pg_ivm/archive/refs/tags/v1.7.tar.gz -O pg_ivm.tar.gz && \
echo "ebfde04f99203c7be4b0e873f91104090e2e83e5429c32ac242d00f334224d5e pg_ivm.tar.gz" | sha256sum --check && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
mkdir pg_ivm-src && cd pg_ivm-src && tar xvzf ../pg_ivm.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_ivm.control
@@ -782,7 +798,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ENV PATH "/usr/local/pgsql/bin/:$PATH"
RUN wget https://github.com/pgpartman/pg_partman/archive/refs/tags/v5.0.1.tar.gz -O pg_partman.tar.gz && \
echo "75b541733a9659a6c90dbd40fccb904a630a32880a6e3044d0c4c5f4c8a65525 pg_partman.tar.gz" | sha256sum --check && \
mkdir pg_partman-src && cd pg_partman-src && tar xzf ../pg_partman.tar.gz --strip-components=1 -C . && \
mkdir pg_partman-src && cd pg_partman-src && tar xvzf ../pg_partman.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make -j $(getconf _NPROCESSORS_ONLN) install && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_partman.control
@@ -818,6 +834,7 @@ COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=kq-imcx-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-cron-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
@@ -905,68 +922,6 @@ RUN rm -r /usr/local/pgsql/include
# if they were to be used by other libraries.
RUN rm /usr/local/pgsql/lib/lib*.a
#########################################################################################
#
# Layer neon-pg-ext-test
#
#########################################################################################
FROM neon-pg-ext-build AS neon-pg-ext-test
ARG PG_VERSION
RUN mkdir /ext-src
#COPY --from=postgis-build /postgis.tar.gz /ext-src/
#COPY --from=postgis-build /sfcgal/* /usr
COPY --from=plv8-build /plv8.tar.gz /ext-src/
COPY --from=h3-pg-build /h3-pg.tar.gz /ext-src/
COPY --from=unit-pg-build /postgresql-unit.tar.gz /ext-src/
COPY --from=vector-pg-build /pgvector.tar.gz /ext-src/
COPY --from=vector-pg-build /pgvector.patch /ext-src/
COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
#COPY --from=pg-jsonschema-pg-build /home/nonroot/pg_jsonschema.tar.gz /ext-src
#COPY --from=pg-graphql-pg-build /home/nonroot/pg_graphql.tar.gz /ext-src
#COPY --from=pg-tiktoken-pg-build /home/nonroot/pg_tiktoken.tar.gz /ext-src
COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
#COPY --from=rum-pg-build /rum.tar.gz /ext-src
#COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
COPY --from=hll-pg-build /hll.tar.gz /ext-src
COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
#COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
COPY patches/pg_hintplan.patch /ext-src
COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
COPY patches/pg_cron.patch /ext-src
#COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
COPY --from=pg-roaringbitmap-pg-build /pg_roaringbitmap.tar.gz /ext-src
COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
#COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
#COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
COPY patches/pg_anon.patch /ext-src
COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
RUN cd /ext-src/ && for f in *.tar.gz; \
do echo $f; dname=$(echo $f | sed 's/\.tar.*//')-src; \
rm -rf $dname; mkdir $dname; tar xzf $f --strip-components=1 -C $dname \
|| exit 1; rm -f $f; done
RUN cd /ext-src/pgvector-src && patch -p1 <../pgvector.patch
# cmake is required for the h3 test
RUN apt-get update && apt-get install -y cmake
RUN patch -p1 < /ext-src/pg_hintplan.patch
COPY --chmod=755 docker-compose/run-tests.sh /run-tests.sh
RUN patch -p1 </ext-src/pg_anon.patch
RUN patch -p1 </ext-src/pg_cron.patch
ENV PATH=/usr/local/pgsql/bin:$PATH
ENV PGHOST=compute
ENV PGPORT=55433
ENV PGUSER=cloud_admin
ENV PGDATABASE=postgres
#########################################################################################
#
# Final layer
@@ -989,9 +944,6 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
# Create remote extension download directory
RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)


@@ -3,9 +3,6 @@ ROOT_PROJECT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Where to install Postgres, default is ./pg_install, maybe useful for package managers
POSTGRES_INSTALL_DIR ?= $(ROOT_PROJECT_DIR)/pg_install/
OPENSSL_PREFIX_DIR := /usr/local/openssl
ICU_PREFIX_DIR := /usr/local/icu
#
# We differentiate between release / debug build types using the BUILD_TYPE
# environment variable.
@@ -23,31 +20,19 @@ else
$(error Bad build type '$(BUILD_TYPE)', see Makefile for options)
endif
ifeq ($(shell test -e /home/nonroot/.docker_build && echo -n yes),yes)
# Exclude static build openssl, icu for local build (MacOS, Linux)
# Only keep for build type release and debug
PG_CFLAGS += -I$(OPENSSL_PREFIX_DIR)/include
PG_CONFIGURE_OPTS += --with-icu
PG_CONFIGURE_OPTS += ICU_CFLAGS='-I/$(ICU_PREFIX_DIR)/include -DU_STATIC_IMPLEMENTATION'
PG_CONFIGURE_OPTS += ICU_LIBS='-L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -licui18n -licuuc -licudata -lstdc++ -Wl,-Bdynamic -lm'
PG_CONFIGURE_OPTS += LDFLAGS='-L$(OPENSSL_PREFIX_DIR)/lib -L$(OPENSSL_PREFIX_DIR)/lib64 -L$(ICU_PREFIX_DIR)/lib -L$(ICU_PREFIX_DIR)/lib64 -Wl,-Bstatic -lssl -lcrypto -Wl,-Bdynamic -lrt -lm -ldl -lpthread'
endif
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Linux)
# Seccomp BPF is only available for Linux
PG_CONFIGURE_OPTS += --with-libseccomp
else ifeq ($(UNAME_S),Darwin)
ifndef DISABLE_HOMEBREW
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX := $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# macOS with brew-installed openssl requires explicit paths
# It can be configured with OPENSSL_PREFIX variable
OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
endif
# Use -C option so that when PostgreSQL "make install" installs the
@@ -69,8 +54,6 @@ CARGO_CMD_PREFIX += CARGO_TERM_PROGRESS_WHEN=never CI=1
# Set PQ_LIB_DIR to make sure `storage_controller` get linked with bundled libpq (through diesel)
CARGO_CMD_PREFIX += PQ_LIB_DIR=$(POSTGRES_INSTALL_DIR)/v16/lib
CACHEDIR_TAG_CONTENTS := "Signature: 8a477f597d28d172789f06886806bc55"
#
# Top level Makefile to build Neon and PostgreSQL
#
@@ -81,38 +64,26 @@ all: neon postgres neon-pg-ext
#
# The 'postgres_ffi' depends on the Postgres headers.
.PHONY: neon
neon: postgres-headers walproposer-lib cargo-target-dir
neon: postgres-headers walproposer-lib
+@echo "Compiling Neon"
$(CARGO_CMD_PREFIX) cargo build $(CARGO_BUILD_FLAGS)
.PHONY: cargo-target-dir
cargo-target-dir:
# https://github.com/rust-lang/cargo/issues/14281
mkdir -p target
test -e target/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > target/CACHEDIR.TAG
### PostgreSQL parts
# Some rules are duplicated for Postgres v14 and 15. We may want to refactor
# to avoid the duplication in the future, but it's tolerable for now.
#
$(POSTGRES_INSTALL_DIR)/build/%/config.status:
mkdir -p $(POSTGRES_INSTALL_DIR)
test -e $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG || echo "$(CACHEDIR_TAG_CONTENTS)" > $(POSTGRES_INSTALL_DIR)/CACHEDIR.TAG
+@echo "Configuring Postgres $* build"
@test -s $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure || { \
echo "\nPostgres submodule not found in $(ROOT_PROJECT_DIR)/vendor/postgres-$*/, execute "; \
echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
exit 1; }
mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
VERSION=$*; \
EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
CFLAGS='$(PG_CFLAGS)' \
$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
$(PG_CONFIGURE_OPTS) \
--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)
# nicer alias to run 'configure'
# Note: I've been unable to use templates for this part of our configuration.
@@ -148,8 +119,6 @@ postgres-%: postgres-configure-% \
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install
+@echo "Compiling amcheck $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install
+@echo "Compiling test_decoding $*"
$(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/test_decoding install
.PHONY: postgres-clean-%
postgres-clean-%:


@@ -1,6 +1,4 @@
[![Neon](https://github.com/neondatabase/neon/assets/11527560/f15a17f0-836e-40c5-b35d-030606a6b660)](https://neon.tech)
[![Neon](https://user-images.githubusercontent.com/13738772/236813940-dcfdcb5b-69d3-449b-a686-013febe834d4.png)](https://neon.tech)
# Neon


@@ -27,12 +27,10 @@ reqwest = { workspace = true, features = ["json"] }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tokio-postgres.workspace = true
tokio-util.workspace = true
tokio-stream.workspace = true
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
tracing-utils.workspace = true
thiserror.workspace = true
url.workspace = true
compute_api.workspace = true
@@ -44,4 +42,3 @@ vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
zstd = "0.13"
bytes = "1.0"
rust-ini = "0.20.0"
rlimit = "0.10.1"


@@ -6,7 +6,7 @@
//! - Every start is a fresh start, so the data directory is removed and
//! initialized again on each run.
//! - If remote_extension_config is provided, it will be used to fetch extensions list
//! and download `shared_preload_libraries` from the remote storage.
//! and download `shared_preload_libraries` from the remote storage.
//! - Next it will put configuration files into the `PGDATA` directory.
//! - Sync safekeepers and get commit LSN.
//! - Get `basebackup` from pageserver using the returned on the previous step LSN.
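
The startup sequence described in the comment above maps onto the phased entry point visible in this file's diff. A condensed sketch of that flow, with signatures simplified and the startup tracing guard omitted (all function names are the ones appearing below):

```rust
// Sketch only: condenses the phased main() shown in this diff.
fn main_flow() -> anyhow::Result<()> {
    let (build_tag, clap_args) = init()?;                     // logging, signal handlers, CLI definition
    let cli_args = process_cli(&clap_args)?;                  // pgdata, pgbin, connstr, ports
    let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?; // spec from CLI/file or control plane
    let waited = wait_spec(build_tag, cli_args, cli_spec)?;   // block until a spec is available
    let (pg, started) = start_postgres(&clap_args, waited)?;  // swap resize, vm-monitor, postgres
    let wait_result = wait_postgres(pg)?;                     // wait for the postgres child to exit
    let delay_exit = cleanup_after_postgres_exit(started)?;   // stop vm-monitor, check core dumps
    maybe_delay_exit(delay_exit);                             // keep HTTP up briefly on failure
    deinit_and_exit(wait_result)                              // flush traces, exit with pg's code
}
```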
@@ -33,6 +33,7 @@
//! -b /usr/local/bin/postgres \
//! -r http://pg-ext-s3-gateway \
//! ```
//!
use std::collections::HashMap;
use std::fs::File;
use std::path::Path;
@@ -46,11 +47,10 @@ use chrono::Utc;
use clap::Arg;
use signal_hook::consts::{SIGQUIT, SIGTERM};
use signal_hook::{consts::SIGINT, iterator::Signals};
use tracing::{error, info, warn};
use tracing::{error, info};
use url::Url;
use compute_api::responses::ComputeStatus;
use compute_api::spec::ComputeSpec;
use compute_tools::compute::{
forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
@@ -62,45 +62,12 @@ use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
use compute_tools::params::*;
use compute_tools::spec::*;
use compute_tools::swap::resize_swap;
use rlimit::{setrlimit, Resource};
// this is an arbitrary build tag. Fine as a default / for testing purposes
// in-case of not-set environment var
const BUILD_TAG_DEFAULT: &str = "latest";
fn main() -> Result<()> {
let (build_tag, clap_args) = init()?;
// enable core dumping for all child processes
setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?;
let (pg_handle, start_pg_result) = {
// Enter startup tracing context
let _startup_context_guard = startup_context_from_env();
let cli_args = process_cli(&clap_args)?;
let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
start_postgres(&clap_args, wait_spec_result)?
// Startup is finished, exit the startup tracing span
};
// PostgreSQL is now running, if startup was successful. Wait until it exits.
let wait_pg_result = wait_postgres(pg_handle)?;
let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
maybe_delay_exit(delay_exit);
deinit_and_exit(wait_pg_result);
}
fn init() -> Result<(String, clap::ArgMatches)> {
init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;
let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -115,15 +82,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
.to_string();
info!("build_tag: {build_tag}");
Ok((build_tag, cli().get_matches()))
}
fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
let pgbin_default = "postgres";
let pgbin = matches
.get_one::<String>("pgbin")
.map(|s| s.as_str())
.unwrap_or(pgbin_default);
let matches = cli().get_matches();
let pgbin_default = String::from("postgres");
let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);
let ext_remote_storage = matches
.get_one::<String>("remote-ext-config")
@@ -149,32 +110,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
.expect("Postgres connection string is required");
let spec_json = matches.get_one::<String>("spec");
let spec_path = matches.get_one::<String>("spec-path");
let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");
Ok(ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
http_port,
spec_json,
spec_path,
resize_swap_on_bind,
})
}
struct ProcessCliResult<'clap> {
connstr: &'clap str,
pgdata: &'clap str,
pgbin: &'clap str,
ext_remote_storage: Option<&'clap str>,
http_port: u16,
spec_json: Option<&'clap String>,
spec_path: Option<&'clap String>,
resize_swap_on_bind: bool,
}
fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
// Extract OpenTelemetry context for the startup actions from the
// TRACEPARENT and TRACESTATE env variables, and attach it to the current
// tracing context.
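
A minimal sketch of the TRACEPARENT/TRACESTATE handoff this function performs, assuming the `opentelemetry` API already imported in the hunk below; this is not the exact code elided from the diff context:

```rust
use std::collections::HashMap;
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;

// Sketch (assumed API): rebuild the remote span context from the env vars and
// make it current until the returned guard is dropped.
fn attach_startup_context() -> Option<opentelemetry::ContextGuard> {
    let mut carrier = HashMap::new();
    if let Ok(val) = std::env::var("TRACEPARENT") {
        carrier.insert("traceparent".to_string(), val);
    }
    if let Ok(val) = std::env::var("TRACESTATE") {
        carrier.insert("tracestate".to_string(), val);
    }
    if carrier.is_empty() {
        return None;
    }
    let cx = TraceContextPropagator::new().extract(&carrier);
    Some(cx.attach())
}
```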
@@ -211,7 +147,7 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
if let Ok(val) = std::env::var("TRACESTATE") {
startup_tracing_carrier.insert("tracestate".to_string(), val);
}
if !startup_tracing_carrier.is_empty() {
let startup_context_guard = if !startup_tracing_carrier.is_empty() {
use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::sdk::propagation::TraceContextPropagator;
let guard = TraceContextPropagator::new()
@@ -221,17 +157,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
Some(guard)
} else {
None
}
}
};
fn try_spec_from_cli(
matches: &clap::ArgMatches,
ProcessCliResult {
spec_json,
spec_path,
..
}: &ProcessCliResult,
) -> Result<CliSpecParams> {
let compute_id = matches.get_one::<String>("compute-id");
let control_plane_uri = matches.get_one::<String>("control-plane-uri");
@@ -272,34 +199,6 @@ fn try_spec_from_cli(
}
};
Ok(CliSpecParams {
spec,
live_config_allowed,
})
}
struct CliSpecParams {
/// If a spec was provided via CLI or file, the [`ComputeSpec`]
spec: Option<ComputeSpec>,
live_config_allowed: bool,
}
fn wait_spec(
build_tag: String,
ProcessCliResult {
connstr,
pgdata,
pgbin,
ext_remote_storage,
resize_swap_on_bind,
http_port,
..
}: ProcessCliResult,
CliSpecParams {
spec,
live_config_allowed,
}: CliSpecParams,
) -> Result<WaitSpecResult> {
let mut new_state = ComputeState::new();
let spec_set;
@@ -327,17 +226,19 @@ fn wait_spec(
// If this is a pooled VM, prewarm before starting HTTP server and becoming
// available for binding. Prewarming helps Postgres start quicker later,
// because QEMU will already have its memory allocated from the host, and
// because QEMU will already have it's memory allocated from the host, and
// the necessary binaries will already be cached.
if !spec_set {
compute.prewarm_postgres()?;
}
// Launch http service first, so that we can serve control-plane requests
// while configuration is still in progress.
// Launch http service first, so we were able to serve control-plane
// requests, while configuration is still in progress.
let _http_handle =
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
let extension_server_port: u16 = http_port;
if !spec_set {
// No spec provided, hang waiting for it.
info!("no compute spec provided, waiting");
@@ -352,45 +253,21 @@ fn wait_spec(
break;
}
}
// Record for how long we slept waiting for the spec.
let now = Utc::now();
state.metrics.wait_for_spec_ms = now
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time, so that the total startup time that is calculated later will
// not include the time that we waited for the spec.
state.start_time = now;
}
Ok(WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
})
}
struct WaitSpecResult {
compute: Arc<ComputeNode>,
// passed through from ProcessCliResult
http_port: u16,
resize_swap_on_bind: bool,
}
fn start_postgres(
// need to allow unused because `matches` is only used if target_os = "linux"
#[allow(unused_variables)] matches: &clap::ArgMatches,
WaitSpecResult {
compute,
http_port,
resize_swap_on_bind,
}: WaitSpecResult,
) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
// Record for how long we slept waiting for the spec.
state.metrics.wait_for_spec_ms = Utc::now()
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time to the actual start of the configuration, so that
// total startup time was properly measured at the end.
state.start_time = Utc::now();
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
@@ -398,72 +275,33 @@ fn start_postgres(
"running compute with features: {:?}",
state.pspec.as_ref().unwrap().spec.features
);
// before we release the mutex, fetch the swap size (if any) for later.
let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
drop(state);
// Launch remaining service threads
let _monitor_handle = launch_monitor(&compute);
let _configurator_handle = launch_configurator(&compute);
let mut prestartup_failed = false;
let mut delay_exit = false;
// Resize swap to the desired size if the compute spec says so
if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
// To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
// *before* starting postgres.
//
// In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
// carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
// OOM-killed during startup because swap wasn't available yet.
match resize_swap(size_bytes) {
Ok(()) => {
let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
info!(%size_bytes, %size_gib, "resized swap");
}
Err(err) => {
let err = err.context("failed to resize swap");
error!("{err:#}");
// Mark compute startup as failed; don't try to start postgres, and report this
// error to the control plane when it next asks.
prestartup_failed = true;
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{err:?}"));
state.status = ComputeStatus::Failed;
compute.state_changed.notify_all();
delay_exit = true;
}
}
}
let extension_server_port: u16 = http_port;
// Start Postgres
let mut pg = None;
if !prestartup_failed {
pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:#}", err);
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{:?}", err));
state.status = ComputeStatus::Failed;
// Notify others that Postgres failed to start. In case of configuring the
// empty compute, it's likely that API handler is still waiting for compute
// state change. With this we will notify it that compute is in Failed state,
// so control plane will know about it earlier and record proper error instead
// of timeout.
compute.state_changed.notify_all();
drop(state); // unlock
delay_exit = true;
None
}
};
} else {
warn!("skipping postgres startup because pre-startup step failed");
}
let mut delay_exit = false;
let mut exit_code = None;
let pg = match compute.start_compute(extension_server_port) {
Ok(pg) => Some(pg),
Err(err) => {
error!("could not start the compute node: {:#}", err);
let mut state = compute.state.lock().unwrap();
state.error = Some(format!("{:?}", err));
state.status = ComputeStatus::Failed;
// Notify others that Postgres failed to start. In case of configuring the
// empty compute, it's likely that API handler is still waiting for compute
// state change. With this we will notify it that compute is in Failed state,
// so control plane will know about it earlier and record proper error instead
// of timeout.
compute.state_changed.notify_all();
drop(state); // unlock
delay_exit = true;
None
}
};
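
Both the swap-resize branch and the `start_compute` branch above share the same failure path. Condensed here as a sketch; the types are the ones from this file, but the helper name is invented for illustration:

```rust
// Hypothetical helper; the logic mirrors the error arms above.
fn mark_startup_failed(compute: &ComputeNode, err: &anyhow::Error, delay_exit: &mut bool) {
    let mut state = compute.state.lock().unwrap();
    state.error = Some(format!("{err:?}"));
    state.status = ComputeStatus::Failed;
    // Wake any waiter (e.g. the /status handler) so the control plane sees the
    // failure early instead of timing out.
    compute.state_changed.notify_all();
    drop(state); // unlock
    *delay_exit = true;
}
```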
// Start the vm-monitor if directed to. The vm-monitor only runs on linux
// because it requires cgroups.
@@ -496,7 +334,7 @@ fn start_postgres(
// This token is used internally by the monitor to clean up all threads
let token = CancellationToken::new();
let vm_monitor = rt.as_ref().map(|rt| {
let vm_monitor = &rt.as_ref().map(|rt| {
rt.spawn(vm_monitor::start(
Box::leak(Box::new(vm_monitor::Args {
cgroup: cgroup.cloned(),
@@ -509,41 +347,12 @@ fn start_postgres(
}
}
Ok((
pg,
StartPostgresResult {
delay_exit,
compute,
#[cfg(target_os = "linux")]
rt,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
vm_monitor,
},
))
}
type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
struct StartPostgresResult {
delay_exit: bool,
// passed through from WaitSpecResult
compute: Arc<ComputeNode>,
#[cfg(target_os = "linux")]
rt: Option<tokio::runtime::Runtime>,
#[cfg(target_os = "linux")]
token: tokio_util::sync::CancellationToken,
#[cfg(target_os = "linux")]
vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
}
fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
// Wait for the child Postgres process forever. In this state Ctrl+C will
// propagate to Postgres and it will be shut down as well.
let mut exit_code = None;
if let Some((mut pg, logs_handle)) = pg {
// Startup is finished, exit the startup tracing span
drop(startup_context_guard);
let ecode = pg
.wait()
.expect("failed to start waiting on Postgres process");
@@ -558,25 +367,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
exit_code = ecode.code()
}
Ok(WaitPostgresResult { exit_code })
}
struct WaitPostgresResult {
exit_code: Option<i32>,
}
fn cleanup_after_postgres_exit(
StartPostgresResult {
mut delay_exit,
compute,
#[cfg(target_os = "linux")]
vm_monitor,
#[cfg(target_os = "linux")]
token,
#[cfg(target_os = "linux")]
rt,
}: StartPostgresResult,
) -> Result<bool> {
// Terminate the vm_monitor so it releases the file watcher on
// /sys/fs/cgroup/neon-postgres.
// Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -618,19 +408,13 @@ fn cleanup_after_postgres_exit(
error!("error while checking for core dumps: {err:?}");
}
Ok(delay_exit)
}
fn maybe_delay_exit(delay_exit: bool) {
// If launch failed, keep serving HTTP requests for a while, so the cloud
// control plane can get the actual error.
if delay_exit {
info!("giving control plane 30s to collect the error before shutdown");
thread::sleep(Duration::from_secs(30));
}
}
fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
// Shutdown trace pipeline gracefully, so that it has a chance to send any
// pending traces before we exit. Shutting down OTEL tracing provider may
// hang for quite some time, see, for example:
@@ -738,15 +522,10 @@ fn cli() -> clap::Command {
Arg::new("filecache-connstr")
.long("filecache-connstr")
.default_value(
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable application_name=vm-monitor",
"host=localhost port=5432 dbname=postgres user=cloud_admin sslmode=disable",
)
.value_name("FILECACHE_CONNSTR"),
)
.arg(
Arg::new("resize-swap-on-bind")
.long("resize-swap-on-bind")
.action(clap::ArgAction::SetTrue),
)
}
/// When compute_ctl is killed, send also termination signal to sync-safekeepers


@@ -1,116 +0,0 @@
use compute_api::{
responses::CatalogObjects,
spec::{Database, Role},
};
use futures::Stream;
use postgres::{Client, NoTls};
use std::{path::Path, process::Stdio, result::Result, sync::Arc};
use tokio::{
io::{AsyncBufReadExt, BufReader},
process::Command,
task,
};
use tokio_stream::{self as stream, StreamExt};
use tokio_util::codec::{BytesCodec, FramedRead};
use tracing::warn;
use crate::{
compute::ComputeNode,
pg_helpers::{get_existing_dbs, get_existing_roles},
};
pub async fn get_dbs_and_roles(compute: &Arc<ComputeNode>) -> anyhow::Result<CatalogObjects> {
let connstr = compute.connstr.clone();
task::spawn_blocking(move || {
let mut client = Client::connect(connstr.as_str(), NoTls)?;
let roles: Vec<Role>;
{
let mut xact = client.transaction()?;
roles = get_existing_roles(&mut xact)?;
}
let databases: Vec<Database> = get_existing_dbs(&mut client)?.values().cloned().collect();
Ok(CatalogObjects { roles, databases })
})
.await?
}
#[derive(Debug, thiserror::Error)]
pub enum SchemaDumpError {
#[error("Database does not exist.")]
DatabaseDoesNotExist,
#[error("Failed to execute pg_dump.")]
IO(#[from] std::io::Error),
}
// It uses the pg_dump utility to dump the schema of the specified database.
// The output is streamed back to the caller and supposed to be streamed via HTTP.
//
// Before return the result with the output, it checks that pg_dump produced any output.
// If not, it tries to parse the stderr output to determine if the database does not exist
// and special error is returned.
//
// To make sure that the process is killed when the caller drops the stream, we use tokio kill_on_drop feature.
pub async fn get_database_schema(
compute: &Arc<ComputeNode>,
dbname: &str,
) -> Result<impl Stream<Item = Result<bytes::Bytes, std::io::Error>>, SchemaDumpError> {
let pgbin = &compute.pgbin;
let basepath = Path::new(pgbin).parent().unwrap();
let pgdump = basepath.join("pg_dump");
let mut connstr = compute.connstr.clone();
connstr.set_path(dbname);
let mut cmd = Command::new(pgdump)
.arg("--schema-only")
.arg(connstr.as_str())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.kill_on_drop(true)
.spawn()?;
let stdout = cmd.stdout.take().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stdout.")
})?;
let stderr = cmd.stderr.take().ok_or_else(|| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to capture stderr.")
})?;
let mut stdout_reader = FramedRead::new(stdout, BytesCodec::new());
let stderr_reader = BufReader::new(stderr);
let first_chunk = match stdout_reader.next().await {
Some(Ok(bytes)) if !bytes.is_empty() => bytes,
Some(Err(e)) => {
return Err(SchemaDumpError::IO(e));
}
_ => {
let mut lines = stderr_reader.lines();
if let Some(line) = lines.next_line().await? {
if line.contains(&format!("FATAL: database \"{}\" does not exist", dbname)) {
return Err(SchemaDumpError::DatabaseDoesNotExist);
}
warn!("pg_dump stderr: {}", line)
}
tokio::spawn(async move {
while let Ok(Some(line)) = lines.next_line().await {
warn!("pg_dump stderr: {}", line)
}
});
return Err(SchemaDumpError::IO(std::io::Error::new(
std::io::ErrorKind::Other,
"failed to start pg_dump",
)));
}
};
let initial_stream = stream::once(Ok(first_chunk.freeze()));
// Consume stderr and log warnings
tokio::spawn(async move {
let mut lines = stderr_reader.lines();
while let Ok(Some(line)) = lines.next_line().await {
warn!("pg_dump stderr: {}", line)
}
});
Ok(initial_stream.chain(stdout_reader.map(|res| res.map(|b| b.freeze()))))
}
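
The stream returned here is consumed by the `/database_schema` route that appears later in this diff. A simplified handler sketch (not the exact http.rs code) showing how the dump is forwarded without buffering it in memory:

```rust
use std::sync::Arc;
use hyper::{Body, Response, StatusCode};

// Sketch: mirrors the /database_schema match arm in http.rs from this diff.
async fn serve_schema(compute: &Arc<ComputeNode>, database: &str) -> Response<Body> {
    match get_database_schema(compute, database).await {
        // pg_dump output is streamed straight into the response body.
        Ok(stream) => Response::new(Body::wrap_stream(stream)),
        Err(SchemaDumpError::DatabaseDoesNotExist) => Response::builder()
            .status(StatusCode::NOT_FOUND)
            .body(Body::from("database does not exist"))
            .unwrap(),
        Err(e) => Response::builder()
            .status(StatusCode::INTERNAL_SERVER_ERROR)
            .body(Body::from(format!("can't get schema dump: {e}")))
            .unwrap(),
    }
}
```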


@@ -56,7 +56,6 @@ pub struct ComputeNode {
/// - we push new spec and it does reconfiguration
/// - but then something happens and compute pod / VM is destroyed,
/// so k8s controller starts it again with the **old** spec
///
/// and the same for empty computes:
/// - we started compute without any spec
/// - we push spec and it does configuration
@@ -799,11 +798,7 @@ impl ComputeNode {
// In this case we need to connect with old `zenith_admin` name
// and create new user. We cannot simply rename connected user,
// but we can create a new one and grant it all privileges.
let mut connstr = self.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "apply_config");
let connstr = self.connstr.clone();
let mut client = match Client::connect(connstr.as_str(), NoTls) {
Err(e) => match e.code() {
Some(&SqlState::INVALID_PASSWORD)
@@ -823,15 +818,9 @@ impl ComputeNode {
Client::connect(zenith_admin_connstr.as_str(), NoTls)
.context("broken cloud_admin credential: tried connecting with cloud_admin but could not authenticate, and zenith_admin does not work either")?;
// Disable forwarding so that users don't get a cloud_admin role
let mut func = || {
client.simple_query("SET neon.forward_ddl = false")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
Ok::<_, anyhow::Error>(())
};
func().context("apply_config setup cloud_admin")?;
client.simple_query("SET neon.forward_ddl = false")?;
client.simple_query("CREATE USER cloud_admin WITH SUPERUSER")?;
client.simple_query("GRANT zenith_admin TO cloud_admin")?;
drop(client);
// reconnect with connstring with expected name
@@ -843,48 +832,39 @@ impl ComputeNode {
};
// Disable DDL forwarding because control plane already knows about these roles/databases.
client
.simple_query("SET neon.forward_ddl = false")
.context("apply_config SET neon.forward_ddl = false")?;
client.simple_query("SET neon.forward_ddl = false")?;
// Proceed with post-startup configuration. Note, that order of operations is important.
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
create_neon_superuser(spec, &mut client).context("apply_config create_neon_superuser")?;
cleanup_instance(&mut client).context("apply_config cleanup_instance")?;
handle_roles(spec, &mut client).context("apply_config handle_roles")?;
handle_databases(spec, &mut client).context("apply_config handle_databases")?;
handle_role_deletions(spec, connstr.as_str(), &mut client)
.context("apply_config handle_role_deletions")?;
create_neon_superuser(spec, &mut client)?;
cleanup_instance(&mut client)?;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, connstr.as_str(), &mut client)?;
handle_grants(
spec,
&mut client,
connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)
.context("apply_config handle_grants")?;
handle_extensions(spec, &mut client).context("apply_config handle_extensions")?;
handle_extension_neon(&mut client).context("apply_config handle_extension_neon")?;
create_availability_check_data(&mut client)
.context("apply_config create_availability_check_data")?;
)?;
handle_extensions(spec, &mut client)?;
handle_extension_neon(&mut client)?;
create_availability_check_data(&mut client)?;
// 'Close' connection
drop(client);
// Run migrations separately to not hold up cold starts
thread::spawn(move || {
let mut connstr = connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "migrations");
let mut client = Client::connect(connstr.as_str(), NoTls)?;
handle_migrations(&mut client).context("apply_config handle_migrations")
handle_migrations(&mut client)
});
Ok(())
}
// Wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop.
// We could've wrapped this around `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop, so this just seems much easier to do as we already
// have opened connection to Postgres and superuser access.
#[instrument(skip_all)]
fn pg_reload_conf(&self) -> Result<()> {
let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl");
@@ -927,39 +907,38 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are reconfiguring:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
self.pg_reload_conf()?;
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// Proceed with post-startup configuration. Note, that order of operations is important.
// Disable DDL forwarding because control plane already knows about these roles/databases.
if spec.mode == ComputeMode::Primary {
client.simple_query("SET neon.forward_ddl = false")?;
cleanup_instance(&mut client)?;
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(
&spec,
&mut client,
self.connstr.as_str(),
self.has_feature(ComputeFeature::AnonExtension),
)?;
handle_extensions(&spec, &mut client)?;
handle_extension_neon(&mut client)?;
// We can skip handle_migrations here because a new migration can only appear
// if we have a new version of the compute_ctl binary, which can only happen
// if compute got restarted, in which case we'll end up inside of apply_config
// instead of reconfigure.
}
// 'Close' connection
drop(client);
Ok(())
})?;
// 'Close' connection
drop(client);
// reset max_cluster_size in config back to original value and reload config
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
let unknown_op = "unknown".to_string();
@@ -1050,17 +1029,12 @@ impl ComputeNode {
// temporarily reset max_cluster_size in config
// to avoid the possibility of hitting the limit, while we are applying config:
// creating new extensions, roles, etc...
config::with_compute_ctl_tmp_override(
pgdata_path,
"neon.max_cluster_size=-1",
|| {
self.pg_reload_conf()?;
config::compute_ctl_temp_override_create(pgdata_path, "neon.max_cluster_size=-1")?;
self.pg_reload_conf()?;
self.apply_config(&compute_state)?;
self.apply_config(&compute_state)?;
Ok(())
},
)?;
config::compute_ctl_temp_override_remove(pgdata_path)?;
self.pg_reload_conf()?;
}
self.post_apply_config()?;
@@ -1117,7 +1091,7 @@ impl ComputeNode {
// EKS worker nodes have following core dump settings:
// /proc/sys/kernel/core_pattern -> core
// /proc/sys/kernel/core_uses_pid -> 1
// ulimit -c -> unlimited
// ulimint -c -> unlimited
// which results in core dumps being written to postgres data directory as core.<pid>.
//
// Use that as a default location and pattern, except macos where core dumps are written
@@ -1288,12 +1262,10 @@ LIMIT 100",
.await
.map_err(DownloadError::Other);
if download_size.is_ok() {
self.ext_download_progress
.write()
.expect("bad lock")
.insert(ext_archive_name.to_string(), (download_start, true));
}
self.ext_download_progress
.write()
.expect("bad lock")
.insert(ext_archive_name.to_string(), (download_start, true));
download_size
}
@@ -1396,9 +1368,7 @@ pub fn forward_termination_signal() {
let pg_pid = PG_PID.load(Ordering::SeqCst);
if pg_pid != 0 {
let pg_pid = nix::unistd::Pid::from_raw(pg_pid as i32);
// Use 'fast' shutdown (SIGINT) because it also creates a shutdown checkpoint, which is important for
// ROs to get a list of running xacts faster instead of going through the CLOG.
// See https://www.postgresql.org/docs/current/server-shutdown.html for the list of modes and signals.
kill(pg_pid, Signal::SIGINT).ok();
// use 'immediate' shutdown (SIGQUIT): https://www.postgresql.org/docs/current/server-shutdown.html
kill(pg_pid, Signal::SIGQUIT).ok();
}
}
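
For reference, the shutdown modes touched by this hunk map to signals as documented by PostgreSQL. A small sketch using the same `nix` calls as above:

```rust
use nix::sys::signal::{kill, Signal};
use nix::unistd::Pid;

// SIGTERM = smart, SIGINT = fast (writes a shutdown checkpoint),
// SIGQUIT = immediate (no checkpoint, crash recovery on next start).
fn shutdown_postgres(pg_pid: Pid, immediate: bool) {
    let sig = if immediate { Signal::SIGQUIT } else { Signal::SIGINT };
    kill(pg_pid, sig).ok();
}
```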


@@ -6,8 +6,8 @@ use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::escape_conf_value;
use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize};
use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption};
use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::{ComputeMode, ComputeSpec};
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
@@ -83,27 +83,12 @@ pub fn write_postgres_conf(
ComputeMode::Replica => {
// hot_standby is 'on' by default, but let's be explicit
writeln!(file, "hot_standby=on")?;
}
}
if cfg!(target_os = "linux") {
// Check /proc/sys/vm/overcommit_memory -- if it equals 2 (i.e. linux memory overcommit is
// disabled), then the control plane has enabled swap and we should set
// dynamic_shared_memory_type = 'mmap'.
//
// This is (maybe?) temporary - for more, see https://github.com/neondatabase/cloud/issues/12047.
let overcommit_memory_contents = std::fs::read_to_string("/proc/sys/vm/overcommit_memory")
// ignore any errors - they may be expected to occur under certain situations (e.g. when
// not running in Linux).
.unwrap_or_else(|_| String::new());
if overcommit_memory_contents.trim() == "2" {
let opt = GenericOption {
name: "dynamic_shared_memory_type".to_owned(),
value: Some("mmap".to_owned()),
vartype: "enum".to_owned(),
};
write!(file, "{}", opt.to_pg_setting())?;
// Inform the replica about the primary state
// Default is 'false'
if let Some(primary_is_running) = spec.primary_is_running {
writeln!(file, "neon.primary_is_running={}", primary_is_running)?;
}
}
}
@@ -125,17 +110,18 @@ pub fn write_postgres_conf(
Ok(())
}
pub fn with_compute_ctl_tmp_override<F>(pgdata_path: &Path, options: &str, exec: F) -> Result<()>
where
F: FnOnce() -> Result<()>,
{
/// create file compute_ctl_temp_override.conf in pgdata_dir
/// add provided options to this file
pub fn compute_ctl_temp_override_create(pgdata_path: &Path, options: &str) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
let mut file = File::create(path)?;
write!(file, "{}", options)?;
let res = exec();
file.set_len(0)?;
res
Ok(())
}
/// remove file compute_ctl_temp_override.conf in pgdata_dir
pub fn compute_ctl_temp_override_remove(pgdata_path: &Path) -> Result<()> {
let path = pgdata_path.join("compute_ctl_temp_override.conf");
std::fs::remove_file(path)?;
Ok(())
}
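
A minimal usage sketch for the closure-based variant shown above: the override file is written before the closure runs and cleared afterwards, even when the closure returns an error. The closure body here is a placeholder, and `config::` is assumed to be the module path used elsewhere in this diff:

```rust
use std::path::Path;
use anyhow::Result;

fn reconfigure_with_unlimited_size(pgdata_path: &Path) -> Result<()> {
    config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || {
        // placeholder: reload the config and run the actual DDL here;
        // an early `?` return still clears the override file.
        Ok(())
    })
}
```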


@@ -5,21 +5,17 @@ use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;
use crate::catalog::SchemaDumpError;
use crate::catalog::{get_database_schema, get_dbs_and_roles};
use crate::compute::forward_termination_signal;
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use anyhow::Result;
use hyper::header::CONTENT_TYPE;
use hyper::service::{make_service_fn, service_fn};
use hyper::{Body, Method, Request, Response, Server, StatusCode};
use tokio::task;
use tracing::{debug, error, info, warn};
use tracing::{error, info, warn};
use tracing_utils::http::OtelName;
use utils::http::request::must_get_query_param;
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
ComputeStatusResponse {
@@ -48,7 +44,7 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
match (req.method(), req.uri().path()) {
// Serialized compute state.
(&Method::GET, "/status") => {
debug!("serving /status GET request");
info!("serving /status GET request");
let state = compute.state.lock().unwrap();
let status_response = status_response_from_state(&state);
Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
@@ -137,34 +133,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
}
}
(&Method::GET, "/dbs_and_roles") => {
info!("serving /dbs_and_roles GET request",);
match get_dbs_and_roles(compute).await {
Ok(res) => render_json(Body::from(serde_json::to_string(&res).unwrap())),
Err(_) => {
render_json_error("can't get dbs and roles", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
(&Method::GET, "/database_schema") => {
let database = match must_get_query_param(&req, "database") {
Err(e) => return e.into_response(),
Ok(database) => database,
};
info!("serving /database_schema GET request with database: {database}",);
match get_database_schema(compute, &database).await {
Ok(res) => render_plain(Body::wrap_stream(res)),
Err(SchemaDumpError::DatabaseDoesNotExist) => {
render_json_error("database does not exist", StatusCode::NOT_FOUND)
}
Err(e) => {
error!("can't get schema dump: {}", e);
render_json_error("can't get schema dump", StatusCode::INTERNAL_SERVER_ERROR)
}
}
}
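
A hypothetical client-side sketch for the two catalog routes above, based only on the route shapes in this diff; the base URL is an assumption, and `reqwest` appears as a dependency in a Cargo.toml hunk earlier in this compare:

```rust
async fn dump_schema(base_url: &str, db: &str) -> anyhow::Result<String> {
    let url = format!("{base_url}/database_schema?database={db}");
    let resp = reqwest::get(&url).await?;
    if resp.status() == reqwest::StatusCode::NOT_FOUND {
        anyhow::bail!("database {db} does not exist");
    }
    // The dump is returned as a text/plain SQL script.
    Ok(resp.error_for_status()?.text().await?)
}
```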
// download extension files from remote extension storage on demand
(&Method::POST, route) if route.starts_with("/extension_server/") => {
info!("serving {:?} POST request", route);
@@ -335,25 +303,10 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
};
Response::builder()
.status(status)
.header(CONTENT_TYPE, "application/json")
.body(Body::from(serde_json::to_string(&error).unwrap()))
.unwrap()
}
fn render_json(body: Body) -> Response<Body> {
Response::builder()
.header(CONTENT_TYPE, "application/json")
.body(body)
.unwrap()
}
fn render_plain(body: Body) -> Response<Body> {
Response::builder()
.header(CONTENT_TYPE, "text/plain")
.body(body)
.unwrap()
}
async fn handle_terminate_request(compute: &Arc<ComputeNode>) -> Result<(), (String, StatusCode)> {
{
let mut state = compute.state.lock().unwrap();


@@ -68,51 +68,6 @@ paths:
schema:
$ref: "#/components/schemas/Info"
/dbs_and_roles:
get:
tags:
- Info
summary: Get databases and roles in the catalog.
description: ""
operationId: getDbsAndRoles
responses:
200:
description: Compute schema objects
content:
application/json:
schema:
$ref: "#/components/schemas/DbsAndRoles"
/database_schema:
get:
tags:
- Info
summary: Get schema dump
parameters:
- name: database
in: query
description: Database name to dump.
required: true
schema:
type: string
example: "postgres"
description: Get schema dump in SQL format.
operationId: getDatabaseSchema
responses:
200:
description: Schema dump
content:
text/plain:
schema:
type: string
description: Schema dump in SQL format.
404:
description: Non existing database.
content:
application/json:
schema:
$ref: "#/components/schemas/GenericError"
/check_writability:
post:
tags:
@@ -274,73 +229,6 @@ components:
num_cpus:
type: integer
DbsAndRoles:
type: object
description: Databases and Roles
required:
- roles
- databases
properties:
roles:
type: array
items:
$ref: "#/components/schemas/Role"
databases:
type: array
items:
$ref: "#/components/schemas/Database"
Database:
type: object
description: Database
required:
- name
- owner
- restrict_conn
- invalid
properties:
name:
type: string
owner:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
restrict_conn:
type: boolean
invalid:
type: boolean
Role:
type: object
description: Role
required:
- name
properties:
name:
type: string
encrypted_password:
type: string
options:
type: array
items:
$ref: "#/components/schemas/GenericOption"
GenericOption:
type: object
description: Schema Generic option
required:
- name
- vartype
properties:
name:
type: string
value:
type: string
vartype:
type: string
ComputeState:
type: object
required:


@@ -8,13 +8,10 @@ pub mod configurator;
pub mod http;
#[macro_use]
pub mod logger;
pub mod catalog;
pub mod compute;
pub mod extension_server;
mod migration;
pub mod monitor;
pub mod params;
pub mod pg_helpers;
pub mod spec;
pub mod swap;
pub mod sync_sk;


@@ -1,105 +0,0 @@
use anyhow::{Context, Result};
use postgres::Client;
use tracing::info;
pub(crate) struct MigrationRunner<'m> {
client: &'m mut Client,
migrations: &'m [&'m str],
}
impl<'m> MigrationRunner<'m> {
pub fn new(client: &'m mut Client, migrations: &'m [&'m str]) -> Self {
// The neon_migration.migration_id::id column is a bigint, which is equivalent to an i64
assert!(migrations.len() + 1 < i64::MAX as usize);
Self { client, migrations }
}
fn get_migration_id(&mut self) -> Result<i64> {
let query = "SELECT id FROM neon_migration.migration_id";
let row = self
.client
.query_one(query, &[])
.context("run_migrations get migration_id")?;
Ok(row.get::<&str, i64>("id"))
}
fn update_migration_id(&mut self, migration_id: i64) -> Result<()> {
let setval = format!("UPDATE neon_migration.migration_id SET id={}", migration_id);
self.client
.simple_query(&setval)
.context("run_migrations update id")?;
Ok(())
}
fn prepare_migrations(&mut self) -> Result<()> {
let query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
self.client.simple_query(query)?;
let query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
self.client.simple_query(query)?;
let query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
self.client.simple_query(query)?;
let query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
self.client.simple_query(query)?;
let query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
self.client.simple_query(query)?;
Ok(())
}
pub fn run_migrations(mut self) -> Result<()> {
self.prepare_migrations()?;
let mut current_migration = self.get_migration_id()? as usize;
while current_migration < self.migrations.len() {
macro_rules! migration_id {
($cm:expr) => {
($cm + 1) as i64
};
}
let migration = self.migrations[current_migration];
if migration.starts_with("-- SKIP") {
info!("Skipping migration id={}", migration_id!(current_migration));
} else {
info!(
"Running migration id={}:\n{}\n",
migration_id!(current_migration),
migration
);
self.client
.simple_query("BEGIN")
.context("begin migration")?;
self.client.simple_query(migration).with_context(|| {
format!(
"run_migrations migration id={}",
migration_id!(current_migration)
)
})?;
// Migration IDs start at 1
self.update_migration_id(migration_id!(current_migration))?;
self.client
.simple_query("COMMIT")
.context("commit migration")?;
info!("Finished migration id={}", migration_id!(current_migration));
}
current_migration += 1;
}
Ok(())
}
}
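
A usage sketch for the runner above; the first migration string is one of the real migrations in this diff, the skipped one is a placeholder:

```rust
use anyhow::Result;
use postgres::Client;

fn apply(client: &mut Client) -> Result<()> {
    let migrations = [
        "ALTER ROLE neon_superuser BYPASSRLS",         // becomes migration id 1
        "-- SKIP: placeholder for a superseded step",  // logged and skipped
    ];
    // Each applied migration runs in its own BEGIN/COMMIT, and
    // neon_migration.migration_id records the last applied id.
    MigrationRunner::new(client, &migrations).run_migrations()
}
```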


@@ -1 +0,0 @@
ALTER ROLE neon_superuser BYPASSRLS;


@@ -1,18 +0,0 @@
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;


@@ -1,6 +0,0 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END $$;


@@ -1 +0,0 @@
GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION;


@@ -1,4 +0,0 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser;


@@ -1,4 +0,0 @@
-- SKIP: Deemed insufficient for allowing relations created by extensions to be
-- interacted with by neon_superuser without permission issues.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser;


@@ -1,3 +0,0 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO neon_superuser WITH GRANT OPTION;


@@ -1,3 +0,0 @@
-- SKIP: Moved inline to the handle_grants() functions.
ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO neon_superuser WITH GRANT OPTION;


@@ -1,13 +0,0 @@
-- SKIP: The original goal of this migration was to prevent creating
-- subscriptions, but this migration was insufficient.
DO $$
DECLARE
role_name TEXT;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
END LOOP;
END $$;


@@ -1,7 +0,0 @@
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_export_snapshot TO neon_superuser';
EXECUTE 'GRANT EXECUTE ON FUNCTION pg_log_standby_snapshot TO neon_superuser';
END IF;
END $$;


@@ -17,11 +17,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500);
// should be handled gracefully.
fn watch_compute_activity(compute: &ComputeNode) {
// Suppose that `connstr` doesn't change
let mut connstr = compute.connstr.clone();
connstr
.query_pairs_mut()
.append_pair("application_name", "compute_activity_monitor");
let connstr = connstr.as_str();
let connstr = compute.connstr.as_str();
// During startup and configuration we connect to every Postgres database,
// but we don't want to count this as some user activity. So wait until

View File

@@ -44,7 +44,7 @@ pub fn escape_conf_value(s: &str) -> String {
format!("'{}'", res)
}
pub trait GenericOptionExt {
trait GenericOptionExt {
fn to_pg_option(&self) -> String;
fn to_pg_setting(&self) -> String;
}
@@ -489,7 +489,7 @@ pub fn handle_postgres_logs(stderr: std::process::ChildStderr) -> JoinHandle<()>
/// Read Postgres logs from `stderr` until EOF. Buffer is flushed on one of the following conditions:
/// - next line starts with timestamp
/// - EOF
/// - no new lines were written for the last 100 milliseconds
/// - no new lines were written for the last second
async fn handle_postgres_logs_async(stderr: tokio::process::ChildStderr) -> Result<()> {
let mut lines = tokio::io::BufReader::new(stderr).lines();
let timeout_duration = Duration::from_millis(100);

View File

@@ -2,7 +2,7 @@ use std::fs::File;
use std::path::Path;
use std::str::FromStr;
use anyhow::{anyhow, bail, Context, Result};
use anyhow::{anyhow, bail, Result};
use postgres::config::Config;
use postgres::{Client, NoTls};
use reqwest::StatusCode;
@@ -10,7 +10,6 @@ use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
use crate::config;
use crate::logger::inlinify;
use crate::migration::MigrationRunner;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
@@ -303,9 +302,9 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
RoleAction::Create => {
// This branch only runs when roles are created through the console, so it is
// safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
// from neon_superuser.
// from neon_superuser. (NOTE: REPLICATION has been removed from here for now).
let mut query: String = format!(
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
"CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS IN ROLE neon_superuser",
name.pg_quote()
);
info!("running role create query: '{}'", &query);
@@ -491,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
"rename_db" => {
let new_name = op.new_name.as_ref().unwrap();
if existing_dbs.contains_key(&op.name) {
if existing_dbs.get(&op.name).is_some() {
let query: String = format!(
"ALTER DATABASE {} RENAME TO {}",
op.name.pg_quote(),
@@ -699,8 +698,7 @@ pub fn handle_grants(
// it is important to run this after all grants
if enable_anon_extension {
handle_extension_anon(spec, &db.owner, &mut db_client, false)
.context("handle_grants handle_extension_anon")?;
handle_extension_anon(spec, &db.owner, &mut db_client, false)?;
}
}
@@ -745,24 +743,21 @@ pub fn handle_extension_neon(client: &mut Client) -> Result<()> {
// which may happen in two cases:
// - extension was just installed
// - extension was already installed and is up to date
let query = "ALTER EXTENSION neon UPDATE";
info!("update neon extension version with query: {}", query);
if let Err(e) = client.simple_query(query) {
error!(
"failed to upgrade neon extension during `handle_extension_neon`: {}",
e
);
}
// DISABLED due to compute node unpinning epic
// let query = "ALTER EXTENSION neon UPDATE";
// info!("update neon extension version with query: {}", query);
// client.simple_query(query)?;
Ok(())
}
#[instrument(skip_all)]
pub fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> {
info!("handle neon extension upgrade");
let query = "ALTER EXTENSION neon UPDATE";
info!("update neon extension version with query: {}", query);
client.simple_query(query)?;
pub fn handle_neon_extension_upgrade(_client: &mut Client) -> Result<()> {
info!("handle neon extension upgrade (not really)");
// DISABLED due to compute node unpinning epic
// let query = "ALTER EXTENSION neon UPDATE";
// info!("update neon extension version with query: {}", query);
// client.simple_query(query)?;
Ok(())
}
@@ -775,27 +770,103 @@ pub fn handle_migrations(client: &mut Client) -> Result<()> {
// !BE SURE TO ONLY ADD MIGRATIONS TO THE END OF THIS ARRAY. IF YOU DO NOT, VERY VERY BAD THINGS MAY HAPPEN!
// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
// Add new migrations in numerical order.
let migrations = [
include_str!("./migrations/0001-neon_superuser_bypass_rls.sql"),
include_str!("./migrations/0002-alter_roles.sql"),
include_str!("./migrations/0003-grant_pg_create_subscription_to_neon_superuser.sql"),
include_str!("./migrations/0004-grant_pg_monitor_to_neon_superuser.sql"),
include_str!("./migrations/0005-grant_all_on_tables_to_neon_superuser.sql"),
include_str!("./migrations/0006-grant_all_on_sequences_to_neon_superuser.sql"),
include_str!(
"./migrations/0007-grant_all_on_tables_to_neon_superuser_with_grant_option.sql"
),
include_str!(
"./migrations/0008-grant_all_on_sequences_to_neon_superuser_with_grant_option.sql"
),
include_str!("./migrations/0009-revoke_replication_for_previously_allowed_roles.sql"),
include_str!(
"./migrations/0010-grant_snapshot_synchronization_funcs_to_neon_superuser.sql"
),
"ALTER ROLE neon_superuser BYPASSRLS",
r#"
DO $$
DECLARE
role_name text;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE pg_has_role(rolname, 'neon_superuser', 'member')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % INHERIT', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' INHERIT';
END LOOP;
FOR role_name IN SELECT rolname FROM pg_roles
WHERE
NOT pg_has_role(rolname, 'neon_superuser', 'member') AND NOT starts_with(rolname, 'pg_')
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOBYPASSRLS', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOBYPASSRLS';
END LOOP;
END $$;
"#,
r#"
DO $$
BEGIN
IF (SELECT setting::numeric >= 160000 FROM pg_settings WHERE name = 'server_version_num') THEN
EXECUTE 'GRANT pg_create_subscription TO neon_superuser';
END IF;
END
$$;"#,
"GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
// Don't remove: these empty entries stand in for SQL statements that were originally applied as migrations here but are now executed elsewhere; keeping them preserves the ids of later migrations.
"",
"",
"",
"",
// Add new migrations below.
r#"
DO $$
DECLARE
role_name TEXT;
BEGIN
FOR role_name IN SELECT rolname FROM pg_roles WHERE rolreplication IS TRUE
LOOP
RAISE NOTICE 'EXECUTING ALTER ROLE % NOREPLICATION', quote_ident(role_name);
EXECUTE 'ALTER ROLE ' || quote_ident(role_name) || ' NOREPLICATION';
END LOOP;
END
$$;"#,
];
MigrationRunner::new(client, &migrations).run_migrations()?;
let mut query = "CREATE SCHEMA IF NOT EXISTS neon_migration";
client.simple_query(query)?;
query = "CREATE TABLE IF NOT EXISTS neon_migration.migration_id (key INT NOT NULL PRIMARY KEY, id bigint NOT NULL DEFAULT 0)";
client.simple_query(query)?;
query = "INSERT INTO neon_migration.migration_id VALUES (0, 0) ON CONFLICT DO NOTHING";
client.simple_query(query)?;
query = "ALTER SCHEMA neon_migration OWNER TO cloud_admin";
client.simple_query(query)?;
query = "REVOKE ALL ON SCHEMA neon_migration FROM PUBLIC";
client.simple_query(query)?;
query = "SELECT id FROM neon_migration.migration_id";
let row = client.query_one(query, &[])?;
let mut current_migration: usize = row.get::<&str, i64>("id") as usize;
let starting_migration_id = current_migration;
query = "BEGIN";
client.simple_query(query)?;
while current_migration < migrations.len() {
let migration = &migrations[current_migration];
if migration.is_empty() {
info!("Skip migration id={}", current_migration);
} else {
info!("Running migration:\n{}\n", migration);
client.simple_query(migration)?;
}
current_migration += 1;
}
let setval = format!(
"UPDATE neon_migration.migration_id SET id={}",
migrations.len()
);
client.simple_query(&setval)?;
query = "COMMIT";
client.simple_query(query)?;
info!(
"Ran {} migrations",
(migrations.len() - starting_migration_id)
);
Ok(())
}
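
A note on the empty placeholder entries above: the runner addresses migrations by array index and stores the count of applied migrations, so superseded entries must keep their slots (as empty strings) or the ids of later migrations would shift. A minimal, standalone sketch of that skip-by-index behaviour (entries abbreviated; not the real runner):

```
fn main() {
    // "" marks a migration whose SQL now runs elsewhere; it keeps its slot so
    // that later indices (and the stored migration id) do not shift.
    let migrations = [
        "ALTER ROLE neon_superuser BYPASSRLS",
        "",
        "",
        "GRANT pg_monitor TO neon_superuser WITH ADMIN OPTION",
    ];
    let mut current = 1usize; // pretend the stored migration id is 1
    while current < migrations.len() {
        if migrations[current].is_empty() {
            println!("Skip migration id={current}");
        } else {
            println!("Running migration id={current}:\n{}", migrations[current]);
        }
        current += 1;
    }
    // Like the real runner, the count includes skipped placeholders.
    println!("Ran {} migrations", migrations.len() - 1);
}
```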

View File

@@ -1,45 +0,0 @@
use std::path::Path;
use anyhow::{anyhow, Context};
use tracing::warn;
pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
// run `/neonvm/bin/resize-swap --once {size_bytes}`
//
// Passing '--once' causes resize-swap to delete itself after successful completion, which
// means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
// postgres is running.
//
// NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
let child_result = std::process::Command::new("/usr/bin/sudo")
.arg(RESIZE_SWAP_BIN)
.arg("--once")
.arg(size_bytes.to_string())
.spawn();
child_result
.context("spawn() failed")
.and_then(|mut child| child.wait().context("wait() failed"))
.and_then(|status| match status.success() {
true => Ok(()),
false => {
// The command failed. Maybe it was because the resize-swap file doesn't exist?
// The --once flag causes it to delete itself on success so we don't disable swap
// while postgres is running; maybe this is fine.
match Path::new(RESIZE_SWAP_BIN).try_exists() {
Err(_) | Ok(true) => Err(anyhow!("process exited with {status}")),
// The path doesn't exist; we're actually ok
Ok(false) => {
warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
Ok(())
},
}
}
})
// wrap any prior error with the overall context that we couldn't run the command
.with_context(|| {
format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
})
}

View File

@@ -17,7 +17,6 @@ nix.workspace = true
once_cell.workspace = true
postgres.workspace = true
hex.workspace = true
humantime-serde.workspace = true
hyper.workspace = true
regex.workspace = true
reqwest = { workspace = true, features = ["blocking", "json"] }
@@ -28,7 +27,6 @@ serde_with.workspace = true
tar.workspace = true
thiserror.workspace = true
toml.workspace = true
toml_edit.workspace = true
tokio.workspace = true
tokio-postgres.workspace = true
tokio-util.workspace = true
@@ -40,7 +38,6 @@ safekeeper_api.workspace = true
postgres_connection.workspace = true
storage_broker.workspace = true
utils.workspace = true
whoami.workspace = true
compute_api.workspace = true
workspace_hack.workspace = true

View File

@@ -1,5 +1,5 @@
[package]
name = "storage_controller"
name = "attachment_service"
version = "0.1.0"
edition.workspace = true
license.workspace = true
@@ -25,13 +25,12 @@ git-version.workspace = true
hex.workspace = true
hyper.workspace = true
humantime.workspace = true
itertools.workspace = true
lasso.workspace = true
once_cell.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true
postgres_connection.workspace = true
reqwest = { workspace = true, features = ["stream"] }
reqwest.workspace = true
routerify.workspace = true
serde.workspace = true
serde_json.workspace = true
@@ -40,16 +39,13 @@ tokio.workspace = true
tokio-util.workspace = true
tracing.workspace = true
measured.workspace = true
scopeguard.workspace = true
strum.workspace = true
strum_macros.workspace = true
diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] }
diesel_migrations = { version = "2.1.0" }
r2d2 = { version = "0.8.10" }
utils = { path = "../libs/utils/" }
metrics = { path = "../libs/metrics/" }
control_plane = { path = "../control_plane" }
workspace_hack = { version = "0.1", path = "../workspace_hack" }
utils = { path = "../../libs/utils/" }
metrics = { path = "../../libs/metrics/" }
control_plane = { path = ".." }
workspace_hack = { version = "0.1", path = "../../workspace_hack" }

View File

@@ -0,0 +1,462 @@
use std::{collections::HashMap, time::Duration};
use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
use control_plane::local_env::LocalEnv;
use hyper::{Method, StatusCode};
use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId};
use postgres_connection::parse_host_port;
use serde::{Deserialize, Serialize};
use tokio_util::sync::CancellationToken;
use utils::{
backoff::{self},
id::{NodeId, TenantId},
};
use crate::service::Config;
const BUSY_DELAY: Duration = Duration::from_secs(1);
const SLOWDOWN_DELAY: Duration = Duration::from_secs(5);
pub(crate) const API_CONCURRENCY: usize = 32;
struct ShardedComputeHookTenant {
stripe_size: ShardStripeSize,
shard_count: ShardCount,
shards: Vec<(ShardNumber, NodeId)>,
}
enum ComputeHookTenant {
Unsharded(NodeId),
Sharded(ShardedComputeHookTenant),
}
impl ComputeHookTenant {
/// Construct with at least one shard's information
fn new(tenant_shard_id: TenantShardId, stripe_size: ShardStripeSize, node_id: NodeId) -> Self {
if tenant_shard_id.shard_count.count() > 1 {
Self::Sharded(ShardedComputeHookTenant {
shards: vec![(tenant_shard_id.shard_number, node_id)],
stripe_size,
shard_count: tenant_shard_id.shard_count,
})
} else {
Self::Unsharded(node_id)
}
}
/// Set one shard's location. If the stripe size or shard count has changed, Self is reset
/// and drops existing content.
fn update(
&mut self,
tenant_shard_id: TenantShardId,
stripe_size: ShardStripeSize,
node_id: NodeId,
) {
match self {
Self::Unsharded(existing_node_id) if tenant_shard_id.shard_count.count() == 1 => {
*existing_node_id = node_id
}
Self::Sharded(sharded_tenant)
if sharded_tenant.stripe_size == stripe_size
&& sharded_tenant.shard_count == tenant_shard_id.shard_count =>
{
if let Some(existing) = sharded_tenant
.shards
.iter()
.position(|s| s.0 == tenant_shard_id.shard_number)
{
sharded_tenant.shards.get_mut(existing).unwrap().1 = node_id;
} else {
sharded_tenant
.shards
.push((tenant_shard_id.shard_number, node_id));
sharded_tenant.shards.sort_by_key(|s| s.0)
}
}
_ => {
// Shard count changed: reset struct.
*self = Self::new(tenant_shard_id, stripe_size, node_id);
}
}
}
}
#[derive(Serialize, Deserialize, Debug)]
struct ComputeHookNotifyRequestShard {
node_id: NodeId,
shard_number: ShardNumber,
}
/// Request body that we send to the control plane to notify it of where a tenant is attached
#[derive(Serialize, Deserialize, Debug)]
struct ComputeHookNotifyRequest {
tenant_id: TenantId,
stripe_size: Option<ShardStripeSize>,
shards: Vec<ComputeHookNotifyRequestShard>,
}
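
For orientation, the JSON shape of this notification body can be sketched with stand-in types; the real `TenantId`, `NodeId` and `ShardStripeSize` serializations come from their own crates, so the field values below are purely illustrative (assumes serde with the derive feature plus serde_json):

```
use serde::Serialize;

// Stand-ins mirroring the field layout of ComputeHookNotifyRequest /
// ComputeHookNotifyRequestShard above; not the real crate types.
#[derive(Serialize)]
struct Shard {
    node_id: u64,
    shard_number: u8,
}

#[derive(Serialize)]
struct Notify {
    tenant_id: String,
    stripe_size: Option<u32>,
    shards: Vec<Shard>,
}

fn main() -> Result<(), serde_json::Error> {
    let body = Notify {
        tenant_id: "0123456789abcdef0123456789abcdef".into(), // hypothetical tenant id
        stripe_size: Some(32768),
        shards: vec![
            Shard { node_id: 1, shard_number: 0 },
            Shard { node_id: 2, shard_number: 1 },
        ],
    };
    println!("{}", serde_json::to_string_pretty(&body)?);
    Ok(())
}
```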
/// Error type for attempts to call into the control plane compute notification hook
#[derive(thiserror::Error, Debug)]
pub(crate) enum NotifyError {
// Request was not sent successfully, e.g. transport error
#[error("Sending request: {0}")]
Request(#[from] reqwest::Error),
// Request could not be serviced right now due to an ongoing operation in the control plane, but should be possible soon.
#[error("Control plane tenant busy")]
Busy,
// Explicit 429 response asking us to retry less frequently
#[error("Control plane overloaded")]
SlowDown,
// A 503 response indicates the control plane can't handle the request right now
#[error("Control plane unavailable (status {0})")]
Unavailable(StatusCode),
// API returned unexpected non-success status. We will retry, but log a warning.
#[error("Control plane returned unexpected status {0}")]
Unexpected(StatusCode),
// We shutdown while sending
#[error("Shutting down")]
ShuttingDown,
// A response indicates we will never succeed, such as 400 or 404
#[error("Non-retryable error {0}")]
Fatal(StatusCode),
}
impl ComputeHookTenant {
fn maybe_reconfigure(&self, tenant_id: TenantId) -> Option<ComputeHookNotifyRequest> {
match self {
Self::Unsharded(node_id) => Some(ComputeHookNotifyRequest {
tenant_id,
shards: vec![ComputeHookNotifyRequestShard {
shard_number: ShardNumber(0),
node_id: *node_id,
}],
stripe_size: None,
}),
Self::Sharded(sharded_tenant)
if sharded_tenant.shards.len() == sharded_tenant.shard_count.count() as usize =>
{
Some(ComputeHookNotifyRequest {
tenant_id,
shards: sharded_tenant
.shards
.iter()
.map(|(shard_number, node_id)| ComputeHookNotifyRequestShard {
shard_number: *shard_number,
node_id: *node_id,
})
.collect(),
stripe_size: Some(sharded_tenant.stripe_size),
})
}
Self::Sharded(sharded_tenant) => {
// Sharded tenant doesn't yet have information for all its shards
tracing::info!(
"ComputeHookTenant::maybe_reconfigure: not enough shards ({}/{})",
sharded_tenant.shards.len(),
sharded_tenant.shard_count.count()
);
None
}
}
}
}
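
The gating rule in maybe_reconfigure, reduced to its essence: a sharded tenant only produces a notification once every one of its shards has reported a pageserver. A trivial standalone sketch of that rule (names are illustrative only):

```
fn ready_to_notify(shard_count: usize, shards_with_pageserver: &[u32]) -> bool {
    // Only emit a notification once a pageserver is known for every shard.
    shards_with_pageserver.len() == shard_count
}

fn main() {
    assert!(!ready_to_notify(2, &[1]));   // mid-split: only one shard known, stay quiet
    assert!(ready_to_notify(2, &[0, 1])); // every shard known: safe to notify the compute
    println!("gating rule holds");
}
```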
/// The compute hook is a destination for notifications about changes to tenant:pageserver
/// mapping. It aggregates updates for the shards in a tenant, and when appropriate reconfigures
/// the compute connection string.
pub(super) struct ComputeHook {
config: Config,
state: tokio::sync::Mutex<HashMap<TenantId, ComputeHookTenant>>,
authorization_header: Option<String>,
}
impl ComputeHook {
pub(super) fn new(config: Config) -> Self {
let authorization_header = config
.control_plane_jwt_token
.clone()
.map(|jwt| format!("Bearer {}", jwt));
Self {
state: Default::default(),
config,
authorization_header,
}
}
/// For test environments: use neon_local's LocalEnv to update compute
async fn do_notify_local(
&self,
reconfigure_request: ComputeHookNotifyRequest,
) -> anyhow::Result<()> {
let env = match LocalEnv::load_config() {
Ok(e) => e,
Err(e) => {
tracing::warn!("Couldn't load neon_local config, skipping compute update ({e})");
return Ok(());
}
};
let cplane =
ComputeControlPlane::load(env.clone()).expect("Error loading compute control plane");
let ComputeHookNotifyRequest {
tenant_id,
shards,
stripe_size,
} = reconfigure_request;
let compute_pageservers = shards
.into_iter()
.map(|shard| {
let ps_conf = env
.get_pageserver_conf(shard.node_id)
.expect("Unknown pageserver");
let (pg_host, pg_port) = parse_host_port(&ps_conf.listen_pg_addr)
.expect("Unable to parse listen_pg_addr");
(pg_host, pg_port.unwrap_or(5432))
})
.collect::<Vec<_>>();
for (endpoint_name, endpoint) in &cplane.endpoints {
if endpoint.tenant_id == tenant_id && endpoint.status() == EndpointStatus::Running {
tracing::info!("Reconfiguring endpoint {}", endpoint_name,);
endpoint
.reconfigure(compute_pageservers.clone(), stripe_size)
.await?;
}
}
Ok(())
}
async fn do_notify_iteration(
&self,
client: &reqwest::Client,
url: &String,
reconfigure_request: &ComputeHookNotifyRequest,
cancel: &CancellationToken,
) -> Result<(), NotifyError> {
let req = client.request(Method::PUT, url);
let req = if let Some(value) = &self.authorization_header {
req.header(reqwest::header::AUTHORIZATION, value)
} else {
req
};
tracing::info!(
"Sending notify request to {} ({:?})",
url,
reconfigure_request
);
let send_result = req.json(&reconfigure_request).send().await;
let response = match send_result {
Ok(r) => r,
Err(e) => return Err(e.into()),
};
// Treat all 2xx responses as success
if response.status() >= StatusCode::OK && response.status() < StatusCode::MULTIPLE_CHOICES {
if response.status() != StatusCode::OK {
// Non-200 2xx response: it doesn't make sense to retry, but this is unexpected, so
// log a warning.
tracing::warn!(
"Unexpected 2xx response code {} from control plane",
response.status()
);
}
return Ok(());
}
// Error response codes
match response.status() {
StatusCode::TOO_MANY_REQUESTS => {
// TODO: 429 handling should be global: set some state visible to other requests
// so that they will delay before starting, rather than all notifications trying
// once before backing off.
tokio::time::timeout(SLOWDOWN_DELAY, cancel.cancelled())
.await
.ok();
Err(NotifyError::SlowDown)
}
StatusCode::LOCKED => {
// Delay our retry if busy: the usual fast exponential backoff in backoff::retry
// is not appropriate
tokio::time::timeout(BUSY_DELAY, cancel.cancelled())
.await
.ok();
Err(NotifyError::Busy)
}
StatusCode::SERVICE_UNAVAILABLE
| StatusCode::GATEWAY_TIMEOUT
| StatusCode::BAD_GATEWAY => Err(NotifyError::Unavailable(response.status())),
StatusCode::BAD_REQUEST | StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
Err(NotifyError::Fatal(response.status()))
}
_ => Err(NotifyError::Unexpected(response.status())),
}
}
async fn do_notify(
&self,
url: &String,
reconfigure_request: ComputeHookNotifyRequest,
cancel: &CancellationToken,
) -> Result<(), NotifyError> {
let client = reqwest::Client::new();
backoff::retry(
|| self.do_notify_iteration(&client, url, &reconfigure_request, cancel),
|e| matches!(e, NotifyError::Fatal(_) | NotifyError::Unexpected(_)),
3,
10,
"Send compute notification",
cancel,
)
.await
.ok_or_else(|| NotifyError::ShuttingDown)
.and_then(|x| x)
}
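
The retry policy above boils down to: retry transient failures a bounded number of times, and bail out immediately on anything classified as permanent (Fatal/Unexpected). A small self-contained sketch of that classification pattern; it does not use the real utils::backoff API:

```
#[derive(Debug)]
enum HookError {
    Transient, // e.g. 429 / 503-style responses: worth retrying
    Fatal,     // e.g. 400 / 403-style responses: retrying cannot help
}

fn retry_notify<T>(
    mut attempt: impl FnMut() -> Result<T, HookError>,
    max_attempts: usize,
) -> Result<T, HookError> {
    let mut last = HookError::Transient;
    for _ in 0..max_attempts {
        match attempt() {
            Ok(v) => return Ok(v),
            // Permanent errors are surfaced immediately.
            Err(HookError::Fatal) => return Err(HookError::Fatal),
            // Transient errors: try again (a real implementation would back off here).
            Err(e) => last = e,
        }
    }
    Err(last)
}

fn main() {
    let mut calls = 0;
    let outcome = retry_notify(
        || {
            calls += 1;
            if calls < 3 { Err(HookError::Transient) } else { Ok(calls) }
        },
        10,
    );
    println!("outcome after {calls} calls: {outcome:?}");
}
```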
/// Call this to notify the compute (postgres) tier of new pageservers to use
/// for a tenant. notify() is called by each shard individually, and this function
/// will decide whether an update to the tenant is sent. An update is sent on the
/// condition that:
/// - We know a pageserver for every shard.
/// - All the shards have the same shard_count (i.e. we are not mid-split)
///
/// Cancellation token enables callers to drop out, e.g. if calling from a Reconciler
/// that is cancelled.
///
/// This function is fallible, including in the case that the control plane is transiently
/// unavailable. A limited number of retries are done internally to efficiently hide short unavailability
/// periods, but we don't retry forever. The **caller** is responsible for handling failures and
/// ensuring that they eventually call again to ensure that the compute is eventually notified of
/// the proper pageserver nodes for a tenant.
#[tracing::instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), node_id))]
pub(super) async fn notify(
&self,
tenant_shard_id: TenantShardId,
node_id: NodeId,
stripe_size: ShardStripeSize,
cancel: &CancellationToken,
) -> Result<(), NotifyError> {
let mut locked = self.state.lock().await;
use std::collections::hash_map::Entry;
let tenant = match locked.entry(tenant_shard_id.tenant_id) {
Entry::Vacant(e) => e.insert(ComputeHookTenant::new(
tenant_shard_id,
stripe_size,
node_id,
)),
Entry::Occupied(e) => {
let tenant = e.into_mut();
tenant.update(tenant_shard_id, stripe_size, node_id);
tenant
}
};
let reconfigure_request = tenant.maybe_reconfigure(tenant_shard_id.tenant_id);
let Some(reconfigure_request) = reconfigure_request else {
// The tenant doesn't yet have pageservers for all its shards: we won't notify anything
// until it does.
tracing::info!("Tenant isn't yet ready to emit a notification");
return Ok(());
};
if let Some(notify_url) = &self.config.compute_hook_url {
self.do_notify(notify_url, reconfigure_request, cancel)
.await
} else {
self.do_notify_local(reconfigure_request)
.await
.map_err(|e| {
// This path is for testing only, so munge the error into our prod-style error type.
tracing::error!("Local notification hook failed: {e}");
NotifyError::Fatal(StatusCode::INTERNAL_SERVER_ERROR)
})
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use pageserver_api::shard::{ShardCount, ShardNumber};
use utils::id::TenantId;
use super::*;
#[test]
fn tenant_updates() -> anyhow::Result<()> {
let tenant_id = TenantId::generate();
let mut tenant_state = ComputeHookTenant::new(
TenantShardId {
tenant_id,
shard_count: ShardCount::new(0),
shard_number: ShardNumber(0),
},
ShardStripeSize(12345),
NodeId(1),
);
// An unsharded tenant is always ready to emit a notification
assert!(tenant_state.maybe_reconfigure(tenant_id).is_some());
assert_eq!(
tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.shards
.len(),
1
);
assert!(tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.stripe_size
.is_none());
// Writing the first shard of a multi-sharded situation (i.e. in a split)
// resets the tenant state and puts it in a non-notifying state (need to
// see all shards)
tenant_state.update(
TenantShardId {
tenant_id,
shard_count: ShardCount::new(2),
shard_number: ShardNumber(1),
},
ShardStripeSize(32768),
NodeId(1),
);
assert!(tenant_state.maybe_reconfigure(tenant_id).is_none());
// Writing the second shard makes it ready to notify
tenant_state.update(
TenantShardId {
tenant_id,
shard_count: ShardCount::new(2),
shard_number: ShardNumber(0),
},
ShardStripeSize(32768),
NodeId(1),
);
assert!(tenant_state.maybe_reconfigure(tenant_id).is_some());
assert_eq!(
tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.shards
.len(),
2
);
assert_eq!(
tenant_state
.maybe_reconfigure(tenant_id)
.unwrap()
.stripe_size,
Some(ShardStripeSize(32768))
);
Ok(())
}
}

View File

@@ -31,7 +31,6 @@ pub(crate) enum PageserverState {
Available {
last_seen_at: Instant,
utilization: PageserverUtilization,
new: bool,
},
Offline,
}
@@ -128,7 +127,6 @@ impl HeartbeaterTask {
heartbeat_futs.push({
let jwt_token = self.jwt_token.clone();
let cancel = self.cancel.clone();
let new_node = !self.state.contains_key(node_id);
// Clone the node and mark it as available such that the request
// goes through to the pageserver even when the node is marked offline.
@@ -161,7 +159,6 @@ impl HeartbeaterTask {
PageserverState::Available {
last_seen_at: Instant::now(),
utilization,
new: new_node,
}
} else {
PageserverState::Offline
@@ -187,19 +184,6 @@ impl HeartbeaterTask {
}
}
}
tracing::info!(
"Heartbeat round complete for {} nodes, {} offline",
new_state.len(),
new_state
.values()
.filter(|s| match s {
PageserverState::Available { .. } => {
false
}
PageserverState::Offline => true,
})
.count()
);
let mut deltas = Vec::new();
let now = Instant::now();
@@ -223,7 +207,6 @@ impl HeartbeaterTask {
}
},
Vacant(_) => {
// This is a new node. Don't generate a delta for it.
deltas.push((node_id, ps_state.clone()));
}
}

View File

@@ -4,15 +4,12 @@ use crate::metrics::{
};
use crate::reconciler::ReconcileError;
use crate::service::{Service, STARTUP_RECONCILE_TIMEOUT};
use anyhow::Context;
use futures::Future;
use hyper::header::CONTENT_TYPE;
use hyper::{Body, Request, Response};
use hyper::{StatusCode, Uri};
use metrics::{BuildInfo, NeonMetrics};
use pageserver_api::controller_api::TenantCreateRequest;
use pageserver_api::models::{
TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
TenantConfigRequest, TenantCreateRequest, TenantLocationConfigRequest, TenantShardSplitRequest,
TenantTimeTravelRequest, TimelineCreateRequest,
};
use pageserver_api::shard::TenantShardId;
@@ -37,8 +34,7 @@ use utils::{
};
use pageserver_api::controller_api::{
NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest,
TenantShardMigrateRequest,
NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantShardMigrateRequest,
};
use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest};
@@ -47,19 +43,15 @@ use control_plane::storage_controller::{AttachHookRequest, InspectRequest};
use routerify::Middleware;
/// State available to HTTP request handlers
#[derive(Clone)]
pub struct HttpState {
service: Arc<crate::service::Service>,
auth: Option<Arc<SwappableJwtAuth>>,
neon_metrics: NeonMetrics,
allowlist_routes: Vec<Uri>,
}
impl HttpState {
pub fn new(
service: Arc<crate::service::Service>,
auth: Option<Arc<SwappableJwtAuth>>,
build_info: BuildInfo,
) -> Self {
pub fn new(service: Arc<crate::service::Service>, auth: Option<Arc<SwappableJwtAuth>>) -> Self {
let allowlist_routes = ["/status", "/ready", "/metrics"]
.iter()
.map(|v| v.parse().unwrap())
@@ -67,7 +59,6 @@ impl HttpState {
Self {
service,
auth,
neon_metrics: NeonMetrics::new(build_info),
allowlist_routes,
}
}
@@ -143,6 +134,52 @@ async fn handle_tenant_create(
)
}
// For tenant and timeline deletions, which both implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream. This avoids
// needing to track a "deleting" state for tenants.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);
loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}
let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}
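
With those constants, the wrapper polls at roughly t = 0s, 1s, 6s, 11s and 16s, then gives up around t = 21s so that the 25-second budget (and the caller's 30-second request timeout) is never exceeded. A plain simulation of that schedule, independent of the real code:

```
fn main() {
    let (initial, later, max_wait) = (1u64, 5u64, 25u64);
    let mut t = 0u64; // seconds since the first poll
    let mut retry = initial;
    loop {
        println!("poll at t={t}s");
        t += retry; // sleep before the next poll
        retry = later;
        if t + retry > max_wait {
            println!("give up at t={t}s, staying within the {max_wait}s budget");
            break;
        }
    }
}
```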
async fn handle_tenant_location_config(
service: Arc<Service>,
mut req: Request<Body>,
@@ -214,12 +251,6 @@ async fn handle_tenant_time_travel_remote_storage(
json_response(StatusCode::OK, ())
}
fn map_reqwest_hyper_status(status: reqwest::StatusCode) -> Result<hyper::StatusCode, ApiError> {
hyper::StatusCode::from_u16(status.as_u16())
.context("invalid status code")
.map_err(ApiError::InternalServerError)
}
async fn handle_tenant_secondary_download(
service: Arc<Service>,
req: Request<Body>,
@@ -228,7 +259,7 @@ async fn handle_tenant_secondary_download(
let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis);
let (status, progress) = service.tenant_secondary_download(tenant_id, wait).await?;
json_response(map_reqwest_hyper_status(status)?, progress)
json_response(status, progress)
}
async fn handle_tenant_delete(
@@ -238,17 +269,10 @@ async fn handle_tenant_delete(
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let status_code = service
.tenant_delete(tenant_id)
.await
.and_then(map_reqwest_hyper_status)?;
if status_code == StatusCode::NOT_FOUND {
// The pageserver uses 404 for successful deletion, but we use 200
json_response(StatusCode::OK, ())
} else {
json_response(status_code, ())
}
deletion_wrapper(service, move |service| async move {
service.tenant_delete(tenant_id).await
})
.await
}
async fn handle_tenant_timeline_create(
@@ -276,76 +300,12 @@ async fn handle_tenant_timeline_delete(
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
// For timeline deletions, which both implement an "initially return 202, then 404 once
// we're done" semantic, we wrap with a retry loop to expose a simpler API upstream.
async fn deletion_wrapper<R, F>(service: Arc<Service>, f: F) -> Result<Response<Body>, ApiError>
where
R: std::future::Future<Output = Result<StatusCode, ApiError>> + Send + 'static,
F: Fn(Arc<Service>) -> R + Send + Sync + 'static,
{
let started_at = Instant::now();
// To keep deletion reasonably snappy for small tenants, initially check after 1 second if deletion
// completed.
let mut retry_period = Duration::from_secs(1);
// On subsequent retries, wait longer.
let max_retry_period = Duration::from_secs(5);
// Enable callers with a 30 second request timeout to reliably get a response
let max_wait = Duration::from_secs(25);
loop {
let status = f(service.clone()).await?;
match status {
StatusCode::ACCEPTED => {
tracing::info!("Deletion accepted, waiting to try again...");
tokio::time::sleep(retry_period).await;
retry_period = max_retry_period;
}
StatusCode::NOT_FOUND => {
tracing::info!("Deletion complete");
return json_response(StatusCode::OK, ());
}
_ => {
tracing::warn!("Unexpected status {status}");
return json_response(status, ());
}
}
let now = Instant::now();
if now + retry_period > started_at + max_wait {
tracing::info!("Deletion timed out waiting for 404");
// REQUEST_TIMEOUT would be more appropriate, but CONFLICT is already part of
// the pageserver's swagger definition for this endpoint, and has the same desired
// effect of causing the control plane to retry later.
return json_response(StatusCode::CONFLICT, ());
}
}
}
deletion_wrapper(service, move |service| async move {
service
.tenant_timeline_delete(tenant_id, timeline_id)
.await
.and_then(map_reqwest_hyper_status)
service.tenant_timeline_delete(tenant_id, timeline_id).await
})
.await
}
async fn handle_tenant_timeline_detach_ancestor(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?;
let res = service
.tenant_timeline_detach_ancestor(tenant_id, timeline_id)
.await?;
json_response(StatusCode::OK, res)
}
async fn handle_tenant_timeline_passthrough(
service: Arc<Service>,
req: Request<Body>,
@@ -404,9 +364,11 @@ async fn handle_tenant_timeline_passthrough(
}
// We have a reqwest::Response, would like an http::Response
let mut builder = hyper::Response::builder().status(map_reqwest_hyper_status(resp.status())?);
let mut builder = hyper::Response::builder()
.status(resp.status())
.version(resp.version());
for (k, v) in resp.headers() {
builder = builder.header(k.as_str(), v.as_bytes());
builder = builder.header(k, v);
}
let response = builder
@@ -430,21 +392,12 @@ async fn handle_tenant_describe(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Scrubber)?;
check_permissions(&req, Scope::Admin)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
json_response(StatusCode::OK, service.tenant_describe(tenant_id)?)
}
async fn handle_tenant_list(
service: Arc<Service>,
req: Request<Body>,
) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
json_response(StatusCode::OK, service.tenant_list())
}
async fn handle_node_register(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
@@ -458,10 +411,7 @@ async fn handle_node_list(req: Request<Body>) -> Result<Response<Body>, ApiError
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let nodes = state.service.node_list().await?;
let api_nodes = nodes.into_iter().map(|n| n.describe()).collect::<Vec<_>>();
json_response(StatusCode::OK, api_nodes)
json_response(StatusCode::OK, state.service.node_list().await?)
}
async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -472,14 +422,6 @@ async fn handle_node_drop(req: Request<Body>) -> Result<Response<Body>, ApiError
json_response(StatusCode::OK, state.service.node_drop(node_id).await?)
}
async fn handle_node_delete(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
json_response(StatusCode::OK, state.service.node_delete(node_id).await?)
}
async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
@@ -505,61 +447,6 @@ async fn handle_node_configure(mut req: Request<Body>) -> Result<Response<Body>,
)
}
async fn handle_node_status(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
let node_status = state.service.get_node(node_id).await?;
json_response(StatusCode::OK, node_status)
}
async fn handle_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.start_node_drain(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_cancel_node_drain(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.cancel_node_drain(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.start_node_fill(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_cancel_node_fill(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
let node_id: NodeId = parse_request_param(&req, "node_id")?;
state.service.cancel_node_fill(node_id).await?;
json_response(StatusCode::ACCEPTED, ())
}
async fn handle_tenant_shard_split(
service: Arc<Service>,
mut req: Request<Body>,
@@ -591,22 +478,6 @@ async fn handle_tenant_shard_migrate(
)
}
async fn handle_tenant_update_policy(mut req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
let update_req = json_request::<TenantPolicyRequest>(&mut req).await?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state
.service
.tenant_update_policy(tenant_id, update_req)
.await?,
)
}
async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
@@ -616,18 +487,6 @@ async fn handle_tenant_drop(req: Request<Body>) -> Result<Response<Body>, ApiErr
json_response(StatusCode::OK, state.service.tenant_drop(tenant_id).await?)
}
async fn handle_tenant_import(req: Request<Body>) -> Result<Response<Body>, ApiError> {
let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?;
check_permissions(&req, Scope::PageServerApi)?;
let state = get_state(&req);
json_response(
StatusCode::OK,
state.service.tenant_import(tenant_id).await?,
)
}
async fn handle_tenants_dump(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
@@ -650,14 +509,6 @@ async fn handle_consistency_check(req: Request<Body>) -> Result<Response<Body>,
json_response(StatusCode::OK, state.service.consistency_check().await?)
}
async fn handle_reconcile_all(req: Request<Body>) -> Result<Response<Body>, ApiError> {
check_permissions(&req, Scope::Admin)?;
let state = get_state(&req);
json_response(StatusCode::OK, state.service.reconcile_all_now().await?)
}
/// Status endpoint is just used for checking that our HTTP listener is up
async fn handle_status(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
json_response(StatusCode::OK, ())
@@ -714,17 +565,9 @@ where
.await
}
/// Check if the required scope is held in the request's token, or if the request has
/// a token with 'admin' scope then always permit it.
fn check_permissions(request: &Request<Body>, required_scope: Scope) -> Result<(), ApiError> {
check_permission_with(request, |claims| {
match crate::auth::check_permission(claims, required_scope) {
Err(e) => match crate::auth::check_permission(claims, Scope::Admin) {
Ok(()) => Ok(()),
Err(_) => Err(e),
},
Ok(()) => Ok(()),
}
crate::auth::check_permission(claims, required_scope)
})
}
@@ -784,11 +627,10 @@ fn epilogue_metrics_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>
})
}
pub async fn measured_metrics_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
pub async fn measured_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
pub const TEXT_FORMAT: &str = "text/plain; version=0.0.4";
let state = get_state(&req);
let payload = crate::metrics::METRICS_REGISTRY.encode(&state.neon_metrics);
let payload = crate::metrics::METRICS_REGISTRY.encode();
let response = Response::builder()
.status(200)
.header(CONTENT_TYPE, TEXT_FORMAT)
@@ -817,7 +659,6 @@ where
pub fn make_router(
service: Arc<Service>,
auth: Option<Arc<SwappableJwtAuth>>,
build_info: BuildInfo,
) -> RouterBuilder<hyper::Body, ApiError> {
let mut router = endpoint::make_router()
.middleware(prologue_metrics_middleware())
@@ -834,7 +675,7 @@ pub fn make_router(
}
router
.data(Arc::new(HttpState::new(service, auth, build_info)))
.data(Arc::new(HttpState::new(service, auth)))
.get("/metrics", |r| {
named_request_span(r, measured_metrics_handler, RequestName("metrics"))
})
@@ -865,13 +706,6 @@ pub fn make_router(
.post("/debug/v1/node/:node_id/drop", |r| {
named_request_span(r, handle_node_drop, RequestName("debug_v1_node_drop"))
})
.post("/debug/v1/tenant/:tenant_id/import", |r| {
named_request_span(
r,
handle_tenant_import,
RequestName("debug_v1_tenant_import"),
)
})
.get("/debug/v1/tenant", |r| {
named_request_span(r, handle_tenants_dump, RequestName("debug_v1_tenant"))
})
@@ -892,9 +726,6 @@ pub fn make_router(
RequestName("debug_v1_consistency_check"),
)
})
.post("/debug/v1/reconcile_all", |r| {
request_span(r, handle_reconcile_all)
})
.put("/debug/v1/failpoints", |r| {
request_span(r, |r| failpoints_handler(r, CancellationToken::new()))
})
@@ -902,9 +733,6 @@ pub fn make_router(
.post("/control/v1/node", |r| {
named_request_span(r, handle_node_register, RequestName("control_v1_node"))
})
.delete("/control/v1/node/:node_id", |r| {
named_request_span(r, handle_node_delete, RequestName("control_v1_node_delete"))
})
.get("/control/v1/node", |r| {
named_request_span(r, handle_node_list, RequestName("control_v1_node"))
})
@@ -915,30 +743,6 @@ pub fn make_router(
RequestName("control_v1_node_config"),
)
})
.get("/control/v1/node/:node_id", |r| {
named_request_span(r, handle_node_status, RequestName("control_v1_node_status"))
})
.put("/control/v1/node/:node_id/drain", |r| {
named_request_span(r, handle_node_drain, RequestName("control_v1_node_drain"))
})
.delete("/control/v1/node/:node_id/drain", |r| {
named_request_span(
r,
handle_cancel_node_drain,
RequestName("control_v1_cancel_node_drain"),
)
})
.put("/control/v1/node/:node_id/fill", |r| {
named_request_span(r, handle_node_fill, RequestName("control_v1_node_fill"))
})
.delete("/control/v1/node/:node_id/fill", |r| {
named_request_span(
r,
handle_cancel_node_fill,
RequestName("control_v1_cancel_node_fill"),
)
})
// TODO(vlad): endpoint for cancelling drain and fill
// Tenant Shard operations
.put("/control/v1/tenant/:tenant_shard_id/migrate", |r| {
tenant_service_handler(
@@ -961,16 +765,6 @@ pub fn make_router(
RequestName("control_v1_tenant_describe"),
)
})
.get("/control/v1/tenant", |r| {
tenant_service_handler(r, handle_tenant_list, RequestName("control_v1_tenant_list"))
})
.put("/control/v1/tenant/:tenant_id/policy", |r| {
named_request_span(
r,
handle_tenant_update_policy,
RequestName("control_v1_tenant_policy"),
)
})
// Tenant operations
// The ^/v1/ endpoints act as a "Virtual Pageserver", enabling shard-naive clients to call into
// this service to manage tenants that actually consist of many tenant shards, as if they are a single entity.
@@ -1022,17 +816,7 @@ pub fn make_router(
RequestName("v1_tenant_timeline"),
)
})
.put(
"/v1/tenant/:tenant_id/timeline/:timeline_id/detach_ancestor",
|r| {
tenant_service_handler(
r,
handle_tenant_timeline_detach_ancestor,
RequestName("v1_tenant_timeline_detach_ancestor"),
)
},
)
// Tenant detail GET passthrough to shard zero:
// Tenant detail GET passthrough to shard zero
.get("/v1/tenant/:tenant_id", |r| {
tenant_service_handler(
r,
@@ -1040,14 +824,13 @@ pub fn make_router(
RequestName("v1_tenant_passthrough"),
)
})
// The `*` in the URL is a wildcard: any tenant/timeline GET APIs on the pageserver
// are implicitly exposed here. This must be last in the list to avoid
// taking precedence over other GET methods we might implement by hand.
.get("/v1/tenant/:tenant_id/*", |r| {
// Timeline GET passthrough to shard zero. Note that the `*` in the URL is a wildcard: any future
// timeline GET APIs will be implicitly included.
.get("/v1/tenant/:tenant_id/timeline*", |r| {
tenant_service_handler(
r,
handle_tenant_timeline_passthrough,
RequestName("v1_tenant_passthrough"),
RequestName("v1_tenant_timeline_passthrough"),
)
})
}

View File

@@ -0,0 +1,54 @@
use std::{collections::HashMap, sync::Arc};
/// A map of locks covering some arbitrary identifiers. Useful if you have a collection of objects but don't
/// want to embed a lock in each one, or if your locking granularity is different to your object granularity.
/// For example, used in the storage controller where the objects are tenant shards, but sometimes locking
/// is needed at a tenant-wide granularity.
pub(crate) struct IdLockMap<T>
where
T: Eq + PartialEq + std::hash::Hash,
{
/// A synchronous lock for getting/setting the async locks that our callers will wait on.
entities: std::sync::Mutex<std::collections::HashMap<T, Arc<tokio::sync::RwLock<()>>>>,
}
impl<T> IdLockMap<T>
where
T: Eq + PartialEq + std::hash::Hash,
{
pub(crate) fn shared(
&self,
key: T,
) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockReadGuard<()>> {
let mut locked = self.entities.lock().unwrap();
let entry = locked.entry(key).or_default();
entry.clone().read_owned()
}
pub(crate) fn exclusive(
&self,
key: T,
) -> impl std::future::Future<Output = tokio::sync::OwnedRwLockWriteGuard<()>> {
let mut locked = self.entities.lock().unwrap();
let entry = locked.entry(key).or_default();
entry.clone().write_owned()
}
/// Rather than building a lock guard that re-takes the [`Self::entities`] lock, we just do
/// periodic housekeeping to avoid the map growing indefinitely
pub(crate) fn housekeeping(&self) {
let mut locked = self.entities.lock().unwrap();
locked.retain(|_k, lock| lock.try_write().is_err())
}
}
impl<T> Default for IdLockMap<T>
where
T: Eq + PartialEq + std::hash::Hash,
{
fn default() -> Self {
Self {
entities: std::sync::Mutex::new(HashMap::new()),
}
}
}
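
A minimal usage sketch for the lock map above, assuming IdLockMap is in scope and a tokio runtime (rt + macros features) is available; keys are simplified to u64 for illustration:

```
#[tokio::main]
async fn main() {
    let locks: IdLockMap<u64> = Default::default();

    {
        // Exclusive section for key 7: other callers using the same key wait here.
        let _write = locks.exclusive(7).await;
        // ... perform a tenant-wide operation ...
    }

    // Shared sections may proceed concurrently for the same key.
    let _read_a = locks.shared(7).await;
    let _read_b = locks.shared(7).await;

    // Drop map entries whose lock is not currently held by anyone;
    // key 7 is retained here because the read guards above are still alive.
    locks.housekeeping();
}
```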

View File

@@ -2,7 +2,6 @@ use serde::Serialize;
use utils::seqwait::MonotonicCounter;
mod auth;
mod background_node_operations;
mod compute_hook;
mod heartbeater;
pub mod http;
@@ -15,7 +14,7 @@ mod reconciler;
mod scheduler;
mod schema;
pub mod service;
mod tenant_shard;
mod tenant_state;
#[derive(Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Serialize)]
struct Sequence(u64);

View File

@@ -1,23 +1,18 @@
use anyhow::{anyhow, Context};
use attachment_service::http::make_router;
use attachment_service::metrics::preinitialize_metrics;
use attachment_service::persistence::Persistence;
use attachment_service::service::{Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT};
use camino::Utf8PathBuf;
use clap::Parser;
use diesel::Connection;
use metrics::launch_timestamp::LaunchTimestamp;
use metrics::BuildInfo;
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;
use storage_controller::http::make_router;
use storage_controller::metrics::preinitialize_metrics;
use storage_controller::persistence::Persistence;
use storage_controller::service::{
Config, Service, MAX_UNAVAILABLE_INTERVAL_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT,
};
use tokio::signal::unix::SignalKind;
use tokio_util::sync::CancellationToken;
use utils::auth::{JwtAuth, SwappableJwtAuth};
use utils::logging::{self, LogFormat};
use utils::sentry_init::init_sentry;
use utils::{project_build_tag, project_git_version, tcp_listener};
project_git_version!(GIT_VERSION);
@@ -51,7 +46,11 @@ struct Cli {
#[arg(long)]
compute_hook_url: Option<String>,
/// URL to connect to postgres, like postgresql://localhost:1234/storage_controller
/// Path to the .json file to store state (will be created if it doesn't exist)
#[arg(short, long)]
path: Option<Utf8PathBuf>,
/// URL to connect to postgres, like postgresql://localhost:1234/attachment_service
#[arg(long)]
database_url: Option<String>,
@@ -62,24 +61,6 @@ struct Cli {
/// Grace period before marking unresponsive pageserver offline
#[arg(long)]
max_unavailable_interval: Option<humantime::Duration>,
/// Size threshold for automatically splitting shards (disabled by default)
#[arg(long)]
split_threshold: Option<u64>,
/// Maximum number of reconcilers that may run in parallel
#[arg(long)]
reconciler_concurrency: Option<usize>,
/// How long to wait for the initial database connection to be available.
#[arg(long, default_value = "5s")]
db_connect_timeout: humantime::Duration,
/// `neon_local` sets this to the path of the neon_local repo dir.
/// Only relevant for testing.
// TODO: make `cfg(feature = "testing")`
#[arg(long)]
neon_local_repo_dir: Option<PathBuf>,
}
enum StrictMode {
@@ -177,8 +158,6 @@ fn main() -> anyhow::Result<()> {
std::process::exit(1);
}));
let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
tokio::runtime::Builder::new_current_thread()
// We use spawn_blocking for database operations, so require approximately
// as many blocking threads as we will open database connections.
@@ -202,18 +181,14 @@ async fn async_main() -> anyhow::Result<()> {
let args = Cli::parse();
tracing::info!(
"version: {}, launch_timestamp: {}, build_tag {}, listening on {}",
"version: {}, launch_timestamp: {}, build_tag {}, state at {}, listening on {}",
GIT_VERSION,
launch_ts.to_string(),
BUILD_TAG,
args.path.as_ref().unwrap_or(&Utf8PathBuf::from("<none>")),
args.listen
);
let build_info = BuildInfo {
revision: GIT_VERSION,
build_tag: BUILD_TAG,
};
let strict_mode = if args.dev {
StrictMode::Dev
} else {
@@ -258,21 +233,15 @@ async fn async_main() -> anyhow::Result<()> {
.max_unavailable_interval
.map(humantime::Duration::into)
.unwrap_or(MAX_UNAVAILABLE_INTERVAL_DEFAULT),
reconciler_concurrency: args
.reconciler_concurrency
.unwrap_or(RECONCILER_CONCURRENCY_DEFAULT),
split_threshold: args.split_threshold,
neon_local_repo_dir: args.neon_local_repo_dir,
};
// After loading secrets & config, but before starting anything else, apply database migrations
Persistence::await_connection(&secrets.database_url, args.db_connect_timeout.into()).await?;
migration_run(&secrets.database_url)
.await
.context("Running database migrations")?;
let persistence = Arc::new(Persistence::new(secrets.database_url));
let json_path = args.path;
let persistence = Arc::new(Persistence::new(secrets.database_url, json_path.clone()));
let service = Service::spawn(config, persistence.clone()).await?;
@@ -281,7 +250,7 @@ async fn async_main() -> anyhow::Result<()> {
let auth = secrets
.public_key
.map(|jwt_auth| Arc::new(SwappableJwtAuth::new(jwt_auth)));
let router = make_router(service.clone(), auth, build_info)
let router = make_router(service.clone(), auth)
.build()
.map_err(|err| anyhow!(err))?;
let router_service = utils::http::RouterService::new(router).unwrap();
@@ -310,23 +279,22 @@ async fn async_main() -> anyhow::Result<()> {
}
tracing::info!("Terminating on signal");
// Stop HTTP server first, so that we don't have to service requests
// while shutting down Service.
server_shutdown.cancel();
match tokio::time::timeout(Duration::from_secs(5), server_task).await {
Ok(Ok(_)) => {
tracing::info!("Joined HTTP server task");
}
Ok(Err(e)) => {
tracing::error!("Error joining HTTP server task: {e}")
}
Err(_) => {
tracing::warn!("Timed out joining HTTP server task");
// We will fall through and shut down the service anyway; any request handlers
// in flight will experience cancellation & their clients will see a torn connection.
if json_path.is_some() {
// Write out a JSON dump on shutdown: this is used in compat tests to avoid passing
// full postgres dumps around.
if let Err(e) = persistence.write_tenants_json().await {
tracing::error!("Failed to write JSON on shutdown: {e}")
}
}
// Stop HTTP server first, so that we don't have to service requests
// while shutting down Service
server_shutdown.cancel();
if let Err(e) = server_task.await {
tracing::error!("Error joining HTTP server task: {e}")
}
tracing::info!("Joined HTTP server task");
service.shutdown().await;
tracing::info!("Service shutdown complete");

View File

@@ -8,8 +8,10 @@
//! The rest of the code defines label group types and deals with converting outer types to labels.
//!
use bytes::Bytes;
use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup};
use metrics::NeonMetrics;
use measured::{
label::{LabelValue, StaticLabelSet},
FixedCardinalityLabel, MetricGroup,
};
use once_cell::sync::Lazy;
use std::sync::Mutex;
@@ -24,28 +26,21 @@ pub fn preinitialize_metrics() {
pub(crate) struct StorageControllerMetrics {
pub(crate) metrics_group: StorageControllerMetricGroup,
encoder: Mutex<measured::text::BufferedTextEncoder>,
encoder: Mutex<measured::text::TextEncoder>,
}
#[derive(measured::MetricGroup)]
#[metric(new())]
pub(crate) struct StorageControllerMetricGroup {
/// Count of how many times we spawn a reconcile task
pub(crate) storage_controller_reconcile_spawn: measured::Counter,
/// Reconciler tasks completed, broken down by success/failure/cancelled
pub(crate) storage_controller_reconcile_complete:
measured::CounterVec<ReconcileCompleteLabelGroupSet>,
/// Count of how many times we make an optimization change to a tenant's scheduling
pub(crate) storage_controller_schedule_optimization: measured::Counter,
/// HTTP request status counters for handled requests
pub(crate) storage_controller_http_request_status:
measured::CounterVec<HttpRequestStatusLabelGroupSet>,
/// HTTP request handler latency across all status codes
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_http_request_latency:
measured::HistogramVec<HttpRequestLatencyLabelGroupSet, 5>,
@@ -57,7 +52,6 @@ pub(crate) struct StorageControllerMetricGroup {
/// Latency of HTTP requests to the pageserver, broken down by pageserver
/// node id, request name and method. This includes both successful and unsuccessful
/// requests.
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_pageserver_request_latency:
measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
@@ -69,7 +63,6 @@ pub(crate) struct StorageControllerMetricGroup {
/// Latency of pass-through HTTP requests to the pageserver, broken down by pageserver
/// node id, request name and method. This includes both successful and unsuccessful
/// requests.
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_passthrough_request_latency:
measured::HistogramVec<PageserverRequestLabelGroupSet, 5>,
@@ -78,34 +71,75 @@ pub(crate) struct StorageControllerMetricGroup {
measured::CounterVec<DatabaseQueryErrorLabelGroupSet>,
/// Latency of database queries, broken down by operation.
#[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 2.0))]
pub(crate) storage_controller_database_query_latency:
measured::HistogramVec<DatabaseQueryLatencyLabelGroupSet, 5>,
}
impl StorageControllerMetrics {
pub(crate) fn encode(&self, neon_metrics: &NeonMetrics) -> Bytes {
pub(crate) fn encode(&self) -> Bytes {
let mut encoder = self.encoder.lock().unwrap();
neon_metrics
.collect_group_into(&mut *encoder)
.unwrap_or_else(|infallible| match infallible {});
self.metrics_group
.collect_group_into(&mut *encoder)
.unwrap_or_else(|infallible| match infallible {});
self.metrics_group.collect_into(&mut *encoder);
encoder.finish()
}
}
impl Default for StorageControllerMetrics {
fn default() -> Self {
let mut metrics_group = StorageControllerMetricGroup::new();
metrics_group
.storage_controller_reconcile_complete
.init_all_dense();
Self {
metrics_group,
encoder: Mutex::new(measured::text::BufferedTextEncoder::new()),
metrics_group: StorageControllerMetricGroup::new(),
encoder: Mutex::new(measured::text::TextEncoder::new()),
}
}
}
impl StorageControllerMetricGroup {
pub(crate) fn new() -> Self {
Self {
storage_controller_reconcile_spawn: measured::Counter::new(),
storage_controller_reconcile_complete: measured::CounterVec::new(
ReconcileCompleteLabelGroupSet {
status: StaticLabelSet::new(),
},
),
storage_controller_http_request_status: measured::CounterVec::new(
HttpRequestStatusLabelGroupSet {
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
status: StaticLabelSet::new(),
},
),
storage_controller_http_request_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
storage_controller_pageserver_request_error: measured::CounterVec::new(
PageserverRequestLabelGroupSet {
pageserver_id: lasso::ThreadedRodeo::new(),
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
},
),
storage_controller_pageserver_request_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
storage_controller_passthrough_request_error: measured::CounterVec::new(
PageserverRequestLabelGroupSet {
pageserver_id: lasso::ThreadedRodeo::new(),
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
},
),
storage_controller_passthrough_request_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
storage_controller_database_query_error: measured::CounterVec::new(
DatabaseQueryErrorLabelGroupSet {
operation: StaticLabelSet::new(),
error_type: StaticLabelSet::new(),
},
),
storage_controller_database_query_latency: measured::HistogramVec::new(
measured::metric::histogram::Thresholds::exponential_buckets(0.1, 2.0),
),
}
}
}
@@ -119,7 +153,7 @@ pub(crate) struct ReconcileCompleteLabelGroup {
#[derive(measured::LabelGroup)]
#[label(set = HttpRequestStatusLabelGroupSet)]
pub(crate) struct HttpRequestStatusLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
#[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) path: &'a str,
pub(crate) method: Method,
pub(crate) status: StatusCode,
@@ -128,21 +162,40 @@ pub(crate) struct HttpRequestStatusLabelGroup<'a> {
#[derive(measured::LabelGroup)]
#[label(set = HttpRequestLatencyLabelGroupSet)]
pub(crate) struct HttpRequestLatencyLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
#[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) path: &'a str,
pub(crate) method: Method,
}
impl Default for HttpRequestLatencyLabelGroupSet {
fn default() -> Self {
Self {
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
}
}
}
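As a reading aid for the label plumbing above: `path` is a dynamic label interned at record time via `lasso::ThreadedRodeo`, while `method` and `status` are fixed-cardinality values, with the `StatusCode` wrapper rendered as its integer code (see the `LabelValue` impl further down). A small in-crate sketch of building such a group; the call that actually bumps the counter is not visible in this diff and is therefore omitted:

```rust
// Sketch only: constructs the labels for one finished HTTP request using the types
// defined above. The increment of storage_controller_http_request_status is omitted
// because that API is not shown in this diff.
use crate::metrics::{HttpRequestStatusLabelGroup, Method, StatusCode};

fn example_request_labels(path: &str) -> HttpRequestStatusLabelGroup<'_> {
    HttpRequestStatusLabelGroup {
        path,                                            // dynamic label, interned by lasso
        method: Method::Get,                             // fixed-cardinality enum
        status: StatusCode(hyper::http::StatusCode::OK), // written out as the integer 200
    }
}
```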
#[derive(measured::LabelGroup, Clone)]
#[label(set = PageserverRequestLabelGroupSet)]
pub(crate) struct PageserverRequestLabelGroup<'a> {
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
#[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) pageserver_id: &'a str,
#[label(dynamic_with = lasso::ThreadedRodeo, default)]
#[label(dynamic_with = lasso::ThreadedRodeo)]
pub(crate) path: &'a str,
pub(crate) method: Method,
}
impl Default for PageserverRequestLabelGroupSet {
fn default() -> Self {
Self {
pageserver_id: lasso::ThreadedRodeo::new(),
path: lasso::ThreadedRodeo::new(),
method: StaticLabelSet::new(),
}
}
}
#[derive(measured::LabelGroup)]
#[label(set = DatabaseQueryErrorLabelGroupSet)]
pub(crate) struct DatabaseQueryErrorLabelGroup {
@@ -156,7 +209,7 @@ pub(crate) struct DatabaseQueryLatencyLabelGroup {
pub(crate) operation: DatabaseOperation,
}
#[derive(FixedCardinalityLabel, Clone, Copy)]
#[derive(FixedCardinalityLabel)]
pub(crate) enum ReconcileOutcome {
#[label(rename = "ok")]
Success,
@@ -164,7 +217,7 @@ pub(crate) enum ReconcileOutcome {
Cancel,
}
#[derive(FixedCardinalityLabel, Copy, Clone)]
#[derive(FixedCardinalityLabel, Clone)]
pub(crate) enum Method {
Get,
Put,
@@ -189,12 +242,11 @@ impl From<hyper::Method> for Method {
}
}
#[derive(Clone, Copy)]
pub(crate) struct StatusCode(pub(crate) hyper::http::StatusCode);
impl LabelValue for StatusCode {
fn visit<V: measured::label::LabelVisitor>(&self, v: V) -> V::Output {
v.write_int(self.0.as_u16() as i64)
v.write_int(self.0.as_u16() as u64)
}
}
@@ -212,7 +264,7 @@ impl FixedCardinalityLabel for StatusCode {
}
}
#[derive(FixedCardinalityLabel, Clone, Copy)]
#[derive(FixedCardinalityLabel)]
pub(crate) enum DatabaseErrorLabel {
Query,
Connection,


@@ -1,14 +1,13 @@
use std::{str::FromStr, time::Duration};
use hyper::StatusCode;
use pageserver_api::{
controller_api::{
NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy,
TenantLocateResponseShard, UtilizationScore,
NodeAvailability, NodeRegisterRequest, NodeSchedulingPolicy, TenantLocateResponseShard,
},
shard::TenantShardId,
};
use pageserver_client::mgmt_api;
use reqwest::StatusCode;
use serde::Serialize;
use tokio_util::sync::CancellationToken;
use utils::{backoff, id::NodeId};
@@ -59,10 +58,6 @@ impl Node {
self.id
}
pub(crate) fn get_scheduling(&self) -> NodeSchedulingPolicy {
self.scheduling
}
pub(crate) fn set_scheduling(&mut self, scheduling: NodeSchedulingPolicy) {
self.scheduling = scheduling
}
@@ -120,16 +115,6 @@ impl Node {
match (self.availability, availability) {
(Offline, Active(_)) => ToActive,
(Active(_), Offline) => ToOffline,
// Consider the case when the storage controller handles the re-attach of a node
// before the heartbeats detect that the node is back online. We still need
// [`Service::node_configure`] to attempt reconciliations for shards with an
// unknown observed location.
// The unsavoury match arm below handles this situation.
(Active(lhs), Active(rhs))
if lhs == UtilizationScore::worst() && rhs < UtilizationScore::worst() =>
{
ToActive
}
_ => Unchanged,
}
}
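The extra `Active → Active` arm deserves a gloss: at re-attach time the node is recorded as Active with the worst utilization score, so when a later heartbeat reports a genuinely better score the controller must still treat it as a `ToActive` transition and re-run reconciliation. A standalone mirror of that logic, using stand-in local types because the real method and enum names are not visible in this hunk:

```rust
// Stand-in types: `u32` plays the role of UtilizationScore, and these enums mirror
// (but are not) the project's availability/transition types.
#[derive(PartialEq)]
enum Availability { Offline, Active(u32) }
enum Transition { ToActive, ToOffline, Unchanged }

fn transition(old: &Availability, new: &Availability, worst: u32) -> Transition {
    use Availability::*;
    match (old, new) {
        (Offline, Active(_)) => Transition::ToActive,
        (Active(_), Offline) => Transition::ToOffline,
        // Node re-attached before the heartbeat noticed it was back: it sits at the
        // worst score until a real heartbeat arrives, which should count as coming online.
        (Active(lhs), Active(rhs)) if *lhs == worst && *rhs < worst => Transition::ToActive,
        _ => Transition::Unchanged,
    }
}
```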
@@ -155,7 +140,6 @@ impl Node {
NodeSchedulingPolicy::Draining => MaySchedule::No,
NodeSchedulingPolicy::Filling => MaySchedule::Yes(score),
NodeSchedulingPolicy::Pause => MaySchedule::No,
NodeSchedulingPolicy::PauseForRestart => MaySchedule::No,
}
}
@@ -172,7 +156,7 @@ impl Node {
listen_http_port,
listen_pg_addr,
listen_pg_port,
scheduling: NodeSchedulingPolicy::Active,
scheduling: NodeSchedulingPolicy::Filling,
availability: NodeAvailability::Offline,
cancel: CancellationToken::new(),
}
@@ -226,7 +210,7 @@ impl Node {
fn is_fatal(e: &mgmt_api::Error) -> bool {
use mgmt_api::Error::*;
match e {
SendRequest(_) | ReceiveBody(_) | ReceiveErrorBody(_) => false,
ReceiveBody(_) | ReceiveErrorBody(_) => false,
ApiError(StatusCode::SERVICE_UNAVAILABLE, _)
| ApiError(StatusCode::GATEWAY_TIMEOUT, _)
| ApiError(StatusCode::REQUEST_TIMEOUT, _) => false,
@@ -272,19 +256,6 @@ impl Node {
)
.await
}
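`is_fatal` classifies which mgmt_api errors are worth retrying (transport failures and timeout-style statuses) versus which should be surfaced immediately. The retry helper that consumes it is not shown in this hunk; a minimal, generic sketch of the pattern, not the project's actual helper:

```rust
// Generic retry sketch: keeps retrying an async operation while a caller-supplied
// classifier reports the error as non-fatal, with a simple linear backoff.
use std::time::Duration;

async fn retry_while_non_fatal<T, E, Fut>(
    mut op: impl FnMut() -> Fut,
    is_fatal: impl Fn(&E) -> bool,
    max_attempts: usize,
) -> Result<T, E>
where
    Fut: std::future::Future<Output = Result<T, E>>,
{
    let mut attempt = 0;
    loop {
        match op().await {
            Ok(v) => return Ok(v),
            Err(e) if !is_fatal(&e) && attempt + 1 < max_attempts => {
                attempt += 1;
                tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
            }
            Err(e) => return Err(e),
        }
    }
}
```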
/// Generate the simplified API-friendly description of a node's state
pub(crate) fn describe(&self) -> NodeDescribeResponse {
NodeDescribeResponse {
id: self.id,
availability: self.availability.into(),
scheduling: self.scheduling,
listen_http_addr: self.listen_http_addr.clone(),
listen_http_port: self.listen_http_port,
listen_pg_addr: self.listen_pg_addr.clone(),
listen_pg_port: self.listen_pg_port,
}
}
}
impl std::fmt::Display for Node {


@@ -1,15 +1,13 @@
use pageserver_api::{
models::{
detach_ancestor::AncestorDetached, LocationConfig, LocationConfigListResponse,
PageserverUtilization, SecondaryProgress, TenantScanRemoteStorageResponse,
LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress,
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
TopTenantShardsRequest, TopTenantShardsResponse,
},
shard::TenantShardId,
};
use pageserver_client::mgmt_api::{Client, Result};
use reqwest::StatusCode;
use utils::id::{NodeId, TenantId, TimelineId};
use utils::id::{NodeId, TimelineId};
/// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage
/// controller to collect metrics in a non-intrusive manner.
@@ -90,18 +88,6 @@ impl PageserverClient {
)
}
pub(crate) async fn tenant_scan_remote_storage(
&self,
tenant_id: TenantId,
) -> Result<TenantScanRemoteStorageResponse> {
measured_request!(
"tenant_scan_remote_storage",
crate::metrics::Method::Get,
&self.node_id_label,
self.inner.tenant_scan_remote_storage(tenant_id).await
)
}
pub(crate) async fn tenant_secondary_download(
&self,
tenant_id: TenantShardId,
@@ -115,27 +101,6 @@ impl PageserverClient {
)
}
pub(crate) async fn tenant_secondary_status(
&self,
tenant_shard_id: TenantShardId,
) -> Result<SecondaryProgress> {
measured_request!(
"tenant_secondary_status",
crate::metrics::Method::Get,
&self.node_id_label,
self.inner.tenant_secondary_status(tenant_shard_id).await
)
}
pub(crate) async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
measured_request!(
"tenant_heatmap_upload",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner.tenant_heatmap_upload(tenant_id).await
)
}
pub(crate) async fn location_config(
&self,
tenant_shard_id: TenantShardId,
@@ -227,21 +192,6 @@ impl PageserverClient {
)
}
pub(crate) async fn timeline_detach_ancestor(
&self,
tenant_shard_id: TenantShardId,
timeline_id: TimelineId,
) -> Result<AncestorDetached> {
measured_request!(
"timeline_detach_ancestor",
crate::metrics::Method::Put,
&self.node_id_label,
self.inner
.timeline_detach_ancestor(tenant_shard_id, timeline_id)
.await
)
}
pub(crate) async fn get_utilization(&self) -> Result<PageserverUtilization> {
measured_request!(
"utilization",
@@ -250,16 +200,4 @@ impl PageserverClient {
self.inner.get_utilization().await
)
}
pub(crate) async fn top_tenant_shards(
&self,
request: TopTenantShardsRequest,
) -> Result<TopTenantShardsResponse> {
measured_request!(
"top_tenants",
crate::metrics::Method::Post,
&self.node_id_label,
self.inner.top_tenant_shards(request).await
)
}
}
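Every wrapper above funnels through a `measured_request!` macro whose definition is not part of this diff. A plausible sketch of what such a wrapper does, assuming it starts the pageserver-request latency timer from metrics.rs around the awaited call (error counting via `storage_controller_pageserver_request_error` is omitted because that counter's increment API is not visible here):

```rust
// Plausible sketch only; the real measured_request! body is not shown in this diff.
// Assumes METRICS_REGISTRY is the global registry referenced from persistence.rs.
use crate::metrics::{Method, PageserverRequestLabelGroup, METRICS_REGISTRY};

async fn measured<T, E>(
    path: &str,
    method: Method,
    node_id_label: &str,
    fut: impl std::future::Future<Output = Result<T, E>>,
) -> Result<T, E> {
    let latency = &METRICS_REGISTRY
        .metrics_group
        .storage_controller_pageserver_request_latency;
    // The guard is assumed to observe the elapsed time into the histogram on drop.
    let _timer = latency.start_timer(PageserverRequestLabelGroup {
        pageserver_id: node_id_label,
        path,
        method,
    });
    fut.await
}
```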


@@ -2,13 +2,13 @@ pub(crate) mod split_state;
use std::collections::HashMap;
use std::str::FromStr;
use std::time::Duration;
use std::time::Instant;
use self::split_state::SplitState;
use camino::Utf8Path;
use camino::Utf8PathBuf;
use diesel::pg::PgConnection;
use diesel::prelude::*;
use diesel::Connection;
use pageserver_api::controller_api::ShardSchedulingPolicy;
use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy};
use pageserver_api::models::TenantConfig;
use pageserver_api::shard::ShardConfigError;
@@ -53,6 +53,11 @@ use crate::node::Node;
/// we can UPDATE a node's scheduling mode reasonably quickly to mark a bad node offline.
pub struct Persistence {
connection_pool: diesel::r2d2::Pool<diesel::r2d2::ConnectionManager<PgConnection>>,
// In test environments, we support loading+saving a JSON file. This is temporary, for the benefit of
// test_compatibility.py, so that we don't have to commit to making the database contents fully backward/forward
// compatible just yet.
json_path: Option<Utf8PathBuf>,
}
/// Legacy format, for use in JSON compat objects in test environment
@@ -73,7 +78,7 @@ pub(crate) enum DatabaseError {
Logical(String),
}
#[derive(measured::FixedCardinalityLabel, Copy, Clone)]
#[derive(measured::FixedCardinalityLabel, Clone)]
pub(crate) enum DatabaseOperation {
InsertNode,
UpdateNode,
@@ -102,12 +107,6 @@ pub(crate) enum AbortShardSplitStatus {
pub(crate) type DatabaseResult<T> = Result<T, DatabaseError>;
/// Some methods can operate on either a whole tenant or a single shard
pub(crate) enum TenantFilter {
Tenant(TenantId),
Shard(TenantShardId),
}
impl Persistence {
// The default postgres connection limit is 100. We use up to 99, to leave one free for a human admin under
// normal circumstances. This assumes we have exclusive use of the database cluster to which we connect.
@@ -117,7 +116,7 @@ impl Persistence {
const IDLE_CONNECTION_TIMEOUT: Duration = Duration::from_secs(10);
const MAX_CONNECTION_LIFETIME: Duration = Duration::from_secs(60);
pub fn new(database_url: String) -> Self {
pub fn new(database_url: String, json_path: Option<Utf8PathBuf>) -> Self {
let manager = diesel::r2d2::ConnectionManager::<PgConnection>::new(database_url);
// We will use a connection pool: this is primarily to _limit_ our connection count, rather than to optimize time
@@ -132,31 +131,9 @@ impl Persistence {
.build(manager)
.expect("Could not build connection pool");
Self { connection_pool }
}
/// A helper for use during startup, where we would like to tolerate concurrent restarts of the
/// database and the storage controller; the database might therefore not be available right away
pub async fn await_connection(
database_url: &str,
timeout: Duration,
) -> Result<(), diesel::ConnectionError> {
let started_at = Instant::now();
loop {
match PgConnection::establish(database_url) {
Ok(_) => {
tracing::info!("Connected to database.");
return Ok(());
}
Err(e) => {
if started_at.elapsed() > timeout {
return Err(e);
} else {
tracing::info!("Database not yet available, waiting... ({e})");
tokio::time::sleep(Duration::from_millis(100)).await;
}
}
}
Self {
connection_pool,
json_path,
}
}
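A usage sketch for the startup helper above: wait a bounded time for Postgres to accept connections before building the pool. The five-second budget is illustrative, not taken from the code.

```rust
// Sketch: tolerate the storage controller starting before its database is reachable.
use std::time::Duration;

use crate::persistence::Persistence;

async fn wait_for_database(database_url: &str) -> Result<(), diesel::ConnectionError> {
    Persistence::await_connection(database_url, Duration::from_secs(5)).await
}
```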
@@ -169,7 +146,9 @@ impl Persistence {
let latency = &METRICS_REGISTRY
.metrics_group
.storage_controller_database_query_latency;
let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup { operation: op });
let _timer = latency.start_timer(DatabaseQueryLatencyLabelGroup {
operation: op.clone(),
});
let res = self.with_conn(func).await;
@@ -192,45 +171,10 @@ impl Persistence {
F: Fn(&mut PgConnection) -> DatabaseResult<R> + Send + 'static,
R: Send + 'static,
{
// A generous allowance for how many times we may retry serializable transactions
// before giving up. This is not expected to be hit: it is a defensive measure in case we
// somehow engineer a situation where duelling transactions might otherwise live-lock.
const MAX_RETRIES: usize = 128;
let mut conn = self.connection_pool.get()?;
tokio::task::spawn_blocking(move || -> DatabaseResult<R> {
let mut retry_count = 0;
loop {
match conn.build_transaction().serializable().run(|c| func(c)) {
Ok(r) => break Ok(r),
Err(
err @ DatabaseError::Query(diesel::result::Error::DatabaseError(
diesel::result::DatabaseErrorKind::SerializationFailure,
_,
)),
) => {
retry_count += 1;
if retry_count > MAX_RETRIES {
tracing::error!(
"Exceeded max retries on SerializationFailure errors: {err:?}"
);
break Err(err);
} else {
// Retry on serialization errors: these are expected, because even though our
// transactions don't fight for the same rows, they will occasionally collide
// on index pages (e.g. increment_generation for unrelated shards can collide)
tracing::debug!(
"Retrying transaction on serialization failure {err:?}"
);
continue;
}
}
Err(e) => break Err(e),
}
}
})
.await
.expect("Task panic")
tokio::task::spawn_blocking(move || -> DatabaseResult<R> { func(&mut conn) })
.await
.expect("Task panic")
}
/// When a node is first registered, persist it before using it for anything
@@ -292,13 +236,80 @@ impl Persistence {
/// At startup, load the high level state for shards, such as their config + policy. This will
/// be enriched at runtime with state discovered on pageservers.
pub(crate) async fn list_tenant_shards(&self) -> DatabaseResult<Vec<TenantShardPersistence>> {
self.with_measured_conn(
DatabaseOperation::ListTenantShards,
move |conn| -> DatabaseResult<_> {
Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
},
)
.await
let loaded = self
.with_measured_conn(
DatabaseOperation::ListTenantShards,
move |conn| -> DatabaseResult<_> {
Ok(crate::schema::tenant_shards::table.load::<TenantShardPersistence>(conn)?)
},
)
.await?;
if loaded.is_empty() {
if let Some(path) = &self.json_path {
if tokio::fs::try_exists(path)
.await
.map_err(|e| DatabaseError::Logical(format!("Error stat'ing JSON file: {e}")))?
{
tracing::info!("Importing from legacy JSON format at {path}");
return self.list_tenant_shards_json(path).await;
}
}
}
Ok(loaded)
}
/// Shim for automated compatibility tests: load tenants from a JSON file instead of database
pub(crate) async fn list_tenant_shards_json(
&self,
path: &Utf8Path,
) -> DatabaseResult<Vec<TenantShardPersistence>> {
let bytes = tokio::fs::read(path)
.await
.map_err(|e| DatabaseError::Logical(format!("Failed to load JSON: {e}")))?;
let mut decoded = serde_json::from_slice::<JsonPersistence>(&bytes)
.map_err(|e| DatabaseError::Logical(format!("Deserialization error: {e}")))?;
for shard in decoded.tenants.values_mut() {
if shard.placement_policy == "\"Single\"" {
// Backward compat for test data after PR https://github.com/neondatabase/neon/pull/7165
shard.placement_policy = "{\"Attached\":0}".to_string();
}
}
let tenants: Vec<TenantShardPersistence> = decoded.tenants.into_values().collect();
// Synchronize database with what is in the JSON file
self.insert_tenant_shards(tenants.clone()).await?;
Ok(tenants)
}
/// For use in testing environments, where we dump out JSON on shutdown.
pub async fn write_tenants_json(&self) -> anyhow::Result<()> {
let Some(path) = &self.json_path else {
anyhow::bail!("Cannot write JSON if path isn't set (test environment bug)");
};
tracing::info!("Writing state to {path}...");
let tenants = self.list_tenant_shards().await?;
let mut tenants_map = HashMap::new();
for tsp in tenants {
let tenant_shard_id = TenantShardId {
tenant_id: TenantId::from_str(tsp.tenant_id.as_str())?,
shard_number: ShardNumber(tsp.shard_number as u8),
shard_count: ShardCount::new(tsp.shard_count as u8),
};
tenants_map.insert(tenant_shard_id, tsp);
}
let json = serde_json::to_string(&JsonPersistence {
tenants: tenants_map,
})?;
tokio::fs::write(path, &json).await?;
tracing::info!("Wrote {} bytes to {path}...", json.len());
Ok(())
}
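The JSON import above rewrites the legacy `"Single"` placement policy into the newer `{"Attached":0}` form before the rows are inserted. A standalone illustration of that string-level rewrite:

```rust
// Standalone illustration of the backward-compat rewrite applied during JSON import.
fn upgrade_placement_policy(raw: &str) -> String {
    if raw == "\"Single\"" {
        // Legacy unit-variant form predating https://github.com/neondatabase/neon/pull/7165
        "{\"Attached\":0}".to_string()
    } else {
        raw.to_string()
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn upgrades_single() {
        assert_eq!(super::upgrade_placement_policy("\"Single\""), "{\"Attached\":0}");
        assert_eq!(super::upgrade_placement_policy("{\"Attached\":1}"), "{\"Attached\":1}");
    }
}
```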
/// Tenants must be persisted before we schedule them for the first time. This enables us
@@ -311,11 +322,14 @@ impl Persistence {
self.with_measured_conn(
DatabaseOperation::InsertTenantShards,
move |conn| -> DatabaseResult<()> {
for tenant in &shards {
diesel::insert_into(tenant_shards)
.values(tenant)
.execute(conn)?;
}
conn.transaction(|conn| -> QueryResult<()> {
for tenant in &shards {
diesel::insert_into(tenant_shards)
.values(tenant)
.execute(conn)?;
}
Ok(())
})?;
Ok(())
},
)
@@ -360,15 +374,13 @@ impl Persistence {
#[tracing::instrument(skip_all, fields(node_id))]
pub(crate) async fn re_attach(
&self,
input_node_id: NodeId,
node_id: NodeId,
) -> DatabaseResult<HashMap<TenantShardId, Generation>> {
use crate::schema::nodes::dsl::scheduling_policy;
use crate::schema::nodes::dsl::*;
use crate::schema::tenant_shards::dsl::*;
let updated = self
.with_measured_conn(DatabaseOperation::ReAttach, move |conn| {
let rows_updated = diesel::update(tenant_shards)
.filter(generation_pageserver.eq(input_node_id.0 as i64))
.filter(generation_pageserver.eq(node_id.0 as i64))
.set(generation.eq(generation + 1))
.execute(conn)?;
@@ -377,23 +389,9 @@ impl Persistence {
// TODO: UPDATE+SELECT in one query
let updated = tenant_shards
.filter(generation_pageserver.eq(input_node_id.0 as i64))
.filter(generation_pageserver.eq(node_id.0 as i64))
.select(TenantShardPersistence::as_select())
.load(conn)?;
// If the node went through a drain and restart phase before re-attaching,
// then reset its node scheduling policy to active.
diesel::update(nodes)
.filter(node_id.eq(input_node_id.0 as i64))
.filter(
scheduling_policy
.eq(String::from(NodeSchedulingPolicy::PauseForRestart))
.or(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Draining)))
.or(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Filling))),
)
.set(scheduling_policy.eq(String::from(NodeSchedulingPolicy::Active)))
.execute(conn)?;
Ok(updated)
})
.await?;
@@ -460,7 +458,6 @@ impl Persistence {
Ok(Generation::new(g as u32))
}
#[allow(non_local_definitions)]
/// For use when updating a persistent property of a tenant, such as its config or placement_policy.
///
/// Do not use this for setting generation, unless in the special onboarding code path (/location_config)
@@ -468,48 +465,59 @@ impl Persistence {
/// that we only do the first time a tenant is set to an attached policy via /location_config.
pub(crate) async fn update_tenant_shard(
&self,
tenant: TenantFilter,
input_placement_policy: Option<PlacementPolicy>,
input_config: Option<TenantConfig>,
tenant_shard_id: TenantShardId,
input_placement_policy: PlacementPolicy,
input_config: TenantConfig,
input_generation: Option<Generation>,
input_scheduling_policy: Option<ShardSchedulingPolicy>,
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_measured_conn(DatabaseOperation::UpdateTenantShard, move |conn| {
let query = match tenant {
TenantFilter::Shard(tenant_shard_id) => diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32))
.into_boxed(),
TenantFilter::Tenant(input_tenant_id) => diesel::update(tenant_shards)
.filter(tenant_id.eq(input_tenant_id.to_string()))
.into_boxed(),
};
let query = diesel::update(tenant_shards)
.filter(tenant_id.eq(tenant_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(tenant_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(tenant_shard_id.shard_count.literal() as i32));
#[derive(AsChangeset)]
#[diesel(table_name = crate::schema::tenant_shards)]
struct ShardUpdate {
generation: Option<i32>,
placement_policy: Option<String>,
config: Option<String>,
scheduling_policy: Option<String>,
if let Some(input_generation) = input_generation {
// Update includes generation column
query
.set((
generation.eq(Some(input_generation.into().unwrap() as i32)),
placement_policy
.eq(serde_json::to_string(&input_placement_policy).unwrap()),
config.eq(serde_json::to_string(&input_config).unwrap()),
))
.execute(conn)?;
} else {
// Update does not include generation column
query
.set((
placement_policy
.eq(serde_json::to_string(&input_placement_policy).unwrap()),
config.eq(serde_json::to_string(&input_config).unwrap()),
))
.execute(conn)?;
}
let update = ShardUpdate {
generation: input_generation.map(|g| g.into().unwrap() as i32),
placement_policy: input_placement_policy
.as_ref()
.map(|p| serde_json::to_string(&p).unwrap()),
config: input_config
.as_ref()
.map(|c| serde_json::to_string(&c).unwrap()),
scheduling_policy: input_scheduling_policy
.map(|p| serde_json::to_string(&p).unwrap()),
};
Ok(())
})
.await?;
query.set(update).execute(conn)?;
Ok(())
}
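The `ShardUpdate` changeset above relies on diesel's derived `AsChangeset` skipping `Option` fields that are `None` (the default behaviour when `treat_none_as_null` is not set), so only the properties the caller actually supplied are written. A fragment-level sketch of a config-only update inside `update_tenant_shard`:

```rust
// Sketch: a changeset that touches only the config column; the None fields are
// omitted from the generated UPDATE by diesel's derived AsChangeset.
let update = ShardUpdate {
    generation: None,
    placement_policy: None,
    config: Some(serde_json::to_string(&TenantConfig::default()).unwrap()),
    scheduling_policy: None,
};
// query.set(update).execute(conn)?;   // as in update_tenant_shard above
```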
pub(crate) async fn update_tenant_config(
&self,
input_tenant_id: TenantId,
input_config: TenantConfig,
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_measured_conn(DatabaseOperation::UpdateTenantConfig, move |conn| {
diesel::update(tenant_shards)
.filter(tenant_id.eq(input_tenant_id.to_string()))
.set((config.eq(serde_json::to_string(&input_config).unwrap()),))
.execute(conn)?;
Ok(())
})
@@ -551,51 +559,55 @@ impl Persistence {
) -> DatabaseResult<()> {
use crate::schema::tenant_shards::dsl::*;
self.with_measured_conn(DatabaseOperation::BeginShardSplit, move |conn| -> DatabaseResult<()> {
// Mark parent shards as splitting
conn.transaction(|conn| -> DatabaseResult<()> {
// Mark parent shards as splitting
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32))
.set((splitting.eq(1),))
.execute(conn)?;
if u8::try_from(updated)
.map_err(|_| DatabaseError::Logical(
format!("Overflow existing shard count {} while splitting", updated))
)? != old_shard_count.count() {
// Perhaps a deletion or another split raced with this attempt to split, mutating
// the parent shards that we intend to split. In this case the split request should fail.
return Err(DatabaseError::Logical(
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
));
}
// FIXME: spurious clone to sidestep closure move rules
let parent_to_children = parent_to_children.clone();
// Insert child shards
for (parent_shard_id, children) in parent_to_children {
let mut parent = crate::schema::tenant_shards::table
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
.load::<TenantShardPersistence>(conn)?;
let parent = if parent.len() != 1 {
return Err(DatabaseError::Logical(format!(
"Parent shard {parent_shard_id} not found"
)));
} else {
parent.pop().unwrap()
};
for mut shard in children {
// Carry the parent's generation into the child
shard.generation = parent.generation;
debug_assert!(shard.splitting == SplitState::Splitting);
diesel::insert_into(tenant_shards)
.values(shard)
.execute(conn)?;
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32))
.set((splitting.eq(1),))
.execute(conn)?;
if u8::try_from(updated)
.map_err(|_| DatabaseError::Logical(
format!("Overflow existing shard count {} while splitting", updated))
)? != old_shard_count.count() {
// Perhaps a deletion or another split raced with this attempt to split, mutating
// the parent shards that we intend to split. In this case the split request should fail.
return Err(DatabaseError::Logical(
format!("Unexpected existing shard count {updated} when preparing tenant for split (expected {})", old_shard_count.count())
));
}
}
// FIXME: spurious clone to sidestep closure move rules
let parent_to_children = parent_to_children.clone();
// Insert child shards
for (parent_shard_id, children) in parent_to_children {
let mut parent = crate::schema::tenant_shards::table
.filter(tenant_id.eq(parent_shard_id.tenant_id.to_string()))
.filter(shard_number.eq(parent_shard_id.shard_number.0 as i32))
.filter(shard_count.eq(parent_shard_id.shard_count.literal() as i32))
.load::<TenantShardPersistence>(conn)?;
let parent = if parent.len() != 1 {
return Err(DatabaseError::Logical(format!(
"Parent shard {parent_shard_id} not found"
)));
} else {
parent.pop().unwrap()
};
for mut shard in children {
// Carry the parent's generation into the child
shard.generation = parent.generation;
debug_assert!(shard.splitting == SplitState::Splitting);
diesel::insert_into(tenant_shards)
.values(shard)
.execute(conn)?;
}
}
Ok(())
})?;
Ok(())
})
@@ -613,18 +625,22 @@ impl Persistence {
self.with_measured_conn(
DatabaseOperation::CompleteShardSplit,
move |conn| -> DatabaseResult<()> {
// Drop parent shards
diesel::delete(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32))
.execute(conn)?;
conn.transaction(|conn| -> QueryResult<()> {
// Drop parent shards
diesel::delete(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(old_shard_count.literal() as i32))
.execute(conn)?;
// Clear sharding flag
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.set((splitting.eq(0),))
.execute(conn)?;
debug_assert!(updated > 0);
// Clear sharding flag
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.set((splitting.eq(0),))
.execute(conn)?;
debug_assert!(updated > 0);
Ok(())
})?;
Ok(())
},
@@ -643,41 +659,46 @@ impl Persistence {
self.with_measured_conn(
DatabaseOperation::AbortShardSplit,
move |conn| -> DatabaseResult<AbortShardSplitStatus> {
// Clear the splitting state on parent shards
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.ne(new_shard_count.literal() as i32))
.set((splitting.eq(0),))
.execute(conn)?;
let aborted =
conn.transaction(|conn| -> DatabaseResult<AbortShardSplitStatus> {
// Clear the splitting state on parent shards
let updated = diesel::update(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.ne(new_shard_count.literal() as i32))
.set((splitting.eq(0),))
.execute(conn)?;
// Parent shards are already gone: we cannot abort.
if updated == 0 {
return Ok(AbortShardSplitStatus::Complete);
}
// Parent shards are already gone: we cannot abort.
if updated == 0 {
return Ok(AbortShardSplitStatus::Complete);
}
// Sanity check: if parent shards were present, their cardinality should
// be less than the number of child shards.
if updated >= new_shard_count.count() as usize {
return Err(DatabaseError::Logical(format!(
"Unexpected parent shard count {updated} while aborting split to \
// Sanity check: if parent shards were present, their cardinality should
// be less than the number of child shards.
if updated >= new_shard_count.count() as usize {
return Err(DatabaseError::Logical(format!(
"Unexpected parent shard count {updated} while aborting split to \
count {new_shard_count:?} on tenant {split_tenant_id}"
)));
}
)));
}
// Erase child shards
diesel::delete(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(new_shard_count.literal() as i32))
.execute(conn)?;
// Erase child shards
diesel::delete(tenant_shards)
.filter(tenant_id.eq(split_tenant_id.to_string()))
.filter(shard_count.eq(new_shard_count.literal() as i32))
.execute(conn)?;
Ok(AbortShardSplitStatus::Aborted)
Ok(AbortShardSplitStatus::Aborted)
})?;
Ok(aborted)
},
)
.await
}
}
/// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably
/// Parts of [`crate::tenant_state::TenantState`] that are stored durably
#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)]
#[diesel(table_name = crate::schema::tenant_shards)]
pub(crate) struct TenantShardPersistence {
@@ -707,8 +728,6 @@ pub(crate) struct TenantShardPersistence {
pub(crate) splitting: SplitState,
#[serde(default)]
pub(crate) config: String,
#[serde(default)]
pub(crate) scheduling_policy: String,
}
impl TenantShardPersistence {


@@ -1,13 +1,12 @@
use crate::pageserver_client::PageserverClient;
use crate::persistence::Persistence;
use crate::service;
use pageserver_api::controller_api::PlacementPolicy;
use hyper::StatusCode;
use pageserver_api::models::{
LocationConfig, LocationConfigMode, LocationConfigSecondary, TenantConfig,
};
use pageserver_api::shard::{ShardIdentity, TenantShardId};
use pageserver_client::mgmt_api;
use reqwest::StatusCode;
use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
@@ -19,18 +18,17 @@ use utils::sync::gate::GateGuard;
use crate::compute_hook::{ComputeHook, NotifyError};
use crate::node::Node;
use crate::tenant_shard::{IntentState, ObservedState, ObservedStateLocation};
use crate::tenant_state::{IntentState, ObservedState, ObservedStateLocation};
const DEFAULT_HEATMAP_PERIOD: &str = "60s";
/// Object with the lifetime of the background reconcile task that is created
/// for tenants which have a difference between their intent and observed states.
pub(super) struct Reconciler {
/// See [`crate::tenant_shard::TenantShard`] for the meanings of these fields: they are a snapshot
/// See [`crate::tenant_state::TenantState`] for the meanings of these fields: they are a snapshot
/// of a tenant's state from when we spawned a reconcile task.
pub(super) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity,
pub(crate) placement_policy: PlacementPolicy,
pub(crate) generation: Option<Generation>,
pub(crate) intent: TargetState,
@@ -50,15 +48,11 @@ pub(super) struct Reconciler {
/// To avoid stalling if the cloud control plane is unavailable, we may proceed
/// past failures in [`ComputeHook::notify`], but we _must_ remember that we failed
/// so that we can set [`crate::tenant_shard::TenantShard::pending_compute_notification`] to ensure a later retry.
/// so that we can set [`crate::tenant_state::TenantState::pending_compute_notification`] to ensure a later retry.
pub(crate) compute_notify_failure: bool,
/// Reconciler is responsible for keeping alive semaphore units that limit how many
/// Reconcilers we will spawn concurrently.
pub(crate) _resource_units: ReconcileUnits,
/// A means to abort background reconciliation: it is essential to
/// call this when something changes in the original TenantShard that
/// call this when something changes in the original TenantState that
/// will make this reconciliation impossible or unnecessary, for
/// example when a pageserver node goes offline, or the PlacementPolicy for
/// the tenant is changed.
@@ -72,20 +66,7 @@ pub(super) struct Reconciler {
pub(crate) persistence: Arc<Persistence>,
}
/// RAII resource units granted to a Reconciler, which it should keep alive until it finishes doing I/O
pub(crate) struct ReconcileUnits {
_sem_units: tokio::sync::OwnedSemaphorePermit,
}
impl ReconcileUnits {
pub(crate) fn new(sem_units: tokio::sync::OwnedSemaphorePermit) -> Self {
Self {
_sem_units: sem_units,
}
}
}
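`ReconcileUnits` simply owns a tokio `OwnedSemaphorePermit`, so reconciler concurrency is bounded by however many permits the service-level semaphore (not shown in this diff) was created with. A hedged sketch of acquiring one unit, assuming such an `Arc<Semaphore>` exists:

```rust
// Sketch: acquire one unit from a hypothetical service-level semaphore before
// spawning a Reconciler; the semaphore itself is an assumption, not shown here.
use std::sync::Arc;

use tokio::sync::{AcquireError, Semaphore};

async fn acquire_units(sem: Arc<Semaphore>) -> Result<ReconcileUnits, AcquireError> {
    let permit = sem.acquire_owned().await?;
    Ok(ReconcileUnits::new(permit))
}
```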
/// This is a snapshot of [`crate::tenant_shard::IntentState`], but it does not do any
/// This is a snapshot of [`crate::tenant_state::IntentState`], but it does not do any
/// reference counting for Scheduler. The IntentState is what the scheduler works with,
/// and the TargetState is just the instruction for a particular Reconciler run.
#[derive(Debug)]
@@ -506,7 +487,6 @@ impl Reconciler {
while let Err(e) = self.compute_notify().await {
match e {
NotifyError::Fatal(_) => return Err(ReconcileError::Notify(e)),
NotifyError::ShuttingDown => return Err(ReconcileError::Cancel),
_ => {
tracing::warn!(
"Live migration blocked by compute notification error, retrying: {e}"
@@ -643,7 +623,7 @@ impl Reconciler {
generation,
&self.shard,
&self.config,
&self.placement_policy,
!self.intent.secondary.is_empty(),
);
match self.observed.locations.get(&node.get_id()) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {
@@ -769,10 +749,7 @@ impl Reconciler {
// It is up to the caller whether they want to drop out on this error, but they don't have to:
// in general we should avoid letting unavailability of the cloud control plane stop us from
// making progress.
if !matches!(e, NotifyError::ShuttingDown) {
tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
}
tracing::warn!("Failed to notify compute of attached pageserver {node}: {e}");
// Set this flag so that in our ReconcileResult we will set the flag on the shard that it
// needs to retry at some point.
self.compute_notify_failure = true;
@@ -803,15 +780,8 @@ pub(crate) fn attached_location_conf(
generation: Generation,
shard: &ShardIdentity,
config: &TenantConfig,
policy: &PlacementPolicy,
has_secondaries: bool,
) -> LocationConfig {
let has_secondaries = match policy {
PlacementPolicy::Attached(0) | PlacementPolicy::Detached | PlacementPolicy::Secondary => {
false
}
PlacementPolicy::Attached(_) => true,
};
LocationConfig {
mode: LocationConfigMode::AttachedSingle,
generation: generation.into(),


@@ -0,0 +1,352 @@
use crate::{node::Node, tenant_state::TenantState};
use pageserver_api::controller_api::UtilizationScore;
use serde::Serialize;
use std::collections::HashMap;
use utils::{http::error::ApiError, id::NodeId};
/// Scenarios in which we cannot find a suitable location for a tenant shard
#[derive(thiserror::Error, Debug)]
pub enum ScheduleError {
#[error("No pageservers found")]
NoPageservers,
#[error("No pageserver found matching constraint")]
ImpossibleConstraint,
}
impl From<ScheduleError> for ApiError {
fn from(value: ScheduleError) -> Self {
ApiError::Conflict(format!("Scheduling error: {}", value))
}
}
#[derive(Serialize, Eq, PartialEq)]
pub enum MaySchedule {
Yes(UtilizationScore),
No,
}
#[derive(Serialize)]
struct SchedulerNode {
/// How many shards are currently scheduled on this node, via their [`crate::tenant_state::IntentState`].
shard_count: usize,
/// Whether this node is currently eligible to have new shards scheduled (this is derived
/// from a node's availability state and scheduling policy).
may_schedule: MaySchedule,
}
impl PartialEq for SchedulerNode {
fn eq(&self, other: &Self) -> bool {
let may_schedule_matches = matches!(
(&self.may_schedule, &other.may_schedule),
(MaySchedule::Yes(_), MaySchedule::Yes(_)) | (MaySchedule::No, MaySchedule::No)
);
may_schedule_matches && self.shard_count == other.shard_count
}
}
impl Eq for SchedulerNode {}
/// This type is responsible for selecting which node is used when a tenant shard needs to choose a pageserver
/// on which to run.
///
/// The type has no persistent state of its own: this is all populated at startup. The Serialize
/// impl is only for debug dumps.
#[derive(Serialize)]
pub(crate) struct Scheduler {
nodes: HashMap<NodeId, SchedulerNode>,
}
impl Scheduler {
pub(crate) fn new<'a>(nodes: impl Iterator<Item = &'a Node>) -> Self {
let mut scheduler_nodes = HashMap::new();
for node in nodes {
scheduler_nodes.insert(
node.get_id(),
SchedulerNode {
shard_count: 0,
may_schedule: node.may_schedule(),
},
);
}
Self {
nodes: scheduler_nodes,
}
}
/// For debug/support: check that our internal statistics are in sync with the state of
/// the nodes & tenant shards.
///
/// If anything is inconsistent, log details and return an error.
pub(crate) fn consistency_check<'a>(
&self,
nodes: impl Iterator<Item = &'a Node>,
shards: impl Iterator<Item = &'a TenantState>,
) -> anyhow::Result<()> {
let mut expect_nodes: HashMap<NodeId, SchedulerNode> = HashMap::new();
for node in nodes {
expect_nodes.insert(
node.get_id(),
SchedulerNode {
shard_count: 0,
may_schedule: node.may_schedule(),
},
);
}
for shard in shards {
if let Some(node_id) = shard.intent.get_attached() {
match expect_nodes.get_mut(node_id) {
Some(node) => node.shard_count += 1,
None => anyhow::bail!(
"Tenant {} references nonexistent node {}",
shard.tenant_shard_id,
node_id
),
}
}
for node_id in shard.intent.get_secondary() {
match expect_nodes.get_mut(node_id) {
Some(node) => node.shard_count += 1,
None => anyhow::bail!(
"Tenant {} references nonexistent node {}",
shard.tenant_shard_id,
node_id
),
}
}
}
for (node_id, expect_node) in &expect_nodes {
let Some(self_node) = self.nodes.get(node_id) else {
anyhow::bail!("Node {node_id} not found in Self")
};
if self_node != expect_node {
tracing::error!("Inconsistency detected in scheduling state for node {node_id}");
tracing::error!("Expected state: {}", serde_json::to_string(expect_node)?);
tracing::error!("Self state: {}", serde_json::to_string(self_node)?);
anyhow::bail!("Inconsistent state on {node_id}");
}
}
if expect_nodes.len() != self.nodes.len() {
// We just checked that all the expected nodes are present. If the lengths don't match,
// it means that we have nodes in Self that are unexpected.
for node_id in self.nodes.keys() {
if !expect_nodes.contains_key(node_id) {
anyhow::bail!("Node {node_id} found in Self but not in expected nodes");
}
}
}
Ok(())
}
/// Increment the reference count of a node. This reference count is used to guide scheduling
/// decisions, not for memory management: it represents one tenant shard whose IntentState targets
/// this node.
///
/// It is an error to call this for a node that is not known to the scheduler (i.e. passed into
/// [`Self::new`] or [`Self::node_upsert`])
pub(crate) fn node_inc_ref(&mut self, node_id: NodeId) {
let Some(node) = self.nodes.get_mut(&node_id) else {
tracing::error!("Scheduler missing node {node_id}");
debug_assert!(false);
return;
};
node.shard_count += 1;
}
/// Decrement a node's reference count. Inverse of [`Self::node_inc_ref`].
pub(crate) fn node_dec_ref(&mut self, node_id: NodeId) {
let Some(node) = self.nodes.get_mut(&node_id) else {
debug_assert!(false);
tracing::error!("Scheduler missing node {node_id}");
return;
};
node.shard_count -= 1;
}
pub(crate) fn node_upsert(&mut self, node: &Node) {
use std::collections::hash_map::Entry::*;
match self.nodes.entry(node.get_id()) {
Occupied(mut entry) => {
entry.get_mut().may_schedule = node.may_schedule();
}
Vacant(entry) => {
entry.insert(SchedulerNode {
shard_count: 0,
may_schedule: node.may_schedule(),
});
}
}
}
pub(crate) fn node_remove(&mut self, node_id: NodeId) {
if self.nodes.remove(&node_id).is_none() {
tracing::warn!(node_id=%node_id, "Removed non-existent node from scheduler");
}
}
/// Where we have several nodes to choose from, for example when picking a secondary location
/// to promote to an attached location, this method may be used to pick the best choice based
/// on the scheduler's knowledge of utilization and availability.
///
/// If the input is empty, or none of the nodes are eligible for scheduling, return None: the
/// caller can pick a node some other way.
pub(crate) fn node_preferred(&self, nodes: &[NodeId]) -> Option<NodeId> {
if nodes.is_empty() {
return None;
}
// TODO: When the utilization score returned by the pageserver becomes meaningful,
// schedule based on that instead of the shard count.
let node = nodes
.iter()
.map(|node_id| {
let may_schedule = self
.nodes
.get(node_id)
.map(|n| n.may_schedule != MaySchedule::No)
.unwrap_or(false);
(*node_id, may_schedule)
})
.max_by_key(|(_n, may_schedule)| *may_schedule);
// If even the preferred node has may_schedule==false, return None
node.and_then(|(node_id, may_schedule)| if may_schedule { Some(node_id) } else { None })
}
pub(crate) fn schedule_shard(&self, hard_exclude: &[NodeId]) -> Result<NodeId, ScheduleError> {
if self.nodes.is_empty() {
return Err(ScheduleError::NoPageservers);
}
let mut tenant_counts: Vec<(NodeId, usize)> = self
.nodes
.iter()
.filter_map(|(k, v)| {
if hard_exclude.contains(k) || v.may_schedule == MaySchedule::No {
None
} else {
Some((*k, v.shard_count))
}
})
.collect();
// Sort by tenant count. Nodes with the same tenant count are sorted by ID.
tenant_counts.sort_by_key(|i| (i.1, i.0));
if tenant_counts.is_empty() {
// After applying constraints, no pageservers were left. We log some detail about
// the state of nodes to help understand why this happened. This is not logged as an error because
// it is legitimately possible for enough nodes to be Offline to prevent scheduling a shard.
tracing::info!("Scheduling failure, while excluding {hard_exclude:?}, node states:");
for (node_id, node) in &self.nodes {
tracing::info!(
"Node {node_id}: may_schedule={} shards={}",
node.may_schedule != MaySchedule::No,
node.shard_count
);
}
return Err(ScheduleError::ImpossibleConstraint);
}
let node_id = tenant_counts.first().unwrap().0;
tracing::info!(
"scheduler selected node {node_id} (elegible nodes {:?}, exclude: {hard_exclude:?})",
tenant_counts.iter().map(|i| i.0 .0).collect::<Vec<_>>()
);
// Note that we do not update shard count here to reflect the scheduling: that
// is IntentState's job when the scheduled location is used.
Ok(node_id)
}
}
#[cfg(test)]
pub(crate) mod test_utils {
use crate::node::Node;
use pageserver_api::controller_api::{NodeAvailability, UtilizationScore};
use std::collections::HashMap;
use utils::id::NodeId;
/// Test helper: synthesize the requested number of nodes, all in active state.
///
/// Node IDs start at one.
pub(crate) fn make_test_nodes(n: u64) -> HashMap<NodeId, Node> {
(1..n + 1)
.map(|i| {
(NodeId(i), {
let mut node = Node::new(
NodeId(i),
format!("httphost-{i}"),
80 + i as u16,
format!("pghost-{i}"),
5432 + i as u16,
);
node.set_availability(NodeAvailability::Active(UtilizationScore::worst()));
assert!(node.is_available());
node
})
})
.collect()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::tenant_state::IntentState;
#[test]
fn scheduler_basic() -> anyhow::Result<()> {
let nodes = test_utils::make_test_nodes(2);
let mut scheduler = Scheduler::new(nodes.values());
let mut t1_intent = IntentState::new();
let mut t2_intent = IntentState::new();
let scheduled = scheduler.schedule_shard(&[])?;
t1_intent.set_attached(&mut scheduler, Some(scheduled));
let scheduled = scheduler.schedule_shard(&[])?;
t2_intent.set_attached(&mut scheduler, Some(scheduled));
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers())?;
t1_intent.push_secondary(&mut scheduler, scheduled);
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 1);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 2);
t1_intent.clear(&mut scheduler);
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 1);
if cfg!(debug_assertions) {
// Dropping an IntentState without clearing it causes a panic in debug mode,
// because we have failed to properly update scheduler shard counts.
let result = std::panic::catch_unwind(move || {
drop(t2_intent);
});
assert!(result.is_err());
} else {
t2_intent.clear(&mut scheduler);
assert_eq!(scheduler.nodes.get(&NodeId(1)).unwrap().shard_count, 0);
assert_eq!(scheduler.nodes.get(&NodeId(2)).unwrap().shard_count, 0);
}
Ok(())
}
}


@@ -22,7 +22,6 @@ diesel::table! {
placement_policy -> Varchar,
splitting -> Int2,
config -> Text,
scheduling_policy -> Varchar,
}
}


@@ -0,0 +1,983 @@
use std::{
collections::{HashMap, HashSet},
sync::Arc,
time::Duration,
};
use crate::{
metrics::{self, ReconcileCompleteLabelGroup, ReconcileOutcome},
persistence::TenantShardPersistence,
};
use pageserver_api::controller_api::PlacementPolicy;
use pageserver_api::{
models::{LocationConfig, LocationConfigMode, TenantConfig},
shard::{ShardIdentity, TenantShardId},
};
use serde::Serialize;
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::{instrument, Instrument};
use utils::{
generation::Generation,
id::NodeId,
seqwait::{SeqWait, SeqWaitError},
sync::gate::Gate,
};
use crate::{
compute_hook::ComputeHook,
node::Node,
persistence::{split_state::SplitState, Persistence},
reconciler::{
attached_location_conf, secondary_location_conf, ReconcileError, Reconciler, TargetState,
},
scheduler::{ScheduleError, Scheduler},
service, Sequence,
};
/// Serialization helper
fn read_mutex_content<S, T>(v: &std::sync::Mutex<T>, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::ser::Serializer,
T: Clone + std::fmt::Display,
{
serializer.collect_str(&v.lock().unwrap())
}
/// In-memory state for a particular tenant shard.
///
/// This struct implements Serialize for debugging purposes, but is _not_ persisted
/// itself: see [`crate::persistence`] for the subset of tenant shard state that is persisted.
#[derive(Serialize)]
pub(crate) struct TenantState {
pub(crate) tenant_shard_id: TenantShardId,
pub(crate) shard: ShardIdentity,
// Runtime only: sequence used to coordinate updates to this object while
// background reconcilers may be running. A reconciler runs to a particular
// sequence.
pub(crate) sequence: Sequence,
// Latest generation number: next time we attach, increment this
// and use the incremented number when attaching.
//
// None represents an incompletely onboarded tenant via the [`Service::location_config`]
// API, where this tenant may only run in PlacementPolicy::Secondary.
pub(crate) generation: Option<Generation>,
// High level description of how the tenant should be set up. Provided
// externally.
pub(crate) policy: PlacementPolicy,
// Low level description of exactly which pageservers should fulfil
// which role. Generated by `Self::schedule`.
pub(crate) intent: IntentState,
// Low level description of how the tenant is configured on pageservers:
// if this does not match `Self::intent` then the tenant needs reconciliation
// with `Self::reconcile`.
pub(crate) observed: ObservedState,
// Tenant configuration, passed through opaquely to the pageserver. Identical
// for all shards in a tenant.
pub(crate) config: TenantConfig,
/// If a reconcile task is currently in flight, it may be joined here (it is
/// only safe to join if either the result has been received or the reconciler's
/// cancellation token has been fired)
#[serde(skip)]
pub(crate) reconciler: Option<ReconcilerHandle>,
/// If a tenant is being split, then all shards with that TenantId will have a
/// SplitState set; this acts as a guard against other operations such as background
/// reconciliation and timeline creation.
pub(crate) splitting: SplitState,
/// Optionally wait for reconciliation to complete up to a particular
/// sequence number.
#[serde(skip)]
pub(crate) waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
/// Indicates sequence number for which we have encountered an error reconciling. If
/// this advances ahead of [`Self::waiter`] then a reconciliation error has occurred,
/// and callers should stop waiting for `waiter` and propagate the error.
#[serde(skip)]
pub(crate) error_waiter: std::sync::Arc<SeqWait<Sequence, Sequence>>,
/// The most recent error from a reconcile on this tenant
/// TODO: generalize to an array of recent events
/// TODO: use an ArcSwap instead of a mutex for faster reads?
#[serde(serialize_with = "read_mutex_content")]
pub(crate) last_error: std::sync::Arc<std::sync::Mutex<String>>,
/// If we have a pending compute notification that for some reason we weren't able to send,
/// set this to true. If this is set, calls to [`Self::maybe_reconcile`] will run a task to retry
/// sending it. This is the mechanism by which compute notifications are included in the scope
/// of state that we publish externally in an eventually consistent way.
pub(crate) pending_compute_notification: bool,
}
#[derive(Default, Clone, Debug, Serialize)]
pub(crate) struct IntentState {
attached: Option<NodeId>,
secondary: Vec<NodeId>,
}
impl IntentState {
pub(crate) fn new() -> Self {
Self {
attached: None,
secondary: vec![],
}
}
pub(crate) fn single(scheduler: &mut Scheduler, node_id: Option<NodeId>) -> Self {
if let Some(node_id) = node_id {
scheduler.node_inc_ref(node_id);
}
Self {
attached: node_id,
secondary: vec![],
}
}
pub(crate) fn set_attached(&mut self, scheduler: &mut Scheduler, new_attached: Option<NodeId>) {
if self.attached != new_attached {
if let Some(old_attached) = self.attached.take() {
scheduler.node_dec_ref(old_attached);
}
if let Some(new_attached) = &new_attached {
scheduler.node_inc_ref(*new_attached);
}
self.attached = new_attached;
}
}
/// Like set_attached, but the node is from [`Self::secondary`]. This swaps the node from
/// secondary to attached while maintaining the scheduler's reference counts.
pub(crate) fn promote_attached(
&mut self,
_scheduler: &mut Scheduler,
promote_secondary: NodeId,
) {
// If we call this with a node that isn't in secondary, it would cause incorrect
// scheduler reference counting, since we assume the node is already referenced as a secondary.
debug_assert!(self.secondary.contains(&promote_secondary));
// TODO: when scheduler starts tracking attached + secondary counts separately, we will
// need to call into it here.
self.secondary.retain(|n| n != &promote_secondary);
self.attached = Some(promote_secondary);
}
pub(crate) fn push_secondary(&mut self, scheduler: &mut Scheduler, new_secondary: NodeId) {
debug_assert!(!self.secondary.contains(&new_secondary));
scheduler.node_inc_ref(new_secondary);
self.secondary.push(new_secondary);
}
/// It is legal to call this with a node that is not currently a secondary: that is a no-op
pub(crate) fn remove_secondary(&mut self, scheduler: &mut Scheduler, node_id: NodeId) {
let index = self.secondary.iter().position(|n| *n == node_id);
if let Some(index) = index {
scheduler.node_dec_ref(node_id);
self.secondary.remove(index);
}
}
pub(crate) fn clear_secondary(&mut self, scheduler: &mut Scheduler) {
for secondary in self.secondary.drain(..) {
scheduler.node_dec_ref(secondary);
}
}
/// Remove the last secondary node from the list of secondaries
pub(crate) fn pop_secondary(&mut self, scheduler: &mut Scheduler) {
if let Some(node_id) = self.secondary.pop() {
scheduler.node_dec_ref(node_id);
}
}
pub(crate) fn clear(&mut self, scheduler: &mut Scheduler) {
if let Some(old_attached) = self.attached.take() {
scheduler.node_dec_ref(old_attached);
}
self.clear_secondary(scheduler);
}
pub(crate) fn all_pageservers(&self) -> Vec<NodeId> {
let mut result = Vec::new();
if let Some(p) = self.attached {
result.push(p)
}
result.extend(self.secondary.iter().copied());
result
}
pub(crate) fn get_attached(&self) -> &Option<NodeId> {
&self.attached
}
pub(crate) fn get_secondary(&self) -> &Vec<NodeId> {
&self.secondary
}
/// If the node is in use as the attached location, demote it into
/// the list of secondary locations. This is used when a node goes offline,
/// and we want to use a different node for attachment, but not permanently
/// forget the location on the offline node.
///
/// Returns true if a change was made
pub(crate) fn demote_attached(&mut self, node_id: NodeId) -> bool {
if self.attached == Some(node_id) {
// TODO: when scheduler starts tracking attached + secondary counts separately, we will
// need to call into it here.
self.attached = None;
self.secondary.push(node_id);
true
} else {
false
}
}
}
impl Drop for IntentState {
fn drop(&mut self) {
// Must clear before dropping, to avoid leaving stale refcounts in the Scheduler
debug_assert!(self.attached.is_none() && self.secondary.is_empty());
}
}
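Because every mutation of `IntentState` adjusts the scheduler's per-node shard counts, the `Drop` impl above insists (in debug builds) that the state has been emptied first; the scheduler tests later in this diff exercise exactly that panic. A short sketch of the correct teardown order, using the `tenant_state` module path as spelled in this file:

```rust
// Sketch: drain an IntentState through the scheduler before letting it drop, so the
// debug_assert in Drop does not fire and node refcounts stay accurate.
use crate::{scheduler::Scheduler, tenant_state::IntentState};

fn teardown(mut intent: IntentState, scheduler: &mut Scheduler) {
    intent.clear(scheduler); // decrements refcounts for the attached and secondary nodes
    drop(intent);            // now safe: attached is None and secondary is empty
}
```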
#[derive(Default, Clone, Serialize)]
pub(crate) struct ObservedState {
pub(crate) locations: HashMap<NodeId, ObservedStateLocation>,
}
/// Our latest knowledge of how this tenant is configured in the outside world.
///
/// Meaning:
/// * No instance of this type exists for a node: we are certain that we have nothing configured on that
/// node for this shard.
/// * Instance exists with conf==None: we *might* have some state on that node, but we don't know
/// what it is (e.g. we failed partway through configuring it)
/// * Instance exists with conf==Some: this tells us what we last successfully configured on this node,
/// and that configuration will still be present unless something external interfered.
#[derive(Clone, Serialize)]
pub(crate) struct ObservedStateLocation {
/// If None, it means we do not know the status of this shard's location on this node, but
/// we know that we might have some state on this node.
pub(crate) conf: Option<LocationConfig>,
}
pub(crate) struct ReconcilerWaiter {
// For observability purposes, remember the ID of the shard we're
// waiting for.
pub(crate) tenant_shard_id: TenantShardId,
seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error_seq_wait: std::sync::Arc<SeqWait<Sequence, Sequence>>,
error: std::sync::Arc<std::sync::Mutex<String>>,
seq: Sequence,
}
#[derive(thiserror::Error, Debug)]
pub enum ReconcileWaitError {
#[error("Timeout waiting for shard {0}")]
Timeout(TenantShardId),
#[error("shutting down")]
Shutdown,
#[error("Reconcile error on shard {0}: {1}")]
Failed(TenantShardId, String),
}
impl ReconcilerWaiter {
pub(crate) async fn wait_timeout(&self, timeout: Duration) -> Result<(), ReconcileWaitError> {
tokio::select! {
result = self.seq_wait.wait_for_timeout(self.seq, timeout)=> {
result.map_err(|e| match e {
SeqWaitError::Timeout => ReconcileWaitError::Timeout(self.tenant_shard_id),
SeqWaitError::Shutdown => ReconcileWaitError::Shutdown
})?;
},
result = self.error_seq_wait.wait_for(self.seq) => {
result.map_err(|e| match e {
SeqWaitError::Shutdown => ReconcileWaitError::Shutdown,
SeqWaitError::Timeout => unreachable!()
})?;
return Err(ReconcileWaitError::Failed(self.tenant_shard_id, self.error.lock().unwrap().clone()))
}
}
Ok(())
}
}
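A usage sketch for the waiter above: the caller blocks up to a timeout for its reconcile sequence to be reached, and the error-sequence branch surfaces a reconcile failure as a typed error instead. The 30-second budget is illustrative.

```rust
// Sketch: waiting on a ReconcilerWaiter handed out when a reconcile was spawned.
use std::time::Duration;

use crate::tenant_state::{ReconcileWaitError, ReconcilerWaiter};

async fn await_reconcile(waiter: ReconcilerWaiter) {
    match waiter.wait_timeout(Duration::from_secs(30)).await {
        Ok(()) => tracing::info!("reconcile complete for {}", waiter.tenant_shard_id),
        Err(ReconcileWaitError::Timeout(id)) => tracing::warn!("timed out waiting for {id}"),
        Err(ReconcileWaitError::Shutdown) => { /* controller is shutting down */ }
        Err(ReconcileWaitError::Failed(id, msg)) => {
            tracing::warn!("reconcile failed on {id}: {msg}")
        }
    }
}
```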
/// Having spawned a reconciler task, the tenant shard's state will carry enough
/// information to optionally cancel & await it later.
pub(crate) struct ReconcilerHandle {
sequence: Sequence,
handle: JoinHandle<()>,
cancel: CancellationToken,
}
/// When a reconcile task completes, it sends this result object
/// to be applied to the primary TenantState.
pub(crate) struct ReconcileResult {
pub(crate) sequence: Sequence,
/// On errors, `observed` should be treated as an incomplete description
/// of state (i.e. any nodes present in the result should override nodes
/// present in the parent tenant state, but any unmentioned nodes should
/// not be removed from parent tenant state)
pub(crate) result: Result<(), ReconcileError>,
pub(crate) tenant_shard_id: TenantShardId,
pub(crate) generation: Option<Generation>,
pub(crate) observed: ObservedState,
/// Set [`TenantState::pending_compute_notification`] from this flag
pub(crate) pending_compute_notification: bool,
}
impl ObservedState {
pub(crate) fn new() -> Self {
Self {
locations: HashMap::new(),
}
}
}
impl TenantState {
pub(crate) fn new(
tenant_shard_id: TenantShardId,
shard: ShardIdentity,
policy: PlacementPolicy,
) -> Self {
Self {
tenant_shard_id,
policy,
intent: IntentState::default(),
generation: Some(Generation::new(0)),
shard,
observed: ObservedState::default(),
config: TenantConfig::default(),
reconciler: None,
splitting: SplitState::Idle,
sequence: Sequence(1),
waiter: Arc::new(SeqWait::new(Sequence(0))),
error_waiter: Arc::new(SeqWait::new(Sequence(0))),
last_error: Arc::default(),
pending_compute_notification: false,
}
}
/// For use on startup when learning state from pageservers: generate my [`IntentState`] from my
/// [`ObservedState`], even if it violates my [`PlacementPolicy`]. Call [`Self::schedule`] next,
/// to get an intent state that complies with placement policy. The overall goal is to do scheduling
/// in a way that makes use of any configured locations that already exist in the outside world.
pub(crate) fn intent_from_observed(&mut self, scheduler: &mut Scheduler) {
// Choose an attached location by filtering observed locations, and then sorting to get the highest
// generation
let mut attached_locs = self
.observed
.locations
.iter()
.filter_map(|(node_id, l)| {
if let Some(conf) = &l.conf {
if conf.mode == LocationConfigMode::AttachedMulti
|| conf.mode == LocationConfigMode::AttachedSingle
|| conf.mode == LocationConfigMode::AttachedStale
{
Some((node_id, conf.generation))
} else {
None
}
} else {
None
}
})
.collect::<Vec<_>>();
attached_locs.sort_by_key(|i| i.1);
if let Some((node_id, _gen)) = attached_locs.into_iter().last() {
self.intent.set_attached(scheduler, Some(*node_id));
}
// All remaining observed locations generate secondary intents. This includes None
// observations, as these may well have some local content on disk that is usable (this
// is an edge case that might occur if we restarted during a migration or other change)
//
// We may leave intent.attached empty if we didn't find any attached locations: [`Self::schedule`]
// will take care of promoting one of these secondaries to be attached.
self.observed.locations.keys().for_each(|node_id| {
if Some(*node_id) != self.intent.attached {
self.intent.push_secondary(scheduler, *node_id);
}
});
}
/// Part of [`Self::schedule`] that is used to choose exactly one node to act as the
/// attached pageserver for a shard.
///
/// Returns whether we modified it, and the NodeId selected.
fn schedule_attached(
&mut self,
scheduler: &mut Scheduler,
) -> Result<(bool, NodeId), ScheduleError> {
// No work to do if we already have an attached tenant
if let Some(node_id) = self.intent.attached {
return Ok((false, node_id));
}
if let Some(promote_secondary) = scheduler.node_preferred(&self.intent.secondary) {
// Promote a secondary
tracing::debug!("Promoted secondary {} to attached", promote_secondary);
self.intent.promote_attached(scheduler, promote_secondary);
Ok((true, promote_secondary))
} else {
// Pick a fresh node: either we had no secondaries or none were schedulable
let node_id = scheduler.schedule_shard(&self.intent.secondary)?;
tracing::debug!("Selected {} as attached", node_id);
self.intent.set_attached(scheduler, Some(node_id));
Ok((true, node_id))
}
}
pub(crate) fn schedule(&mut self, scheduler: &mut Scheduler) -> Result<(), ScheduleError> {
// TODO: before scheduling new nodes, check if any existing content in
// self.intent refers to pageservers that are offline, and pick other
// pageservers if so.
// TODO: respect the splitting bit on tenants: if they are currently splitting then we may not
// change their attach location.
// Build the set of pageservers already in use by this tenant, to avoid scheduling
// more work on the same pageservers we're already using.
let mut modified = false;
// Add/remove nodes to fulfil policy
use PlacementPolicy::*;
match self.policy {
Attached(secondary_count) => {
let retain_secondaries = if self.intent.attached.is_none()
&& scheduler.node_preferred(&self.intent.secondary).is_some()
{
// If we have no attached, and one of the secondaries is eligible to be promoted, retain
// one more secondary than we usually would, as one of them will become attached further down this function.
secondary_count + 1
} else {
secondary_count
};
while self.intent.secondary.len() > retain_secondaries {
// We have no particular preference for one secondary location over another: just
// arbitrarily drop from the end
self.intent.pop_secondary(scheduler);
modified = true;
}
// Should have exactly one attached, and N secondaries
let (modified_attached, attached_node_id) = self.schedule_attached(scheduler)?;
modified |= modified_attached;
let mut used_pageservers = vec![attached_node_id];
while self.intent.secondary.len() < secondary_count {
let node_id = scheduler.schedule_shard(&used_pageservers)?;
self.intent.push_secondary(scheduler, node_id);
used_pageservers.push(node_id);
modified = true;
}
}
Secondary => {
if let Some(node_id) = self.intent.get_attached() {
// Populate secondary by demoting the attached node
self.intent.demote_attached(*node_id);
modified = true;
} else if self.intent.secondary.is_empty() {
// Populate secondary by scheduling a fresh node
let node_id = scheduler.schedule_shard(&[])?;
self.intent.push_secondary(scheduler, node_id);
modified = true;
}
while self.intent.secondary.len() > 1 {
// We have no particular preference for one secondary location over another: just
// arbitrarily drop from the end
self.intent.pop_secondary(scheduler);
modified = true;
}
}
Detached => {
// Never add locations in this mode
if self.intent.get_attached().is_some() || !self.intent.get_secondary().is_empty() {
self.intent.clear(scheduler);
modified = true;
}
}
}
if modified {
self.sequence.0 += 1;
}
Ok(())
}
/// Query whether the tenant's observed state for attached node matches its intent state, and if so,
/// yield the node ID. This is appropriate for emitting compute hook notifications: we are checking that
/// the node in question is not only where we intend to attach, but that the tenant is indeed already attached there.
///
/// Reconciliation may still be needed for other aspects of state such as secondaries (see [`Self::dirty`]): this
/// function should not be used to decide whether to reconcile.
pub(crate) fn stably_attached(&self) -> Option<NodeId> {
if let Some(attach_intent) = self.intent.attached {
match self.observed.locations.get(&attach_intent) {
Some(loc) => match &loc.conf {
Some(conf) => match conf.mode {
LocationConfigMode::AttachedMulti
| LocationConfigMode::AttachedSingle
| LocationConfigMode::AttachedStale => {
// Our intent and observed state agree that this node is in an attached state.
Some(attach_intent)
}
// Our observed config is not an attached state
_ => None,
},
// Our observed state is None, i.e. in flux
None => None,
},
// We have no observed state for this node
None => None,
}
} else {
// Our intent is not to attach
None
}
}
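// Illustrative sketch (editorial addition, not part of the original file): how a caller
// might use `stably_attached` to decide whether a compute notification can be sent right
// away. `notify` stands in for the real compute hook and is hypothetical.
fn notify_if_stably_attached_sketch(&self, notify: impl Fn(NodeId)) {
    if let Some(node_id) = self.stably_attached() {
        // Intent and observed state agree on the attached node: safe to point computes at it.
        notify(node_id);
    }
    // NB: per the doc comment above, this must not replace `dirty()` when deciding
    // whether reconciliation is needed.
}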
fn dirty(&self, nodes: &Arc<HashMap<NodeId, Node>>) -> bool {
let mut dirty_nodes = HashSet::new();
if let Some(node_id) = self.intent.attached {
// May panic: it is a severe bug if we try to attach while generation is null.
let generation = self
.generation
.expect("Attempted to enter attached state without a generation");
let wanted_conf = attached_location_conf(
generation,
&self.shard,
&self.config,
!self.intent.secondary.is_empty(),
);
match self.observed.locations.get(&node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
Some(_) | None => {
dirty_nodes.insert(node_id);
}
}
}
for node_id in &self.intent.secondary {
let wanted_conf = secondary_location_conf(&self.shard, &self.config);
match self.observed.locations.get(node_id) {
Some(conf) if conf.conf.as_ref() == Some(&wanted_conf) => {}
Some(_) | None => {
dirty_nodes.insert(*node_id);
}
}
}
for node_id in self.observed.locations.keys() {
if self.intent.attached != Some(*node_id) && !self.intent.secondary.contains(node_id) {
// We have observed state that isn't part of our intent: need to clean it up.
dirty_nodes.insert(*node_id);
}
}
dirty_nodes.retain(|node_id| {
nodes
.get(node_id)
.map(|n| n.is_available())
.unwrap_or(false)
});
!dirty_nodes.is_empty()
}
#[allow(clippy::too_many_arguments)]
#[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))]
pub(crate) fn maybe_reconcile(
&mut self,
result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
pageservers: &Arc<HashMap<NodeId, Node>>,
compute_hook: &Arc<ComputeHook>,
service_config: &service::Config,
persistence: &Arc<Persistence>,
gate: &Gate,
cancel: &CancellationToken,
) -> Option<ReconcilerWaiter> {
// If there are any ambiguous observed states, and the nodes they refer to are available,
// we should reconcile to clean them up.
let mut dirty_observed = false;
for (node_id, observed_loc) in &self.observed.locations {
let node = pageservers
.get(node_id)
.expect("Nodes may not be removed while referenced");
if observed_loc.conf.is_none() && node.is_available() {
dirty_observed = true;
break;
}
}
let active_nodes_dirty = self.dirty(pageservers);
// Even if there is no pageserver work to be done, if we have a pending notification to computes,
// wake up a reconciler to send it.
let do_reconcile =
active_nodes_dirty || dirty_observed || self.pending_compute_notification;
if !do_reconcile {
tracing::info!("Not dirty, no reconciliation needed.");
return None;
}
// If we are currently splitting, then never start a reconciler task: the splitting logic
// requires that shards are not interfered with while it runs. Do this check here rather than
// up top, so that we only log this message if we would otherwise have done a reconciliation.
if !matches!(self.splitting, SplitState::Idle) {
tracing::info!("Refusing to reconcile, splitting in progress");
return None;
}
// Reconcile already in flight for the current sequence?
if let Some(handle) = &self.reconciler {
if handle.sequence == self.sequence {
tracing::info!(
"Reconciliation already in progress for sequence {:?}",
self.sequence,
);
return Some(ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
});
}
}
// Build list of nodes from which the reconciler should detach
let mut detach = Vec::new();
for node_id in self.observed.locations.keys() {
if self.intent.get_attached() != &Some(*node_id)
&& !self.intent.secondary.contains(node_id)
{
detach.push(
pageservers
.get(node_id)
.expect("Intent references non-existent pageserver")
.clone(),
)
}
}
// Reconcile in flight for a stale sequence? Our sequence's task will wait for it before
// doing our sequence's work.
let old_handle = self.reconciler.take();
let Ok(gate_guard) = gate.enter() else {
// Shutting down, don't start a reconciler
return None;
};
// Advance the sequence before spawning a reconciler, so that sequence waiters
// can distinguish between before+after the reconcile completes.
self.sequence = self.sequence.next();
let reconciler_cancel = cancel.child_token();
let reconciler_intent = TargetState::from_intent(pageservers, &self.intent);
let mut reconciler = Reconciler {
tenant_shard_id: self.tenant_shard_id,
shard: self.shard,
generation: self.generation,
intent: reconciler_intent,
detach,
config: self.config.clone(),
observed: self.observed.clone(),
compute_hook: compute_hook.clone(),
service_config: service_config.clone(),
_gate_guard: gate_guard,
cancel: reconciler_cancel.clone(),
persistence: persistence.clone(),
compute_notify_failure: false,
};
let reconcile_seq = self.sequence;
tracing::info!(seq=%reconcile_seq, "Spawning Reconciler for sequence {}", self.sequence);
let must_notify = self.pending_compute_notification;
let reconciler_span = tracing::info_span!(parent: None, "reconciler", seq=%reconcile_seq,
tenant_id=%reconciler.tenant_shard_id.tenant_id,
shard_id=%reconciler.tenant_shard_id.shard_slug());
metrics::METRICS_REGISTRY
.metrics_group
.storage_controller_reconcile_spawn
.inc();
let result_tx = result_tx.clone();
let join_handle = tokio::task::spawn(
async move {
// Wait for any previous reconcile task to complete before we start
if let Some(old_handle) = old_handle {
old_handle.cancel.cancel();
if let Err(e) = old_handle.handle.await {
// We can't do much with this other than log it: the task is done, so
// we may proceed with our work.
tracing::error!("Unexpected join error waiting for reconcile task: {e}");
}
}
// Early check for cancellation before doing any work
// TODO: wrap all remote API operations in cancellation check
// as well.
if reconciler.cancel.is_cancelled() {
metrics::METRICS_REGISTRY
.metrics_group
.storage_controller_reconcile_complete
.inc(ReconcileCompleteLabelGroup {
status: ReconcileOutcome::Cancel,
});
return;
}
// Attempt to make observed state match intent state
let result = reconciler.reconcile().await;
// If we know we had a pending compute notification from some previous action, send a notification irrespective
// of whether the above reconcile() did any work
if result.is_ok() && must_notify {
// If this fails, we will signal the need to retry via [`ReconcileResult::pending_compute_notification`]
reconciler.compute_notify().await.ok();
}
// Update result counter
let outcome_label = match &result {
Ok(_) => ReconcileOutcome::Success,
Err(ReconcileError::Cancel) => ReconcileOutcome::Cancel,
Err(_) => ReconcileOutcome::Error,
};
metrics::METRICS_REGISTRY
.metrics_group
.storage_controller_reconcile_complete
.inc(ReconcileCompleteLabelGroup {
status: outcome_label,
});
result_tx
.send(ReconcileResult {
sequence: reconcile_seq,
result,
tenant_shard_id: reconciler.tenant_shard_id,
generation: reconciler.generation,
observed: reconciler.observed,
pending_compute_notification: reconciler.compute_notify_failure,
})
.ok();
}
.instrument(reconciler_span),
);
self.reconciler = Some(ReconcilerHandle {
sequence: self.sequence,
handle: join_handle,
cancel: reconciler_cancel,
});
Some(ReconcilerWaiter {
tenant_shard_id: self.tenant_shard_id,
seq_wait: self.waiter.clone(),
error_seq_wait: self.error_waiter.clone(),
error: self.last_error.clone(),
seq: self.sequence,
})
}
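// Illustrative sketch (editorial addition, not part of the original file): how a
// service-level loop might drive `maybe_reconcile` across all shards and collect the
// waiters of any reconcilers that were spawned. `shards` and the function name are
// hypothetical; the argument list mirrors `maybe_reconcile` above.
#[allow(clippy::too_many_arguments)]
fn reconcile_all_sketch(
    shards: &mut HashMap<TenantShardId, Self>,
    result_tx: &tokio::sync::mpsc::UnboundedSender<ReconcileResult>,
    pageservers: &Arc<HashMap<NodeId, Node>>,
    compute_hook: &Arc<ComputeHook>,
    service_config: &service::Config,
    persistence: &Arc<Persistence>,
    gate: &Gate,
    cancel: &CancellationToken,
) -> Vec<ReconcilerWaiter> {
    shards
        .values_mut()
        .filter_map(|shard| {
            // Only shards that actually spawned a reconciler yield a waiter.
            shard.maybe_reconcile(
                result_tx,
                pageservers,
                compute_hook,
                service_config,
                persistence,
                gate,
                cancel,
            )
        })
        .collect()
}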
/// Called when a ReconcileResult has been emitted and the service is updating
/// our state: if the result is from a sequence >= my ReconcileHandle, then drop
/// the handle to indicate there is no longer a reconciliation in progress.
pub(crate) fn reconcile_complete(&mut self, sequence: Sequence) {
if let Some(reconcile_handle) = &self.reconciler {
if reconcile_handle.sequence <= sequence {
self.reconciler = None;
}
}
}
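// Illustrative sketch (editorial addition, not part of the original file): the service is
// expected to call `reconcile_complete` when it drains a `ReconcileResult` from the
// channel, so a stale handle does not block the next reconciliation. The method name
// `apply_result_sketch` is hypothetical.
fn apply_result_sketch(&mut self, result: &ReconcileResult) {
    // Drop our handle if the finished sequence is at least as new as the one we track.
    self.reconcile_complete(result.sequence);
}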
// If we had any state at all referring to this node ID, drop it. Does not
// attempt to reschedule.
pub(crate) fn deref_node(&mut self, node_id: NodeId) {
if self.intent.attached == Some(node_id) {
self.intent.attached = None;
}
self.intent.secondary.retain(|n| n != &node_id);
self.observed.locations.remove(&node_id);
debug_assert!(!self.intent.all_pageservers().contains(&node_id));
}
pub(crate) fn to_persistent(&self) -> TenantShardPersistence {
TenantShardPersistence {
tenant_id: self.tenant_shard_id.tenant_id.to_string(),
shard_number: self.tenant_shard_id.shard_number.0 as i32,
shard_count: self.tenant_shard_id.shard_count.literal() as i32,
shard_stripe_size: self.shard.stripe_size.0 as i32,
generation: self.generation.map(|g| g.into().unwrap_or(0) as i32),
generation_pageserver: self.intent.get_attached().map(|n| n.0 as i64),
placement_policy: serde_json::to_string(&self.policy).unwrap(),
config: serde_json::to_string(&self.config).unwrap(),
splitting: SplitState::default(),
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use pageserver_api::{
controller_api::NodeAvailability,
shard::{ShardCount, ShardNumber},
};
use utils::id::TenantId;
use crate::scheduler::test_utils::make_test_nodes;
use super::*;
fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantState {
let tenant_id = TenantId::generate();
let shard_number = ShardNumber(0);
let shard_count = ShardCount::new(1);
let tenant_shard_id = TenantShardId {
tenant_id,
shard_number,
shard_count,
};
TenantState::new(
tenant_shard_id,
ShardIdentity::new(
shard_number,
shard_count,
pageserver_api::shard::ShardStripeSize(32768),
)
.unwrap(),
policy,
)
}
/// Test the scheduling behaviors used when a tenant configured for HA is subject
/// to nodes being marked offline.
#[test]
fn tenant_ha_scheduling() -> anyhow::Result<()> {
// Start with three nodes. Our tenant will only use two. The third one is
// expected to remain unused.
let mut nodes = make_test_nodes(3);
let mut scheduler = Scheduler::new(nodes.values());
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
tenant_state
.schedule(&mut scheduler)
.expect("we have enough nodes, scheduling should work");
// Expect the shard to initially be scheduled onto two different nodes
assert_eq!(tenant_state.intent.secondary.len(), 1);
assert!(tenant_state.intent.attached.is_some());
let attached_node_id = tenant_state.intent.attached.unwrap();
let secondary_node_id = *tenant_state.intent.secondary.iter().last().unwrap();
assert_ne!(attached_node_id, secondary_node_id);
// Notifying the attached node is offline should demote it to a secondary
let changed = tenant_state.intent.demote_attached(attached_node_id);
assert!(changed);
assert!(tenant_state.intent.attached.is_none());
assert_eq!(tenant_state.intent.secondary.len(), 2);
// Update the scheduler state to indicate the node is offline
nodes
.get_mut(&attached_node_id)
.unwrap()
.set_availability(NodeAvailability::Offline);
scheduler.node_upsert(nodes.get(&attached_node_id).unwrap());
// Re-scheduling should promote the still-available secondary node to attached
tenant_state
.schedule(&mut scheduler)
.expect("active nodes are available");
assert_eq!(tenant_state.intent.attached.unwrap(), secondary_node_id);
// The original attached node should have been retained as a secondary
assert_eq!(
*tenant_state.intent.secondary.iter().last().unwrap(),
attached_node_id
);
tenant_state.intent.clear(&mut scheduler);
Ok(())
}
#[test]
fn intent_from_observed() -> anyhow::Result<()> {
let nodes = make_test_nodes(3);
let mut scheduler = Scheduler::new(nodes.values());
let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Attached(1));
tenant_state.observed.locations.insert(
NodeId(3),
ObservedStateLocation {
conf: Some(LocationConfig {
mode: LocationConfigMode::AttachedMulti,
generation: Some(2),
secondary_conf: None,
shard_number: tenant_state.shard.number.0,
shard_count: tenant_state.shard.count.literal(),
shard_stripe_size: tenant_state.shard.stripe_size.0,
tenant_conf: TenantConfig::default(),
}),
},
);
tenant_state.observed.locations.insert(
NodeId(2),
ObservedStateLocation {
conf: Some(LocationConfig {
mode: LocationConfigMode::AttachedStale,
generation: Some(1),
secondary_conf: None,
shard_number: tenant_state.shard.number.0,
shard_count: tenant_state.shard.count.literal(),
shard_stripe_size: tenant_state.shard.stripe_size.0,
tenant_conf: TenantConfig::default(),
}),
},
);
tenant_state.intent_from_observed(&mut scheduler);
// The attached location with the highest generation is used as the attached intent
assert_eq!(tenant_state.intent.attached, Some(NodeId(3)));
// Other locations get used as secondary
assert_eq!(tenant_state.intent.secondary, vec![NodeId(2)]);
scheduler.consistency_check(nodes.values(), [&tenant_state].into_iter())?;
tenant_state.intent.clear(&mut scheduler);
Ok(())
}
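// Illustrative sketch (editorial addition, not part of the original file): a test along
// these lines would exercise the `Secondary` arm of `schedule` -- no attached location
// and exactly one secondary. Values mirror the existing tests above.
#[test]
fn tenant_secondary_scheduling_sketch() -> anyhow::Result<()> {
    let nodes = make_test_nodes(2);
    let mut scheduler = Scheduler::new(nodes.values());
    let mut tenant_state = make_test_tenant_shard(PlacementPolicy::Secondary);
    tenant_state
        .schedule(&mut scheduler)
        .expect("we have enough nodes, scheduling should work");
    // Secondary policy keeps no attached location and exactly one secondary.
    assert!(tenant_state.intent.attached.is_none());
    assert_eq!(tenant_state.intent.secondary.len(), 1);
    tenant_state.intent.clear(&mut scheduler);
    Ok(())
}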
}

@@ -36,11 +36,11 @@ use utils::pid_file::{self, PidFileRead};
// it's waiting. If the process hasn't started/stopped after 5 seconds,
// it prints a notice that it's taking long, but keeps waiting.
//
const STOP_RETRY_TIMEOUT: Duration = Duration::from_secs(10);
const STOP_RETRIES: u128 = STOP_RETRY_TIMEOUT.as_millis() / RETRY_INTERVAL.as_millis();
const RETRY_INTERVAL: Duration = Duration::from_millis(100);
const DOT_EVERY_RETRIES: u128 = 10;
const NOTICE_AFTER_RETRIES: u128 = 50;
const RETRY_UNTIL_SECS: u64 = 10;
const RETRIES: u64 = (RETRY_UNTIL_SECS * 1000) / RETRY_INTERVAL_MILLIS;
const RETRY_INTERVAL_MILLIS: u64 = 100;
const DOT_EVERY_RETRIES: u64 = 10;
const NOTICE_AFTER_RETRIES: u64 = 50;
/// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
/// it itself.
@@ -52,7 +52,6 @@ pub enum InitialPidFile {
}
/// Start a background child process using the parameters given.
#[allow(clippy::too_many_arguments)]
pub async fn start_process<F, Fut, AI, A, EI>(
process_name: &str,
datadir: &Path,
@@ -60,7 +59,6 @@ pub async fn start_process<F, Fut, AI, A, EI>(
args: AI,
envs: EI,
initial_pid_file: InitialPidFile,
retry_timeout: &Duration,
process_status_check: F,
) -> anyhow::Result<()>
where
@@ -71,10 +69,6 @@ where
// Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
EI: IntoIterator<Item = (String, String)>,
{
let retries: u128 = retry_timeout.as_millis() / RETRY_INTERVAL.as_millis();
if !datadir.metadata().context("stat datadir")?.is_dir() {
anyhow::bail!("`datadir` must be a directory when calling this function: {datadir:?}");
}
let log_path = datadir.join(format!("{process_name}.log"));
let process_log_file = fs::OpenOptions::new()
.create(true)
@@ -91,17 +85,8 @@ where
let background_command = command
.stdout(process_log_file)
.stderr(same_file_for_stderr)
.args(args)
// spawn all child processes in their datadir, useful for all kinds of things,
// not least cleaning up child processes e.g. after an unclean exit from the test suite:
// ```
// lsof -d cwd -a +D Users/cs/src/neon/test_output
// ```
.current_dir(datadir);
let filled_cmd = fill_env_vars_prefixed_neon(fill_remote_storage_secrets_vars(
fill_rust_env_vars(background_command),
));
.args(args);
let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
filled_cmd.envs(envs);
let pid_file_to_check = match &initial_pid_file {
@@ -133,7 +118,7 @@ where
.unwrap();
});
for retries in 0..retries {
for retries in 0..RETRIES {
match process_started(pid, pid_file_to_check, &process_status_check).await {
Ok(true) => {
println!("\n{process_name} started and passed status check, pid: {pid}");
@@ -151,7 +136,7 @@ where
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(RETRY_INTERVAL);
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
}
Err(e) => {
println!("error starting process {process_name:?}: {e:#}");
@@ -160,10 +145,9 @@ where
}
}
println!();
anyhow::bail!(format!(
"{} did not start+pass status checks within {:?} seconds",
process_name, retry_timeout
));
anyhow::bail!(
"{process_name} did not start+pass status checks within {RETRY_UNTIL_SECS} seconds"
);
}
/// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
@@ -219,7 +203,7 @@ pub fn stop_process(
}
pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
for retries in 0..STOP_RETRIES {
for retries in 0..RETRIES {
match process_has_stopped(pid) {
Ok(true) => {
println!("\n{process_name} stopped");
@@ -235,7 +219,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
print!(".");
io::stdout().flush().unwrap();
}
thread::sleep(RETRY_INTERVAL);
thread::sleep(Duration::from_millis(RETRY_INTERVAL_MILLIS));
}
Err(e) => {
println!("{process_name} with pid {pid} failed to stop: {e:#}");
@@ -244,10 +228,7 @@ pub fn wait_until_stopped(process_name: &str, pid: Pid) -> anyhow::Result<()> {
}
}
println!();
anyhow::bail!(format!(
"{} with pid {} did not stop in {:?} seconds",
process_name, pid, STOP_RETRY_TIMEOUT
));
anyhow::bail!("{process_name} with pid {pid} did not stop in {RETRY_UNTIL_SECS} seconds");
}
fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command {
@@ -287,15 +268,6 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command {
cmd
}
fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command {
for (var, val) in std::env::vars() {
if var.starts_with("NEON_PAGESERVER_") {
cmd = cmd.env(var, val);
}
}
cmd
}
/// Add a `pre_exec` to the cmd that, inbetween fork() and exec(),
/// 1. Claims a pidfile with a fcntl lock on it and
/// 2. Sets up the pidfile's file descriptor so that it (and the lock)

@@ -9,21 +9,22 @@ use anyhow::{anyhow, bail, Context, Result};
use clap::{value_parser, Arg, ArgAction, ArgMatches, Command, ValueEnum};
use compute_api::spec::ComputeMode;
use control_plane::endpoint::ComputeControlPlane;
use control_plane::local_env::{
InitForceMode, LocalEnv, NeonBroker, NeonLocalInitConf, NeonLocalInitPageserverConf,
SafekeeperConf,
};
use control_plane::pageserver::PageServerNode;
use control_plane::local_env::{InitForceMode, LocalEnv};
use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
use control_plane::safekeeper::SafekeeperNode;
use control_plane::storage_controller::StorageController;
use control_plane::{broker, local_env};
use pageserver_api::config::{
use pageserver_api::controller_api::{
NodeAvailability, NodeConfigureRequest, NodeSchedulingPolicy, PlacementPolicy,
};
use pageserver_api::models::{
ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
};
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
use pageserver_api::{
DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
};
use pageserver_api::controller_api::{PlacementPolicy, TenantCreateRequest};
use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo};
use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
use postgres_backend::AuthType;
use postgres_connection::parse_host_port;
use safekeeper_api::{
@@ -34,7 +35,6 @@ use std::collections::{BTreeSet, HashMap};
use std::path::PathBuf;
use std::process::exit;
use std::str::FromStr;
use std::time::Duration;
use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR;
use url::Host;
use utils::{
@@ -54,6 +54,44 @@ const DEFAULT_PG_VERSION: &str = "15";
const DEFAULT_PAGESERVER_CONTROL_PLANE_API: &str = "http://127.0.0.1:1234/upcall/v1/";
fn default_conf(num_pageservers: u16) -> String {
let mut template = format!(
r#"
# Default built-in configuration, defined in main.rs
control_plane_api = '{DEFAULT_PAGESERVER_CONTROL_PLANE_API}'
[broker]
listen_addr = '{DEFAULT_BROKER_ADDR}'
[[safekeepers]]
id = {DEFAULT_SAFEKEEPER_ID}
pg_port = {DEFAULT_SAFEKEEPER_PG_PORT}
http_port = {DEFAULT_SAFEKEEPER_HTTP_PORT}
"#,
);
for i in 0..num_pageservers {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
template += &format!(
r#"
[[pageservers]]
id = {pageserver_id}
listen_pg_addr = '127.0.0.1:{pg_port}'
listen_http_addr = '127.0.0.1:{http_port}'
pg_auth_type = '{trust_auth}'
http_auth_type = '{trust_auth}'
"#,
trust_auth = AuthType::Trust,
)
}
template
}
///
/// Timelines tree element used as a value in the HashMap.
///
@@ -86,8 +124,7 @@ fn main() -> Result<()> {
handle_init(sub_args).map(Some)
} else {
// all other commands need an existing config
let mut env =
LocalEnv::load_config(&local_env::base_path()).context("Error loading config")?;
let mut env = LocalEnv::load_config().context("Error loading config")?;
let original_env = env.clone();
let rt = tokio::runtime::Builder::new_current_thread()
@@ -98,7 +135,7 @@ fn main() -> Result<()> {
let subcommand_result = match sub_name {
"tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
"timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
"start" => rt.block_on(handle_start_all(&env, get_start_timeout(sub_args))),
"start" => rt.block_on(handle_start_all(sub_args, &env)),
"stop" => rt.block_on(handle_stop_all(sub_args, &env)),
"pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
"storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
@@ -117,7 +154,7 @@ fn main() -> Result<()> {
};
match subcommand_result {
Ok(Some(updated_env)) => updated_env.persist_config()?,
Ok(Some(updated_env)) => updated_env.persist_config(&updated_env.base_data_dir)?,
Ok(None) => (),
Err(e) => {
eprintln!("command failed: {e:?}");
@@ -306,66 +343,48 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
}
fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
let num_pageservers = init_match.get_one::<u16>("num-pageservers");
let force = init_match.get_one("force").expect("we set a default value");
// Create the in-memory `LocalEnv` that we'd normally load from disk in `load_config`.
let init_conf: NeonLocalInitConf = if let Some(config_path) =
init_match.get_one::<PathBuf>("config")
{
// User (likely the Python test suite) provided a description of the environment.
if num_pageservers.is_some() {
bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead");
}
let num_pageservers = init_match
.get_one::<u16>("num-pageservers")
.expect("num-pageservers arg has a default");
// Create config file
let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
// load and parse the file
let contents = std::fs::read_to_string(config_path).with_context(|| {
std::fs::read_to_string(config_path).with_context(|| {
format!(
"Could not read configuration file '{}'",
config_path.display()
)
})?;
toml_edit::de::from_str(&contents)?
})?
} else {
// User (likely interactive) did not provide a description of the environment, give them the default
NeonLocalInitConf {
control_plane_api: Some(Some(DEFAULT_PAGESERVER_CONTROL_PLANE_API.parse().unwrap())),
broker: NeonBroker {
listen_addr: DEFAULT_BROKER_ADDR.parse().unwrap(),
},
safekeepers: vec![SafekeeperConf {
id: DEFAULT_SAFEKEEPER_ID,
pg_port: DEFAULT_SAFEKEEPER_PG_PORT,
http_port: DEFAULT_SAFEKEEPER_HTTP_PORT,
..Default::default()
}],
pageservers: (0..num_pageservers.copied().unwrap_or(1))
.map(|i| {
let pageserver_id = NodeId(DEFAULT_PAGESERVER_ID.0 + i as u64);
let pg_port = DEFAULT_PAGESERVER_PG_PORT + i;
let http_port = DEFAULT_PAGESERVER_HTTP_PORT + i;
NeonLocalInitPageserverConf {
id: pageserver_id,
listen_pg_addr: format!("127.0.0.1:{pg_port}"),
listen_http_addr: format!("127.0.0.1:{http_port}"),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
other: Default::default(),
}
})
.collect(),
pg_distrib_dir: None,
neon_distrib_dir: None,
default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)),
storage_controller: None,
control_plane_compute_hook_api: None,
}
// Built-in default config
default_conf(*num_pageservers)
};
LocalEnv::init(init_conf, force)
.context("materialize initial neon_local environment on disk")?;
Ok(LocalEnv::load_config(&local_env::base_path())
.expect("freshly written config should be loadable"))
let pg_version = init_match
.get_one::<u32>("pg-version")
.copied()
.context("Failed to parse postgres version from the argument string")?;
let mut env =
LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
let force = init_match.get_one("force").expect("we set a default value");
env.init(pg_version, force)
.context("Failed to initialize neon repository")?;
// Create remote storage location for default LocalFs remote storage
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
// Initialize pageserver, create initial tenant and timeline.
for ps_conf in &env.pageservers {
PageServerNode::from_env(&env, ps_conf)
.initialize(&pageserver_config_overrides(init_match))
.unwrap_or_else(|e| {
eprintln!("pageserver init failed: {e:?}");
exit(1);
});
}
Ok(env)
}
/// The default pageserver is the one where CLI tenant/timeline operations are sent by default.
@@ -380,6 +399,15 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
PageServerNode::from_env(env, ps_conf)
}
fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
init_match
.get_many::<String>("pageserver-config-override")
.into_iter()
.flatten()
.map(String::as_str)
.collect()
}
async fn handle_tenant(
tenant_match: &ArgMatches,
env: &mut local_env::LocalEnv,
@@ -391,54 +419,6 @@ async fn handle_tenant(
println!("{} {:?}", t.id, t.state);
}
}
Some(("import", import_match)) => {
let tenant_id = parse_tenant_id(import_match)?.unwrap_or_else(TenantId::generate);
let storage_controller = StorageController::from_env(env);
let create_response = storage_controller.tenant_import(tenant_id).await?;
let shard_zero = create_response
.shards
.first()
.expect("Import response omitted shards");
let attached_pageserver_id = shard_zero.node_id;
let pageserver =
PageServerNode::from_env(env, env.get_pageserver_conf(attached_pageserver_id)?);
println!(
"Imported tenant {tenant_id}, attached to pageserver {attached_pageserver_id}"
);
let timelines = pageserver
.http_client
.list_timelines(shard_zero.shard_id)
.await?;
// Pick a 'main' timeline that has no ancestors, the rest will get arbitrary names
let main_timeline = timelines
.iter()
.find(|t| t.ancestor_timeline_id.is_none())
.expect("No timelines found")
.timeline_id;
let mut branch_i = 0;
for timeline in timelines.iter() {
let branch_name = if timeline.timeline_id == main_timeline {
"main".to_string()
} else {
branch_i += 1;
format!("branch_{branch_i}")
};
println!(
"Importing timeline {tenant_id}/{} as branch {branch_name}",
timeline.timeline_id
);
env.register_branch_mapping(branch_name, tenant_id, timeline.timeline_id)?;
}
}
Some(("create", create_match)) => {
let tenant_conf: HashMap<_, _> = create_match
.get_many::<String>("config")
@@ -598,9 +578,13 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
Some(("import", import_match)) => {
let tenant_id = get_tenant_id(import_match, env)?;
let timeline_id = parse_timeline_id(import_match)?.expect("No timeline id provided");
let branch_name = import_match
.get_one::<String>("branch-name")
.ok_or_else(|| anyhow!("No branch name provided"))?;
let name = import_match
.get_one::<String>("node-name")
.ok_or_else(|| anyhow!("No node name provided"))?;
let update_catalog = import_match
.get_one::<bool>("update-catalog")
.cloned()
.unwrap_or_default();
// Parse base inputs
let base_tarfile = import_match
@@ -627,11 +611,24 @@ async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::Local
.copied()
.context("Failed to parse postgres version from the argument string")?;
let mut cplane = ComputeControlPlane::load(env.clone())?;
println!("Importing timeline into pageserver ...");
pageserver
.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
.await?;
env.register_branch_mapping(branch_name.to_string(), tenant_id, timeline_id)?;
env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
println!("Creating endpoint for imported timeline ...");
cplane.new_endpoint(
name,
tenant_id,
timeline_id,
None,
None,
pg_version,
ComputeMode::Primary,
!update_catalog,
)?;
println!("Done");
}
Some(("branch", branch_match)) => {
@@ -794,8 +791,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.copied()
.unwrap_or(false);
let allow_multiple = sub_args.get_flag("allow-multiple");
let mode = match (lsn, hot_standby) {
(Some(lsn), false) => ComputeMode::Static(lsn),
(None, true) => ComputeMode::Replica,
@@ -813,9 +808,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
_ => {}
}
if !allow_multiple {
cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
}
cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
cplane.new_endpoint(
&endpoint_id,
@@ -844,15 +837,20 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");
let allow_multiple = sub_args.get_flag("allow-multiple");
// If --safekeepers argument is given, use only the listed
// safekeeper nodes; otherwise all from the env.
let safekeepers = if let Some(safekeepers) = parse_safekeepers(sub_args)? {
safekeepers
} else {
env.safekeepers.iter().map(|sk| sk.id).collect()
};
// If --safekeepers argument is given, use only the listed safekeeper nodes.
let safekeepers =
if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
let mut safekeepers: Vec<NodeId> = Vec::new();
for sk_id in safekeepers_str.split(',').map(str::trim) {
let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
})?);
safekeepers.push(sk_id);
}
safekeepers
} else {
env.safekeepers.iter().map(|sk| sk.id).collect()
};
let endpoint = cplane
.endpoints
@@ -864,13 +862,11 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
.cloned()
.unwrap_or_default();
if !allow_multiple {
cplane.check_conflicting_endpoints(
endpoint.mode,
endpoint.tenant_id,
endpoint.timeline_id,
)?;
}
cplane.check_conflicting_endpoints(
endpoint.mode,
endpoint.tenant_id,
endpoint.timeline_id,
)?;
let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
let conf = env.get_pageserver_conf(pageserver_id).unwrap();
@@ -956,10 +952,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
})
.collect::<Vec<_>>()
};
// If --safekeepers argument is given, use only the listed
// safekeeper nodes; otherwise all from the env.
let safekeepers = parse_safekeepers(sub_args)?;
endpoint.reconfigure(pageservers, None, safekeepers).await?;
endpoint.reconfigure(pageservers, None).await?;
}
"stop" => {
let endpoint_id = sub_args
@@ -981,23 +974,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
Ok(())
}
/// Parse --safekeepers as list of safekeeper ids.
fn parse_safekeepers(sub_args: &ArgMatches) -> Result<Option<Vec<NodeId>>> {
if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
let mut safekeepers: Vec<NodeId> = Vec::new();
for sk_id in safekeepers_str.split(',').map(str::trim) {
let sk_id = NodeId(
u64::from_str(sk_id)
.map_err(|_| anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list"))?,
);
safekeepers.push(sk_id);
}
Ok(Some(safekeepers))
} else {
Ok(None)
}
}
fn handle_mappings(sub_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
let (sub_name, sub_args) = match sub_match.subcommand() {
Some(ep_subcommand_data) => ep_subcommand_data,
@@ -1043,18 +1019,11 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
))
}
fn get_start_timeout(args: &ArgMatches) -> &Duration {
let humantime_duration = args
.get_one::<humantime::Duration>("start-timeout")
.expect("invalid value for start-timeout");
humantime_duration.as_ref()
}
async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
match sub_match.subcommand() {
Some(("start", subcommand_args)) => {
if let Err(e) = get_pageserver(env, subcommand_args)?
.start(get_start_timeout(subcommand_args))
.start(&pageserver_config_overrides(subcommand_args))
.await
{
eprintln!("pageserver start failed: {e}");
@@ -1082,12 +1051,30 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
exit(1);
}
if let Err(e) = pageserver.start(get_start_timeout(sub_match)).await {
if let Err(e) = pageserver
.start(&pageserver_config_overrides(subcommand_args))
.await
{
eprintln!("pageserver start failed: {e}");
exit(1);
}
}
Some(("set-state", subcommand_args)) => {
let pageserver = get_pageserver(env, subcommand_args)?;
let scheduling = subcommand_args.get_one("scheduling");
let availability = subcommand_args.get_one("availability");
let storage_controller = StorageController::from_env(env);
storage_controller
.node_configure(NodeConfigureRequest {
node_id: pageserver.conf.id,
scheduling: scheduling.cloned(),
availability: availability.cloned(),
})
.await?;
}
Some(("status", subcommand_args)) => {
match get_pageserver(env, subcommand_args)?.check_status().await {
Ok(_) => println!("Page server is up and running"),
@@ -1110,8 +1097,8 @@ async fn handle_storage_controller(
) -> Result<()> {
let svc = StorageController::from_env(env);
match sub_match.subcommand() {
Some(("start", start_match)) => {
if let Err(e) = svc.start(get_start_timeout(start_match)).await {
Some(("start", _start_match)) => {
if let Err(e) = svc.start().await {
eprintln!("start failed: {e}");
exit(1);
}
@@ -1170,10 +1157,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
"start" => {
let extra_opts = safekeeper_extra_opts(sub_args);
if let Err(e) = safekeeper
.start(extra_opts, get_start_timeout(sub_args))
.await
{
if let Err(e) = safekeeper.start(extra_opts).await {
eprintln!("safekeeper start failed: {}", e);
exit(1);
}
@@ -1199,10 +1183,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
}
let extra_opts = safekeeper_extra_opts(sub_args);
if let Err(e) = safekeeper
.start(extra_opts, get_start_timeout(sub_args))
.await
{
if let Err(e) = safekeeper.start(extra_opts).await {
eprintln!("safekeeper start failed: {}", e);
exit(1);
}
@@ -1215,18 +1196,15 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
Ok(())
}
async fn handle_start_all(
env: &local_env::LocalEnv,
retry_timeout: &Duration,
) -> anyhow::Result<()> {
async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
// Endpoints are not started automatically
broker::start_broker_process(env, retry_timeout).await?;
broker::start_broker_process(env).await?;
// Only start the storage controller if the pageserver is configured to need it
if env.control_plane_api.is_some() {
let storage_controller = StorageController::from_env(env);
if let Err(e) = storage_controller.start(retry_timeout).await {
if let Err(e) = storage_controller.start().await {
eprintln!("storage_controller start failed: {:#}", e);
try_stop_all(env, true).await;
exit(1);
@@ -1235,7 +1213,10 @@ async fn handle_start_all(
for ps_conf in &env.pageservers {
let pageserver = PageServerNode::from_env(env, ps_conf);
if let Err(e) = pageserver.start(retry_timeout).await {
if let Err(e) = pageserver
.start(&pageserver_config_overrides(sub_match))
.await
{
eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
try_stop_all(env, true).await;
exit(1);
@@ -1244,7 +1225,7 @@ async fn handle_start_all(
for node in env.safekeepers.iter() {
let safekeeper = SafekeeperNode::from_env(env, node);
if let Err(e) = safekeeper.start(vec![], retry_timeout).await {
if let Err(e) = safekeeper.start(vec![]).await {
eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
try_stop_all(env, false).await;
exit(1);
@@ -1267,7 +1248,7 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
match ComputeControlPlane::load(env.clone()) {
Ok(cplane) => {
for (_k, node) in cplane.endpoints {
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast" }, false) {
if let Err(e) = node.stop(if immediate { "immediate" } else { "fast " }, false) {
eprintln!("postgres stop failed: {e:#}");
}
}
@@ -1304,15 +1285,6 @@ async fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
}
fn cli() -> Command {
let timeout_arg = Arg::new("start-timeout")
.long("start-timeout")
.short('t')
.global(true)
.help("timeout until we fail the command, e.g. 30s")
.value_parser(value_parser!(humantime::Duration))
.default_value("10s")
.required(false);
let branch_name_arg = Arg::new("branch-name")
.long("branch-name")
.help("Name of the branch to be created or used as an alias for other services")
@@ -1385,6 +1357,13 @@ fn cli() -> Command {
.required(false)
.value_name("stop-mode");
let pageserver_config_args = Arg::new("pageserver-config-override")
.long("pageserver-config-override")
.num_args(1)
.action(ArgAction::Append)
.help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
.required(false);
let remote_ext_config_args = Arg::new("remote-ext-config")
.long("remote-ext-config")
.num_args(1)
@@ -1418,7 +1397,9 @@ fn cli() -> Command {
let num_pageservers_arg = Arg::new("num-pageservers")
.value_parser(value_parser!(u16))
.long("num-pageservers")
.help("How many pageservers to create (default 1)");
.help("How many pageservers to create (default 1)")
.required(false)
.default_value("1");
let update_catalog = Arg::new("update-catalog")
.value_parser(value_parser!(bool))
@@ -1432,25 +1413,20 @@ fn cli() -> Command {
.help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
.required(false);
let allow_multiple = Arg::new("allow-multiple")
.help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
.long("allow-multiple")
.action(ArgAction::SetTrue)
.required(false);
Command::new("Neon CLI")
.arg_required_else_help(true)
.version(GIT_VERSION)
.subcommand(
Command::new("init")
.about("Initialize a new Neon repository, preparing configs for services to start with")
.arg(pageserver_config_args.clone())
.arg(num_pageservers_arg.clone())
.arg(
Arg::new("config")
.long("config")
.required(false)
.value_parser(value_parser!(PathBuf))
.value_name("config")
.value_name("config"),
)
.arg(pg_version_arg.clone())
.arg(force_arg)
@@ -1458,7 +1434,6 @@ fn cli() -> Command {
.subcommand(
Command::new("timeline")
.about("Manage timelines")
.arg_required_else_help(true)
.subcommand(Command::new("list")
.about("List all timelines, available to this pageserver")
.arg(tenant_id_arg.clone()))
@@ -1481,7 +1456,8 @@ fn cli() -> Command {
.about("Import timeline from basebackup directory")
.arg(tenant_id_arg.clone())
.arg(timeline_id_arg.clone())
.arg(branch_name_arg.clone())
.arg(Arg::new("node-name").long("node-name")
.help("Name to assign to the imported timeline"))
.arg(Arg::new("base-tarfile")
.long("base-tarfile")
.value_parser(value_parser!(PathBuf))
@@ -1497,6 +1473,7 @@ fn cli() -> Command {
.arg(Arg::new("end-lsn").long("end-lsn")
.help("Lsn the basebackup ends at"))
.arg(pg_version_arg.clone())
.arg(update_catalog.clone())
)
).subcommand(
Command::new("tenant")
@@ -1519,8 +1496,6 @@ fn cli() -> Command {
.subcommand(Command::new("config")
.arg(tenant_id_arg.clone())
.arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
.subcommand(Command::new("import").arg(tenant_id_arg.clone().required(true))
.about("Import a tenant that is present in remote storage, and create branches for its timelines"))
)
.subcommand(
Command::new("pageserver")
@@ -1530,7 +1505,7 @@ fn cli() -> Command {
.subcommand(Command::new("status"))
.subcommand(Command::new("start")
.about("Start local pageserver")
.arg(timeout_arg.clone())
.arg(pageserver_config_args.clone())
)
.subcommand(Command::new("stop")
.about("Stop local pageserver")
@@ -1538,16 +1513,21 @@ fn cli() -> Command {
)
.subcommand(Command::new("restart")
.about("Restart local pageserver")
.arg(timeout_arg.clone())
.arg(pageserver_config_args.clone())
)
.subcommand(Command::new("set-state")
.arg(Arg::new("availability").value_parser(value_parser!(NodeAvailability)).long("availability").action(ArgAction::Set).help("Availability state: offline,active"))
.arg(Arg::new("scheduling").value_parser(value_parser!(NodeSchedulingPolicy)).long("scheduling").action(ArgAction::Set).help("Scheduling state: draining,pause,filling,active"))
.about("Set scheduling or availability state of pageserver node")
.arg(pageserver_config_args.clone())
)
)
.subcommand(
Command::new("storage_controller")
.arg_required_else_help(true)
.about("Manage storage_controller")
.subcommand(Command::new("start").about("Start storage controller")
.arg(timeout_arg.clone()))
.subcommand(Command::new("stop").about("Stop storage controller")
.subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
.subcommand(Command::new("stop").about("Stop local pageserver")
.arg(stop_mode_arg.clone()))
)
.subcommand(
@@ -1558,7 +1538,6 @@ fn cli() -> Command {
.about("Start local safekeeper")
.arg(safekeeper_id_arg.clone())
.arg(safekeeper_extra_opt_arg.clone())
.arg(timeout_arg.clone())
)
.subcommand(Command::new("stop")
.about("Stop local safekeeper")
@@ -1570,7 +1549,6 @@ fn cli() -> Command {
.arg(safekeeper_id_arg)
.arg(stop_mode_arg.clone())
.arg(safekeeper_extra_opt_arg)
.arg(timeout_arg.clone())
)
)
.subcommand(
@@ -1595,22 +1573,18 @@ fn cli() -> Command {
.arg(pg_version_arg.clone())
.arg(hot_standby_arg.clone())
.arg(update_catalog)
.arg(allow_multiple.clone())
)
.subcommand(Command::new("start")
.about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
.arg(endpoint_id_arg.clone())
.arg(endpoint_pageserver_id_arg.clone())
.arg(safekeepers_arg.clone())
.arg(safekeepers_arg)
.arg(remote_ext_config_args)
.arg(create_test_user)
.arg(allow_multiple.clone())
.arg(timeout_arg.clone())
)
.subcommand(Command::new("reconfigure")
.about("Reconfigure the endpoint")
.arg(endpoint_pageserver_id_arg)
.arg(safekeepers_arg)
.arg(endpoint_id_arg.clone())
.arg(tenant_id_arg.clone())
)
@@ -1658,7 +1632,7 @@ fn cli() -> Command {
.subcommand(
Command::new("start")
.about("Start page server and safekeepers")
.arg(timeout_arg.clone())
.arg(pageserver_config_args)
)
.subcommand(
Command::new("stop")

@@ -1,22 +1,17 @@
//! Code to manage the storage broker
//!
//! In the local test environment, the storage broker stores its data directly in
//! In the local test environment, the data for each safekeeper is stored in
//!
//! ```text
//! .neon
//! .neon/safekeepers/<safekeeper id>
//! ```
use std::time::Duration;
use anyhow::Context;
use camino::Utf8PathBuf;
use crate::{background_process, local_env};
pub async fn start_broker_process(
env: &local_env::LocalEnv,
retry_timeout: &Duration,
) -> anyhow::Result<()> {
pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
let broker = &env.broker;
let listen_addr = &broker.listen_addr;
@@ -32,7 +27,6 @@ pub async fn start_broker_process(
args,
[],
background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
retry_timeout,
|| async {
let url = broker.client_url();
let status_url = url.join("status").with_context(|| {

@@ -499,23 +499,6 @@ impl Endpoint {
.join(",")
}
/// Map safekeepers ids to the actual connection strings.
fn build_safekeepers_connstrs(&self, sk_ids: Vec<NodeId>) -> Result<Vec<String>> {
let mut safekeeper_connstrings = Vec::new();
if self.mode == ComputeMode::Primary {
for sk_id in sk_ids {
let sk = self
.env
.safekeepers
.iter()
.find(|node| node.id == sk_id)
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
}
}
Ok(safekeeper_connstrings)
}
pub async fn start(
&self,
auth_token: &Option<String>,
@@ -540,7 +523,18 @@ impl Endpoint {
let pageserver_connstring = Self::build_pageserver_connstr(&pageservers);
assert!(!pageserver_connstring.is_empty());
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
let mut safekeeper_connstrings = Vec::new();
if self.mode == ComputeMode::Primary {
for sk_id in safekeepers {
let sk = self
.env
.safekeepers
.iter()
.find(|node| node.id == sk_id)
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.get_compute_port()));
}
}
// check for file remote_extensions_spec.json
// if it is present, read it and pass to compute_ctl
@@ -560,7 +554,6 @@ impl Endpoint {
format_version: 1.0,
operation_uuid: None,
features: self.features.clone(),
swap_size_bytes: None,
cluster: Cluster {
cluster_id: None, // project ID: not used
name: None, // project name: not used
@@ -598,6 +591,7 @@ impl Endpoint {
remote_extensions,
pgbouncer_settings: None,
shard_stripe_size: Some(shard_stripe_size),
primary_is_running: None,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
@@ -746,7 +740,6 @@ impl Endpoint {
&self,
mut pageservers: Vec<(Host, u16)>,
stripe_size: Option<ShardStripeSize>,
safekeepers: Option<Vec<NodeId>>,
) -> Result<()> {
let mut spec: ComputeSpec = {
let spec_path = self.endpoint_path().join("spec.json");
@@ -781,12 +774,6 @@ impl Endpoint {
spec.shard_stripe_size = stripe_size.map(|s| s.0 as usize);
}
// If safekeepers are not specified, don't change them.
if let Some(safekeepers) = safekeepers {
let safekeeper_connstrings = self.build_safekeepers_connstrs(safekeepers)?;
spec.safekeeper_connstrings = safekeeper_connstrings;
}
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(30))
.build()

@@ -3,7 +3,7 @@
//! Now it also provides init method which acts like a stub for proper installation
//! script which will use local paths.
use anyhow::{bail, Context};
use anyhow::{bail, ensure, Context};
use clap::ValueEnum;
use postgres_backend::AuthType;
@@ -17,14 +17,11 @@ use std::net::Ipv4Addr;
use std::net::SocketAddr;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Duration;
use utils::{
auth::{encode_from_key_file, Claims},
id::{NodeId, TenantId, TenantTimelineId, TimelineId},
};
use crate::pageserver::PageServerNode;
use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR;
use crate::safekeeper::SafekeeperNode;
pub const DEFAULT_PG_VERSION: u32 = 15;
@@ -36,107 +33,63 @@ pub const DEFAULT_PG_VERSION: u32 = 15;
// to 'neon_local init --config=<path>' option. See control_plane/simple.conf for
// an example.
//
#[derive(PartialEq, Eq, Clone, Debug)]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct LocalEnv {
// Base directory for all the nodes (the pageserver, safekeepers and
// compute endpoints).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the NEON_REPO_DIR env variable which
// must be an absolute path. If the env var is not set, $PWD/.neon is used.
// config file itself is. It is read from the NEON_REPO_DIR env variable or
// '.neon' if not given.
#[serde(skip)]
pub base_data_dir: PathBuf,
// Path to postgres distribution. It's expected that "bin", "include",
// "lib", "share" from postgres distribution are there. If at some point
// in time we will be able to run against vanilla postgres we may split that
// to four separate paths and match OS-specific installation layout.
#[serde(default)]
pub pg_distrib_dir: PathBuf,
// Path to pageserver binary.
#[serde(default)]
pub neon_distrib_dir: PathBuf,
// Default tenant ID to use with the 'neon_local' command line utility, when
// --tenant_id is not explicitly specified.
#[serde(default)]
pub default_tenant_id: Option<TenantId>,
// used to issue tokens during e.g pg start
#[serde(default)]
pub private_key_path: PathBuf,
pub broker: NeonBroker,
// Configuration for the storage controller (1 per neon_local environment)
pub storage_controller: NeonStorageControllerConf,
/// This Vec must always contain at least one pageserver
/// Populated by [`Self::load_config`] from the individual `pageserver.toml`s.
/// NB: not used anymore except for informing users that they need to change their `.neon/config`.
pub pageservers: Vec<PageServerConf>,
#[serde(default)]
pub safekeepers: Vec<SafekeeperConf>,
// Control plane upcall API for pageserver: if None, we will not run storage_controller If set, this will
// be propagated into each pageserver's configuration.
#[serde(default)]
pub control_plane_api: Option<Url>,
// Control plane upcall API for storage controller. If set, this will be propagated into the
// storage controller's configuration.
#[serde(default)]
pub control_plane_compute_hook_api: Option<Url>,
/// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user.
#[serde(default)]
// A `HashMap<String, HashMap<TenantId, TimelineId>>` would be more appropriate here,
// but deserialization into a generic toml object as `toml::Value::try_from` fails with an error.
// https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table".
pub branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}
/// On-disk state stored in `.neon/config`.
#[derive(PartialEq, Eq, Clone, Debug, Default, Serialize, Deserialize)]
#[serde(default, deny_unknown_fields)]
pub struct OnDiskConfig {
pub pg_distrib_dir: PathBuf,
pub neon_distrib_dir: PathBuf,
pub default_tenant_id: Option<TenantId>,
pub private_key_path: PathBuf,
pub broker: NeonBroker,
pub storage_controller: NeonStorageControllerConf,
#[serde(
skip_serializing,
deserialize_with = "fail_if_pageservers_field_specified"
)]
pub pageservers: Vec<PageServerConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Url>,
pub control_plane_compute_hook_api: Option<Url>,
branch_name_mappings: HashMap<String, Vec<(TenantId, TimelineId)>>,
}
fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result<Vec<PageServerConf>, D::Error>
where
D: serde::Deserializer<'de>,
{
Err(serde::de::Error::custom(
"The 'pageservers' field is no longer used; pageserver.toml is now authoritative; \
Please remove the `pageservers` from your .neon/config.",
))
}
/// The description of the neon_local env to be initialized by `neon_local init --config`.
#[derive(Clone, Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct NeonLocalInitConf {
// TODO: do we need this? Seems unused
pub pg_distrib_dir: Option<PathBuf>,
// TODO: do we need this? Seems unused
pub neon_distrib_dir: Option<PathBuf>,
pub default_tenant_id: TenantId,
pub broker: NeonBroker,
pub storage_controller: Option<NeonStorageControllerConf>,
pub pageservers: Vec<NeonLocalInitPageserverConf>,
pub safekeepers: Vec<SafekeeperConf>,
pub control_plane_api: Option<Option<Url>>,
pub control_plane_compute_hook_api: Option<Option<Url>>,
}
/// Broker config for cluster internal communication.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
@@ -145,33 +98,6 @@ pub struct NeonBroker {
pub listen_addr: SocketAddr,
}
/// Broker config for cluster internal communication.
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default)]
pub struct NeonStorageControllerConf {
/// Heartbeat timeout before marking a node offline
#[serde(with = "humantime_serde")]
pub max_unavailable: Duration,
/// Threshold for auto-splitting a tenant into shards
pub split_threshold: Option<u64>,
}
impl NeonStorageControllerConf {
// Use a shorter pageserver unavailability interval than the default to speed up tests.
const DEFAULT_MAX_UNAVAILABLE_INTERVAL: std::time::Duration =
std::time::Duration::from_secs(10);
}
impl Default for NeonStorageControllerConf {
fn default() -> Self {
Self {
max_unavailable: Self::DEFAULT_MAX_UNAVAILABLE_INTERVAL,
split_threshold: None,
}
}
}
// Dummy Default impl to satisfy Deserialize derive.
impl Default for NeonBroker {
fn default() -> Self {
@@ -187,18 +113,22 @@ impl NeonBroker {
}
}
// neon_local needs to know this subset of pageserver configuration.
// For legacy reasons, this information is duplicated from `pageserver.toml` into `.neon/config`.
// It can get stale if `pageserver.toml` is changed.
// TODO(christian): don't store this at all in `.neon/config`, always load it from `pageserver.toml`
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
#[serde(default, deny_unknown_fields)]
pub struct PageServerConf {
// node id
pub id: NodeId,
// Pageserver connection settings
pub listen_pg_addr: String,
pub listen_http_addr: String,
// auth type used for the PG and HTTP ports
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
pub(crate) virtual_file_io_engine: Option<String>,
pub(crate) get_vectored_impl: Option<String>,
}
impl Default for PageServerConf {
@@ -209,40 +139,8 @@ impl Default for PageServerConf {
listen_http_addr: String::new(),
pg_auth_type: AuthType::Trust,
http_auth_type: AuthType::Trust,
}
}
}
/// The toml that can be passed to `neon_local init --config`.
/// This is a subset of the `pageserver.toml` configuration.
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(Clone, Debug, serde::Deserialize, serde::Serialize)]
pub struct NeonLocalInitPageserverConf {
pub id: NodeId,
pub listen_pg_addr: String,
pub listen_http_addr: String,
pub pg_auth_type: AuthType,
pub http_auth_type: AuthType,
#[serde(flatten)]
pub other: HashMap<String, toml::Value>,
}
impl From<&NeonLocalInitPageserverConf> for PageServerConf {
fn from(conf: &NeonLocalInitPageserverConf) -> Self {
let NeonLocalInitPageserverConf {
id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
other: _,
} = conf;
Self {
id: *id,
listen_pg_addr: listen_pg_addr.clone(),
listen_http_addr: listen_http_addr.clone(),
pg_auth_type: *pg_auth_type,
http_auth_type: *http_auth_type,
virtual_file_io_engine: None,
get_vectored_impl: None,
}
}
}
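A hedged sketch of the `#[serde(flatten)]` catch-all used by `NeonLocalInitPageserverConf` above (standalone example; the extra key shown is only illustrative): known keys land in typed fields while everything else is preserved in `other` for later merging into `pageserver.toml`.

    use std::collections::HashMap;

    use serde::Deserialize;

    #[derive(Deserialize, Debug)]
    struct PageserverInitSketch {
        id: u64,
        listen_pg_addr: String,
        // Any key not matched above ends up here, value and all.
        #[serde(flatten)]
        other: HashMap<String, toml::Value>,
    }

    fn main() {
        let conf: PageserverInitSketch = toml::from_str(
            "id = 1\nlisten_pg_addr = '127.0.0.1:64000'\nvirtual_file_io_engine = 'tokio-epoll-uring'",
        )
        .unwrap();
        assert_eq!(conf.id, 1);
        // The unknown key is kept verbatim rather than rejected.
        assert_eq!(
            conf.other["virtual_file_io_engine"].as_str(),
            Some("tokio-epoll-uring")
        );
    }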
@@ -258,7 +156,6 @@ pub struct SafekeeperConf {
pub remote_storage: Option<String>,
pub backup_threads: Option<u32>,
pub auth_enabled: bool,
pub listen_addr: Option<String>,
}
impl Default for SafekeeperConf {
@@ -272,7 +169,6 @@ impl Default for SafekeeperConf {
remote_storage: None,
backup_threads: None,
auth_enabled: false,
listen_addr: None,
}
}
}
@@ -325,16 +221,11 @@ impl LocalEnv {
}
}
pub fn pg_dir(&self, pg_version: u32, dir_name: &str) -> anyhow::Result<PathBuf> {
Ok(self.pg_distrib_dir(pg_version)?.join(dir_name))
}
pub fn pg_bin_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
self.pg_dir(pg_version, "bin")
Ok(self.pg_distrib_dir(pg_version)?.join("bin"))
}
pub fn pg_lib_dir(&self, pg_version: u32) -> anyhow::Result<PathBuf> {
self.pg_dir(pg_version, "lib")
Ok(self.pg_distrib_dir(pg_version)?.join("lib"))
}
pub fn pageserver_bin(&self) -> PathBuf {
@@ -435,8 +326,44 @@ impl LocalEnv {
.collect()
}
/// Construct `Self` from on-disk state.
pub fn load_config(repopath: &Path) -> anyhow::Result<Self> {
/// Create a LocalEnv from a config file.
///
/// Unlike 'load_config', this function fills in any defaults that are missing
/// from the config file.
pub fn parse_config(toml: &str) -> anyhow::Result<Self> {
let mut env: LocalEnv = toml::from_str(toml)?;
// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume that distrib dirs follow the same pattern
// for all postgres versions.
if env.pg_distrib_dir == Path::new("") {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
env.pg_distrib_dir = postgres_bin.into();
} else {
let cwd = env::current_dir()?;
env.pg_distrib_dir = cwd.join("pg_install")
}
}
// Find neon binaries.
if env.neon_distrib_dir == Path::new("") {
env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
}
if env.pageservers.is_empty() {
anyhow::bail!("Configuration must contain at least one pageserver");
}
env.base_data_dir = base_path();
Ok(env)
}
/// Locate and load config
pub fn load_config() -> anyhow::Result<Self> {
let repopath = base_path();
if !repopath.exists() {
bail!(
"Neon config is not found in {}. You need to run 'neon_local init' first",
@@ -447,129 +374,38 @@ impl LocalEnv {
// TODO: check that it looks like a neon repository
// load and parse file
let config_file_contents = fs::read_to_string(repopath.join("config"))?;
let on_disk_config: OnDiskConfig = toml::from_str(config_file_contents.as_str())?;
let mut env = {
let OnDiskConfig {
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
private_key_path,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
branch_name_mappings,
} = on_disk_config;
LocalEnv {
base_data_dir: repopath.to_owned(),
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
private_key_path,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
branch_name_mappings,
}
};
let config = fs::read_to_string(repopath.join("config"))?;
let mut env: LocalEnv = toml::from_str(config.as_str())?;
// The source of truth for pageserver configuration is the pageserver.toml.
assert!(
env.pageservers.is_empty(),
"we ensure this during deserialization"
);
env.pageservers = {
let iter = std::fs::read_dir(repopath).context("open dir")?;
let mut pageservers = Vec::new();
for res in iter {
let dentry = res?;
const PREFIX: &str = "pageserver_";
let dentry_name = dentry
.file_name()
.into_string()
.ok()
.with_context(|| format!("non-utf8 dentry: {:?}", dentry.path()))
.unwrap();
if !dentry_name.starts_with(PREFIX) {
continue;
}
if !dentry.file_type().context("determine file type")?.is_dir() {
anyhow::bail!("expected a directory, got {:?}", dentry.path());
}
let id = dentry_name[PREFIX.len()..]
.parse::<NodeId>()
.with_context(|| format!("parse id from {:?}", dentry.path()))?;
// TODO(christian): use pageserver_api::config::ConfigToml (PR #7656)
#[derive(serde::Serialize, serde::Deserialize)]
// (allow unknown fields, unlike PageServerConf)
struct PageserverConfigTomlSubset {
id: NodeId,
listen_pg_addr: String,
listen_http_addr: String,
pg_auth_type: AuthType,
http_auth_type: AuthType,
}
let config_toml_path = dentry.path().join("pageserver.toml");
let config_toml: PageserverConfigTomlSubset = toml_edit::de::from_str(
&std::fs::read_to_string(&config_toml_path)
.with_context(|| format!("read {:?}", config_toml_path))?,
)
.context("parse pageserver.toml")?;
let PageserverConfigTomlSubset {
id: config_toml_id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
} = config_toml;
let conf = PageServerConf {
id: {
anyhow::ensure!(
config_toml_id == id,
"id mismatch: config_toml.id={config_toml_id} id={id}",
);
id
},
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
};
pageservers.push(conf);
}
pageservers
};
env.base_data_dir = repopath;
Ok(env)
}
pub fn persist_config(&self) -> anyhow::Result<()> {
Self::persist_config_impl(
&self.base_data_dir,
&OnDiskConfig {
pg_distrib_dir: self.pg_distrib_dir.clone(),
neon_distrib_dir: self.neon_distrib_dir.clone(),
default_tenant_id: self.default_tenant_id,
private_key_path: self.private_key_path.clone(),
broker: self.broker.clone(),
storage_controller: self.storage_controller.clone(),
pageservers: vec![], // it's skip_serializing anyway
safekeepers: self.safekeepers.clone(),
control_plane_api: self.control_plane_api.clone(),
control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(),
branch_name_mappings: self.branch_name_mappings.clone(),
},
)
}
pub fn persist_config(&self, base_path: &Path) -> anyhow::Result<()> {
// Currently, the user first passes a config file with 'neon_local init --config=<path>'
// We read that in, in `create_config`, and fill any missing defaults. Then it's saved
// to .neon/config. TODO: We lose any formatting and comments along the way, which is
// a bit sad.
let mut conf_content = r#"# This file describes a local deployment of the page server
# and safekeeper node. It is read by the 'neon_local' command-line
# utility.
"#
.to_string();
// Convert the LocalEnv to a toml file.
//
// This could be as simple as this:
//
// conf_content += &toml::to_string_pretty(env)?;
//
// But it results in a "values must be emitted before tables" error. I'm not sure
// why; AFAICS the table, i.e. 'safekeepers: Vec<SafekeeperConf>', is last.
// Maybe Rust reorders the fields to avoid padding or something?
// In any case, converting to toml::Value first, and serializing that, works.
// See https://github.com/alexcrichton/toml-rs/issues/142
conf_content += &toml::to_string_pretty(&toml::Value::try_from(self)?)?;
pub fn persist_config_impl(base_path: &Path, config: &OnDiskConfig) -> anyhow::Result<()> {
let conf_content = &toml::to_string_pretty(config)?;
let target_config_path = base_path.join("config");
fs::write(&target_config_path, conf_content).with_context(|| {
format!(
@@ -594,13 +430,17 @@ impl LocalEnv {
}
}
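The "values must be emitted before tables" issue mentioned in the comment above can be reproduced in isolation. This is a hedged, standalone sketch (struct names are made up, and whether the direct call actually fails depends on the toml crate version) of why the code round-trips through `toml::Value`:

    use serde::Serialize;

    #[derive(Serialize)]
    struct Inner {
        x: u32,
    }

    #[derive(Serialize)]
    struct Outer {
        // A table-valued field followed by a plain value: this ordering is what
        // historically triggered "values must be emitted before tables".
        inner: Inner,
        flag: bool,
    }

    fn main() {
        let outer = Outer {
            inner: Inner { x: 1 },
            flag: true,
        };
        // Direct serialization may fail with "values must be emitted before tables"
        // on older toml crate versions.
        let direct = toml::to_string_pretty(&outer);
        // Converting to `toml::Value` first is the workaround used above; the Value
        // serializer handles the ordering and succeeds.
        let via_value = toml::to_string_pretty(&toml::Value::try_from(&outer).unwrap());
        println!("direct: {direct:?}");
        println!("via Value: {via_value:?}");
    }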
/// Materialize the [`NeonLocalInitConf`] to disk. Called during [`neon_local init`].
pub fn init(conf: NeonLocalInitConf, force: &InitForceMode) -> anyhow::Result<()> {
let base_path = base_path();
assert_ne!(base_path, Path::new(""));
let base_path = &base_path;
//
// Initialize a new Neon repository
//
pub fn init(&mut self, pg_version: u32, force: &InitForceMode) -> anyhow::Result<()> {
// check if config already exists
let base_path = &self.base_data_dir;
ensure!(
base_path != Path::new(""),
"repository base path is missing"
);
// create base_path dir
if base_path.exists() {
match force {
InitForceMode::MustNotExist => {
@@ -632,115 +472,74 @@ impl LocalEnv {
}
}
}
if !self.pg_bin_dir(pg_version)?.join("postgres").exists() {
bail!(
"Can't find postgres binary at {}",
self.pg_bin_dir(pg_version)?.display()
);
}
for binary in ["pageserver", "safekeeper"] {
if !self.neon_distrib_dir.join(binary).exists() {
bail!(
"Can't find binary '{binary}' in neon distrib dir '{}'",
self.neon_distrib_dir.display()
);
}
}
if !base_path.exists() {
fs::create_dir(base_path)?;
}
let NeonLocalInitConf {
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id,
broker,
storage_controller,
pageservers,
safekeepers,
control_plane_api,
control_plane_compute_hook_api,
} = conf;
// Find postgres binaries.
// Follow POSTGRES_DISTRIB_DIR if set, otherwise look in "pg_install".
// Note that later in the code we assume that distrib dirs follow the same pattern
// for all postgres versions.
let pg_distrib_dir = pg_distrib_dir.unwrap_or_else(|| {
if let Some(postgres_bin) = env::var_os("POSTGRES_DISTRIB_DIR") {
postgres_bin.into()
} else {
let cwd = env::current_dir().unwrap();
cwd.join("pg_install")
}
});
// Find neon binaries.
let neon_distrib_dir = neon_distrib_dir
.unwrap_or_else(|| env::current_exe().unwrap().parent().unwrap().to_owned());
// Generate keypair for JWT.
//
// The keypair is only needed if authentication is enabled in any of the
// components. For convenience, we generate the keypair even if authentication
// is not enabled, so that you can easily enable it after the initialization
// step.
generate_auth_keys(
base_path.join("auth_private_key.pem").as_path(),
base_path.join("auth_public_key.pem").as_path(),
)
.context("generate auth keys")?;
let private_key_path = PathBuf::from("auth_private_key.pem");
// create the runtime type because the remaining initialization code below needs
// a LocalEnv instance to operate on
// TODO: refactor to avoid this, LocalEnv should only be constructed from on-disk state
let env = LocalEnv {
base_data_dir: base_path.clone(),
pg_distrib_dir,
neon_distrib_dir,
default_tenant_id: Some(default_tenant_id),
private_key_path,
broker,
storage_controller: storage_controller.unwrap_or_default(),
pageservers: pageservers.iter().map(Into::into).collect(),
safekeepers,
control_plane_api: control_plane_api.unwrap_or_default(),
control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(),
branch_name_mappings: Default::default(),
};
// create endpoints dir
fs::create_dir_all(env.endpoints_path())?;
// create safekeeper dirs
for safekeeper in &env.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?;
// step. However, if the key generation fails, we treat it as non-fatal if
// authentication was not enabled.
if self.private_key_path == PathBuf::new() {
match generate_auth_keys(
base_path.join("auth_private_key.pem").as_path(),
base_path.join("auth_public_key.pem").as_path(),
) {
Ok(()) => {
self.private_key_path = PathBuf::from("auth_private_key.pem");
}
Err(e) => {
if !self.auth_keys_needed() {
eprintln!("Could not generate keypair for JWT authentication: {e}");
eprintln!("Continuing anyway because authentication was not enabled");
self.private_key_path = PathBuf::from("auth_private_key.pem");
} else {
return Err(e);
}
}
}
}
// initialize pageserver state
for (i, ps) in pageservers.into_iter().enumerate() {
let runtime_ps = &env.pageservers[i];
assert_eq!(&PageServerConf::from(&ps), runtime_ps);
fs::create_dir(env.pageserver_data_dir(ps.id))?;
PageServerNode::from_env(&env, runtime_ps)
.initialize(ps)
.context("pageserver init failed")?;
fs::create_dir_all(self.endpoints_path())?;
for safekeeper in &self.safekeepers {
fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
}
// set up the remote location for the default LocalFs remote storage
std::fs::create_dir_all(env.base_data_dir.join(PAGESERVER_REMOTE_STORAGE_DIR))?;
self.persist_config(base_path)
}
env.persist_config()
fn auth_keys_needed(&self) -> bool {
self.pageservers.iter().any(|ps| {
ps.pg_auth_type == AuthType::NeonJWT || ps.http_auth_type == AuthType::NeonJWT
}) || self.safekeepers.iter().any(|sk| sk.auth_enabled)
}
}
pub fn base_path() -> PathBuf {
let path = match std::env::var_os("NEON_REPO_DIR") {
Some(val) => {
let path = PathBuf::from(val);
if !path.is_absolute() {
// repeat the env var in the error because our default is always absolute
panic!("NEON_REPO_DIR must be an absolute path, got {path:?}");
}
path
}
None => {
let pwd = std::env::current_dir()
// technically this can fail but it's quite unlikely
.expect("determine current directory");
let pwd_abs = pwd.canonicalize().expect("canonicalize current directory");
pwd_abs.join(".neon")
}
};
assert!(path.is_absolute());
path
fn base_path() -> PathBuf {
match std::env::var_os("NEON_REPO_DIR") {
Some(val) => PathBuf::from(val),
None => PathBuf::from(".neon"),
}
}
/// Generate a public/private key pair for JWT authentication
@@ -779,3 +578,31 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn simple_conf_parsing() {
let simple_conf_toml = include_str!("../simple.conf");
let simple_conf_parse_result = LocalEnv::parse_config(simple_conf_toml);
assert!(
simple_conf_parse_result.is_ok(),
"failed to parse simple config {simple_conf_toml}, reason: {simple_conf_parse_result:?}"
);
let string_to_replace = "listen_addr = '127.0.0.1:50051'";
let spoiled_url_str = "listen_addr = '!@$XOXO%^&'";
let spoiled_url_toml = simple_conf_toml.replace(string_to_replace, spoiled_url_str);
assert!(
spoiled_url_toml.contains(spoiled_url_str),
"Failed to replace string {string_to_replace} in the toml file {simple_conf_toml}"
);
let spoiled_url_parse_result = LocalEnv::parse_config(&spoiled_url_toml);
assert!(
spoiled_url_parse_result.is_err(),
"expected toml with invalid Url {spoiled_url_toml} to fail the parsing, but got {spoiled_url_parse_result:?}"
);
}
}


@@ -1,37 +1,36 @@
//! Code to manage pageservers
//!
//! In the local test environment, the data for each pageserver is stored in
//! In the local test environment, the pageserver stores its data directly in
//!
//! ```text
//! .neon/pageserver_<pageserver_id>
//! ```
//! .neon/
//!
use std::borrow::Cow;
use std::collections::HashMap;
use std::io;
use std::io::Write;
use std::num::NonZeroU64;
use std::path::PathBuf;
use std::str::FromStr;
use std::process::Command;
use std::time::Duration;
use anyhow::{bail, Context};
use camino::Utf8PathBuf;
use futures::SinkExt;
use pageserver_api::models::{
self, AuxFilePolicy, LocationConfig, TenantHistorySize, TenantInfo, TimelineInfo,
self, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo, TimelineInfo,
};
use pageserver_api::shard::TenantShardId;
use pageserver_client::mgmt_api;
use postgres_backend::AuthType;
use postgres_connection::{parse_host_port, PgConnectionConfig};
use utils::auth::{Claims, Scope};
use utils::id::NodeId;
use utils::{
id::{TenantId, TimelineId},
lsn::Lsn,
};
use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf};
use crate::local_env::PageServerConf;
use crate::{background_process, local_env::LocalEnv};
/// Directory within .neon which will be used by default for LocalFs remote storage.
@@ -75,27 +74,57 @@ impl PageServerNode {
}
}
fn pageserver_make_identity_toml(&self, node_id: NodeId) -> toml_edit::Document {
toml_edit::Document::from_str(&format!("id={node_id}")).unwrap()
}
fn pageserver_init_make_toml(
&self,
conf: NeonLocalInitPageserverConf,
) -> anyhow::Result<toml_edit::Document> {
assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully");
// TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656)
/// Merge overrides provided by the user on the command line with our default overides derived from neon_local configuration.
///
/// These all end up on the command line of the `pageserver` binary.
fn neon_local_overrides(&self, cli_overrides: &[&str]) -> Vec<String> {
// FIXME: the paths should be shell-escaped to handle paths with spaces, quotes, etc.
let pg_distrib_dir_param = format!(
"pg_distrib_dir='{}'",
self.env.pg_distrib_dir_raw().display()
);
let PageServerConf {
id,
listen_pg_addr,
listen_http_addr,
pg_auth_type,
http_auth_type,
virtual_file_io_engine,
get_vectored_impl,
} = &self.conf;
let id = format!("id={}", id);
let http_auth_type_param = format!("http_auth_type='{}'", http_auth_type);
let listen_http_addr_param = format!("listen_http_addr='{}'", listen_http_addr);
let pg_auth_type_param = format!("pg_auth_type='{}'", pg_auth_type);
let listen_pg_addr_param = format!("listen_pg_addr='{}'", listen_pg_addr);
let virtual_file_io_engine = if let Some(virtual_file_io_engine) = virtual_file_io_engine {
format!("virtual_file_io_engine='{virtual_file_io_engine}'")
} else {
String::new()
};
let get_vectored_impl = if let Some(get_vectored_impl) = get_vectored_impl {
format!("get_vectored_impl='{get_vectored_impl}'")
} else {
String::new()
};
let broker_endpoint_param = format!("broker_endpoint='{}'", self.env.broker.client_url());
let mut overrides = vec![pg_distrib_dir_param, broker_endpoint_param];
let mut overrides = vec![
id,
pg_distrib_dir_param,
http_auth_type_param,
pg_auth_type_param,
listen_http_addr_param,
listen_pg_addr_param,
broker_endpoint_param,
virtual_file_io_engine,
get_vectored_impl,
];
if let Some(control_plane_api) = &self.env.control_plane_api {
overrides.push(format!(
@@ -105,7 +134,7 @@ impl PageServerNode {
// Storage controller uses the same auth as pageserver: if JWT is enabled
// for us, we will also need it to talk to them.
if matches!(conf.http_auth_type, AuthType::NeonJWT) {
if matches!(http_auth_type, AuthType::NeonJWT) {
let jwt_token = self
.env
.generate_auth_token(&Claims::new(None, Scope::GenerationsApi))
@@ -114,40 +143,31 @@ impl PageServerNode {
}
}
if !conf.other.contains_key("remote_storage") {
if !cli_overrides
.iter()
.any(|c| c.starts_with("remote_storage"))
{
overrides.push(format!(
"remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}"
));
}
if conf.http_auth_type != AuthType::Trust || conf.pg_auth_type != AuthType::Trust {
if *http_auth_type != AuthType::Trust || *pg_auth_type != AuthType::Trust {
// Keys are generated in the toplevel repo dir, pageservers' workdirs
// are one level below that, so refer to keys with ../
overrides.push("auth_validation_public_key_path='../auth_public_key.pem'".to_owned());
}
// Apply the user-provided overrides
overrides.push(
toml_edit::ser::to_string_pretty(&conf)
.expect("we deserialized this from toml earlier"),
);
overrides.extend(cli_overrides.iter().map(|&c| c.to_owned()));
// Turn `overrides` into a toml document.
// TODO: the code above is legacy; it should be refactored to use toml_edit directly.
let mut config_toml = toml_edit::Document::new();
for fragment_str in overrides {
let fragment = toml_edit::Document::from_str(&fragment_str)
.expect("all fragments in `overrides` are valid toml documents, this function controls that");
for (key, item) in fragment.iter() {
config_toml.insert(key, item.clone());
}
}
Ok(config_toml)
overrides
}
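A hedged, standalone sketch of the toml_edit fragment merge visible above (the function name and override strings here are illustrative, mirroring the `toml_edit` API as used in the diff): each override is parsed as a tiny TOML document and its top-level keys are copied into one combined document, with later fragments winning.

    use std::str::FromStr;

    fn merge_overrides(fragments: &[&str]) -> toml_edit::Document {
        let mut merged = toml_edit::Document::new();
        for fragment in fragments {
            let doc = toml_edit::Document::from_str(fragment)
                .expect("each override fragment must be a valid TOML document");
            for (key, item) in doc.iter() {
                // Last writer wins, mirroring how user overrides replace defaults.
                merged.insert(key, item.clone());
            }
        }
        merged
    }

    fn main() {
        let merged = merge_overrides(&[
            "listen_pg_addr='127.0.0.1:64000'",
            "pg_auth_type='Trust'",
            "pg_auth_type='NeonJWT'", // overrides the earlier default
        ]);
        assert_eq!(merged["pg_auth_type"].as_str(), Some("NeonJWT"));
        assert_eq!(merged["listen_pg_addr"].as_str(), Some("127.0.0.1:64000"));
    }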
/// Initializes a pageserver node by creating its config with the overrides provided.
pub fn initialize(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
self.pageserver_init(conf)
pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
// First, run `pageserver --init` and wait for it to write its config file to disk and exit.
self.pageserver_init(config_overrides)
.with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id))
}
@@ -163,11 +183,11 @@ impl PageServerNode {
.expect("non-Unicode path")
}
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
self.start_node(retry_timeout).await
pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
self.start_node(config_overrides, false).await
}
fn pageserver_init(&self, conf: NeonLocalInitPageserverConf) -> anyhow::Result<()> {
fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
let datadir = self.repo_path();
let node_id = self.conf.id;
println!(
@@ -178,33 +198,29 @@ impl PageServerNode {
);
io::stdout().flush()?;
let config = self
.pageserver_init_make_toml(conf)
.context("make pageserver toml")?;
let config_file_path = datadir.join("pageserver.toml");
let mut config_file = std::fs::OpenOptions::new()
.create_new(true)
.write(true)
.open(&config_file_path)
.with_context(|| format!("open pageserver toml for write: {config_file_path:?}"))?;
config_file
.write_all(config.to_string().as_bytes())
.context("write pageserver toml")?;
drop(config_file);
if !datadir.exists() {
std::fs::create_dir(&datadir)?;
}
let identity_file_path = datadir.join("identity.toml");
let mut identity_file = std::fs::OpenOptions::new()
.create_new(true)
.write(true)
.open(identity_file_path)
.with_context(|| format!("open identity toml for write: {config_file_path:?}"))?;
let identity_toml = self.pageserver_make_identity_toml(node_id);
identity_file
.write_all(identity_toml.to_string().as_bytes())
.context("write identity toml")?;
drop(identity_toml);
let datadir_path_str = datadir.to_str().with_context(|| {
format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
})?;
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
args.push(Cow::Borrowed("--init"));
// TODO: invoke a TBD config-check command to validate that pageserver will start with the written config
let init_output = Command::new(self.env.pageserver_bin())
.args(args.iter().map(Cow::as_ref))
.envs(self.pageserver_env_variables()?)
.output()
.with_context(|| format!("Failed to run pageserver init for node {node_id}"))?;
anyhow::ensure!(
init_output.status.success(),
"Pageserver init for node {} did not finish successfully, stdout: {}, stderr: {}",
node_id,
String::from_utf8_lossy(&init_output.stdout),
String::from_utf8_lossy(&init_output.stderr),
);
// Write metadata file, used by pageserver on startup to register itself with
// the storage controller
@@ -218,13 +234,12 @@ impl PageServerNode {
// situation: the metadata is written by some other script.
std::fs::write(
metadata_path,
serde_json::to_vec(&pageserver_api::config::NodeMetadata {
postgres_host: "localhost".to_string(),
postgres_port: self.pg_connection_config.port(),
http_host: "localhost".to_string(),
http_port,
other: HashMap::new(),
})
serde_json::to_vec(&serde_json::json!({
"host": "localhost",
"port": self.pg_connection_config.port(),
"http_host": "localhost",
"http_port": http_port,
}))
.unwrap(),
)
.expect("Failed to write metadata file");
@@ -232,15 +247,18 @@ impl PageServerNode {
Ok(())
}
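As a rough sketch of the metadata file written above (the port numbers are placeholders; the json form mirrors the literal in the diff, while the other side of the diff uses the typed `pageserver_api::config::NodeMetadata`), neon_local leaves behind something like:

    fn main() {
        // Hypothetical ports; neon_local fills these in from the node's configuration.
        let metadata = serde_json::json!({
            "host": "localhost",
            "port": 64000,
            "http_host": "localhost",
            "http_port": 9898,
        });
        // The pageserver reads this file on startup to register itself with the
        // storage controller.
        println!("{}", serde_json::to_string_pretty(&metadata).unwrap());
    }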
async fn start_node(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
async fn start_node(
&self,
config_overrides: &[&str],
update_config: bool,
) -> anyhow::Result<()> {
// TODO: using a thread here because start_process() is not async but we need to call check_status()
let datadir = self.repo_path();
print!(
"Starting pageserver node {} at '{}' in {:?}, retrying for {:?}",
"Starting pageserver node {} at '{}' in {:?}",
self.conf.id,
self.pg_connection_config.raw_address(),
datadir,
retry_timeout
datadir
);
io::stdout().flush().context("flush stdout")?;
@@ -250,15 +268,17 @@ impl PageServerNode {
self.conf.id, datadir,
)
})?;
let args = vec!["-D", datadir_path_str];
let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
if update_config {
args.push(Cow::Borrowed("--update-config"));
}
background_process::start_process(
"pageserver",
&datadir,
&self.env.pageserver_bin(),
args,
args.iter().map(Cow::as_ref),
self.pageserver_env_variables()?,
background_process::InitialPidFile::Expect(self.pid_file()),
retry_timeout,
|| async {
let st = self.check_status().await;
match st {
@@ -273,6 +293,22 @@ impl PageServerNode {
Ok(())
}
fn pageserver_basic_args<'a>(
&self,
config_overrides: &'a [&'a str],
datadir_path_str: &'a str,
) -> Vec<Cow<'a, str>> {
let mut args = vec![Cow::Borrowed("-D"), Cow::Borrowed(datadir_path_str)];
let overrides = self.neon_local_overrides(config_overrides);
for config_override in overrides {
args.push(Cow::Borrowed("-c"));
args.push(Cow::Owned(config_override));
}
args
}
fn pageserver_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
// FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper
// needs a token, and how to generate that token, seems independent of whether
@@ -353,10 +389,6 @@ impl PageServerNode {
.remove("image_creation_threshold")
.map(|x| x.parse::<usize>())
.transpose()?,
image_layer_creation_check_threshold: settings
.remove("image_layer_creation_check_threshold")
.map(|x| x.parse::<u8>())
.transpose()?,
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.remove("walreceiver_connect_timeout")
@@ -369,6 +401,11 @@ impl PageServerNode {
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
trace_read_requests: settings
.remove("trace_read_requests")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'trace_read_requests' as bool")?,
eviction_policy: settings
.remove("eviction_policy")
.map(serde_json::from_str)
@@ -393,15 +430,6 @@ impl PageServerNode {
.map(serde_json::from_str)
.transpose()
.context("parse `timeline_get_throttle` from json")?,
switch_aux_file_policy: settings
.remove("switch_aux_file_policy")
.map(|x| x.parse::<AuxFilePolicy>())
.transpose()
.context("Failed to parse 'switch_aux_file_policy'")?,
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
lsn_lease_length_for_ts: settings
.remove("lsn_lease_length_for_ts")
.map(|x| x.to_string()),
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -410,6 +438,28 @@ impl PageServerNode {
}
}
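A hedged, standalone sketch of the `remove(...).map(parse).transpose()` idiom used throughout the settings parsing above (the setting names here are only examples; assumes the `anyhow` crate): an optional string setting becomes a typed `Option`, and a malformed value surfaces as an error instead of being dropped.

    use std::collections::HashMap;

    fn main() -> anyhow::Result<()> {
        let mut settings: HashMap<&str, &str> =
            HashMap::from([("compaction_threshold", "10"), ("pitr_interval", "7 days")]);

        // Present and valid: parses into Some(10).
        let compaction_threshold: Option<usize> = settings
            .remove("compaction_threshold")
            .map(|x| x.parse::<usize>())
            .transpose()?;
        assert_eq!(compaction_threshold, Some(10));

        // Absent: stays None without touching the parser.
        let image_creation_threshold: Option<usize> = settings
            .remove("image_creation_threshold")
            .map(|x| x.parse::<usize>())
            .transpose()?;
        assert_eq!(image_creation_threshold, None);

        // Anything left over is unrecognized, mirroring the bail!() above.
        assert_eq!(settings.keys().copied().collect::<Vec<_>>(), vec!["pitr_interval"]);
        Ok(())
    }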
pub async fn tenant_create(
&self,
new_tenant_id: TenantId,
generation: Option<u32>,
settings: HashMap<&str, &str>,
) -> anyhow::Result<TenantId> {
let config = Self::parse_config(settings.clone())?;
let request = models::TenantCreateRequest {
new_tenant_id: TenantShardId::unsharded(new_tenant_id),
generation,
config,
shard_parameters: ShardParameters::default(),
// Placement policy is not meaningful for creations not done via storage controller
placement_policy: None,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
}
Ok(self.http_client.tenant_create(&request).await?)
}
pub async fn tenant_config(
&self,
tenant_id: TenantId,
@@ -451,12 +501,6 @@ impl PageServerNode {
.map(|x| x.parse::<usize>())
.transpose()
.context("Failed to parse 'image_creation_threshold' as non zero integer")?,
image_layer_creation_check_threshold: settings
.remove("image_layer_creation_check_threshold")
.map(|x| x.parse::<u8>())
.transpose()
.context("Failed to parse 'image_creation_check_threshold' as integer")?,
pitr_interval: settings.remove("pitr_interval").map(|x| x.to_string()),
walreceiver_connect_timeout: settings
.remove("walreceiver_connect_timeout")
@@ -469,6 +513,11 @@ impl PageServerNode {
.map(|x| x.parse::<NonZeroU64>())
.transpose()
.context("Failed to parse 'max_lsn_wal_lag' as non zero integer")?,
trace_read_requests: settings
.remove("trace_read_requests")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'trace_read_requests' as bool")?,
eviction_policy: settings
.remove("eviction_policy")
.map(serde_json::from_str)
@@ -493,15 +542,6 @@ impl PageServerNode {
.map(serde_json::from_str)
.transpose()
.context("parse `timeline_get_throttle` from json")?,
switch_aux_file_policy: settings
.remove("switch_aux_file_policy")
.map(|x| x.parse::<AuxFilePolicy>())
.transpose()
.context("Failed to parse 'switch_aux_file_policy'")?,
lsn_lease_length: settings.remove("lsn_lease_length").map(|x| x.to_string()),
lsn_lease_length_for_ts: settings
.remove("lsn_lease_length_for_ts")
.map(|x| x.to_string()),
}
};
@@ -575,39 +615,60 @@ impl PageServerNode {
pg_wal: Option<(Lsn, PathBuf)>,
pg_version: u32,
) -> anyhow::Result<()> {
let (client, conn) = self.page_server_psql_client().await?;
// The connection object performs the actual communication with the database,
// so spawn it off to run on its own.
tokio::spawn(async move {
if let Err(e) = conn.await {
eprintln!("connection error: {}", e);
}
});
let client = std::pin::pin!(client);
// Init base reader
let (start_lsn, base_tarfile_path) = base;
let base_tarfile = tokio::fs::File::open(base_tarfile_path).await?;
let base_tarfile =
mgmt_api::ReqwestBody::wrap_stream(tokio_util::io::ReaderStream::new(base_tarfile));
let base_tarfile = tokio_util::io::ReaderStream::new(base_tarfile);
// Init wal reader if necessary
let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
let wal_tarfile = tokio::fs::File::open(wal_tarfile_path).await?;
let wal_reader =
mgmt_api::ReqwestBody::wrap_stream(tokio_util::io::ReaderStream::new(wal_tarfile));
let wal_reader = tokio_util::io::ReaderStream::new(wal_tarfile);
(end_lsn, Some(wal_reader))
} else {
(start_lsn, None)
};
// Import base
self.http_client
.import_basebackup(
tenant_id,
timeline_id,
start_lsn,
end_lsn,
pg_version,
base_tarfile,
)
.await?;
let copy_in = |reader, cmd| {
let client = &client;
async move {
let writer = client.copy_in(&cmd).await?;
let writer = std::pin::pin!(writer);
let mut writer = writer.sink_map_err(|e| {
std::io::Error::new(std::io::ErrorKind::Other, format!("{e}"))
});
let mut reader = std::pin::pin!(reader);
writer.send_all(&mut reader).await?;
writer.into_inner().finish().await?;
anyhow::Ok(())
}
};
// Import base
copy_in(
base_tarfile,
format!(
"import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
),
)
.await?;
// Import wal if necessary
if let Some(wal_reader) = wal_reader {
self.http_client
.import_wal(tenant_id, timeline_id, start_lsn, end_lsn, wal_reader)
.await?;
copy_in(
wal_reader,
format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}"),
)
.await?;
}
Ok(())


@@ -7,7 +7,6 @@
//! ```
use std::io::Write;
use std::path::PathBuf;
use std::time::Duration;
use std::{io, result};
use anyhow::Context;
@@ -15,7 +14,6 @@ use camino::Utf8PathBuf;
use postgres_connection::PgConnectionConfig;
use reqwest::{IntoUrl, Method};
use thiserror::Error;
use utils::auth::{Claims, Scope};
use utils::{http::error::HttpErrorBody, id::NodeId};
use crate::{
@@ -72,31 +70,24 @@ pub struct SafekeeperNode {
pub pg_connection_config: PgConnectionConfig,
pub env: LocalEnv,
pub http_client: reqwest::Client,
pub listen_addr: String,
pub http_base_url: String,
}
impl SafekeeperNode {
pub fn from_env(env: &LocalEnv, conf: &SafekeeperConf) -> SafekeeperNode {
let listen_addr = if let Some(ref listen_addr) = conf.listen_addr {
listen_addr.clone()
} else {
"127.0.0.1".to_string()
};
SafekeeperNode {
id: conf.id,
conf: conf.clone(),
pg_connection_config: Self::safekeeper_connection_config(&listen_addr, conf.pg_port),
pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
env: env.clone(),
http_client: reqwest::Client::new(),
http_base_url: format!("http://{}:{}/v1", listen_addr, conf.http_port),
listen_addr,
http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
}
}
/// Construct libpq connection string for connecting to this safekeeper.
fn safekeeper_connection_config(addr: &str, port: u16) -> PgConnectionConfig {
PgConnectionConfig::new_host_port(url::Host::parse(addr).unwrap(), port)
fn safekeeper_connection_config(port: u16) -> PgConnectionConfig {
PgConnectionConfig::new_host_port(url::Host::parse("127.0.0.1").unwrap(), port)
}
pub fn datadir_path_by_id(env: &LocalEnv, sk_id: NodeId) -> PathBuf {
@@ -112,21 +103,16 @@ impl SafekeeperNode {
.expect("non-Unicode path")
}
pub async fn start(
&self,
extra_opts: Vec<String>,
retry_timeout: &Duration,
) -> anyhow::Result<()> {
pub async fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<()> {
print!(
"Starting safekeeper at '{}' in '{}', retrying for {:?}",
"Starting safekeeper at '{}' in '{}'",
self.pg_connection_config.raw_address(),
self.datadir_path().display(),
retry_timeout,
self.datadir_path().display()
);
io::stdout().flush().unwrap();
let listen_pg = format!("{}:{}", self.listen_addr, self.conf.pg_port);
let listen_http = format!("{}:{}", self.listen_addr, self.conf.http_port);
let listen_pg = format!("127.0.0.1:{}", self.conf.pg_port);
let listen_http = format!("127.0.0.1:{}", self.conf.http_port);
let id = self.id;
let datadir = self.datadir_path();
@@ -153,7 +139,7 @@ impl SafekeeperNode {
availability_zone,
];
if let Some(pg_tenant_only_port) = self.conf.pg_tenant_only_port {
let listen_pg_tenant_only = format!("{}:{}", self.listen_addr, pg_tenant_only_port);
let listen_pg_tenant_only = format!("127.0.0.1:{}", pg_tenant_only_port);
args.extend(["--listen-pg-tenant-only".to_owned(), listen_pg_tenant_only]);
}
if !self.conf.sync {
@@ -204,9 +190,8 @@ impl SafekeeperNode {
&datadir,
&self.env.safekeeper_bin(),
&args,
self.safekeeper_env_variables()?,
[],
background_process::InitialPidFile::Expect(self.pid_file()),
retry_timeout,
|| async {
match self.check_status().await {
Ok(()) => Ok(true),
@@ -218,18 +203,6 @@ impl SafekeeperNode {
.await
}
fn safekeeper_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
// Generate a token to connect from safekeeper to peers
if self.conf.auth_enabled {
let token = self
.env
.generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
Ok(vec![("SAFEKEEPER_AUTH_TOKEN".to_owned(), token)])
} else {
Ok(Vec::new())
}
}
///
/// Stop the server.
///


@@ -1,23 +1,21 @@
use crate::{
background_process,
local_env::{LocalEnv, NeonStorageControllerConf},
};
use crate::{background_process, local_env::LocalEnv};
use camino::{Utf8Path, Utf8PathBuf};
use hyper::Method;
use pageserver_api::{
controller_api::{
NodeConfigureRequest, NodeRegisterRequest, TenantCreateRequest, TenantCreateResponse,
TenantLocateResponse, TenantShardMigrateRequest, TenantShardMigrateResponse,
NodeConfigureRequest, NodeRegisterRequest, TenantCreateResponse, TenantLocateResponse,
TenantShardMigrateRequest, TenantShardMigrateResponse,
},
models::{
TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo,
TenantCreateRequest, TenantShardSplitRequest, TenantShardSplitResponse,
TimelineCreateRequest, TimelineInfo,
},
shard::{ShardStripeSize, TenantShardId},
};
use pageserver_client::mgmt_api::ResponseErrorMessageExt;
use postgres_backend::AuthType;
use reqwest::Method;
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use std::{fs, str::FromStr, time::Duration};
use std::{fs, str::FromStr};
use tokio::process::Command;
use tracing::instrument;
use url::Url;
@@ -29,24 +27,24 @@ use utils::{
pub struct StorageController {
env: LocalEnv,
listen: String,
path: Utf8PathBuf,
private_key: Option<Vec<u8>>,
public_key: Option<String>,
postgres_port: u16,
client: reqwest::Client,
config: NeonStorageControllerConf,
}
const COMMAND: &str = "storage_controller";
const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;
const DB_NAME: &str = "storage_controller";
// Use a shorter pageserver unavailability interval than the default to speed up tests.
const NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
#[derive(Serialize, Deserialize)]
pub struct AttachHookRequest {
pub tenant_shard_id: TenantShardId,
pub node_id: Option<NodeId>,
pub generation_override: Option<i32>,
}
#[derive(Serialize, Deserialize)]
@@ -66,6 +64,10 @@ pub struct InspectResponse {
impl StorageController {
pub fn from_env(env: &LocalEnv) -> Self {
let path = Utf8PathBuf::from_path_buf(env.base_data_dir.clone())
.unwrap()
.join("attachments.json");
// It makes no sense to construct this if pageservers aren't going to use it, so assume
// that pageservers have the control plane API set.
let listen_url = env.control_plane_api.clone().unwrap();
@@ -125,6 +127,7 @@ impl StorageController {
Self {
env: env.clone(),
path,
listen,
private_key,
public_key,
@@ -132,7 +135,6 @@ impl StorageController {
client: reqwest::ClientBuilder::new()
.build()
.expect("Failed to construct http client"),
config: env.storage_controller.clone(),
}
}
@@ -151,16 +153,16 @@ impl StorageController {
.expect("non-Unicode path")
}
/// Find the directory containing postgres subdirectories, such as `bin` and `lib`
/// Find the directory containing postgres binaries, such as `initdb` and `pg_ctl`
///
/// This usually uses STORAGE_CONTROLLER_POSTGRES_VERSION of postgres, but will fall back
/// to other versions if that one isn't found. Some automated tests create circumstances
/// where only one version is available in pg_distrib_dir, such as `test_remote_extensions`.
async fn get_pg_dir(&self, dir_name: &str) -> anyhow::Result<Utf8PathBuf> {
pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
let prefer_versions = [STORAGE_CONTROLLER_POSTGRES_VERSION, 15, 14];
for v in prefer_versions {
let path = Utf8PathBuf::from_path_buf(self.env.pg_dir(v, dir_name)?).unwrap();
let path = Utf8PathBuf::from_path_buf(self.env.pg_bin_dir(v)?).unwrap();
if tokio::fs::try_exists(&path).await? {
return Ok(path);
}
@@ -168,20 +170,11 @@ impl StorageController {
// Fall through
anyhow::bail!(
"Postgres directory '{}' not found in {}",
dir_name,
self.env.pg_distrib_dir.display(),
"Postgres binaries not found in {}",
self.env.pg_distrib_dir.display()
);
}
pub async fn get_pg_bin_dir(&self) -> anyhow::Result<Utf8PathBuf> {
self.get_pg_dir("bin").await
}
pub async fn get_pg_lib_dir(&self) -> anyhow::Result<Utf8PathBuf> {
self.get_pg_dir("lib").await
}
/// Readiness check for our postgres process
async fn pg_isready(&self, pg_bin_dir: &Utf8Path) -> anyhow::Result<bool> {
let bin_path = pg_bin_dir.join("pg_isready");
@@ -199,6 +192,7 @@ impl StorageController {
///
/// Returns the database url
pub async fn setup_database(&self) -> anyhow::Result<String> {
const DB_NAME: &str = "storage_controller";
let database_url = format!("postgresql://localhost:{}/{DB_NAME}", self.postgres_port);
let pg_bin_dir = self.get_pg_bin_dir().await?;
@@ -227,47 +221,18 @@ impl StorageController {
Ok(database_url)
}
pub async fn connect_to_database(
&self,
) -> anyhow::Result<(
tokio_postgres::Client,
tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
)> {
tokio_postgres::Config::new()
.host("localhost")
.port(self.postgres_port)
// The user is the ambient operating system user name.
// That is an impurity which we want to fix => TODO https://github.com/neondatabase/neon/issues/8400
//
// Until we get there, use the ambient operating system user name.
// Recent tokio-postgres versions default to this if the user isn't specified,
// but our tokio-postgres fork doesn't have that upstream commit:
// https://github.com/sfackler/rust-postgres/commit/cb609be758f3fb5af537f04b584a2ee0cebd5e79
// => we should rebase our fork => TODO https://github.com/neondatabase/neon/issues/8399
.user(&whoami::username())
.dbname(DB_NAME)
.connect(tokio_postgres::NoTls)
.await
.map_err(anyhow::Error::new)
}
pub async fn start(&self, retry_timeout: &Duration) -> anyhow::Result<()> {
pub async fn start(&self) -> anyhow::Result<()> {
// Start a vanilla Postgres process used by the storage controller for persistence.
let pg_data_path = Utf8PathBuf::from_path_buf(self.env.base_data_dir.clone())
.unwrap()
.join("storage_controller_db");
let pg_bin_dir = self.get_pg_bin_dir().await?;
let pg_lib_dir = self.get_pg_lib_dir().await?;
let pg_log_path = pg_data_path.join("postgres.log");
if !tokio::fs::try_exists(&pg_data_path).await? {
// Initialize empty database
let initdb_path = pg_bin_dir.join("initdb");
let mut child = Command::new(&initdb_path)
.envs(vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
])
.args(["-D", pg_data_path.as_ref()])
.spawn()
.expect("Failed to spawn initdb");
@@ -275,20 +240,13 @@ impl StorageController {
if !status.success() {
anyhow::bail!("initdb failed with status {status}");
}
};
// Write a minimal config file:
// - Specify the port, since this is chosen dynamically
// - Switch off fsync, since we're running in lightweight test environments and, e.g. when scale-testing
//   the storage controller, we don't want a slow local disk to interfere with that.
//
// NB: it's important that we rewrite this file on each start command so we propagate changes
// from `LocalEnv`'s config file (`.neon/config`).
tokio::fs::write(
&pg_data_path.join("postgresql.conf"),
format!("port = {}\nfsync=off\n", self.postgres_port),
)
.await?;
tokio::fs::write(
&pg_data_path.join("postgresql.conf"),
format!("port = {}", self.postgres_port),
)
.await?;
};
println!("Starting storage controller database...");
let db_start_args = [
@@ -305,12 +263,8 @@ impl StorageController {
&self.env.base_data_dir,
pg_bin_dir.join("pg_ctl").as_std_path(),
db_start_args,
vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
],
[],
background_process::InitialPidFile::Create(self.postgres_pid_file()),
retry_timeout,
|| self.pg_isready(&pg_bin_dir),
)
.await?;
@@ -318,43 +272,18 @@ impl StorageController {
// Run migrations on every startup, in case something changed.
let database_url = self.setup_database().await?;
// We support running a startup SQL script to fiddle with the database before we launch storcon.
// This is used by the test suite.
let startup_script_path = self
.env
.base_data_dir
.join("storage_controller_db.startup.sql");
let startup_script = match tokio::fs::read_to_string(&startup_script_path).await {
Ok(script) => {
tokio::fs::remove_file(startup_script_path).await?;
script
}
Err(e) => {
if e.kind() == std::io::ErrorKind::NotFound {
// always run some startup script so that this code path doesn't bit rot
"BEGIN; COMMIT;".to_string()
} else {
anyhow::bail!("Failed to read startup script: {e}")
}
}
};
let (mut client, conn) = self.connect_to_database().await?;
let conn = tokio::spawn(conn);
let tx = client.build_transaction();
let tx = tx.start().await?;
tx.batch_execute(&startup_script).await?;
tx.commit().await?;
drop(client);
conn.await??;
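As a hedged example of the startup-script hook described above (a sketch for test authors; it assumes the repository directory already exists), a test can drop a SQL file at the well-known path before starting the controller; the start wrapper runs it in one transaction and then deletes it.

    fn main() -> std::io::Result<()> {
        let repo_dir = std::path::PathBuf::from(
            std::env::var_os("NEON_REPO_DIR").unwrap_or_else(|| ".neon".into()),
        );
        // Executed (and removed) by the storage controller start wrapper above
        // before the `storage_controller` process is launched.
        std::fs::write(
            repo_dir.join("storage_controller_db.startup.sql"),
            "-- runs inside a single transaction against the storage controller DB\nBEGIN; COMMIT;\n",
        )
    }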
let max_unavailable: humantime::Duration = NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL.into();
let mut args = vec![
"-l",
&self.listen,
"-p",
self.path.as_ref(),
"--dev",
"--database-url",
&database_url,
"--max-unavailable-interval",
&humantime::Duration::from(self.config.max_unavailable).to_string(),
&max_unavailable.to_string(),
]
.into_iter()
.map(|s| s.to_string())
@@ -376,26 +305,16 @@ impl StorageController {
));
}
if let Some(split_threshold) = self.config.split_threshold.as_ref() {
args.push(format!("--split-threshold={split_threshold}"))
}
args.push(format!(
"--neon-local-repo-dir={}",
self.env.base_data_dir.display()
));
background_process::start_process(
COMMAND,
&self.env.base_data_dir,
&self.env.storage_controller_bin(),
args,
vec![
("LD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
("DYLD_LIBRARY_PATH".to_owned(), pg_lib_dir.to_string()),
],
[(
"NEON_REPO_DIR".to_string(),
self.env.base_data_dir.to_string_lossy().to_string(),
)],
background_process::InitialPidFile::Create(self.pid_file()),
retry_timeout,
|| async {
match self.ready().await {
Ok(_) => Ok(true),
@@ -460,7 +379,7 @@ impl StorageController {
/// Simple HTTP request wrapper for calling into storage controller
async fn dispatch<RQ, RS>(
&self,
method: reqwest::Method,
method: hyper::Method,
path: String,
body: Option<RQ>,
) -> anyhow::Result<RS>
@@ -513,7 +432,6 @@ impl StorageController {
let request = AttachHookRequest {
tenant_shard_id,
node_id: Some(pageserver_id),
generation_override: None,
};
let response = self
@@ -554,16 +472,6 @@ impl StorageController {
.await
}
#[instrument(skip(self))]
pub async fn tenant_import(&self, tenant_id: TenantId) -> anyhow::Result<TenantCreateResponse> {
self.dispatch::<(), TenantCreateResponse>(
Method::POST,
format!("debug/v1/tenant/{tenant_id}/import"),
None,
)
.await
}
#[instrument(skip(self))]
pub async fn tenant_locate(&self, tenant_id: TenantId) -> anyhow::Result<TenantLocateResponse> {
self.dispatch::<(), _>(


@@ -1,26 +0,0 @@
[package]
name = "storcon_cli"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[dependencies]
anyhow.workspace = true
clap.workspace = true
comfy-table.workspace = true
futures.workspace = true
humantime.workspace = true
hyper.workspace = true
pageserver_api.workspace = true
pageserver_client.workspace = true
reqwest.workspace = true
serde.workspace = true
serde_json = { workspace = true, features = ["raw_value"] }
storage_controller_client.workspace = true
thiserror.workspace = true
tokio.workspace = true
tracing.workspace = true
utils.workspace = true
workspace_hack.workspace = true

Some files were not shown because too many files have changed in this diff.