storcon: skip spurious detach on node activation

2026-05-24 08:30:37 +00:00 · 2024-09-23 17:33:18 +01:00
52 changed files with 721 additions and 5756 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -13,7 +13,6 @@
 # Directories
 !.cargo/
 !.config/
-!compute/
 !compute_tools/
 !control_plane/
 !libs/
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -651,7 +651,7 @@ jobs:
          provenance: false
          push: true
          pull: true
-          file: compute/Dockerfile.compute-node
+          file: Dockerfile.compute-node
          cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version }}:cache-${{ matrix.arch }}
          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
          tags: |
@@ -670,7 +670,7 @@ jobs:
          provenance: false
          push: true
          pull: true
-          file: compute/Dockerfile.compute-node
+          file: Dockerfile.compute-node
          target: neon-pg-ext-test
          cache-from: type=registry,ref=cache.neon.build/neon-test-extensions-${{ matrix.version }}:cache-${{ matrix.arch }}
          cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/neon-test-extensions-{0}:cache-{1},mode=max', matrix.version, matrix.arch) || '' }}
@@ -691,7 +691,7 @@ jobs:
          provenance: false
          push: true
          pull: true
-          file: compute/Dockerfile.compute-node
+          file: Dockerfile.compute-node
          tags: |
            neondatabase/compute-tools:${{ needs.tag.outputs.build-tag }}-${{ matrix.arch }}

@@ -779,7 +779,7 @@ jobs:
      - name: Build vm image
        run: |
          ./vm-builder \
-            -spec=compute/vm-image-spec.yaml \
+            -spec=vm-image-spec.yaml \
            -src=neondatabase/compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }} \
            -dst=neondatabase/vm-compute-node-${{ matrix.version }}:${{ needs.tag.outputs.build-tag }}

@@ -843,9 +843,6 @@ jobs:
    needs: [ check-permissions, tag, test-images, vm-compute-node-image ]
    runs-on: ubuntu-22.04

-    permissions:
-      id-token: write # for `aws-actions/configure-aws-credentials`
-
    env:
      VERSIONS: v14 v15 v16 v17

@@ -890,19 +887,13 @@ jobs:
          docker buildx imagetools create -t neondatabase/neon-test-extensions-v16:latest \
                                              neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}

-      - name: Configure AWS-prod credentials
-        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
-        uses: aws-actions/configure-aws-credentials@v4
-        with:
-          aws-region: eu-central-1
-          mask-aws-account-id: true
-          role-to-assume: ${{ secrets.PROD_GHA_OIDC_ROLE }}
-
      - name: Login to prod ECR
        uses: docker/login-action@v3
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
        with:
          registry: 093970136003.dkr.ecr.eu-central-1.amazonaws.com
+          username: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_ACCESS_KEY_ID }}
+          password: ${{ secrets.PROD_GHA_RUNNER_LIMITED_AWS_SECRET_ACCESS_KEY }}

      - name: Copy all images to prod ECR
        if: github.ref_name == 'release'|| github.ref_name == 'release-proxy'
@@ -1216,6 +1207,7 @@ jobs:
    # Usually we do `needs: [...]`
    needs:
      - build-and-test-locally
+      - check-submodules
      - check-codestyle-python
      - check-codestyle-rust
      - promote-images
--- a/.github/workflows/cloud-regress.yml
+++ b/.github/workflows/cloud-regress.yml
@@ -1,102 +0,0 @@
-name: Cloud Regression Test
-on:
-  schedule:
-    # * is a special character in YAML so you have to quote this string
-    #          ┌───────────── minute (0 - 59)
-    #          │ ┌───────────── hour (0 - 23)
-    #          │ │ ┌───────────── day of the month (1 - 31)
-    #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
-    #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron:  '45 1 * * *' # run once a day, timezone is utc
-  workflow_dispatch: # adds ability to run this manually
-
-defaults:
-  run:
-    shell: bash -euxo pipefail {0}
-
-concurrency:
-  # Allow only one workflow
-  group: ${{ github.workflow }}
-  cancel-in-progress: true
-
-jobs:
-  regress:
-    env:
-      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
-      DEFAULT_PG_VERSION: 16
-      TEST_OUTPUT: /tmp/test_output
-      BUILD_TYPE: remote
-      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
-
-    runs-on: us-east-2
-    container:
-      image: neondatabase/build-tools:pinned
-      options: --init
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: true
-
-      - name: Patch the test
-        run: |
-          cd "vendor/postgres-v${DEFAULT_PG_VERSION}"
-          patch -p1 < "../../patches/cloud_regress_pg${DEFAULT_PG_VERSION}.patch"
-
-      - name: Generate a random password
-        id: pwgen
-        run: |
-          set +x
-          DBPASS=$(dd if=/dev/random bs=48 count=1 2>/dev/null | base64)
-          echo "::add-mask::${DBPASS//\//}"
-          echo DBPASS="${DBPASS//\//}" >> "${GITHUB_OUTPUT}"
-
-      - name: Change tests according to the generated password
-        env:
-          DBPASS: ${{ steps.pwgen.outputs.DBPASS }}
-        run: |
-          cd vendor/postgres-v"${DEFAULT_PG_VERSION}"/src/test/regress
-          for fname in sql/*.sql expected/*.out; do
-            sed -i.bak s/NEON_PASSWORD_PLACEHOLDER/"'${DBPASS}'"/ "${fname}"
-          done
-          for ph in $(grep NEON_MD5_PLACEHOLDER expected/password.out | awk '{print $3;}' | sort | uniq); do
-            USER=$(echo "${ph}" | cut -c 22-)
-            MD5=md5$(echo -n "${DBPASS}${USER}" | md5sum | awk '{print $1;}')
-            sed -i.bak "s/${ph}/${MD5}/" expected/password.out
-          done
-
-      - name: Download Neon artifact
-        uses: ./.github/actions/download
-        with:
-          name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact
-          path: /tmp/neon/
-          prefix: latest
-
-      - name: Run the regression tests
-        uses: ./.github/actions/run-python-test-set
-        with:
-          build_type: ${{ env.BUILD_TYPE }}
-          test_selection: cloud_regress
-          pg_version: ${{ env.DEFAULT_PG_VERSION }}
-          extra_params: -m remote_cluster
-        env:
-          BENCHMARK_CONNSTR: ${{ secrets.PG_REGRESS_CONNSTR }}
-
-      - name: Create Allure report
-        id: create-allure-report
-        if: ${{ !cancelled() }}
-        uses: ./.github/actions/allure-report-generate
-
-      - name: Post to a Slack channel
-        if: ${{ github.event.schedule && failure() }}
-        uses: slackapi/slack-github-action@v1
-        with:
-          channel-id: "C033QLM5P7D" # on-call-staging-stream
-          slack-message: |
-            Periodic pg_regress on staging: ${{ job.status }}
-            <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>
-            <${{ steps.create-allure-report.outputs.report-url }}|Allure report>
-        env:
-          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
-
--- a/.github/workflows/trigger-e2e-tests.yml
+++ b/.github/workflows/trigger-e2e-tests.yml
@@ -107,7 +107,7 @@ jobs:
          if [ "$GITHUB_EVENT_NAME" == "pull_request" ]; then
            for f in $(gh api "/repos/${GITHUB_REPOSITORY}/pulls/${PR_NUMBER}/files" --paginate --jq '.[].filename'); do
              case "$f" in
-                vendor/*|pgxn/*|libs/vm_monitor/*|compute/Dockerfile.compute-node)
+                vendor/*|pgxn/*|libs/vm_monitor/*|Dockerfile.compute-node)
                  platforms=$(echo "${platforms}" | jq --compact-output '. += ["k8s-neonvm"] | unique')
                  ;;
                *)
--- a/compute/Dockerfile.compute-node
+++ b/compute/Dockerfile.compute-node
@@ -280,7 +280,7 @@ FROM build-deps AS vector-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-COPY compute/patches/pgvector.patch /pgvector.patch
+COPY patches/pgvector.patch /pgvector.patch

 # By default, pgvector Makefile uses `-march=native`. We don't want that,
 # because we build the images on different machines than where we run them.
@@ -366,7 +366,7 @@ FROM build-deps AS rum-pg-build
 ARG PG_VERSION
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-COPY compute/patches/rum.patch /rum.patch
+COPY patches/rum.patch /rum.patch

 RUN case "${PG_VERSION}" in "v17") \
    echo "v17 extensions are not supported yet. Quit" && exit 0;; \
@@ -1031,41 +1031,6 @@ FROM debian:bullseye-slim AS compute-tools-image

 COPY --from=compute-tools /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl

-#########################################################################################
-#
-# Layer "pgbouncer"
-#
-#########################################################################################
-
-FROM debian:bullseye-slim AS pgbouncer
-RUN set -e \
-    && apt-get update \
-    && apt-get install -y \
-        build-essential \
-        git \
-        libevent-dev \
-        libtool \
-        pkg-config
-
-# Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
-ENV PGBOUNCER_TAG=pgbouncer_1_22_1
-RUN set -e \
-    && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
-    && cd pgbouncer \
-    && ./autogen.sh \
-    && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \
-    && make -j $(nproc) dist_man_MANS= \
-    && make install dist_man_MANS=
-
-#########################################################################################
-#
-# Layers "postgres-exporter" and "sql-exporter"
-#
-#########################################################################################
-
-FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
-FROM burningalchemist/sql_exporter:0.13 AS sql-exporter
-
 #########################################################################################
 #
 # Clean up postgres folder before inclusion
@@ -1113,7 +1078,7 @@ COPY --from=pgjwt-pg-build /pgjwt.tar.gz /ext-src
 COPY --from=hypopg-pg-build /hypopg.tar.gz /ext-src
 COPY --from=pg-hashids-pg-build /pg_hashids.tar.gz /ext-src
 COPY --from=rum-pg-build /rum.tar.gz /ext-src
-COPY compute/patches/rum.patch /ext-src
+COPY patches/rum.patch /ext-src
 #COPY --from=pgtap-pg-build /pgtap.tar.gz /ext-src
 COPY --from=ip4r-pg-build /ip4r.tar.gz /ext-src
 COPY --from=prefix-pg-build /prefix.tar.gz /ext-src
@@ -1121,9 +1086,9 @@ COPY --from=hll-pg-build /hll.tar.gz /ext-src
 COPY --from=plpgsql-check-pg-build /plpgsql_check.tar.gz /ext-src
 #COPY --from=timescaledb-pg-build /timescaledb.tar.gz /ext-src
 COPY --from=pg-hint-plan-pg-build /pg_hint_plan.tar.gz /ext-src
-COPY compute/patches/pg_hint_plan.patch /ext-src
+COPY patches/pg_hint_plan.patch /ext-src
 COPY --from=pg-cron-pg-build /pg_cron.tar.gz /ext-src
-COPY compute/patches/pg_cron.patch /ext-src
+COPY patches/pg_cron.patch /ext-src
 #COPY --from=pg-pgx-ulid-build /home/nonroot/pgx_ulid.tar.gz /ext-src
 #COPY --from=rdkit-pg-build /rdkit.tar.gz /ext-src
 COPY --from=pg-uuidv7-pg-build /pg_uuidv7.tar.gz /ext-src
@@ -1132,7 +1097,7 @@ COPY --from=pg-semver-pg-build /pg_semver.tar.gz /ext-src
 #COPY --from=pg-embedding-pg-build /home/nonroot/pg_embedding-src/ /ext-src
 #COPY --from=wal2json-pg-build /wal2json_2_5.tar.gz /ext-src
 COPY --from=pg-anon-pg-build /pg_anon.tar.gz /ext-src
-COPY compute/patches/pg_anon.patch /ext-src
+COPY patches/pg_anon.patch /ext-src
 COPY --from=pg-ivm-build /pg_ivm.tar.gz /ext-src
 COPY --from=pg-partman-build /pg_partman.tar.gz /ext-src
 RUN case "${PG_VERSION}" in "v17") \
@@ -1195,23 +1160,9 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl

-# pgbouncer and its config
-COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer
-COPY --chmod=0666 --chown=postgres compute/etc/pgbouncer.ini /etc/pgbouncer.ini
-
-# Metrics exporter binaries and  configuration files
-COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
-COPY --from=sql-exporter      /bin/sql_exporter      /bin/sql_exporter
-
-COPY --chmod=0644 compute/etc/sql_exporter.yml               /etc/sql_exporter.yml
-COPY --chmod=0644 compute/etc/neon_collector.yml             /etc/neon_collector.yml
-COPY --chmod=0644 compute/etc/sql_exporter_autoscaling.yml   /etc/sql_exporter_autoscaling.yml
-COPY --chmod=0644 compute/etc/neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
-
 # Create remote extension download directory
 RUN mkdir /usr/local/download_extensions && chown -R postgres:postgres /usr/local/download_extensions

-
 # Install:
 # libreadline8 for psql
 # libicu67, locales for collations (including ICU and plpgsql_check)
--- a/compute/README.md
+++ b/compute/README.md
@@ -1,21 +0,0 @@
-This directory contains files that are needed to build the compute
-images, or included in the compute images.
-
-Dockerfile.compute-node
-	To build the compute image
-
-vm-image-spec.yaml
-	Instructions for vm-builder, to turn the compute-node image into
-	corresponding vm-compute-node image.
-
-etc/
-	Configuration files included in /etc in the compute image
-
-patches/
-	Some extensions need to be patched to work with Neon. This
-	directory contains such patches. They are applied to the extension
-	sources in Dockerfile.compute-node
-
-In addition to these, postgres itself, the neon postgres extension,
-and compute_ctl are built and copied into the compute image by
-Dockerfile.compute-node.
--- a/compute/etc/neon_collector.yml
+++ b/compute/etc/neon_collector.yml
@@ -1,247 +0,0 @@
-collector_name: neon_collector
-metrics:
- metric_name: lfc_misses
-  type: gauge
-  help: 'lfc_misses'
-  key_labels:
-  values: [lfc_misses]
-  query: |
-    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
-
- metric_name: lfc_used
-  type: gauge
-  help: 'LFC chunks used (chunk = 1MB)'
-  key_labels:
-  values: [lfc_used]
-  query: |
-    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
-
- metric_name: lfc_hits
-  type: gauge
-  help: 'lfc_hits'
-  key_labels:
-  values: [lfc_hits]
-  query: |
-    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
-
- metric_name: lfc_writes
-  type: gauge
-  help: 'lfc_writes'
-  key_labels:
-  values: [lfc_writes]
-  query: |
-    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
-
- metric_name: lfc_cache_size_limit
-  type: gauge
-  help: 'LFC cache size limit in bytes'
-  key_labels:
-  values: [lfc_cache_size_limit]
-  query: |
-    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
-
- metric_name: connection_counts
-  type: gauge
-  help: 'Connection counts'
-  key_labels:
-    - datname
-    - state
-  values: [count]
-  query: |
-    select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
-
- metric_name: pg_stats_userdb
-  type: gauge
-  help: 'Stats for several oldest non-system dbs'
-  key_labels:
-    - datname
-  value_label: kind
-  values:
-    - db_size
-    - deadlocks
-    # Rows
-    - inserted
-    - updated
-    - deleted
-  # We export stats for 10 non-system database. Without this limit
-  # it is too easy to abuse the system by creating lots of databases.
-  query: |
-    select pg_database_size(datname) as db_size, deadlocks,
-       tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
-       datname
-     from pg_stat_database
-     where datname IN (
-       select datname
-       from pg_database
-       where datname <> 'postgres' and not datistemplate
-       order by oid
-       limit 10
-     );
-
- metric_name: max_cluster_size
-  type: gauge
-  help: 'neon.max_cluster_size setting'
-  key_labels:
-  values: [max_cluster_size]
-  query: |
-    select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
-
- metric_name: db_total_size
-  type: gauge
-  help: 'Size of all databases'
-  key_labels:
-  values: [total]
-  query: |
-    select sum(pg_database_size(datname)) as total from pg_database;
-
-# DEPRECATED
- metric_name: lfc_approximate_working_set_size
-  type: gauge
-  help: 'Approximate working set size in pages of 8192 bytes'
-  key_labels:
-  values: [approximate_working_set_size]
-  query: |
-    select neon.approximate_working_set_size(false) as approximate_working_set_size;
-
- metric_name: lfc_approximate_working_set_size_windows
-  type: gauge
-  help: 'Approximate working set size in pages of 8192 bytes'
-  key_labels: [duration]
-  values: [size]
-  # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
-  # of durations in a pretty-printed form.
-  query: |
-    select
-      x as duration,
-      neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
-    from
-      (values ('5m'),('15m'),('1h')) as t (x);
-
- metric_name: compute_current_lsn
-  type: gauge
-  help: 'Current LSN of the database'
-  key_labels:
-  values: [lsn]
-  query: |
-    select
-      case
-        when pg_catalog.pg_is_in_recovery()
-        then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
-        else (pg_current_wal_lsn() - '0/0')::FLOAT8
-      end as lsn;
-
- metric_name: compute_receive_lsn
-  type: gauge
-  help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
-  key_labels:
-  values: [lsn]
-  query: |
-    SELECT
-      CASE
-        WHEN pg_catalog.pg_is_in_recovery()
-        THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
-        ELSE 0
-      END AS lsn;
-
- metric_name: replication_delay_bytes
-  type: gauge
-  help: 'Bytes between received and replayed LSN'
-  key_labels:
-  values: [replication_delay_bytes]
-  # We use a GREATEST call here because this calculation can be negative.
-  # The calculation is not atomic, meaning after we've gotten the receive
-  # LSN, the replay LSN may have advanced past the receive LSN we
-  # are using for the calculation.
-  query: |
-    SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
-
- metric_name: replication_delay_seconds
-  type: gauge
-  help: 'Time since last LSN was replayed'
-  key_labels:
-  values: [replication_delay_seconds]
-  query: |
-    SELECT
-      CASE
-        WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
-        ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
-     END AS replication_delay_seconds;
-
- metric_name: checkpoints_req
-  type: gauge
-  help: 'Number of requested checkpoints'
-  key_labels:
-  values: [checkpoints_req]
-  query: |
-    SELECT checkpoints_req FROM pg_stat_bgwriter;
-
- metric_name: checkpoints_timed
-  type: gauge
-  help: 'Number of scheduled checkpoints'
-  key_labels:
-  values: [checkpoints_timed]
-  query: |
-    SELECT checkpoints_timed FROM pg_stat_bgwriter;
-
- metric_name: compute_logical_snapshot_files
-  type: gauge
-  help: 'Number of snapshot files in pg_logical/snapshot'
-  key_labels:
-    - timeline_id
-  values: [num_logical_snapshot_files]
-  query: |
-    SELECT
-      (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
-      -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
-      -- temporary snapshot files are renamed to the actual snapshot files after they are
-      -- completely built. We only WAL-log the completely built snapshot files.
-      (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
-
-# In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
-# It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
-
-# Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
- metric_name: logical_slot_restart_lsn
-  type: gauge
-  help: 'restart_lsn of logical slots'
-  key_labels:
-    - slot_name
-  values: [restart_lsn]
-  query: |
-    select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
-    from pg_replication_slots
-    where slot_type = 'logical';
-
- metric_name: compute_subscriptions_count
-  type: gauge
-  help: 'Number of logical replication subscriptions grouped by enabled/disabled'
-  key_labels:
-    - enabled
-  values: [subscriptions_count]
-  query: |
-    select subenabled::text as enabled, count(*) as subscriptions_count
-    from pg_subscription
-    group by subenabled;
-
- metric_name: retained_wal
-  type: gauge
-  help: 'Retained WAL in inactive replication slots'
-  key_labels:
-    - slot_name
-  values: [retained_wal]
-  query: |
-    SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
-    FROM pg_replication_slots
-    WHERE active = false;
-
- metric_name: wal_is_lost
-  type: gauge
-  help: 'Whether or not the replication slot wal_status is lost'
-  key_labels:
-    - slot_name
-  values: [wal_is_lost]
-  query: |
-    SELECT slot_name,
-           CASE WHEN wal_status = 'lost' THEN 1 ELSE 0 END AS wal_is_lost
-    FROM pg_replication_slots;
-
--- a/compute/etc/neon_collector_autoscaling.yml
+++ b/compute/etc/neon_collector_autoscaling.yml
@@ -1,55 +0,0 @@
-collector_name: neon_collector_autoscaling
-metrics:
- metric_name: lfc_misses
-  type: gauge
-  help: 'lfc_misses'
-  key_labels:
-  values: [lfc_misses]
-  query: |
-    select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
-
- metric_name: lfc_used
-  type: gauge
-  help: 'LFC chunks used (chunk = 1MB)'
-  key_labels:
-  values: [lfc_used]
-  query: |
-    select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
-
- metric_name: lfc_hits
-  type: gauge
-  help: 'lfc_hits'
-  key_labels:
-  values: [lfc_hits]
-  query: |
-    select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
-
- metric_name: lfc_writes
-  type: gauge
-  help: 'lfc_writes'
-  key_labels:
-  values: [lfc_writes]
-  query: |
-    select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
-
- metric_name: lfc_cache_size_limit
-  type: gauge
-  help: 'LFC cache size limit in bytes'
-  key_labels:
-  values: [lfc_cache_size_limit]
-  query: |
-    select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
-
- metric_name: lfc_approximate_working_set_size_windows
-  type: gauge
-  help: 'Approximate working set size in pages of 8192 bytes'
-  key_labels: [duration_seconds]
-  values: [size]
-  # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
-  # size looking back 1..60 minutes, labeled with the number of minutes.
-  query: |
-    select
-      x::text as duration_seconds,
-      neon.approximate_working_set_size_seconds(x) as size
-    from
-      (select generate_series * 60 as x from generate_series(1, 60)) as t (x);
--- a/compute/etc/pgbouncer.ini
+++ b/compute/etc/pgbouncer.ini
@@ -1,17 +0,0 @@
-[databases]
-*=host=localhost port=5432 auth_user=cloud_admin
-[pgbouncer]
-listen_port=6432
-listen_addr=0.0.0.0
-auth_type=scram-sha-256
-auth_user=cloud_admin
-auth_dbname=postgres
-client_tls_sslmode=disable
-server_tls_sslmode=disable
-pool_mode=transaction
-max_client_conn=10000
-default_pool_size=64
-max_prepared_statements=0
-admin_users=postgres
-unix_socket_dir=/tmp/
-unix_socket_mode=0777
--- a/compute/etc/sql_exporter.yml
+++ b/compute/etc/sql_exporter.yml
@@ -1,33 +0,0 @@
-# Configuration for sql_exporter
-# Global defaults.
-global:
-  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
-  scrape_timeout: 10s
-  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
-  scrape_timeout_offset: 500ms
-  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
-  min_interval: 0s
-  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
-  # as will concurrent scrapes.
-  max_connections: 1
-  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
-  # always be the same as max_connections.
-  max_idle_connections: 1
-  # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
-  # If 0, connections are not closed due to a connection's age.
-  max_connection_lifetime: 5m
-
-# The target to monitor and the collectors to execute on it.
-target:
-  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
-  # the schema gets dropped or replaced to match the driver expected DSN format.
-  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
-
-  # Collectors (referenced by name) to execute on the target.
-  # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-  collectors: [neon_collector]
-
-# Collector files specifies a list of globs. One collector definition is read from each matching file.
-# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-collector_files:
-  - "neon_collector.yml"
--- a/compute/etc/sql_exporter_autoscaling.yml
+++ b/compute/etc/sql_exporter_autoscaling.yml
@@ -1,33 +0,0 @@
-# Configuration for sql_exporter for autoscaling-agent
-# Global defaults.
-global:
-  # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
-  scrape_timeout: 10s
-  # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
-  scrape_timeout_offset: 500ms
-  # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
-  min_interval: 0s
-  # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
-  # as will concurrent scrapes.
-  max_connections: 1
-  # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
-  # always be the same as max_connections.
-  max_idle_connections: 1
-  # Maximum number of maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
-  # If 0, connections are not closed due to a connection's age.
-  max_connection_lifetime: 5m
-
-# The target to monitor and the collectors to execute on it.
-target:
-  # Data source name always has a URI schema that matches the driver name. In some cases (e.g. MySQL)
-  # the schema gets dropped or replaced to match the driver expected DSN format.
-  data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
-
-  # Collectors (referenced by name) to execute on the target.
-  # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-  collectors: [neon_collector_autoscaling]
-
-# Collector files specifies a list of globs. One collector definition is read from each matching file.
-# Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
-collector_files:
-  - "neon_collector_autoscaling.yml"
--- a/compute/vm-image-spec.yaml
+++ b/compute/vm-image-spec.yaml
@@ -1,112 +0,0 @@
-# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
---
-commands:
-  - name: cgconfigparser
-    user: root
-    sysvInitAction: sysinit
-    shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
-  # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
-  # running it as root.
-  - name: chmod-resize-swap
-    user: root
-    sysvInitAction: sysinit
-    shell: 'chmod 711 /neonvm/bin/resize-swap'
-  - name: pgbouncer
-    user: postgres
-    sysvInitAction: respawn
-    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
-  - name: postgres-exporter
-    user: nobody
-    sysvInitAction: respawn
-    shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
-  - name: sql-exporter
-    user: nobody
-    sysvInitAction: respawn
-    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
-  - name: sql-exporter-autoscaling
-    user: nobody
-    sysvInitAction: respawn
-    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
-shutdownHook: |
-  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
-files:
-  - filename: compute_ctl-resize-swap
-    content: |
-      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
-      # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
-      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
-  - filename: cgconfig.conf
-    content: |
-      # Configuration for cgroups in VM compute nodes
-      group neon-postgres {
-          perm {
-              admin {
-                  uid = postgres;
-              }
-              task {
-                  gid = users;
-              }
-          }
-          memory {}
-      }
-build: |
-  # Build cgroup-tools
-  #
-  # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
-  # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
-  # requires cgroup v2, so we'll build cgroup-tools ourselves.
-  FROM debian:bullseye-slim as libcgroup-builder
-  ENV LIBCGROUP_VERSION=v2.0.3
-
-  RUN set -exu \
-      && apt update \
-      && apt install --no-install-recommends -y \
-          git \
-          ca-certificates \
-          automake \
-          cmake \
-          make \
-          gcc \
-          byacc \
-          flex \
-          libtool \
-          libpam0g-dev \
-      && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
-      && INSTALL_DIR="/libcgroup-install" \
-      && mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
-      && cd libcgroup \
-      # extracted from bootstrap.sh, with modified flags:
-      && (test -d m4 || mkdir m4) \
-      && autoreconf -fi \
-      && rm -rf autom4te.cache \
-      && CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
-      # actually build the thing...
-      && make install
-merge: |
-  # tweak nofile limits
-  RUN set -e \
-      && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
-      && test ! -e /etc/security || ( \
-         echo '*    - nofile 1048576' >>/etc/security/limits.conf \
-      && echo 'root - nofile 1048576' >>/etc/security/limits.conf \
-         )
-
-  # Allow postgres user (compute_ctl) to run swap resizer.
-  # Need to install sudo in order to allow this.
-  #
-  # Also, remove the 'read' permission from group/other on /neonvm/bin/resize-swap, just to be safe.
-  RUN set -e \
-      && apt update \
-      && apt install --no-install-recommends -y \
-             sudo \
-      && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-  COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
-
-  COPY cgconfig.conf /etc/cgconfig.conf
-
-  RUN set -e \
-      && chmod 0644 /etc/cgconfig.conf
-
-  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
-  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/
-  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1383,7 +1383,7 @@ impl SmgrQueryTimePerTimeline {
        &'a self,
        op: SmgrQueryType,
        ctx: &'c RequestContext,
-    ) -> Option<impl Drop + 'a> {
+    ) -> Option<impl Drop + '_> {
        let start = Instant::now();

        self.global_started[op as usize].inc();
@@ -1534,7 +1534,7 @@ impl BasebackupQueryTime {
    pub(crate) fn start_recording<'c: 'a, 'a>(
        &'a self,
        ctx: &'c RequestContext,
-    ) -> BasebackupQueryTimeOngoingRecording<'a, 'a> {
+    ) -> BasebackupQueryTimeOngoingRecording<'_, '_> {
        let start = Instant::now();
        match ctx.micros_spent_throttled.open() {
            Ok(()) => (),
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3627,7 +3627,7 @@ impl Tenant {
        start_lsn: Lsn,
        ancestor: Option<Arc<Timeline>>,
        last_aux_file_policy: Option<AuxFilePolicy>,
-    ) -> anyhow::Result<UninitializedTimeline<'a>> {
+    ) -> anyhow::Result<UninitializedTimeline> {
        let tenant_shard_id = self.tenant_shard_id;

        let resources = self.build_timeline_resources(new_timeline_id);
--- a/patches/cloud_regress_pg16.patch
+++ b/patches/cloud_regress_pg16.patch
--- a/compute/patches/pg_anon.patch
+++ b/compute/patches/pg_anon.patch
--- a/compute/patches/pg_cron.patch
+++ b/compute/patches/pg_cron.patch
--- a/compute/patches/pg_hint_plan.patch
+++ b/compute/patches/pg_hint_plan.patch
--- a/compute/patches/pgvector.patch
+++ b/compute/patches/pgvector.patch
--- a/compute/patches/rum.patch
+++ b/compute/patches/rum.patch
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -9,8 +9,6 @@ OBJS = \
 	hll.o \
 	libpagestore.o \
 	neon.o \
-	neon_pgversioncompat.o \
-	neon_perf_counters.o \
 	neon_utils.o \
 	neon_walreader.o \
 	pagestore_smgr.o \
@@ -25,7 +23,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl

 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
+DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"

 EXTRA_CLEAN = \
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -109,7 +109,6 @@ typedef struct FileCacheControl
 								 * reenabling */
 	uint32		size;			/* size of cache file in chunks */
 	uint32		used;			/* number of used chunks */
-	uint32		used_pages;		/* number of used pages */
 	uint32		limit;			/* shared copy of lfc_size_limit */
 	uint64		hits;
 	uint64		misses;
@@ -906,10 +905,6 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,
 				/* Cache overflow: evict least recently used chunk */
 				FileCacheEntry *victim = dlist_container(FileCacheEntry, list_node, dlist_pop_head_node(&lfc_ctl->lru));
 	
-				for (int i = 0; i < BLOCKS_PER_CHUNK; i++)
-				{
-					lfc_ctl->used_pages -= (victim->bitmap[i >> 5] >> (i & 31)) & 1;
-				}
 				CriticalAssert(victim->access_count == 0);
 				entry->offset = victim->offset; /* grab victim's chunk */
 				hash_search_with_hash_value(lfc_hash, &victim->key, victim->hash, HASH_REMOVE, NULL);
@@ -964,7 +959,6 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno,

 				for (int i = 0; i < blocks_in_chunk; i++)
 				{
-					lfc_ctl->used_pages += 1 - ((entry->bitmap[(chunk_offs + i) >> 5] >> ((chunk_offs + i) & 31)) & 1);
 					entry->bitmap[(chunk_offs + i) >> 5] |=
 						(1 << ((chunk_offs + i) & 31));
 				}
@@ -1057,11 +1051,6 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS)
 			if (lfc_ctl)
 				value = lfc_ctl->size;
 			break;
-		case 5:
-			key = "file_cache_used_pages";
-			if (lfc_ctl)
-				value = lfc_ctl->used_pages;
-			break;
 		default:
 			SRF_RETURN_DONE(funcctx);
 	}
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -30,7 +30,6 @@
 #include "utils/guc.h"

 #include "neon.h"
-#include "neon_perf_counters.h"
 #include "neon_utils.h"
 #include "pagestore_client.h"
 #include "walproposer.h"
@@ -332,7 +331,6 @@ CLEANUP_AND_DISCONNECT(PageServer *shard)
 	}
 	if (shard->conn)
 	{
-		MyNeonCounters->pageserver_disconnects_total++;
 		PQfinish(shard->conn);
 		shard->conn = NULL;
 	}
@@ -739,8 +737,6 @@ pageserver_send(shardno_t shard_no, NeonRequest *request)
 	PageServer *shard = &page_servers[shard_no];
 	PGconn	   *pageserver_conn;

-	MyNeonCounters->pageserver_requests_sent_total++;
-
 	/* If the connection was lost for some reason, reconnect */
 	if (shard->state == PS_Connected && PQstatus(shard->conn) == CONNECTION_BAD)
 	{
@@ -893,7 +889,6 @@ pageserver_flush(shardno_t shard_no)
 	}
 	else
 	{
-		MyNeonCounters->pageserver_send_flushes_total++;
 		if (PQflush(pageserver_conn))
 		{
 			char	   *msg = pchomp(PQerrorMessage(pageserver_conn));
@@ -927,7 +922,7 @@ check_neon_id(char **newval, void **extra, GucSource source)
 static Size
 PagestoreShmemSize(void)
 {
-	return add_size(sizeof(PagestoreShmemState), NeonPerfCountersShmemSize());
+	return sizeof(PagestoreShmemState);
 }

 static bool
@@ -946,9 +941,6 @@ PagestoreShmemInit(void)
 		memset(&pagestore_shared->shard_map, 0, sizeof(ShardMap));
 		AssignPageserverConnstring(page_server_connstring, NULL);
 	}
-
-	NeonPerfCountersShmemInit();
-
 	LWLockRelease(AddinShmemInitLock);
 	return found;
 }
--- a/pgxn/neon/neon--1.4--1.5.sql
+++ b/pgxn/neon/neon--1.4--1.5.sql
@@ -1,39 +0,0 @@
-\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
-
-
-CREATE FUNCTION get_backend_perf_counters()
-RETURNS SETOF RECORD
-AS 'MODULE_PATHNAME', 'neon_get_backend_perf_counters'
-LANGUAGE C PARALLEL SAFE;
-
-CREATE FUNCTION get_perf_counters()
-RETURNS SETOF RECORD
-AS 'MODULE_PATHNAME', 'neon_get_perf_counters'
-LANGUAGE C PARALLEL SAFE;
-
-- Show various metrics, for each backend. Note that the values are not reset
-- when a backend exits. When a new backend starts with the backend ID, it will
-- continue accumulating the values from where the old backend left. If you are
-- only interested in the changes from your own session, store the values at the
-- beginning of the session somewhere, and subtract them on subsequent calls.
--
-- For histograms, 'bucket_le' is the upper bound of the histogram bucket.
-CREATE VIEW neon_backend_perf_counters AS
-  SELECT P.procno, P.pid, P.metric, P.bucket_le, P.value
-  FROM get_backend_perf_counters() AS P (
-    procno integer,
-    pid integer,
-    metric text,
-    bucket_le float8,
-    value float8
-  );
-
-- Summary across all backends. (This could also be implemented with
-- an aggregate query over neon_backend_perf_counters view.)
-CREATE VIEW neon_perf_counters AS
-  SELECT P.metric, P.bucket_le, P.value
-  FROM get_perf_counters() AS P (
-    metric text,
-    bucket_le float8,
-    value float8
-  );
--- a/pgxn/neon/neon--1.5--1.4.sql
+++ b/pgxn/neon/neon--1.5--1.4.sql
@@ -1,4 +0,0 @@
-DROP VIEW IF EXISTS neon_perf_counters;
-DROP VIEW IF EXISTS neon_backend_perf_counters;
-DROP FUNCTION IF EXISTS get_perf_counters();
-DROP FUNCTION IF EXISTS get_backend_perf_counters();
--- a/pgxn/neon/neon.control
+++ b/pgxn/neon/neon.control
@@ -1,7 +1,5 @@
 # neon extension
 comment = 'cloud storage for PostgreSQL'
-# TODO: bump default version to 1.5, after we are certain that we don't
-# need to rollback the compute image
 default_version = '1.4'
 module_pathname = '$libdir/neon'
 relocatable = true
--- a/pgxn/neon/neon_perf_counters.c
+++ b/pgxn/neon/neon_perf_counters.c
@@ -1,261 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * neon_perf_counters.c
- *	  Collect statistics about Neon I/O
- *
- * Each backend has its own set of counters in shared memory.
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <math.h>
-
-#include "funcapi.h"
-#include "miscadmin.h"
-#include "storage/proc.h"
-#include "storage/shmem.h"
-#include "utils/builtins.h"
-
-#include "neon_perf_counters.h"
-#include "neon_pgversioncompat.h"
-
-neon_per_backend_counters *neon_per_backend_counters_shared;
-
-Size
-NeonPerfCountersShmemSize(void)
-{
-	Size		size = 0;
-
-	size = add_size(size, mul_size(MaxBackends, sizeof(neon_per_backend_counters)));
-
-	return size;
-}
-
-bool
-NeonPerfCountersShmemInit(void)
-{
-	bool		found;
-
-	neon_per_backend_counters_shared =
-		ShmemInitStruct("Neon perf counters",
-						mul_size(MaxBackends,
-								 sizeof(neon_per_backend_counters)),
-						&found);
-	Assert(found == IsUnderPostmaster);
-	if (!found)
-	{
-		/* shared memory is initialized to zeros, so nothing to do here */
-	}
-}
-
-/*
- * Count a GetPage wait operation.
- */
-void
-inc_getpage_wait(uint64 latency_us)
-{
-	int			lo = 0;
-	int			hi = NUM_GETPAGE_WAIT_BUCKETS - 1;
-
-	/* Find the right bucket with binary search */
-	while (lo < hi)
-	{
-		int			mid = (lo + hi) / 2;
-
-		if (latency_us < getpage_wait_bucket_thresholds[mid])
-			hi = mid;
-		else
-			lo = mid + 1;
-	}
-	MyNeonCounters->getpage_wait_us_bucket[lo]++;
-	MyNeonCounters->getpage_wait_us_sum += latency_us;
-	MyNeonCounters->getpage_wait_us_count++;
-}
-
-/*
- * Support functions for the views, neon_backend_perf_counters and
- * neon_perf_counters.
- */
-
-typedef struct
-{
-	char	   *name;
-	bool		is_bucket;
-	double		bucket_le;
-	double		value;
-} metric_t;
-
-static metric_t *
-neon_perf_counters_to_metrics(neon_per_backend_counters *counters)
-{
-#define NUM_METRICS (2 + NUM_GETPAGE_WAIT_BUCKETS + 8)
-	metric_t   *metrics = palloc((NUM_METRICS + 1) * sizeof(metric_t));
-	uint64		bucket_accum;
-	int			i = 0;
-	Datum		getpage_wait_str;
-
-	metrics[i].name = "getpage_wait_seconds_count";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_wait_us_count;
-	i++;
-	metrics[i].name = "getpage_wait_seconds_sum";
-	metrics[i].is_bucket = false;
-	metrics[i].value = ((double) counters->getpage_wait_us_sum) / 1000000.0;
-	i++;
-
-	bucket_accum = 0;
-	for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
-	{
-		uint64		threshold = getpage_wait_bucket_thresholds[bucketno];
-
-		bucket_accum += counters->getpage_wait_us_bucket[bucketno];
-
-		metrics[i].name = "getpage_wait_seconds_bucket";
-		metrics[i].is_bucket = true;
-		metrics[i].bucket_le = (threshold == UINT64_MAX) ? INFINITY : ((double) threshold) / 1000000.0;
-		metrics[i].value = (double) bucket_accum;
-		i++;
-	}
-	metrics[i].name = "getpage_prefetch_requests_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_prefetch_requests_total;
-	i++;
-	metrics[i].name = "getpage_sync_requests_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_sync_requests_total;
-	i++;
-	metrics[i].name = "getpage_prefetch_misses_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_prefetch_misses_total;
-	i++;
-	metrics[i].name = "getpage_prefetch_discards_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->getpage_prefetch_discards_total;
-	i++;
-	metrics[i].name = "pageserver_requests_sent_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->pageserver_requests_sent_total;
-	i++;
-	metrics[i].name = "pageserver_requests_disconnects_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->pageserver_disconnects_total;
-	i++;
-	metrics[i].name = "pageserver_send_flushes_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->pageserver_send_flushes_total;
-	i++;
-	metrics[i].name = "file_cache_hits_total";
-	metrics[i].is_bucket = false;
-	metrics[i].value = (double) counters->file_cache_hits_total;
-	i++;
-
-	Assert(i == NUM_METRICS);
-
-	/* NULL entry marks end of array */
-	metrics[i].name = NULL;
-	metrics[i].value = 0;
-
-	return metrics;
-}
-
-/*
- * Write metric to three output Datums
- */
-static void
-metric_to_datums(metric_t *m, Datum *values, bool *nulls)
-{
-	values[0] = CStringGetTextDatum(m->name);
-	nulls[0] = false;
-	if (m->is_bucket)
-	{
-		values[1] = Float8GetDatum(m->bucket_le);
-		nulls[1] = false;
-	}
-	else
-	{
-		values[1] = (Datum) 0;
-		nulls[1] = true;
-	}
-	values[2] = Float8GetDatum(m->value);
-	nulls[2] = false;
-}
-
-PG_FUNCTION_INFO_V1(neon_get_backend_perf_counters);
-Datum
-neon_get_backend_perf_counters(PG_FUNCTION_ARGS)
-{
-	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-	Datum		values[5];
-	bool		nulls[5];
-
-	/* We put all the tuples into a tuplestore in one go. */
-	InitMaterializedSRF(fcinfo, 0);
-
-	for (int procno = 0; procno < MaxBackends; procno++)
-	{
-		PGPROC	   *proc = GetPGProcByNumber(procno);
-		int			pid = proc->pid;
-		neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
-		metric_t   *metrics = neon_perf_counters_to_metrics(counters);
-
-		values[0] = Int32GetDatum(procno);
-		nulls[0] = false;
-		values[1] = Int32GetDatum(pid);
-		nulls[1] = false;
-
-		for (int i = 0; metrics[i].name != NULL; i++)
-		{
-			metric_to_datums(&metrics[i], &values[2], &nulls[2]);
-			tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
-		}
-
-		pfree(metrics);
-	}
-
-	return (Datum) 0;
-}
-
-PG_FUNCTION_INFO_V1(neon_get_perf_counters);
-Datum
-neon_get_perf_counters(PG_FUNCTION_ARGS)
-{
-	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-	Datum		values[3];
-	bool		nulls[3];
-	Datum		getpage_wait_str;
-	neon_per_backend_counters totals = {0};
-	metric_t   *metrics;
-
-	/* We put all the tuples into a tuplestore in one go. */
-	InitMaterializedSRF(fcinfo, 0);
-
-	/* Aggregate the counters across all backends */
-	for (int procno = 0; procno < MaxBackends; procno++)
-	{
-		neon_per_backend_counters *counters = &neon_per_backend_counters_shared[procno];
-
-		totals.getpage_wait_us_count += counters->getpage_wait_us_count;
-		totals.getpage_wait_us_sum += counters->getpage_wait_us_sum;
-		for (int bucketno = 0; bucketno < NUM_GETPAGE_WAIT_BUCKETS; bucketno++)
-			totals.getpage_wait_us_bucket[bucketno] += counters->getpage_wait_us_bucket[bucketno];
-		totals.getpage_prefetch_requests_total += counters->getpage_prefetch_requests_total;
-		totals.getpage_sync_requests_total += counters->getpage_sync_requests_total;
-		totals.getpage_prefetch_misses_total += counters->getpage_prefetch_misses_total;
-		totals.getpage_prefetch_discards_total += counters->getpage_prefetch_discards_total;
-		totals.pageserver_requests_sent_total += counters->pageserver_requests_sent_total;
-		totals.pageserver_disconnects_total += counters->pageserver_disconnects_total;
-		totals.pageserver_send_flushes_total += counters->pageserver_send_flushes_total;
-		totals.file_cache_hits_total += counters->file_cache_hits_total;
-	}
-
-	metrics = neon_perf_counters_to_metrics(&totals);
-	for (int i = 0; metrics[i].name != NULL; i++)
-	{
-		metric_to_datums(&metrics[i], &values[0], &nulls[0]);
-		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
-	}
-	pfree(metrics);
-
-	return (Datum) 0;
-}
--- a/pgxn/neon/neon_perf_counters.h
+++ b/pgxn/neon/neon_perf_counters.h
@@ -1,111 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * neon_perf_counters.h
- *	  Performance counters for neon storage requests
- *-------------------------------------------------------------------------
- */
-
-#ifndef NEON_PERF_COUNTERS_H
-#define NEON_PERF_COUNTERS_H
-
-#if PG_VERSION_NUM >= 170000
-#include "storage/procnumber.h"
-#else
-#include "storage/backendid.h"
-#include "storage/proc.h"
-#endif
-
-static const uint64 getpage_wait_bucket_thresholds[] = {
-	      20,       30,       60,       100,  /* 0      -  100 us */
-	     200,      300,      600,	   1000,  /* 100 us - 1 ms */
-	    2000,     3000,     6000,     10000,  /* 1 ms   - 10 ms */
-	   20000,    30000,    60000,    100000,  /* 10 ms  - 100 ms */
-	  200000,   300000,   600000,   1000000,  /* 100 ms - 1 s */
-	 2000000,  3000000,  6000000,  10000000,  /* 1 s - 10 s */
-    20000000, 30000000, 60000000, 100000000,  /* 10 s - 100 s */
-	UINT64_MAX,
-};
-#define NUM_GETPAGE_WAIT_BUCKETS (lengthof(getpage_wait_bucket_thresholds))
-
-typedef struct
-{
-	/*
-	 * Histogram for how long an smgrread() request needs to wait for response
-	 * from pageserver. When prefetching is effective, these wait times can be
-	 * lower than the network latency to the pageserver, even zero, if the
-	 * page is already readily prefetched whenever we need to read a page.
-	 *
-	 * Note: we accumulate these in microseconds, because that's convenient in
-	 * the backend, but the 'neon_backend_perf_counters' view will convert
-	 * them to seconds, to make them more idiomatic as prometheus metrics.
-	 */
-	uint64		getpage_wait_us_count;
-	uint64		getpage_wait_us_sum;
-	uint64		getpage_wait_us_bucket[NUM_GETPAGE_WAIT_BUCKETS];
-
-	/*
-	 * Total number of speculative prefetch Getpage requests and synchronous
-	 * GetPage requests sent.
-	 */
-	uint64		getpage_prefetch_requests_total;
-	uint64		getpage_sync_requests_total;
-
-	/* XXX: It's not clear to me when these misses happen. */
-	uint64		getpage_prefetch_misses_total;
-
-	/*
-	 * Number of prefetched responses that were discarded becuase the
-	 * prefetched page was not needed or because it was concurrently fetched /
-	 * modified by another backend.
-	 */
-	uint64		getpage_prefetch_discards_total;
-
-	/*
-	 * Total number of requests send to pageserver. (prefetch_requests_total
-	 * and sync_request_total count only GetPage requests, this counts all
-	 * request types.)
-	 */
-	uint64		pageserver_requests_sent_total;
-
-	/*
-	 * Number of times the connection to the pageserver was lost and the
-	 * backend had to reconnect. Note that this doesn't count the first
-	 * connection in each backend, only reconnects.
-	 */
-	uint64		pageserver_disconnects_total;
-
-	/*
-	 * Number of network flushes to the pageserver. Synchronous requests are
-	 * flushed immediately, but when prefetching requests are sent in batches,
-	 * this can be smaller than pageserver_requests_sent_total.
-	 */
-	uint64		pageserver_send_flushes_total;
-
-	/*
-	 * Number of requests satisfied from the LFC.
-	 *
-	 * This is redundant with the server-wide file_cache_hits, but this gives
-	 * per-backend granularity, and it's handy to have this in the same place
-	 * as counters for requests that went to the pageserver. Maybe move all
-	 * the LFC stats to this struct in the future?
-	 */
-	uint64		file_cache_hits_total;
-
-} neon_per_backend_counters;
-
-/* Pointer to the shared memory array of neon_per_backend_counters structs */
-extern neon_per_backend_counters *neon_per_backend_counters_shared;
-
-#if PG_VERSION_NUM >= 170000
-#define MyNeonCounters (&neon_per_backend_counters_shared[MyProcNumber])
-#else
-#define MyNeonCounters (&neon_per_backend_counters_shared[MyProc->pgprocno])
-#endif
-
-extern void inc_getpage_wait(uint64 latency);
-
-extern Size NeonPerfCountersShmemSize(void);
-extern bool NeonPerfCountersShmemInit(void);
-
-
-#endif							/* NEON_PERF_COUNTERS_H */
--- a/pgxn/neon/neon_pgversioncompat.c
+++ b/pgxn/neon/neon_pgversioncompat.c
@@ -1,44 +0,0 @@
-/*
- * Support functions for the compatibility macros in neon_pgversioncompat.h
- */
-#include "postgres.h"
-
-#include "funcapi.h"
-#include "miscadmin.h"
-#include "utils/tuplestore.h"
-
-#include "neon_pgversioncompat.h"
-
-#if PG_MAJORVERSION_NUM < 15
-void
-InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
-{
-	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-	Tuplestorestate *tupstore;
-	MemoryContext old_context,
-				per_query_ctx;
-	TupleDesc	stored_tupdesc;
-
-	/* check to see if caller supports returning a tuplestore */
-	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("set-valued function called in context that cannot accept a set")));
-
-	/*
-	 * Store the tuplestore and the tuple descriptor in ReturnSetInfo.  This
-	 * must be done in the per-query memory context.
-	 */
-	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
-	old_context = MemoryContextSwitchTo(per_query_ctx);
-
-	if (get_call_result_type(fcinfo, NULL, &stored_tupdesc) != TYPEFUNC_COMPOSITE)
-		elog(ERROR, "return type must be a row type");
-
-	tupstore = tuplestore_begin_heap(false, false, work_mem);
-	rsinfo->returnMode = SFRM_Materialize;
-	rsinfo->setResult = tupstore;
-	rsinfo->setDesc = stored_tupdesc;
-	MemoryContextSwitchTo(old_context);
-}
-#endif
--- a/pgxn/neon/neon_pgversioncompat.h
+++ b/pgxn/neon/neon_pgversioncompat.h
@@ -6,8 +6,6 @@
 #ifndef NEON_PGVERSIONCOMPAT_H
 #define NEON_PGVERSIONCOMPAT_H

-#include "fmgr.h"
-
 #if PG_MAJORVERSION_NUM < 17
 #define NRelFileInfoBackendIsTemp(rinfo) (rinfo.backend != InvalidBackendId)
 #else
@@ -125,8 +123,4 @@
 #define AmAutoVacuumWorkerProcess() (IsAutoVacuumWorkerProcess())
 #endif

-#if PG_MAJORVERSION_NUM < 15
-extern void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags);
-#endif
-
 #endif							/* NEON_PGVERSIONCOMPAT_H */
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -66,7 +66,6 @@
 #include "storage/md.h"
 #include "storage/smgr.h"

-#include "neon_perf_counters.h"
 #include "pagestore_client.h"
 #include "bitmap.h"

@@ -290,6 +289,7 @@ static PrefetchState *MyPState;

 static bool compact_prefetch_buffers(void);
 static void consume_prefetch_responses(void);
+static uint64 prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns);
 static bool prefetch_read(PrefetchRequest *slot);
 static void prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns);
 static bool prefetch_wait_for(uint64 ring_index);
@@ -780,27 +780,21 @@ prefetch_do_request(PrefetchRequest *slot, neon_request_lsns *force_request_lsns
 }

 /*
- * prefetch_register_bufferv() - register and prefetch buffers
+ * prefetch_register_buffer() - register and prefetch buffer
 *
 * Register that we may want the contents of BufferTag in the near future.
- * This is used when issuing a speculative prefetch request, but also when
- * performing a synchronous request and need the buffer right now.
 *
 * If force_request_lsns is not NULL, those values are sent to the
 * pageserver. If NULL, we utilize the lastWrittenLsn -infrastructure
 * to calculate the LSNs to send.
 *
- * When performing a prefetch rather than a synchronous request,
- * is_prefetch==true. Currently, it only affects how the request is accounted
- * in the perf counters.
- *
 * NOTE: this function may indirectly update MyPState->pfs_hash; which
 * invalidates any active pointers into the hash table.
 */
+
 static uint64
 prefetch_register_bufferv(BufferTag tag, neon_request_lsns *frlsns,
-						  BlockNumber nblocks, const bits8 *mask,
-						  bool is_prefetch)
+						  BlockNumber nblocks, const bits8 *mask)
 {
 	uint64		min_ring_index;
 	PrefetchRequest req;
@@ -821,7 +815,6 @@ Retry:
 		PrfHashEntry *entry = NULL;
 		uint64		ring_index;
 		neon_request_lsns *lsns;
-
 		if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
 			continue;

@@ -865,7 +858,6 @@ Retry:
 					prefetch_set_unused(ring_index);
 					entry = NULL;
 					slot = NULL;
-					MyNeonCounters->getpage_prefetch_discards_total++;
 				}
 			}

@@ -980,11 +972,6 @@ Retry:

 		min_ring_index = Min(min_ring_index, ring_index);

-		if (is_prefetch)
-			MyNeonCounters->getpage_prefetch_requests_total++;
-		else
-			MyNeonCounters->getpage_sync_requests_total++;
-
 		prefetch_do_request(slot, lsns);
 	}

@@ -1013,6 +1000,13 @@ Retry:
 }


+static uint64
+prefetch_register_buffer(BufferTag tag, neon_request_lsns *force_request_lsns)
+{
+	return prefetch_register_bufferv(tag, force_request_lsns, 1, NULL);
+}
+
+
 /*
 * Note: this function can get canceled and use a long jump to the next catch
 * context. Take care.
@@ -2618,7 +2612,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 			lfc_present[i] = ~(lfc_present[i]);

 		ring_index = prefetch_register_bufferv(tag, NULL, iterblocks,
-											   lfc_present, true);
+											   lfc_present);
 		nblocks -= iterblocks;
 		blocknum += iterblocks;

@@ -2662,7 +2656,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)

 	CopyNRelFileInfoToBufTag(tag, InfoFromSMgrRel(reln));

-	ring_index = prefetch_register_bufferv(tag, NULL, 1, NULL, true);
+	ring_index = prefetch_register_buffer(tag, NULL);

 	Assert(ring_index < MyPState->ring_unused &&
 		   MyPState->ring_last <= ring_index);
@@ -2753,20 +2747,17 @@ neon_read_at_lsnv(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber base_block
 	 * weren't for the behaviour of the LwLsn cache that uses the highest
 	 * value of the LwLsn cache when the entry is not found.
 	 */
-	prefetch_register_bufferv(buftag, request_lsns, nblocks, mask, false);
+	prefetch_register_bufferv(buftag, request_lsns, nblocks, mask);

 	for (int i = 0; i < nblocks; i++)
 	{
 		void	   *buffer = buffers[i];
 		BlockNumber blockno = base_blockno + i;
 		neon_request_lsns *reqlsns = &request_lsns[i];
-		TimestampTz		start_ts, end_ts;

 		if (PointerIsValid(mask) && !BITMAP_ISSET(mask, i))
 			continue;

-		start_ts = GetCurrentTimestamp();
-
 		if (RecoveryInProgress() && MyBackendType != B_STARTUP)
 			XLogWaitForReplayOf(reqlsns[0].request_lsn);

@@ -2803,7 +2794,6 @@ Retry:
 				/* drop caches */
 				prefetch_set_unused(slot->my_ring_index);
 				pgBufferUsage.prefetch.expired += 1;
-				MyNeonCounters->getpage_prefetch_discards_total++;
 				/* make it look like a prefetch cache miss */
 				entry = NULL;
 			}
@@ -2814,9 +2804,8 @@ Retry:
 			if (entry == NULL)
 			{
 				pgBufferUsage.prefetch.misses += 1;
-				MyNeonCounters->getpage_prefetch_misses_total++;

-				ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL, false);
+				ring_index = prefetch_register_bufferv(buftag, reqlsns, 1, NULL);
 				Assert(ring_index != UINT64_MAX);
 				slot = GetPrfSlot(ring_index);
 			}
@@ -2871,9 +2860,6 @@ Retry:
 		/* buffer was used, clean up for later reuse */
 		prefetch_set_unused(ring_index);
 		prefetch_cleanup_trailing_unused();
-
-		end_ts = GetCurrentTimestamp();
-		inc_getpage_wait(end_ts >= start_ts ? (end_ts - start_ts) : 0);
 	}
 }

@@ -2927,7 +2913,6 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer
 	/* Try to read from local file cache */
 	if (lfc_read(InfoFromSMgrRel(reln), forkNum, blkno, buffer))
 	{
-		MyNeonCounters->file_cache_hits_total++;
 		return;
 	}

@@ -3112,7 +3097,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 				/* assume heap */
 				RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno);
 				RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno);
-
+	
 				if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0)
 				{
 					neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n",
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -444,7 +444,7 @@ impl<'a> Backend<'a, ComputeUserInfoMaybeEndpoint, &()> {
            Self::Web(url, ()) => {
                info!("performing web authentication");

-                let info = web::authenticate(ctx, config, &url, client).await?;
+                let info = web::authenticate(ctx, &url, client).await?;

                Backend::Web(url, info)
            }
--- a/proxy/src/auth/backend/web.rs
+++ b/proxy/src/auth/backend/web.rs
@@ -1,6 +1,5 @@
 use crate::{
    auth, compute,
-    config::AuthenticationConfig,
    console::{self, provider::NodeInfo},
    context::RequestMonitoring,
    error::{ReportableError, UserFacingError},
@@ -59,7 +58,6 @@ pub(crate) fn new_psql_session_id() -> String {

 pub(super) async fn authenticate(
    ctx: &RequestMonitoring,
-    auth_config: &'static AuthenticationConfig,
    link_uri: &reqwest::Url,
    client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin>,
 ) -> auth::Result<NodeInfo> {
@@ -91,14 +89,6 @@ pub(super) async fn authenticate(
    info!(parent: &span, "waiting for console's reply...");
    let db_info = waiter.await.map_err(WebAuthError::from)?;

-    if auth_config.ip_allowlist_check_enabled {
-        if let Some(allowed_ips) = &db_info.allowed_ips {
-            if !auth::check_peer_addr_is_in_list(&ctx.peer_addr(), allowed_ips) {
-                return Err(auth::AuthError::ip_address_not_allowed(ctx.peer_addr()));
-            }
-        }
-    }
-
    client.write_message_noflush(&Be::NoticeResponse("Connecting to database."))?;

    // This config should be self-contained, because we won't
--- a/proxy/src/console/messages.rs
+++ b/proxy/src/console/messages.rs
@@ -284,8 +284,6 @@ pub(crate) struct DatabaseInfo {
    /// be inconvenient for debug with local PG instance.
    pub(crate) password: Option<Box<str>>,
    pub(crate) aux: MetricsAuxInfo,
-    #[serde(default)]
-    pub(crate) allowed_ips: Option<Vec<IpPattern>>,
 }

 // Manually implement debug to omit sensitive info.
@@ -296,7 +294,6 @@ impl fmt::Debug for DatabaseInfo {
            .field("port", &self.port)
            .field("dbname", &self.dbname)
            .field("user", &self.user)
-            .field("allowed_ips", &self.allowed_ips)
            .finish_non_exhaustive()
    }
 }
@@ -435,22 +432,6 @@ mod tests {
            "aux": dummy_aux(),
        }))?;

-        // with allowed_ips
-        let dbinfo = serde_json::from_value::<DatabaseInfo>(json!({
-            "host": "localhost",
-            "port": 5432,
-            "dbname": "postgres",
-            "user": "john_doe",
-            "password": "password",
-            "aux": dummy_aux(),
-            "allowed_ips": ["127.0.0.1"],
-        }))?;
-
-        assert_eq!(
-            dbinfo.allowed_ips,
-            Some(vec![IpPattern::Single("127.0.0.1".parse()?)])
-        );
-
        Ok(())
    }

--- a/storage_controller/src/scheduler.rs
+++ b/storage_controller/src/scheduler.rs
@@ -2,7 +2,7 @@ use crate::{node::Node, tenant_shard::TenantShard};
 use itertools::Itertools;
 use pageserver_api::models::PageserverUtilization;
 use serde::Serialize;
-use std::{collections::HashMap, fmt::Debug};
+use std::collections::HashMap;
 use utils::{http::error::ApiError, id::NodeId};

 /// Scenarios in which we cannot find a suitable location for a tenant shard
@@ -27,7 +27,7 @@ pub enum MaySchedule {
 }

 #[derive(Serialize)]
-pub(crate) struct SchedulerNode {
+struct SchedulerNode {
    /// How many shards are currently scheduled on this node, via their [`crate::tenant_shard::IntentState`].
    shard_count: usize,
    /// How many shards are currently attached on this node, via their [`crate::tenant_shard::IntentState`].
@@ -38,137 +38,6 @@ pub(crate) struct SchedulerNode {
    may_schedule: MaySchedule,
 }

-pub(crate) trait NodeSchedulingScore: Debug + Ord + Copy + Sized {
-    fn generate(
-        node_id: &NodeId,
-        node: &mut SchedulerNode,
-        context: &ScheduleContext,
-    ) -> Option<Self>;
-    fn is_overloaded(&self) -> bool;
-    fn node_id(&self) -> NodeId;
-}
-
-pub(crate) trait ShardTag {
-    type Score: NodeSchedulingScore;
-}
-
-pub(crate) struct AttachedShardTag {}
-impl ShardTag for AttachedShardTag {
-    type Score = NodeAttachmentSchedulingScore;
-}
-
-pub(crate) struct SecondaryShardTag {}
-impl ShardTag for SecondaryShardTag {
-    type Score = NodeSecondarySchedulingScore;
-}
-
-/// Scheduling score of a given node for shard attachments.
-/// Lower scores indicate more suitable nodes.
-/// Ordering is given by member declaration order (top to bottom).
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
-pub(crate) struct NodeAttachmentSchedulingScore {
-    /// The number of shards belonging to the tenant currently being
-    /// scheduled that are attached to this node.
-    affinity_score: AffinityScore,
-    /// Size of [`ScheduleContext::attached_nodes`] for the current node.
-    /// This normally tracks the number of attached shards belonging to the
-    /// tenant being scheduled that are already on this node.
-    attached_shards_in_context: usize,
-    /// Utilisation score that combines shard count and disk utilisation
-    utilization_score: u64,
-    /// Total number of shards attached to this node. When nodes have identical utilisation, this
-    /// acts as an anti-affinity between attached shards.
-    total_attached_shard_count: usize,
-    /// Convenience to make selection deterministic in tests and empty systems
-    node_id: NodeId,
-}
-
-impl NodeSchedulingScore for NodeAttachmentSchedulingScore {
-    fn generate(
-        node_id: &NodeId,
-        node: &mut SchedulerNode,
-        context: &ScheduleContext,
-    ) -> Option<Self> {
-        let utilization = match &mut node.may_schedule {
-            MaySchedule::Yes(u) => u,
-            MaySchedule::No => {
-                return None;
-            }
-        };
-
-        Some(Self {
-            affinity_score: context
-                .nodes
-                .get(node_id)
-                .copied()
-                .unwrap_or(AffinityScore::FREE),
-            attached_shards_in_context: context.attached_nodes.get(node_id).copied().unwrap_or(0),
-            utilization_score: utilization.cached_score(),
-            total_attached_shard_count: node.attached_shard_count,
-            node_id: *node_id,
-        })
-    }
-
-    fn is_overloaded(&self) -> bool {
-        PageserverUtilization::is_overloaded(self.utilization_score)
-    }
-
-    fn node_id(&self) -> NodeId {
-        self.node_id
-    }
-}
-
-/// Scheduling score of a given node for shard secondaries.
-/// Lower scores indicate more suitable nodes.
-/// Ordering is given by member declaration order (top to bottom).
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
-pub(crate) struct NodeSecondarySchedulingScore {
-    /// The number of shards belonging to the tenant currently being
-    /// scheduled that are attached to this node.
-    affinity_score: AffinityScore,
-    /// Utilisation score that combines shard count and disk utilisation
-    utilization_score: u64,
-    /// Total number of shards attached to this node. When nodes have identical utilisation, this
-    /// acts as an anti-affinity between attached shards.
-    total_attached_shard_count: usize,
-    /// Convenience to make selection deterministic in tests and empty systems
-    node_id: NodeId,
-}
-
-impl NodeSchedulingScore for NodeSecondarySchedulingScore {
-    fn generate(
-        node_id: &NodeId,
-        node: &mut SchedulerNode,
-        context: &ScheduleContext,
-    ) -> Option<Self> {
-        let utilization = match &mut node.may_schedule {
-            MaySchedule::Yes(u) => u,
-            MaySchedule::No => {
-                return None;
-            }
-        };
-
-        Some(Self {
-            affinity_score: context
-                .nodes
-                .get(node_id)
-                .copied()
-                .unwrap_or(AffinityScore::FREE),
-            utilization_score: utilization.cached_score(),
-            total_attached_shard_count: node.attached_shard_count,
-            node_id: *node_id,
-        })
-    }
-
-    fn is_overloaded(&self) -> bool {
-        PageserverUtilization::is_overloaded(self.utilization_score)
-    }
-
-    fn node_id(&self) -> NodeId {
-        self.node_id
-    }
-}
-
 impl PartialEq for SchedulerNode {
    fn eq(&self, other: &Self) -> bool {
        let may_schedule_matches = matches!(
@@ -537,28 +406,6 @@ impl Scheduler {
        node.and_then(|(node_id, may_schedule)| if may_schedule { Some(node_id) } else { None })
    }

-    /// Compute a schedulling score for each node that the scheduler knows of
-    /// minus a set of hard excluded nodes.
-    fn compute_node_scores<Score>(
-        &mut self,
-        hard_exclude: &[NodeId],
-        context: &ScheduleContext,
-    ) -> Vec<Score>
-    where
-        Score: NodeSchedulingScore,
-    {
-        self.nodes
-            .iter_mut()
-            .filter_map(|(k, v)| {
-                if hard_exclude.contains(k) {
-                    None
-                } else {
-                    Score::generate(k, v, context)
-                }
-            })
-            .collect()
-    }
-
    /// hard_exclude: it is forbidden to use nodes in this list, typically becacuse they
    /// are already in use by this shard -- we use this to avoid picking the same node
    /// as both attached and secondary location.  This is a hard constraint: if we cannot
@@ -568,7 +415,7 @@ impl Scheduler {
    /// to their anti-affinity score.  We use this to prefeer to avoid placing shards in
    /// the same tenant on the same node.  This is a soft constraint: the context will never
    /// cause us to fail to schedule a shard.
-    pub(crate) fn schedule_shard<Tag: ShardTag>(
+    pub(crate) fn schedule_shard(
        &mut self,
        hard_exclude: &[NodeId],
        context: &ScheduleContext,
@@ -577,7 +424,20 @@ impl Scheduler {
            return Err(ScheduleError::NoPageservers);
        }

-        let mut scores = self.compute_node_scores::<Tag::Score>(hard_exclude, context);
+        let mut scores: Vec<(NodeId, AffinityScore, u64, usize)> = self
+            .nodes
+            .iter_mut()
+            .filter_map(|(k, v)| match &mut v.may_schedule {
+                MaySchedule::No => None,
+                MaySchedule::Yes(_) if hard_exclude.contains(k) => None,
+                MaySchedule::Yes(utilization) => Some((
+                    *k,
+                    context.nodes.get(k).copied().unwrap_or(AffinityScore::FREE),
+                    utilization.cached_score(),
+                    v.attached_shard_count,
+                )),
+            })
+            .collect();

        // Exclude nodes whose utilization is critically high, if there are alternatives available.  This will
        // cause us to violate affinity rules if it is necessary to avoid critically overloading nodes: for example
@@ -585,18 +445,20 @@ impl Scheduler {
        // overloaded.
        let non_overloaded_scores = scores
            .iter()
-            .filter(|i| !i.is_overloaded())
+            .filter(|i| !PageserverUtilization::is_overloaded(i.2))
            .copied()
            .collect::<Vec<_>>();
        if !non_overloaded_scores.is_empty() {
            scores = non_overloaded_scores;
        }

-        // Sort the nodes by score. The one with the lowest scores will be the preferred node.
-        // Refer to [`NodeAttachmentSchedulingScore`] for attached locations and
-        // [`NodeSecondarySchedulingScore`] for secondary locations to understand how the nodes
-        // are ranked.
-        scores.sort();
+        // Sort by, in order of precedence:
+        //  1st: Affinity score.  We should never pick a higher-score node if a lower-score node is available
+        //  2nd: Utilization score (this combines shard count and disk utilization)
+        //  3rd: Attached shard count.  When nodes have identical utilization (e.g. when populating some
+        //       empty nodes), this acts as an anti-affinity between attached shards.
+        //  4th: Node ID.  This is a convenience to make selection deterministic in tests and empty systems.
+        scores.sort_by_key(|i| (i.1, i.2, i.3, i.0));

        if scores.is_empty() {
            // After applying constraints, no pageservers were left.
@@ -619,12 +481,12 @@ impl Scheduler {
        }

        // Lowest score wins
-        let node_id = scores.first().unwrap().node_id();
+        let node_id = scores.first().unwrap().0;

        if !matches!(context.mode, ScheduleMode::Speculative) {
            tracing::info!(
            "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?})",
-            scores.iter().map(|i| i.node_id().0).collect::<Vec<_>>()
+            scores.iter().map(|i| i.0 .0).collect::<Vec<_>>()
        );
        }

@@ -694,9 +556,9 @@ mod tests {

        let context = ScheduleContext::default();

-        let scheduled = scheduler.schedule_shard::<AttachedShardTag>(&[], &context)?;
+        let scheduled = scheduler.schedule_shard(&[], &context)?;
        t1_intent.set_attached(&mut scheduler, Some(scheduled));
-        let scheduled = scheduler.schedule_shard::<AttachedShardTag>(&[], &context)?;
+        let scheduled = scheduler.schedule_shard(&[], &context)?;
        t2_intent.set_attached(&mut scheduler, Some(scheduled));

        assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 1);
@@ -705,8 +567,7 @@ mod tests {
        assert_eq!(scheduler.get_node_shard_count(NodeId(2)), 1);
        assert_eq!(scheduler.get_node_attached_shard_count(NodeId(2)), 1);

-        let scheduled =
-            scheduler.schedule_shard::<AttachedShardTag>(&t1_intent.all_pageservers(), &context)?;
+        let scheduled = scheduler.schedule_shard(&t1_intent.all_pageservers(), &context)?;
        t1_intent.push_secondary(&mut scheduler, scheduled);

        assert_eq!(scheduler.get_node_shard_count(NodeId(1)), 1);
@@ -760,9 +621,7 @@ mod tests {
            scheduler: &mut Scheduler,
            context: &ScheduleContext,
        ) {
-            let scheduled = scheduler
-                .schedule_shard::<AttachedShardTag>(&[], context)
-                .unwrap();
+            let scheduled = scheduler.schedule_shard(&[], context).unwrap();
            let mut intent = IntentState::new();
            intent.set_attached(scheduler, Some(scheduled));
            scheduled_intents.push(intent);
--- a/storage_controller/src/service.rs
+++ b/storage_controller/src/service.rs
@@ -26,7 +26,7 @@ use crate::{
        ShardGenerationState, TenantFilter,
    },
    reconciler::{ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder},
-    scheduler::{AttachedShardTag, MaySchedule, ScheduleContext, ScheduleError, ScheduleMode},
+    scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode},
    tenant_shard::{
        MigrateAttachment, ReconcileNeeded, ReconcilerStatus, ScheduleOptimization,
        ScheduleOptimizationAction,
@@ -2629,8 +2629,7 @@ impl Service {
            let scheduler = &mut locked.scheduler;
            // Right now we only perform the operation on a single node without parallelization
            // TODO fan out the operation to multiple nodes for better performance
-            let node_id =
-                scheduler.schedule_shard::<AttachedShardTag>(&[], &ScheduleContext::default())?;
+            let node_id = scheduler.schedule_shard(&[], &ScheduleContext::default())?;
            let node = locked
                .nodes
                .get(&node_id)
@@ -2816,8 +2815,7 @@ impl Service {

            // Pick an arbitrary node to use for remote deletions (does not have to be where the tenant
            // was attached, just has to be able to see the S3 content)
-            let node_id =
-                scheduler.schedule_shard::<AttachedShardTag>(&[], &ScheduleContext::default())?;
+            let node_id = scheduler.schedule_shard(&[], &ScheduleContext::default())?;
            let node = nodes
                .get(&node_id)
                .expect("Pageservers may not be deleted while lock is active");
@@ -5274,7 +5272,7 @@ impl Service {
            }
            AvailabilityTransition::ToActive => {
                tracing::info!("Node {} transition to active", node_id);
-                // When a node comes back online, we must reconcile any tenant that has a None observed
+                // When a node comes back online, we must reconcile any non-detached tenant that has a None observed
                // location on the node.
                for tenant_shard in locked.tenants.values_mut() {
                    // If a reconciliation is already in progress, rely on the previous scheduling
@@ -5284,7 +5282,9 @@ impl Service {
                    }

                    if let Some(observed_loc) = tenant_shard.observed.locations.get_mut(&node_id) {
-                        if observed_loc.conf.is_none() {
+                        if observed_loc.conf.is_none()
+                            && !matches!(tenant_shard.policy, PlacementPolicy::Detached)
+                        {
                            self.maybe_reconcile_shard(tenant_shard, &new_nodes);
                        }
                    }
--- a/storage_controller/src/tenant_shard.rs
+++ b/storage_controller/src/tenant_shard.rs
@@ -8,10 +8,7 @@ use crate::{
    metrics::{self, ReconcileCompleteLabelGroup, ReconcileOutcome},
    persistence::TenantShardPersistence,
    reconciler::{ReconcileUnits, ReconcilerConfig},
-    scheduler::{
-        AffinityScore, AttachedShardTag, MaySchedule, RefCountUpdate, ScheduleContext,
-        SecondaryShardTag,
-    },
+    scheduler::{AffinityScore, MaySchedule, RefCountUpdate, ScheduleContext},
    service::ReconcileResultRequest,
 };
 use pageserver_api::controller_api::{
@@ -338,19 +335,19 @@ pub(crate) enum ReconcileWaitError {
    Failed(TenantShardId, Arc<ReconcileError>),
 }

-#[derive(Eq, PartialEq, Debug, Clone)]
+#[derive(Eq, PartialEq, Debug)]
 pub(crate) struct ReplaceSecondary {
    old_node_id: NodeId,
    new_node_id: NodeId,
 }

-#[derive(Eq, PartialEq, Debug, Clone)]
+#[derive(Eq, PartialEq, Debug)]
 pub(crate) struct MigrateAttachment {
    pub(crate) old_attached_node_id: NodeId,
    pub(crate) new_attached_node_id: NodeId,
 }

-#[derive(Eq, PartialEq, Debug, Clone)]
+#[derive(Eq, PartialEq, Debug)]
 pub(crate) enum ScheduleOptimizationAction {
    // Replace one of our secondary locations with a different node
    ReplaceSecondary(ReplaceSecondary),
@@ -358,7 +355,7 @@ pub(crate) enum ScheduleOptimizationAction {
    MigrateAttachment(MigrateAttachment),
 }

-#[derive(Eq, PartialEq, Debug, Clone)]
+#[derive(Eq, PartialEq, Debug)]
 pub(crate) struct ScheduleOptimization {
    // What was the reconcile sequence when we generated this optimization?  The optimization
    // should only be applied if the shard's sequence is still at this value, in case other changes
@@ -540,8 +537,7 @@ impl TenantShard {
            Ok((true, promote_secondary))
        } else {
            // Pick a fresh node: either we had no secondaries or none were schedulable
-            let node_id =
-                scheduler.schedule_shard::<AttachedShardTag>(&self.intent.secondary, context)?;
+            let node_id = scheduler.schedule_shard(&self.intent.secondary, context)?;
            tracing::debug!("Selected {} as attached", node_id);
            self.intent.set_attached(scheduler, Some(node_id));
            Ok((true, node_id))
@@ -617,8 +613,7 @@ impl TenantShard {

                let mut used_pageservers = vec![attached_node_id];
                while self.intent.secondary.len() < secondary_count {
-                    let node_id = scheduler
-                        .schedule_shard::<SecondaryShardTag>(&used_pageservers, context)?;
+                    let node_id = scheduler.schedule_shard(&used_pageservers, context)?;
                    self.intent.push_secondary(scheduler, node_id);
                    used_pageservers.push(node_id);
                    modified = true;
@@ -631,7 +626,7 @@ impl TenantShard {
                    modified = true;
                } else if self.intent.secondary.is_empty() {
                    // Populate secondary by scheduling a fresh node
-                    let node_id = scheduler.schedule_shard::<SecondaryShardTag>(&[], context)?;
+                    let node_id = scheduler.schedule_shard(&[], context)?;
                    self.intent.push_secondary(scheduler, node_id);
                    modified = true;
                }
@@ -808,10 +803,9 @@ impl TenantShard {
            // Let the scheduler suggest a node, where it would put us if we were scheduling afresh
            // This implicitly limits the choice to nodes that are available, and prefers nodes
            // with lower utilization.
-            let Ok(candidate_node) = scheduler.schedule_shard::<SecondaryShardTag>(
-                &self.intent.all_pageservers(),
-                schedule_context,
-            ) else {
+            let Ok(candidate_node) =
+                scheduler.schedule_shard(&self.intent.all_pageservers(), schedule_context)
+            else {
                // A scheduling error means we have no possible candidate replacements
                continue;
            };
@@ -1339,8 +1333,6 @@ impl TenantShard {

 #[cfg(test)]
 pub(crate) mod tests {
-    use std::{cell::RefCell, rc::Rc};
-
    use pageserver_api::{
        controller_api::NodeAvailability,
        shard::{ShardCount, ShardNumber},
@@ -1645,14 +1637,12 @@ pub(crate) mod tests {

    // Optimize til quiescent: this emulates what Service::optimize_all does, when
    // called repeatedly in the background.
-    // Returns the applied optimizations
    fn optimize_til_idle(
        nodes: &HashMap<NodeId, Node>,
        scheduler: &mut Scheduler,
        shards: &mut [TenantShard],
-    ) -> Vec<ScheduleOptimization> {
+    ) {
        let mut loop_n = 0;
-        let mut optimizations = Vec::default();
        loop {
            let mut schedule_context = ScheduleContext::default();
            let mut any_changed = false;
@@ -1667,7 +1657,6 @@ pub(crate) mod tests {
            for shard in shards.iter_mut() {
                let optimization = shard.optimize_attachment(nodes, &schedule_context);
                if let Some(optimization) = optimization {
-                    optimizations.push(optimization.clone());
                    shard.apply_optimization(scheduler, optimization);
                    any_changed = true;
                    break;
@@ -1675,7 +1664,6 @@ pub(crate) mod tests {

                let optimization = shard.optimize_secondary(scheduler, &schedule_context);
                if let Some(optimization) = optimization {
-                    optimizations.push(optimization.clone());
                    shard.apply_optimization(scheduler, optimization);
                    any_changed = true;
                    break;
@@ -1690,8 +1678,6 @@ pub(crate) mod tests {
            loop_n += 1;
            assert!(loop_n < 1000);
        }
-
-        optimizations
    }

    /// Test the balancing behavior of shard scheduling: that it achieves a balance, and
@@ -1744,48 +1730,4 @@ pub(crate) mod tests {

        Ok(())
    }
-
-    /// Test that initial shard scheduling is optimal. By optimal we mean
-    /// that the optimizer cannot find a way to improve it.
-    ///
-    /// This test is an example of the scheduling issue described in
-    /// https://github.com/neondatabase/neon/issues/8969
-    #[test]
-    fn initial_scheduling_is_optimal() -> anyhow::Result<()> {
-        use itertools::Itertools;
-
-        let nodes = make_test_nodes(2);
-
-        let mut scheduler = Scheduler::new([].iter());
-        scheduler.node_upsert(nodes.get(&NodeId(1)).unwrap());
-        scheduler.node_upsert(nodes.get(&NodeId(2)).unwrap());
-
-        let mut a = make_test_tenant(PlacementPolicy::Attached(1), ShardCount::new(4));
-        let a_context = Rc::new(RefCell::new(ScheduleContext::default()));
-
-        let mut b = make_test_tenant(PlacementPolicy::Attached(1), ShardCount::new(4));
-        let b_context = Rc::new(RefCell::new(ScheduleContext::default()));
-
-        let a_shards_with_context = a.iter_mut().map(|shard| (shard, a_context.clone()));
-        let b_shards_with_context = b.iter_mut().map(|shard| (shard, b_context.clone()));
-
-        let schedule_order = a_shards_with_context.interleave(b_shards_with_context);
-
-        for (shard, context) in schedule_order {
-            let context = &mut *context.borrow_mut();
-            shard.schedule(&mut scheduler, context).unwrap();
-        }
-
-        let applied_to_a = optimize_til_idle(&nodes, &mut scheduler, &mut a);
-        assert_eq!(applied_to_a, vec![]);
-
-        let applied_to_b = optimize_til_idle(&nodes, &mut scheduler, &mut b);
-        assert_eq!(applied_to_b, vec![]);
-
-        for shard in a.iter_mut().chain(b.iter_mut()) {
-            shard.intent.clear(&mut scheduler);
-        }
-
-        Ok(())
-    }
 }
--- a/storage_scrubber/src/checks.rs
+++ b/storage_scrubber/src/checks.rs
@@ -1,12 +1,13 @@
 use std::collections::{HashMap, HashSet};

+use anyhow::Context;
 use itertools::Itertools;
 use pageserver::tenant::checks::check_valid_layermap;
 use pageserver::tenant::layer_map::LayerMap;
 use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata;
 use pageserver_api::shard::ShardIndex;
 use tokio_util::sync::CancellationToken;
-use tracing::{info, warn};
+use tracing::{error, info, warn};
 use utils::generation::Generation;
 use utils::id::TimelineId;

@@ -28,8 +29,9 @@ pub(crate) struct TimelineAnalysis {
    /// yet.
    pub(crate) warnings: Vec<String>,

-    /// Objects whose keys were not recognized at all, i.e. not layer files, not indices, and not initdb archive.
-    pub(crate) unknown_keys: Vec<String>,
+    /// Keys not referenced in metadata. They are candidates for removal, but NOT NECESSARILY
+    /// garbage: beware of races between reading the metadata and reading the objects.
+    pub(crate) garbage_keys: Vec<String>,
 }

 impl TimelineAnalysis {
@@ -37,7 +39,7 @@ impl TimelineAnalysis {
        Self {
            errors: Vec::new(),
            warnings: Vec::new(),
-            unknown_keys: Vec::new(),
+            garbage_keys: Vec::new(),
        }
    }

@@ -57,7 +59,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
 ) -> TimelineAnalysis {
    let mut result = TimelineAnalysis::new();

-    info!("Checking timeline");
+    info!("Checking timeline {id}");

    if let Some(s3_active_branch) = s3_active_branch {
        info!(
@@ -78,7 +80,7 @@ pub(crate) async fn branch_cleanup_and_check_errors(
    match s3_data {
        Some(s3_data) => {
            result
-                .unknown_keys
+                .garbage_keys
                .extend(s3_data.unknown_keys.into_iter().map(|k| k.key.to_string()));

            match s3_data.blob_data {
@@ -202,10 +204,10 @@ pub(crate) async fn branch_cleanup_and_check_errors(
        warn!("Timeline metadata warnings: {0:?}", result.warnings);
    }

-    if !result.unknown_keys.is_empty() {
-        warn!(
-            "The following keys are not recognized: {0:?}",
-            result.unknown_keys
+    if !result.garbage_keys.is_empty() {
+        error!(
+            "The following keys should be removed from S3: {0:?}",
+            result.garbage_keys
        )
    }

@@ -292,10 +294,10 @@ impl TenantObjectListing {
 pub(crate) struct RemoteTimelineBlobData {
    pub(crate) blob_data: BlobDataParseResult,

-    /// Index objects that were not used when loading `blob_data`, e.g. those from old generations
+    /// Index objects that were not used when loading `blob_data`, e.g. those from old generations
    pub(crate) unused_index_keys: Vec<ListingObject>,

-    /// Objects whose keys were not recognized at all, i.e. not layer files, not indices
+    /// Objects whose keys were not recognized at all, i.e. not layer files, not indices
    pub(crate) unknown_keys: Vec<ListingObject>,
 }

@@ -327,54 +329,11 @@ pub(crate) fn parse_layer_object_name(name: &str) -> Result<(LayerName, Generati
    }
 }

-/// Note (<https://github.com/neondatabase/neon/issues/8872>):
-/// Since we do not gurantee the order of the listing, we could list layer keys right before
-/// pageserver `RemoteTimelineClient` deletes the layer files and then the index.
-/// In the rare case, this would give back a transient error where the index key is missing.
-///
-/// To avoid generating false positive, we try streaming the listing for a second time.
 pub(crate) async fn list_timeline_blobs(
    remote_client: &GenericRemoteStorage,
    id: TenantShardTimelineId,
    root_target: &RootTarget,
 ) -> anyhow::Result<RemoteTimelineBlobData> {
-    let res = list_timeline_blobs_impl(remote_client, id, root_target).await?;
-    match res {
-        ListTimelineBlobsResult::Ready(data) => Ok(data),
-        ListTimelineBlobsResult::MissingIndexPart(_) => {
-            // Retry if index is missing.
-            let data = list_timeline_blobs_impl(remote_client, id, root_target)
-                .await?
-                .into_data();
-            Ok(data)
-        }
-    }
-}
-
-enum ListTimelineBlobsResult {
-    /// Blob data is ready to be intepreted.
-    Ready(RemoteTimelineBlobData),
-    /// List timeline blobs has layer files but is missing [`IndexPart`].
-    MissingIndexPart(RemoteTimelineBlobData),
-}
-
-impl ListTimelineBlobsResult {
-    /// Get the inner blob data regardless the status.
-    pub fn into_data(self) -> RemoteTimelineBlobData {
-        match self {
-            ListTimelineBlobsResult::Ready(data) => data,
-            ListTimelineBlobsResult::MissingIndexPart(data) => data,
-        }
-    }
-}
-
-/// Returns [`ListTimelineBlobsResult::MissingIndexPart`] if blob data has layer files
-/// but is missing [`IndexPart`], otherwise returns [`ListTimelineBlobsResult::Ready`].
-async fn list_timeline_blobs_impl(
-    remote_client: &GenericRemoteStorage,
-    id: TenantShardTimelineId,
-    root_target: &RootTarget,
-) -> anyhow::Result<ListTimelineBlobsResult> {
    let mut s3_layers = HashSet::new();

    let mut errors = Vec::new();
@@ -416,28 +375,30 @@ async fn list_timeline_blobs_impl(
                    s3_layers.insert((new_layer, gen));
                }
                Err(e) => {
-                    tracing::info!("Error parsing {maybe_layer_name} as layer name: {e}");
+                    tracing::info!("Error parsing key {maybe_layer_name}");
+                    errors.push(
+                        format!("S3 list response got an object with key {key} that is not a layer name: {e}"),
+                    );
                    unknown_keys.push(obj);
                }
            },
            None => {
-                tracing::info!("S3 listed an unknown key: {key}");
+                tracing::warn!("Unknown key {key}");
+                errors.push(format!("S3 list response got an object with odd key {key}"));
                unknown_keys.push(obj);
            }
        }
    }

-    if index_part_keys.is_empty() && s3_layers.is_empty() {
-        tracing::debug!("Timeline is empty: expected post-deletion state.");
-        if initdb_archive {
-            tracing::info!("Timeline is post deletion but initdb archive is still present.");
-        }
-
-        return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData {
+    if index_part_keys.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::debug!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(RemoteTimelineBlobData {
            blob_data: BlobDataParseResult::Relic,
            unused_index_keys: index_part_keys,
-            unknown_keys,
-        }));
+            unknown_keys: Vec::new(),
+        });
    }

    // Choose the index_part with the highest generation
@@ -463,43 +424,19 @@ async fn list_timeline_blobs_impl(
    match index_part_object.as_ref() {
        Some(selected) => index_part_keys.retain(|k| k != selected),
        None => {
-            // It is possible that the branch gets deleted after we got some layer files listed
-            // and we no longer have the index file in the listing.
-            errors.push(
-                "S3 list response got no index_part.json file but still has layer files"
-                    .to_string(),
-            );
-            return Ok(ListTimelineBlobsResult::MissingIndexPart(
-                RemoteTimelineBlobData {
-                    blob_data: BlobDataParseResult::Incorrect { errors, s3_layers },
-                    unused_index_keys: index_part_keys,
-                    unknown_keys,
-                },
-            ));
+            errors.push("S3 list response got no index_part.json file".to_string());
        }
    }

    if let Some(index_part_object_key) = index_part_object.as_ref() {
        let index_part_bytes =
-            match download_object_with_retries(remote_client, &index_part_object_key.key).await {
-                Ok(index_part_bytes) => index_part_bytes,
-                Err(e) => {
-                    // It is possible that the branch gets deleted in-between we list the objects
-                    // and we download the index part file.
-                    errors.push(format!("failed to download index_part.json: {e}"));
-                    return Ok(ListTimelineBlobsResult::MissingIndexPart(
-                        RemoteTimelineBlobData {
-                            blob_data: BlobDataParseResult::Incorrect { errors, s3_layers },
-                            unused_index_keys: index_part_keys,
-                            unknown_keys,
-                        },
-                    ));
-                }
-            };
+            download_object_with_retries(remote_client, &index_part_object_key.key)
+                .await
+                .context("index_part.json download")?;

        match serde_json::from_slice(&index_part_bytes) {
            Ok(index_part) => {
-                return Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData {
+                return Ok(RemoteTimelineBlobData {
                    blob_data: BlobDataParseResult::Parsed {
                        index_part: Box::new(index_part),
                        index_part_generation,
@@ -507,7 +444,7 @@ async fn list_timeline_blobs_impl(
                    },
                    unused_index_keys: index_part_keys,
                    unknown_keys,
-                }))
+                })
            }
            Err(index_parse_error) => errors.push(format!(
                "index_part.json body parsing error: {index_parse_error}"
@@ -521,9 +458,9 @@ async fn list_timeline_blobs_impl(
        );
    }

-    Ok(ListTimelineBlobsResult::Ready(RemoteTimelineBlobData {
+    Ok(RemoteTimelineBlobData {
        blob_data: BlobDataParseResult::Incorrect { errors, s3_layers },
        unused_index_keys: index_part_keys,
        unknown_keys,
-    }))
+    })
 }
--- a/storage_scrubber/src/main.rs
+++ b/storage_scrubber/src/main.rs
@@ -41,10 +41,6 @@ struct Cli {
    #[arg(long)]
    /// JWT token for authenticating with storage controller.  Requires scope 'scrubber' or 'admin'.
    controller_jwt: Option<String>,
-
-    /// If set to true, the scrubber will exit with error code on fatal error.
-    #[arg(long, default_value_t = false)]
-    exit_code: bool,
 }

 #[derive(Subcommand, Debug)]
@@ -207,7 +203,6 @@ async fn main() -> anyhow::Result<()> {
                    tenant_ids,
                    json,
                    post_to_storcon,
-                    cli.exit_code,
                )
                .await
            }
@@ -274,7 +269,6 @@ async fn main() -> anyhow::Result<()> {
                gc_min_age,
                gc_mode,
                post_to_storcon,
-                cli.exit_code,
            )
            .await
        }
@@ -290,7 +284,6 @@ pub async fn run_cron_job(
    gc_min_age: humantime::Duration,
    gc_mode: GcMode,
    post_to_storcon: bool,
-    exit_code: bool,
 ) -> anyhow::Result<()> {
    tracing::info!(%gc_min_age, %gc_mode, "Running pageserver-physical-gc");
    pageserver_physical_gc_cmd(
@@ -308,7 +301,6 @@ pub async fn run_cron_job(
        Vec::new(),
        true,
        post_to_storcon,
-        exit_code,
    )
    .await?;

@@ -357,7 +349,6 @@ pub async fn scan_pageserver_metadata_cmd(
    tenant_shard_ids: Vec<TenantShardId>,
    json: bool,
    post_to_storcon: bool,
-    exit_code: bool,
 ) -> anyhow::Result<()> {
    if controller_client.is_none() && post_to_storcon {
        return Err(anyhow!("Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run"));
@@ -389,9 +380,6 @@ pub async fn scan_pageserver_metadata_cmd(

            if summary.is_fatal() {
                tracing::error!("Fatal scrub errors detected");
-                if exit_code {
-                    std::process::exit(1);
-                }
            } else if summary.is_empty() {
                // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
                // scrubber they were likely expecting to scan something, and if we see no timelines
@@ -403,9 +391,6 @@ pub async fn scan_pageserver_metadata_cmd(
                        .prefix_in_bucket
                        .unwrap_or("<none>".to_string())
                );
-                if exit_code {
-                    std::process::exit(1);
-                }
            }

            Ok(())
--- a/storage_scrubber/src/scan_pageserver_metadata.rs
+++ b/storage_scrubber/src/scan_pageserver_metadata.rs
@@ -12,7 +12,6 @@ use pageserver_api::controller_api::MetadataHealthUpdateRequest;
 use pageserver_api::shard::TenantShardId;
 use remote_storage::GenericRemoteStorage;
 use serde::Serialize;
-use tracing::{info_span, Instrument};
 use utils::id::TenantId;
 use utils::shard::ShardCount;

@@ -170,54 +169,45 @@ pub async fn scan_pageserver_metadata(
        let mut timeline_ids = HashSet::new();
        let mut timeline_generations = HashMap::new();
        for (ttid, data) in timelines {
-            async {
-                if ttid.tenant_shard_id.shard_count == highest_shard_count {
-                    // Only analyze `TenantShardId`s with highest shard count.
+            if ttid.tenant_shard_id.shard_count == highest_shard_count {
+                // Only analyze `TenantShardId`s with highest shard count.

-                    // Stash the generation of each timeline, for later use identifying orphan layers
-                    if let BlobDataParseResult::Parsed {
-                        index_part,
-                        index_part_generation,
-                        s3_layers: _s3_layers,
-                    } = &data.blob_data
-                    {
-                        if index_part.deleted_at.is_some() {
-                            // skip deleted timeline.
-                            tracing::info!(
-                                "Skip analysis of {} b/c timeline is already deleted",
-                                ttid
-                            );
-                            return;
-                        }
-                        timeline_generations.insert(ttid, *index_part_generation);
+                // Stash the generation of each timeline, for later use identifying orphan layers
+                if let BlobDataParseResult::Parsed {
+                    index_part,
+                    index_part_generation,
+                    s3_layers: _s3_layers,
+                } = &data.blob_data
+                {
+                    if index_part.deleted_at.is_some() {
+                        // skip deleted timeline.
+                        tracing::info!("Skip analysis of {} b/c timeline is already deleted", ttid);
+                        continue;
                    }
-
-                    // Apply checks to this timeline shard's metadata, and in the process update `tenant_objects`
-                    // reference counts for layers across the tenant.
-                    let analysis = branch_cleanup_and_check_errors(
-                        remote_client,
-                        &ttid,
-                        &mut tenant_objects,
-                        None,
-                        None,
-                        Some(data),
-                    )
-                    .await;
-                    summary.update_analysis(&ttid, &analysis);
-
-                    timeline_ids.insert(ttid.timeline_id);
-                } else {
-                    tracing::info!(
-                        "Skip analysis of {} b/c a lower shard count than {}",
-                        ttid,
-                        highest_shard_count.0,
-                    );
+                    timeline_generations.insert(ttid, *index_part_generation);
                }
+
+                // Apply checks to this timeline shard's metadata, and in the process update `tenant_objects`
+                // reference counts for layers across the tenant.
+                let analysis = branch_cleanup_and_check_errors(
+                    remote_client,
+                    &ttid,
+                    &mut tenant_objects,
+                    None,
+                    None,
+                    Some(data),
+                )
+                .await;
+                summary.update_analysis(&ttid, &analysis);
+
+                timeline_ids.insert(ttid.timeline_id);
+            } else {
+                tracing::info!(
+                    "Skip analysis of {} b/c a lower shard count than {}",
+                    ttid,
+                    highest_shard_count.0,
+                );
            }
-            .instrument(
-                info_span!("analyze-timeline", shard = %ttid.tenant_shard_id.shard_slug(), timeline = %ttid.timeline_id),
-            )
-            .await
        }

        summary.timeline_count += timeline_ids.len();
@@ -288,7 +278,6 @@ pub async fn scan_pageserver_metadata(
                        timelines,
                        highest_shard_count,
                    )
-                    .instrument(info_span!("analyze-tenant", tenant = %prev_tenant_id))
                    .await;
                    tenant_id = Some(ttid.tenant_shard_id.tenant_id);
                    highest_shard_count = ttid.tenant_shard_id.shard_count;
@@ -317,18 +306,15 @@ pub async fn scan_pageserver_metadata(
        tenant_timeline_results.push((ttid, data));
    }

-    let tenant_id = tenant_id.expect("Must be set if results are present");
-
    if !tenant_timeline_results.is_empty() {
        analyze_tenant(
            &remote_client,
-            tenant_id,
+            tenant_id.expect("Must be set if results are present"),
            &mut summary,
            tenant_objects,
            tenant_timeline_results,
            highest_shard_count,
        )
-        .instrument(info_span!("analyze-tenant", tenant = %tenant_id))
        .await;
    }

--- a/test_runner/cloud_regress/test_cloud_regress.py
+++ b/test_runner/cloud_regress/test_cloud_regress.py
@@ -1,100 +0,0 @@
-"""
-Run the regression tests on the cloud instance of Neon
-"""
-
-from pathlib import Path
-from typing import Any
-
-import psycopg2
-import pytest
-from fixtures.log_helper import log
-from fixtures.neon_fixtures import RemotePostgres
-from fixtures.pg_version import PgVersion
-
-
-@pytest.fixture
-def setup(remote_pg: RemotePostgres):
-    """
-    Setup and teardown of the tests
-    """
-    with psycopg2.connect(remote_pg.connstr()) as conn:
-        with conn.cursor() as cur:
-            log.info("Creating the extension")
-            cur.execute("CREATE EXTENSION IF NOT EXISTS regress_so")
-            conn.commit()
-            # TODO: Migrate to branches and remove this code
-            log.info("Looking for subscriptions in the regress database")
-            cur.execute(
-                "SELECT subname FROM pg_catalog.pg_subscription WHERE "
-                "subdbid = (SELECT oid FROM pg_catalog.pg_database WHERE datname='regression');"
-            )
-            if cur.rowcount > 0:
-                with psycopg2.connect(
-                    dbname="regression",
-                    host=remote_pg.default_options["host"],
-                    user=remote_pg.default_options["user"],
-                    password=remote_pg.default_options["password"],
-                ) as regress_conn:
-                    with regress_conn.cursor() as regress_cur:
-                        for sub in cur:
-                            regress_cur.execute(f"ALTER SUBSCRIPTION {sub[0]} DISABLE")
-                            regress_cur.execute(
-                                f"ALTER SUBSCRIPTION {sub[0]} SET (slot_name = NONE)"
-                            )
-                            regress_cur.execute(f"DROP SUBSCRIPTION {sub[0]}")
-                        regress_conn.commit()
-
-    yield
-    # TODO: Migrate to branches and remove this code
-    log.info("Looking for extra roles...")
-    with psycopg2.connect(remote_pg.connstr()) as conn:
-        with conn.cursor() as cur:
-            cur.execute(
-                "SELECT rolname FROM pg_catalog.pg_roles WHERE oid > 16384 AND rolname <> 'neondb_owner'"
-            )
-            roles: list[Any] = []
-            for role in cur:
-                log.info("Role found: %s", role[0])
-                roles.append(role[0])
-            for role in roles:
-                cur.execute(f"DROP ROLE {role}")
-            conn.commit()
-
-
-@pytest.mark.timeout(7200)
-@pytest.mark.remote_cluster
-def test_cloud_regress(
-    setup,
-    remote_pg: RemotePostgres,
-    pg_version: PgVersion,
-    pg_distrib_dir: Path,
-    base_dir: Path,
-    test_output_dir: Path,
-):
-    """
-    Run the regression tests
-    """
-    regress_bin = (
-        pg_distrib_dir / f"{pg_version.v_prefixed}/lib/postgresql/pgxs/src/test/regress/pg_regress"
-    )
-    test_path = base_dir / f"vendor/postgres-{pg_version.v_prefixed}/src/test/regress"
-
-    env_vars = {
-        "PGHOST": remote_pg.default_options["host"],
-        "PGPORT": str(
-            remote_pg.default_options["port"] if "port" in remote_pg.default_options else 5432
-        ),
-        "PGUSER": remote_pg.default_options["user"],
-        "PGPASSWORD": remote_pg.default_options["password"],
-        "PGDATABASE": remote_pg.default_options["dbname"],
-    }
-    regress_cmd = [
-        str(regress_bin),
-        f"--inputdir={test_path}",
-        f"--bindir={pg_distrib_dir}/{pg_version.v_prefixed}/bin",
-        "--dlpath=/usr/local/lib",
-        "--max-concurrent-tests=20",
-        f"--schedule={test_path}/parallel_schedule",
-        "--max-connections=5",
-    ]
-    remote_pg.pg_bin.run(regress_cmd, env=env_vars, cwd=test_output_dir)
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -849,7 +849,7 @@ class NeonEnvBuilder:

        for directory_to_clean in reversed(directories_to_clean):
            if not os.listdir(directory_to_clean):
-                log.debug(f"Removing empty directory {directory_to_clean}")
+                log.info(f"Removing empty directory {directory_to_clean}")
                try:
                    directory_to_clean.rmdir()
                except Exception as e:
--- a/test_runner/fixtures/utils.py
+++ b/test_runner/fixtures/utils.py
@@ -236,7 +236,7 @@ def get_scale_for_db(size_mb: int) -> int:


 ATTACHMENT_NAME_REGEX: re.Pattern = re.compile(  # type: ignore[type-arg]
-    r"regression\.(diffs|out)|.+\.(?:log|stderr|stdout|filediff|metrics|html|walredo)"
+    r"regression\.diffs|.+\.(?:log|stderr|stdout|filediff|metrics|html|walredo)"
 )


--- a/test_runner/regress/test_compute_metrics.py
+++ b/test_runner/regress/test_compute_metrics.py
@@ -1,21 +0,0 @@
-from fixtures.neon_fixtures import NeonEnv
-
-
-def test_compute_metrics(neon_simple_env: NeonEnv):
-    """
-    Test compute metrics, exposed in the neon_backend_perf_counters and
-    neon_perf_counters views
-    """
-    env = neon_simple_env
-    endpoint = env.endpoints.create_start("main")
-
-    conn = endpoint.connect()
-    cur = conn.cursor()
-
-    # We don't check that the values make sense, this is just a very
-    # basic check that the server doesn't crash or something like that.
-    #
-    # 1.5 is the minimum version to contain these views.
-    cur.execute("CREATE EXTENSION neon VERSION '1.5'")
-    cur.execute("SELECT * FROM neon_perf_counters")
-    cur.execute("SELECT * FROM neon_backend_perf_counters")
--- a/test_runner/regress/test_neon_extension.py
+++ b/test_runner/regress/test_neon_extension.py
@@ -50,8 +50,8 @@ def test_neon_extension_compatibility(neon_env_builder: NeonEnvBuilder):
            # Ensure that the default version is also updated in the neon.control file
            assert cur.fetchone() == ("1.4",)
            cur.execute("SELECT * from neon.NEON_STAT_FILE_CACHE")
-            all_versions = ["1.5", "1.4", "1.3", "1.2", "1.1", "1.0"]
-            current_version = "1.5"
+            all_versions = ["1.4", "1.3", "1.2", "1.1", "1.0"]
+            current_version = "1.4"
            for idx, begin_version in enumerate(all_versions):
                for target_version in all_versions[idx + 1 :]:
                    if current_version != begin_version:
--- a/test_runner/regress/test_unlogged.py
+++ b/test_runner/regress/test_unlogged.py
@@ -15,13 +15,8 @@ def test_unlogged(neon_simple_env: NeonEnv):
    cur = conn.cursor()

    cur.execute("CREATE UNLOGGED TABLE iut (id int);")
-    # create index to test unlogged index relations as well
+    # create index to test unlogged index relation as well
    cur.execute("CREATE UNIQUE INDEX iut_idx ON iut (id);")
-    cur.execute("CREATE INDEX ON iut USING gist (int4range(id, id, '[]'));")
-    cur.execute("CREATE INDEX ON iut USING spgist (int4range(id, id, '[]'));")
-    cur.execute("CREATE INDEX ON iut USING gin ((id::text::jsonb));")
-    cur.execute("CREATE INDEX ON iut USING brin (id);")
-    cur.execute("CREATE INDEX ON iut USING hash (id);")
    cur.execute("ALTER TABLE iut ADD COLUMN seq int GENERATED ALWAYS AS IDENTITY;")
    cur.execute("INSERT INTO iut (id) values (42);")

@@ -44,12 +39,3 @@ def test_unlogged(neon_simple_env: NeonEnv):
        assert results == [(43, 2)]
    else:
        assert results == [(43, 1)]
-
-    # Flush all data and compact it, so we detect any errors related to
-    # unlogged indexes materialization.
-    ps_http = env.pageserver.http_client()
-    ps_http.timeline_compact(
-        tenant_id=env.initial_tenant,
-        timeline_id=env.initial_timeline,
-        force_image_layer_creation=True,
-    )
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
  "v17": [
-    "17.0",
-    "68b5038f27e493bde6ae552fe066f10cbdfe6a14"
+    "17rc1",
+    "2cf120e7393ca5f537c6a38b457585576dc035fc"
  ],
  "v16": [
    "16.4",
-    "e131a9c027b202ce92bd7b9cf2569d48a6f9948e"
+    "1d7081a3b076ddf5086e0b118d4329820e6a7427"
  ],
  "v15": [
    "15.8",
-    "22e580fe9ffcea7e02592110b1c9bf426d83cada"
+    "16c3c6b64f1420a367a2a9b2510f20d94f791af8"
  ],
  "v14": [
    "14.13",
-    "2199b83fb72680001ce0f43bf6187a21dfb8f45d"
+    "a38d15f3233a4c07f2bf3335fcbd874dd1f4e386"
  ]
 }
--- a/vm-image-spec.yaml
+++ b/vm-image-spec.yaml
@@ -0,0 +1,550 @@
+# Supplemental file for neondatabase/autoscaling's vm-builder, for producing the VM compute image.
+---
+commands:
+  - name: cgconfigparser
+    user: root
+    sysvInitAction: sysinit
+    shell: 'cgconfigparser -l /etc/cgconfig.conf -s 1664'
+  # restrict permissions on /neonvm/bin/resize-swap, because we grant access to compute_ctl for
+  # running it as root.
+  - name: chmod-resize-swap
+    user: root
+    sysvInitAction: sysinit
+    shell: 'chmod 711 /neonvm/bin/resize-swap'
+  - name: pgbouncer
+    user: postgres
+    sysvInitAction: respawn
+    shell: '/usr/local/bin/pgbouncer /etc/pgbouncer.ini'
+  - name: postgres-exporter
+    user: nobody
+    sysvInitAction: respawn
+    shell: 'DATA_SOURCE_NAME="user=cloud_admin sslmode=disable dbname=postgres application_name=postgres-exporter" /bin/postgres_exporter'
+  - name: sql-exporter
+    user: nobody
+    sysvInitAction: respawn
+    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter.yml -web.listen-address=:9399'
+  - name: sql-exporter-autoscaling
+    user: nobody
+    sysvInitAction: respawn
+    shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499'
+shutdownHook: |
+  su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10'
+files:
+  - filename: compute_ctl-resize-swap
+    content: |
+      # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap
+      # as root without requiring entering a password (NOPASSWD), regardless of hostname (ALL)
+      postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap
+  - filename: pgbouncer.ini
+    content: |
+      [databases]
+      *=host=localhost port=5432 auth_user=cloud_admin
+      [pgbouncer]
+      listen_port=6432
+      listen_addr=0.0.0.0
+      auth_type=scram-sha-256
+      auth_user=cloud_admin
+      auth_dbname=postgres
+      client_tls_sslmode=disable
+      server_tls_sslmode=disable
+      pool_mode=transaction
+      max_client_conn=10000
+      default_pool_size=64
+      max_prepared_statements=0
+      admin_users=postgres
+      unix_socket_dir=/tmp/
+      unix_socket_mode=0777
+  - filename: cgconfig.conf
+    content: |
+      # Configuration for cgroups in VM compute nodes
+      group neon-postgres {
+          perm {
+              admin {
+                  uid = postgres;
+              }
+              task {
+                  gid = users;
+              }
+          }
+          memory {}
+      }
+  - filename: sql_exporter.yml
+    content: |
+      # Configuration for sql_exporter
+      # Global defaults.
+      global:
+        # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
+        scrape_timeout: 10s
+        # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
+        scrape_timeout_offset: 500ms
+        # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
+        min_interval: 0s
+        # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
+        # as will concurrent scrapes.
+        max_connections: 1
+        # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
+        # always be the same as max_connections.
+        max_idle_connections: 1
+        # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
+        # If 0, connections are not closed due to a connection's age.
+        max_connection_lifetime: 5m
+
+      # The target to monitor and the collectors to execute on it.
+      target:
+        # Data source name always has a URI scheme that matches the driver name. In some cases (e.g. MySQL)
+        # the scheme gets dropped or replaced to match the driver's expected DSN format.
+        data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter'
+
+        # Collectors (referenced by name) to execute on the target.
+        # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
+        collectors: [neon_collector]
+
+      # `collector_files` specifies a list of globs. One collector definition is read from each matching file.
+      # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
+      collector_files:
+        - "neon_collector.yml"
+  - filename: sql_exporter_autoscaling.yml
+    content: |
+      # Configuration for sql_exporter for autoscaling-agent
+      # Global defaults.
+      global:
+        # If scrape_timeout <= 0, no timeout is set unless Prometheus provides one. The default is 10s.
+        scrape_timeout: 10s
+        # Subtracted from Prometheus' scrape_timeout to give us some headroom and prevent Prometheus from timing out first.
+        scrape_timeout_offset: 500ms
+        # Minimum interval between collector runs: by default (0s) collectors are executed on every scrape.
+        min_interval: 0s
+        # Maximum number of open connections to any one target. Metric queries will run concurrently on multiple connections,
+        # as will concurrent scrapes.
+        max_connections: 1
+        # Maximum number of idle connections to any one target. Unless you use very long collection intervals, this should
+        # always be the same as max_connections.
+        max_idle_connections: 1
+        # Maximum amount of time a connection may be reused. Expired connections may be closed lazily before reuse.
+        # If 0, connections are not closed due to a connection's age.
+        max_connection_lifetime: 5m
+
+      # The target to monitor and the collectors to execute on it.
+      target:
+        # Data source name always has a URI scheme that matches the driver name. In some cases (e.g. MySQL)
+        # the scheme gets dropped or replaced to match the driver's expected DSN format.
+        data_source_name: 'postgresql://cloud_admin@127.0.0.1:5432/postgres?sslmode=disable&application_name=sql_exporter_autoscaling'
+
+        # Collectors (referenced by name) to execute on the target.
+        # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
+        collectors: [neon_collector_autoscaling]
+
+      # `collector_files` specifies a list of globs. One collector definition is read from each matching file.
+      # Glob patterns are supported (see <https://pkg.go.dev/path/filepath#Match> for syntax).
+      collector_files:
+        - "neon_collector_autoscaling.yml"
+  - filename: neon_collector.yml
+    content: |
+      collector_name: neon_collector
+      metrics:
+      - metric_name: lfc_misses
+        type: gauge
+        help: 'lfc_misses'
+        key_labels:
+        values: [lfc_misses]
+        query: |
+          select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
+
+      - metric_name: lfc_used
+        type: gauge
+        help: 'LFC chunks used (chunk = 1MB)'
+        key_labels:
+        values: [lfc_used]
+        query: |
+          select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
+
+      - metric_name: lfc_hits
+        type: gauge
+        help: 'lfc_hits'
+        key_labels:
+        values: [lfc_hits]
+        query: |
+          select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
+
+      - metric_name: lfc_writes
+        type: gauge
+        help: 'lfc_writes'
+        key_labels:
+        values: [lfc_writes]
+        query: |
+          select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
+
+      - metric_name: lfc_cache_size_limit
+        type: gauge
+        help: 'LFC cache size limit in bytes'
+        key_labels:
+        values: [lfc_cache_size_limit]
+        query: |
+          select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
+
+      - metric_name: connection_counts
+        type: gauge
+        help: 'Connection counts'
+        key_labels:
+          - datname
+          - state
+        values: [count]
+        query: |
+          select datname, state, count(*) as count from pg_stat_activity where state <> '' group by datname, state;
+
+      - metric_name: pg_stats_userdb
+        type: gauge
+        help: 'Stats for several oldest non-system dbs'
+        key_labels:
+          - datname
+        value_label: kind
+        values:
+          - db_size
+          - deadlocks
+          # Rows
+          - inserted
+          - updated
+          - deleted
+        # We export stats for 10 non-system databases. Without this limit
+        # it is too easy to abuse the system by creating lots of databases.
+        query: |
+          select pg_database_size(datname) as db_size, deadlocks,
+                 tup_inserted as inserted, tup_updated as updated, tup_deleted as deleted,
+                 datname
+            from pg_stat_database
+           where datname IN (
+             select datname
+               from pg_database
+              where datname <> 'postgres' and not datistemplate
+              order by oid
+              limit 10
+           );
+
+      - metric_name: max_cluster_size
+        type: gauge
+        help: 'neon.max_cluster_size setting'
+        key_labels:
+        values: [max_cluster_size]
+        query: |
+          select setting::int as max_cluster_size from pg_settings where name = 'neon.max_cluster_size';
+
+      - metric_name: db_total_size
+        type: gauge
+        help: 'Size of all databases'
+        key_labels:
+        values: [total]
+        query: |
+          select sum(pg_database_size(datname)) as total from pg_database;
+
+      # DEPRECATED
+      - metric_name: lfc_approximate_working_set_size
+        type: gauge
+        help: 'Approximate working set size in pages of 8192 bytes'
+        key_labels:
+        values: [approximate_working_set_size]
+        query: |
+          select neon.approximate_working_set_size(false) as approximate_working_set_size;
+
+      - metric_name: lfc_approximate_working_set_size_windows
+        type: gauge
+        help: 'Approximate working set size in pages of 8192 bytes'
+        key_labels: [duration]
+        values: [size]
+        # NOTE: This is the "public" / "human-readable" version. Here, we supply a small selection
+        # of durations in a pretty-printed form.
+        query: |
+          select
+            x as duration,
+            neon.approximate_working_set_size_seconds(extract('epoch' from x::interval)::int) as size
+          from
+            (values ('5m'),('15m'),('1h')) as t (x);
+
+      - metric_name: compute_current_lsn
+        type: gauge
+        help: 'Current LSN of the database'
+        key_labels:
+        values: [lsn]
+        query: |
+          select
+            case
+              when pg_catalog.pg_is_in_recovery()
+              then (pg_last_wal_replay_lsn() - '0/0')::FLOAT8
+              else (pg_current_wal_lsn() - '0/0')::FLOAT8
+            end as lsn;
+
+      - metric_name: compute_receive_lsn
+        type: gauge
+        help: 'Returns the last write-ahead log location that has been received and synced to disk by streaming replication'
+        key_labels:
+        values: [lsn]
+        query: |
+          SELECT
+            CASE
+              WHEN pg_catalog.pg_is_in_recovery()
+              THEN (pg_last_wal_receive_lsn() - '0/0')::FLOAT8
+              ELSE 0
+            END AS lsn;
+
+      - metric_name: replication_delay_bytes
+        type: gauge
+        help: 'Bytes between received and replayed LSN'
+        key_labels:
+        values: [replication_delay_bytes]
+        # We use a GREATEST call here because this calculation can be negative.
+        # The calculation is not atomic, meaning after we've gotten the receive
+        # LSN, the replay LSN may have advanced past the receive LSN we
+        # are using for the calculation.
+        query: |
+          SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes;
+
+      - metric_name: replication_delay_seconds
+        type: gauge
+        help: 'Time since last LSN was replayed'
+        key_labels:
+        values: [replication_delay_seconds]
+        query: |
+          SELECT
+            CASE
+              WHEN pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn() THEN 0
+              ELSE GREATEST (0, EXTRACT (EPOCH FROM now() - pg_last_xact_replay_timestamp()))
+            END AS replication_delay_seconds;
+
+      - metric_name: checkpoints_req
+        type: gauge
+        help: 'Number of requested checkpoints'
+        key_labels:
+        values: [checkpoints_req]
+        query: |
+          SELECT checkpoints_req FROM pg_stat_bgwriter;
+
+      - metric_name: checkpoints_timed
+        type: gauge
+        help: 'Number of scheduled checkpoints'
+        key_labels:
+        values: [checkpoints_timed]
+        query: |
+          SELECT checkpoints_timed FROM pg_stat_bgwriter;
+
+      - metric_name: compute_logical_snapshot_files
+        type: gauge
+        help: 'Number of snapshot files in pg_logical/snapshot'
+        key_labels:
+          - timeline_id
+        values: [num_logical_snapshot_files]
+        query: |
+          SELECT
+            (SELECT setting FROM pg_settings WHERE name = 'neon.timeline_id') AS timeline_id,
+            -- Postgres creates temporary snapshot files of the form %X-%X.snap.%d.tmp. These
+            -- temporary snapshot files are renamed to the actual snapshot files after they are
+            -- completely built. We only WAL-log the completely built snapshot files.
+            (SELECT COUNT(*) FROM pg_ls_logicalsnapdir() WHERE name LIKE '%.snap') AS num_logical_snapshot_files;
+
+      # In all the below metrics, we cast LSNs to floats because Prometheus only supports floats.
+      # It's probably fine because float64 can store integers from -2^53 to +2^53 exactly.
+
+      # Number of slots is limited by max_replication_slots, so collecting position for all of them shouldn't be bad.
+      - metric_name: logical_slot_restart_lsn
+        type: gauge
+        help: 'restart_lsn of logical slots'
+        key_labels:
+          - slot_name
+        values: [restart_lsn]
+        query: |
+          select slot_name, (restart_lsn - '0/0')::FLOAT8 as restart_lsn
+          from pg_replication_slots
+          where slot_type = 'logical';
+
+      - metric_name: compute_subscriptions_count
+        type: gauge
+        help: 'Number of logical replication subscriptions grouped by enabled/disabled'
+        key_labels:
+          - enabled
+        values: [subscriptions_count]
+        query: |
+          select subenabled::text as enabled, count(*) as subscriptions_count
+          from pg_subscription
+          group by subenabled;
+
+      - metric_name: retained_wal
+        type: gauge
+        help: 'Retained WAL in inactive replication slots'
+        key_labels:
+          - slot_name
+        values: [retained_wal]
+        query: |
+          SELECT slot_name, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::FLOAT8 AS retained_wal
+          FROM pg_replication_slots
+          WHERE active = false;
+
+      - metric_name: wal_is_lost
+        type: gauge
+        help: 'Whether or not the replication slot wal_status is lost'
+        key_labels:
+          - slot_name
+        values: [wal_is_lost]
+        query: |
+          SELECT slot_name,
+          CASE
+            WHEN wal_status = 'lost' THEN 1
+            ELSE 0
+          END AS wal_is_lost
+          FROM pg_replication_slots;
+
+  - filename: neon_collector_autoscaling.yml
+    content: |
+      collector_name: neon_collector_autoscaling
+      metrics:
+      - metric_name: lfc_misses
+        type: gauge
+        help: 'lfc_misses'
+        key_labels:
+        values: [lfc_misses]
+        query: |
+          select lfc_value as lfc_misses from neon.neon_lfc_stats where lfc_key='file_cache_misses';
+
+      - metric_name: lfc_used
+        type: gauge
+        help: 'LFC chunks used (chunk = 1MB)'
+        key_labels:
+        values: [lfc_used]
+        query: |
+          select lfc_value as lfc_used from neon.neon_lfc_stats where lfc_key='file_cache_used';
+
+      - metric_name: lfc_hits
+        type: gauge
+        help: 'lfc_hits'
+        key_labels:
+        values: [lfc_hits]
+        query: |
+          select lfc_value as lfc_hits from neon.neon_lfc_stats where lfc_key='file_cache_hits';
+
+      - metric_name: lfc_writes
+        type: gauge
+        help: 'lfc_writes'
+        key_labels:
+        values: [lfc_writes]
+        query: |
+          select lfc_value as lfc_writes from neon.neon_lfc_stats where lfc_key='file_cache_writes';
+
+      - metric_name: lfc_cache_size_limit
+        type: gauge
+        help: 'LFC cache size limit in bytes'
+        key_labels:
+        values: [lfc_cache_size_limit]
+        query: |
+          select pg_size_bytes(current_setting('neon.file_cache_size_limit')) as lfc_cache_size_limit;
+
+      - metric_name: lfc_approximate_working_set_size_windows
+        type: gauge
+        help: 'Approximate working set size in pages of 8192 bytes'
+        key_labels: [duration_seconds]
+        values: [size]
+        # NOTE: This is the "internal" / "machine-readable" version. This outputs the working set
+        # size looking back 1..60 minutes, labeled with the window length in seconds (60..3600).
+        query: |
+          select
+            x::text as duration_seconds,
+            neon.approximate_working_set_size_seconds(x) as size
+          from
+            (select generate_series * 60 as x from generate_series(1, 60)) as t (x);
+build: |
+  # Build cgroup-tools
+  #
+  # At time of writing (2023-03-14), debian bullseye has a version of cgroup-tools (technically
+  # libcgroup) that doesn't support cgroup v2 (version 0.41-11). Unfortunately, the vm-monitor
+  # requires cgroup v2, so we'll build cgroup-tools ourselves.
+  FROM debian:bullseye-slim AS libcgroup-builder
+  ENV LIBCGROUP_VERSION=v2.0.3
+
+  RUN set -exu \
+      && apt-get update \
+      && apt-get install --no-install-recommends -y \
+          git \
+          ca-certificates \
+          automake \
+          cmake \
+          make \
+          gcc \
+          byacc \
+          flex \
+          libtool \
+          libpam0g-dev \
+      && git clone --depth 1 -b $LIBCGROUP_VERSION https://github.com/libcgroup/libcgroup \
+      && INSTALL_DIR="/libcgroup-install" \
+      && mkdir -p "$INSTALL_DIR/bin" "$INSTALL_DIR/include" \
+      && cd libcgroup \
+      # extracted from bootstrap.sh, with modified flags:
+      && (test -d m4 || mkdir m4) \
+      && autoreconf -fi \
+      && rm -rf autom4te.cache \
+      && CFLAGS="-O3" ./configure --prefix="$INSTALL_DIR" --sysconfdir=/etc --localstatedir=/var --enable-opaque-hierarchy="name=systemd" \
+      # actually build the thing...
+      && make install
+
+  FROM quay.io/prometheuscommunity/postgres-exporter:v0.12.1 AS postgres-exporter
+
+  FROM burningalchemist/sql_exporter:0.13 AS sql-exporter
+
+  # Build pgbouncer
+  #
+  FROM debian:bullseye-slim AS pgbouncer
+  RUN set -e \
+      && apt-get update \
+      && apt-get install -y \
+          build-essential \
+          git \
+          libevent-dev \
+          libtool \
+          pkg-config
+
+  # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc)
+  ENV PGBOUNCER_TAG=pgbouncer_1_22_1
+  RUN set -e \
+      && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \
+      && cd pgbouncer \
+      && ./autogen.sh \
+      && LDFLAGS=-static ./configure --prefix=/usr/local/pgbouncer --without-openssl \
+      && make -j $(nproc) dist_man_MANS= \
+      && make install dist_man_MANS=
+merge: |
+  # tweak nofile limits
+  RUN set -e \
+      && echo 'fs.file-max = 1048576' >>/etc/sysctl.conf \
+      && test ! -e /etc/security || ( \
+         echo '*    - nofile 1048576' >>/etc/security/limits.conf \
+      && echo 'root - nofile 1048576' >>/etc/security/limits.conf \
+         )
+
+  # Allow postgres user (compute_ctl) to run swap resizer.
+  # Need to install sudo in order to allow this.
+  #
+  # NOTE(review): dropping group/other 'read' on /neonvm/bin/resize-swap is NOT done here; confirm.
+  RUN set -e \
+      && apt-get update \
+      && apt-get install --no-install-recommends -y \
+             sudo \
+      && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+  COPY compute_ctl-resize-swap /etc/sudoers.d/compute_ctl-resize-swap
+
+  COPY cgconfig.conf /etc/cgconfig.conf
+  COPY pgbouncer.ini /etc/pgbouncer.ini
+  COPY sql_exporter.yml /etc/sql_exporter.yml
+  COPY neon_collector.yml /etc/neon_collector.yml
+  COPY sql_exporter_autoscaling.yml /etc/sql_exporter_autoscaling.yml
+  COPY neon_collector_autoscaling.yml /etc/neon_collector_autoscaling.yml
+
+  RUN set -e \
+      && chown postgres:postgres /etc/pgbouncer.ini \
+      && chmod 0666 /etc/pgbouncer.ini \
+      && chmod 0644 /etc/cgconfig.conf \
+      && chmod 0644 /etc/sql_exporter.yml \
+      && chmod 0644 /etc/neon_collector.yml \
+      && chmod 0644 /etc/sql_exporter_autoscaling.yml \
+      && chmod 0644 /etc/neon_collector_autoscaling.yml
+
+  COPY --from=libcgroup-builder /libcgroup-install/bin/*  /usr/bin/
+  COPY --from=libcgroup-builder /libcgroup-install/lib/*  /usr/lib/
+  COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/
+  COPY --from=postgres-exporter /bin/postgres_exporter /bin/postgres_exporter
+  COPY --from=sql-exporter      /bin/sql_exporter      /bin/sql_exporter
+  COPY --from=pgbouncer         /usr/local/pgbouncer/bin/pgbouncer /usr/local/bin/pgbouncer