Improve the scaling, add Play/Stop buttons

WIP: Collect and draw layer trace
Add test from PR #3673
2026-06-04 22:10:39 +00:00 · 2023-03-22 19:40:54 +02:00 · 2023-03-22 19:40:54 +02:00 · 2023-03-22 19:40:54 +02:00
215 changed files with 4686 additions and 7889 deletions
--- a/.github/actions/allure-report/action.yml
+++ b/.github/actions/allure-report/action.yml
@@ -15,32 +15,10 @@ outputs:
  report-url:
    description: 'Allure report URL'
    value: ${{ steps.generate-report.outputs.report-url }}
-  report-json-url:
-    description: 'Allure report JSON URL'
-    value: ${{ steps.generate-report.outputs.report-json-url }}

 runs:
  using: "composite"
-
  steps:
-    # We're using some of env variables quite offen, so let's set them once.
-    #
-    # It would be nice to have them set in common runs.env[0] section, but it doesn't work[1]
-    #
-    # - [0] https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#runsenv
-    # - [1] https://github.com/neondatabase/neon/pull/3907#discussion_r1154703456
-    #
-    - name: Set common environment variables
-      shell: bash -euxo pipefail {0}
-      run: |
-        echo "BUILD_TYPE=${BUILD_TYPE}"   >> $GITHUB_ENV
-        echo "BUCKET=${BUCKET}"           >> $GITHUB_ENV
-        echo "TEST_OUTPUT=${TEST_OUTPUT}" >> $GITHUB_ENV
-      env:
-        BUILD_TYPE: ${{ inputs.build_type }}
-        BUCKET: neon-github-public-dev
-        TEST_OUTPUT: /tmp/test_output
-
    - name: Validate input parameters
      shell: bash -euxo pipefail {0}
      run: |
@@ -98,14 +76,16 @@ runs:
          rm -f ${ALLURE_ZIP}
        fi
      env:
-        ALLURE_VERSION: 2.21.0
-        ALLURE_ZIP_MD5: c8db4dd8e2a7882583d569ed2c82879c
+        ALLURE_VERSION: 2.19.0
+        ALLURE_ZIP_MD5: ced21401a1a8b9dfb68cee9e4c210464

    - name: Upload Allure results
      if: ${{ inputs.action == 'store' }}
      env:
        REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
        RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
+        TEST_OUTPUT: /tmp/test_output
+        BUCKET: neon-github-public-dev
        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
      shell: bash -euxo pipefail {0}
      run: |
@@ -124,7 +104,7 @@ runs:
        EOF
        cat <<EOF > $TEST_OUTPUT/allure/results/environment.properties
          TEST_SELECTION=${{ inputs.test_selection }}
-          BUILD_TYPE=${BUILD_TYPE}
+          BUILD_TYPE=${{ inputs.build_type }}
        EOF

        ARCHIVE="${GITHUB_RUN_ID}-${TEST_SELECTION}-${GITHUB_RUN_ATTEMPT}-$(date +%s).tar.zst"
@@ -133,12 +113,13 @@ runs:
        tar -C ${TEST_OUTPUT}/allure/results -cf ${ARCHIVE} --zstd .
        aws s3 mv --only-show-errors ${ARCHIVE} "s3://${BUCKET}/${RAW_PREFIX}/${ARCHIVE}"

-    # Potentially we could have several running build for the same key (for example for the main branch), so we use improvised lock for this
+    # Potentially we could have several running builds for the same key (for example, for the main branch), so we use an improvised lock for this
    - name: Acquire Allure lock
      if: ${{ inputs.action == 'generate' }}
      shell: bash -euxo pipefail {0}
      env:
        LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
+        BUCKET: neon-github-public-dev
        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
      run: |
        LOCK_TIMEOUT=300 # seconds
@@ -168,6 +149,8 @@ runs:
      env:
        REPORT_PREFIX: reports/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
        RAW_PREFIX: reports-raw/${{ steps.calculate-vars.outputs.KEY }}/${{ inputs.build_type }}
+        TEST_OUTPUT: /tmp/test_output
+        BUCKET: neon-github-public-dev
      shell: bash -euxo pipefail {0}
      run: |
        # Get previously uploaded data for this run
@@ -203,24 +186,24 @@ runs:
        REPORT_URL=https://${BUCKET}.s3.amazonaws.com/${REPORT_PREFIX}/${GITHUB_RUN_ID}/index.html

        # Generate redirect
-        cat <<EOF > ${TEST_OUTPUT}/allure/index.html
+        cat <<EOF > ./index.html
          <!DOCTYPE html>

          <meta charset="utf-8">
          <title>Redirecting to ${REPORT_URL}</title>
          <meta http-equiv="refresh" content="0; URL=${REPORT_URL}">
        EOF
-        aws s3 cp --only-show-errors ${TEST_OUTPUT}/allure/index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"
+        aws s3 cp --only-show-errors ./index.html "s3://${BUCKET}/${REPORT_PREFIX}/latest/index.html"

        echo "[Allure Report](${REPORT_URL})" >> ${GITHUB_STEP_SUMMARY}
        echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
-        echo "report-json-url=${REPORT_URL%/index.html}/data/suites.json" >> $GITHUB_OUTPUT

    - name: Release Allure lock
      if: ${{ inputs.action == 'generate' && always() }}
      shell: bash -euxo pipefail {0}
      env:
        LOCK_FILE: reports/${{ steps.calculate-vars.outputs.KEY }}/lock.txt
+        BUCKET: neon-github-public-dev
        TEST_SELECTION: ${{ steps.calculate-vars.outputs.TEST_SELECTION }}
      run: |
        aws s3 cp --only-show-errors "s3://${BUCKET}/${LOCK_FILE}" ./lock.txt || exit 0
@@ -229,16 +212,11 @@ runs:
          aws s3 rm "s3://${BUCKET}/${LOCK_FILE}"
        fi

-    - name: Cleanup
-      if: always()
-      shell: bash -euxo pipefail {0}
-      run: |
-        rm -rf ${TEST_OUTPUT}/allure
-
    - uses: actions/github-script@v6
      if: ${{ inputs.action == 'generate' && always() }}
      env:
        REPORT_URL: ${{ steps.generate-report.outputs.report-url }}
+        BUILD_TYPE: ${{ inputs.build_type }}
        SHA: ${{ github.event.pull_request.head.sha || github.sha }}
      with:
        script: |
--- a/.github/actions/neon-project-create/action.yml
+++ b/.github/actions/neon-project-create/action.yml
@@ -14,12 +14,6 @@ inputs:
  api_host:
    desctiption: 'Neon API host'
    default: console.stage.neon.tech
-  provisioner:
-    desctiption: 'k8s-pod or k8s-neonvm'
-    default: 'k8s-pod'
-  compute_units:
-    desctiption: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal'
-    default: '[1, 1]'

 outputs:
  dsn:
@@ -37,10 +31,6 @@ runs:
      # A shell without `set -x` to not to expose password/dsn in logs
      shell: bash -euo pipefail {0}
      run: |
-        if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then
-          echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU"
-        fi
-
        project=$(curl \
          "https://${API_HOST}/api/v2/projects" \
          --fail \
@@ -52,9 +42,6 @@ runs:
              \"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\",
              \"pg_version\": ${POSTGRES_VERSION},
              \"region_id\": \"${REGION_ID}\",
-              \"provisioner\": \"${PROVISIONER}\",
-              \"autoscaling_limit_min_cu\": ${MIN_CU},
-              \"autoscaling_limit_max_cu\": ${MAX_CU},
              \"settings\": { }
            }
          }")
@@ -75,6 +62,3 @@ runs:
        API_KEY: ${{ inputs.api_key }}
        REGION_ID: ${{ inputs.region_id }}
        POSTGRES_VERSION: ${{ inputs.postgres_version }}
-        PROVISIONER: ${{ inputs.provisioner }}
-        MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }}
-        MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }}
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -44,10 +44,6 @@ inputs:
    description: 'Secret access key'
    required: false
    default: ''
-  rerun_flaky:
-    description: 'Whether to rerun flaky tests'
-    required: false
-    default: 'false'

 runs:
  using: "composite"
@@ -105,7 +101,6 @@ runs:
        COMPATIBILITY_SNAPSHOT_DIR: /tmp/compatibility_snapshot_pg14
        ALLOW_BACKWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'backward compatibility breakage')
        ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage')
-        RERUN_FLAKY: ${{ inputs.rerun_flaky }}
      shell: bash -euxo pipefail {0}
      run: |
        # PLATFORM will be embedded in the perf test report
@@ -148,13 +143,6 @@ runs:
          EXTRA_PARAMS="--out-dir $PERF_REPORT_DIR $EXTRA_PARAMS"
        fi

-        if [ "${RERUN_FLAKY}" == "true" ]; then
-          mkdir -p $TEST_OUTPUT
-          poetry run ./scripts/flaky_tests.py "${TEST_RESULT_CONNSTR}" --days 10 --output "$TEST_OUTPUT/flaky.json"
-
-          EXTRA_PARAMS="--flaky-tests-json $TEST_OUTPUT/flaky.json $EXTRA_PARAMS"
-        fi
-
        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
--- a/.github/ansible/prod.ap-southeast-1.hosts.yaml
+++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml
@@ -8,16 +8,6 @@ storage:
      pg_distrib_dir: /usr/local
      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/prod.eu-central-1.hosts.yaml
+++ b/.github/ansible/prod.eu-central-1.hosts.yaml
@@ -8,16 +8,6 @@ storage:
      pg_distrib_dir: /usr/local
      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -8,16 +8,6 @@ storage:
      pg_distrib_dir: /usr/local
      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -8,16 +8,6 @@ storage:
      pg_distrib_dir: /usr/local
      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
-        min_avail_bytes: 0
-        period: "10s"
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "10m"
-          threshold: &default_eviction_threshold "24h"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/scripts/init_pageserver.sh
+++ b/.github/ansible/scripts/init_pageserver.sh
@@ -3,8 +3,6 @@
 # fetch params from meta-data service
 INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id)
 AZ_ID=$(curl -s http://169.254.169.254/latest/meta-data/placement/availability-zone)
-INSTANCE_TYPE=$(curl -s http://169.254.169.254/latest/meta-data/instance-type)
-DISK_SIZE=$(df -B1 /storage | tail -1 | awk '{print $2}')

 # store fqdn hostname in var
 HOST=$(hostname -f)
@@ -20,9 +18,7 @@ cat <<EOF | tee /tmp/payload
  "http_host": "${HOST}",
  "http_port": 9898,
  "active": false,
-  "availability_zone_id": "${AZ_ID}",
-  "disk_size": ${DISK_SIZE},
-  "instance_type": "${INSTANCE_TYPE}"
+  "availability_zone_id": "${AZ_ID}"
 }
 EOF

--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -8,16 +8,11 @@ storage:
      pg_distrib_dir: /usr/local
      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
      tenant_config:
        eviction_policy:
          kind: "LayerAccessThreshold"
          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
+          threshold: "20m"
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -8,16 +8,11 @@ storage:
      pg_distrib_dir: /usr/local
      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      disk_usage_based_eviction:
-        max_usage_pct: 80
-        min_avail_bytes: 0
-        period: "10s"
      tenant_config:
        eviction_policy:
          kind: "LayerAccessThreshold"
          period: "20m"
-          threshold: &default_eviction_threshold "20m"
-      evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
+          threshold: "20m"
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -30,9 +30,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: dev
-  neon_region: eu-west-1
+  zenith_service: proxy-scram
+  zenith_env: dev
+  zenith_region: eu-west-1
+  zenith_region_slug: eu-west-1

 exposedService:
  annotations:
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
@@ -15,9 +15,10 @@ settings:

 # -- Additional labels for neon-proxy-link pods
 podLabels:
-  neon_service: proxy
-  neon_env: dev
-  neon_region: us-east-2
+  zenith_service: proxy
+  zenith_env: dev
+  zenith_region: us-east-2
+  zenith_region_slug: us-east-2

 service:
  type: LoadBalancer
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
@@ -15,9 +15,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram-legacy
-  neon_env: dev
-  neon_region: us-east-2
+  zenith_service: proxy-scram-legacy
+  zenith_env: dev
+  zenith_region: us-east-2
+  zenith_region_slug: us-east-2

 exposedService:
  annotations:
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -23,7 +23,6 @@ settings:
  authBackend: "console"
  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
  domain: "*.us-east-2.aws.neon.build"
-  extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
  sentryEnvironment: "staging"
  wssPort: 8443
  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
@@ -31,9 +30,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: dev
-  neon_region: us-east-2
+  zenith_service: proxy-scram
+  zenith_env: dev
+  zenith_region: us-east-2
+  zenith_region_slug: us-east-2

 exposedService:
  annotations:
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -24,7 +24,6 @@ settings:
  authBackend: "console"
  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
  domain: "*.ap-southeast-1.aws.neon.tech"
-  extraDomains: ["*.ap-southeast-1.retooldb.com", "*.ap-southeast-1.postgres.vercel-storage.com"]
  sentryEnvironment: "production"
  wssPort: 8443
  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
@@ -32,9 +31,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: ap-southeast-1
+  zenith_service: proxy-scram
+  zenith_env: prod
+  zenith_region: ap-southeast-1
+  zenith_region_slug: ap-southeast-1

 exposedService:
  annotations:
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -24,7 +24,6 @@ settings:
  authBackend: "console"
  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
  domain: "*.eu-central-1.aws.neon.tech"
-  extraDomains: ["*.eu-central-1.retooldb.com", "*.eu-central-1.postgres.vercel-storage.com"]
  sentryEnvironment: "production"
  wssPort: 8443
  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
@@ -32,9 +31,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: eu-central-1
+  zenith_service: proxy-scram
+  zenith_env: prod
+  zenith_region: eu-central-1
+  zenith_region_slug: eu-central-1

 exposedService:
  annotations:
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-link.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-link.yaml
@@ -13,9 +13,10 @@ settings:

 # -- Additional labels for zenith-proxy pods
 podLabels:
-  neon_service: proxy
-  neon_env: production
-  neon_region: us-east-2
+  zenith_service: proxy
+  zenith_env: production
+  zenith_region: us-east-2
+  zenith_region_slug: us-east-2

 service:
  type: LoadBalancer
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -24,7 +24,6 @@ settings:
  authBackend: "console"
  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
  domain: "*.us-east-2.aws.neon.tech"
-  extraDomains: ["*.us-east-2.retooldb.com", "*.us-east-2.postgres.vercel-storage.com"]
  sentryEnvironment: "production"
  wssPort: 8443
  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
@@ -32,9 +31,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-east-2
+  zenith_service: proxy-scram
+  zenith_env: prod
+  zenith_region: us-east-2
+  zenith_region_slug: us-east-2

 exposedService:
  annotations:
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
@@ -31,9 +31,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-west-2
+  zenith_service: proxy-scram
+  zenith_env: prod
+  zenith_region: us-west-2
+  zenith_region_slug: us-west-2

 exposedService:
  annotations:
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -24,7 +24,6 @@ settings:
  authBackend: "console"
  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
  domain: "*.us-west-2.aws.neon.tech"
-  extraDomains: ["*.us-west-2.retooldb.com", "*.us-west-2.postgres.vercel-storage.com"]
  sentryEnvironment: "production"
  wssPort: 8443
  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
@@ -32,9 +31,10 @@ settings:

 # -- Additional labels for neon-proxy pods
 podLabels:
-  neon_service: proxy-scram
-  neon_env: prod
-  neon_region: us-west-2
+  zenith_service: proxy-scram
+  zenith_env: prod
+  zenith_region: us-west-2
+  zenith_region_slug: us-west-2

 exposedService:
  annotations:
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -3,12 +3,8 @@
 ## Issue ticket number and link

 ## Checklist before requesting a review
-
 - [ ] I have performed a self-review of my code.
 - [ ] If it is a core feature, I have added thorough tests.
 - [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
 - [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.

-## Checklist before merging
-
- [ ] Do not forget to reformat commit message to not include the above checklist
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -107,65 +107,25 @@ jobs:
      env:
        SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}

-  generate-matrices:
-    # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday)
-    #
-    # Available platforms:
-    # - neon-captest-new: Freshly created project (1 CU)
-    # - neon-captest-freetier: Use freetier-sized compute (0.25 CU)
-    # - neon-captest-reuse: Reusing existing project
-    # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
-    # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
-    runs-on: ubuntu-latest
-    outputs:
-      pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
-      olap-compare-matrix: ${{ steps.olap-compare-matrix.outputs.matrix }}
-
-    steps:
-    - name: Generate matrix for pgbench benchmark
-      id: pgbench-compare-matrix
-      run: |
-        matrix='{
-          "platform": [
-            "neon-captest-new",
-            "neon-captest-reuse"
-          ],
-          "db_size": [ "10gb" ],
-          "include": [
-            { "platform": "neon-captest-freetier", "db_size": "3gb"  },
-            { "platform": "neon-captest-new",      "db_size": "50gb" }
-          ]
-        }'
-
-        if [ "$(date +%A)" = "Saturday" ]; then
-          matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres", "db_size": "10gb"},
-                                                   { "platform": "rds-aurora",   "db_size": "50gb"}]')
-        fi
-
-        echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
-
-    - name: Generate matrix for OLAP benchmarks
-      id: olap-compare-matrix
-      run: |
-        matrix='{
-          "platform": [
-            "neon-captest-reuse"
-          ]
-        }'
-
-        if [ "$(date +%A)" = "Saturday" ]; then
-          matrix=$(echo $matrix | jq '.include += [{ "platform": "rds-postgres" },
-                                                   { "platform": "rds-aurora"   }]')
-        fi
-
-        echo "matrix=$(echo $matrix | jq --compact-output '.')" >> $GITHUB_OUTPUT
-
  pgbench-compare:
-    needs: [ generate-matrices ]
-
    strategy:
      fail-fast: false
-      matrix: ${{fromJson(needs.generate-matrices.outputs.pgbench-compare-matrix)}}
+      matrix:
+        # neon-captest-new: Run pgbench in a freshly created project
+        # neon-captest-reuse: Same, but reusing existing project
+        # neon-captest-prefetch: Same, with prefetching enabled (new project)
+        # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
+        # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
+        platform: [ neon-captest-reuse, neon-captest-prefetch, rds-postgres ]
+        db_size: [ 10gb ]
+        runner: [ us-east-2 ]
+        include:
+          - platform: neon-captest-prefetch
+            db_size: 50gb
+            runner: us-east-2
+          - platform: rds-aurora
+            db_size: 50gb
+            runner: us-east-2

    env:
      TEST_PG_BENCH_DURATIONS_MATRIX: "60m"
@@ -177,7 +137,7 @@ jobs:
      SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
      PLATFORM: ${{ matrix.platform }}

-    runs-on: [ self-hosted, us-east-2, x64 ]
+    runs-on: [ self-hosted, "${{ matrix.runner }}", x64 ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
@@ -200,14 +160,13 @@ jobs:
        echo "${POSTGRES_DISTRIB_DIR}/v${DEFAULT_PG_VERSION}/bin" >> $GITHUB_PATH

    - name: Create Neon Project
-      if: contains(fromJson('["neon-captest-new", "neon-captest-freetier"]'), matrix.platform)
+      if: contains(fromJson('["neon-captest-new", "neon-captest-prefetch"]'), matrix.platform)
      id: create-neon-project
      uses: ./.github/actions/neon-project-create
      with:
        region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }}
        postgres_version: ${{ env.DEFAULT_PG_VERSION }}
        api_key: ${{ secrets.NEON_STAGING_API_KEY }}
-        compute_units: ${{ (matrix.platform == 'neon-captest-freetier' && '[0.25, 0.25]') || '[1, 1]' }}

    - name: Set up Connection String
      id: set-up-connstr
@@ -216,7 +175,7 @@ jobs:
          neon-captest-reuse)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }}
            ;;
-          neon-captest-new | neon-captest-freetier)
+          neon-captest-new | neon-captest-prefetch)
            CONNSTR=${{ steps.create-neon-project.outputs.dsn }}
            ;;
          rds-aurora)
@@ -226,7 +185,7 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
            ;;
          *)
-            echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"  # report on stderr
            exit 1
            ;;
        esac
@@ -235,6 +194,17 @@ jobs:

        psql ${CONNSTR} -c "SELECT version();"

+    - name: Set database options
+      if: matrix.platform == 'neon-captest-prefetch'
+      run: |
+        DB_NAME=$(psql "${BENCHMARK_CONNSTR}" --no-align --quiet -t -c "SELECT current_database()")
+        # Quote the connstr (no word splitting/globbing) and the identifier (current_database() returns the exact, case-sensitive name)
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET enable_seqscan_prefetch=on"
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET effective_io_concurrency=32"
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET maintenance_io_concurrency=32"
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+
    - name: Benchmark init
      uses: ./.github/actions/run-python-test-set
      with:
@@ -306,11 +276,15 @@ jobs:
    # *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
    # *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
    if: success() || failure()
-    needs: [ generate-matrices, pgbench-compare ]
+    needs: [ pgbench-compare ]

    strategy:
      fail-fast: false
-      matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
+      matrix:
+        # neon-captest-prefetch: We have pre-created projects with prefetch enabled
+        # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
+        # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
+        platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]

    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
@@ -346,7 +320,7 @@ jobs:
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
-          neon-captest-reuse)
+          neon-captest-prefetch)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }}
            ;;
          rds-aurora)
@@ -356,7 +330,7 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
            ;;
          *)
-            echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"  # report on stderr
            exit 1
            ;;
        esac
@@ -365,6 +339,17 @@ jobs:

        psql ${CONNSTR} -c "SELECT version();"

+    - name: Set database options
+      if: matrix.platform == 'neon-captest-prefetch'
+      run: |
+        DB_NAME=$(psql "${BENCHMARK_CONNSTR}" --no-align --quiet -t -c "SELECT current_database()")
+        # Quote the connstr (no word splitting/globbing) and the identifier (current_database() returns the exact, case-sensitive name)
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET enable_seqscan_prefetch=on"
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET effective_io_concurrency=32"
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET maintenance_io_concurrency=32"
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+
    - name: ClickBench benchmark
      uses: ./.github/actions/run-python-test-set
      with:
@@ -402,11 +387,15 @@ jobs:
    #
    # *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
    if: success() || failure()
-    needs: [ generate-matrices, clickbench-compare ]
+    needs: [ clickbench-compare ]

    strategy:
      fail-fast: false
-      matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
+      matrix:
+        # neon-captest-prefetch: We have pre-created projects with prefetch enabled
+        # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
+        # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
+        platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]

    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
@@ -442,7 +431,7 @@ jobs:
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
-          neon-captest-reuse)
+          neon-captest-prefetch)
            CONNSTR=${{ secrets.BENCHMARK_CAPTEST_TPCH_S10_CONNSTR }}
            ;;
          rds-aurora)
@@ -452,7 +441,7 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
            ;;
          *)
-            echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"  # report on stderr
            exit 1
            ;;
        esac
@@ -461,6 +450,17 @@ jobs:

        psql ${CONNSTR} -c "SELECT version();"

+    - name: Set database options
+      if: matrix.platform == 'neon-captest-prefetch'
+      run: |
+        DB_NAME=$(psql "${BENCHMARK_CONNSTR}" --no-align --quiet -t -c "SELECT current_database()")
+        # Quote the connstr (no word splitting/globbing) and the identifier (current_database() returns the exact, case-sensitive name)
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET enable_seqscan_prefetch=on"
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET effective_io_concurrency=32"
+        psql "${BENCHMARK_CONNSTR}" -c "ALTER DATABASE \"${DB_NAME}\" SET maintenance_io_concurrency=32"
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+
    - name: Run TPC-H benchmark
      uses: ./.github/actions/run-python-test-set
      with:
@@ -492,11 +492,15 @@ jobs:

  user-examples-compare:
    if: success() || failure()
-    needs: [ generate-matrices, tpch-compare ]
+    needs: [ tpch-compare ]

    strategy:
      fail-fast: false
-      matrix: ${{ fromJson(needs.generate-matrices.outputs.olap-compare-matrix) }}
+      matrix:
+        # neon-captest-prefetch: We have pre-created projects with prefetch enabled
+        # rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
+        # rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
+        platform: [ neon-captest-prefetch, rds-postgres, rds-aurora ]

    env:
      POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
@@ -532,7 +536,7 @@ jobs:
      id: set-up-connstr
      run: |
        case "${PLATFORM}" in
-          neon-captest-reuse)
+          neon-captest-prefetch)
            CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }}
            ;;
          rds-aurora)
@@ -542,7 +546,7 @@ jobs:
            CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
            ;;
          *)
-            echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
+            echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-prefetch', 'rds-aurora', or 'rds-postgres'"
            exit 1
            ;;
        esac
@@ -551,6 +555,17 @@ jobs:

        psql ${CONNSTR} -c "SELECT version();"

+    - name: Set database options
+      if: matrix.platform == 'neon-captest-prefetch'
+      run: |
+        DB_NAME=$(psql ${BENCHMARK_CONNSTR} --no-align --quiet -t -c "SELECT current_database()")
+
+        psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET enable_seqscan_prefetch=on"
+        psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET effective_io_concurrency=32"
+        psql ${BENCHMARK_CONNSTR} -c "ALTER DATABASE ${DB_NAME} SET maintenance_io_concurrency=32"
+      env:
+        BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
+
    - name: Run user examples
      uses: ./.github/actions/run-python-test-set
      with:
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -184,10 +184,10 @@ jobs:
          CARGO_FEATURES="--features testing"
          if [[ $BUILD_TYPE == "debug" ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
-            CARGO_FLAGS="--locked"
+            CARGO_FLAGS="--locked $CARGO_FEATURES"
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=""
-            CARGO_FLAGS="--locked --release"
+            CARGO_FLAGS="--locked --release $CARGO_FEATURES"
          fi
          echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
          echo "CARGO_FEATURES=${CARGO_FEATURES}" >> $GITHUB_ENV
@@ -240,18 +240,11 @@ jobs:

      - name: Run cargo build
        run: |
-          ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests
+          ${cov_prefix} mold -run cargo build $CARGO_FLAGS --bins --tests

      - name: Run cargo test
        run: |
-          ${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES
-
-          # Run separate tests for real S3
-          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
-          export REMOTE_STORAGE_S3_BUCKET=neon-github-public-dev
-          export REMOTE_STORAGE_S3_REGION=eu-central-1
-          # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now
-          ${cov_prefix} cargo test $CARGO_FLAGS --package remote_storage --test pagination_tests -- s3_pagination_should_work --exact
+          ${cov_prefix} cargo test $CARGO_FLAGS

      - name: Install rust binaries
        run: |
@@ -275,7 +268,7 @@ jobs:
            mkdir -p /tmp/neon/test_bin/

            test_exe_paths=$(
-              ${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
+              ${cov_prefix} cargo test $CARGO_FLAGS --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
            )
            for bin in $test_exe_paths; do
@@ -335,10 +328,6 @@ jobs:
          real_s3_region: us-west-2
          real_s3_access_key_id: "${{ secrets.AWS_ACCESS_KEY_ID_CI_TESTS_S3 }}"
          real_s3_secret_access_key: "${{ secrets.AWS_SECRET_ACCESS_KEY_CI_TESTS_S3 }}"
-          rerun_flaky: true
-        env:
-          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
-          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty

      - name: Merge and upload coverage data
        if: matrix.build_type == 'debug'
@@ -375,90 +364,42 @@ jobs:
      # XXX: no coverage data handling here, since benchmarks are run on release builds,
      # while coverage is currently collected for the debug ones

-  create-test-report:
+  merge-allure-report:
    runs-on: [ self-hosted, gen3, small ]
    container:
      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
      options: --init
    needs: [ regress-tests, benchmarks ]
    if: ${{ !cancelled() }}
-
+    strategy:
+      fail-fast: false
+      matrix:
+        build_type: [ debug, release ]
    steps:
-      - uses: actions/checkout@v3
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: false

-      - name: Create Allure report (debug)
-        if: ${{ !cancelled() }}
-        id: create-allure-report-debug
+      - name: Create Allure report
+        id: create-allure-report
        uses: ./.github/actions/allure-report
        with:
          action: generate
-          build_type: debug
-
-      - name: Create Allure report (release)
-        if: ${{ !cancelled() }}
-        id: create-allure-report-release
-        uses: ./.github/actions/allure-report
-        with:
-          action: generate
-          build_type: release
-
-      - uses: actions/github-script@v6
-        if: >
-          !cancelled() &&
-          github.event_name == 'pull_request' && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
-        with:
-          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
-          retries: 5
-          script: |
-            const reports = [{
-              buildType: "debug",
-              reportUrl: "${{ steps.create-allure-report-debug.outputs.report-url }}",
-              jsonUrl:   "${{ steps.create-allure-report-debug.outputs.report-json-url }}",
-            }, {
-              buildType: "release",
-              reportUrl: "${{ steps.create-allure-report-release.outputs.report-url }}",
-              jsonUrl:   "${{ steps.create-allure-report-release.outputs.report-json-url }}",
-            }]
-
-            const script = require("./scripts/pr-comment-test-report.js")
-            await script({
-              github,
-              context,
-              fetch,
-              reports,
-            })
+          build_type: ${{ matrix.build_type }}

      - name: Store Allure test stat in the DB
-        if: >
-          !cancelled() && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
+        if: ${{ steps.create-allure-report.outputs.report-url }}
        env:
+          BUILD_TYPE: ${{ matrix.build_type }}
          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-          REPORT_JSON_URL_DEBUG: ${{ steps.create-allure-report-debug.outputs.report-json-url }}
-          REPORT_JSON_URL_RELEASE: ${{ steps.create-allure-report-release.outputs.report-json-url }}
+          REPORT_URL: ${{ steps.create-allure-report.outputs.report-url }}
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR }}
        run: |
+          curl --fail --output suites.json "${REPORT_URL%/index.html}/data/suites.json"
          ./scripts/pysync

-          for report_url in $REPORT_JSON_URL_DEBUG $REPORT_JSON_URL_RELEASE; do
-            if [ -z "$report_url" ]; then
-              continue
-            fi
-
-            if [[ "$report_url" == "$REPORT_JSON_URL_DEBUG" ]]; then
-              BUILD_TYPE=debug
-            else
-              BUILD_TYPE=release
-            fi
-
-            curl --fail --output suites.json "${report_url}"
-            DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json
-          done
+          DATABASE_URL="$TEST_RESULT_CONNSTR" poetry run python3 scripts/ingest_regress_test_result.py --revision ${SHA} --reference ${GITHUB_REF} --build-type ${BUILD_TYPE} --ingest suites.json

  coverage-report:
    runs-on: [ self-hosted, gen3, small ]
@@ -950,16 +891,6 @@ jobs:
    needs: [ push-docker-hub, tag, regress-tests ]
    if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
    steps:
-      - name: Fix git ownership
-        run: |
-          # Workaround for `fatal: detected dubious ownership in repository at ...`
-          #
-          # Use both ${{ github.workspace }} and ${GITHUB_WORKSPACE} because they're different on host and in containers
-          #   Ref https://github.com/actions/checkout/issues/785
-          #
-          git config --global --add safe.directory ${{ github.workspace }}
-          git config --global --add safe.directory ${GITHUB_WORKSPACE}
-
      - name: Checkout
        uses: actions/checkout@v3
        with:
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -53,14 +53,14 @@ jobs:
        uses: actions/cache@v3
        with:
          path: pg_install/v14
-          key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
        uses: actions/cache@v3
        with:
          path: pg_install/v15
-          key: v1-${{ runner.os }}-${{ env.BUILD_TYPE }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}
+          key: v1-${{ runner.os }}-${{ matrix.build_type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-${{ hashFiles('Makefile') }}

      - name: Set extra env for macOS
        run: |
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -31,3 +31,4 @@ jobs:
        head: releases/${{ steps.date.outputs.date }}
        base: release
        title: Release ${{ steps.date.outputs.date }}
+        team_reviewers: release
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -841,19 +841,6 @@ dependencies = [
 "unicode-width",
 ]

-[[package]]
-name = "compute_api"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "chrono",
- "serde",
- "serde_json",
- "serde_with",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "compute_tools"
 version = "0.1.0"
@@ -861,7 +848,6 @@ dependencies = [
 "anyhow",
 "chrono",
 "clap 4.1.4",
- "compute_api",
 "futures",
 "hyper",
 "notify",
@@ -880,7 +866,6 @@ dependencies = [
 "tracing-subscriber",
 "tracing-utils",
 "url",
- "utils",
 "workspace_hack",
 ]

@@ -922,10 +907,8 @@ name = "control_plane"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "chrono",
 "clap 4.1.4",
 "comfy-table",
- "compute_api",
 "git-version",
 "nix",
 "once_cell",
@@ -2491,7 +2474,6 @@ dependencies = [
 "strum",
 "strum_macros",
 "svg_fmt",
- "sync_wrapper",
 "tempfile",
 "tenant_size_model",
 "thiserror",
@@ -3103,7 +3085,6 @@ dependencies = [
 "serde",
 "serde_json",
 "tempfile",
- "test-context",
 "tokio",
 "tokio-util",
 "toml_edit",
@@ -3371,7 +3352,6 @@ dependencies = [
 "tempfile",
 "thiserror",
 "tokio",
- "tokio-io-timeout",
 "tokio-postgres",
 "toml_edit",
 "tracing",
@@ -3908,27 +3888,6 @@ dependencies = [
 "winapi-util",
 ]

-[[package]]
-name = "test-context"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "055831a02a4f5aa28fede67f2902014273eb8c21b958ac5ebbd59b71ef30dbc3"
-dependencies = [
- "async-trait",
- "futures",
- "test-context-macros",
-]
-
-[[package]]
-name = "test-context-macros"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8901a55b0a7a06ebc4a674dcca925170da8e613fa3b163a1df804ed10afb154d"
-dependencies = [
- "quote",
- "syn",
-]
-
 [[package]]
 name = "textwrap"
 version = "0.16.0"
@@ -4575,7 +4534,6 @@ dependencies = [
 "once_cell",
 "pin-project-lite",
 "rand",
- "regex",
 "routerify",
 "sentry",
 "serde",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -97,11 +97,9 @@ strum_macros = "0.24"
 svg_fmt = "0.4.1"
 sync_wrapper = "0.1.2"
 tar = "0.4"
-test-context = "0.1"
 thiserror = "1.0"
 tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
 tokio = { version = "1.17", features = ["macros"] }
-tokio-io-timeout = "1.2.0"
 tokio-postgres-rustls = "0.9.0"
 tokio-rustls = "0.23"
 tokio-stream = "0.1"
@@ -133,7 +131,6 @@ tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df6
 heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending

 ## Local libraries
-compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -38,7 +38,6 @@ RUN cd postgres && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_stat_statements.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
@@ -301,27 +300,6 @@ RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plpgsql_check.control

-#########################################################################################
-#
-# Layer "timescaledb-pg-build"
-# compile timescaledb extension
-#
-#########################################################################################
-FROM build-deps AS timescaledb-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-ENV PATH "/usr/local/pgsql/bin:$PATH"
-
-RUN apt-get update && \
-    apt-get install -y cmake && \
-    wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
-    mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
-    ./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \
-    cd build && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make install -j $(getconf _NPROCESSORS_ONLN) && \
-    echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control
-
 #########################################################################################
 # 
 # Layer "rust extensions"
@@ -426,7 +404,6 @@ COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
--- a/README.md
+++ b/README.md
@@ -40,8 +40,6 @@ pacman -S base-devel readline zlib libseccomp openssl clang \
 postgresql-libs cmake postgresql protobuf
 ```

-Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your distribution provides an older version, you can install a newer version from [here](https://github.com/protocolbuffers/protobuf/releases).
-
 2. [Install Rust](https://www.rust-lang.org/tools/install)
 ```
 # recommended approach from https://www.rust-lang.org/tools/install
@@ -147,15 +145,15 @@ Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50
 Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one

 # start postgres compute node
-> ./target/debug/neon_local endpoint start main
-Starting new endpoint main (PostgreSQL v14) on timeline de200bd42b49cc1814412c7e592dd6e9 ...
+> ./target/debug/neon_local pg start main
+Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/main port=55432
-Starting postgres at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'
+Starting postgres node at 'host=127.0.0.1 port=55432 user=cloud_admin dbname=postgres'

 # check list of running postgres instances
-> ./target/debug/neon_local endpoint list
- ENDPOINT  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS
- main      127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
+> ./target/debug/neon_local pg list
+ NODE  ADDRESS          TIMELINE                          BRANCH NAME  LSN        STATUS
+ main  127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main         0/16B5BA8  running
 ```

 2. Now, it is possible to connect to postgres and run some queries:
@@ -184,14 +182,14 @@ Created timeline 'b3b863fa45fa9e57e615f9f2d944e601' at Lsn 0/16F9A00 for tenant:
 (L) ┗━ @0/16F9A00: migration_check [b3b863fa45fa9e57e615f9f2d944e601]

 # start postgres on that branch
-> ./target/debug/neon_local endpoint start migration_check --branch-name migration_check
-Starting new endpoint migration_check (PostgreSQL v14) on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
+> ./target/debug/neon_local pg start migration_check --branch-name migration_check
+Starting new postgres migration_check on timeline b3b863fa45fa9e57e615f9f2d944e601 ...
 Extracting base backup to create postgres instance: path=.neon/pgdatadirs/tenants/9ef87a5bf0d92544f6fafeeb3239695c/migration_check port=55433
-Starting postgres at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'
+Starting postgres node at 'host=127.0.0.1 port=55433 user=cloud_admin dbname=postgres'

 # check the new list of running postgres instances
-> ./target/debug/neon_local endpoint list
- ENDPOINT         ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS
+> ./target/debug/neon_local pg list
+ NODE             ADDRESS          TIMELINE                          BRANCH NAME      LSN        STATUS
 main             127.0.0.1:55432  de200bd42b49cc1814412c7e592dd6e9  main             0/16F9A38  running
 migration_check  127.0.0.1:55433  b3b863fa45fa9e57e615f9f2d944e601  migration_check  0/16F9A70  running

--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -27,6 +27,4 @@ tracing-subscriber.workspace = true
 tracing-utils.workspace = true
 url.workspace = true

-compute_api.workspace = true
-utils.workspace = true
 workspace_hack.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -34,40 +34,35 @@ use std::fs::File;
 use std::panic;
 use std::path::Path;
 use std::process::exit;
-use std::sync::{mpsc, Arc, Condvar, Mutex};
+use std::sync::{Arc, RwLock};
 use std::{thread, time::Duration};

 use anyhow::{Context, Result};
 use chrono::Utc;
 use clap::Arg;
 use tracing::{error, info};
-use url::Url;

-use compute_api::responses::ComputeStatus;
-use compute_api::spec::{ComputeSpecAnyVersion, ComputeSpecV2};
-
-use compute_tools::compute::{ComputeNode, ComputeState};
+use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
 use compute_tools::http::api::launch_http_server;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
+use compute_tools::pg_helpers::*;
 use compute_tools::spec::*;
+use url::Url;

 fn main() -> Result<()> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

    let matches = cli().get_matches();

-    let http_port = *matches
-        .get_one::<u16>("http-port")
-        .expect("http-port is required");
    let pgdata = matches
        .get_one::<String>("pgdata")
        .expect("PGDATA path is required");
    let connstr = matches
        .get_one::<String>("connstr")
        .expect("Postgres connection string is required");
-    let spec_json = matches.get_one::<String>("spec");
+    let spec = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");

    let compute_id = matches.get_one::<String>("compute-id");
@@ -76,86 +71,40 @@ fn main() -> Result<()> {
    // Try to use just 'postgres' if no path is provided
    let pgbin = matches.get_one::<String>("pgbin").unwrap();

-    let mut spec: Option<ComputeSpecAnyVersion> = None;
-    let mut live_config_allowed = false;
-    match spec_json {
+    let spec: ComputeSpec = match spec {
        // First, try to get cluster spec from the cli argument
-        Some(json) => {
-            spec = Some(serde_json::from_str(json)?);
-        }
+        Some(json) => serde_json::from_str(json)?,
        None => {
            // Second, try to read it from the file if path is provided
            if let Some(sp) = spec_path {
                let path = Path::new(sp);
                let file = File::open(path)?;
-                spec = Some(serde_json::from_reader(file)?);
+                serde_json::from_reader(file)?
            } else if let Some(id) = compute_id {
                if let Some(cp_base) = control_plane_uri {
-                    live_config_allowed = true;
-                    if let Ok(s) = get_spec_from_control_plane(cp_base, id) {
-                        spec = Some(s);
-                    }
+                    let cp_uri = format!("{cp_base}/management/api/v1/{id}/spec");
+                    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
+                        Ok(v) => v,
+                        Err(_) => "".to_string(),
+                    };
+
+                    reqwest::blocking::Client::new()
+                        .get(cp_uri)
+                        .header("Authorization", jwt)
+                        .send()?
+                        .json()?
                } else {
-                    panic!("must specify both --control-plane-uri and --compute-id or none");
+                    panic!(
+                        "must specify --control-plane-uri \"{:#?}\" and --compute-id \"{:#?}\"",
+                        control_plane_uri, compute_id
+                    );
                }
            } else {
-                panic!(
-                    "compute spec should be provided by one of the following ways: \
-                    --spec OR --spec-path OR --control-plane-uri and --compute-id"
-                );
+                panic!("compute spec should be provided via --spec or --spec-path argument");
            }
        }
    };

-    let mut new_state = ComputeState::new();
-    let spec_set;
-    if let Some(spec) = spec {
-        // Parse the spec file, upgrading it from older format if necessary
-        let spec: ComputeSpecV2 = ComputeSpecV2::try_from(spec)?;
-        new_state.spec = Some(spec);
-        spec_set = true;
-    } else {
-        spec_set = false;
-    }
-    let compute_node = ComputeNode {
-        start_time: Utc::now(),
-        connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
-        pgdata: pgdata.to_string(),
-        pgbin: pgbin.to_string(),
-        live_config_allowed,
-        state: Mutex::new(new_state),
-        state_changed: Condvar::new(),
-    };
-    let compute = Arc::new(compute_node);
-
-    // Launch http service first, so we were able to serve control-plane
-    // requests, while configuration is still in progress.
-    let _http_handle =
-        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
-
-    if !spec_set {
-        // No spec provided, hang waiting for it.
-        info!("no compute spec provided, waiting");
-        let mut state = compute.state.lock().unwrap();
-        while state.status != ComputeStatus::ConfigurationPending {
-            state = compute.state_changed.wait(state).unwrap();
-
-            if state.status == ComputeStatus::ConfigurationPending {
-                info!("got spec, continue configuration");
-                // Spec is already set by the http server handler.
-                break;
-            }
-        }
-    }
-
-    // We got all we need, update the state.
-    let mut state = compute.state.lock().unwrap();
-    let spec = state.spec.as_ref().expect("spec must be set");
-    let startup_tracing_context = spec.startup_tracing_context.clone();
-    state.status = ComputeStatus::Init;
-    compute.state_changed.notify_all();
-    drop(state);
-
    // Extract OpenTelemetry context for the startup actions from the spec, and
    // attach it to the current tracing context.
    //
@@ -171,7 +120,7 @@ fn main() -> Result<()> {
    // postgres is configured and up-and-running, we exit this span. Any other
    // actions that are performed on incoming HTTP requests, for example, are
    // performed in separate spans.
-    let startup_context_guard = if let Some(ref carrier) = startup_tracing_context {
+    let startup_context_guard = if let Some(ref carrier) = spec.startup_tracing_context {
        use opentelemetry::propagation::TextMapPropagator;
        use opentelemetry::sdk::propagation::TraceContextPropagator;
        Some(TraceContextPropagator::new().extract(carrier).attach())
@@ -179,7 +128,41 @@ fn main() -> Result<()> {
        None
    };

-    // Launch remaining service threads
+    let pageserver_connstr = spec
+        .cluster
+        .settings
+        .find("neon.pageserver_connstring")
+        .expect("pageserver connstr should be provided");
+    let storage_auth_token = spec.storage_auth_token.clone();
+    let tenant = spec
+        .cluster
+        .settings
+        .find("neon.tenant_id")
+        .expect("tenant id should be provided");
+    let timeline = spec
+        .cluster
+        .settings
+        .find("neon.timeline_id")
+        .expect("timeline id should be provided");
+
+    let compute_state = ComputeNode {
+        start_time: Utc::now(),
+        connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
+        pgdata: pgdata.to_string(),
+        pgbin: pgbin.to_string(),
+        spec,
+        tenant,
+        timeline,
+        pageserver_connstr,
+        storage_auth_token,
+        metrics: ComputeMetrics::default(),
+        state: RwLock::new(ComputeState::new()),
+    };
+    let compute = Arc::new(compute_state);
+
+    // Launch service threads first, so that we are able to serve availability
+    // requests while configuration is still in progress.
+    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");

    // Start Postgres
@@ -189,7 +172,7 @@ fn main() -> Result<()> {
        Ok(pg) => Some(pg),
        Err(err) => {
            error!("could not start the compute node: {:?}", err);
-            let mut state = compute.state.lock().unwrap();
+            let mut state = compute.state.write().unwrap();
            state.error = Some(format!("{:?}", err));
            state.status = ComputeStatus::Failed;
            drop(state);
@@ -220,29 +203,13 @@ fn main() -> Result<()> {
    if delay_exit {
        info!("giving control plane 30s to collect the error before shutdown");
        thread::sleep(Duration::from_secs(30));
+        info!("shutting down");
    }

    // Shutdown trace pipeline gracefully, so that it has a chance to send any
-    // pending traces before we exit. Shutting down OTEL tracing provider may
-    // hang for quite some time, see, for example:
-    // - https://github.com/open-telemetry/opentelemetry-rust/issues/868
-    // - and our problems with staging https://github.com/neondatabase/cloud/issues/3707#issuecomment-1493983636
-    //
-    // Yet, we want computes to shut down fast enough, as we may need a new one
-    // for the same timeline ASAP. So wait no longer than 2s for the shutdown to
-    // complete, then just error out and exit the main thread.
-    info!("shutting down tracing");
-    let (sender, receiver) = mpsc::channel();
-    let _ = thread::spawn(move || {
-        tracing_utils::shutdown_tracing();
-        sender.send(()).ok()
-    });
-    let shutdown_res = receiver.recv_timeout(Duration::from_millis(2000));
-    if shutdown_res.is_err() {
-        error!("timed out while shutting down tracing, exiting anyway");
-    }
+    // pending traces before we exit.
+    tracing_utils::shutdown_tracing();

-    info!("shutting down");
    exit(exit_code.unwrap_or(1))
 }

@@ -251,14 +218,6 @@ fn cli() -> clap::Command {
    let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
    clap::Command::new("compute_ctl")
        .version(version)
-        .arg(
-            Arg::new("http-port")
-                .long("http-port")
-                .value_name("HTTP_PORT")
-                .default_value("3080")
-                .value_parser(clap::value_parser!(u16))
-                .required(false),
-        )
        .arg(
            Arg::new("connstr")
                .short('C')
@@ -302,7 +261,7 @@ fn cli() -> clap::Command {
            Arg::new("control-plane-uri")
                .short('p')
                .long("control-plane-uri")
-                .value_name("CONTROL_PLANE_API_BASE_URI"),
+                .value_name("CONTROL_PLANE"),
        )
 }

--- a/compute_tools/src/compute.rs
+++ b/compute_tools/src/compute.rs
@@ -19,17 +19,15 @@ use std::os::unix::fs::PermissionsExt;
 use std::path::Path;
 use std::process::{Command, Stdio};
 use std::str::FromStr;
-use std::sync::{Condvar, Mutex};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::RwLock;

 use anyhow::{Context, Result};
 use chrono::{DateTime, Utc};
 use postgres::{Client, NoTls};
+use serde::{Serialize, Serializer};
 use tokio_postgres;
 use tracing::{info, instrument, warn};
-use utils::lsn::Lsn;
-
-use compute_api::responses::{ComputeMetrics, ComputeStatus};
-use compute_api::spec::ComputeSpecV2;

 use crate::checker::create_writability_check_data;
 use crate::config;
@@ -43,45 +41,41 @@ pub struct ComputeNode {
    pub connstr: url::Url,
    pub pgdata: String,
    pub pgbin: String,
-    /// We should only allow live re- / configuration of the compute node if
-    /// it uses 'pull model', i.e. it can go to control-plane and fetch
-    /// the latest configuration. Otherwise, there could be a case:
-    /// - we start compute with some spec provided as argument
-    /// - we push new spec and it does reconfiguration
-    /// - but then something happens and compute pod / VM is destroyed,
-    ///   so k8s controller starts it again with the **old** spec
-    /// and the same for empty computes:
-    /// - we started compute without any spec
-    /// - we push spec and it does configuration
-    /// - but then it is restarted without any spec again
-    pub live_config_allowed: bool,
-    /// Volatile part of the `ComputeNode`, which should be used under `Mutex`.
-    /// To allow HTTP API server to serving status requests, while configuration
-    /// is in progress, lock should be held only for short periods of time to do
-    /// read/write, not the whole configuration process.
-    pub state: Mutex<ComputeState>,
-    /// `Condvar` to allow notifying waiters about state changes.
-    pub state_changed: Condvar,
+    pub spec: ComputeSpec,
+    pub tenant: String,
+    pub timeline: String,
+    pub pageserver_connstr: String,
+    pub storage_auth_token: Option<String>,
+    pub metrics: ComputeMetrics,
+    /// Volatile part of the `ComputeNode`. It is kept under an `RwLock` so
+    /// that the HTTP API server can serve status requests while configuration
+    /// is in progress.
+    pub state: RwLock<ComputeState>,
 }

-#[derive(Clone, Debug)]
+fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    x.to_rfc3339().serialize(s)
+}
+
+#[derive(Serialize)]
+#[serde(rename_all = "snake_case")]
 pub struct ComputeState {
    pub status: ComputeStatus,
    /// Timestamp of the last Postgres activity
+    #[serde(serialize_with = "rfc3339_serialize")]
    pub last_active: DateTime<Utc>,
    pub error: Option<String>,
-    pub spec: Option<ComputeSpecV2>,
-    pub metrics: ComputeMetrics,
 }

 impl ComputeState {
    pub fn new() -> Self {
        Self {
-            status: ComputeStatus::Empty,
+            status: ComputeStatus::Init,
            last_active: Utc::now(),
            error: None,
-            spec: None,
-            metrics: ComputeMetrics::default(),
        }
    }
 }
@@ -92,15 +86,29 @@ impl Default for ComputeState {
    }
 }

+#[derive(Serialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "snake_case")]
+pub enum ComputeStatus {
+    Init,
+    Running,
+    Failed,
+}
+
+#[derive(Default, Serialize)]
+pub struct ComputeMetrics {
+    pub sync_safekeepers_ms: AtomicU64,
+    pub basebackup_ms: AtomicU64,
+    pub config_ms: AtomicU64,
+    pub total_startup_ms: AtomicU64,
+}
+
 impl ComputeNode {
    pub fn set_status(&self, status: ComputeStatus) {
-        let mut state = self.state.lock().unwrap();
-        state.status = status;
-        self.state_changed.notify_all();
+        self.state.write().unwrap().status = status;
    }

    pub fn get_status(&self) -> ComputeStatus {
-        self.state.lock().unwrap().status
+        self.state.read().unwrap().status
    }

    // Remove `pgdata` directory and create it again with right permissions.
@@ -116,16 +124,15 @@ impl ComputeNode {

    // Get basebackup from the libpq connection to pageserver using `connstr` and
    // unarchive it to `pgdata` directory overriding all its previous content.
-    #[instrument(skip(self, compute_state))]
-    fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
-        let spec = compute_state.spec.as_ref().expect("spec must be set");
+    #[instrument(skip(self))]
+    fn get_basebackup(&self, lsn: &str) -> Result<()> {
        let start_time = Utc::now();

-        let mut config = postgres::Config::from_str(&spec.pageserver_connstring)?;
+        let mut config = postgres::Config::from_str(&self.pageserver_connstr)?;

        // Use the storage auth token from the config file, if given.
        // Note: this overrides any password set in the connection string.
-        if let Some(storage_auth_token) = &spec.storage_auth_token {
+        if let Some(storage_auth_token) = &self.storage_auth_token {
            info!("Got storage auth token from spec file");
            config.password(storage_auth_token);
        } else {
@@ -134,8 +141,8 @@ impl ComputeNode {

        let mut client = config.connect(NoTls)?;
        let basebackup_cmd = match lsn {
-            Lsn(0) => format!("basebackup {} {}", spec.tenant_id, spec.timeline_id), // First start of the compute
-            _ => format!("basebackup {} {} {}", spec.tenant_id, spec.timeline_id, lsn),
+            "0/0" => format!("basebackup {} {}", &self.tenant, &self.timeline), // First start of the compute
+            _ => format!("basebackup {} {} {}", &self.tenant, &self.timeline, lsn),
        };
        let copyreader = client.copy_out(basebackup_cmd.as_str())?;

@@ -148,24 +155,28 @@ impl ComputeNode {
        ar.set_ignore_zeros(true);
        ar.unpack(&self.pgdata)?;

-        self.state.lock().unwrap().metrics.basebackup_ms = Utc::now()
-            .signed_duration_since(start_time)
-            .to_std()
-            .unwrap()
-            .as_millis() as u64;
+        self.metrics.basebackup_ms.store(
+            Utc::now()
+                .signed_duration_since(start_time)
+                .to_std()
+                .unwrap_or_default() // wall clock may step back: record 0, don't panic
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+
        Ok(())
    }

    // Run `postgres` in a special mode with `--sync-safekeepers` argument
    // and return the reported LSN back to the caller.
-    #[instrument(skip(self, storage_auth_token))]
-    fn sync_safekeepers(&self, storage_auth_token: Option<String>) -> Result<Lsn> {
+    #[instrument(skip(self))]
+    fn sync_safekeepers(&self) -> Result<String> {
        let start_time = Utc::now();

        let sync_handle = Command::new(&self.pgbin)
            .args(["--sync-safekeepers"])
            .env("PGDATA", &self.pgdata) // we cannot use -D in this mode
-            .envs(if let Some(storage_auth_token) = &storage_auth_token {
+            .envs(if let Some(storage_auth_token) = &self.storage_auth_token {
                vec![("NEON_AUTH_TOKEN", storage_auth_token)]
            } else {
                vec![]
@@ -190,49 +201,45 @@ impl ComputeNode {
            );
        }

-        self.state.lock().unwrap().metrics.sync_safekeepers_ms = Utc::now()
-            .signed_duration_since(start_time)
-            .to_std()
-            .unwrap()
-            .as_millis() as u64;
+        self.metrics.sync_safekeepers_ms.store(
+            Utc::now()
+                .signed_duration_since(start_time)
+                .to_std()
+                .unwrap_or_default() // wall clock may step back: record 0, don't panic
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );

-        let lsn = Lsn::from_str(String::from_utf8(sync_output.stdout)?.trim())?;
+        let lsn = String::from(String::from_utf8(sync_output.stdout)?.trim());

        Ok(lsn)
    }

    /// Do all the preparations like PGDATA directory creation, configuration,
    /// safekeepers sync, basebackup, etc.
-    #[instrument(skip(self, compute_state))]
-    pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
-        let spec = compute_state.spec.as_ref().expect("spec must be set");
+    #[instrument(skip(self))]
+    pub fn prepare_pgdata(&self) -> Result<()> {
+        let spec = &self.spec;
        let pgdata_path = Path::new(&self.pgdata);

        // Remove/create an empty pgdata directory and put configuration there.
        self.create_pgdata()?;
-        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?;
+        config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;

-        let lsn = if let Some(lsn) = spec.lsn {
-            // Read-only node, anchored at 'lsn'
-            lsn
-        } else {
-            // Primary that continues to write at end of the timeline
-            info!("starting safekeepers syncing");
-            let last_lsn = self
-                .sync_safekeepers(spec.storage_auth_token.clone())
-                .with_context(|| "failed to sync safekeepers")?;
-            info!("safekeepers synced at LSN {}", last_lsn);
-            last_lsn
-        };
+        info!("starting safekeepers syncing");
+        let lsn = self
+            .sync_safekeepers()
+            .with_context(|| "failed to sync safekeepers")?;
+        info!("safekeepers synced at LSN {}", lsn);

        info!(
            "getting basebackup@{} from pageserver {}",
-            lsn, &spec.pageserver_connstring
+            lsn, &self.pageserver_connstr
        );
-        self.get_basebackup(compute_state, lsn).with_context(|| {
+        self.get_basebackup(&lsn).with_context(|| {
            format!(
                "failed to get basebackup@{} from pageserver {}",
-                lsn, &spec.pageserver_connstring
+                lsn, &self.pageserver_connstr
            )
        })?;

@@ -245,16 +252,13 @@ impl ComputeNode {
    /// Start Postgres as a child process and manage DBs/roles.
    /// After that this will hang waiting on the postmaster process to exit.
    #[instrument(skip(self))]
-    pub fn start_postgres(
-        &self,
-        storage_auth_token: Option<String>,
-    ) -> Result<std::process::Child> {
+    pub fn start_postgres(&self) -> Result<std::process::Child> {
        let pgdata_path = Path::new(&self.pgdata);

        // Run postgres as a child process.
        let mut pg = Command::new(&self.pgbin)
            .args(["-D", &self.pgdata])
-            .envs(if let Some(storage_auth_token) = &storage_auth_token {
+            .envs(if let Some(storage_auth_token) = &self.storage_auth_token {
                vec![("NEON_AUTH_TOKEN", storage_auth_token)]
            } else {
                vec![]
@@ -267,9 +271,8 @@ impl ComputeNode {
        Ok(pg)
    }

-    /// Do initial configuration of the already started Postgres.
-    #[instrument(skip(self, compute_state))]
-    pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> {
+    #[instrument(skip(self))]
+    pub fn apply_config(&self) -> Result<()> {
        // If connection fails,
        // it may be the old node with `zenith_admin` superuser.
        //
@@ -300,56 +303,60 @@ impl ComputeNode {
        };

        // Proceed with post-startup configuration. Note, that order of operations is important.
-        let spec = &compute_state.spec.as_ref().expect("spec must be set");
-        handle_roles(spec, &mut client)?;
-        handle_databases(spec, &mut client)?;
-        handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
-        handle_grants(spec, self.connstr.as_str(), &mut client)?;
+        handle_roles(&self.spec, &mut client)?;
+        handle_databases(&self.spec, &mut client)?;
+        handle_role_deletions(self, &mut client)?;
+        handle_grants(self, &mut client)?;
        create_writability_check_data(&mut client)?;
-        handle_extensions(spec, &mut client)?;
+        handle_extensions(&self.spec, &mut client)?;

        // 'Close' connection
        drop(client);

-        info!("finished configuration of compute");
+        info!(
+            "finished configuration of compute for project {}",
+            self.spec.cluster.cluster_id
+        );

        Ok(())
    }

    #[instrument(skip(self))]
    pub fn start_compute(&self) -> Result<std::process::Child> {
-        let compute_state = self.state.lock().unwrap().clone();
-        let spec = compute_state.spec.as_ref().expect("spec must be set");
        info!(
            "starting compute for project {}, operation {}, tenant {}, timeline {}",
-            spec.project_id.as_deref().unwrap_or("None"),
-            spec.operation_uuid.as_deref().unwrap_or("None"),
-            spec.tenant_id,
-            spec.timeline_id,
+            self.spec.cluster.cluster_id,
+            self.spec.operation_uuid.as_deref().unwrap_or("None"), // may be absent
+            self.tenant,
+            self.timeline,
        );

-        self.prepare_pgdata(&compute_state)?;
+        self.prepare_pgdata()?;

        let start_time = Utc::now();

-        let pg = self.start_postgres(spec.storage_auth_token.clone())?;
+        let pg = self.start_postgres()?;

-        self.apply_config(&compute_state)?;
+        self.apply_config()?;

        let startup_end_time = Utc::now();
-        {
-            let mut state = self.state.lock().unwrap();
-            state.metrics.config_ms = startup_end_time
+        self.metrics.config_ms.store(
+            startup_end_time
                .signed_duration_since(start_time)
                .to_std()
                .unwrap()
-                .as_millis() as u64;
-            state.metrics.total_startup_ms = startup_end_time
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+        self.metrics.total_startup_ms.store(
+            startup_end_time
                .signed_duration_since(self.start_time)
                .to_std()
                .unwrap()
-                .as_millis() as u64;
-        }
+                .as_millis() as u64,
+            Ordering::Relaxed,
+        );
+
        self.set_status(ComputeStatus::Running);

        Ok(pg)
--- a/compute_tools/src/config.rs
+++ b/compute_tools/src/config.rs
@@ -5,8 +5,8 @@ use std::path::Path;

 use anyhow::Result;

-use crate::pg_helpers::escape_conf_value;
-use compute_api::spec::ComputeSpecV2;
+use crate::pg_helpers::PgOptionsSerialize;
+use crate::spec::ComputeSpec;

 /// Check that `line` is inside a text file and put it there if it is not.
 /// Create file if it doesn't exist.
@@ -32,54 +32,20 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
 }

 /// Create or completely rewrite configuration file specified by `path`
-pub fn write_postgres_conf(path: &Path, spec: &ComputeSpecV2) -> Result<()> {
+pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
    // File::create() destroys the file content if it exists.
-    let mut file = File::create(path)?;
+    let mut postgres_conf = File::create(path)?;

-    // Write the postgresql.conf content from the spec file as is.
-    if let Some(conf) = &spec.postgresql_conf {
-        writeln!(file, "{}", conf)?;
-    }
-
-    // Append any extra options from the spec file
-    if let Some(settings) = &spec.settings {
-        writeln!(file, "\n# Extra settings from spec document")?;
-
-        for setting in settings {
-            if let Some(value) = &setting.value {
-                let escaped_value: String = value.replace('\'', "''").replace('\\', "\\\\");
-                writeln!(file, "{} = '{}'", setting.name, escaped_value)?;
-            } else {
-                // If there is no value, then just append the line verbatim
-                writeln!(file, "{}", setting.name)?;
-            }
-        }
-    }
-
-    // Append options for connecting to storage
-    writeln!(file, "\n# Neon storage settings")?;
-    writeln!(
-        file,
-        "neon.pageserver_connstring='{}'",
-        escape_conf_value(&spec.pageserver_connstring)
-    )?;
-    if !spec.safekeeper_connstrings.is_empty() {
-        writeln!(
-            file,
-            "neon.safekeepers='{}'",
-            escape_conf_value(&spec.safekeeper_connstrings.join(","))
-        )?;
-    }
-    writeln!(
-        file,
-        "neon.tenant_id='{}'",
-        escape_conf_value(&spec.tenant_id.to_string())
-    )?;
-    writeln!(
-        file,
-        "neon.timeline_id='{}'",
-        escape_conf_value(&spec.timeline_id.to_string())
-    )?;
+    write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
+
+    Ok(())
+}
+
+// Write Postgres config block wrapped with generated comment section
+fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
+    writeln!(file, "# Managed by compute_ctl: begin")?;
+    writeln!(file, "{}", buf)?;
+    writeln!(file, "# Managed by compute_ctl: end")?;

    Ok(())
 }
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -3,30 +3,15 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::thread;

-use crate::compute::{ComputeNode, ComputeState};
-use compute_api::requests::ConfigurationRequest;
-use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
-use compute_api::spec::ComputeSpecV2;
-
+use crate::compute::ComputeNode;
 use anyhow::Result;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
 use num_cpus;
 use serde_json;
-use tokio::task;
 use tracing::{error, info};
 use tracing_utils::http::OtelName;

-fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
-    ComputeStatusResponse {
-        tenant: state.spec.as_ref().map(|spec| spec.tenant_id.to_string()),
-        timeline: state.spec.as_ref().map(|spec| spec.timeline_id.to_string()),
-        status: state.status,
-        last_active: state.last_active,
-        error: state.error.clone(),
-    }
-}
-
 // Service function to handle all available routes.
 async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
    //
@@ -38,45 +23,26 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
        // Serialized compute state.
        (&Method::GET, "/status") => {
            info!("serving /status GET request");
-            let state = compute.state.lock().unwrap();
-            let status_response = status_response_from_state(&state);
-            Response::new(Body::from(serde_json::to_string(&status_response).unwrap()))
+            let state = compute.state.read().unwrap();
+            Response::new(Body::from(serde_json::to_string(&*state).unwrap()))
        }

        // Startup metrics in JSON format. Keep /metrics reserved for a possible
        // future use for Prometheus metrics format.
        (&Method::GET, "/metrics.json") => {
            info!("serving /metrics.json GET request");
-            let metrics = compute.state.lock().unwrap().metrics.clone();
-            Response::new(Body::from(serde_json::to_string(&metrics).unwrap()))
+            Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
        }

        // Collect Postgres current usage insights
        (&Method::GET, "/insights") => {
            info!("serving /insights GET request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!("compute is not running, current status: {:?}", status);
-                error!(msg);
-                return Response::new(Body::from(msg));
-            }
-
            let insights = compute.collect_insights().await;
            Response::new(Body::from(insights))
        }

        (&Method::POST, "/check_writability") => {
            info!("serving /check_writability POST request");
-            let status = compute.get_status();
-            if status != ComputeStatus::Running {
-                let msg = format!(
-                    "invalid compute status for check_writability request: {:?}",
-                    status
-                );
-                error!(msg);
-                return Response::new(Body::from(msg));
-            }
-
            let res = crate::checker::check_writability(compute).await;
            match res {
                Ok(_) => Response::new(Body::from("true")),
@@ -95,23 +61,6 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
            ))
        }

-        // Accept spec in JSON format and request compute configuration. If
-        // anything goes wrong after we set the compute status to `ConfigurationPending`
-        // and update compute state with new spec, we basically leave compute
-        // in the potentially wrong state. That said, it's control-plane's
-        // responsibility to watch compute state after reconfiguration request
-        // and to clean restart in case of errors.
-        (&Method::POST, "/configure") => {
-            info!("serving /configure POST request");
-            match handle_configure_request(req, compute).await {
-                Ok(msg) => Response::new(Body::from(msg)),
-                Err((msg, code)) => {
-                    error!("error handling /configure request: {msg}");
-                    render_json_error(&msg, code)
-                }
-            }
-        }
-
        // Return the `404 Not Found` for any other routes.
        _ => {
            let mut not_found = Response::new(Body::from("404 Not Found"));
@@ -121,96 +70,10 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
    }
 }

-async fn handle_configure_request(
-    req: Request<Body>,
-    compute: &Arc<ComputeNode>,
-) -> Result<String, (String, StatusCode)> {
-    if !compute.live_config_allowed {
-        return Err((
-            "live configuration is not allowed for this compute node".to_string(),
-            StatusCode::PRECONDITION_FAILED,
-        ));
-    }
-
-    let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
-    let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
-    if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
-        let specv2 = match ComputeSpecV2::try_from(request.spec) {
-            Ok(ps) => ps,
-            Err(err) => return Err((err.to_string(), StatusCode::PRECONDITION_FAILED)),
-        };
-
-        // XXX: wrap state update under lock in code blocks. Otherwise,
-        // we will try to `Send` `mut state` into the spawned thread
-        // bellow, which will cause error:
-        // ```
-        // error: future cannot be sent between threads safely
-        // ```
-        {
-            let mut state = compute.state.lock().unwrap();
-            if state.status != ComputeStatus::Empty {
-                let msg = format!(
-                    "invalid compute status for configuration request: {:?}",
-                    state.status.clone()
-                );
-                return Err((msg, StatusCode::PRECONDITION_FAILED));
-            }
-            state.spec = Some(specv2);
-            state.status = ComputeStatus::ConfigurationPending;
-            compute.state_changed.notify_all();
-            drop(state);
-            info!("set new spec and notified waiters");
-        }
-
-        // Spawn a blocking thread to wait for compute to become Running.
-        // This is needed to do not block the main pool of workers and
-        // be able to serve other requests while some particular request
-        // is waiting for compute to finish configuration.
-        let c = compute.clone();
-        task::spawn_blocking(move || {
-            let mut state = c.state.lock().unwrap();
-            while state.status != ComputeStatus::Running {
-                state = c.state_changed.wait(state).unwrap();
-                info!(
-                    "waiting for compute to become Running, current status: {:?}",
-                    state.status
-                );
-
-                if state.status == ComputeStatus::Failed {
-                    let err = state.error.as_ref().map_or("unknown error", |x| x);
-                    let msg = format!("compute configuration failed: {:?}", err);
-                    return Err((msg, StatusCode::INTERNAL_SERVER_ERROR));
-                }
-            }
-
-            Ok(())
-        })
-        .await
-        .unwrap()?;
-
-        // Return current compute state if everything went well.
-        let state = compute.state.lock().unwrap().clone();
-        let status_response = status_response_from_state(&state);
-        Ok(serde_json::to_string(&status_response).unwrap())
-    } else {
-        Err(("invalid spec".to_string(), StatusCode::BAD_REQUEST))
-    }
-}
-
-fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
-    let error = GenericAPIError {
-        error: e.to_string(),
-    };
-    Response::builder()
-        .status(status)
-        .body(Body::from(serde_json::to_string(&error).unwrap()))
-        .unwrap()
-}
-
 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
-async fn serve(port: u16, state: Arc<ComputeNode>) {
-    let addr = SocketAddr::from(([0, 0, 0, 0], port));
+async fn serve(state: Arc<ComputeNode>) {
+    let addr = SocketAddr::from(([0, 0, 0, 0], 3080));

    let make_service = make_service_fn(move |_conn| {
        let state = state.clone();
@@ -245,10 +108,10 @@ async fn serve(port: u16, state: Arc<ComputeNode>) {
 }

 /// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
-pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
+pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
    let state = Arc::clone(state);

    Ok(thread::Builder::new()
        .name("http-endpoint".into())
-        .spawn(move || serve(port, state))?)
+        .spawn(move || serve(state))?)
 }
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -11,7 +11,7 @@ paths:
    get:
      tags:
      - Info
-      summary: Get compute node internal status.
+      summary: Get compute node internal status
      description: ""
      operationId: getComputeStatus
      responses:
@@ -26,7 +26,7 @@ paths:
    get:
      tags:
      - Info
-      summary: Get compute node startup metrics in JSON format.
+      summary: Get compute node startup metrics in JSON format
      description: ""
      operationId: getComputeMetricsJSON
      responses:
@@ -41,9 +41,9 @@ paths:
    get:
      tags:
      - Info
-      summary: Get current compute insights in JSON format.
+      summary: Get current compute insights in JSON format
      description: |
-        Note, that this doesn't include any historical data.
+        Note that this doesn't include any historical data
      operationId: getComputeInsights
      responses:
        200:
@@ -56,12 +56,12 @@ paths:
  /info:
    get:
      tags:
-      - Info
-      summary: Get info about the compute pod / VM.
+      - Info
+      summary: Get info about the compute Pod/VM
      description: ""
      operationId: getInfo
      responses:
-        200:
+        "200":
          description: Info
          content:
            application/json:
@@ -72,7 +72,7 @@ paths:
    post:
      tags:
      - Check
-      summary: Check that we can write new data on this compute.
+      summary: Check that we can write new data on this compute
      description: ""
      operationId: checkComputeWritability
      responses:
@@ -82,64 +82,9 @@ paths:
            text/plain:
              schema:
                type: string
-                description: Error text or 'true' if check passed.
+                description: Error text or 'true' if check passed
                example: "true"

-  /configure:
-    post:
-      tags:
-      - Configure
-      summary: Perform compute node configuration.
-      description: |
-        This is a blocking API endpoint, i.e. it blocks waiting until
-        compute is finished configuration and is in `Running` state.
-        Optional non-blocking mode could be added later.
-      operationId: configureCompute
-      requestBody:
-        description: Configuration request.
-        required: true
-        content:
-          application/json:
-            schema:
-              type: object
-              required:
-                - spec
-              properties:
-                spec:
-                  # XXX: I don't want to explain current spec in the OpenAPI format,
-                  # as it could be changed really soon. Consider doing it later.
-                  type: object
-      responses:
-        200:
-          description: Compute configuration finished.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ComputeState"
-        400:
-          description: Provided spec is invalid.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
-        412:
-          description: |
-            It's not possible to do live-configuration of the compute.
-            It's either in the wrong state, or compute doesn't use pull
-            mode of configuration.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
-        500:
-          description: |
-            Compute configuration request was processed, but error
-            occurred. Compute will likely shutdown soon.
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/GenericError"
-
 components:
  securitySchemes:
    JWT:
@@ -150,7 +95,7 @@ components:
  schemas:
    ComputeMetrics:
      type: object
-      description: Compute startup metrics.
+      description: Compute startup metrics
      required:
        - sync_safekeepers_ms
        - basebackup_ms
@@ -168,7 +113,7 @@ components:

    Info:
      type: object
-      description: Information about VM/Pod.
+      description: Information about VM/Pod
      required:
        - num_cpus
      properties:
@@ -185,26 +130,17 @@ components:
          $ref: '#/components/schemas/ComputeStatus'
        last_active:
          type: string
-          description: The last detected compute activity timestamp in UTC and RFC3339 format.
+          description: The last detected compute activity timestamp in UTC and RFC3339 format
          example: "2022-10-12T07:20:50.52Z"
        error:
          type: string
-          description: Text of the error during compute startup, if any.
-          example: ""
-        tenant:
-          type: string
-          description: Identifier of the current tenant served by compute node, if any.
-          example: c9269c359e9a199fad1ea0981246a78f
-        timeline:
-          type: string
-          description: Identifier of the current timeline served by compute node, if any.
-          example: ece7de74d4b8cbe5433a68ce4d1b97b4
+          description: Text of the error during compute startup, if any

    ComputeInsights:
      type: object
      properties:
        pg_stat_statements:
-          description: Contains raw output from pg_stat_statements in JSON format.
+          description: Contains raw output from pg_stat_statements in JSON format
          type: array
          items:
            type: object
@@ -215,19 +151,6 @@ components:
        - init
        - failed
        - running
-      example: running
-
-    #
-    # Errors
-    #
-
-    GenericError:
-      type: object
-      required:
-        - error
-      properties:
-        error:
-          type: string

 security:
  - JWT: []
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -46,7 +46,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
                            AND usename != 'cloud_admin';", // XXX: find a better way to filter other monitors?
                        &[],
                    );
-                let mut last_active = compute.state.lock().unwrap().last_active;
+                let mut last_active = compute.state.read().unwrap().last_active;

                if let Ok(backs) = backends {
                    let mut idle_backs: Vec<DateTime<Utc>> = vec![];
@@ -87,7 +87,7 @@ fn watch_compute_activity(compute: &ComputeNode) {
                }

                // Update the last activity in the shared state if we got a more recent one.
-                let mut state = compute.state.lock().unwrap();
+                let mut state = compute.state.write().unwrap();
                if last_active > state.last_active {
                    state.last_active = last_active;
                    debug!("set the last compute activity time to: {}", last_active);
--- a/compute_tools/src/pg_helpers.rs
+++ b/compute_tools/src/pg_helpers.rs
@@ -10,12 +10,43 @@ use std::time::{Duration, Instant};
 use anyhow::{bail, Result};
 use notify::{RecursiveMode, Watcher};
 use postgres::{Client, Transaction};
+use serde::Deserialize;
 use tracing::{debug, instrument};

-use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role};
-
 const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds

+/// Postgres role reduced to the fields we care about; describes both roles
+/// listed in the cluster spec and roles returned by `get_existing_roles`.
+#[derive(Clone, Deserialize)]
+pub struct Role {
+    pub name: PgIdent,
+    pub encrypted_password: Option<String>,
+    pub options: GenericOptions, // not used for roles yet, see `to_pg_options`
+}
+
+/// Postgres database reduced to the fields we care about; describes both
+/// databases listed in the cluster spec and those found by `get_existing_dbs`.
+#[derive(Clone, Deserialize)]
+pub struct Database {
+    pub name: PgIdent,
+    pub owner: PgIdent, // role name, rendered as `OWNER <quoted name>`
+    pub options: GenericOptions, // rendered in front of `OWNER` by `to_pg_options`
+}
+
+/// Common type representing both SQL statement params with or without value,
+/// like `LOGIN` or `OWNER username` in `CREATE/ALTER ROLE`, and config
+/// options like `wal_level = logical`.
+#[derive(Clone, Deserialize)]
+pub struct GenericOption {
+    pub name: String,
+    pub value: Option<String>, // `None` renders as the bare `name`
+    pub vartype: String, // "string" values get quoted and escaped when rendered
+}
+
+/// Optional list of `GenericOption`s. The alias gives us a short name to
+/// implement `PgOptionsSerialize` and `GenericOptionsSearch` for.
+pub type GenericOptions = Option<Vec<GenericOption>>;
+
 /// Escape a string for including it in a SQL literal
 fn escape_literal(s: &str) -> String {
    s.replace('\'', "''").replace('\\', "\\\\")
@@ -23,17 +54,13 @@ fn escape_literal(s: &str) -> String {

 /// Escape a string so that it can be used in postgresql.conf.
 /// Same as escape_literal, currently.
-pub fn escape_conf_value(s: &str) -> String {
+fn escape_conf_value(s: &str) -> String {
    s.replace('\'', "''").replace('\\', "\\\\")
 }

-trait GenericOptionExt {
-    fn to_pg_option(&self) -> String;
-}
-
-impl GenericOptionExt for GenericOption {
+impl GenericOption {
    /// Represent `GenericOption` as SQL statement parameter.
-    fn to_pg_option(&self) -> String {
+    pub fn to_pg_option(&self) -> String {
        if let Some(val) = &self.value {
            match self.vartype.as_ref() {
                "string" => format!("{} '{}'", self.name, escape_literal(val)),
@@ -43,10 +70,32 @@ impl GenericOptionExt for GenericOption {
            self.name.to_owned()
        }
    }
+
+    /// Render this option as a single `postgresql.conf` line (`name = value`).
+    pub fn to_pg_setting(&self) -> String {
+        // An option without a value is rendered as its bare, unmapped name.
+        let value = match &self.value {
+            Some(value) => value,
+            None => return self.name.clone(),
+        };
+        // TODO: check in the console DB that we don't have these settings
+        // set for any non-deleted project and drop this override.
+        let setting = match self.name.as_str() {
+            "safekeepers" => "neon.safekeepers",
+            "wal_acceptor_reconnect" => "neon.safekeeper_reconnect_timeout",
+            "wal_acceptor_connection_timeout" => "neon.safekeeper_connection_timeout",
+            other => other,
+        };
+        if self.vartype == "string" {
+            return format!("{} = '{}'", setting, escape_conf_value(value));
+        }
+        format!("{} = {}", setting, value)
+    }
 }

 pub trait PgOptionsSerialize {
    fn as_pg_options(&self) -> String;
+    fn as_pg_settings(&self) -> String;
 }

 impl PgOptionsSerialize for GenericOptions {
@@ -62,6 +111,20 @@ impl PgOptionsSerialize for GenericOptions {
            "".to_string()
        }
    }
+
+    /// Render the optional option list as `postgresql.conf` content: one
+    /// setting per line plus a trailing newline (`None` gives "").
+    fn as_pg_settings(&self) -> String {
+        match self {
+            None => String::new(),
+            Some(options) => {
+                let lines: Vec<String> =
+                    options.iter().map(GenericOption::to_pg_setting).collect();
+                // `join` only separates lines, so terminate the last one too.
+                lines.join("\n") + "\n"
+            }
+        }
+    }
 }

 pub trait GenericOptionsSearch {
@@ -77,14 +140,10 @@ impl GenericOptionsSearch for GenericOptions {
    }
 }

-pub trait RoleExt {
-    fn to_pg_options(&self) -> String;
-}
-
-impl RoleExt for Role {
+impl Role {
    /// Serialize a list of role parameters into a Postgres-acceptable
    /// string of arguments.
-    fn to_pg_options(&self) -> String {
+    pub fn to_pg_options(&self) -> String {
        // XXX: consider putting LOGIN as a default option somewhere higher, e.g. in control-plane.
        // For now, we do not use generic `options` for roles. Once used, add
        // `self.options.as_pg_options()` somewhere here.
@@ -109,17 +168,21 @@ impl RoleExt for Role {
    }
 }

-pub trait DatabaseExt {
-    fn to_pg_options(&self) -> String;
-}
+impl Database {
+    /// Build a `Database` description from its name and owner.
+    ///
+    /// `options` is left unset (`None`).
+    pub fn new(name: PgIdent, owner: PgIdent) -> Self {
+        let options = None;
+        Self { name, owner, options }
+    }

-impl DatabaseExt for Database {
    /// Serialize a list of database parameters into a Postgres-acceptable
    /// string of arguments.
    /// NB: `TEMPLATE` is actually also an identifier, but so far we only need
    /// to use `template0` and `template1`, so it is not a problem. Yet in the future
    /// it may require a proper quoting too.
-    fn to_pg_options(&self) -> String {
+    pub fn to_pg_options(&self) -> String {
        let mut params: String = self.options.as_pg_options();
        write!(params, " OWNER {}", &self.owner.pg_quote())
            .expect("String is documented to not to error during write operations");
@@ -128,6 +191,10 @@ impl DatabaseExt for Database {
    }
 }

+/// Alias of `String` marking a value as a Postgres identifier, such as a
+/// database or role name.
+pub type PgIdent = String;
+
 /// Generic trait used to provide quoting / encoding for strings used in the
 /// Postgres SQL queries and DATABASE_URL.
 pub trait Escaping {
@@ -168,11 +235,7 @@ pub fn get_existing_dbs(client: &mut Client) -> Result<Vec<Database>> {
            &[],
        )?
        .iter()
-        .map(|row| Database {
-            name: row.get("datname"),
-            owner: row.get("owner"),
-            options: None,
-        })
+        .map(|row| Database::new(row.get("datname"), row.get("owner")))
        .collect();

    Ok(postgres_dbs)
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,47 +1,63 @@
-//! Functions to reconciliate Postgres cluster with the spec file
+use std::collections::HashMap;
 use std::path::Path;
 use std::str::FromStr;

 use anyhow::Result;
 use postgres::config::Config;
 use postgres::{Client, NoTls};
+use serde::Deserialize;
 use tracing::{info, info_span, instrument, span_enabled, warn, Level};

+use crate::compute::ComputeNode;
 use crate::config;
 use crate::params::PG_HBA_ALL_MD5;
 use crate::pg_helpers::*;

-use compute_api::spec::{ComputeSpecAnyVersion, ComputeSpecV2, Database, PgIdent, Role};
+/// Cluster spec (configuration): an optional list of delta operations plus
+/// a description of the final cluster state.
+#[derive(Clone, Deserialize)]
+pub struct ComputeSpec {
+    pub format_version: f32,
+    pub timestamp: String,
+    pub operation_uuid: Option<String>,
+    /// Expected cluster state at the end of transition process.
+    pub cluster: Cluster,
+    pub delta_operations: Option<Vec<DeltaOp>>,

-/// Request spec from the control-plane by compute_id. If `NEON_CONSOLE_JWT`
-/// env variable is set, it will be used for authorization.
-pub fn get_spec_from_control_plane(
-    base_uri: &str,
-    compute_id: &str,
-) -> Result<ComputeSpecAnyVersion> {
-    let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
-    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
-        Ok(v) => v,
-        Err(_) => "".to_string(),
-    };
-    info!("getting spec from control plane: {}", cp_uri);
+    pub storage_auth_token: Option<String>,

-    // TODO: check the response. We should distinguish cases when it's
-    // - network error, then retry
-    // - no spec for compute yet, then wait
-    // - compute id is unknown or any other error, then bail out
-    let json = reqwest::blocking::Client::new()
-        .get(cp_uri)
-        .header("Authorization", jwt)
-        .send()?
-        .json()?;
-    Ok(ComputeSpecAnyVersion(json))
+    pub startup_tracing_context: Option<HashMap<String, String>>,
+}
+
+/// Cluster state seen from the perspective of external tools such as the
+/// Rails web console.
+#[derive(Clone, Deserialize)]
+pub struct Cluster {
+    pub cluster_id: String,
+    pub name: String,
+    pub state: Option<String>,
+    pub roles: Vec<Role>, // roles processed by `handle_roles`
+    pub databases: Vec<Database>, // databases processed by `handle_databases`
+    pub settings: GenericOptions, // config options, e.g. `shared_preload_libraries`
+}
+
+/// A single cluster-state-changing operation that cannot be represented in
+/// the static `Cluster` structure. For example:
+/// - DROP DATABASE
+/// - DROP ROLE
+/// - ALTER ROLE name RENAME TO new_name
+/// - ALTER DATABASE name RENAME TO new_name
+#[derive(Clone, Deserialize)]
+pub struct DeltaOp {
+    pub action: String, // e.g. "delete_role"
+    pub name: PgIdent,
+    pub new_name: Option<PgIdent>, // presumably set only for rename actions
 }

 /// It takes cluster specification and does the following:
 /// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
 /// - Update `pg_hba.conf` to allow external connections.
-pub fn handle_configuration(spec: &ComputeSpecV2, pgdata_path: &Path) -> Result<()> {
+pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
    // File `postgresql.conf` is no longer included into `basebackup`, so just
    // always write all config into it creating new file.
    config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
@@ -69,7 +85,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
 /// Given a cluster spec json and open transaction it handles roles creation,
 /// deletion and update.
 #[instrument(skip_all)]
-pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
+pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let mut xact = client.transaction()?;
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

@@ -125,7 +141,7 @@ pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
    let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;

    info!("cluster spec roles:");
-    for role in &spec.roles {
+    for role in &spec.cluster.roles {
        let name = &role.name;
        // XXX: with a limited number of roles it is fine, but consider making it a HashMap
        let pg_role = existing_roles.iter().find(|r| r.name == *name);
@@ -210,12 +226,8 @@ pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {

 /// Reassign all dependent objects and delete requested roles.
 #[instrument(skip_all)]
-pub fn handle_role_deletions(
-    spec: &ComputeSpecV2,
-    connstr: &str,
-    client: &mut Client,
-) -> Result<()> {
-    if let Some(ops) = &spec.delta_operations {
+pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
+    if let Some(ops) = &node.spec.delta_operations {
        // First, reassign all dependent objects to db owners.
        info!("reassigning dependent objects of to-be-deleted roles");

@@ -232,7 +244,7 @@ pub fn handle_role_deletions(
            // Check that role is still present in Postgres, as this could be a
            // restart with the same spec after role deletion.
            if op.action == "delete_role" && existing_roles.iter().any(|r| r.name == op.name) {
-                reassign_owned_objects(spec, connstr, &op.name)?;
+                reassign_owned_objects(node, &op.name)?;
            }
        }

@@ -256,10 +268,10 @@ pub fn handle_role_deletions(
 }

 // Reassign all owned objects in all databases to the owner of the database.
-fn reassign_owned_objects(spec: &ComputeSpecV2, connstr: &str, role_name: &PgIdent) -> Result<()> {
-    for db in &spec.databases {
+fn reassign_owned_objects(node: &ComputeNode, role_name: &PgIdent) -> Result<()> {
+    for db in &node.spec.cluster.databases {
        if db.owner != *role_name {
-            let mut conf = Config::from_str(connstr)?;
+            let mut conf = Config::from_str(node.connstr.as_str())?;
            conf.dbname(&db.name);

            let mut client = conf.connect(NoTls)?;
@@ -291,7 +303,7 @@ fn reassign_owned_objects(spec: &ComputeSpecV2, connstr: &str, role_name: &PgIde
 /// atomicity should be enough here due to the order of operations and various checks,
 /// which together provide us idempotency.
 #[instrument(skip_all)]
-pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
+pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
    let existing_dbs: Vec<Database> = get_existing_dbs(client)?;

    // Print a list of existing Postgres databases (only in debug mode)
@@ -339,7 +351,7 @@ pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()>
    let existing_dbs: Vec<Database> = get_existing_dbs(client)?;

    info!("cluster spec databases:");
-    for db in &spec.databases {
+    for db in &spec.cluster.databases {
        let name = &db.name;

        // XXX: with a limited number of databases it is fine, but consider making it a HashMap
@@ -404,7 +416,9 @@ pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()>
 /// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
 /// to allow users creating trusted extensions and re-creating `public` schema, for example.
 #[instrument(skip_all)]
-pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -> Result<()> {
+pub fn handle_grants(node: &ComputeNode, client: &mut Client) -> Result<()> {
+    let spec = &node.spec;
+
    info!("cluster spec grants:");

    // We now have a separate `web_access` role to connect to the database
@@ -414,12 +428,13 @@ pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -
    // XXX: later we should stop messing with Postgres ACL in such horrible
    // ways.
    let roles = spec
+        .cluster
        .roles
        .iter()
        .map(|r| r.name.pg_quote())
        .collect::<Vec<_>>();

-    for db in &spec.databases {
+    for db in &spec.cluster.databases {
        let dbname = &db.name;

        let query: String = format!(
@@ -435,8 +450,8 @@ pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -
    // Do some per-database access adjustments. We'd better do this at db creation time,
    // but CREATE DATABASE isn't transactional. So we cannot create db + do some grants
    // atomically.
-    for db in &spec.databases {
-        let mut conf = Config::from_str(connstr)?;
+    for db in &node.spec.cluster.databases {
+        let mut conf = Config::from_str(node.connstr.as_str())?;
        conf.dbname(&db.name);

        let mut db_client = conf.connect(NoTls)?;
@@ -505,11 +520,14 @@ pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -

 /// Create required system extensions
 #[instrument(skip_all)]
-pub fn handle_extensions(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
-    for extension in &spec.extensions {
-        let query = format!("CREATE EXTENSION IF NOT EXISTS {}", extension.pg_quote());
-        info!("creating system extensions with query: {}", query);
-        client.simple_query(&query)?;
+pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
+    if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
+        if libs.contains("pg_stat_statements") {
+            // Create extension only if this compute really needs it
+            let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
+            info!("creating system extensions with query: {}", query);
+            client.simple_query(query)?;
+        }
    }

    Ok(())
--- a/compute_tools/tests/cluster_spec.json
+++ b/compute_tools/tests/cluster_spec.json
@@ -0,0 +1,209 @@
+{
+    "format_version": 1.0,
+
+    "timestamp": "2021-05-23T18:25:43.511Z",
+    "operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
+
+    "cluster": {
+        "cluster_id": "test-cluster-42",
+        "name": "Zenith Test",
+        "state": "restarted",
+        "roles": [
+            {
+                "name": "postgres",
+                "encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
+                "options": null
+            },
+            {
+                "name": "alexk",
+                "encrypted_password": null,
+                "options": null
+            },
+            {
+                "name": "zenith \"new\"",
+                "encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
+                "options": null
+            },
+            {
+                "name": "zen",
+                "encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
+            },
+            {
+                "name": "\"name\";\\n select 1;",
+                "encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
+            },
+            {
+                "name": "MyRole",
+                "encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
+            }
+        ],
+        "databases": [
+            {
+                "name": "DB2",
+                "owner": "alexk",
+                "options": [
+                    {
+                        "name": "LC_COLLATE",
+                        "value": "C",
+                        "vartype": "string"
+                    },
+                    {
+                        "name": "LC_CTYPE",
+                        "value": "C",
+                        "vartype": "string"
+                    },
+                    {
+                        "name": "TEMPLATE",
+                        "value": "template0",
+                        "vartype": "enum"
+                    }
+                ]
+            },
+            {
+                "name": "zenith",
+                "owner": "MyRole"
+            },
+            {
+                "name": "zen",
+                "owner": "zen"
+            }
+        ],
+        "settings": [
+            {
+                "name": "fsync",
+                "value": "off",
+                "vartype": "bool"
+            },
+            {
+                "name": "wal_level",
+                "value": "replica",
+                "vartype": "enum"
+            },
+            {
+                "name": "hot_standby",
+                "value": "on",
+                "vartype": "bool"
+            },
+            {
+                "name": "neon.safekeepers",
+                "value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
+                "vartype": "string"
+            },
+            {
+                "name": "wal_log_hints",
+                "value": "on",
+                "vartype": "bool"
+            },
+            {
+                "name": "log_connections",
+                "value": "on",
+                "vartype": "bool"
+            },
+            {
+                "name": "shared_buffers",
+                "value": "32768",
+                "vartype": "integer"
+            },
+            {
+                "name": "port",
+                "value": "55432",
+                "vartype": "integer"
+            },
+            {
+                "name": "max_connections",
+                "value": "100",
+                "vartype": "integer"
+            },
+            {
+                "name": "max_wal_senders",
+                "value": "10",
+                "vartype": "integer"
+            },
+            {
+                "name": "listen_addresses",
+                "value": "0.0.0.0",
+                "vartype": "string"
+            },
+            {
+                "name": "wal_sender_timeout",
+                "value": "0",
+                "vartype": "integer"
+            },
+            {
+                "name": "password_encryption",
+                "value": "md5",
+                "vartype": "enum"
+            },
+            {
+                "name": "maintenance_work_mem",
+                "value": "65536",
+                "vartype": "integer"
+            },
+            {
+                "name": "max_parallel_workers",
+                "value": "8",
+                "vartype": "integer"
+            },
+            {
+                "name": "max_worker_processes",
+                "value": "8",
+                "vartype": "integer"
+            },
+            {
+                "name": "neon.tenant_id",
+                "value": "b0554b632bd4d547a63b86c3630317e8",
+                "vartype": "string"
+            },
+            {
+                "name": "max_replication_slots",
+                "value": "10",
+                "vartype": "integer"
+            },
+            {
+                "name": "neon.timeline_id",
+                "value": "2414a61ffc94e428f14b5758fe308e13",
+                "vartype": "string"
+            },
+            {
+                "name": "shared_preload_libraries",
+                "value": "neon",
+                "vartype": "string"
+            },
+            {
+                "name": "synchronous_standby_names",
+                "value": "walproposer",
+                "vartype": "string"
+            },
+            {
+                "name": "neon.pageserver_connstring",
+                "value": "host=127.0.0.1 port=6400",
+                "vartype": "string"
+            },
+            {
+                "name": "test.escaping",
+                "value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
+                "vartype": "string"
+            }
+        ]
+    },
+    "delta_operations": [
+        {
+            "action": "delete_db",
+            "name": "zenith_test"
+        },
+        {
+            "action": "rename_db",
+            "name": "DB",
+            "new_name": "DB2"
+        },
+        {
+            "action": "delete_role",
+            "name": "zenith2"
+        },
+        {
+            "action": "rename_role",
+            "name": "zenith new",
+            "new_name": "zenith \"new\""
+        }
+    ]
+}
--- a/compute_tools/tests/pg_helpers_tests.rs
+++ b/compute_tools/tests/pg_helpers_tests.rs
@@ -1,24 +1,58 @@
 #[cfg(test)]
 mod pg_helpers_tests {
-    use anyhow::Result;
-    use compute_api::spec::{ComputeSpecV2, GenericOption, GenericOptions, PgIdent};
+
+    use std::fs::File;
+
    use compute_tools::pg_helpers::*;
+    use compute_tools::spec::ComputeSpec;

    #[test]
-    fn params_serialize() -> Result<()> {
-        let spec_v1_str =
-            std::fs::read_to_string("../libs/compute_api/tests/spec-v1.json").unwrap();
-        let spec = ComputeSpecV2::parse_and_upgrade(&spec_v1_str)?;
+    fn params_serialize() {
+        let file = File::open("tests/cluster_spec.json").unwrap();
+        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();

        assert_eq!(
-            spec.databases.first().unwrap().to_pg_options(),
+            spec.cluster.databases.first().unwrap().to_pg_options(),
            "LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0 OWNER \"alexk\""
        );
        assert_eq!(
-            spec.roles.first().unwrap().to_pg_options(),
+            spec.cluster.roles.first().unwrap().to_pg_options(),
            "LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
        );
-        Ok(())
+    }
+
+    #[test]
+    fn settings_serialize() {
+        let file = File::open("tests/cluster_spec.json").unwrap();
+        let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
+        // One `name = value` line per setting, in spec order; "string" values are quoted.
+        assert_eq!(
+            spec.cluster.settings.as_pg_settings(),
+            r#"fsync = off
+wal_level = replica
+hot_standby = on
+neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
+wal_log_hints = on
+log_connections = on
+shared_buffers = 32768
+port = 55432
+max_connections = 100
+max_wal_senders = 10
+listen_addresses = '0.0.0.0'
+wal_sender_timeout = 0
+password_encryption = md5
+maintenance_work_mem = 65536
+max_parallel_workers = 8
+max_worker_processes = 8
+neon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'
+max_replication_slots = 10
+neon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'
+shared_preload_libraries = 'neon'
+synchronous_standby_names = 'walproposer'
+neon.pageserver_connstring = 'host=127.0.0.1 port=6400'
+test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hooray'
+"#
+        );
     }

    #[test]
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -6,7 +6,6 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-chrono.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
 git-version.workspace = true
@@ -27,7 +26,6 @@ url.workspace = true
 pageserver_api.workspace = true
 postgres_backend.workspace = true
 safekeeper_api.workspace = true
-compute_api.workspace = true
 postgres_connection.workspace = true
 storage_broker.workspace = true
 utils.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -7,7 +7,7 @@
 //!
 use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
-use control_plane::endpoint::ComputeControlPlane;
+use control_plane::compute::ComputeControlPlane;
 use control_plane::local_env::LocalEnv;
 use control_plane::pageserver::PageServerNode;
 use control_plane::safekeeper::SafekeeperNode;
@@ -106,8 +106,8 @@ fn main() -> Result<()> {
            "start" => handle_start_all(sub_args, &env),
            "stop" => handle_stop_all(sub_args, &env),
            "pageserver" => handle_pageserver(sub_args, &env),
+            "pg" => handle_pg(sub_args, &env),
            "safekeeper" => handle_safekeeper(sub_args, &env),
-            "endpoint" => handle_endpoint(sub_args, &env),
            _ => bail!("unexpected subcommand {sub_name}"),
        };

@@ -470,10 +470,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
            let mut cplane = ComputeControlPlane::load(env.clone())?;
            println!("Importing timeline into pageserver ...");
            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
+            println!("Creating node for imported timeline ...");
            env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;

-            println!("Creating endpoint for imported timeline ...");
-            cplane.new_endpoint(name, tenant_id, timeline_id, None, None, None, pg_version)?;
+            cplane.new_node(tenant_id, name, timeline_id, None, None, pg_version)?;
            println!("Done");
        }
        Some(("branch", branch_match)) => {
@@ -521,10 +521,10 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
    Ok(())
 }

-fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
-    let (sub_name, sub_args) = match ep_match.subcommand() {
-        Some(ep_subcommand_data) => ep_subcommand_data,
-        None => bail!("no endpoint subcommand provided"),
+fn handle_pg(pg_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+    let (sub_name, sub_args) = match pg_match.subcommand() {
+        Some(pg_subcommand_data) => pg_subcommand_data,
+        None => bail!("no pg subcommand provided"),
    };

    let mut cplane = ComputeControlPlane::load(env.clone())?;
@@ -546,7 +546,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
            table.load_preset(comfy_table::presets::NOTHING);

            table.set_header([
-                "ENDPOINT",
+                "NODE",
                "ADDRESS",
                "TIMELINE",
                "BRANCH NAME",
@@ -554,39 +554,39 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                "STATUS",
            ]);

-            for (endpoint_id, endpoint) in cplane
-                .endpoints
+            for ((_, node_name), node) in cplane
+                .nodes
                .iter()
-                .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
+                .filter(|((node_tenant_id, _), _)| node_tenant_id == &tenant_id)
            {
-                let lsn_str = match endpoint.lsn {
+                let lsn_str = match node.lsn {
                    None => {
-                        // -> primary endpoint
+                        // -> primary node
                        // Use the LSN at the end of the timeline.
                        timeline_infos
-                            .get(&endpoint.timeline_id)
+                            .get(&node.timeline_id)
                            .map(|bi| bi.last_record_lsn.to_string())
                            .unwrap_or_else(|| "?".to_string())
                    }
                    Some(lsn) => {
-                        // -> read-only endpoint
-                        // Use the endpoint's LSN.
+                        // -> read-only node
+                        // Use the node's LSN.
                        lsn.to_string()
                    }
                };

                let branch_name = timeline_name_mappings
-                    .get(&TenantTimelineId::new(tenant_id, endpoint.timeline_id))
+                    .get(&TenantTimelineId::new(tenant_id, node.timeline_id))
                    .map(|name| name.as_str())
                    .unwrap_or("?");

                table.add_row([
-                    endpoint_id.as_str(),
-                    &endpoint.pg_address.to_string(),
-                    &endpoint.timeline_id.to_string(),
+                    node_name.as_str(),
+                    &node.address.to_string(),
+                    &node.timeline_id.to_string(),
                    branch_name,
                    lsn_str.as_str(),
-                    endpoint.status(),
+                    node.status(),
                ]);
            }

@@ -597,10 +597,10 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .get_one::<String>("branch-name")
                .map(|s| s.as_str())
                .unwrap_or(DEFAULT_BRANCH_NAME);
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .map(String::to_string)
-                .unwrap_or_else(|| format!("ep-{branch_name}"));
+            let node_name = sub_args
+                .get_one::<String>("node")
+                .map(|node_name| node_name.to_string())
+                .unwrap_or_else(|| format!("{branch_name}_node"));

            let lsn = sub_args
                .get_one::<String>("lsn")
@@ -611,46 +611,22 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .get_branch_timeline_id(branch_name, tenant_id)
                .ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;

-            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
-            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
+            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
+
            let pg_version = sub_args
                .get_one::<u32>("pg-version")
                .copied()
                .context("Failed to parse postgres version from the argument string")?;

-            cplane.new_endpoint(
-                &endpoint_id,
-                tenant_id,
-                timeline_id,
-                lsn,
-                pg_port,
-                http_port,
-                pg_version,
-            )?;
+            cplane.new_node(tenant_id, &node_name, timeline_id, lsn, port, pg_version)?;
        }
        "start" => {
-            let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
-            let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
+            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
+            let node_name = sub_args
+                .get_one::<String>("node")
+                .ok_or_else(|| anyhow!("No node name was provided to start"))?;

-            // If --safekeepers argument is given, use only the listed safekeeper nodes.
-            let safekeepers =
-                if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
-                    let mut safekeepers: Vec<NodeId> = Vec::new();
-                    for sk_id in safekeepers_str.split(',').map(str::trim) {
-                        let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
-                            anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
-                        })?);
-                        safekeepers.push(sk_id);
-                    }
-                    safekeepers
-                } else {
-                    env.safekeepers.iter().map(|sk| sk.id).collect()
-                };
-
-            let endpoint = cplane.endpoints.get(endpoint_id.as_str());
+            let node = cplane.nodes.get(&(tenant_id, node_name.to_string()));

            let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) {
                let claims = Claims::new(Some(tenant_id), Scope::Tenant);
@@ -660,9 +636,9 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                None
            };

-            if let Some(endpoint) = endpoint {
-                println!("Starting existing endpoint {endpoint_id}...");
-                endpoint.start(&auth_token, safekeepers)?;
+            if let Some(node) = node {
+                println!("Starting existing postgres {node_name}...");
+                node.start(&auth_token)?;
            } else {
                let branch_name = sub_args
                    .get_one::<String>("branch-name")
@@ -687,34 +663,27 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                // start --port X
                // stop
                // start <-- will also use port X even without explicit port argument
-                println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ...");
+                println!("Starting new postgres (v{pg_version}) {node_name} on timeline {timeline_id} ...");

-                let ep = cplane.new_endpoint(
-                    endpoint_id,
-                    tenant_id,
-                    timeline_id,
-                    lsn,
-                    pg_port,
-                    http_port,
-                    pg_version,
-                )?;
-                ep.start(&auth_token, safekeepers)?;
+                let node =
+                    cplane.new_node(tenant_id, node_name, timeline_id, lsn, port, pg_version)?;
+                node.start(&auth_token)?;
            }
        }
        "stop" => {
-            let endpoint_id = sub_args
-                .get_one::<String>("endpoint_id")
-                .ok_or_else(|| anyhow!("No endpoint ID was provided to stop"))?;
+            let node_name = sub_args
+                .get_one::<String>("node")
+                .ok_or_else(|| anyhow!("No node name was provided to stop"))?;
            let destroy = sub_args.get_flag("destroy");

-            let endpoint = cplane
-                .endpoints
-                .get(endpoint_id.as_str())
-                .with_context(|| format!("postgres endpoint {endpoint_id} is not found"))?;
-            endpoint.stop(destroy)?;
+            let node = cplane
+                .nodes
+                .get(&(tenant_id, node_name.to_string()))
+                .with_context(|| format!("postgres {node_name} is not found"))?;
+            node.stop(destroy)?;
        }

-        _ => bail!("Unexpected endpoint subcommand '{sub_name}'"),
+        _ => bail!("Unexpected pg subcommand '{sub_name}'"),
    }

    Ok(())
@@ -833,7 +802,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
 }

 fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
-    // Endpoints are not started automatically
+    // Postgres nodes are not started automatically

    broker::start_broker_process(env)?;

@@ -867,10 +836,10 @@ fn handle_stop_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<
 fn try_stop_all(env: &local_env::LocalEnv, immediate: bool) {
    let pageserver = PageServerNode::from_env(env);

-    // Stop all endpoints
+    // Stop all compute nodes
    match ComputeControlPlane::load(env.clone()) {
        Ok(cplane) => {
-            for (_k, node) in cplane.endpoints {
+            for (_k, node) in cplane.nodes {
                if let Err(e) = node.stop(false) {
                    eprintln!("postgres stop failed: {e:#}");
                }
@@ -903,9 +872,7 @@ fn cli() -> Command {
        .help("Name of the branch to be created or used as an alias for other services")
        .required(false);

-    let endpoint_id_arg = Arg::new("endpoint_id")
-        .help("Postgres endpoint id")
-        .required(false);
+    let pg_node_arg = Arg::new("node").help("Postgres node name").required(false);

    let safekeeper_id_arg = Arg::new("id").help("safekeeper id").required(false);

@@ -926,22 +893,11 @@ fn cli() -> Command {
        .value_parser(value_parser!(u32))
        .default_value(DEFAULT_PG_VERSION);

-    let pg_port_arg = Arg::new("pg-port")
-        .long("pg-port")
+    let port_arg = Arg::new("port")
+        .long("port")
        .required(false)
        .value_parser(value_parser!(u16))
-        .value_name("pg-port");
-
-    let http_port_arg = Arg::new("http-port")
-        .long("http-port")
-        .required(false)
-        .value_parser(value_parser!(u16))
-        .value_name("http-port");
-
-    let safekeepers_arg = Arg::new("safekeepers")
-        .long("safekeepers")
-        .required(false)
-        .value_name("safekeepers");
+        .value_name("port");

    let stop_mode_arg = Arg::new("stop-mode")
        .short('m')
@@ -1070,40 +1026,37 @@ fn cli() -> Command {
                )
        )
        .subcommand(
-            Command::new("endpoint")
+            Command::new("pg")
                .arg_required_else_help(true)
                .about("Manage postgres instances")
                .subcommand(Command::new("list").arg(tenant_id_arg.clone()))
                .subcommand(Command::new("create")
-                    .about("Create a compute endpoint")
-                    .arg(endpoint_id_arg.clone())
+                    .about("Create a postgres compute node")
+                    .arg(pg_node_arg.clone())
                    .arg(branch_name_arg.clone())
                    .arg(tenant_id_arg.clone())
                    .arg(lsn_arg.clone())
-                    .arg(pg_port_arg.clone())
-                    .arg(http_port_arg.clone())
+                    .arg(port_arg.clone())
                    .arg(
                        Arg::new("config-only")
-                            .help("Don't do basebackup, create endpoint directory with only config files")
+                            .help("Don't do basebackup, create compute node with only config files")
                            .long("config-only")
                            .required(false))
                    .arg(pg_version_arg.clone())
                )
                .subcommand(Command::new("start")
-                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
-                    .arg(endpoint_id_arg.clone())
+                    .about("Start a postgres compute node.\n This command actually creates new node from scratch, but preserves existing config files")
+                    .arg(pg_node_arg.clone())
                    .arg(tenant_id_arg.clone())
                    .arg(branch_name_arg)
                    .arg(timeline_id_arg)
                    .arg(lsn_arg)
-                    .arg(pg_port_arg)
-                    .arg(http_port_arg)
+                    .arg(port_arg)
                    .arg(pg_version_arg)
-                    .arg(safekeepers_arg)
                )
                .subcommand(
                    Command::new("stop")
-                    .arg(endpoint_id_arg)
+                    .arg(pg_node_arg)
                    .arg(tenant_id_arg)
                    .arg(
                        Arg::new("destroy")
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -1,9 +1,3 @@
-//! Code to manage the storage broker
-//!
-//! In the local test environment, the data for each safekeeper is stored in
-//!
-//!   .neon/safekeepers/<safekeeper id>
-//!
 use anyhow::Context;

 use std::path::PathBuf;
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -0,0 +1,539 @@
+use std::collections::BTreeMap;
+use std::fs::{self, File};
+use std::io::Write;
+use std::net::SocketAddr;
+use std::net::TcpStream;
+use std::os::unix::fs::PermissionsExt;
+use std::path::PathBuf;
+use std::process::{Command, Stdio};
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::{Context, Result};
+use utils::{
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
+};
+
+use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
+use crate::pageserver::PageServerNode;
+use crate::postgresql_conf::PostgresConf;
+
+/// In-memory registry of the local Postgres compute nodes.
+///
+/// Built by `load`, which scans the node data directories on disk;
+/// `new_node` adds further entries.
+pub struct ComputeControlPlane {
+    // Fallback used by `get_port` when no node exists yet, so the first
+    // automatically assigned port is `base_port + 1`.
+    base_port: u16,
+    // Pageserver handle shared (via `Arc`) with every node.
+    pageserver: Arc<PageServerNode>,
+    // Known nodes, keyed by `(tenant id, node name)`.
+    pub nodes: BTreeMap<(TenantId, String), Arc<PostgresNode>>,
+    // Local environment; cloned into each node created by `new_node`.
+    env: LocalEnv,
+}
+
+impl ComputeControlPlane {
+    /// Build the control plane by scanning the node data directories on disk.
+    ///
+    /// Directory structure has the following layout:
+    ///
+    /// ```text
+    /// pgdatadirs
+    /// |- tenants
+    /// |  |- <tenant_id>
+    /// |  |   |- <node name>
+    /// ```
+    ///
+    /// Every entry found under a tenant directory is parsed with
+    /// `PostgresNode::from_dir_entry`; the first failure aborts the whole load.
+    pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
+        let pageserver = Arc::new(PageServerNode::from_env(&env));
+        let root = env.pg_data_dirs_path();
+
+        let tenant_dirs = fs::read_dir(&root)
+            .with_context(|| format!("failed to list {}", root.display()))?;
+
+        let mut nodes = BTreeMap::default();
+        for tenant_entry in tenant_dirs {
+            let tenant_path = tenant_entry?.path();
+            let node_dirs = fs::read_dir(&tenant_path)
+                .with_context(|| format!("failed to list {}", tenant_path.display()))?;
+
+            for node_entry in node_dirs {
+                let node = PostgresNode::from_dir_entry(node_entry?, &env, &pageserver)?;
+                let key = (node.tenant_id, node.name.clone());
+                nodes.insert(key, Arc::new(node));
+            }
+        }
+
+        Ok(ComputeControlPlane {
+            base_port: 55431,
+            pageserver,
+            nodes,
+            env,
+        })
+    }
+
+    /// Pick a port for a new node: one above the highest port used by any
+    /// known node, or `base_port + 1` when there are no nodes yet.
+    fn get_port(&mut self) -> u16 {
+        let highest_in_use = self.nodes.values().map(|n| n.address.port()).max();
+        let reference = match highest_in_use {
+            Some(port) => port,
+            None => self.base_port,
+        };
+        1 + reference
+    }
+
+    /// Create a new compute node on disk and register it in `self.nodes`.
+    ///
+    /// The node listens on `127.0.0.1` at `port`, or at the port chosen by
+    /// `get_port` when `port` is `None`. A `Some` value for `lsn` makes it a
+    /// read-only node pinned to that LSN. Only the data directory,
+    /// `postgresql.conf` and `PG_VERSION` are written here; Postgres itself is
+    /// not started. An existing map entry with the same `(tenant_id, name)` key
+    /// is replaced.
+    pub fn new_node(
+        &mut self,
+        tenant_id: TenantId,
+        name: &str,
+        timeline_id: TimelineId,
+        lsn: Option<Lsn>,
+        port: Option<u16>,
+        pg_version: u32,
+    ) -> Result<Arc<PostgresNode>> {
+        let port = match port {
+            Some(explicit) => explicit,
+            None => self.get_port(),
+        };
+        let address = SocketAddr::new("127.0.0.1".parse().unwrap(), port);
+
+        let node = Arc::new(PostgresNode {
+            name: name.to_owned(),
+            address,
+            env: self.env.clone(),
+            pageserver: Arc::clone(&self.pageserver),
+            is_test: false,
+            timeline_id,
+            lsn,
+            tenant_id,
+            uses_wal_proposer: false,
+            pg_version,
+        });
+
+        // Materialize the node on disk before publishing it in the map.
+        node.create_pgdata()?;
+        node.setup_pg_conf()?;
+
+        let key = (tenant_id, node.name.clone());
+        self.nodes.insert(key, Arc::clone(&node));
+
+        Ok(node)
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+/// A single local Postgres compute node and the settings needed to manage it.
+#[derive(Debug)]
+pub struct PostgresNode {
+    // Address Postgres listens on; written to `listen_addresses` / `port`.
+    pub address: SocketAddr,
+    // Node name; also the name of its data directory (see `pgdata`).
+    name: String,
+    pub env: LocalEnv,
+    // Pageserver that serves the basebackup and page requests.
+    pageserver: Arc<PageServerNode>,
+    // When true, dropping the node stops it and destroys its data directory.
+    is_test: bool,
+    pub timeline_id: TimelineId,
+    pub lsn: Option<Lsn>, // if it's a read-only node. None for primary
+    pub tenant_id: TenantId,
+    // Set when `neon.safekeepers` is present in the node's postgresql.conf;
+    // makes `load_basebackup` run sync-safekeepers first.
+    uses_wal_proposer: bool,
+    // Postgres major version; selects the bin/lib directories to use.
+    pg_version: u32,
+}
+
+impl PostgresNode {
+    /// Reconstruct a node from its data directory on disk.
+    ///
+    /// The node name is the directory name. Port, tenant id, timeline id and
+    /// the optional `recovery_target_lsn` are read from `postgresql.conf` in
+    /// that directory; the Postgres version comes from the `PG_VERSION` file,
+    /// falling back to `DEFAULT_PG_VERSION` when the file cannot be read.
+    ///
+    /// Returns an error if `entry` is not a directory, its name is not valid
+    /// UTF-8, the config file cannot be opened or read, one of the settings is
+    /// rejected by the config parser, or the version is not a number.
+    fn from_dir_entry(
+        entry: std::fs::DirEntry,
+        env: &LocalEnv,
+        pageserver: &Arc<PageServerNode>,
+    ) -> Result<PostgresNode> {
+        if !entry.file_type()?.is_dir() {
+            anyhow::bail!(
+                "PostgresNode::from_dir_entry failed: '{}' is not a directory",
+                entry.path().display()
+            );
+        }
+
+        // parse data directory name; report odd names as an error instead of panicking
+        let fname = entry.file_name();
+        let name = fname
+            .to_str()
+            .with_context(|| {
+                format!(
+                    "node directory name '{}' is not valid UTF-8",
+                    entry.path().display()
+                )
+            })?
+            .to_string();
+
+        // Read config file into memory
+        let cfg_path = entry.path().join("postgresql.conf");
+        let cfg_path_str = cfg_path.to_string_lossy();
+        let mut conf_file = File::open(&cfg_path)
+            .with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
+        let conf = PostgresConf::read(&mut conf_file)
+            .with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
+
+        // Read a few options from the config file
+        let context = format!("in config file {}", cfg_path_str);
+        let port: u16 = conf.parse_field("port", &context)?;
+        let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
+        let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
+        let uses_wal_proposer = conf.get("neon.safekeepers").is_some();
+
+        // Read postgres version from PG_VERSION file to determine which postgres version binary to use.
+        // If it doesn't exist, assume broken data directory and use default pg version.
+        let pg_version_path = entry.path().join("PG_VERSION");
+
+        let pg_version_str = fs::read_to_string(&pg_version_path)
+            .unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
+        // A PG_VERSION file written by initdb ends with a newline, so strip
+        // surrounding whitespace before parsing.
+        let pg_version = u32::from_str(pg_version_str.trim()).with_context(|| {
+            format!(
+                "failed to parse postgres version in {}",
+                pg_version_path.display()
+            )
+        })?;
+
+        // parse recovery_target_lsn, if any
+        let recovery_target_lsn: Option<Lsn> =
+            conf.parse_field_optional("recovery_target_lsn", &context)?;
+
+        // ok now
+        Ok(PostgresNode {
+            address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
+            name,
+            env: env.clone(),
+            pageserver: Arc::clone(pageserver),
+            is_test: false,
+            timeline_id,
+            lsn: recovery_target_lsn,
+            tenant_id,
+            uses_wal_proposer,
+            pg_version,
+        })
+    }
+
+    /// Run `postgres --sync-safekeepers` against this node's data directory
+    /// and return the LSN it prints on stdout.
+    ///
+    /// `auth_token`, when present, is handed to the child process through the
+    /// `NEON_AUTH_TOKEN` environment variable. `pg_version` selects the
+    /// Postgres binary and library directories.
+    ///
+    /// Returns an error (rather than panicking) if the binary cannot be
+    /// started or waited for, if it exits unsuccessfully (its stderr is
+    /// included in the message), or if its stdout is not a valid LSN.
+    fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
+        let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
+        let pg_lib_dir = self.env.pg_lib_dir(pg_version)?;
+        let mut cmd = Command::new(pg_path);
+
+        // Paths are passed as OS strings, so non-UTF-8 paths do not panic.
+        cmd.arg("--sync-safekeepers")
+            .env_clear()
+            .env("LD_LIBRARY_PATH", &pg_lib_dir)
+            .env("DYLD_LIBRARY_PATH", &pg_lib_dir)
+            .env("PGDATA", self.pgdata())
+            .stdout(Stdio::piped())
+            // Comment this to avoid capturing stderr (useful if command hangs)
+            .stderr(Stdio::piped());
+
+        if let Some(token) = auth_token {
+            cmd.env("NEON_AUTH_TOKEN", token);
+        }
+
+        let sync_handle = cmd
+            .spawn()
+            .context("postgres --sync-safekeepers failed to start")?;
+
+        let sync_output = sync_handle
+            .wait_with_output()
+            .context("postgres --sync-safekeepers failed")?;
+        if !sync_output.status.success() {
+            anyhow::bail!(
+                "sync-safekeepers failed: '{}'",
+                String::from_utf8_lossy(&sync_output.stderr)
+            );
+        }
+
+        let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
+        println!("Safekeepers synced on {}", lsn);
+        Ok(lsn)
+    }
+
+    /// Fetch a basebackup from the pageserver as a tar archive and unpack it
+    /// into the `self.pgdata()` directory.
+    ///
+    /// With `Some(lsn)` the backup is requested at that LSN; with `None` the
+    /// LSN is left out of the `basebackup` command.
+    fn do_basebackup(&self, lsn: Option<Lsn>) -> Result<()> {
+        let pgdata = self.pgdata();
+        println!(
+            "Extracting base backup to create postgres instance: path={} port={}",
+            pgdata.display(),
+            self.address.port()
+        );
+
+        let sql = match lsn {
+            Some(lsn) => format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn),
+            None => format!("basebackup {} {}", self.tenant_id, self.timeline_id),
+        };
+
+        let mut client = self
+            .pageserver
+            .page_server_psql_client()
+            .context("connecting to page server failed")?;
+        let copyreader = client
+            .copy_out(sql.as_str())
+            .context("page server 'basebackup' command failed")?;
+
+        // The archive is streamed straight from the `CopyOutReader`.
+        //
+        // `ignore_zeros` makes unpack() consume all of the Copy data instead
+        // of stopping at the end-of-archive marker. Otherwise an Error sent by
+        // the server after the tarball would go unnoticed.
+        let mut archive = tar::Archive::new(copyreader);
+        archive.set_ignore_zeros(true);
+        archive
+            .unpack(&pgdata)
+            .context("extracting base backup failed")?;
+
+        Ok(())
+    }
+
+    /// Create the node's data directory (including missing parents) and
+    /// set its permissions to `0700`.
+    fn create_pgdata(&self) -> Result<()> {
+        let pgdata = self.pgdata();
+
+        fs::create_dir_all(&pgdata)
+            .with_context(|| format!("could not create data directory {}", pgdata.display()))?;
+
+        let owner_only = fs::Permissions::from_mode(0o700);
+        fs::set_permissions(pgdata.as_path(), owner_only).with_context(|| {
+            format!(
+                "could not set permissions in data directory {}",
+                pgdata.display()
+            )
+        })
+    }
+
+    // Write postgresql.conf with default configuration
+    // and PG_VERSION file to the data directory of a new node.
+    //
+    // Besides fixed defaults, the config records this node's listen address and
+    // port, the pageserver connection string, the tenant and timeline ids, the
+    // recovery target LSN for read-only nodes, and either the safekeeper list or
+    // the direct-to-pageserver settings, depending on whether `env.safekeepers`
+    // is empty.
+    fn setup_pg_conf(&self) -> Result<()> {
+        let mut conf = PostgresConf::new();
+        conf.append("max_wal_senders", "10");
+        conf.append("wal_log_hints", "off");
+        conf.append("max_replication_slots", "10");
+        conf.append("hot_standby", "on");
+        conf.append("shared_buffers", "1MB");
+        conf.append("fsync", "off");
+        conf.append("max_connections", "100");
+        conf.append("wal_level", "replica");
+        // wal_sender_timeout is the maximum time to wait for WAL replication.
+        // It also defines how often the walreceiver will send a feedback message to the wal sender.
+        conf.append("wal_sender_timeout", "5s");
+        conf.append("listen_addresses", &self.address.ip().to_string());
+        conf.append("port", &self.address.port().to_string());
+        conf.append("wal_keep_size", "0");
+        // walproposer panics when the basebackup is invalid; restarting is pointless in that case.
+        conf.append("restart_after_crash", "off");
+
+        // Configure the node to fetch pages from pageserver
+        let pageserver_connstr = {
+            let config = &self.pageserver.pg_connection_config;
+            let (host, port) = (config.host(), config.port());
+
+            // NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
+            format!("postgresql://no_user@{host}:{port}")
+        };
+        conf.append("shared_preload_libraries", "neon");
+        conf.append_line("");
+        conf.append("neon.pageserver_connstring", &pageserver_connstr);
+        conf.append("neon.tenant_id", &self.tenant_id.to_string());
+        conf.append("neon.timeline_id", &self.timeline_id.to_string());
+        if let Some(lsn) = self.lsn {
+            conf.append("recovery_target_lsn", &lsn.to_string());
+        }
+
+        conf.append_line("");
+        // Configure backpressure
+        // - Replication write lag depends on how fast the walreceiver can process incoming WAL.
+        //   This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
+        //   so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
+        //   Actually latency should be much smaller (better if < 1sec). But we assume that recently
+        //   updated pages are not requested from pageserver.
+        // - Replication flush lag depends on speed of persisting data by checkpointer (creation of
+        //   delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
+        //   remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
+        //   recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
+        // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
+        //   To be able to restore database in case of pageserver node crash, safekeeper should not
+        //   remove WAL beyond this point. Too large a lag can cause space exhaustion in safekeepers
+        //   (if they are not able to upload WAL to S3).
+        conf.append("max_replication_write_lag", "15MB");
+        conf.append("max_replication_flush_lag", "10GB");
+
+        if !self.env.safekeepers.is_empty() {
+            // Configure the node to connect to the safekeepers
+            conf.append("synchronous_standby_names", "walproposer");
+
+            let safekeepers = self
+                .env
+                .safekeepers
+                .iter()
+                .map(|sk| format!("localhost:{}", sk.pg_port))
+                .collect::<Vec<String>>()
+                .join(",");
+            conf.append("neon.safekeepers", &safekeepers);
+        } else {
+            // We only use setup without safekeepers for tests,
+            // and don't care about data durability on pageserver,
+            // so set more relaxed synchronous_commit.
+            conf.append("synchronous_commit", "remote_write");
+
+            // Configure the node to stream WAL directly to the pageserver
+            // This isn't really a supported configuration, but can be useful for
+            // testing.
+            conf.append("synchronous_standby_names", "pageserver");
+        }
+
+        let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
+        file.write_all(conf.to_string().as_bytes())?;
+
+        let mut file = File::create(self.pgdata().join("PG_VERSION"))?;
+        file.write_all(self.pg_version.to_string().as_bytes())?;
+
+        Ok(())
+    }
+
+    /// Decide which LSN to request and fetch the basebackup at it.
+    ///
+    /// * Read-only node (`self.lsn` set): use that LSN.
+    /// * Node with `uses_wal_proposer` set: run sync-safekeepers first and use
+    ///   the LSN it reports, unless that is 0.
+    /// * Otherwise: request the backup without an LSN.
+    fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
+        let backup_lsn = match self.lsn {
+            Some(pinned) => Some(pinned),
+            None if self.uses_wal_proposer => {
+                // LSN 0 means that it is bootstrap and we need to download just
+                // latest data from the pageserver. That is a bit clumsy but whole bootstrap
+                // procedure evolves quite actively right now, so let's think about it again
+                // when things would be more stable (TODO).
+                let synced = self.sync_safekeepers(auth_token, self.pg_version)?;
+                if synced == Lsn(0) {
+                    None
+                } else {
+                    Some(synced)
+                }
+            }
+            None => None,
+        };
+
+        self.do_basebackup(backup_lsn)
+    }
+
+    /// Path of this node's data directory, as computed by the local
+    /// environment from the tenant id and node name.
+    pub fn pgdata(&self) -> PathBuf {
+        let env = &self.env;
+        env.pg_data_dir(&self.tenant_id, &self.name)
+    }
+
+    /// Report the node state as a human-readable string.
+    ///
+    /// The state combines two probes: whether `postmaster.pid` exists in the
+    /// data directory, and whether a TCP connection to the node's address
+    /// succeeds within 300 ms.
+    pub fn status(&self) -> &str {
+        let has_pidfile = self.pgdata().join("postmaster.pid").exists();
+        let can_connect =
+            TcpStream::connect_timeout(&self.address, Duration::from_millis(300)).is_ok();
+
+        if has_pidfile {
+            if can_connect {
+                "running"
+            } else {
+                "crashed"
+            }
+        } else if can_connect {
+            "running, no pidfile"
+        } else {
+            "stopped"
+        }
+    }
+
+    /// Run `pg_ctl` for this node's data directory with the extra `args`.
+    ///
+    /// `-D <pgdata> -l <pgdata>/pg.log -w` is always passed ahead of `args`.
+    /// The child environment is cleared except for the library search paths
+    /// and, when `auth_token` is set, `NEON_AUTH_TOKEN`. An unsuccessful exit
+    /// becomes an error carrying the exit status, stdout and stderr.
+    fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
+        let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
+
+        let pgdata = self.pgdata();
+        let log_path = pgdata.join("pg.log");
+        let mut full_args: Vec<&str> = vec![
+            "-D",
+            pgdata.to_str().unwrap(),
+            "-l",
+            log_path.to_str().unwrap(),
+            "-w", // wait till pg_ctl actually does what was asked
+        ];
+        full_args.extend_from_slice(args);
+
+        let lib_dir = self.env.pg_lib_dir(self.pg_version)?;
+        let lib_dir = lib_dir.to_str().unwrap();
+
+        let mut cmd = Command::new(pg_ctl_path);
+        cmd.args(full_args)
+            .env_clear()
+            .env("LD_LIBRARY_PATH", lib_dir)
+            .env("DYLD_LIBRARY_PATH", lib_dir);
+
+        // Pass authentication token used for the connections to pageserver and safekeepers
+        if let Some(token) = auth_token {
+            cmd.env("NEON_AUTH_TOKEN", token);
+        }
+
+        let pg_ctl = cmd.output().context("pg_ctl failed")?;
+        if pg_ctl.status.success() {
+            return Ok(());
+        }
+
+        anyhow::bail!(
+            "pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
+            pg_ctl.status,
+            String::from_utf8_lossy(&pg_ctl.stdout),
+            String::from_utf8_lossy(&pg_ctl.stderr),
+        );
+    }
+
+    /// Start this compute node from scratch.
+    ///
+    /// Bails if `status()` reports "running". Otherwise the data directory is
+    /// wiped and recreated (only `postgresql.conf` is carried over), a fresh
+    /// basebackup is loaded, `standby.signal` is created for read-only nodes
+    /// (`lsn` set), and finally `pg_ctl start` is run.
+    ///
+    /// `auth_token` is forwarded to the basebackup step and to `pg_ctl`.
+    /// Fails if the existing `postgresql.conf` cannot be read, or if any of
+    /// the filesystem, basebackup or `pg_ctl` steps fails.
+    pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
+        // Bail if the node already running.
+        if self.status() == "running" {
+            anyhow::bail!("The node is already running");
+        }
+
+        let pgdata = self.pgdata();
+
+        // 1. We always start compute node from scratch, so
+        // if old dir exists, preserve 'postgresql.conf' and drop the directory
+        let postgresql_conf_path = pgdata.join("postgresql.conf");
+        let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
+            // `display()` cannot panic, unlike `to_str().unwrap()` on a non-UTF-8 path.
+            format!(
+                "failed to read config file in {}",
+                postgresql_conf_path.display()
+            )
+        })?;
+        fs::remove_dir_all(&pgdata)
+            .with_context(|| format!("failed to remove data directory {}", pgdata.display()))?;
+        self.create_pgdata()?;
+
+        // 2. Bring back config files
+        fs::write(&postgresql_conf_path, postgresql_conf).with_context(|| {
+            format!(
+                "failed to restore config file {}",
+                postgresql_conf_path.display()
+            )
+        })?;
+
+        // 3. Load basebackup
+        self.load_basebackup(auth_token)?;
+
+        if self.lsn.is_some() {
+            let signal_path = pgdata.join("standby.signal");
+            File::create(&signal_path)
+                .with_context(|| format!("failed to create {}", signal_path.display()))?;
+        }
+
+        // 4. Finally start the compute node postgres
+        println!("Starting postgres node at '{}'", self.connstr());
+        self.pg_ctl(&["start"], auth_token)
+    }
+
+    // Restart Postgres by running `pg_ctl restart`. Unlike `start`, this does not
+    // wipe the data directory or load a basebackup.
+    pub fn restart(&self, auth_token: &Option<String>) -> Result<()> {
+        self.pg_ctl(&["restart"], auth_token)
+    }
+
+    /// Stop the node, optionally destroying its data directory afterwards.
+    ///
+    /// With `destroy` set, Postgres is shut down in immediate mode and the
+    /// data directory is removed. Otherwise it is shut down gracefully to
+    /// leave the data directory sane.
+    ///
+    /// Compute node always starts from scratch, so stop without destroy is
+    /// only used for testing and debugging.
+    ///
+    /// Fails if `pg_ctl` fails or the data directory cannot be removed; the
+    /// directory is not removed when the `pg_ctl` stop itself fails.
+    pub fn stop(&self, destroy: bool) -> Result<()> {
+        if !destroy {
+            return self.pg_ctl(&["stop"], &None);
+        }
+
+        self.pg_ctl(&["-m", "immediate", "stop"], &None)?;
+
+        let pgdata = self.pgdata();
+        // `display()` cannot panic, unlike `to_str().unwrap()` on a non-UTF-8 path.
+        println!(
+            "Destroying postgres data directory '{}'",
+            pgdata.display()
+        );
+        fs::remove_dir_all(&pgdata)
+            .with_context(|| format!("failed to remove data directory {}", pgdata.display()))?;
+        Ok(())
+    }
+
+    /// Key/value connection string for this node, using the fixed
+    /// `cloud_admin` user and `postgres` database.
+    pub fn connstr(&self) -> String {
+        let host = self.address.ip();
+        let port = self.address.port();
+        let user = "cloud_admin";
+        let dbname = "postgres";
+
+        format!(
+            "host={} port={} user={} dbname={}",
+            host, port, user, dbname
+        )
+    }
+
+    /// Return the trimmed output of the `whoami` command.
+    ///
+    /// Panics if the command cannot be executed, exits unsuccessfully, or
+    /// prints bytes that are not valid UTF-8.
+    // XXX: cache that in control plane
+    pub fn whoami(&self) -> String {
+        let output = Command::new("whoami")
+            .output()
+            .expect("failed to execute whoami");
+        assert!(output.status.success(), "whoami failed");
+
+        let stdout = String::from_utf8(output.stdout).unwrap();
+        stdout.trim().to_string()
+    }
+}
+
+impl Drop for PostgresNode {
+    /// Destructor that cleans up state after a test is done: nodes flagged
+    /// `is_test` are stopped and their data directory destroyed.
+    ///
+    // XXX: we may detect failed test by setting some flag in catch_unwind()
+    // and checking it here. But let just clean datadirs on start.
+    fn drop(&mut self) {
+        if !self.is_test {
+            return;
+        }
+        // Best effort: a destructor has nowhere to report the error.
+        let _ = self.stop(true);
+    }
+}
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -1,568 +0,0 @@
-//! Code to manage compute endpoints
-//!
-//! In the local test environment, the data for each endpoint is stored in
-//!
-//!   .neon/endpoints/<endpoint id>
-//!
-//! Some basic information about the endpoint, like the tenant and timeline IDs,
-//! are stored in the `endpoint.json` file. The `endpoint.json` file is created
-//! when the endpoint is created, and doesn't change afterwards.
-//!
-//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is
-//! started, we launch `compute_ctl` It synchronizes the safekeepers, downloads
-//! the basebackup from the pageserver to initialize the the data directory, and
-//! finally launches the PostgreSQL process. It watches the PostgreSQL process
-//! until it exits.
-//!
-//! When an endpoint is created, a `postgresql.conf` file is also created in
-//! the endpoint's directory. The file can be modified before starting PostgreSQL.
-//! However, the `postgresql.conf` file in the endpoint directory is not used directly
-//! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another
-//! copy of it in the data directory.
-//!
-//! Directory contents:
-//!
-//! ```ignore
-//! .neon/endpoints/main/
-//!     compute.log               - log output of `compute_ctl` and `postgres`
-//!     endpoint.json             - serialized `EndpointConf` struct
-//!     postgresql.conf           - postgresql settings
-//!     spec.json                 - passed to `compute_ctl`
-//!     pgdata/
-//!         postgresql.conf       - copy of postgresql.conf created by `compute_ctl`
-//!         zenith.signal
-//!         <other PostgreSQL files>
-//! ```
-//!
-use std::collections::BTreeMap;
-use std::net::SocketAddr;
-use std::net::TcpStream;
-use std::path::PathBuf;
-use std::process::Command;
-use std::sync::Arc;
-use std::time::Duration;
-
-use anyhow::{anyhow, bail, Context, Result};
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
-use utils::{
-    id::{NodeId, TenantId, TimelineId},
-    lsn::Lsn,
-};
-
-use crate::local_env::LocalEnv;
-use crate::pageserver::PageServerNode;
-use crate::postgresql_conf::PostgresConf;
-
-use compute_api::responses::{ComputeState, ComputeStatus};
-use compute_api::spec::ComputeSpecV2;
-
-// contents of a endpoint.json file
-#[serde_as]
-#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
-pub struct EndpointConf {
-    endpoint_id: String,
-    #[serde_as(as = "DisplayFromStr")]
-    tenant_id: TenantId,
-    #[serde_as(as = "DisplayFromStr")]
-    timeline_id: TimelineId,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    lsn: Option<Lsn>,
-    pg_port: u16,
-    http_port: u16,
-    pg_version: u32,
-}
-
-//
-// ComputeControlPlane
-//
-pub struct ComputeControlPlane {
-    base_port: u16,
-
-    // endpoint ID is the key
-    pub endpoints: BTreeMap<String, Arc<Endpoint>>,
-
-    env: LocalEnv,
-    pageserver: Arc<PageServerNode>,
-}
-
-impl ComputeControlPlane {
-    // Load current endpoints from the endpoints/ subdirectories
-    pub fn load(env: LocalEnv) -> Result<ComputeControlPlane> {
-        let pageserver = Arc::new(PageServerNode::from_env(&env));
-
-        let mut endpoints = BTreeMap::default();
-        for endpoint_dir in std::fs::read_dir(env.endpoints_path())
-            .with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
-        {
-            let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?;
-            endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
-        }
-
-        Ok(ComputeControlPlane {
-            base_port: 55431,
-            endpoints,
-            env,
-            pageserver,
-        })
-    }
-
-    fn get_port(&mut self) -> u16 {
-        1 + self
-            .endpoints
-            .values()
-            .map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port()))
-            .max()
-            .unwrap_or(self.base_port)
-    }
-
-    #[allow(clippy::too_many_arguments)]
-    pub fn new_endpoint(
-        &mut self,
-        endpoint_id: &str,
-        tenant_id: TenantId,
-        timeline_id: TimelineId,
-        lsn: Option<Lsn>,
-        pg_port: Option<u16>,
-        http_port: Option<u16>,
-        pg_version: u32,
-    ) -> Result<Arc<Endpoint>> {
-        let pg_port = pg_port.unwrap_or_else(|| self.get_port());
-        let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
-        let ep = Arc::new(Endpoint {
-            endpoint_id: endpoint_id.to_owned(),
-            pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
-            http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
-            env: self.env.clone(),
-            pageserver: Arc::clone(&self.pageserver),
-            timeline_id,
-            lsn,
-            tenant_id,
-            pg_version,
-        });
-
-        ep.create_endpoint_dir()?;
-        std::fs::write(
-            ep.endpoint_path().join("endpoint.json"),
-            serde_json::to_string_pretty(&EndpointConf {
-                endpoint_id: endpoint_id.to_string(),
-                tenant_id,
-                timeline_id,
-                lsn,
-                http_port,
-                pg_port,
-                pg_version,
-            })?,
-        )?;
-        std::fs::write(
-            ep.endpoint_path().join("postgresql.conf"),
-            ep.setup_pg_conf()?.to_string(),
-        )?;
-
-        self.endpoints
-            .insert(ep.endpoint_id.clone(), Arc::clone(&ep));
-
-        Ok(ep)
-    }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-
-#[derive(Debug)]
-pub struct Endpoint {
-    /// used as the directory name
-    endpoint_id: String,
-    pub tenant_id: TenantId,
-    pub timeline_id: TimelineId,
-    // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
-    pub lsn: Option<Lsn>,
-
-    // port and address of the Postgres server and `compute_ctl`'s HTTP API
-    pub pg_address: SocketAddr,
-    pub http_address: SocketAddr,
-    pg_version: u32,
-
-    // These are not part of the endpoint as such, but the environment
-    // the endpoint runs in.
-    pub env: LocalEnv,
-    pageserver: Arc<PageServerNode>,
-}
-
-impl Endpoint {
-    fn from_dir_entry(
-        entry: std::fs::DirEntry,
-        env: &LocalEnv,
-        pageserver: &Arc<PageServerNode>,
-    ) -> Result<Endpoint> {
-        if !entry.file_type()?.is_dir() {
-            anyhow::bail!(
-                "Endpoint::from_dir_entry failed: '{}' is not a directory",
-                entry.path().display()
-            );
-        }
-
-        // parse data directory name
-        let fname = entry.file_name();
-        let endpoint_id = fname.to_str().unwrap().to_string();
-
-        // Read the endpoint.json file
-        let conf: EndpointConf =
-            serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
-
-        Ok(Endpoint {
-            pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
-            http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
-            endpoint_id,
-            env: env.clone(),
-            pageserver: Arc::clone(pageserver),
-            timeline_id: conf.timeline_id,
-            lsn: conf.lsn,
-            tenant_id: conf.tenant_id,
-            pg_version: conf.pg_version,
-        })
-    }
-
-    fn create_endpoint_dir(&self) -> Result<()> {
-        std::fs::create_dir_all(self.endpoint_path()).with_context(|| {
-            format!(
-                "could not create endpoint directory {}",
-                self.endpoint_path().display()
-            )
-        })
-    }
-
-    // Generate postgresql.conf with default configuration
-    fn setup_pg_conf(&self) -> Result<PostgresConf> {
-        let mut conf = PostgresConf::new();
-        conf.append("max_wal_senders", "10");
-        conf.append("wal_log_hints", "off");
-        conf.append("max_replication_slots", "10");
-        conf.append("hot_standby", "on");
-        conf.append("shared_buffers", "1MB");
-        conf.append("fsync", "off");
-        conf.append("max_connections", "100");
-        conf.append("wal_level", "replica");
-        // wal_sender_timeout is the maximum time to wait for WAL replication.
-        // It also defines how often the walreciever will send a feedback message to the wal sender.
-        conf.append("wal_sender_timeout", "5s");
-        conf.append("listen_addresses", &self.pg_address.ip().to_string());
-        conf.append("port", &self.pg_address.port().to_string());
-        conf.append("wal_keep_size", "0");
-        // walproposer panics when basebackup is invalid, it is pointless to restart in this case.
-        conf.append("restart_after_crash", "off");
-
-        // Load the 'neon' extension
-        conf.append("shared_preload_libraries", "neon");
-        conf.append_line("");
-
-        // Configure backpressure
-        // - Replication write lag depends on how fast the walreceiver can process incoming WAL.
-        //   This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
-        //   so to avoid expiration of 1 minute timeout, this lag should not be larger than 600MB.
-        //   Actually latency should be much smaller (better if < 1sec). But we assume that recently
-        //   updates pages are not requested from pageserver.
-        // - Replication flush lag depends on speed of persisting data by checkpointer (creation of
-        //   delta/image layers) and advancing disk_consistent_lsn. Safekeepers are able to
-        //   remove/archive WAL only beyond disk_consistent_lsn. Too large a lag can cause long
-        //   recovery time (in case of pageserver crash) and disk space overflow at safekeepers.
-        // - Replication apply lag depends on speed of uploading changes to S3 by uploader thread.
-        //   To be able to restore database in case of pageserver node crash, safekeeper should not
-        //   remove WAL beyond this point. Too large lag can cause space exhaustion in safekeepers
-        //   (if they are not able to upload WAL to S3).
-        conf.append("max_replication_write_lag", "15MB");
-        conf.append("max_replication_flush_lag", "10GB");
-
-        if !self.env.safekeepers.is_empty() {
-            // Configure Postgres to connect to the safekeepers
-            conf.append("synchronous_standby_names", "walproposer");
-        } else {
-            // We only use setup without safekeepers for tests,
-            // and don't care about data durability on pageserver,
-            // so set more relaxed synchronous_commit.
-            conf.append("synchronous_commit", "remote_write");
-
-            // Configure the node to stream WAL directly to the pageserver
-            // This isn't really a supported configuration, but can be useful for
-            // testing.
-            conf.append("synchronous_standby_names", "pageserver");
-        }
-
-        Ok(conf)
-    }
-
-    pub fn endpoint_path(&self) -> PathBuf {
-        self.env.endpoints_path().join(&self.endpoint_id)
-    }
-
-    pub fn pgdata(&self) -> PathBuf {
-        self.endpoint_path().join("pgdata")
-    }
-
-    pub fn status(&self) -> &str {
-        let timeout = Duration::from_millis(300);
-        let has_pidfile = self.pgdata().join("postmaster.pid").exists();
-        let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok();
-
-        match (has_pidfile, can_connect) {
-            (true, true) => "running",
-            (false, false) => "stopped",
-            (true, false) => "crashed",
-            (false, true) => "running, no pidfile",
-        }
-    }
-
-    fn pg_ctl(&self, args: &[&str], auth_token: &Option<String>) -> Result<()> {
-        let pg_ctl_path = self.env.pg_bin_dir(self.pg_version)?.join("pg_ctl");
-        let mut cmd = Command::new(&pg_ctl_path);
-        cmd.args(
-            [
-                &[
-                    "-D",
-                    self.pgdata().to_str().unwrap(),
-                    "-l", // FIXME: does this make sense when we don't use pg_ctl start ?
-                    self.endpoint_path().join("pg.log").to_str().unwrap(),
-                    "-w", //wait till pg_ctl actually does what was asked
-                ],
-                args,
-            ]
-            .concat(),
-        )
-        .env_clear()
-        .env(
-            "LD_LIBRARY_PATH",
-            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
-        )
-        .env(
-            "DYLD_LIBRARY_PATH",
-            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
-        );
-
-        // Pass authentication token used for the connections to pageserver and safekeepers
-        if let Some(token) = auth_token {
-            cmd.env("NEON_AUTH_TOKEN", token);
-        }
-
-        let pg_ctl = cmd
-            .output()
-            .context(format!("{} failed", pg_ctl_path.display()))?;
-        if !pg_ctl.status.success() {
-            anyhow::bail!(
-                "pg_ctl failed, exit code: {}, stdout: {}, stderr: {}",
-                pg_ctl.status,
-                String::from_utf8_lossy(&pg_ctl.stdout),
-                String::from_utf8_lossy(&pg_ctl.stderr),
-            );
-        }
-        Ok(())
-    }
-
-    pub fn start(&self, auth_token: &Option<String>, safekeepers: Vec<NodeId>) -> Result<()> {
-        if self.status() == "running" {
-            anyhow::bail!("The endpoint is already running");
-        }
-
-        // Slurp the endpoints/<endpoint id>/postgresql.conf file into
-        // memory. We will include it in the spec file that we pass to
-        // `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf
-        // in the data directory.
-        let postgresql_conf_path = self.endpoint_path().join("postgresql.conf");
-        let postgresql_conf = match std::fs::read(&postgresql_conf_path) {
-            Ok(content) => String::from_utf8(content)?,
-            Err(e) if e.kind() == std::io::ErrorKind::NotFound => "".to_string(),
-            Err(e) => {
-                return Err(anyhow::Error::new(e).context(format!(
-                    "failed to read config file in {}",
-                    postgresql_conf_path.to_str().unwrap()
-                )))
-            }
-        };
-
-        // We always start the compute node from scratch, so if the Postgres
-        // data dir exists from a previous launch, remove it first.
-        if self.pgdata().exists() {
-            std::fs::remove_dir_all(self.pgdata())?;
-        }
-
-        let pageserver_connstring = {
-            let config = &self.pageserver.pg_connection_config;
-            let (host, port) = (config.host(), config.port());
-
-            // NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
-            format!("postgresql://no_user@{host}:{port}")
-        };
-        let mut safekeeper_connstrings = Vec::new();
-        for sk_id in safekeepers {
-            let sk = self
-                .env
-                .safekeepers
-                .iter()
-                .find(|node| node.id == sk_id)
-                .ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
-            safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.pg_port));
-        }
-
-        // Create spec file
-        let spec = ComputeSpecV2 {
-            format_version: 2,
-
-            project_id: None,
-            endpoint_id: Some(self.endpoint_id.clone()),
-            operation_uuid: None,
-
-            startup_tracing_context: None,
-
-            tenant_id: self.tenant_id,
-            timeline_id: self.timeline_id,
-            lsn: self.lsn,
-            pageserver_connstring,
-            safekeeper_connstrings,
-            storage_auth_token: auth_token.clone(),
-
-            postgresql_conf: Some(postgresql_conf),
-            settings: None,
-
-            roles: vec![],
-            databases: vec![],
-            extensions: vec![],
-            delta_operations: None,
-        };
-        let spec_path = self.endpoint_path().join("spec.json");
-        std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
-
-        // Open log file. We'll redirect the stdout and stderr of `compute_ctl` to it.
-        let logfile = std::fs::OpenOptions::new()
-            .create(true)
-            .append(true)
-            .open(self.endpoint_path().join("compute.log"))?;
-
-        // Launch compute_ctl
-        println!("Starting postgres node at '{}'", self.connstr());
-        let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
-        cmd.args(["--http-port", &self.http_address.port().to_string()])
-            .args(["--pgdata", self.pgdata().to_str().unwrap()])
-            .args(["--connstr", &self.connstr()])
-            .args([
-                "--spec-path",
-                self.endpoint_path().join("spec.json").to_str().unwrap(),
-            ])
-            .args([
-                "--pgbin",
-                self.env
-                    .pg_bin_dir(self.pg_version)?
-                    .join("postgres")
-                    .to_str()
-                    .unwrap(),
-            ])
-            .stdin(std::process::Stdio::null())
-            .stderr(logfile.try_clone()?)
-            .stdout(logfile);
-        let _child = cmd.spawn()?;
-
-        // Wait for it to start
-        let mut attempt = 0;
-        const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
-        const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s
-        loop {
-            attempt += 1;
-            match self.get_status() {
-                Ok(state) => {
-                    match state.status {
-                        ComputeStatus::Init => {
-                            if attempt == MAX_ATTEMPTS {
-                                bail!("compute startup timed out; still in Init state");
-                            }
-                            // keep retrying
-                        }
-                        ComputeStatus::Running => {
-                            // All good!
-                            break;
-                        }
-                        ComputeStatus::Failed => {
-                            bail!(
-                                "compute startup failed: {}",
-                                state
-                                    .error
-                                    .as_deref()
-                                    .unwrap_or("<no error from compute_ctl>")
-                            );
-                        }
-                        ComputeStatus::Empty | ComputeStatus::ConfigurationPending => {
-                            bail!("unexpected compute status: {:?}", state.status)
-                        }
-                    }
-                }
-                Err(e) => {
-                    if attempt == MAX_ATTEMPTS {
-                        return Err(e).context(
-                            "timed out waiting to connect to compute_ctl HTTP; last error: {e}",
-                        );
-                    }
-                }
-            }
-            std::thread::sleep(ATTEMPT_INTERVAL);
-        }
-
-        Ok(())
-    }
-
-    // Call the /status HTTP API
-    pub fn get_status(&self) -> Result<ComputeState> {
-        let client = reqwest::blocking::Client::new();
-
-        let response = client
-            .request(
-                reqwest::Method::GET,
-                format!(
-                    "http://{}:{}/status",
-                    self.http_address.ip(),
-                    self.http_address.port()
-                ),
-            )
-            .send()?;
-
-        // Interpret the response
-        let status = response.status();
-        if !(status.is_client_error() || status.is_server_error()) {
-            Ok(response.json()?)
-        } else {
-            // reqwest does not export its error construction utility functions, so let's craft the message ourselves
-            let url = response.url().to_owned();
-            let msg = match response.text() {
-                Ok(err_body) => format!("Error: {}", err_body),
-                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
-            };
-            Err(anyhow::anyhow!(msg))
-        }
-    }
-
-    pub fn stop(&self, destroy: bool) -> Result<()> {
-        // If we are going to destroy data directory,
-        // use immediate shutdown mode, otherwise,
-        // shutdown gracefully to leave the data directory sane.
-        //
-        // Postgres is always started from scratch, so stop
-        // without destroy only used for testing and debugging.
-        //
-        if destroy {
-            self.pg_ctl(&["-m", "immediate", "stop"], &None)?;
-            println!(
-                "Destroying postgres data directory '{}'",
-                self.pgdata().to_str().unwrap()
-            );
-            std::fs::remove_dir_all(self.endpoint_path())?;
-        } else {
-            self.pg_ctl(&["stop"], &None)?;
-        }
-        Ok(())
-    }
-
-    pub fn connstr(&self) -> String {
-        format!(
-            "postgresql://{}@{}:{}/{}",
-            "cloud_admin",
-            self.pg_address.ip(),
-            self.pg_address.port(),
-            "postgres"
-        )
-    }
-}
--- a/control_plane/src/lib.rs
+++ b/control_plane/src/lib.rs
@@ -9,7 +9,7 @@

 mod background_process;
 pub mod broker;
-pub mod endpoint;
+pub mod compute;
 pub mod local_env;
 pub mod pageserver;
 pub mod postgresql_conf;
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -37,7 +37,7 @@ pub const DEFAULT_PG_VERSION: u32 = 14;
 #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
 pub struct LocalEnv {
    // Base directory for all the nodes (the pageserver, safekeepers and
-    // compute endpoints).
+    // compute nodes).
    //
    // This is not stored in the config file. Rather, this is the path where the
    // config file itself is. It is read from the NEON_REPO_DIR env variable or
@@ -200,8 +200,14 @@ impl LocalEnv {
        self.neon_distrib_dir.join("storage_broker")
    }

-    pub fn endpoints_path(&self) -> PathBuf {
-        self.base_data_dir.join("endpoints")
+    pub fn pg_data_dirs_path(&self) -> PathBuf {
+        self.base_data_dir.join("pgdatadirs").join("tenants")
+    }
+
+    pub fn pg_data_dir(&self, tenant_id: &TenantId, branch_name: &str) -> PathBuf {
+        self.pg_data_dirs_path()
+            .join(tenant_id.to_string())
+            .join(branch_name)
    }

    // TODO: move pageserver files into ./pageserver
@@ -421,7 +427,7 @@ impl LocalEnv {
            }
        }

-        fs::create_dir_all(self.endpoints_path())?;
+        fs::create_dir_all(self.pg_data_dirs_path())?;

        for safekeeper in &self.safekeepers {
            fs::create_dir_all(SafekeeperNode::datadir_path_by_id(self, safekeeper.id))?;
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -1,9 +1,3 @@
-//! Code to manage pageservers
-//!
-//! In the local test environment, the pageserver stores its data directly in
-//!
-//!   .neon/
-//!
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fs::File;
@@ -369,11 +363,6 @@ impl PageServerNode {
                .map(|x| serde_json::from_str(x))
                .transpose()
                .context("Failed to parse 'eviction_policy' json")?,
-            min_resident_size_override: settings
-                .remove("min_resident_size_override")
-                .map(|x| x.parse::<u64>())
-                .transpose()
-                .context("Failed to parse 'min_resident_size_override' as integer")?,
        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
@@ -446,11 +435,6 @@ impl PageServerNode {
                    .map(|x| serde_json::from_str(x))
                    .transpose()
                    .context("Failed to parse 'eviction_policy' json")?,
-                min_resident_size_override: settings
-                    .get("min_resident_size_override")
-                    .map(|x| x.parse::<u64>())
-                    .transpose()
-                    .context("Failed to parse 'min_resident_size_override' as an integer")?,
            })
            .send()?
            .error_from_body()?;
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -1,9 +1,3 @@
-//! Code to manage safekeepers
-//!
-//! In the local test environment, the data for each safekeeper is stored in
-//!
-//!   .neon/safekeepers/<safekeeper id>
-//!
 use std::io::Write;
 use std::path::PathBuf;
 use std::process::Child;
@@ -162,7 +156,7 @@ impl SafekeeperNode {
        }

        background_process::start_process(
-            &format!("safekeeper-{id}"),
+            &format!("safekeeper {id}"),
            &datadir,
            &self.env.safekeeper_bin(),
            &args,
--- a/libs/compute_api/Cargo.toml
+++ b/libs/compute_api/Cargo.toml
@@ -1,15 +0,0 @@
-[package]
-name = "compute_api"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-anyhow.workspace = true
-chrono.workspace = true
-serde.workspace = true
-serde_with.workspace = true
-serde_json.workspace = true
-utils.workspace = true
-
-workspace_hack.workspace = true
--- a/libs/compute_api/src/lib.rs
+++ b/libs/compute_api/src/lib.rs
@@ -1,3 +0,0 @@
-pub mod requests;
-pub mod responses;
-pub mod spec;
--- a/libs/compute_api/src/models.rs
+++ b/libs/compute_api/src/models.rs
@@ -1,32 +0,0 @@
-//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
-use crate::rfc3339_serialize;
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Serialize};
-
-/// Response of the /status API
-#[derive(Deserialize, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub struct ComputeState {
-    pub status: ComputeStatus,
-    /// Timestamp of the last Postgres activity
-    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
-    pub error: Option<String>,
-}
-
-#[derive(Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
-#[serde(rename_all = "snake_case")]
-pub enum ComputeStatus {
-    Init,
-    Running,
-    Failed,
-}
-
-/// Response of the /metrics.json API
-#[derive(Clone, Default, Serialize)]
-pub struct ComputeMetrics {
-    pub sync_safekeepers_ms: u64,
-    pub basebackup_ms: u64,
-    pub config_ms: u64,
-    pub total_startup_ms: u64,
-}
--- a/libs/compute_api/src/requests.rs
+++ b/libs/compute_api/src/requests.rs
@@ -1,14 +0,0 @@
-//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
-
-use crate::spec::ComputeSpecAnyVersion;
-use serde::Deserialize;
-
-/// Request of the /configure API
-///
-/// We now pass only `spec` in the configuration request, but later we can
-/// extend it and something like `restart: bool` or something else. So put
-/// `spec` into a struct initially to be more flexible in the future.
-#[derive(Deserialize, Debug)]
-pub struct ConfigurationRequest {
-    pub spec: ComputeSpecAnyVersion,
-}
--- a/libs/compute_api/src/responses.rs
+++ b/libs/compute_api/src/responses.rs
@@ -1,66 +0,0 @@
-//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
-
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Serialize, Serializer};
-
-#[derive(Serialize, Debug, Deserialize)]
-pub struct GenericAPIError {
-    pub error: String,
-}
-
-/// Response of the /status API
-#[derive(Serialize, Debug, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub struct ComputeStatusResponse {
-    pub tenant: Option<String>,
-    pub timeline: Option<String>,
-    pub status: ComputeStatus,
-    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
-    pub error: Option<String>,
-}
-
-#[derive(Deserialize, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub struct ComputeState {
-    pub status: ComputeStatus,
-    /// Timestamp of the last Postgres activity
-    #[serde(serialize_with = "rfc3339_serialize")]
-    pub last_active: DateTime<Utc>,
-    pub error: Option<String>,
-}
-
-#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
-#[serde(rename_all = "snake_case")]
-pub enum ComputeStatus {
-    // Spec wasn't provided at start, waiting for it to be
-    // provided by control-plane.
-    Empty,
-    // Compute configuration was requested.
-    ConfigurationPending,
-    // Compute node has spec and initial startup and
-    // configuration is in progress.
-    Init,
-    // Compute is configured and running.
-    Running,
-    // Either startup or configuration failed,
-    // compute will exit soon or is waiting for
-    // control-plane to terminate it.
-    Failed,
-}
-
-fn rfc3339_serialize<S>(x: &DateTime<Utc>, s: S) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-{
-    x.to_rfc3339().serialize(s)
-}
-
-/// Response of the /metrics.json API
-#[derive(Clone, Debug, Default, Serialize)]
-pub struct ComputeMetrics {
-    pub sync_safekeepers_ms: u64,
-    pub basebackup_ms: u64,
-    pub config_ms: u64,
-    pub total_startup_ms: u64,
-}
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -1,321 +0,0 @@
-//! `ComputeSpec` represents the contents of the spec.json file.
-//!
-//! The spec.json file is used to pass information to 'compute_ctl'. It contains
-//! all the information needed to start up the right version of PostgreSQL,
-//! and connect it to the storage nodes.
-use anyhow::anyhow;
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
-use std::collections::HashMap;
-use std::str::FromStr;
-use utils::id::{TenantId, TimelineId};
-use utils::lsn::Lsn;
-
-/// String type alias representing Postgres identifier and
-/// intended to be used for DB / role names.
-pub type PgIdent = String;
-
-/// Cluster spec or configuration represented as an optional number of
-/// delta operations + final cluster state description.
-#[serde_as]
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct ComputeSpecV2 {
-    pub format_version: u64,
-
-    // For debugging purposes only
-    pub project_id: Option<String>,
-    pub endpoint_id: Option<String>,
-    pub operation_uuid: Option<String>,
-
-    /// W3C trace context of the launch operation, for OpenTelemetry tracing
-    pub startup_tracing_context: Option<HashMap<String, String>>,
-
-    // Information needed to connect to the storage layer.
-    //
-    // `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
-    //
-    // If Lsn == None, this is a primary endpoint that continues writing WAL at
-    // the end of the timeline. If 'lsn' is set, this is a read-only node
-    // "anchored" at that LSN. 'safekeeper_connstrings' must be non-empty for a
-    // primary.
-    #[serde_as(as = "DisplayFromStr")]
-    pub tenant_id: TenantId,
-    #[serde_as(as = "DisplayFromStr")]
-    pub timeline_id: TimelineId,
-    #[serde_as(as = "Option<DisplayFromStr>")]
-    pub lsn: Option<Lsn>,
-    pub pageserver_connstring: String,
-    pub safekeeper_connstrings: Vec<String>,
-
-    /// If set, 'storage_auth_token' is used as the password to authenticate to
-    /// the pageserver and safekeepers.
-    pub storage_auth_token: Option<String>,
-
-    /// Contents of postgresql.conf file
-    pub postgresql_conf: Option<String>,
-
-    /// Extra settings to append to the postgresql.conf
-    pub settings: GenericOptions,
-
-    // Expected cluster state at the end of transition process.
-    pub roles: Vec<Role>,
-    pub databases: Vec<Database>,
-    pub extensions: Vec<PgIdent>,
-    pub delta_operations: Option<Vec<DeltaOp>>,
-}
-
-#[derive(Deserialize)]
-struct FormatVersionOnly {
-    format_version: u64,
-}
-
-impl TryFrom<ComputeSpecAnyVersion> for ComputeSpecV2 {
-    type Error = anyhow::Error;
-
-    fn try_from(input: ComputeSpecAnyVersion) -> Result<ComputeSpecV2, anyhow::Error> {
-        // First check the 'format_version' field
-        match serde_json::from_value::<FormatVersionOnly>(input.0.clone())?.format_version {
-            1 => {
-                let v1: ComputeSpecV1 = serde_json::from_value(input.0)?;
-
-                ComputeSpecV2::upgrade_from_v1(v1)
-            }
-            2 => {
-                let v2: ComputeSpecV2 = serde_json::from_value(input.0)?;
-                Ok(v2)
-            }
-            other => Err(anyhow::anyhow!(
-                "unexpected format version {other} in spec file"
-            )),
-        }
-    }
-}
-
-impl ComputeSpecV2 {
-    pub fn parse_and_upgrade(input: &str) -> anyhow::Result<ComputeSpecV2> {
-        ComputeSpecV2::try_from(ComputeSpecAnyVersion(serde_json::from_str::<
-            serde_json::Value,
-        >(input)?))
-    }
-
-    pub fn upgrade_from_v1(spec_v1: ComputeSpecV1) -> anyhow::Result<ComputeSpecV2> {
-        let mut tenant_id = None;
-        let mut timeline_id = None;
-        let mut pageserver_connstring = None;
-        let mut safekeeper_connstrings: Vec<String> = Vec::new();
-
-        let mut extensions: Vec<String> = Vec::new();
-
-        let mut settings: Vec<GenericOption> = Vec::new();
-        for setting in &spec_v1.cluster.settings {
-            if let Some(value) = &setting.value {
-                match setting.name.as_str() {
-                    "neon.tenant_id" => {
-                        tenant_id = Some(TenantId::from_str(value)?);
-                    }
-                    "neon.timeline_id" => {
-                        timeline_id = Some(TimelineId::from_str(value)?);
-                    }
-                    "neon.pageserver_connstring" => {
-                        pageserver_connstring = Some(value.clone());
-                    }
-                    "neon.safekeepers" => {
-                        // neon.safekeepers is a comma-separated list of poestgres connection URLs
-                        safekeeper_connstrings =
-                            value.split(',').map(|s| s.trim().to_string()).collect();
-                    }
-                    "shared_preload_libraries" => {
-                        if value.contains("pg_stat_statements") {
-                            extensions.push("pg_stat_statements".to_string());
-                        }
-                        settings.push(setting.clone())
-                    }
-                    _ => settings.push(setting.clone()),
-                }
-            } else {
-                settings.push(setting.clone())
-            }
-        }
-        let tenant_id =
-            tenant_id.ok_or_else(|| anyhow!("neon.tenant_id missing from spec file"))?;
-        let timeline_id =
-            timeline_id.ok_or_else(|| anyhow!("neon.timeline_id missing from spec file"))?;
-        let pageserver_connstring = pageserver_connstring
-            .ok_or_else(|| anyhow!("neon.pageserver_connstring missing from spec file"))?;
-
-        Ok(ComputeSpecV2 {
-            format_version: 2,
-
-            project_id: Some(spec_v1.cluster.cluster_id),
-            endpoint_id: Some(spec_v1.cluster.name),
-            operation_uuid: spec_v1.operation_uuid,
-
-            startup_tracing_context: spec_v1.startup_tracing_context,
-
-            tenant_id,
-            timeline_id,
-            lsn: None, // Not supported in V1
-            pageserver_connstring,
-            safekeeper_connstrings,
-
-            storage_auth_token: spec_v1.storage_auth_token,
-
-            postgresql_conf: None,
-            settings: Some(settings),
-
-            roles: spec_v1.cluster.roles,
-            databases: spec_v1.cluster.databases,
-            extensions,
-            delta_operations: spec_v1.delta_operations,
-        })
-    }
-}
-
-#[serde_as]
-#[derive(Deserialize, Debug)]
-pub struct ComputeSpecAnyVersion(pub serde_json::Value);
-
-// Old format that didn't have explicit 'tenant_id', 'timeline_id, 'pageserver_connstring'
-// and 'safekeeper_connstrings' fields. They were stored in as GUCS in the 'cluster.settings'
-// list
-#[serde_as]
-#[derive(Clone, Deserialize, Serialize)]
-pub struct ComputeSpecV1 {
-    pub format_version: u64,
-
-    // The control plane also includes a 'timestamp' field in the JSON document,
-    // but we don't use it for anything. Serde will ignore missing fields when
-    // deserializing it.
-    pub operation_uuid: Option<String>,
-    pub cluster: ClusterV1,
-    pub delta_operations: Option<Vec<DeltaOp>>,
-    pub storage_auth_token: Option<String>,
-
-    pub startup_tracing_context: Option<HashMap<String, String>>,
-}
-
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct ClusterV1 {
-    pub cluster_id: String,
-    pub name: String,
-    pub state: Option<String>,
-    pub roles: Vec<Role>,
-    pub databases: Vec<Database>,
-    pub settings: Vec<GenericOption>,
-}
-
-/// Single cluster state changing operation that could not be represented as
-/// a static `Cluster` structure. For example:
-/// - DROP DATABASE
-/// - DROP ROLE
-/// - ALTER ROLE name RENAME TO new_name
-/// - ALTER DATABASE name RENAME TO new_name
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct DeltaOp {
-    pub action: String,
-    pub name: PgIdent,
-    pub new_name: Option<PgIdent>,
-}
-
-/// Rust representation of Postgres role info with only those fields
-/// that matter for us.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct Role {
-    pub name: PgIdent,
-    pub encrypted_password: Option<String>,
-    pub options: GenericOptions,
-}
-
-/// Rust representation of Postgres database info with only those fields
-/// that matter for us.
-#[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct Database {
-    pub name: PgIdent,
-    pub owner: PgIdent,
-    pub options: GenericOptions,
-}
-
-/// Common type representing both SQL statement params with or without value,
-/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
-/// options like `wal_level = logical`.
-#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
-pub struct GenericOption {
-    pub name: String,
-    pub value: Option<String>,
-    pub vartype: String,
-}
-
-/// Optional collection of `GenericOption`'s. Type alias allows us to
-/// declare a `trait` on it.
-pub type GenericOptions = Option<Vec<GenericOption>>;
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_upgrade_v1_to_v2() -> anyhow::Result<()> {
-        let spec_v1_str = std::fs::read_to_string("tests/spec-v1.json").unwrap();
-        let spec_v2 = ComputeSpecV2::parse_and_upgrade(&spec_v1_str)?;
-
-        // The original V1 file contains also neon.tenant_id, neon.timeline_id,
-        // neon.pageserver_connstring and neon.safekeepers. They are put to exclicit
-        // fields at the top level in V2.
-        assert_eq!(
-            spec_v2.tenant_id,
-            TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?
-        );
-        assert_eq!(
-            spec_v2.timeline_id,
-            TimelineId::from_str("7f2aff2a1042b93a2617f44851638422")?
-        );
-        assert_eq!(spec_v2.pageserver_connstring, "host=172.30.42.12 port=6400");
-        assert_eq!(
-            spec_v2.safekeeper_connstrings,
-            vec![
-                "172.30.42.23:6500",
-                "172.30.42.22:6500",
-                "172.30.42.21:6500"
-            ]
-        );
-
-        fn opt(name: &str, value: &str, vartype: &str) -> GenericOption {
-            GenericOption {
-                name: name.to_string(),
-                value: Some(value.to_string()),
-                vartype: vartype.to_string(),
-            }
-        }
-
-        assert_eq!(spec_v2.postgresql_conf, None);
-        assert_eq!(
-            spec_v2.settings.as_ref().unwrap(),
-            &vec![
-                opt("max_replication_write_lag", "500", "integer"),
-                opt("restart_after_crash", "off", "bool"),
-                opt("password_encryption", "md5", "enum"),
-                opt(
-                    "shared_preload_libraries",
-                    "neon, pg_stat_statements",
-                    "string"
-                ),
-                opt("synchronous_standby_names", "walproposer", "string"),
-                opt("wal_level", "replica", "enum"),
-                opt("listen_addresses", "0.0.0.0", "string"),
-                opt("neon.max_cluster_size", "10240", "integer"),
-                opt("shared_buffers", "65536", "integer"),
-                opt(
-                    "test.escaping",
-                    r#"here's a backslash \ and a quote ' and a double-quote " hooray"#,
-                    "string"
-                ),
-            ]
-        );
-
-        assert_eq!(spec_v2.extensions, vec!["pg_stat_statements"]);
-
-        eprintln!("SPEC: {}", serde_json::to_string_pretty(&spec_v2)?);
-
-        Ok(())
-    }
-}
--- a/libs/compute_api/tests/spec-v1.json
+++ b/libs/compute_api/tests/spec-v1.json
@@ -1,175 +0,0 @@
-{
-  "cluster": {
-    "cluster_id": "young-snowflake-871338",
-    "name": "young-snowflake-871338",
-    "settings": [
-      {
-        "name": "max_replication_write_lag",
-        "value": "500",
-        "vartype": "integer"
-      },
-      {
-        "name": "neon.pageserver_connstring",
-        "value": "host=172.30.42.12 port=6400",
-        "vartype": "string"
-      },
-      {
-        "name": "restart_after_crash",
-        "value": "off",
-        "vartype": "bool"
-      },
-      {
-        "name": "password_encryption",
-        "value": "md5",
-        "vartype": "enum"
-      },
-      {
-        "name": "shared_preload_libraries",
-        "value": "neon, pg_stat_statements",
-        "vartype": "string"
-      },
-      {
-        "name": "synchronous_standby_names",
-        "value": "walproposer",
-        "vartype": "string"
-      },
-      {
-        "name": "neon.tenant_id",
-        "value": "3d1f7595b468230304e0b73cecbcb081",
-        "vartype": "string"
-      },
-      {
-        "name": "neon.timeline_id",
-        "value": "7f2aff2a1042b93a2617f44851638422",
-        "vartype": "string"
-      },
-      {
-        "name": "wal_level",
-        "value": "replica",
-        "vartype": "enum"
-      },
-      {
-        "name": "listen_addresses",
-        "value": "0.0.0.0",
-        "vartype": "string"
-      },
-      {
-        "name": "neon.safekeepers",
-        "value": "172.30.42.23:6500,172.30.42.22:6500,172.30.42.21:6500",
-        "vartype": "string"
-      },
-      {
-        "name": "neon.max_cluster_size",
-        "value": "10240",
-        "vartype": "integer"
-      },
-      {
-        "name": "shared_buffers",
-        "value": "65536",
-        "vartype": "integer"
-      },
-      {
-        "name": "test.escaping",
-        "value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
-        "vartype": "string"
-      }
-    ],
-    "roles": [
-      {
-        "name": "postgres",
-        "encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
-        "options": null
-      },
-      {
-        "name": "testuser",
-        "encrypted_password": "SCRAM-SHA-256$4096:R4V8wIc+aH8T7vy3weC5qg==$aXXM6IQKnEWsRgeyjbxydif6f29LZOGvAWe/oOnuXSM=:5IE7U/woZLZbYSYOJ3v4x3qlLOXS6xcsdJYnMdVkzQY=",
-        "options": null
-      },
-      {
-        "name": "alexk",
-        "encrypted_password": null,
-        "options": null
-      },
-      {
-        "name": "neon \"new\"",
-        "encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
-        "options": null
-      },
-      {
-        "name": "bar",
-        "encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
-      },
-      {
-        "name": "\"name\";\\n select 1;",
-        "encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
-      },
-      {
-        "name": "MyRole",
-        "encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
-      }
-    ],
-    "databases": [
-      {
-        "name": "DB2",
-        "owner": "alexk",
-        "options": [
-          {
-            "name": "LC_COLLATE",
-            "value": "C",
-            "vartype": "string"
-          },
-          {
-            "name": "LC_CTYPE",
-            "value": "C",
-            "vartype": "string"
-          },
-          {
-            "name": "TEMPLATE",
-            "value": "template0",
-            "vartype": "enum"
-          }
-        ]
-      },
-      {
-        "name": "neondb",
-        "owner": "testuser",
-        "options": null
-      },
-      {
-        "name": "mydb",
-        "owner": "MyRole"
-      },
-      {
-        "name": "foo",
-        "owner": "bar"
-      }
-    ]
-  },
-  "delta_operations": [
-    {
-      "action": "delete_db",
-      "name": "neon_test"
-    },
-    {
-      "action": "rename_db",
-      "name": "DB",
-      "new_name": "DB2"
-    },
-    {
-      "action": "delete_role",
-      "name": "neon2"
-    },
-    {
-      "action": "rename_role",
-      "name": "neon new",
-      "new_name": "neon \"new\""
-    }
-  ],
-  "format_version": 1,
-  "operation_uuid": "73c843c3-46dd-496f-b819-e6c5a190f584",
-  "timestamp": "2023-03-25T21:36:16.729366596Z",
-  "storage_auth_token": "dummy",
-  "startup_tracing_context": {
-    "traceparent": "00-1b79dca0e798ee42961cd13990326551-5e0222e8d7314785-01"
-  }
-}
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -120,7 +120,6 @@ pub struct TenantCreateRequest {
    // We might do that once the eviction feature has stabilizied.
    // For now, this field is not even documented in the openapi_spec.yml.
    pub eviction_policy: Option<serde_json::Value>,
-    pub min_resident_size_override: Option<u64>,
 }

 #[serde_as]
@@ -166,7 +165,6 @@ pub struct TenantConfigRequest {
    // We might do that once the eviction feature has stabilizied.
    // For now, this field is not even documented in the openapi_spec.yml.
    pub eviction_policy: Option<serde_json::Value>,
-    pub min_resident_size_override: Option<u64>,
 }

 impl TenantConfigRequest {
@@ -187,7 +185,6 @@ impl TenantConfigRequest {
            max_lsn_wal_lag: None,
            trace_read_requests: None,
            eviction_policy: None,
-            min_resident_size_override: None,
        }
    }
 }
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -54,7 +54,7 @@ pub fn is_expected_io_error(e: &io::Error) -> bool {
    use io::ErrorKind::*;
    matches!(
        e.kind(),
-        ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
+        ConnectionRefused | ConnectionAborted | ConnectionReset
    )
 }

@@ -320,17 +320,9 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
        if let ProtoState::Closed = self.state {
            Ok(None)
        } else {
-            match self.framed.read_message().await {
-                Ok(m) => {
-                    trace!("read msg {:?}", m);
-                    Ok(m)
-                }
-                Err(e) => {
-                    // remember not to try to read anymore
-                    self.state = ProtoState::Closed;
-                    Err(e)
-                }
-            }
+            let m = self.framed.read_message().await?;
+            trace!("read msg {:?}", m);
+            Ok(m)
        }
    }

@@ -501,10 +493,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
            MaybeWriteOnly::Full(framed) => {
                let (reader, writer) = framed.split();
                self.framed = MaybeWriteOnly::WriteOnly(writer);
-                Ok(PostgresBackendReader {
-                    reader,
-                    closed: false,
-                })
+                Ok(PostgresBackendReader(reader))
            }
            MaybeWriteOnly::WriteOnly(_) => {
                anyhow::bail!("PostgresBackend is already split")
@@ -521,12 +510,8 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
                anyhow::bail!("PostgresBackend is not split")
            }
            MaybeWriteOnly::WriteOnly(writer) => {
-                let joined = Framed::unsplit(reader.reader, writer);
+                let joined = Framed::unsplit(reader.0, writer);
                self.framed = MaybeWriteOnly::Full(joined);
-                // if reader encountered connection error, do not attempt reading anymore
-                if reader.closed {
-                    self.state = ProtoState::Closed;
-                }
                Ok(())
            }
            MaybeWriteOnly::Broken => panic!("unsplit on framed in invalid state"),
@@ -812,25 +797,15 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackend<IO> {
    }
 }

-pub struct PostgresBackendReader<IO> {
-    reader: FramedReader<MaybeTlsStream<IO>>,
-    closed: bool, // true if received error closing the connection
-}
+pub struct PostgresBackendReader<IO>(FramedReader<MaybeTlsStream<IO>>);

 impl<IO: AsyncRead + AsyncWrite + Unpin> PostgresBackendReader<IO> {
    /// Read full message or return None if connection is cleanly closed with no
    /// unprocessed data.
    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {
-        match self.reader.read_message().await {
-            Ok(m) => {
-                trace!("read msg {:?}", m);
-                Ok(m)
-            }
-            Err(e) => {
-                self.closed = true;
-                Err(e)
-            }
-        }
+        let m = self.0.read_message().await?;
+        trace!("read msg {:?}", m);
+        Ok(m)
    }

    /// Get CopyData contents of the next message in COPY stream or error
@@ -948,7 +923,7 @@ pub enum CopyStreamHandlerEnd {
    #[error("EOF on COPY stream")]
    EOF,
    /// The connection was lost
-    #[error("connection error: {0}")]
+    #[error(transparent)]
    Disconnected(#[from] ConnectionError),
    /// Some other error
    #[error(transparent)]
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -293,9 +293,6 @@ impl FeStartupPacket {
        // We shouldn't advance `buf` as probably full message is not there yet,
        // so can't directly use Bytes::get_u32 etc.
        let len = (&buf[0..4]).read_u32::<BigEndian>().unwrap() as usize;
-        // The proposed replacement is `!(4..=MAX_STARTUP_PACKET_LENGTH).contains(&len)`
-        // which is less readable
-        #[allow(clippy::manual_range_contains)]
        if len < 4 || len > MAX_STARTUP_PACKET_LENGTH {
            return Err(ProtocolError::Protocol(format!(
                "invalid startup packet message length {}",
@@ -939,40 +936,35 @@ impl<'a> BeMessage<'a> {
    }
 }

-/// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
-/// Serialized in custom flexible key/value format. In replication protocol, it
-/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
-/// Standby status update / Hot standby feedback messages.
+// Neon extension of postgres replication protocol
+// See NEON_STATUS_UPDATE_TAG_BYTE
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub struct PageserverFeedback {
-    /// Last known size of the timeline. Used to enforce timeline size limit.
+pub struct ReplicationFeedback {
+    // Last known size of the timeline. Used to enforce timeline size limit.
    pub current_timeline_size: u64,
-    /// LSN last received and ingested by the pageserver.
-    pub last_received_lsn: u64,
-    /// LSN up to which data is persisted by the pageserver to its local disc.
-    pub disk_consistent_lsn: u64,
-    /// LSN up to which data is persisted by the pageserver on s3; safekeepers
-    /// consider WAL before it can be removed.
-    pub remote_consistent_lsn: u64,
-    pub replytime: SystemTime,
+    // Parts of StandbyStatusUpdate we resend to compute via safekeeper
+    pub ps_writelsn: u64,
+    pub ps_applylsn: u64,
+    pub ps_flushlsn: u64,
+    pub ps_replytime: SystemTime,
 }

-// NOTE: Do not forget to increment this number when adding new fields to PageserverFeedback.
+// NOTE: Do not forget to increment this number when adding new fields to ReplicationFeedback.
 // Do not remove previously available fields because this might be backwards incompatible.
-pub const PAGESERVER_FEEDBACK_FIELDS_NUMBER: u8 = 5;
+pub const REPLICATION_FEEDBACK_FIELDS_NUMBER: u8 = 5;

-impl PageserverFeedback {
-    pub fn empty() -> PageserverFeedback {
-        PageserverFeedback {
+impl ReplicationFeedback {
+    pub fn empty() -> ReplicationFeedback {
+        ReplicationFeedback {
            current_timeline_size: 0,
-            last_received_lsn: 0,
-            remote_consistent_lsn: 0,
-            disk_consistent_lsn: 0,
-            replytime: SystemTime::now(),
+            ps_writelsn: 0,
+            ps_applylsn: 0,
+            ps_flushlsn: 0,
+            ps_replytime: SystemTime::now(),
        }
    }

-    // Serialize PageserverFeedback using custom format
+    // Serialize ReplicationFeedback using custom format
    // to support protocol extensibility.
    //
    // Following layout is used:
@@ -982,26 +974,24 @@ impl PageserverFeedback {
    // null-terminated string - key,
    // uint32 - value length in bytes
    // value itself
-    //
-    // TODO: change serialized fields names once all computes migrate to rename.
    pub fn serialize(&self, buf: &mut BytesMut) {
-        buf.put_u8(PAGESERVER_FEEDBACK_FIELDS_NUMBER); // # of keys
+        buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
        buf.put_slice(b"current_timeline_size\0");
        buf.put_i32(8);
        buf.put_u64(self.current_timeline_size);

        buf.put_slice(b"ps_writelsn\0");
        buf.put_i32(8);
-        buf.put_u64(self.last_received_lsn);
+        buf.put_u64(self.ps_writelsn);
        buf.put_slice(b"ps_flushlsn\0");
        buf.put_i32(8);
-        buf.put_u64(self.disk_consistent_lsn);
+        buf.put_u64(self.ps_flushlsn);
        buf.put_slice(b"ps_applylsn\0");
        buf.put_i32(8);
-        buf.put_u64(self.remote_consistent_lsn);
+        buf.put_u64(self.ps_applylsn);

        let timestamp = self
-            .replytime
+            .ps_replytime
            .duration_since(*PG_EPOCH)
            .expect("failed to serialize pg_replytime earlier than PG_EPOCH")
            .as_micros() as i64;
@@ -1011,10 +1001,9 @@ impl PageserverFeedback {
        buf.put_i64(timestamp);
    }

-    // Deserialize PageserverFeedback message
-    // TODO: change serialized fields names once all computes migrate to rename.
-    pub fn parse(mut buf: Bytes) -> PageserverFeedback {
-        let mut rf = PageserverFeedback::empty();
+    // Deserialize ReplicationFeedback message
+    pub fn parse(mut buf: Bytes) -> ReplicationFeedback {
+        let mut rf = ReplicationFeedback::empty();
        let nfields = buf.get_u8();
        for _ in 0..nfields {
            let key = read_cstr(&mut buf).unwrap();
@@ -1027,39 +1016,39 @@ impl PageserverFeedback {
                b"ps_writelsn" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
-                    rf.last_received_lsn = buf.get_u64();
+                    rf.ps_writelsn = buf.get_u64();
                }
                b"ps_flushlsn" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
-                    rf.disk_consistent_lsn = buf.get_u64();
+                    rf.ps_flushlsn = buf.get_u64();
                }
                b"ps_applylsn" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
-                    rf.remote_consistent_lsn = buf.get_u64();
+                    rf.ps_applylsn = buf.get_u64();
                }
                b"ps_replytime" => {
                    let len = buf.get_i32();
                    assert_eq!(len, 8);
                    let raw_time = buf.get_i64();
                    if raw_time > 0 {
-                        rf.replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64);
+                        rf.ps_replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64);
                    } else {
-                        rf.replytime = *PG_EPOCH - Duration::from_micros(-raw_time as u64);
+                        rf.ps_replytime = *PG_EPOCH - Duration::from_micros(-raw_time as u64);
                    }
                }
                _ => {
                    let len = buf.get_i32();
                    warn!(
-                        "PageserverFeedback parse. unknown key {} of len {len}. Skip it.",
+                        "ReplicationFeedback parse. unknown key {} of len {len}. Skip it.",
                        String::from_utf8_lossy(key.as_ref())
                    );
                    buf.advance(len as usize);
                }
            }
        }
-        trace!("PageserverFeedback parsed is {:?}", rf);
+        trace!("ReplicationFeedback parsed is {:?}", rf);
        rf
    }
 }
@@ -1070,33 +1059,33 @@ mod tests {

    #[test]
    fn test_replication_feedback_serialization() {
-        let mut rf = PageserverFeedback::empty();
+        let mut rf = ReplicationFeedback::empty();
        // Fill rf with some values
        rf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
        // because it is rounded up to microseconds during serialization.
-        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
+        rf.ps_replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
        let mut data = BytesMut::new();
        rf.serialize(&mut data);

-        let rf_parsed = PageserverFeedback::parse(data.freeze());
+        let rf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(rf, rf_parsed);
    }

    #[test]
    fn test_replication_feedback_unknown_key() {
-        let mut rf = PageserverFeedback::empty();
+        let mut rf = ReplicationFeedback::empty();
        // Fill rf with some values
        rf.current_timeline_size = 12345678;
        // Set rounded time to be able to compare it with deserialized value,
        // because it is rounded up to microseconds during serialization.
-        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
+        rf.ps_replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
        let mut data = BytesMut::new();
        rf.serialize(&mut data);

        // Add an extra field to the buffer and adjust number of keys
        if let Some(first) = data.first_mut() {
-            *first = PAGESERVER_FEEDBACK_FIELDS_NUMBER + 1;
+            *first = REPLICATION_FEEDBACK_FIELDS_NUMBER + 1;
        }

        data.put_slice(b"new_field_one\0");
@@ -1104,7 +1093,7 @@ mod tests {
        data.put_u64(42);

        // Parse serialized data and check that new field is not parsed
-        let rf_parsed = PageserverFeedback::parse(data.freeze());
+        let rf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(rf, rf_parsed);
    }

--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -26,4 +26,3 @@ workspace_hack.workspace = true

 [dev-dependencies]
 tempfile.workspace = true
-test-context.workspace = true
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -13,6 +13,7 @@ use std::{
    collections::HashMap,
    fmt::Debug,
    num::{NonZeroU32, NonZeroUsize},
+    ops::Deref,
    path::{Path, PathBuf},
    pin::Pin,
    sync::Arc,
@@ -38,9 +39,6 @@ pub const DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS: u32 = 10;
 /// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
 /// https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/
 pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
-/// No limits on the client side, which currenltly means 1000 for AWS S3.
-/// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax
-pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;

 const REMOTE_STORAGE_PREFIX_SEPARATOR: char = '/';

@@ -66,10 +64,6 @@ impl RemotePath {
    pub fn object_name(&self) -> Option<&str> {
        self.0.file_name().and_then(|os_str| os_str.to_str())
    }
-
-    pub fn join(&self, segment: &Path) -> Self {
-        Self(self.0.join(segment))
-    }
 }

 /// Storage (potentially remote) API to manage its state.
@@ -77,6 +71,9 @@ impl RemotePath {
 /// providing basic CRUD operations for storage files.
 #[async_trait::async_trait]
 pub trait RemoteStorage: Send + Sync + 'static {
+    /// Lists all items the storage has right now.
+    async fn list(&self) -> anyhow::Result<Vec<RemotePath>>;
+
    /// Lists all top level subdirectories for a given prefix
    /// Note: here we assume that if the prefix is passed it was obtained via remote_object_id
    /// which already takes into account any kind of global prefix (prefix_in_bucket for S3 or storage_root for LocalFS)
@@ -89,7 +86,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
    /// Streams the local file contents into remote into the remote storage entry.
    async fn upload(
        &self,
-        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
+        data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
        // S3 PUT request requires the content length to be specified,
        // otherwise it starts to fail with the concurrent connection count increasing.
        data_size_bytes: usize,
@@ -160,67 +157,14 @@ pub enum GenericRemoteStorage {
    Unreliable(Arc<UnreliableWrapper>),
 }

-impl GenericRemoteStorage {
-    pub async fn list_prefixes(
-        &self,
-        prefix: Option<&RemotePath>,
-    ) -> Result<Vec<RemotePath>, DownloadError> {
-        match self {
-            Self::LocalFs(s) => s.list_prefixes(prefix).await,
-            Self::AwsS3(s) => s.list_prefixes(prefix).await,
-            Self::Unreliable(s) => s.list_prefixes(prefix).await,
-        }
-    }
+impl Deref for GenericRemoteStorage {
+    type Target = dyn RemoteStorage;

-    pub async fn upload(
-        &self,
-        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
-        data_size_bytes: usize,
-        to: &RemotePath,
-        metadata: Option<StorageMetadata>,
-    ) -> anyhow::Result<()> {
+    fn deref(&self) -> &Self::Target {
        match self {
-            Self::LocalFs(s) => s.upload(from, data_size_bytes, to, metadata).await,
-            Self::AwsS3(s) => s.upload(from, data_size_bytes, to, metadata).await,
-            Self::Unreliable(s) => s.upload(from, data_size_bytes, to, metadata).await,
-        }
-    }
-
-    pub async fn download(&self, from: &RemotePath) -> Result<Download, DownloadError> {
-        match self {
-            Self::LocalFs(s) => s.download(from).await,
-            Self::AwsS3(s) => s.download(from).await,
-            Self::Unreliable(s) => s.download(from).await,
-        }
-    }
-
-    pub async fn download_byte_range(
-        &self,
-        from: &RemotePath,
-        start_inclusive: u64,
-        end_exclusive: Option<u64>,
-    ) -> Result<Download, DownloadError> {
-        match self {
-            Self::LocalFs(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive)
-                    .await
-            }
-            Self::AwsS3(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive)
-                    .await
-            }
-            Self::Unreliable(s) => {
-                s.download_byte_range(from, start_inclusive, end_exclusive)
-                    .await
-            }
-        }
-    }
-
-    pub async fn delete(&self, path: &RemotePath) -> anyhow::Result<()> {
-        match self {
-            Self::LocalFs(s) => s.delete(path).await,
-            Self::AwsS3(s) => s.delete(path).await,
-            Self::Unreliable(s) => s.delete(path).await,
+            GenericRemoteStorage::LocalFs(local_fs) => local_fs,
+            GenericRemoteStorage::AwsS3(s3_bucket) => s3_bucket.as_ref(),
+            GenericRemoteStorage::Unreliable(s) => s.as_ref(),
        }
    }
 }
@@ -251,7 +195,7 @@ impl GenericRemoteStorage {
    /// this path is used for the remote object id conversion only.
    pub async fn upload_storage_object(
        &self,
-        from: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
+        from: Box<dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static>,
        from_size_bytes: usize,
        to: &RemotePath,
    ) -> anyhow::Result<()> {
@@ -322,7 +266,6 @@ pub struct S3Config {
    /// AWS S3 has various limits on its API calls, we need not to exceed those.
    /// See [`DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT`] for more details.
    pub concurrency_limit: NonZeroUsize,
-    pub max_keys_per_list_response: Option<i32>,
 }

 impl Debug for S3Config {
@@ -332,10 +275,6 @@ impl Debug for S3Config {
            .field("bucket_region", &self.bucket_region)
            .field("prefix_in_bucket", &self.prefix_in_bucket)
            .field("concurrency_limit", &self.concurrency_limit)
-            .field(
-                "max_keys_per_list_response",
-                &self.max_keys_per_list_response,
-            )
            .finish()
    }
 }
@@ -364,11 +303,6 @@ impl RemoteStorageConfig {
        )
        .context("Failed to parse 'concurrency_limit' as a positive integer")?;

-        let max_keys_per_list_response =
-            parse_optional_integer::<i32, _>("max_keys_per_list_response", toml)
-                .context("Failed to parse 'max_keys_per_list_response' as a positive integer")?
-                .or(DEFAULT_MAX_KEYS_PER_LIST_RESPONSE);
-
        let storage = match (local_path, bucket_name, bucket_region) {
            // no 'local_path' nor 'bucket_name' options are provided, consider this remote storage disabled
            (None, None, None) => return Ok(None),
@@ -390,7 +324,6 @@ impl RemoteStorageConfig {
                    .map(|endpoint| parse_toml_string("endpoint", endpoint))
                    .transpose()?,
                concurrency_limit,
-                max_keys_per_list_response,
            }),
            (Some(local_path), None, None) => RemoteStorageKind::LocalFs(PathBuf::from(
                parse_toml_string("local_path", local_path)?,
--- a/libs/remote_storage/src/local_fs.rs
+++ b/libs/remote_storage/src/local_fs.rs
@@ -73,8 +73,10 @@ impl LocalFs {
            Ok(None)
        }
    }
+}

-    #[cfg(test)]
+#[async_trait::async_trait]
+impl RemoteStorage for LocalFs {
    async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
        Ok(get_all_files(&self.storage_root, true)
            .await?
@@ -89,10 +91,7 @@ impl LocalFs {
            })
            .collect())
    }
-}

-#[async_trait::async_trait]
-impl RemoteStorage for LocalFs {
    async fn list_prefixes(
        &self,
        prefix: Option<&RemotePath>,
@@ -118,7 +117,7 @@ impl RemoteStorage for LocalFs {

    async fn upload(
        &self,
-        data: impl io::AsyncRead + Unpin + Send + Sync + 'static,
+        data: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
        data_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -102,7 +102,6 @@ pub struct S3Bucket {
    client: Client,
    bucket_name: String,
    prefix_in_bucket: Option<String>,
-    max_keys_per_list_response: Option<i32>,
    // Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
    // Same goes to IAM, which is queried before every S3 request, if enabled. IAM has even lower RPS threshold.
    // The helps to ensure we don't exceed the thresholds.
@@ -165,7 +164,6 @@ impl S3Bucket {
        Ok(Self {
            client,
            bucket_name: aws_config.bucket_name.clone(),
-            max_keys_per_list_response: aws_config.max_keys_per_list_response,
            prefix_in_bucket,
            concurrency_limiter: Arc::new(Semaphore::new(aws_config.concurrency_limit.get())),
        })
@@ -275,6 +273,48 @@ impl<S: AsyncRead> AsyncRead for RatelimitedAsyncRead<S> {

 #[async_trait::async_trait]
 impl RemoteStorage for S3Bucket {
+    /// Lists all objects under `prefix_in_bucket`, requesting pages until S3 reports no more.
+    async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
+        let mut document_keys = Vec::new();
+
+        let mut continuation_token = None;
+        loop {
+            let _guard = self
+                .concurrency_limiter
+                .acquire()
+                .await
+                .context("Concurrency limiter semaphore got closed during S3 list")?;
+            metrics::inc_list_objects();
+
+            let fetch_response = self
+                .client
+                .list_objects_v2()
+                .bucket(self.bucket_name.clone())
+                .set_prefix(self.prefix_in_bucket.clone())
+                .set_continuation_token(continuation_token)
+                .send()
+                .await
+                .map_err(|e| {
+                    metrics::inc_list_objects_fail();
+                    e
+                })?;
+            document_keys.extend(
+                fetch_response
+                    .contents
+                    .unwrap_or_default()
+                    .into_iter()
+                    .filter_map(|o| Some(self.s3_object_to_relative_path(o.key()?))),
+            );
+            // `continuation_token` just echoes the request; the next page's token is this field.
+            match fetch_response.next_continuation_token {
+                Some(new_token) => continuation_token = Some(new_token),
+                None => break,
+            }
+        }
+
+        Ok(document_keys)
+    }
+
    /// See the doc for `RemoteStorage::list_prefixes`
    /// Note: it wont include empty "directories"
    async fn list_prefixes(
@@ -314,7 +354,6 @@ impl RemoteStorage for S3Bucket {
                .set_prefix(list_prefix.clone())
                .set_continuation_token(continuation_token)
                .delimiter(REMOTE_STORAGE_PREFIX_SEPARATOR.to_string())
-                .set_max_keys(self.max_keys_per_list_response)
                .send()
                .await
                .map_err(|e| {
@@ -332,7 +371,7 @@ impl RemoteStorage for S3Bucket {
                    .filter_map(|o| Some(self.s3_object_to_relative_path(o.prefix()?))),
            );

-            match fetch_response.next_continuation_token {
+            match fetch_response.next_continuation_token {
                Some(new_token) => continuation_token = Some(new_token),
                None => break,
            }
@@ -343,7 +382,7 @@ impl RemoteStorage for S3Bucket {

    async fn upload(
        &self,
-        from: impl io::AsyncRead + Unpin + Send + Sync + 'static,
+        from: Box<(dyn io::AsyncRead + Unpin + Send + Sync + 'static)>,
        from_size_bytes: usize,
        to: &RemotePath,
        metadata: Option<StorageMetadata>,
--- a/libs/remote_storage/src/simulate_failures.rs
+++ b/libs/remote_storage/src/simulate_failures.rs
@@ -20,6 +20,7 @@ pub struct UnreliableWrapper {
 /// Used to identify retries of different unique operation.
 #[derive(Debug, Hash, Eq, PartialEq)]
 enum RemoteOp {
+    List,
    ListPrefixes(Option<RemotePath>),
    Upload(RemotePath),
    Download(RemotePath),
@@ -74,6 +75,12 @@ impl UnreliableWrapper {

 #[async_trait::async_trait]
 impl RemoteStorage for UnreliableWrapper {
+    /// Lists all items of the wrapped storage; returns early with the error if `attempt` fails.
+    async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
+        self.attempt(RemoteOp::List)?;
+        self.inner.list().await
+    }
+
    async fn list_prefixes(
        &self,
        prefix: Option<&RemotePath>,
@@ -84,7 +91,7 @@ impl RemoteStorage for UnreliableWrapper {

    async fn upload(
        &self,
-        data: impl tokio::io::AsyncRead + Unpin + Send + Sync + 'static,
+        data: Box<(dyn tokio::io::AsyncRead + Unpin + Send + Sync + 'static)>,
        // S3 PUT request requires the content length to be specified,
        // otherwise it starts to fail with the concurrent connection count increasing.
        data_size_bytes: usize,
--- a/libs/remote_storage/tests/pagination_tests.rs
+++ b/libs/remote_storage/tests/pagination_tests.rs
@@ -1,275 +0,0 @@
-use std::collections::HashSet;
-use std::env;
-use std::num::{NonZeroU32, NonZeroUsize};
-use std::ops::ControlFlow;
-use std::path::{Path, PathBuf};
-use std::sync::Arc;
-use std::time::UNIX_EPOCH;
-
-use anyhow::Context;
-use remote_storage::{
-    GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
-};
-use test_context::{test_context, AsyncTestContext};
-use tokio::task::JoinSet;
-use tracing::{debug, error, info};
-
-const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
-
-/// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries.
-/// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified.
-/// See the client creation in [`create_s3_client`] for details on the required env vars.
-/// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark the test ignored in runtime with the
-/// deafult test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
-///
-/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_s3_data`]
-/// where
-/// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid multiple test runs interference
-/// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket
-///
-/// Then, verifies that the client does return correct prefixes when queried:
-/// * with no prefix, it lists everything after its `${random_prefix_part}/` — that should be `${base_prefix_str}` value only
-/// * with `${base_prefix_str}/` prefix, it lists every `sub_prefix_${i}`
-///
-/// With the real S3 enabled and `#[cfg(test)]` Rust configuration used, the S3 client test adds a `max-keys` param to limit the response keys.
-/// This way, we are able to test the pagination implicitly, by ensuring all results are returned from the remote storage and avoid uploading too many blobs to S3,
-/// since current default AWS S3 pagination limit is 1000.
-/// (see https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax)
-///
-/// Lastly, the test attempts to clean up and remove all uploaded S3 files.
-/// If any errors appear during the clean up, they get logged, but the test is not failed or stopped until clean up is finished.
-#[test_context(MaybeEnabledS3)]
-#[tokio::test]
-async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
-    let ctx = match ctx {
-        MaybeEnabledS3::Enabled(ctx) => ctx,
-        MaybeEnabledS3::Disabled => return Ok(()),
-        MaybeEnabledS3::UploadsFailed(e, _) => anyhow::bail!("S3 init failed: {e:?}"),
-    };
-
-    let test_client = Arc::clone(&ctx.client_with_excessive_pagination);
-    let expected_remote_prefixes = ctx.remote_prefixes.clone();
-
-    let base_prefix =
-        RemotePath::new(Path::new(ctx.base_prefix_str)).context("common_prefix construction")?;
-    let root_remote_prefixes = test_client
-        .list_prefixes(None)
-        .await
-        .context("client list root prefixes failure")?
-        .into_iter()
-        .collect::<HashSet<_>>();
-    assert_eq!(
-        root_remote_prefixes, HashSet::from([base_prefix.clone()]),
-        "remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}"
-    );
-
-    let nested_remote_prefixes = test_client
-        .list_prefixes(Some(&base_prefix))
-        .await
-        .context("client list nested prefixes failure")?
-        .into_iter()
-        .collect::<HashSet<_>>();
-    let remote_only_prefixes = nested_remote_prefixes
-        .difference(&expected_remote_prefixes)
-        .collect::<HashSet<_>>();
-    let missing_uploaded_prefixes = expected_remote_prefixes
-        .difference(&nested_remote_prefixes)
-        .collect::<HashSet<_>>();
-    assert_eq!(
-        remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0,
-        "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}",
-    );
-
-    Ok(())
-}
-
-enum MaybeEnabledS3 {
-    Enabled(S3WithTestBlobs),
-    Disabled,
-    UploadsFailed(anyhow::Error, S3WithTestBlobs),
-}
-
-struct S3WithTestBlobs {
-    client_with_excessive_pagination: Arc<GenericRemoteStorage>,
-    base_prefix_str: &'static str,
-    remote_prefixes: HashSet<RemotePath>,
-    remote_blobs: HashSet<RemotePath>,
-}
-
-#[async_trait::async_trait]
-impl AsyncTestContext for MaybeEnabledS3 {
-    async fn setup() -> Self {
-        utils::logging::init(utils::logging::LogFormat::Test).expect("logging init failed");
-        if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
-            info!(
-                "`{}` env variable is not set, skipping the test",
-                ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME
-            );
-            return Self::Disabled;
-        }
-
-        let max_keys_in_list_response = 10;
-        let upload_tasks_count = 1 + (2 * usize::try_from(max_keys_in_list_response).unwrap());
-
-        let client_with_excessive_pagination = create_s3_client(max_keys_in_list_response)
-            .context("S3 client creation")
-            .expect("S3 client creation failed");
-
-        let base_prefix_str = "test/";
-        match upload_s3_data(
-            &client_with_excessive_pagination,
-            base_prefix_str,
-            upload_tasks_count,
-        )
-        .await
-        {
-            ControlFlow::Continue(uploads) => {
-                info!("Remote objects created successfully");
-                Self::Enabled(S3WithTestBlobs {
-                    client_with_excessive_pagination,
-                    base_prefix_str,
-                    remote_prefixes: uploads.prefixes,
-                    remote_blobs: uploads.blobs,
-                })
-            }
-            ControlFlow::Break(uploads) => Self::UploadsFailed(
-                anyhow::anyhow!("One or multiple blobs failed to upload to S3"),
-                S3WithTestBlobs {
-                    client_with_excessive_pagination,
-                    base_prefix_str,
-                    remote_prefixes: uploads.prefixes,
-                    remote_blobs: uploads.blobs,
-                },
-            ),
-        }
-    }
-
-    async fn teardown(self) {
-        match self {
-            Self::Disabled => {}
-            Self::Enabled(ctx) | Self::UploadsFailed(_, ctx) => {
-                cleanup(&ctx.client_with_excessive_pagination, ctx.remote_blobs).await;
-            }
-        }
-    }
-}
-
-fn create_s3_client(max_keys_per_list_response: i32) -> anyhow::Result<Arc<GenericRemoteStorage>> {
-    let remote_storage_s3_bucket = env::var("REMOTE_STORAGE_S3_BUCKET")
-        .context("`REMOTE_STORAGE_S3_BUCKET` env var is not set, but real S3 tests are enabled")?;
-    let remote_storage_s3_region = env::var("REMOTE_STORAGE_S3_REGION")
-        .context("`REMOTE_STORAGE_S3_REGION` env var is not set, but real S3 tests are enabled")?;
-    let random_prefix_part = std::time::SystemTime::now()
-        .duration_since(UNIX_EPOCH)
-        .context("random s3 test prefix part calculation")?
-        .as_millis();
-    let remote_storage_config = RemoteStorageConfig {
-        max_concurrent_syncs: NonZeroUsize::new(100).unwrap(),
-        max_sync_errors: NonZeroU32::new(5).unwrap(),
-        storage: RemoteStorageKind::AwsS3(S3Config {
-            bucket_name: remote_storage_s3_bucket,
-            bucket_region: remote_storage_s3_region,
-            prefix_in_bucket: Some(format!("pagination_should_work_test_{random_prefix_part}/")),
-            endpoint: None,
-            concurrency_limit: NonZeroUsize::new(100).unwrap(),
-            max_keys_per_list_response: Some(max_keys_per_list_response),
-        }),
-    };
-    Ok(Arc::new(
-        GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
-    ))
-}
-
-struct Uploads {
-    prefixes: HashSet<RemotePath>,
-    blobs: HashSet<RemotePath>,
-}
-
-async fn upload_s3_data(
-    client: &Arc<GenericRemoteStorage>,
-    base_prefix_str: &'static str,
-    upload_tasks_count: usize,
-) -> ControlFlow<Uploads, Uploads> {
-    info!("Creating {upload_tasks_count} S3 files");
-    let mut upload_tasks = JoinSet::new();
-    for i in 1..upload_tasks_count + 1 {
-        let task_client = Arc::clone(client);
-        upload_tasks.spawn(async move {
-            let prefix = PathBuf::from(format!("{base_prefix_str}/sub_prefix_{i}/"));
-            let blob_prefix = RemotePath::new(&prefix)
-                .with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
-            let blob_path = blob_prefix.join(Path::new(&format!("blob_{i}")));
-            debug!("Creating remote item {i} at path {blob_path:?}");
-
-            let data = format!("remote blob data {i}").into_bytes();
-            let data_len = data.len();
-            task_client
-                .upload(
-                    Box::new(std::io::Cursor::new(data)),
-                    data_len,
-                    &blob_path,
-                    None,
-                )
-                .await?;
-
-            Ok::<_, anyhow::Error>((blob_prefix, blob_path))
-        });
-    }
-
-    let mut upload_tasks_failed = false;
-    let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
-    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
-    while let Some(task_run_result) = upload_tasks.join_next().await {
-        match task_run_result
-            .context("task join failed")
-            .and_then(|task_result| task_result.context("upload task failed"))
-        {
-            Ok((upload_prefix, upload_path)) => {
-                uploaded_prefixes.insert(upload_prefix);
-                uploaded_blobs.insert(upload_path);
-            }
-            Err(e) => {
-                error!("Upload task failed: {e:?}");
-                upload_tasks_failed = true;
-            }
-        }
-    }
-
-    let uploads = Uploads {
-        prefixes: uploaded_prefixes,
-        blobs: uploaded_blobs,
-    };
-    if upload_tasks_failed {
-        ControlFlow::Break(uploads)
-    } else {
-        ControlFlow::Continue(uploads)
-    }
-}
-
-async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<RemotePath>) {
-    info!(
-        "Removing {} objects from the remote storage during cleanup",
-        objects_to_delete.len()
-    );
-    let mut delete_tasks = JoinSet::new();
-    for object_to_delete in objects_to_delete {
-        let task_client = Arc::clone(client);
-        delete_tasks.spawn(async move {
-            debug!("Deleting remote item at path {object_to_delete:?}");
-            task_client
-                .delete(&object_to_delete)
-                .await
-                .with_context(|| format!("{object_to_delete:?} removal"))
-        });
-    }
-
-    while let Some(task_run_result) = delete_tasks.join_next().await {
-        match task_run_result {
-            Ok(task_result) => match task_result {
-                Ok(()) => {}
-                Err(e) => error!("Delete task failed: {e:?}"),
-            },
-            Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
-        }
-    }
-}
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -19,7 +19,6 @@ jsonwebtoken.workspace = true
 nix.workspace = true
 once_cell.workspace = true
 pin-project-lite.workspace = true
-regex.workspace = true
 routerify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
--- a/libs/utils/src/http/error.rs
+++ b/libs/utils/src/http/error.rs
@@ -20,9 +20,6 @@ pub enum ApiError {
    #[error("Conflict: {0}")]
    Conflict(String),

-    #[error("Precondition failed: {0}")]
-    PreconditionFailed(&'static str),
-
    #[error(transparent)]
    InternalServerError(anyhow::Error),
 }
@@ -47,10 +44,6 @@ impl ApiError {
            ApiError::Conflict(_) => {
                HttpErrorBody::response_from_msg_and_status(self.to_string(), StatusCode::CONFLICT)
            }
-            ApiError::PreconditionFailed(_) => HttpErrorBody::response_from_msg_and_status(
-                self.to_string(),
-                StatusCode::PRECONDITION_FAILED,
-            ),
            ApiError::InternalServerError(err) => HttpErrorBody::response_from_msg_and_status(
                err.to_string(),
                StatusCode::INTERNAL_SERVER_ERROR,
--- a/libs/utils/src/id.rs
+++ b/libs/utils/src/id.rs
@@ -23,7 +23,7 @@ pub enum IdError {
 struct Id([u8; 16]);

 impl Id {
-    pub fn get_from_buf(buf: &mut impl bytes::Buf) -> Id {
+    pub fn get_from_buf(buf: &mut dyn bytes::Buf) -> Id {
        let mut arr = [0u8; 16];
        buf.copy_to_slice(&mut arr);
        Id::from(arr)
@@ -112,7 +112,7 @@ impl fmt::Debug for Id {
 macro_rules! id_newtype {
    ($t:ident) => {
        impl $t {
-            pub fn get_from_buf(buf: &mut impl bytes::Buf) -> $t {
+            pub fn get_from_buf(buf: &mut dyn bytes::Buf) -> $t {
                $t(Id::get_from_buf(buf))
            }

--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -51,9 +51,6 @@ pub mod history_buffer;

 pub mod measured_stream;

-pub mod serde_percent;
-pub mod serde_regex;
-
 /// use with fail::cfg("$name", "return(2000)")
 #[macro_export]
 macro_rules! failpoint_sleep_millis_async {
--- a/libs/utils/src/serde_percent.rs
+++ b/libs/utils/src/serde_percent.rs
@@ -1,91 +0,0 @@
-//! A serde::Deserialize type for percentages.
-//!
-//! See [`Percent`] for details.
-
-use serde::{Deserialize, Serialize};
-
-/// If the value is not an integer between 0 and 100,
-/// deserialization fails with a descriptive error.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
-#[serde(transparent)]
-pub struct Percent(#[serde(deserialize_with = "deserialize_pct_0_to_100")] u8);
-
-impl Percent {
-    pub const fn new(pct: u8) -> Option<Self> {
-        if pct <= 100 {
-            Some(Percent(pct))
-        } else {
-            None
-        }
-    }
-
-    pub fn get(&self) -> u8 {
-        self.0
-    }
-}
-
-fn deserialize_pct_0_to_100<'de, D>(deserializer: D) -> Result<u8, D::Error>
-where
-    D: serde::de::Deserializer<'de>,
-{
-    let v: u8 = serde::de::Deserialize::deserialize(deserializer)?;
-    if v > 100 {
-        return Err(serde::de::Error::custom(
-            "must be an integer between 0 and 100",
-        ));
-    }
-    Ok(v)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::Percent;
-
-    #[derive(serde::Deserialize, serde::Serialize, Debug, PartialEq, Eq)]
-    struct Foo {
-        bar: Percent,
-    }
-
-    #[test]
-    fn basics() {
-        let input = r#"{ "bar": 50 }"#;
-        let foo: Foo = serde_json::from_str(input).unwrap();
-        assert_eq!(foo.bar.get(), 50);
-    }
-    #[test]
-    fn null_handling() {
-        let input = r#"{ "bar": null }"#;
-        let res: Result<Foo, _> = serde_json::from_str(input);
-        assert!(res.is_err());
-    }
-    #[test]
-    fn zero() {
-        let input = r#"{ "bar": 0 }"#;
-        let foo: Foo = serde_json::from_str(input).unwrap();
-        assert_eq!(foo.bar.get(), 0);
-    }
-    #[test]
-    fn out_of_range_above() {
-        let input = r#"{ "bar": 101 }"#;
-        let res: Result<Foo, _> = serde_json::from_str(input);
-        assert!(res.is_err());
-    }
-    #[test]
-    fn out_of_range_below() {
-        let input = r#"{ "bar": -1 }"#;
-        let res: Result<Foo, _> = serde_json::from_str(input);
-        assert!(res.is_err());
-    }
-    #[test]
-    fn float() {
-        let input = r#"{ "bar": 50.5 }"#;
-        let res: Result<Foo, _> = serde_json::from_str(input);
-        assert!(res.is_err());
-    }
-    #[test]
-    fn string() {
-        let input = r#"{ "bar": "50 %" }"#;
-        let res: Result<Foo, _> = serde_json::from_str(input);
-        assert!(res.is_err());
-    }
-}
--- a/libs/utils/src/serde_regex.rs
+++ b/libs/utils/src/serde_regex.rs
@@ -1,60 +0,0 @@
-//! A `serde::{Deserialize,Serialize}` type for regexes.
-
-use std::ops::Deref;
-
-#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
-#[serde(transparent)]
-pub struct Regex(
-    #[serde(
-        deserialize_with = "deserialize_regex",
-        serialize_with = "serialize_regex"
-    )]
-    regex::Regex,
-);
-
-fn deserialize_regex<'de, D>(deserializer: D) -> Result<regex::Regex, D::Error>
-where
-    D: serde::de::Deserializer<'de>,
-{
-    let s: String = serde::de::Deserialize::deserialize(deserializer)?;
-    let re = regex::Regex::new(&s).map_err(serde::de::Error::custom)?;
-    Ok(re)
-}
-
-fn serialize_regex<S>(re: &regex::Regex, serializer: S) -> Result<S::Ok, S::Error>
-where
-    S: serde::ser::Serializer,
-{
-    serializer.collect_str(re.as_str())
-}
-
-impl Deref for Regex {
-    type Target = regex::Regex;
-
-    fn deref(&self) -> &regex::Regex {
-        &self.0
-    }
-}
-
-impl PartialEq for Regex {
-    fn eq(&self, other: &Regex) -> bool {
-        // comparing the automatons would be quite complicated
-        self.as_str() == other.as_str()
-    }
-}
-
-impl Eq for Regex {}
-
-#[cfg(test)]
-mod tests {
-
-    #[test]
-    fn roundtrip() {
-        let input = r#""foo.*bar""#;
-        let re: super::Regex = serde_json::from_str(input).unwrap();
-        assert!(re.is_match("foo123bar"));
-        assert!(!re.is_match("foo"));
-        let output = serde_json::to_string(&re).unwrap();
-        assert_eq!(output, input);
-    }
-}
--- a/libs/utils/src/signals.rs
+++ b/libs/utils/src/signals.rs
@@ -1,7 +1,25 @@
+use signal_hook::flag;
 use signal_hook::iterator::Signals;
+use std::sync::atomic::AtomicBool;
+use std::sync::Arc;

 pub use signal_hook::consts::{signal::*, TERM_SIGNALS};

+/// Registers handlers for every signal in `TERM_SIGNALS` and returns a [`ShutdownSignals`].
+pub fn install_shutdown_handlers() -> anyhow::Result<ShutdownSignals> {
+    let armed = Arc::new(AtomicBool::new(false));
+    for &term_signal in TERM_SIGNALS {
+        // Registration order matters. This handler exits the process with code 1, but only
+        // once `armed` is set, so the first termination signal passes through it untouched.
+        flag::register_conditional_shutdown(term_signal, 1, Arc::clone(&armed))?;
+        // This handler sets `armed`, so a second termination signal triggers the exit above.
+        // Registered first, it would arm and terminate ‒ all within the very first signal.
+        flag::register(term_signal, Arc::clone(&armed))?;
+    }
+
+    Ok(ShutdownSignals)
+}
+
 pub enum Signal {
    Quit,
    Interrupt,
@@ -21,7 +39,10 @@ impl Signal {
 pub struct ShutdownSignals;

 impl ShutdownSignals {
-    pub fn handle(mut handler: impl FnMut(Signal) -> anyhow::Result<()>) -> anyhow::Result<()> {
+    pub fn handle(
+        self,
+        mut handler: impl FnMut(Signal) -> anyhow::Result<()>,
+    ) -> anyhow::Result<()> {
        for raw_signal in Signals::new(TERM_SIGNALS)?.into_iter() {
            let signal = match raw_signal {
                SIGINT => Signal::Interrupt,
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -48,7 +48,6 @@ serde_json = { workspace = true, features = ["raw_value"] }
 serde_with.workspace = true
 signal-hook.workspace = true
 svg_fmt.workspace = true
-sync_wrapper.workspace = true
 tokio-tar.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
--- a/pageserver/src/bin/draw_layer-trace.rs
+++ b/pageserver/src/bin/draw_layer-trace.rs
@@ -0,0 +1,541 @@
+use anyhow::Result;
+use pageserver::repository::Key;
+use serde::{Deserialize, Serialize};
+use std::cmp::Ordering;
+use std::io::{self, BufRead};
+use std::{
+    collections::{BTreeMap, BTreeSet, HashMap},
+    fmt::Write,
+    ops::Range,
+};
+use svg_fmt::{rgb, BeginSvg, EndSvg, Fill, Stroke, Style};
+use utils::{lsn::Lsn, project_git_version};
+
+project_git_version!(GIT_VERSION);
+
+/// Coordinate compression: a value maps to the index it would have in the
+/// sorted, deduplicated list of all values, multiplied by a constant `stretch`.
+struct CoordinateMap<T: Ord + Copy> {
+    map: BTreeMap<T, usize>,
+    stretch: f32,
+}
+
+impl<T: Ord + Copy> CoordinateMap<T> {
+    /// Builds the mapping from `coords`; duplicate values share one index.
+    fn new(coords: Vec<T>, stretch: f32) -> Self {
+        // A `BTreeSet` iterates in ascending order, so iteration position is the index.
+        let distinct: BTreeSet<T> = coords.into_iter().collect();
+        let map: BTreeMap<T, usize> = distinct.into_iter().zip(0..).collect();
+        Self { map, stretch }
+    }
+
+    /// Stretched index of `val`; panics if `val` was not among the values given to `new`.
+    fn map(&self, val: T) -> f32 {
+        let index = *self.map.get(&val).unwrap();
+        index as f32 * self.stretch
+    }
+
+    /// Number of distinct values multiplied by `stretch`.
+    fn max(&self) -> f32 {
+        self.map.len() as f32 * self.stretch
+    }
+}
+
+/// Parses `<key>-<key>__<lsn>[-<lsn>]` (hex fields); a lone LSN gives start == end. Panics if malformed.
+fn parse_filename(name: &str) -> (Range<Key>, Range<Lsn>) {
+    let halves: Vec<&str> = name.split("__").collect();
+    let key_fields: Vec<&str> = halves[0].split('-').collect();
+    let lsn_fields: Vec<&str> = halves[1].split('-').collect();
+    let lsn_end_field = if lsn_fields.len() == 1 { lsn_fields[0] } else { lsn_fields[1] };
+    let key_start = Key::from_hex(key_fields[0]).unwrap();
+    let key_end = Key::from_hex(key_fields[1]).unwrap();
+    let lsn_start = Lsn::from_hex(lsn_fields[0]).unwrap();
+    let lsn_end = Lsn::from_hex(lsn_end_field).unwrap();
+    (key_start..key_end, lsn_start..lsn_end)
+}
+
+/// Kind of event recorded on one trace line.
+///
+/// The serde names are the snake_case forms of the variant names
+/// ("evict", "compact_create", ...).
+#[derive(Serialize, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+enum LayerTraceOp {
+    /// Kept in the event list, but the generated viewer draws nothing new for it.
+    Evict,
+    Flush,
+    CompactCreate,
+    CompactDelete,
+    ImageCreate,
+    GcDelete,
+    /// Handled apart from layer ops: `main` reads the line's `cutoff`, not `filename`.
+    GcStart,
+}
+
+impl std::fmt::Display for LayerTraceOp {
+    /// Writes the op's snake_case name, e.g. `compact_create`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
+        f.write_str(match self {
+            Self::Evict => "evict",
+            Self::Flush => "flush",
+            Self::CompactCreate => "compact_create",
+            Self::CompactDelete => "compact_delete",
+            Self::ImageCreate => "image_create",
+            Self::GcDelete => "gc_delete",
+            Self::GcStart => "gc_start",
+        })
+    }
+}
+
+#[serde_with::serde_as]
+#[derive(Serialize, Deserialize)]
+struct LayerTraceLine { // one JSON record; main() parses one per line of stdin
+    time: u64, // main() subtracts the first line's `time` to get a relative time
+    op: LayerTraceOp,
+    #[serde(default)] // a line without `filename` deserializes to an empty string
+    filename: String,
+    #[serde_as(as = "Option<serde_with::DisplayFromStr>")]
+    cutoff: Option<Lsn>, // main() unwraps this for `gc_start` lines only
+}
+
+struct LayerTraceFile {
+    filename: String, // layer file name exactly as it appears in the trace
+    key_range: Range<Key>, // parsed from `filename` by `parse_filename`
+    lsn_range: Range<Lsn>, // parsed from `filename` by `parse_filename`
+}
+
+impl LayerTraceFile {
+    fn is_image(&self) -> bool {
+        self.lsn_range.start == self.lsn_range.end // a name with a single LSN parses to equal bounds
+    }
+}
+
+struct LayerTraceEvent {
+    time_rel: u64, // the line's `time` minus the first trace line's `time`
+    op: LayerTraceOp,
+    filename: String, // layer file the op applies to; also forms the SVG element id `layer_<filename>`
+}
+
+struct GcEvent {
+    time_rel: u64, // the line's `time` minus the first trace line's `time`
+    cutoff: Lsn, // the `cutoff` field of a `gc_start` trace line
+}
+
+fn main() -> Result<()> {
+    // Parse trace lines from stdin
+    let stdin = io::stdin();
+
+    let mut files: HashMap<String, LayerTraceFile> = HashMap::new();
+    let mut layer_events: Vec<LayerTraceEvent> = Vec::new();
+    let mut gc_events: Vec<GcEvent> = Vec::new();
+    let mut first_time: Option<u64> = None;
+    for line in stdin.lock().lines() {
+        let line = line.unwrap();
+        let parsed_line: LayerTraceLine = serde_json::from_str(&line)?;
+
+        let time_rel = if let Some(first_time) = first_time {
+            parsed_line.time - first_time
+        } else {
+            first_time = Some(parsed_line.time);
+            0
+        };
+
+        if parsed_line.op == LayerTraceOp::GcStart {
+            gc_events.push(GcEvent {
+                time_rel,
+                cutoff: parsed_line.cutoff.unwrap(),
+            });
+        } else {
+            layer_events.push(LayerTraceEvent {
+                time_rel,
+                filename: parsed_line.filename.clone(),
+                op: parsed_line.op,
+            });
+
+            if !files.contains_key(&parsed_line.filename) {
+                let (key_range, lsn_range) = parse_filename(&parsed_line.filename);
+                files.insert(parsed_line.filename.clone(), LayerTraceFile {
+                    filename: parsed_line.filename.clone(),
+                    key_range,
+                    lsn_range,
+                });
+            };
+        }
+    }
+    // The slider has to reach the latest event of either kind, hence `max`, not `min`.
+    let last_layer = layer_events.last().expect("trace input contains no layer events");
+    let last_gc_time_rel = gc_events.last().map_or(0, |gc| gc.time_rel);
+    let last_time_rel = std::cmp::max(last_layer.time_rel, last_gc_time_rel);
+
+    // Collect all coordinates
+    let mut keys: Vec<Key> = vec![];
+    let mut lsns: Vec<Lsn> = vec![];
+    for f in files.values() {
+        keys.push(f.key_range.start);
+        keys.push(f.key_range.end);
+        lsns.push(f.lsn_range.start);
+        lsns.push(f.lsn_range.end);
+    }
+    for gc_event in &gc_events {
+        lsns.push(gc_event.cutoff);
+    }
+
+    // Analyze
+    let key_map = CoordinateMap::new(keys, 2.0);
+    // Stretch out vertically for better visibility
+    let lsn_map = CoordinateMap::new(lsns, 3.0);
+
+    // Initialize stats
+    let mut num_deltas = 0;
+    let mut num_images = 0;
+
+    let mut svg = String::new();
+
+    // Draw
+    writeln!(svg,
+        "{}",
+        BeginSvg {
+            w: key_map.max(),
+            h: lsn_map.max(),
+        }
+    )?;
+    let lsn_max = lsn_map.max();
+
+    // Sort the files by LSN, but so that image layers go after all delta layers
+    // The SVG is painted in the order the elements appear, and we want to draw
+    // image layers on top of the delta layers if they overlap
+    let mut files_sorted: Vec<LayerTraceFile> = files.into_values().collect();
+    files_sorted.sort_by(|a, b| {
+        if a.is_image() && !b.is_image() {
+            Ordering::Greater
+        } else if !a.is_image() && b.is_image() {
+            Ordering::Less
+        } else {
+            a.lsn_range.end.cmp(&b.lsn_range.end)
+        }
+    });
+
+    for f in files_sorted {
+        let key_start = key_map.map(f.key_range.start);
+        let key_end = key_map.map(f.key_range.end);
+        let key_diff = key_end - key_start;
+
+        if key_start >= key_end {
+            panic!("Invalid key range {}-{}", key_start, key_end);
+        }
+
+        let lsn_start = lsn_map.map(f.lsn_range.start);
+        let lsn_end = lsn_map.map(f.lsn_range.end);
+
+        // Fill in and thicken rectangle if it's an
+        // image layer so that we can see it.
+        let mut style = Style::default();
+        style.fill = Fill::Color(rgb(0x80, 0x80, 0x80));
+        style.stroke = Stroke::Color(rgb(0, 0, 0), 0.5);
+
+        let y_start = (lsn_max - lsn_start) as f32;
+        let y_end = (lsn_max - lsn_end) as f32;
+
+        let x_margin = 0.25;
+        let y_margin = 0.5;
+
+        match f.lsn_range.start.cmp(&f.lsn_range.end) {
+            Ordering::Less => {
+                num_deltas += 1;
+                write!(svg,
+                       r#"    <rect id="layer_{}" x="{}" y="{}" width="{}" height="{}" ry="{}" style="{}">"#,
+                       f.filename,
+                       key_start as f32 + x_margin,
+                       y_end + y_margin,
+                       key_diff as f32 - x_margin * 2.0,
+                       y_start - y_end - y_margin * 2.0,
+                       1.0, // border_radius,
+                       style.to_string(),
+                )?;
+                write!(svg, "<title>{}<br>{} - {}</title>", f.filename, lsn_end, y_end)?;
+                writeln!(svg, "</rect>")?;
+            }
+            Ordering::Equal => {
+                num_images += 1;
+                //lsn_diff = 0.3;
+                //lsn_offset = -lsn_diff / 2.0;
+                //margin = 0.05;
+                style.fill = Fill::Color(rgb(0x80, 0, 0x80));
+                style.stroke = Stroke::Color(rgb(0x80, 0, 0x80), 3.0);
+                write!(svg,
+                       r#"    <line id="layer_{}" x1="{}" y1="{}" x2="{}" y2="{}" style="{}">"#,
+                       f.filename,
+                       key_start as f32 + x_margin,
+                       y_end,
+                       key_end as f32 - x_margin,
+                       y_end,
+                       style.to_string(),
+                )?;
+                write!(svg, "<title>{}<br>{} - {}</title>", f.filename, lsn_end, y_end)?;
+                writeln!(svg, "</line>")?;
+            }
+            Ordering::Greater => panic!("Invalid lsn range {}-{}", lsn_start, lsn_end),
+        }
+    }
+
+    for (idx, gc) in gc_events.iter().enumerate() {
+        let cutoff_lsn = lsn_map.map(gc.cutoff);
+
+        let mut style = Style::default();
+        style.fill = Fill::None;
+        style.stroke = Stroke::Color(rgb(0xff, 0, 0), 0.5);
+
+        let y = lsn_max - cutoff_lsn;
+        writeln!(svg,
+                 r#"    <line id="gc_{}" x1="{}" y1="{}" x2="{}" y2="{}" style="{}" />"#,
+                 idx,
+                 0,
+                 y,
+                 key_map.max(),
+                 y,
+                 style.to_string(),
+        )?;
+    }
+
+    writeln!(svg, "{}", EndSvg)?;
+
+    let mut layer_events_str = String::new();
+    let mut first = true;
+    for e in layer_events {
+        if !first {
+            writeln!(layer_events_str, ",")?;
+        }
+        write!(layer_events_str,
+                 r#"  {{"time_rel": {}, "filename": "{}", "op": "{}"}}"#,
+                 e.time_rel, e.filename, e.op)?;
+        first = false;
+    }
+    writeln!(layer_events_str)?;
+
+    let mut gc_events_str = String::new();
+    let mut first = true;
+    for e in gc_events {
+        if !first {
+            writeln!(gc_events_str, ",")?;
+        }
+        write!(gc_events_str,
+                 r#"  {{"time_rel": {}, "cutoff_lsn": "{}"}}"#,
+                 e.time_rel, e.cutoff)?;
+        first = false;
+    }
+    writeln!(gc_events_str)?;
+    
+    println!(r#"<!DOCTYPE html>
+<html>
+<head>
+<style>
+/* Keep the slider pinned at top */
+.topbar {{
+  display: block;
+  overflow: hidden;
+  background-color: lightgrey;
+  position: fixed;
+  top: 0;
+  width: 100%;
+/*  width: 500px; */
+}}
+.slidercontainer {{
+  float: left;
+  width: 50%;
+  margin-right: 200px;
+}}
+.slider {{
+  float: left;
+  width: 100%;
+}}
+.legend {{
+  width: 200px;
+  float: right;
+}}
+
+/* Main content */
+.main {{
+  margin-top: 50px; /* Add a top margin to avoid content overlay */
+}}
+</style>
+</head>
+
+  <body onload="init()">
+    <script type="text/javascript">
+
+      var layer_events = [{layer_events_str}]
+      var gc_events = [{gc_events_str}]
+
+      let ticker;
+
+      function init() {{
+          moveSlider({last_time_rel})
+          moveSlider(0)
+          moveSlider(last_slider_pos)
+      }}
+
+      function startAnimation() {{  // clear first: repeated Play clicks must not stack timers
+          clearInterval(ticker); ticker = setInterval(animateStep, 100);
+      }}
+      function stopAnimation() {{
+          clearInterval(ticker);
+      }}
+
+      function animateStep() {{
+          if (last_layer_event < layer_events.length - 1) {{
+              var slider = document.getElementById("time-slider");
+              let prevPos = slider.value
+              let nextEvent = last_layer_event
+              while (nextEvent < layer_events.length - 1) {{
+                  if (layer_events[nextEvent].time_rel > prevPos) {{
+                      break;
+                  }}
+                  nextEvent += 1;
+              }}
+              let nextPos = layer_events[nextEvent].time_rel
+              slider.value = nextPos
+              moveSlider(nextPos)
+          }}
+      }}
+
+      function redoLayerEvent(n, dir) {{
+          var layer = document.getElementById("layer_" + layer_events[n].filename);
+          switch (layer_events[n].op) {{
+              case "evict":
+                  break;
+              case "flush":
+                  layer.style.visibility = "visible";
+                  break;
+              case "compact_create":
+                  layer.style.visibility = "visible";
+                  break;
+              case "image_create":
+                  layer.style.visibility = "visible";
+                  break;
+              case "compact_delete":
+                  layer.style.visibility = "hidden";
+                  break;
+              case "gc_delete":
+                  layer.style.visibility = "hidden";
+                  break;
+              case "gc_start":
+                  layer.style.visibility = "hidden";
+                  break;
+          }}
+      }}
+      function undoLayerEvent(n) {{
+          var layer = document.getElementById("layer_" + layer_events[n].filename);
+          switch (layer_events[n].op) {{
+              case "evict":
+                  break;
+              case "flush":
+                  layer.style.visibility = "hidden";
+                  break;
+              case "compact_create":
+                  layer.style.visibility = "hidden";
+                  break;
+              case "image_create":
+                  layer.style.visibility = "hidden";
+                  break;
+              case "compact_delete":
+                  layer.style.visibility = "visible";
+                  break;
+              case "gc_delete":
+                  layer.style.visibility = "visible";
+                  break;
+          }}
+      }}
+
+      function redoGcEvent(n) {{
+          var prev_gc_bar = document.getElementById("gc_" + (n - 1));
+          var new_gc_bar = document.getElementById("gc_" + n);
+
+          prev_gc_bar.style.visibility = "hidden"
+          new_gc_bar.style.visibility = "visible"
+      }}
+      function undoGcEvent(n) {{
+          var prev_gc_bar = document.getElementById("gc_" + n);
+          var new_gc_bar = document.getElementById("gc_" + (n - 1));
+
+          prev_gc_bar.style.visibility = "hidden"
+          new_gc_bar.style.visibility = "visible"
+      }}
+
+      var last_slider_pos = 0
+      var last_layer_event = 0
+      var last_gc_event = 0
+
+      // Replay trace events so the drawing matches time `new_pos`. Invariant: every event
+      // with time_rel <= last_slider_pos is applied, and last_*_event is the newest applied one
+      // (event 0 of each kind counts as always applied).
+      var moveSlider = function(new_pos) {{
+          // The slider's oninput passes a string; compare numerically, not lexicographically.
+          new_pos = Number(new_pos);
+          if (new_pos > last_slider_pos) {{
+              // Forward: apply each following event that is not later than new_pos.
+              while (last_layer_event < layer_events.length - 1 &&
+                     layer_events[last_layer_event + 1].time_rel <= new_pos) {{
+                  last_layer_event += 1;
+                  redoLayerEvent(last_layer_event)
+              }}
+              while (last_gc_event < gc_events.length - 1 &&
+                     gc_events[last_gc_event + 1].time_rel <= new_pos) {{
+                  last_gc_event += 1;
+                  redoGcEvent(last_gc_event)
+              }}
+          }}
+          if (new_pos < last_slider_pos) {{
+              // Backward: undo the newest applied event while it is later than new_pos.
+              // (Testing the event before it would leave one too-late event applied.)
+              while (last_layer_event > 0 &&
+                     layer_events[last_layer_event].time_rel > new_pos) {{
+                  undoLayerEvent(last_layer_event)
+                  last_layer_event -= 1;
+              }}
+              while (last_gc_event > 0 &&
+                     gc_events[last_gc_event].time_rel > new_pos) {{
+                  undoGcEvent(last_gc_event)
+                  last_gc_event -= 1;
+              }}
+          }}
+
+          last_slider_pos = new_pos;
+          document.getElementById("debug_pos").textContent=new_pos;
+          document.getElementById("debug_layer_event").textContent=last_layer_event + " " + layer_events[last_layer_event].time_rel + " " + layer_events[last_layer_event].op;
+          // A trace without any gc_start line has no GC event to report.
+          document.getElementById("debug_gc_event").textContent=gc_events.length > 0 ? last_gc_event + " " + gc_events[last_gc_event].time_rel : "";
+      }}
+    </script>
+
+    <div class="topbar">
+      <div class="slidercontainer">
+        <label for="time-slider">TIME</label>:
+        <input id="time-slider" class="slider" type="range" min="0" max="{last_time_rel}" value="0" oninput="moveSlider(this.value)"><br>
+
+        pos: <span id="debug_pos"></span><br>
+        event: <span id="debug_layer_event"></span><br>
+        gc: <span id="debug_gc_event"></span><br>
+      </div>
+
+      <button onclick="startAnimation()">Play</button> 
+      <button onclick="stopAnimation()">Stop</button> 
+
+      <svg class="legend">
+        <rect x=5 y=0 width=20 height=20 style="fill:rgb(128,128,128);stroke:rgb(0,0,0);stroke-width:0.5;fill-opacity:1;stroke-opacity:1;"/>
+        <line x1=5 y1=30 x2=25 y2=30 style="fill:rgb(128,0,128);stroke:rgb(128,0,128);stroke-width:3;fill-opacity:1;stroke-opacity:1;"/>
+        <line x1=0 y1=40 x2=30 y2=40 style="fill:none;stroke:rgb(255,0,0);stroke-width:0.5;fill-opacity:1;stroke-opacity:1;"/>
+      </svg>
+    </div>
+
+    <div class="main">
+{svg}
+    </div>
+  </body>
+</html>
+"#);
+
+    eprintln!("num_images: {}", num_images);
+    eprintln!("num_deltas: {}", num_deltas);
+
+    Ok(())
+}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -8,7 +8,6 @@ use anyhow::{anyhow, Context};
 use clap::{Arg, ArgAction, Command};
 use fail::FailScenario;
 use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
-use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use remote_storage::GenericRemoteStorage;
 use tracing::*;

@@ -25,9 +24,11 @@ use pageserver::{
    virtual_file,
 };
 use postgres_backend::AuthType;
-use utils::signals::ShutdownSignals;
 use utils::{
-    auth::JwtAuth, logging, project_git_version, sentry_init::init_sentry, signals::Signal,
+    auth::JwtAuth,
+    logging, project_git_version,
+    sentry_init::init_sentry,
+    signals::{self, Signal},
    tcp_listener,
 };

@@ -262,6 +263,9 @@ fn start_pageserver(
    info!("Starting pageserver pg protocol handler on {pg_addr}");
    let pageserver_listener = tcp_listener::bind(pg_addr)?;

+    // Install signal handlers
+    let signals = signals::install_shutdown_handlers()?;
+
    // Launch broker client
    WALRECEIVER_RUNTIME.block_on(pageserver::broker_client::init_broker_client(conf))?;

@@ -315,34 +319,14 @@ fn start_pageserver(
    // Scan the local 'tenants/' directory and start loading the tenants
    BACKGROUND_RUNTIME.block_on(mgr::init_tenant_mgr(conf, remote_storage.clone()))?;

-    // shared state between the disk-usage backed eviction background task and the http endpoint
-    // that allows triggering disk-usage based eviction manually. note that the http endpoint
-    // is still accessible even if background task is not configured as long as remote storage has
-    // been configured.
-    let disk_usage_eviction_state: Arc<disk_usage_eviction_task::State> = Arc::default();
-
-    if let Some(remote_storage) = &remote_storage {
-        launch_disk_usage_global_eviction_task(
-            conf,
-            remote_storage.clone(),
-            disk_usage_eviction_state.clone(),
-        )?;
-    }
-
    // Start up the service to handle HTTP mgmt API request. We created the
    // listener earlier already.
    {
        let _rt_guard = MGMT_REQUEST_RUNTIME.enter();

-        let router = http::make_router(
-            conf,
-            launch_ts,
-            http_auth,
-            remote_storage,
-            disk_usage_eviction_state,
-        )?
-        .build()
-        .map_err(|err| anyhow!(err))?;
+        let router = http::make_router(conf, launch_ts, http_auth, remote_storage)?
+            .build()
+            .map_err(|err| anyhow!(err))?;
        let service = utils::http::RouterService::new(router).unwrap();
        let server = hyper::Server::from_tcp(http_listener)?
            .serve(service)
@@ -425,7 +409,7 @@ fn start_pageserver(
    }

    // All started up! Now just sit and wait for shutdown signal.
-    ShutdownSignals::handle(|signal| match signal {
+    signals.handle(|signal| match signal {
        Signal::Quit => {
            info!(
                "Got {}. Terminating in immediate shutdown mode",
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -27,7 +27,6 @@ use utils::{
    logging::LogFormat,
 };

-use crate::disk_usage_eviction_task::DiskUsageEvictionTaskConfig;
 use crate::tenant::config::TenantConf;
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::{TENANT_ATTACHING_MARKER_FILENAME, TIMELINES_SEGMENT_NAME};
@@ -93,8 +92,6 @@ pub mod defaults {

 #evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'

-#disk_usage_based_eviction = {{ max_usage_pct = .., min_avail_bytes = .., period = "10s"}}
-
 # [tenant_config]
 #checkpoint_distance = {DEFAULT_CHECKPOINT_DISTANCE} # in bytes
 #checkpoint_timeout = {DEFAULT_CHECKPOINT_TIMEOUT}
@@ -107,8 +104,6 @@ pub mod defaults {
 #image_creation_threshold = {DEFAULT_IMAGE_CREATION_THRESHOLD}
 #pitr_interval = '{DEFAULT_PITR_INTERVAL}'

-#min_resident_size_override = .. # in bytes
-
 # [remote_storage]

 "###
@@ -170,10 +165,6 @@ pub struct PageServerConf {

    /// Number of concurrent [`Tenant::gather_size_inputs`] allowed.
    pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
-    /// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
-    /// The number of permits is the same as `concurrent_tenant_size_logical_size_queries`.
-    /// See the comment in `eviction_task` for details.
-    pub eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore,

    // How often to collect metrics and send them to the metrics endpoint.
    pub metric_collection_interval: Duration,
@@ -185,8 +176,6 @@ pub struct PageServerConf {
    // See the corresponding metric's help string.
    pub evictions_low_residence_duration_metric_threshold: Duration,

-    pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
-
    pub test_remote_failures: u64,

    pub ondemand_download_behavior_treat_error_as_warn: bool,
@@ -250,7 +239,7 @@ struct PageServerConfigBuilder {

    log_format: BuilderValue<LogFormat>,

-    concurrent_tenant_size_logical_size_queries: BuilderValue<NonZeroUsize>,
+    concurrent_tenant_size_logical_size_queries: BuilderValue<ConfigurableSemaphore>,

    metric_collection_interval: BuilderValue<Duration>,
    cached_metric_collection_interval: BuilderValue<Duration>,
@@ -259,8 +248,6 @@ struct PageServerConfigBuilder {

    evictions_low_residence_duration_metric_threshold: BuilderValue<Duration>,

-    disk_usage_based_eviction: BuilderValue<Option<DiskUsageEvictionTaskConfig>>,
-
    test_remote_failures: BuilderValue<u64>,

    ondemand_download_behavior_treat_error_as_warn: BuilderValue<bool>,
@@ -299,9 +286,7 @@ impl Default for PageServerConfigBuilder {
            .expect("cannot parse default keepalive interval")),
            log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),

-            concurrent_tenant_size_logical_size_queries: Set(
-                ConfigurableSemaphore::DEFAULT_INITIAL,
-            ),
+            concurrent_tenant_size_logical_size_queries: Set(ConfigurableSemaphore::default()),
            metric_collection_interval: Set(humantime::parse_duration(
                DEFAULT_METRIC_COLLECTION_INTERVAL,
            )
@@ -321,8 +306,6 @@ impl Default for PageServerConfigBuilder {
            )
            .expect("cannot parse DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD")),

-            disk_usage_based_eviction: Set(None),
-
            test_remote_failures: Set(0),

            ondemand_download_behavior_treat_error_as_warn: Set(false),
@@ -406,7 +389,7 @@ impl PageServerConfigBuilder {
        self.log_format = BuilderValue::Set(log_format)
    }

-    pub fn concurrent_tenant_size_logical_size_queries(&mut self, u: NonZeroUsize) {
+    pub fn concurrent_tenant_size_logical_size_queries(&mut self, u: ConfigurableSemaphore) {
        self.concurrent_tenant_size_logical_size_queries = BuilderValue::Set(u);
    }

@@ -442,10 +425,6 @@ impl PageServerConfigBuilder {
        self.evictions_low_residence_duration_metric_threshold = BuilderValue::Set(value);
    }

-    pub fn disk_usage_based_eviction(&mut self, value: Option<DiskUsageEvictionTaskConfig>) {
-        self.disk_usage_based_eviction = BuilderValue::Set(value);
-    }
-
    pub fn ondemand_download_behavior_treat_error_as_warn(
        &mut self,
        ondemand_download_behavior_treat_error_as_warn: bool,
@@ -455,11 +434,6 @@ impl PageServerConfigBuilder {
    }

    pub fn build(self) -> anyhow::Result<PageServerConf> {
-        let concurrent_tenant_size_logical_size_queries = self
-            .concurrent_tenant_size_logical_size_queries
-            .ok_or(anyhow!(
-                "missing concurrent_tenant_size_logical_size_queries"
-            ))?;
        Ok(PageServerConf {
            listen_pg_addr: self
                .listen_pg_addr
@@ -507,12 +481,11 @@ impl PageServerConfigBuilder {
                .broker_keepalive_interval
                .ok_or(anyhow!("No broker keepalive interval provided"))?,
            log_format: self.log_format.ok_or(anyhow!("missing log_format"))?,
-            concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::new(
-                concurrent_tenant_size_logical_size_queries,
-            ),
-            eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::new(
-                concurrent_tenant_size_logical_size_queries,
-            ),
+            concurrent_tenant_size_logical_size_queries: self
+                .concurrent_tenant_size_logical_size_queries
+                .ok_or(anyhow!(
+                    "missing concurrent_tenant_size_logical_size_queries"
+                ))?,
            metric_collection_interval: self
                .metric_collection_interval
                .ok_or(anyhow!("missing metric_collection_interval"))?,
@@ -530,9 +503,6 @@ impl PageServerConfigBuilder {
                .ok_or(anyhow!(
                    "missing evictions_low_residence_duration_metric_threshold"
                ))?,
-            disk_usage_based_eviction: self
-                .disk_usage_based_eviction
-                .ok_or(anyhow!("missing disk_usage_based_eviction"))?,
            test_remote_failures: self
                .test_remote_failures
                .ok_or(anyhow!("missing test_remote_failuers"))?,
@@ -710,7 +680,8 @@ impl PageServerConf {
                "concurrent_tenant_size_logical_size_queries" => builder.concurrent_tenant_size_logical_size_queries({
                    let input = parse_toml_string(key, item)?;
                    let permits = input.parse::<usize>().context("expected a number of initial permits, not {s:?}")?;
-                    NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?
+                    let permits = NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?;
+                    ConfigurableSemaphore::new(permits)
                }),
                "metric_collection_interval" => builder.metric_collection_interval(parse_toml_duration(key, item)?),
                "cached_metric_collection_interval" => builder.cached_metric_collection_interval(parse_toml_duration(key, item)?),
@@ -722,12 +693,6 @@ impl PageServerConf {
                    builder.synthetic_size_calculation_interval(parse_toml_duration(key, item)?),
                "test_remote_failures" => builder.test_remote_failures(parse_toml_u64(key, item)?),
                "evictions_low_residence_duration_metric_threshold" => builder.evictions_low_residence_duration_metric_threshold(parse_toml_duration(key, item)?),
-                "disk_usage_based_eviction" => {
-                    tracing::info!("disk_usage_based_eviction: {:#?}", &item);
-                    builder.disk_usage_based_eviction(
-                    toml_edit::de::from_item(item.clone())
-                    .context("parse disk_usage_based_eviction")?)
-                },
                "ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?),
                _ => bail!("unrecognized pageserver option '{key}'"),
            }
@@ -832,13 +797,6 @@ impl PageServerConf {
            );
        }

-        if let Some(item) = item.get("min_resident_size_override") {
-            t_conf.min_resident_size_override = Some(
-                toml_edit::de::from_item(item.clone())
-                    .context("parse min_resident_size_override")?,
-            );
-        }
-
        Ok(t_conf)
    }

@@ -871,8 +829,6 @@ impl PageServerConf {
            broker_keepalive_interval: Duration::from_secs(5000),
            log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
            concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
-            eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::default(
-            ),
            metric_collection_interval: Duration::from_secs(60),
            cached_metric_collection_interval: Duration::from_secs(60 * 60),
            metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
@@ -881,7 +837,6 @@ impl PageServerConf {
                defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
            )
            .unwrap(),
-            disk_usage_based_eviction: None,
            test_remote_failures: 0,
            ondemand_download_behavior_treat_error_as_warn: false,
        }
@@ -966,11 +921,6 @@ impl ConfigurableSemaphore {
            inner: std::sync::Arc::new(tokio::sync::Semaphore::new(initial_permits.get())),
        }
    }
-
-    /// Returns the configured amount of permits.
-    pub fn initial_permits(&self) -> NonZeroUsize {
-        self.initial_permits
-    }
 }

 impl Default for ConfigurableSemaphore {
@@ -1075,8 +1025,6 @@ log_format = 'json'
                )?,
                log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
                concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
-                eviction_task_immitated_concurrent_logical_size_queries:
-                    ConfigurableSemaphore::default(),
                metric_collection_interval: humantime::parse_duration(
                    defaults::DEFAULT_METRIC_COLLECTION_INTERVAL
                )?,
@@ -1090,7 +1038,6 @@ log_format = 'json'
                evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
                    defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD
                )?,
-                disk_usage_based_eviction: None,
                test_remote_failures: 0,
                ondemand_download_behavior_treat_error_as_warn: false,
            },
@@ -1138,14 +1085,11 @@ log_format = 'json'
                broker_keepalive_interval: Duration::from_secs(5),
                log_format: LogFormat::Json,
                concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
-                eviction_task_immitated_concurrent_logical_size_queries:
-                    ConfigurableSemaphore::default(),
                metric_collection_interval: Duration::from_secs(222),
                cached_metric_collection_interval: Duration::from_secs(22200),
                metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?),
                synthetic_size_calculation_interval: Duration::from_secs(333),
                evictions_low_residence_duration_metric_threshold: Duration::from_secs(444),
-                disk_usage_based_eviction: None,
                test_remote_failures: 0,
                ondemand_download_behavior_treat_error_as_warn: false,
            },
@@ -1272,7 +1216,6 @@ broker_endpoint = '{broker_endpoint}'
                        prefix_in_bucket: Some(prefix_in_bucket.clone()),
                        endpoint: Some(endpoint.clone()),
                        concurrency_limit: s3_concurrency_limit,
-                        max_keys_per_list_response: None,
                    }),
                },
                "Remote storage config should correctly parse the S3 config"
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -1,728 +0,0 @@
-//! This module implements the pageserver-global disk-usage-based layer eviction task.
-//!
-//! # Mechanics
-//!
-//! Function `launch_disk_usage_global_eviction_task` starts a pageserver-global background
-//! loop that evicts layers in response to a shortage of available bytes
-//! in the $repo/tenants directory's filesystem.
-//!
-//! The loop runs periodically at a configurable `period`.
-//!
-//! Each loop iteration uses `statvfs` to determine filesystem-level space usage.
-//! It compares the returned usage data against two different types of thresholds.
-//! The iteration tries to evict layers until app-internal accounting says we should be below the thresholds.
-//! We cross-check this internal accounting with the real world by making another `statvfs` at the end of the iteration.
-//! We're good if that second statvfs shows that we're _actually_ below the configured thresholds.
-//! If we're still above one or more thresholds, we emit a warning log message, leaving it to the operator to investigate further.
-//!
-//! # Eviction Policy
-//!
-//! There are two thresholds:
-//! `max_usage_pct` is the relative available space, expressed in percent of the total filesystem space.
-//! If the actual usage is higher, the threshold is exceeded.
-//! `min_avail_bytes` is the absolute available space in bytes.
-//! If the actual usage is lower, the threshold is exceeded.
-//! If either of these thresholds is exceeded, the system is considered to have "disk pressure", and eviction
-//! is performed on the next iteration, to release disk space and bring the usage below the thresholds again.
-//! The iteration evicts layers in LRU fashion, but, with a weak reservation per tenant.
-//! The reservation is to keep the most recently accessed X bytes per tenant resident.
-//! If we cannot relieve pressure by evicting layers outside of the reservation, we
-//! start evicting layers that are part of the reservation, LRU first.
-//!
-//! The value for the per-tenant reservation is referred to as `tenant_min_resident_size`
-//! throughout the code, but, no actual variable carries that name.
-//! The per-tenant default value is the `max(tenant's layer file sizes, regardless of local or remote)`.
-//! The idea is to allow at least one layer to be resident per tenant, to ensure it can make forward progress
-//! during page reconstruction.
-//! An alternative default for all tenants can be specified in the `tenant_config` section of the config.
-//! Lastly, each tenant can have an override in their respective tenant config (`min_resident_size_override`).
-
-// Implementation notes:
-// - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl
-//   reading these fields. We use the Debug impl for semi-structured logging, though.
-
-use std::{
-    collections::HashMap,
-    path::Path,
-    sync::Arc,
-    time::{Duration, SystemTime},
-};
-
-use anyhow::Context;
-use remote_storage::GenericRemoteStorage;
-use serde::{Deserialize, Serialize};
-use tokio::time::Instant;
-use tokio_util::sync::CancellationToken;
-use tracing::{debug, error, info, instrument, warn, Instrument};
-use utils::serde_percent::Percent;
-
-use crate::{
-    config::PageServerConf,
-    task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
-    tenant::{self, storage_layer::PersistentLayer, Timeline},
-};
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
-pub struct DiskUsageEvictionTaskConfig {
-    pub max_usage_pct: Percent,
-    pub min_avail_bytes: u64,
-    #[serde(with = "humantime_serde")]
-    pub period: Duration,
-    #[cfg(feature = "testing")]
-    pub mock_statvfs: Option<crate::statvfs::mock::Behavior>,
-}
-
-#[derive(Default)]
-pub struct State {
-    /// Exclude http requests and background task from running at the same time.
-    mutex: tokio::sync::Mutex<()>,
-}
-
-pub fn launch_disk_usage_global_eviction_task(
-    conf: &'static PageServerConf,
-    storage: GenericRemoteStorage,
-    state: Arc<State>,
-) -> anyhow::Result<()> {
-    let Some(task_config) = &conf.disk_usage_based_eviction else {
-        info!("disk usage based eviction task not configured");
-        return Ok(());
-    };
-
-    info!("launching disk usage based eviction task");
-
-    task_mgr::spawn(
-        BACKGROUND_RUNTIME.handle(),
-        TaskKind::DiskUsageEviction,
-        None,
-        None,
-        "disk usage based eviction",
-        false,
-        async move {
-            disk_usage_eviction_task(
-                &state,
-                task_config,
-                storage,
-                &conf.tenants_path(),
-                task_mgr::shutdown_token(),
-            )
-            .await;
-            info!("disk usage based eviction task finishing");
-            Ok(())
-        },
-    );
-
-    Ok(())
-}
-
-#[instrument(skip_all)]
-async fn disk_usage_eviction_task(
-    state: &State,
-    task_config: &DiskUsageEvictionTaskConfig,
-    storage: GenericRemoteStorage,
-    tenants_dir: &Path,
-    cancel: CancellationToken,
-) {
-    use crate::tenant::tasks::random_init_delay;
-    {
-        if random_init_delay(task_config.period, &cancel)
-            .await
-            .is_err()
-        {
-            info!("shutting down");
-            return;
-        }
-    }
-
-    let mut iteration_no = 0;
-    loop {
-        iteration_no += 1;
-        let start = Instant::now();
-
-        async {
-            let res = disk_usage_eviction_task_iteration(
-                state,
-                task_config,
-                &storage,
-                tenants_dir,
-                &cancel,
-            )
-            .await;
-
-            match res {
-                Ok(()) => {}
-                Err(e) => {
-                    // these stat failures are expected to be very rare
-                    warn!("iteration failed, unexpected error: {e:#}");
-                }
-            }
-        }
-        .instrument(tracing::info_span!("iteration", iteration_no))
-        .await;
-
-        let sleep_until = start + task_config.period;
-        tokio::select! {
-            _ = tokio::time::sleep_until(sleep_until) => {},
-            _ = cancel.cancelled() => {
-                info!("shutting down");
-                break
-            }
-        }
-    }
-}
-
-pub trait Usage: Clone + Copy + std::fmt::Debug {
-    fn has_pressure(&self) -> bool;
-    fn add_available_bytes(&mut self, bytes: u64);
-}
-
-async fn disk_usage_eviction_task_iteration(
-    state: &State,
-    task_config: &DiskUsageEvictionTaskConfig,
-    storage: &GenericRemoteStorage,
-    tenants_dir: &Path,
-    cancel: &CancellationToken,
-) -> anyhow::Result<()> {
-    let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
-        .context("get filesystem-level disk usage before evictions")?;
-    let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
-    match res {
-        Ok(outcome) => {
-            debug!(?outcome, "disk_usage_eviction_iteration finished");
-            match outcome {
-                IterationOutcome::NoPressure | IterationOutcome::Cancelled => {
-                    // nothing to do, select statement below will handle things
-                }
-                IterationOutcome::Finished(outcome) => {
-                    // Verify with statvfs whether we made any real progress
-                    let after = filesystem_level_usage::get(tenants_dir, task_config)
-                        // It's quite unlikely to hit the error here. Keep the code simple and bail out.
-                        .context("get filesystem-level disk usage after evictions")?;
-
-                    debug!(?after, "disk usage");
-
-                    if after.has_pressure() {
-                        // Don't bother doing an out-of-order iteration here now.
-                        // In practice, the task period is set to a value in the tens-of-seconds range,
-                        // which will cause another iteration to happen soon enough.
-                        // TODO: deltas between the three different usages would be helpful,
-                        // consider MiB, GiB, TiB
-                        warn!(?outcome, ?after, "disk usage still high");
-                    } else {
-                        info!(?outcome, ?after, "disk usage pressure relieved");
-                    }
-                }
-            }
-        }
-        Err(e) => {
-            error!("disk_usage_eviction_iteration failed: {:#}", e);
-        }
-    }
-
-    Ok(())
-}
-
-#[derive(Debug, Serialize)]
-#[allow(clippy::large_enum_variant)]
-pub enum IterationOutcome<U> {
-    NoPressure,
-    Cancelled,
-    Finished(IterationOutcomeFinished<U>),
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Serialize)]
-pub struct IterationOutcomeFinished<U> {
-    /// The actual usage observed before we started the iteration.
-    before: U,
-    /// The expected value for `after`, according to internal accounting, after phase 1.
-    planned: PlannedUsage<U>,
-    /// The outcome of phase 2, where we actually do the evictions.
-    ///
-    /// If all layers that phase 1 planned to evict _can_ actually get evicted, this will
-    /// be the same as `planned`.
-    assumed: AssumedUsage<U>,
-}
-
-#[derive(Debug, Serialize)]
-#[allow(dead_code)]
-struct AssumedUsage<U> {
-    /// The expected value for `after`, after phase 2.
-    projected_after: U,
-    /// The layers we failed to evict during phase 2.
-    failed: LayerCount,
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Serialize)]
-struct PlannedUsage<U> {
-    respecting_tenant_min_resident_size: U,
-    fallback_to_global_lru: Option<U>,
-}
-
-#[allow(dead_code)]
-#[derive(Debug, Default, Serialize)]
-struct LayerCount {
-    file_sizes: u64,
-    count: usize,
-}
-
-pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
-    state: &State,
-    storage: &GenericRemoteStorage,
-    usage_pre: U,
-    cancel: &CancellationToken,
-) -> anyhow::Result<IterationOutcome<U>> {
-    // use tokio's mutex to get a Sync guard (instead of std::sync::Mutex)
-    let _g = state
-        .mutex
-        .try_lock()
-        .map_err(|_| anyhow::anyhow!("iteration is already executing"))?;
-
-    debug!(?usage_pre, "disk usage");
-
-    if !usage_pre.has_pressure() {
-        return Ok(IterationOutcome::NoPressure);
-    }
-
-    warn!(
-        ?usage_pre,
-        "running disk usage based eviction due to pressure"
-    );
-
-    let candidates = match collect_eviction_candidates(cancel).await? {
-        EvictionCandidates::Cancelled => {
-            return Ok(IterationOutcome::Cancelled);
-        }
-        EvictionCandidates::Finished(partitioned) => partitioned,
-    };
-
-    // Debug-log the list of candidates
-    let now = SystemTime::now();
-    for (i, (partition, candidate)) in candidates.iter().enumerate() {
-        debug!(
-            "cand {}/{}: size={}, no_access_for={}us, parition={:?}, tenant={} timeline={} layer={}",
-            i + 1,
-            candidates.len(),
-            candidate.layer.file_size(),
-            now.duration_since(candidate.last_activity_ts)
-                .unwrap()
-                .as_micros(),
-            partition,
-            candidate.layer.get_tenant_id(),
-            candidate.layer.get_timeline_id(),
-            candidate.layer.filename().file_name(),
-        );
-    }
-
-    // phase1: select victims to relieve pressure
-    //
-    // Walk through the list of candidates, until we have accumulated enough layers to get
-    // us back under the pressure threshold. 'usage_planned' is updated so that it tracks
-    // how much disk space would be used after evicting all the layers up to the current
-    // point in the list. The layers are collected in 'batched', grouped per timeline.
-    //
-    // If we get far enough in the list that we start to evict layers that are below
-    // the tenant's min-resident-size threshold, print a warning, and memorize the disk
-    // usage at that point, in 'usage_planned_min_resident_size_respecting'.
-    let mut batched: HashMap<_, Vec<Arc<dyn PersistentLayer>>> = HashMap::new();
-    let mut warned = None;
-    let mut usage_planned = usage_pre;
-    for (i, (partition, candidate)) in candidates.into_iter().enumerate() {
-        if !usage_planned.has_pressure() {
-            debug!(
-                no_candidates_evicted = i,
-                "took enough candidates for pressure to be relieved"
-            );
-            break;
-        }
-
-        if partition == MinResidentSizePartition::Below && warned.is_none() {
-            warn!(?usage_pre, ?usage_planned, candidate_no=i, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
-            warned = Some(usage_planned);
-        }
-
-        usage_planned.add_available_bytes(candidate.layer.file_size());
-
-        batched
-            .entry(TimelineKey(candidate.timeline))
-            .or_default()
-            .push(candidate.layer);
-    }
-
-    let usage_planned = match warned {
-        Some(respecting_tenant_min_resident_size) => PlannedUsage {
-            respecting_tenant_min_resident_size,
-            fallback_to_global_lru: Some(usage_planned),
-        },
-        None => PlannedUsage {
-            respecting_tenant_min_resident_size: usage_planned,
-            fallback_to_global_lru: None,
-        },
-    };
-    debug!(?usage_planned, "usage planned");
-
-    // phase2: evict victims batched by timeline
-
-    // After the loop, `usage_assumed` is the post-eviction usage,
-    // according to internal accounting.
-    let mut usage_assumed = usage_pre;
-    let mut evictions_failed = LayerCount::default();
-    for (timeline, batch) in batched {
-        let tenant_id = timeline.tenant_id;
-        let timeline_id = timeline.timeline_id;
-        let batch_size = batch.len();
-
-        debug!(%timeline_id, "evicting batch for timeline");
-
-        async {
-            let results = timeline.evict_layers(storage, &batch, cancel.clone()).await;
-
-            match results {
-                Err(e) => {
-                    warn!("failed to evict batch: {:#}", e);
-                }
-                Ok(results) => {
-                    assert_eq!(results.len(), batch.len());
-                    for (result, layer) in results.into_iter().zip(batch.iter()) {
-                        match result {
-                            Some(Ok(true)) => {
-                                usage_assumed.add_available_bytes(layer.file_size());
-                            }
-                            Some(Ok(false)) => {
-                                // this is:
-                                // - Replacement::{NotFound, Unexpected}
-                                // - it cannot be is_remote_layer, filtered already
-                                evictions_failed.file_sizes += layer.file_size();
-                                evictions_failed.count += 1;
-                            }
-                            None => {
-                                assert!(cancel.is_cancelled());
-                                return;
-                            }
-                            Some(Err(e)) => {
-                                // we really shouldn't be getting this, precondition failure
-                                error!("failed to evict layer: {:#}", e);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        .instrument(tracing::info_span!("evict_batch", %tenant_id, %timeline_id, batch_size))
-        .await;
-
-        if cancel.is_cancelled() {
-            return Ok(IterationOutcome::Cancelled);
-        }
-    }
-
-    Ok(IterationOutcome::Finished(IterationOutcomeFinished {
-        before: usage_pre,
-        planned: usage_planned,
-        assumed: AssumedUsage {
-            projected_after: usage_assumed,
-            failed: evictions_failed,
-        },
-    }))
-}
-
-#[derive(Clone)]
-struct EvictionCandidate {
-    timeline: Arc<Timeline>,
-    layer: Arc<dyn PersistentLayer>,
-    last_activity_ts: SystemTime,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
-enum MinResidentSizePartition {
-    Above,
-    Below,
-}
-
-enum EvictionCandidates {
-    Cancelled,
-    Finished(Vec<(MinResidentSizePartition, EvictionCandidate)>),
-}
-
-/// Gather the eviction candidates.
-///
-/// The returned `Ok(EvictionCandidates::Finished(candidates))` is sorted in eviction
-/// order. A caller that evicts in that order, until pressure is relieved, implements
-/// the eviction policy outlined in the module comment.
-///
-/// # Example
-///
-/// Imagine that there are two tenants, A and B, with five layers each, a-e.
-/// Each layer has size 100, and both tenant's min_resident_size is 150.
-/// The eviction order would be
-///
-/// ```text
-/// partition last_activity_ts    tenant/layer
-/// Above     18:30               A/c
-/// Above     19:00               A/b
-/// Above     18:29               B/c
-/// Above     19:05               B/b
-/// Above     20:00               B/a
-/// Above     20:03               A/a
-/// Below     20:30               A/d
-/// Below     20:40               B/d
-/// Below     20:45               B/e
-/// Below     20:58               A/e
-/// ```
-///
-/// Now, if we need to evict 300 bytes to relieve pressure, we'd evict `A/c, A/b, B/c`.
-/// They are all in the `Above` partition, so, we respected each tenant's min_resident_size.
-///
-/// But, if we need to evict 900 bytes to relieve pressure, we'd evict
-/// `A/c, A/b, B/c, B/b, B/a, A/a, A/d, B/d, B/e`, reaching into the `Below` partition
-/// after exhauting the `Above` partition.
-/// So, we did not respect each tenant's min_resident_size.
-async fn collect_eviction_candidates(
-    cancel: &CancellationToken,
-) -> anyhow::Result<EvictionCandidates> {
-    // get a snapshot of the list of tenants
-    let tenants = tenant::mgr::list_tenants()
-        .await
-        .context("get list of tenants")?;
-
-    let mut candidates = Vec::new();
-
-    for (tenant_id, _state) in &tenants {
-        if cancel.is_cancelled() {
-            return Ok(EvictionCandidates::Cancelled);
-        }
-        let tenant = match tenant::mgr::get_tenant(*tenant_id, true).await {
-            Ok(tenant) => tenant,
-            Err(e) => {
-                // this can happen if tenant has lifecycle transition after we fetched it
-                debug!("failed to get tenant: {e:#}");
-                continue;
-            }
-        };
-
-        // collect layers from all timelines in this tenant
-        //
-        // If one of the timelines becomes `!is_active()` during the iteration,
-        // for example because we're shutting down, then `max_layer_size` can be too small.
-        // That's OK. This code only runs under a disk pressure situation, and being
-        // a little unfair to tenants during shutdown in such a situation is tolerable.
-        let mut tenant_candidates = Vec::new();
-        let mut max_layer_size = 0;
-        for tl in tenant.list_timelines() {
-            if !tl.is_active() {
-                continue;
-            }
-            let info = tl.get_local_layers_for_disk_usage_eviction();
-            debug!(tenant_id=%tl.tenant_id, timeline_id=%tl.timeline_id, "timeline resident layers count: {}", info.resident_layers.len());
-            tenant_candidates.extend(
-                info.resident_layers
-                    .into_iter()
-                    .map(|layer_infos| (tl.clone(), layer_infos)),
-            );
-            max_layer_size = max_layer_size.max(info.max_layer_size.unwrap_or(0));
-
-            if cancel.is_cancelled() {
-                return Ok(EvictionCandidates::Cancelled);
-            }
-        }
-
-        // `min_resident_size` defaults to maximum layer file size of the tenant.
-        // This ensures that each tenant can have at least one layer resident at a given time,
-        // ensuring forward progress for a single Timeline::get in that tenant.
-        // It's a questionable heuristic since, usually, there are many Timeline::get
-        // requests going on for a tenant, and, at least in Neon prod, the median
-        // layer file size is much smaller than the compaction target size.
-        // We could be better here, e.g., sum of all L0 layers + most recent L1 layer.
-        // That's what's typically used by the various background loops.
-        //
-        // The default can be overriden with a fixed value in the tenant conf.
-        // A default override can be put in the default tenant conf in the pageserver.toml.
-        let min_resident_size = if let Some(s) = tenant.get_min_resident_size_override() {
-            debug!(
-                tenant_id=%tenant.tenant_id(),
-                overriden_size=s,
-                "using overridden min resident size for tenant"
-            );
-            s
-        } else {
-            debug!(
-                tenant_id=%tenant.tenant_id(),
-                max_layer_size,
-                "using max layer size as min_resident_size for tenant",
-            );
-            max_layer_size
-        };
-
-        // Sort layers most-recently-used first, then partition by
-        // cumsum above/below min_resident_size.
-        tenant_candidates
-            .sort_unstable_by_key(|(_, layer_info)| std::cmp::Reverse(layer_info.last_activity_ts));
-        let mut cumsum: i128 = 0;
-        for (timeline, layer_info) in tenant_candidates.into_iter() {
-            let file_size = layer_info.file_size();
-            let candidate = EvictionCandidate {
-                timeline,
-                last_activity_ts: layer_info.last_activity_ts,
-                layer: layer_info.layer,
-            };
-            let partition = if cumsum > min_resident_size as i128 {
-                MinResidentSizePartition::Above
-            } else {
-                MinResidentSizePartition::Below
-            };
-            candidates.push((partition, candidate));
-            cumsum += i128::from(file_size);
-        }
-    }
-
-    debug_assert!(MinResidentSizePartition::Above < MinResidentSizePartition::Below,
-        "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first");
-    candidates
-        .sort_unstable_by_key(|(partition, candidate)| (*partition, candidate.last_activity_ts));
-
-    Ok(EvictionCandidates::Finished(candidates))
-}
-
-struct TimelineKey(Arc<Timeline>);
-
-impl PartialEq for TimelineKey {
-    fn eq(&self, other: &Self) -> bool {
-        Arc::ptr_eq(&self.0, &other.0)
-    }
-}
-
-impl Eq for TimelineKey {}
-
-impl std::hash::Hash for TimelineKey {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        Arc::as_ptr(&self.0).hash(state);
-    }
-}
-
-impl std::ops::Deref for TimelineKey {
-    type Target = Timeline;
-
-    fn deref(&self) -> &Self::Target {
-        self.0.as_ref()
-    }
-}
-
-mod filesystem_level_usage {
-    use std::path::Path;
-
-    use anyhow::Context;
-
-    use crate::statvfs::Statvfs;
-
-    use super::DiskUsageEvictionTaskConfig;
-
-    #[derive(Debug, Clone, Copy)]
-    #[allow(dead_code)]
-    pub struct Usage<'a> {
-        config: &'a DiskUsageEvictionTaskConfig,
-
-        /// Filesystem capacity
-        total_bytes: u64,
-        /// Free filesystem space
-        avail_bytes: u64,
-    }
-
-    impl super::Usage for Usage<'_> {
-        fn has_pressure(&self) -> bool {
-            let usage_pct =
-                (100.0 * (1.0 - ((self.avail_bytes as f64) / (self.total_bytes as f64)))) as u64;
-
-            let pressures = [
-                (
-                    "min_avail_bytes",
-                    self.avail_bytes < self.config.min_avail_bytes,
-                ),
-                (
-                    "max_usage_pct",
-                    usage_pct >= self.config.max_usage_pct.get() as u64,
-                ),
-            ];
-
-            pressures.into_iter().any(|(_, has_pressure)| has_pressure)
-        }
-
-        fn add_available_bytes(&mut self, bytes: u64) {
-            self.avail_bytes += bytes;
-        }
-    }
-
-    pub fn get<'a>(
-        tenants_dir: &Path,
-        config: &'a DiskUsageEvictionTaskConfig,
-    ) -> anyhow::Result<Usage<'a>> {
-        let mock_config = {
-            #[cfg(feature = "testing")]
-            {
-                config.mock_statvfs.as_ref()
-            }
-            #[cfg(not(feature = "testing"))]
-            {
-                None
-            }
-        };
-
-        let stat = Statvfs::get(tenants_dir, mock_config)
-            .context("statvfs failed, presumably directory got unlinked")?;
-
-        // https://unix.stackexchange.com/a/703650
-        let blocksize = if stat.fragment_size() > 0 {
-            stat.fragment_size()
-        } else {
-            stat.block_size()
-        };
-
-        // use blocks_available (b_avail) since, pageserver runs as unprivileged user
-        let avail_bytes = stat.blocks_available() * blocksize;
-        let total_bytes = stat.blocks() * blocksize;
-
-        Ok(Usage {
-            config,
-            total_bytes,
-            avail_bytes,
-        })
-    }
-
-    #[test]
-    fn max_usage_pct_pressure() {
-        use super::Usage as _;
-        use std::time::Duration;
-        use utils::serde_percent::Percent;
-
-        let mut usage = Usage {
-            config: &DiskUsageEvictionTaskConfig {
-                max_usage_pct: Percent::new(85).unwrap(),
-                min_avail_bytes: 0,
-                period: Duration::MAX,
-                #[cfg(feature = "testing")]
-                mock_statvfs: None,
-            },
-            total_bytes: 100_000,
-            avail_bytes: 0,
-        };
-
-        assert!(usage.has_pressure(), "expected pressure at 100%");
-
-        usage.add_available_bytes(14_000);
-        assert!(usage.has_pressure(), "expected pressure at 86%");
-
-        usage.add_available_bytes(999);
-        assert!(usage.has_pressure(), "expected pressure at 85.001%");
-
-        usage.add_available_bytes(1);
-        assert!(usage.has_pressure(), "expected pressure at precisely 85%");
-
-        usage.add_available_bytes(1);
-        assert!(!usage.has_pressure(), "no pressure at 84.999%");
-
-        usage.add_available_bytes(999);
-        assert!(!usage.has_pressure(), "no pressure at 84%");
-
-        usage.add_available_bytes(16_000);
-        assert!(!usage.has_pressure());
-    }
-}
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -27,31 +27,6 @@ paths:
                  id:
                    type: integer

-  /v1/disk_usage_eviction/run:
-    put:
-      description: Do an iteration of disk-usage-based eviction to evict a given amount of disk space.
-      security: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              type: object
-              required:
-                - evict_bytes
-              properties:
-                evict_bytes:
-                  type: integer
-      responses:
-        "200":
-          description: |
-            The run completed.
-            This does not necessarily mean that we actually evicted `evict_bytes`.
-            Examine the returned object for detail, or, just watch the actual effect of the call using `du` or `df`.
-          content:
-            application/json:
-              schema:
-                type: object
-
  /v1/tenant/{tenant_id}:
    parameters:
      - name: tenant_id
@@ -208,19 +183,6 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/ForbiddenError"
-        "404":
-          description: Timeline not found
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/NotFoundError"
-        "412":
-          description: Tenant is missing
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/PreconditionFailedError"
-
        "500":
          description: Generic operation error
          content:
@@ -421,12 +383,6 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/ForbiddenError"
-        "404":
-          description: Tenant not found
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/NotFoundError"
        "500":
          description: Generic operation error
          content:
@@ -898,9 +854,13 @@ components:
      type: object
      properties:
        tenant_specific_overrides:
-          $ref: "#/components/schemas/TenantConfigInfo"
+          type: object
+          schema:
+            $ref: "#/components/schemas/TenantConfigInfo"
        effective_config:
-          $ref: "#/components/schemas/TenantConfigInfo"
+          type: object
+          schema:
+            $ref: "#/components/schemas/TenantConfigInfo"
    TimelineInfo:
      type: object
      required:
@@ -986,13 +946,6 @@ components:
      properties:
        msg:
          type: string
-    PreconditionFailedError:
-      type: object
-      required:
-        - msg
-      properties:
-        msg:
-          type: string

 security:
  - JWT: []
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -18,7 +18,6 @@ use super::models::{
    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
 };
 use crate::context::{DownloadBehavior, RequestContext};
-use crate::disk_usage_eviction_task;
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::TenantConfOpt;
@@ -49,7 +48,6 @@ struct State {
    auth: Option<Arc<JwtAuth>>,
    allowlist_routes: Vec<Uri>,
    remote_storage: Option<GenericRemoteStorage>,
-    disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
 }

 impl State {
@@ -57,7 +55,6 @@ impl State {
        conf: &'static PageServerConf,
        auth: Option<Arc<JwtAuth>>,
        remote_storage: Option<GenericRemoteStorage>,
-        disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
    ) -> anyhow::Result<Self> {
        let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml"]
            .iter()
@@ -68,7 +65,6 @@ impl State {
            auth,
            allowlist_routes,
            remote_storage,
-            disk_usage_eviction_state,
        })
    }
 }
@@ -135,34 +131,6 @@ impl From<TenantStateError> for ApiError {
    }
 }

-impl From<crate::tenant::DeleteTimelineError> for ApiError {
-    fn from(value: crate::tenant::DeleteTimelineError) -> Self {
-        use crate::tenant::DeleteTimelineError::*;
-        match value {
-            NotFound => ApiError::NotFound(anyhow::anyhow!("timeline not found")),
-            HasChildren => ApiError::BadRequest(anyhow::anyhow!(
-                "Cannot delete timeline which has child timelines"
-            )),
-            Other(e) => ApiError::InternalServerError(e),
-        }
-    }
-}
-
-impl From<crate::tenant::mgr::DeleteTimelineError> for ApiError {
-    fn from(value: crate::tenant::mgr::DeleteTimelineError) -> Self {
-        use crate::tenant::mgr::DeleteTimelineError::*;
-        match value {
-            // Report Precondition failed so client can distinguish between
-            // "tenant is missing" case from "timeline is missing"
-            Tenant(TenantStateError::NotFound(..)) => {
-                ApiError::PreconditionFailed("Requested tenant is missing")
-            }
-            Tenant(t) => ApiError::from(t),
-            Timeline(t) => ApiError::from(t),
-        }
-    }
-}
-
 // Helper function to construct a TimelineInfo struct for a timeline
 async fn build_timeline_info(
    timeline: &Arc<Timeline>,
@@ -779,8 +747,6 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
        );
    }

-    tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
-
    let target_tenant_id = request_data
        .new_tenant_id
        .map(TenantId::from)
@@ -912,8 +878,6 @@ async fn update_tenant_config_handler(
        );
    }

-    tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
-
    let state = get_state(&request);
    mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
        .instrument(info_span!("tenant_config", tenant = ?tenant_id))
@@ -922,20 +886,6 @@ async fn update_tenant_config_handler(
    json_response(StatusCode::OK, ())
 }

-/// Testing helper to transition a tenant to [`crate::tenant::TenantState::Broken`].
-#[cfg(feature = "testing")]
-async fn handle_tenant_break(r: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
-
-    let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
-        .await
-        .map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
-
-    tenant.set_broken("broken from test");
-
-    json_response(StatusCode::OK, ())
-}
-
 #[cfg(feature = "testing")]
 async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    if !fail::has_failpoints() {
@@ -1085,89 +1035,6 @@ async fn always_panic_handler(req: Request<Body>) -> Result<Response<Body>, ApiE
    json_response(StatusCode::NO_CONTENT, ())
 }

-async fn disk_usage_eviction_run(mut r: Request<Body>) -> Result<Response<Body>, ApiError> {
-    check_permission(&r, None)?;
-
-    #[derive(Debug, Clone, Copy, serde::Serialize, serde::Deserialize)]
-    struct Config {
-        /// How many bytes to evict before reporting that pressure is relieved.
-        evict_bytes: u64,
-    }
-
-    #[derive(Debug, Clone, Copy, serde::Serialize)]
-    struct Usage {
-        // remains unchanged after instantiation of the struct
-        config: Config,
-        // updated by `add_available_bytes`
-        freed_bytes: u64,
-    }
-
-    impl crate::disk_usage_eviction_task::Usage for Usage {
-        fn has_pressure(&self) -> bool {
-            self.config.evict_bytes > self.freed_bytes
-        }
-
-        fn add_available_bytes(&mut self, bytes: u64) {
-            self.freed_bytes += bytes;
-        }
-    }
-
-    let config = json_request::<Config>(&mut r)
-        .await
-        .map_err(|_| ApiError::BadRequest(anyhow::anyhow!("invalid JSON body")))?;
-
-    let usage = Usage {
-        config,
-        freed_bytes: 0,
-    };
-
-    use crate::task_mgr::MGMT_REQUEST_RUNTIME;
-
-    let (tx, rx) = tokio::sync::oneshot::channel();
-
-    let state = get_state(&r);
-
-    let Some(storage) = state.remote_storage.clone() else {
-        return Err(ApiError::InternalServerError(anyhow::anyhow!(
-            "remote storage not configured, cannot run eviction iteration"
-        )))
-    };
-
-    let state = state.disk_usage_eviction_state.clone();
-
-    let cancel = CancellationToken::new();
-    let child_cancel = cancel.clone();
-    let _g = cancel.drop_guard();
-
-    crate::task_mgr::spawn(
-        MGMT_REQUEST_RUNTIME.handle(),
-        TaskKind::DiskUsageEviction,
-        None,
-        None,
-        "ondemand disk usage eviction",
-        false,
-        async move {
-            let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
-                &state,
-                &storage,
-                usage,
-                &child_cancel,
-            )
-            .await;
-
-            info!(?res, "disk_usage_eviction_task_iteration_impl finished");
-
-            let _ = tx.send(res);
-            Ok(())
-        }
-        .in_current_span(),
-    );
-
-    let response = rx.await.unwrap().map_err(ApiError::InternalServerError)?;
-
-    json_response(StatusCode::OK, response)
-}
-
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(
        StatusCode::NOT_FOUND,
@@ -1180,7 +1047,6 @@ pub fn make_router(
    launch_ts: &'static LaunchTimestamp,
    auth: Option<Arc<JwtAuth>>,
    remote_storage: Option<GenericRemoteStorage>,
-    disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
 ) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
    let spec = include_bytes!("openapi_spec.yml");
    let mut router = attach_openapi_ui(endpoint::make_router(), spec, "/swagger.yml", "/v1/doc");
@@ -1225,8 +1091,7 @@ pub fn make_router(

    Ok(router
        .data(Arc::new(
-            State::new(conf, auth, remote_storage, disk_usage_eviction_state)
-                .context("Failed to initialize router state")?,
+            State::new(conf, auth, remote_storage).context("Failed to initialize router state")?,
        ))
        .get("/v1/status", |r| RequestSpan(status_handler).handle(r))
        .put(
@@ -1307,13 +1172,6 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
            |r| RequestSpan(evict_timeline_layer_handler).handle(r),
        )
-        .put("/v1/disk_usage_eviction/run", |r| {
-            RequestSpan(disk_usage_eviction_run).handle(r)
-        })
-        .put(
-            "/v1/tenant/:tenant_id/break",
-            testing_api!("set tenant state to broken", handle_tenant_break),
-        )
        .get("/v1/panic", |r| RequestSpan(always_panic_handler).handle(r))
        .any(handler_404))
 }
--- a/pageserver/src/keyspace.rs
+++ b/pageserver/src/keyspace.rs
@@ -1,11 +1,12 @@
 use crate::repository::{key_range_size, singleton_range, Key};
 use postgres_ffi::BLCKSZ;
 use std::ops::Range;
+use tracing::debug;

 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Default)]
 pub struct KeySpace {
    /// Contiguous ranges of keys that belong to the key space. In key order,
    /// and with no overlap.
@@ -61,6 +62,60 @@ impl KeySpace {

        KeyPartitioning { parts }
    }
+
+    /// Add range to keyspace.
+    ///
+    /// Unlike KeySpaceAccum, it accepts key ranges in any order and overlapping ranges.
+    /// Existing ranges that overlap or touch `range` are merged with it, so that
+    /// `self.ranges` stays in key order with no overlap. Empty ranges are ignored.
+    pub fn add_range(&mut self, range: Range<Key>) {
+        if range.is_empty() {
+            // Nothing to add; an empty entry would only clutter `ranges`.
+            return;
+        }
+
+        let mut start = range.start;
+        let mut end = range.end;
+
+        // `ranges` is sorted and non-overlapping, so the ranges that have to be merged
+        // with the new one form a single contiguous run `first..last`:
+        //   first: index of the first range ending at or after `start`
+        //   last:  index of the first range starting strictly after `end`
+        // Using `<` and `<=` here means ranges that merely touch are coalesced too.
+        let first = self.ranges.partition_point(|r| r.end < start);
+        let last = self.ranges.partition_point(|r| r.start <= end);
+
+        if first < last {
+            // Widen the new range to cover everything it is about to replace.
+            let run_start = self.ranges[first].start;
+            let run_end = self.ranges[last - 1].end;
+            if run_start < start {
+                start = run_start;
+            }
+            if run_end > end {
+                end = run_end;
+            }
+            debug!("Extend wanted image {}..{}", start, end);
+        } else {
+            debug!("Wanted image {}..{}", start, end);
+        }
+
+        // Replace the merged run (possibly empty) with the single combined range.
+        // Inserting at `first` keeps the vector sorted: everything before it ends
+        // before `start`, everything from `last` onwards starts after `end`.
+        self.ranges.splice(first..last, std::iter::once(start..end));
+    }
+
+    ///
+    /// Check if key space contains a range overlapping `range`.
+    ///
+    pub fn overlaps(&self, range: &Range<Key>) -> bool {
+        // Ok(i): ranges[i] starts at the exclusive `range.end`; its predecessor may still overlap.
+        match self.ranges.binary_search_by_key(&range.end, |r| r.start) {
+            Ok(0) | Err(0) => false,
+            Ok(index) | Err(index) => self.ranges[index - 1].end > range.start,
+        }
+    }
 }

 ///
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -4,7 +4,6 @@ pub mod broker_client;
 pub mod config;
 pub mod consumption_metrics;
 pub mod context;
-pub mod disk_usage_eviction_task;
 pub mod http;
 pub mod import_datadir;
 pub mod keyspace;
@@ -13,7 +12,6 @@ pub mod page_cache;
 pub mod page_service;
 pub mod pgdatadir_mapping;
 pub mod repository;
-pub(crate) mod statvfs;
 pub mod task_mgr;
 pub mod tenant;
 pub mod trace;
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -257,7 +257,7 @@ impl EvictionsWithLowResidenceDuration {
    }

    pub fn observe(&self, observed_value: Duration) {
-        if observed_value < self.threshold {
+        if self.threshold < observed_value {
            self.counter
                .as_ref()
                .expect("nobody calls this function after `remove_from_vec`")
@@ -586,6 +586,7 @@ pub struct TimelineMetrics {
    pub flush_time_histo: StorageTimeMetrics,
    pub compact_time_histo: StorageTimeMetrics,
    pub create_images_time_histo: StorageTimeMetrics,
+    pub init_logical_size_histo: StorageTimeMetrics,
    pub logical_size_histo: StorageTimeMetrics,
    pub load_layer_map_histo: StorageTimeMetrics,
    pub garbage_collect_histo: StorageTimeMetrics,
@@ -618,6 +619,8 @@ impl TimelineMetrics {
        let compact_time_histo = StorageTimeMetrics::new("compact", &tenant_id, &timeline_id);
        let create_images_time_histo =
            StorageTimeMetrics::new("create images", &tenant_id, &timeline_id);
+        let init_logical_size_histo =
+            StorageTimeMetrics::new("init logical size", &tenant_id, &timeline_id);
        let logical_size_histo = StorageTimeMetrics::new("logical size", &tenant_id, &timeline_id);
        let load_layer_map_histo =
            StorageTimeMetrics::new("load layer map", &tenant_id, &timeline_id);
@@ -654,6 +657,7 @@ impl TimelineMetrics {
            flush_time_histo,
            compact_time_histo,
            create_images_time_histo,
+            init_logical_size_histo,
            logical_size_histo,
            garbage_collect_histo,
            load_layer_map_histo,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -27,7 +27,6 @@ use pq_proto::FeStartupPacket;
 use pq_proto::{BeMessage, FeMessage, RowDescriptor};
 use std::io;
 use std::net::TcpListener;
-use std::pin::pin;
 use std::str;
 use std::str::FromStr;
 use std::sync::Arc;
@@ -467,7 +466,8 @@ impl PageServerHandler {
        pgb.write_message_noflush(&BeMessage::CopyInResponse)?;
        pgb.flush().await?;

-        let mut copyin_reader = pin!(StreamReader::new(copyin_stream(pgb)));
+        let copyin_reader = StreamReader::new(copyin_stream(pgb));
+        tokio::pin!(copyin_reader);
        timeline
            .import_basebackup_from_tar(&mut copyin_reader, base_lsn, &ctx)
            .await?;
@@ -512,7 +512,8 @@ impl PageServerHandler {
        info!("importing wal");
        pgb.write_message_noflush(&BeMessage::CopyInResponse)?;
        pgb.flush().await?;
-        let mut copyin_reader = pin!(StreamReader::new(copyin_stream(pgb)));
+        let copyin_reader = StreamReader::new(copyin_stream(pgb));
+        tokio::pin!(copyin_reader);
        import_wal_from_tar(&timeline, &mut copyin_reader, start_lsn, end_lsn, &ctx).await?;
        info!("wal import complete");

--- a/pageserver/src/statvfs.rs
+++ b/pageserver/src/statvfs.rs
@@ -1,150 +0,0 @@
-//! Wrapper around nix::sys::statvfs::Statvfs that allows for mocking.
-
-use std::path::Path;
-
-pub enum Statvfs {
-    Real(nix::sys::statvfs::Statvfs),
-    Mock(mock::Statvfs),
-}
-
-// NB: on macOS, the block count type of struct statvfs is u32.
-// The workaround seems to be to use the non-standard statfs64 call.
-// Sincce it should only be a problem on > 2TiB disks, let's ignore
-// the problem for now and upcast to u64.
-impl Statvfs {
-    pub fn get(tenants_dir: &Path, mocked: Option<&mock::Behavior>) -> nix::Result<Self> {
-        if let Some(mocked) = mocked {
-            Ok(Statvfs::Mock(mock::get(tenants_dir, mocked)?))
-        } else {
-            Ok(Statvfs::Real(nix::sys::statvfs::statvfs(tenants_dir)?))
-        }
-    }
-
-    // NB: allow() because the block count type is u32 on macOS.
-    #[allow(clippy::useless_conversion)]
-    pub fn blocks(&self) -> u64 {
-        match self {
-            Statvfs::Real(stat) => u64::try_from(stat.blocks()).unwrap(),
-            Statvfs::Mock(stat) => stat.blocks,
-        }
-    }
-
-    // NB: allow() because the block count type is u32 on macOS.
-    #[allow(clippy::useless_conversion)]
-    pub fn blocks_available(&self) -> u64 {
-        match self {
-            Statvfs::Real(stat) => u64::try_from(stat.blocks_available()).unwrap(),
-            Statvfs::Mock(stat) => stat.blocks_available,
-        }
-    }
-
-    pub fn fragment_size(&self) -> u64 {
-        match self {
-            Statvfs::Real(stat) => stat.fragment_size(),
-            Statvfs::Mock(stat) => stat.fragment_size,
-        }
-    }
-
-    pub fn block_size(&self) -> u64 {
-        match self {
-            Statvfs::Real(stat) => stat.block_size(),
-            Statvfs::Mock(stat) => stat.block_size,
-        }
-    }
-}
-
-pub mod mock {
-    use anyhow::Context;
-    use regex::Regex;
-    use std::path::Path;
-    use tracing::log::info;
-
-    #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-    #[serde(tag = "type")]
-    pub enum Behavior {
-        Success {
-            blocksize: u64,
-            total_blocks: u64,
-            name_filter: Option<utils::serde_regex::Regex>,
-        },
-        Failure {
-            mocked_error: MockedError,
-        },
-    }
-
-    #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
-    #[allow(clippy::upper_case_acronyms)]
-    pub enum MockedError {
-        EIO,
-    }
-
-    impl From<MockedError> for nix::Error {
-        fn from(e: MockedError) -> Self {
-            match e {
-                MockedError::EIO => nix::Error::EIO,
-            }
-        }
-    }
-
-    pub fn get(tenants_dir: &Path, behavior: &Behavior) -> nix::Result<Statvfs> {
-        info!("running mocked statvfs");
-
-        match behavior {
-            Behavior::Success {
-                blocksize,
-                total_blocks,
-                ref name_filter,
-            } => {
-                let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap();
-
-                // round it up to the nearest block multiple
-                let used_blocks = (used_bytes + (blocksize - 1)) / blocksize;
-
-                if used_blocks > *total_blocks {
-                    panic!(
-                        "mocking error: used_blocks > total_blocks: {used_blocks} > {total_blocks}"
-                    );
-                }
-
-                let avail_blocks = total_blocks - used_blocks;
-
-                Ok(Statvfs {
-                    blocks: *total_blocks,
-                    blocks_available: avail_blocks,
-                    fragment_size: *blocksize,
-                    block_size: *blocksize,
-                })
-            }
-            Behavior::Failure { mocked_error } => Err((*mocked_error).into()),
-        }
-    }
-
-    fn walk_dir_disk_usage(path: &Path, name_filter: Option<&Regex>) -> anyhow::Result<u64> {
-        let mut total = 0;
-        for entry in walkdir::WalkDir::new(path) {
-            let entry = entry?;
-            if !entry.file_type().is_file() {
-                continue;
-            }
-            if !name_filter
-                .as_ref()
-                .map(|filter| filter.is_match(entry.file_name().to_str().unwrap()))
-                .unwrap_or(true)
-            {
-                continue;
-            }
-            total += entry
-                .metadata()
-                .with_context(|| format!("get metadata of {:?}", entry.path()))?
-                .len();
-        }
-        Ok(total)
-    }
-
-    pub struct Statvfs {
-        pub blocks: u64,
-        pub blocks_available: u64,
-        pub fragment_size: u64,
-        pub block_size: u64,
-    }
-}
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -234,9 +234,6 @@ pub enum TaskKind {
    // Eviction. One per timeline.
    Eviction,

-    /// See [`crate::disk_usage_eviction_task`].
-    DiskUsageEviction,
-
    // Initial logical size calculation
    InitialLogicalSizeCalculation,

@@ -484,25 +481,13 @@ pub async fn shutdown_tasks(
    for task in victim_tasks {
        let join_handle = {
            let mut task_mut = task.mutable.lock().unwrap();
-            task_mut.join_handle.take()
+            info!("waiting for {} to shut down", task.name);
+            let join_handle = task_mut.join_handle.take();
+            drop(task_mut);
+            join_handle
        };
-        if let Some(mut join_handle) = join_handle {
-            let completed = tokio::select! {
-                _ = &mut join_handle => { true },
-                _ = tokio::time::sleep(std::time::Duration::from_secs(1)) => {
-                    // allow some time to elapse before logging to cut down the number of log
-                    // lines.
-                    info!("waiting for {} to shut down", task.name);
-                    false
-                }
-            };
-            if !completed {
-                // we never handled this return value, but:
-                // - we don't deschedule which would lead to is_cancelled
-                // - panics are already logged (is_panicked)
-                // - task errors are already logged in the wrapper
-                let _ = join_handle.await;
-            }
+        if let Some(join_handle) = join_handle {
+            let _ = join_handle.await;
        } else {
            // Possibly one of:
            //  * The task had not even fully started yet.
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -46,7 +46,6 @@ use std::time::{Duration, Instant};
 use self::config::TenantConf;
 use self::metadata::TimelineMetadata;
 use self::remote_timeline_client::RemoteTimelineClient;
-use self::timeline::EvictionTaskTenantState;
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::import_datadir;
@@ -95,7 +94,7 @@ mod timeline;

 pub mod size;

-pub use timeline::{LocalLayerInfoForDiskUsageEviction, PageReconstructError, Timeline};
+pub use timeline::{PageReconstructError, Timeline};

 // re-export this function so that page_cache.rs can use it.
 pub use crate::tenant::ephemeral_file::writeback as writeback_ephemeral_file;
@@ -143,8 +142,6 @@ pub struct Tenant {
    /// Cached logical sizes updated updated on each [`Tenant::gather_size_inputs`].
    cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
    cached_synthetic_tenant_size: Arc<AtomicU64>,
-
-    eviction_task_tenant_state: tokio::sync::Mutex<EvictionTaskTenantState>,
 }

 /// A timeline with some of its files on disk, being initialized.
@@ -434,16 +431,6 @@ remote:
    }
 }

-#[derive(Debug, thiserror::Error)]
-pub enum DeleteTimelineError {
-    #[error("NotFound")]
-    NotFound,
-    #[error("HasChildren")]
-    HasChildren,
-    #[error(transparent)]
-    Other(#[from] anyhow::Error),
-}
-
 struct RemoteStartupData {
    index_part: IndexPart,
    remote_metadata: TimelineMetadata,
@@ -1320,7 +1307,7 @@ impl Tenant {
        &self,
        timeline_id: TimelineId,
        _ctx: &RequestContext,
-    ) -> Result<(), DeleteTimelineError> {
+    ) -> anyhow::Result<()> {
        // Transition the timeline into TimelineState::Stopping.
        // This should prevent new operations from starting.
        let timeline = {
@@ -1332,13 +1319,13 @@ impl Tenant {
                .iter()
                .any(|(_, entry)| entry.get_ancestor_timeline_id() == Some(timeline_id));

-            if children_exist {
-                return Err(DeleteTimelineError::HasChildren);
-            }
-
+            anyhow::ensure!(
+                !children_exist,
+                "Cannot delete timeline which has child timelines"
+            );
            let timeline_entry = match timelines.entry(timeline_id) {
                Entry::Occupied(e) => e,
-                Entry::Vacant(_) => return Err(DeleteTimelineError::NotFound),
+                Entry::Vacant(_) => bail!("timeline not found"),
            };

            let timeline = Arc::clone(timeline_entry.get());
@@ -1706,13 +1693,6 @@ impl Tenant {
            .unwrap_or(self.conf.default_tenant_conf.trace_read_requests)
    }

-    pub fn get_min_resident_size_override(&self) -> Option<u64> {
-        let tenant_conf = self.tenant_conf.read().unwrap();
-        tenant_conf
-            .min_resident_size_override
-            .or(self.conf.default_tenant_conf.min_resident_size_override)
-    }
-
    pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
        *self.tenant_conf.write().unwrap() = new_tenant_conf;
    }
@@ -1791,7 +1771,6 @@ impl Tenant {
            state,
            cached_logical_sizes: tokio::sync::Mutex::new(HashMap::new()),
            cached_synthetic_tenant_size: Arc::new(AtomicU64::new(0)),
-            eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()),
        }
    }

@@ -2790,7 +2769,6 @@ pub mod harness {
                max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag),
                trace_read_requests: Some(tenant_conf.trace_read_requests),
                eviction_policy: Some(tenant_conf.eviction_policy),
-                min_resident_size_override: tenant_conf.min_resident_size_override,
            }
        }
    }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -92,7 +92,6 @@ pub struct TenantConf {
    pub max_lsn_wal_lag: NonZeroU64,
    pub trace_read_requests: bool,
    pub eviction_policy: EvictionPolicy,
-    pub min_resident_size_override: Option<u64>,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -160,10 +159,6 @@ pub struct TenantConfOpt {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub eviction_policy: Option<EvictionPolicy>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
-    pub min_resident_size_override: Option<u64>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -225,9 +220,6 @@ impl TenantConfOpt {
                .trace_read_requests
                .unwrap_or(global_conf.trace_read_requests),
            eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),
-            min_resident_size_override: self
-                .min_resident_size_override
-                .or(global_conf.min_resident_size_override),
        }
    }
 }
@@ -259,7 +251,6 @@ impl Default for TenantConf {
                .expect("cannot parse default max walreceiver Lsn wal lag"),
            trace_read_requests: false,
            eviction_policy: EvictionPolicy::NoEviction,
-            min_resident_size_override: None,
        }
    }
 }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -321,20 +321,11 @@ pub async fn get_tenant(
    }
 }

-#[derive(Debug, thiserror::Error)]
-pub enum DeleteTimelineError {
-    #[error("Tenant {0}")]
-    Tenant(#[from] TenantStateError),
-
-    #[error("Timeline {0}")]
-    Timeline(#[from] crate::tenant::DeleteTimelineError),
-}
-
 pub async fn delete_timeline(
    tenant_id: TenantId,
    timeline_id: TimelineId,
    ctx: &RequestContext,
-) -> Result<(), DeleteTimelineError> {
+) -> Result<(), TenantStateError> {
    let tenant = get_tenant(tenant_id, true).await?;
    tenant.delete_timeline(timeline_id, ctx).await?;
    Ok(())
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -6,7 +6,6 @@ use std::sync::Arc;
 use anyhow::{bail, Context};
 use tokio::sync::oneshot::error::RecvError;
 use tokio::sync::Semaphore;
-use tokio_util::sync::CancellationToken;

 use crate::context::RequestContext;
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;
@@ -353,10 +352,6 @@ async fn fill_logical_sizes(
    // our advantage with `?` error handling.
    let mut joinset = tokio::task::JoinSet::new();

-    let cancel = tokio_util::sync::CancellationToken::new();
-    // be sure to cancel all spawned tasks if we are dropped
-    let _dg = cancel.clone().drop_guard();
-
    // For each point that would benefit from having a logical size available,
    // spawn a Task to fetch it, unless we have it cached already.
    for seg in segments.iter() {
@@ -378,7 +373,6 @@ async fn fill_logical_sizes(
                    timeline,
                    lsn,
                    ctx,
-                    cancel.child_token(),
                ));
            }
            e.insert(cached_size);
@@ -483,14 +477,13 @@ async fn calculate_logical_size(
    timeline: Arc<crate::tenant::Timeline>,
    lsn: utils::lsn::Lsn,
    ctx: RequestContext,
-    cancel: CancellationToken,
 ) -> Result<TimelineAtLsnSizeResult, RecvError> {
    let _permit = tokio::sync::Semaphore::acquire_owned(limit)
        .await
        .expect("global semaphore should not had been closed");

    let size_res = timeline
-        .spawn_ondemand_logical_size_calculation(lsn, ctx, cancel)
+        .spawn_ondemand_logical_size_calculation(lsn, ctx)
        .instrument(info_span!("spawn_ondemand_logical_size_calculation"))
        .await?;
    Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -121,10 +121,10 @@ struct LayerAccessStatsInner {
 }

 #[derive(Debug, Clone, Copy)]
-pub(crate) struct LayerAccessStatFullDetails {
-    pub(crate) when: SystemTime,
-    pub(crate) task_kind: TaskKind,
-    pub(crate) access_kind: LayerAccessKind,
+pub(super) struct LayerAccessStatFullDetails {
+    pub(super) when: SystemTime,
+    pub(super) task_kind: TaskKind,
+    pub(super) access_kind: LayerAccessKind,
 }

 #[derive(Clone, Copy, strum_macros::EnumString)]
@@ -255,7 +255,7 @@ impl LayerAccessStats {
        ret
    }

-    fn most_recent_access_or_residence_event(
+    pub(super) fn most_recent_access_or_residence_event(
        &self,
    ) -> Either<LayerAccessStatFullDetails, LayerResidenceEvent> {
        let locked = self.0.lock().unwrap();
@@ -268,13 +268,6 @@ impl LayerAccessStats {
            }
        }
    }
-
-    pub(crate) fn latest_activity(&self) -> SystemTime {
-        match self.most_recent_access_or_residence_event() {
-            Either::Left(mra) => mra.when,
-            Either::Right(re) => re.timestamp,
-        }
-    }
 }

 /// Supertrait of the [`Layer`] trait that captures the bare minimum interface
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -244,12 +244,14 @@ pub(crate) async fn random_init_delay(
 ) -> Result<(), Cancelled> {
    use rand::Rng;

-    if period == Duration::ZERO {
-        return Ok(());
-    }
-
    let d = {
        let mut rng = rand::thread_rng();
+
+        // gen_range asserts that the range cannot be empty, which it could be because period can
+        // be set to zero to disable gc or compaction, so let's set it to be at least 10s.
+        let period = std::cmp::max(period, Duration::from_secs(10));
+
+        // semi-ok default as the source of jitter
        rng.gen_range(Duration::ZERO..=period)
    };

--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -2,6 +2,7 @@

 mod eviction_task;
 mod walreceiver;
+mod layer_trace;

 use anyhow::{anyhow, bail, ensure, Context};
 use bytes::Bytes;
@@ -13,19 +14,16 @@ use pageserver_api::models::{
    DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
    DownloadRemoteLayersTaskState, LayerMapInfo, LayerResidenceStatus, TimelineState,
 };
-use remote_storage::GenericRemoteStorage;
 use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::id::TenantTimelineId;

 use std::cmp::{max, min, Ordering};
-use std::collections::BinaryHeap;
-use std::collections::HashMap;
+use std::collections::{BinaryHeap, HashMap};
 use std::fs;
 use std::ops::{Deref, Range};
 use std::path::{Path, PathBuf};
-use std::pin::pin;
 use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
 use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak};
 use std::time::{Duration, Instant, SystemTime};
@@ -73,9 +71,6 @@ use crate::ZERO_PAGE;
 use crate::{is_temporary, task_mgr};
 use walreceiver::spawn_connection_manager_task;

-pub(super) use self::eviction_task::EvictionTaskTenantState;
-use self::eviction_task::EvictionTaskTimelineState;
-
 use super::layer_map::BatchedUpdates;
 use super::remote_timeline_client::index::IndexPart;
 use super::remote_timeline_client::RemoteTimelineClient;
@@ -120,6 +115,17 @@ pub struct Timeline {

    pub(super) layers: RwLock<LayerMap<dyn PersistentLayer>>,

+    /// Set of key ranges which should be covered by image layers to
+    /// allow GC to remove old layers. This set is created by GC and its cutoff LSN is also stored.
+    /// It is used by compaction task when it checks if new image layer should be created.
+    /// Newly created image layer doesn't help to remove the delta layer, until the
+    /// newly created image layer falls off the PITR horizon. So on next GC cycle,
+    /// gc_timeline may still want the new image layer to be created. To avoid redundant
+    /// image layer creation we should check if an image layer exists but is beyond the PITR horizon.
+    /// This is why we need to remember the GC cutoff LSN.
+    ///
+    wanted_image_layers: Mutex<Option<(Lsn, KeySpace)>>,
+
    last_freeze_at: AtomicLsn,
    // Atomic would be more appropriate here.
    last_freeze_ts: RwLock<Instant>,
@@ -222,7 +228,7 @@ pub struct Timeline {

    state: watch::Sender<TimelineState>,

-    eviction_task_timeline_state: tokio::sync::Mutex<EvictionTaskTimelineState>,
+    layer_trace_file: Mutex<Option<std::fs::File>>,
 }

 /// Internal structure to hold all data needed for logical size calculation.
@@ -319,7 +325,7 @@ impl LogicalSize {
        //                  we change the type.
        match self.initial_logical_size.get() {
            Some(initial_size) => {
-                initial_size.checked_add_signed(size_increment)
+                initial_size.checked_add(size_increment.try_into().unwrap())
                    .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
                    .map(CurrentLogicalSize::Exact)
            }
@@ -335,13 +341,9 @@ impl LogicalSize {
            .fetch_add(delta, AtomicOrdering::SeqCst);
    }

-    /// Make the value computed by initial logical size computation
-    /// available for re-use. This doesn't contain the incremental part.
-    fn initialized_size(&self, lsn: Lsn) -> Option<u64> {
-        match self.initial_part_end {
-            Some(v) if v == lsn => self.initial_logical_size.get().copied(),
-            _ => None,
-        }
+    /// Returns the initialized (already calculated) value, if any.
+    fn initialized_size(&self) -> Option<u64> {
+        self.initial_logical_size.get().copied()
    }
 }

@@ -679,7 +681,8 @@ impl Timeline {

            let mut failed = 0;

-            let mut cancelled = pin!(task_mgr::shutdown_watcher());
+            let cancelled = task_mgr::shutdown_watcher();
+            tokio::pin!(cancelled);

            loop {
                tokio::select! {
@@ -816,11 +819,11 @@ impl Timeline {

        let mut is_exact = true;
        let size = current_size.size();
-        if let (CurrentLogicalSize::Approximate(_), Some(initial_part_end)) =
+        if let (CurrentLogicalSize::Approximate(_), Some(init_lsn)) =
            (current_size, self.current_logical_size.initial_part_end)
        {
            is_exact = false;
-            self.try_spawn_size_init_task(initial_part_end, ctx);
+            self.try_spawn_size_init_task(init_lsn, ctx);
        }

        Ok((size, is_exact))
@@ -867,6 +870,7 @@ impl Timeline {
    }

    pub fn activate(self: &Arc<Self>) {
+        self.start_layer_tracing();
        self.set_state(TimelineState::Active);
        self.launch_wal_receiver();
        self.launch_eviction_task();
@@ -958,25 +962,6 @@ impl Timeline {
        }
    }

-    /// Evict a batch of layers.
-    ///
-    /// GenericRemoteStorage reference is required as a witness[^witness_article] for "remote storage is configured."
-    ///
-    /// [^witness_article]: https://willcrichton.net/rust-api-type-patterns/witnesses.html
-    pub async fn evict_layers(
-        &self,
-        _: &GenericRemoteStorage,
-        layers_to_evict: &[Arc<dyn PersistentLayer>],
-        cancel: CancellationToken,
-    ) -> anyhow::Result<Vec<Option<anyhow::Result<bool>>>> {
-        let remote_client = self.remote_client.clone().expect(
-            "GenericRemoteStorage is configured, so timeline must have RemoteTimelineClient",
-        );
-
-        self.evict_layer_batch(&remote_client, layers_to_evict, cancel)
-            .await
-    }
-
    /// Evict multiple layers at once, continuing through errors.
    ///
    /// Try to evict the given `layers_to_evict` by
@@ -1014,15 +999,6 @@ impl Timeline {
        // now lock out layer removal (compaction, gc, timeline deletion)
        let layer_removal_guard = self.layer_removal_cs.lock().await;

-        {
-            // to avoid racing with detach and delete_timeline
-            let state = self.current_state();
-            anyhow::ensure!(
-                state == TimelineState::Active,
-                "timeline is not active but {state:?}"
-            );
-        }
-
        // start the batch update
        let mut layer_map = self.layers.write().unwrap();
        let mut batch_updates = layer_map.batch_update();
@@ -1056,8 +1032,6 @@ impl Timeline {
        use super::layer_map::Replacement;

        if local_layer.is_remote_layer() {
-            // TODO(issue #3851): consider returning an err here instead of false,
-            // which is the same out the match later
            return Ok(false);
        }

@@ -1120,6 +1094,7 @@ impl Timeline {
                self.metrics
                    .resident_physical_size_gauge
                    .sub(layer_file_size);
+                self.trace_layer_evict(&local_layer.filename());

                self.metrics.evictions.inc();

@@ -1127,9 +1102,6 @@ impl Timeline {
                    self.metrics
                        .evictions_with_low_residence_duration
                        .observe(delta);
-                    info!(layer=%local_layer.short_id(), residence_millis=delta.as_millis(), "evicted layer after known residence period");
-                } else {
-                    info!(layer=%local_layer.short_id(), "evicted layer after unknown residence period");
                }

                true
@@ -1229,6 +1201,7 @@ impl Timeline {
                tenant_id,
                pg_version,
                layers: RwLock::new(LayerMap::default()),
+                wanted_image_layers: Mutex::new(None),

                walredo_mgr,

@@ -1292,9 +1265,7 @@ impl Timeline {

                state,

-                eviction_task_timeline_state: tokio::sync::Mutex::new(
-                    EvictionTaskTimelineState::default(),
-                ),
+                layer_trace_file: Mutex::new(None),
            };
            result.repartition_threshold = result.get_checkpoint_distance() / 10;
            result
@@ -1735,7 +1706,7 @@ impl Timeline {
        Ok(())
    }

-    fn try_spawn_size_init_task(self: &Arc<Self>, lsn: Lsn, ctx: &RequestContext) {
+    fn try_spawn_size_init_task(self: &Arc<Self>, init_lsn: Lsn, ctx: &RequestContext) {
        let permit = match Arc::clone(&self.current_logical_size.initial_size_computation)
            .try_acquire_owned()
        {
@@ -1772,11 +1743,8 @@ impl Timeline {
            false,
            // NB: don't log errors here, task_mgr will do that.
            async move {
-                // no cancellation here, because nothing really waits for this to complete compared
-                // to spawn_ondemand_logical_size_calculation.
-                let cancel = CancellationToken::new();
                let calculated_size = match self_clone
-                    .logical_size_calculation_task(lsn, &background_ctx, cancel)
+                    .logical_size_calculation_task(init_lsn, &background_ctx)
                    .await
                {
                    Ok(s) => s,
@@ -1797,7 +1765,7 @@ impl Timeline {
                    .size_added_after_initial
                    .load(AtomicOrdering::Relaxed);

-                let sum = calculated_size.saturating_add_signed(added);
+                let sum = calculated_size.saturating_add(added.try_into().unwrap());

                // set the gauge value before it can be set in `update_current_logical_size`.
                self_clone.metrics.current_logical_size_gauge.set(sum);
@@ -1831,7 +1799,6 @@ impl Timeline {
        self: &Arc<Self>,
        lsn: Lsn,
        ctx: RequestContext,
-        cancel: CancellationToken,
    ) -> oneshot::Receiver<Result<u64, CalculateLogicalSizeError>> {
        let (sender, receiver) = oneshot::channel();
        let self_clone = Arc::clone(self);
@@ -1851,9 +1818,7 @@ impl Timeline {
            "ondemand logical size calculation",
            false,
            async move {
-                let res = self_clone
-                    .logical_size_calculation_task(lsn, &ctx, cancel)
-                    .await;
+                let res = self_clone.logical_size_calculation_task(lsn, &ctx).await;
                let _ = sender.send(res).ok();
                Ok(()) // Receiver is responsible for handling errors
            },
@@ -1864,20 +1829,20 @@ impl Timeline {
    #[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))]
    async fn logical_size_calculation_task(
        self: &Arc<Self>,
-        lsn: Lsn,
+        init_lsn: Lsn,
        ctx: &RequestContext,
-        cancel: CancellationToken,
    ) -> Result<u64, CalculateLogicalSizeError> {
        let mut timeline_state_updates = self.subscribe_for_state_updates();
        let self_calculation = Arc::clone(self);
+        let cancel = CancellationToken::new();

-        let mut calculation = pin!(async {
+        let calculation = async {
            let cancel = cancel.child_token();
            let ctx = ctx.attached_child();
            self_calculation
-                .calculate_logical_size(lsn, cancel, &ctx)
+                .calculate_logical_size(init_lsn, cancel, &ctx)
                .await
-        });
+        };
        let timeline_state_cancellation = async {
            loop {
                match timeline_state_updates.changed().await {
@@ -1906,6 +1871,7 @@ impl Timeline {
            "aborted because task_mgr shutdown requested".to_string()
        };

+        tokio::pin!(calculation);
        loop {
            tokio::select! {
                res = &mut calculation => { return res }
@@ -1958,12 +1924,21 @@ impl Timeline {
            // need to return something
            Ok(0)
        });
-        // See if we've already done the work for initial size calculation.
-        // This is a short-cut for timelines that are mostly unused.
-        if let Some(size) = self.current_logical_size.initialized_size(up_to_lsn) {
-            return Ok(size);
-        }
-        let timer = self.metrics.logical_size_histo.start_timer();
+        let timer = if up_to_lsn == self.initdb_lsn {
+            if let Some(size) = self.current_logical_size.initialized_size() {
+                if size != 0 {
+                    // non-zero size means that the size has already been calculated by this method
+                    // after startup. if the logical size is for a new timeline without layers the
+                    // size will be zero, and we cannot use that, or this caching strategy until
+                    // pageserver restart.
+                    return Ok(size);
+                }
+            }
+
+            self.metrics.init_logical_size_histo.start_timer()
+        } else {
+            self.metrics.logical_size_histo.start_timer()
+        };
        let logical_size = self
            .get_current_logical_size_non_incremental(up_to_lsn, cancel, ctx)
            .await?;
@@ -2671,6 +2646,8 @@ impl Timeline {
            self.conf.timeline_path(&self.timeline_id, &self.tenant_id),
        ])?;

+        self.trace_layer_flush(&new_delta.filename());
+
        // Add it to the layer map
        self.layers
            .write()
@@ -2726,6 +2703,30 @@ impl Timeline {
        let layers = self.layers.read().unwrap();

        let mut max_deltas = 0;
+        let wanted_image_layers = self.wanted_image_layers.lock().unwrap();
+        if let Some((cutoff_lsn, wanted)) = &*wanted_image_layers {
+            let img_range =
+                partition.ranges.first().unwrap().start..partition.ranges.last().unwrap().end;
+            if wanted.overlaps(&img_range) {
+                //
+                // gc_timeline only pays attention to image layers that are older than the GC cutoff,
+                // but create_image_layers creates image layers at last-record-lsn.
+                // So it's possible that gc_timeline decides that it wants new image layer to be created for a key range,
+                // and on next compaction create_image_layers creates the image layer.
+                // But on next GC cycle, gc_timeline still wants the new image layer to be created,
+                // because the newly created image layer doesn't help to remove the delta layer,
+                // until the newly created image layer falls off the PITR horizon.
+                //
+                // So we should check if image layer beyond cutoff LSN already exists.
+                if !layers.image_layer_exists(&img_range, &(*cutoff_lsn..lsn))? {
+                    debug!(
+                        "Force generation of layer {}-{} wanted by GC)",
+                        img_range.start, img_range.end
+                    );
+                    return Ok(true);
+                }
+            }
+        }

        for part_range in &partition.ranges {
            let image_coverage = layers.image_coverage(part_range, lsn)?;
@@ -2845,6 +2846,11 @@ impl Timeline {
                image_layers.push(image_layer);
            }
        }
+        // All wanted layers are taken into account by time_for_new_image_layer.
+        // The wanted_image_layers could get updated out of turn and we could
+        // clear something which hasn't been looked at all. This is fine, because
+        // next gc round any wanted would get added back in.
+        *self.wanted_image_layers.lock().unwrap() = None;

        // Sync the new layer to disk before adding it to the layer map, to make sure
        // we don't garbage collect something based on the new layer, before it has
@@ -2881,6 +2887,7 @@ impl Timeline {
            self.metrics
                .resident_physical_size_gauge
                .add(metadata.len());
+            self.trace_layer_image_create(&l.filename());
            updates.insert_historic(Arc::new(l));
        }
        updates.flush();
@@ -3311,6 +3318,7 @@ impl Timeline {
            self.metrics
                .resident_physical_size_gauge
                .add(metadata.len());
+            self.trace_layer_compact_create(&l.filename());

            new_layer_paths.insert(new_delta_path, LayerFileMetadata::new(metadata.len()));
            let x: Arc<dyn PersistentLayer + 'static> = Arc::new(l);
@@ -3321,6 +3329,7 @@ impl Timeline {
        // delete the old ones
        let mut layer_names_to_delete = Vec::with_capacity(deltas_to_compact.len());
        for l in deltas_to_compact {
+            self.trace_layer_compact_delete(&l.filename());
            layer_names_to_delete.push(l.filename());
            self.delete_historic_layer(layer_removal_cs, l, &mut updates)?;
        }
@@ -3517,6 +3526,8 @@ impl Timeline {

        info!("GC starting");

+        self.trace_gc_start(new_gc_cutoff);
+
        debug!("retain_lsns: {:?}", retain_lsns);

        // Before deleting any layers, we need to wait for their upload ops to finish.
@@ -3531,6 +3542,7 @@ impl Timeline {
        }

        let mut layers_to_remove = Vec::new();
+        let mut wanted_image_layers = KeySpace::default();

        // Scan all layers in the timeline (remote or on-disk).
        //
@@ -3614,6 +3626,15 @@ impl Timeline {
                    "keeping {} because it is the latest layer",
                    l.filename().file_name()
                );
+                // Collect delta key ranges that need image layers to allow garbage
+                // collecting the layers.
+                // It is not so obvious whether we need to propagate information only about
+                // delta layers. Image layers can form "stairs" preventing old image from being deleted.
+                // But image layers are in any case less sparse than delta layers. Also we need some
+                // protection from replacing recent image layers with new ones after each GC iteration.
+                if l.is_incremental() && !LayerMap::is_l0(&*l) {
+                    wanted_image_layers.add_range(l.get_key_range());
+                }
                result.layers_not_updated += 1;
                continue 'outer;
            }
@@ -3626,6 +3647,10 @@ impl Timeline {
            );
            layers_to_remove.push(Arc::clone(&l));
        }
+        self.wanted_image_layers
+            .lock()
+            .unwrap()
+            .replace((new_gc_cutoff, wanted_image_layers));

        let mut updates = layers.batch_update();
        if !layers_to_remove.is_empty() {
@@ -3640,6 +3665,7 @@ impl Timeline {
            {
                for doomed_layer in layers_to_remove {
                    layer_names_to_delete.push(doomed_layer.filename());
+                    self.trace_layer_gc_delete(&doomed_layer.filename());
                    self.delete_historic_layer(layer_removal_cs, doomed_layer, &mut updates)?; // FIXME: schedule succeeded deletions before returning?
                    result.layers_removed += 1;
                }
@@ -4046,67 +4072,6 @@ impl Timeline {
    }
 }

-pub struct DiskUsageEvictionInfo {
-    /// Timeline's largest layer (remote or resident)
-    pub max_layer_size: Option<u64>,
-    /// Timeline's resident layers
-    pub resident_layers: Vec<LocalLayerInfoForDiskUsageEviction>,
-}
-
-pub struct LocalLayerInfoForDiskUsageEviction {
-    pub layer: Arc<dyn PersistentLayer>,
-    pub last_activity_ts: SystemTime,
-}
-
-impl std::fmt::Debug for LocalLayerInfoForDiskUsageEviction {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        // format the tv_sec, tv_nsec into rfc3339 in case someone is looking at it
-        // having to allocate a string to this is bad, but it will rarely be formatted
-        let ts = chrono::DateTime::<chrono::Utc>::from(self.last_activity_ts);
-        let ts = ts.to_rfc3339_opts(chrono::SecondsFormat::Nanos, true);
-        f.debug_struct("LocalLayerInfoForDiskUsageEviction")
-            .field("layer", &self.layer)
-            .field("last_activity", &ts)
-            .finish()
-    }
-}
-
-impl LocalLayerInfoForDiskUsageEviction {
-    pub fn file_size(&self) -> u64 {
-        self.layer.file_size()
-    }
-}
-
-impl Timeline {
-    pub(crate) fn get_local_layers_for_disk_usage_eviction(&self) -> DiskUsageEvictionInfo {
-        let layers = self.layers.read().unwrap();
-
-        let mut max_layer_size: Option<u64> = None;
-        let mut resident_layers = Vec::new();
-
-        for l in layers.iter_historic_layers() {
-            let file_size = l.file_size();
-            max_layer_size = max_layer_size.map_or(Some(file_size), |m| Some(m.max(file_size)));
-
-            if l.is_remote_layer() {
-                continue;
-            }
-
-            let last_activity_ts = l.access_stats().latest_activity();
-
-            resident_layers.push(LocalLayerInfoForDiskUsageEviction {
-                layer: l,
-                last_activity_ts,
-            });
-        }
-
-        DiskUsageEvictionInfo {
-            max_layer_size,
-            resident_layers,
-        }
-    }
-}
-
 type TraversalPathItem = (
    ValueReconstructResult,
    Lsn,
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -14,12 +14,12 @@
 //!
 //! See write-up on restart on-demand download spike: <https://gist.github.com/problame/2265bf7b8dc398be834abfead36c76b5>
 use std::{
-    collections::HashMap,
    ops::ControlFlow,
    sync::Arc,
    time::{Duration, SystemTime},
 };

+use either::Either;
 use tokio::time::Instant;
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, instrument, warn};
@@ -30,22 +30,11 @@ use crate::{
    tenant::{
        config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
        storage_layer::PersistentLayer,
-        Tenant,
    },
 };

 use super::Timeline;

-#[derive(Default)]
-pub struct EvictionTaskTimelineState {
-    last_layer_access_imitation: Option<tokio::time::Instant>,
-}
-
-#[derive(Default)]
-pub struct EvictionTaskTenantState {
-    last_layer_access_imitation: Option<Instant>,
-}
-
 impl Timeline {
    pub(super) fn launch_eviction_task(self: &Arc<Self>) {
        let self_clone = Arc::clone(self);
@@ -133,35 +122,6 @@ impl Timeline {
    ) -> ControlFlow<()> {
        let now = SystemTime::now();

-        // If we evict layers but keep cached values derived from those layers, then
-        // we face a storm of on-demand downloads after pageserver restart.
-        // The reason is that the restart empties the caches, and so, the values
-        // need to be re-computed by accessing layers, which we evicted while the
-        // caches were filled.
-        //
-        // Solutions here would be one of the following:
-        // 1. Have a persistent cache.
-        // 2. Count every access to a cached value to the access stats of all layers
-        //    that were accessed to compute the value in the first place.
-        // 3. Invalidate the caches at a period of < p.threshold/2, so that the values
-        //    get re-computed from layers, thereby counting towards layer access stats.
-        // 4. Make the eviction task imitate the layer accesses that typically hit caches.
-        //
-        // We follow approach (4) here because in Neon prod deployment:
-        // - page cache is quite small => high churn => low hit rate
-        //   => eviction gets correct access stats
-        // - value-level caches such as logical size & repatition have a high hit rate,
-        //   especially for inactive tenants
-        //   => eviction sees zero accesses for these
-        //   => they cause the on-demand download storm on pageserver restart
-        //
-        // We should probably move to persistent caches in the future, or avoid
-        // having inactive tenants attached to pageserver in the first place.
-        match self.imitate_layer_accesses(p, cancel, ctx).await {
-            ControlFlow::Break(()) => return ControlFlow::Break(()),
-            ControlFlow::Continue(()) => (),
-        }
-
        #[allow(dead_code)]
        #[derive(Debug, Default)]
        struct EvictionStats {
@@ -172,6 +132,19 @@ impl Timeline {
            skipped_for_shutdown: usize,
        }

+        // what we want is to invalidate any caches which haven't been accessed for `p.threshold`,
+        // but with the current limitations we cannot actually do that except by restarting the
+        // pageserver. we just recompute the values which would be recomputed on startup.
+        //
+        // for active tenants this will likely hit the materialized page cache or in-memory layers.
+        // for inactive tenants it will refresh the last_access timestamps so that we will not
+        // evict and re-download these layers on restart.
+        self.refresh_layers_required_in_restart(cancel, ctx).await;
+
+        if cancel.is_cancelled() {
+            return ControlFlow::Break(());
+        }
+
        let mut stats = EvictionStats::default();
        // Gather layers for eviction.
        // NB: all the checks can be invalidated as soon as we release the layer map lock.
@@ -184,7 +157,13 @@ impl Timeline {
                if hist_layer.is_remote_layer() {
                    continue;
                }
-                let last_activity_ts = hist_layer.access_stats().latest_activity();
+                let last_activity_ts = match hist_layer
+                    .access_stats()
+                    .most_recent_access_or_residence_event()
+                {
+                    Either::Left(mra) => mra.when,
+                    Either::Right(re) => re.timestamp,
+                };
                let no_activity_for = match now.duration_since(last_activity_ts) {
                    Ok(d) => d,
                    Err(_e) => {
@@ -269,55 +248,8 @@ impl Timeline {
        ControlFlow::Continue(())
    }

-    async fn imitate_layer_accesses(
-        &self,
-        p: &EvictionPolicyLayerAccessThreshold,
-        cancel: &CancellationToken,
-        ctx: &RequestContext,
-    ) -> ControlFlow<()> {
-        let mut state = self.eviction_task_timeline_state.lock().await;
-        match state.last_layer_access_imitation {
-            Some(ts) if ts.elapsed() < p.threshold => { /* no need to run */ }
-            _ => {
-                self.imitate_timeline_cached_layer_accesses(cancel, ctx)
-                    .await;
-                state.last_layer_access_imitation = Some(tokio::time::Instant::now())
-            }
-        }
-        drop(state);
-
-        if cancel.is_cancelled() {
-            return ControlFlow::Break(());
-        }
-
-        // This task is timeline-scoped, but the synthetic size calculation is tenant-scoped.
-        // Make one of the tenant's timelines draw the short straw and run the calculation.
-        // The others wait until the calculation is done so that they take into account the
-        // imitated accesses that the winner made.
-        let Ok(tenant) = crate::tenant::mgr::get_tenant(self.tenant_id, true).await else {
-            // likely, we're shutting down
-            return ControlFlow::Break(());
-        };
-        let mut state = tenant.eviction_task_tenant_state.lock().await;
-        match state.last_layer_access_imitation {
-            Some(ts) if ts.elapsed() < p.threshold => { /* no need to run */ }
-            _ => {
-                self.imitate_synthetic_size_calculation_worker(&tenant, ctx, cancel)
-                    .await;
-                state.last_layer_access_imitation = Some(tokio::time::Instant::now());
-            }
-        }
-        drop(state);
-
-        if cancel.is_cancelled() {
-            return ControlFlow::Break(());
-        }
-
-        ControlFlow::Continue(())
-    }
-
    /// Recompute the values which would cause on-demand downloads during restart.
-    async fn imitate_timeline_cached_layer_accesses(
+    async fn refresh_layers_required_in_restart(
        &self,
        cancel: &CancellationToken,
        ctx: &RequestContext,
@@ -351,61 +283,4 @@ impl Timeline {
            }
        }
    }
-
-    // Imitate the synthetic size calculation done by the consumption_metrics module.
-    async fn imitate_synthetic_size_calculation_worker(
-        &self,
-        tenant: &Arc<Tenant>,
-        ctx: &RequestContext,
-        cancel: &CancellationToken,
-    ) {
-        if self.conf.metric_collection_endpoint.is_none() {
-            // We don't start the consumption metrics task if this is not set in the config.
-            // So, no need to imitate the accesses in that case.
-            return;
-        }
-
-        // The consumption metrics are collected on a per-tenant basis, by a single
-        // global background loop.
-        // It limits the number of synthetic size calculations using the global
-        // `concurrent_tenant_size_logical_size_queries` semaphore to not overload
-        // the pageserver. (size calculation is somewhat expensive in terms of CPU and IOs).
-        //
-        // If we used that same semaphore here, then we'd compete for the
-        // same permits, which may impact timeliness of consumption metrics.
-        // That is a no-go, as consumption metrics are much more important
-        // than what we do here.
-        //
-        // So, we have a separate semaphore, initialized to the same
-        // number of permits as the `concurrent_tenant_size_logical_size_queries`.
-        // In the worst, we would have twice the amount of concurrenct size calculations.
-        // But in practice, the `p.threshold` >> `consumption metric interval`, and
-        // we spread out the eviction task using `random_init_delay`.
-        // So, the chance of the worst case is quite low in practice.
-        // It runs as a per-tenant task, but the eviction_task.rs is per-timeline.
-        // So, we must coordinate with other with other eviction tasks of this tenant.
-        let limit = self
-            .conf
-            .eviction_task_immitated_concurrent_logical_size_queries
-            .inner();
-
-        let mut throwaway_cache = HashMap::new();
-        let gather =
-            crate::tenant::size::gather_inputs(tenant, limit, None, &mut throwaway_cache, ctx);
-
-        tokio::select! {
-            _ = cancel.cancelled() => {}
-            gather_result = gather => {
-                match gather_result {
-                    Ok(_) => {},
-                    Err(e) => {
-                        // We don't care about the result, but, if it failed, we should log it,
-                        // since consumption metric might be hitting the cached value and
-                        // thus not encountering this error.
-                        warn!("failed to imitate synthetic size calculation accesses: {e:#}")
-                    }
-                }
-           }
-        }
-    }
 }
--- a/pageserver/src/tenant/timeline/layer_trace.rs
+++ b/pageserver/src/tenant/timeline/layer_trace.rs
@@ -0,0 +1,81 @@
+use crate::tenant::timeline::LayerFileName;
+use crate::tenant::Timeline;
+use std::io::Write;
+use std::time::UNIX_EPOCH;
+use tracing::*;
+use std::fs::File;
+use utils::lsn::Lsn;
+
+impl Timeline {
+    /// Opens (creating it if missing) the append-mode `layer_trace` file in the timeline
+    /// directory and stores it in `layer_trace_file`; a failure to open is only logged.
+    pub(super) fn start_layer_tracing(&self) {
+        let timeline_path = self.conf.timeline_path(&self.timeline_id, &self.tenant_id);
+        let path = timeline_path.join("layer_trace");
+        match File::options().create(true).append(true).open(&path) {
+            Ok(file) => {
+                info!("enabled layer tracing");
+                self.layer_trace_file.lock().unwrap().replace(file);
+            }
+            Err(e) => {
+                warn!("could not open layer tracing file \"{}\": {}", path.display(), e);
+            }
+        }
+    }
+
+    /// Appends one line `{ "time": <ms since Unix epoch>, "op": "<op>", "<key>": "<value>"}`
+    /// to the trace file (values are not escaped). Does nothing when no trace file is open.
+    fn trace_event(&self, op: &str, key: &str, value: &str) {
+        let opt_out = &self.layer_trace_file.lock().unwrap();
+        if let Some(mut out) = opt_out.as_ref() {
+            if let Ok(elapsed) = UNIX_EPOCH.elapsed() {
+                let time = elapsed.as_millis();
+                // Tracing is best effort, so a failed write is deliberately ignored.
+                let _ = writeln!(out, "{{ \"time\": {time}, \"op\": \"{op}\", \"{key}\": \"{value}\"}}");
+            } else {
+                warn!("could not get current timestamp");
+            }
+        }
+    }
+
+    /// Records operation `op` together with the layer file name it applies to.
+    fn trace_op(&self, op: &str, filename: &str) {
+        self.trace_event(op, "filename", filename)
+    }
+
+    /// Records an `evict` event for the given layer file name.
+    pub(super) fn trace_layer_evict(&self, filename: &LayerFileName) {
+        self.trace_op("evict", &filename.file_name())
+    }
+
+    /// Records a `flush` event for the given layer file name.
+    pub(super) fn trace_layer_flush(&self, filename: &LayerFileName) {
+        self.trace_op("flush", &filename.file_name())
+    }
+
+    /// Records a `compact_create` event for the given layer file name.
+    pub(super) fn trace_layer_compact_create(&self, filename: &LayerFileName) {
+        self.trace_op("compact_create", &filename.file_name())
+    }
+
+    /// Records a `compact_delete` event for the given layer file name.
+    pub(super) fn trace_layer_compact_delete(&self, filename: &LayerFileName) {
+        self.trace_op("compact_delete", &filename.file_name())
+    }
+
+    /// Records an `image_create` event for the given layer file name.
+    pub(super) fn trace_layer_image_create(&self, filename: &LayerFileName) {
+        self.trace_op("image_create", &filename.file_name())
+    }
+
+    /// Records a `gc_delete` event for the given layer file name.
+    pub(super) fn trace_layer_gc_delete(&self, filename: &LayerFileName) {
+        self.trace_op("gc_delete", &filename.file_name())
+    }
+
+    /// Records a `gc_start` event carrying the GC cutoff LSN under the `cutoff` key.
+    // TODO: also report 'retain_lsns'
+    pub(super) fn trace_gc_start(&self, cutoff_lsn: Lsn) {
+        self.trace_event("gc_start", "cutoff", &cutoff_lsn.to_string())
+    }
+}
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -237,7 +237,11 @@ async fn connection_manager_loop_step(
        if let Some(new_candidate) = walreceiver_state.next_connection_candidate() {
            info!("Switching to new connection candidate: {new_candidate:?}");
            walreceiver_state
-                .change_connection(new_candidate, ctx)
+                .change_connection(
+                    new_candidate.safekeeper_id,
+                    new_candidate.wal_source_connconf,
+                    ctx,
+                )
                .await
        }
    }
@@ -342,8 +346,6 @@ struct WalConnection {
    started_at: NaiveDateTime,
    /// Current safekeeper pageserver is connected to for WAL streaming.
    sk_id: NodeId,
-    /// Availability zone of the safekeeper.
-    availability_zone: Option<String>,
    /// Status of the connection.
    status: WalConnectionStatus,
    /// WAL streaming task handle.
@@ -403,7 +405,12 @@ impl WalreceiverState {
    }

    /// Shuts down the current connection (if any) and immediately starts another one with the given connection string.
-    async fn change_connection(&mut self, new_sk: NewWalConnectionCandidate, ctx: &RequestContext) {
+    async fn change_connection(
+        &mut self,
+        new_sk_id: NodeId,
+        new_wal_source_connconf: PgConnectionConfig,
+        ctx: &RequestContext,
+    ) {
        self.drop_old_connection(true).await;

        let id = self.id;
@@ -417,7 +424,7 @@ impl WalreceiverState {
            async move {
                super::walreceiver_connection::handle_walreceiver_connection(
                    timeline,
-                    new_sk.wal_source_connconf,
+                    new_wal_source_connconf,
                    events_sender,
                    cancellation,
                    connect_timeout,
@@ -426,16 +433,13 @@ impl WalreceiverState {
                .await
                .context("walreceiver connection handling failure")
            }
-            .instrument(
-                info_span!("walreceiver_connection", id = %id, node_id = %new_sk.safekeeper_id),
-            )
+            .instrument(info_span!("walreceiver_connection", id = %id, node_id = %new_sk_id))
        });

        let now = Utc::now().naive_utc();
        self.wal_connection = Some(WalConnection {
            started_at: now,
-            sk_id: new_sk.safekeeper_id,
-            availability_zone: new_sk.availability_zone,
+            sk_id: new_sk_id,
            status: WalConnectionStatus {
                is_connected: false,
                has_processed_wal: false,
@@ -542,7 +546,6 @@ impl WalreceiverState {
    /// * if connected safekeeper is not present, pick the candidate
    /// * if we haven't received any updates for some time, pick the candidate
    /// * if the candidate commit_lsn is much higher than the current one, pick the candidate
-    /// * if the candidate commit_lsn is same, but candidate is located in the same AZ as the pageserver, pick the candidate
    /// * if connected safekeeper stopped sending us new WAL which is available on other safekeeper, pick the candidate
    ///
    /// This way we ensure to keep up with the most up-to-date safekeeper and don't try to jump from one safekeeper to another too frequently.
@@ -556,7 +559,6 @@ impl WalreceiverState {

                let (new_sk_id, new_safekeeper_broker_data, new_wal_source_connconf) =
                    self.select_connection_candidate(Some(connected_sk_node))?;
-                let new_availability_zone = new_safekeeper_broker_data.availability_zone.clone();

                let now = Utc::now().naive_utc();
                if let Ok(latest_interaciton) =
@@ -567,7 +569,6 @@ impl WalreceiverState {
                        return Some(NewWalConnectionCandidate {
                            safekeeper_id: new_sk_id,
                            wal_source_connconf: new_wal_source_connconf,
-                            availability_zone: new_availability_zone,
                            reason: ReconnectReason::NoKeepAlives {
                                last_keep_alive: Some(
                                    existing_wal_connection.status.latest_connection_update,
@@ -593,7 +594,6 @@ impl WalreceiverState {
                                return Some(NewWalConnectionCandidate {
                                    safekeeper_id: new_sk_id,
                                    wal_source_connconf: new_wal_source_connconf,
-                                    availability_zone: new_availability_zone,
                                    reason: ReconnectReason::LaggingWal {
                                        current_commit_lsn,
                                        new_commit_lsn,
@@ -601,20 +601,6 @@ impl WalreceiverState {
                                    },
                                });
                            }
-                            // If we have a candidate with the same commit_lsn as the current one, which is in the same AZ as pageserver,
-                            // and the current one is not, switch to the new one.
-                            if self.availability_zone.is_some()
-                                && existing_wal_connection.availability_zone
-                                    != self.availability_zone
-                                && self.availability_zone == new_availability_zone
-                            {
-                                return Some(NewWalConnectionCandidate {
-                                    safekeeper_id: new_sk_id,
-                                    availability_zone: new_availability_zone,
-                                    wal_source_connconf: new_wal_source_connconf,
-                                    reason: ReconnectReason::SwitchAvailabilityZone,
-                                });
-                            }
                        }
                        None => debug!(
                            "Best SK candidate has its commit_lsn behind connected SK's commit_lsn"
@@ -682,7 +668,6 @@ impl WalreceiverState {
                            return Some(NewWalConnectionCandidate {
                                safekeeper_id: new_sk_id,
                                wal_source_connconf: new_wal_source_connconf,
-                                availability_zone: new_availability_zone,
                                reason: ReconnectReason::NoWalTimeout {
                                    current_lsn,
                                    current_commit_lsn,
@@ -701,11 +686,10 @@ impl WalreceiverState {
                self.wal_connection.as_mut().unwrap().discovered_new_wal = discovered_new_wal;
            }
            None => {
-                let (new_sk_id, new_safekeeper_broker_data, new_wal_source_connconf) =
+                let (new_sk_id, _, new_wal_source_connconf) =
                    self.select_connection_candidate(None)?;
                return Some(NewWalConnectionCandidate {
                    safekeeper_id: new_sk_id,
-                    availability_zone: new_safekeeper_broker_data.availability_zone.clone(),
                    wal_source_connconf: new_wal_source_connconf,
                    reason: ReconnectReason::NoExistingConnection,
                });
@@ -810,7 +794,6 @@ impl WalreceiverState {
 struct NewWalConnectionCandidate {
    safekeeper_id: NodeId,
    wal_source_connconf: PgConnectionConfig,
-    availability_zone: Option<String>,
    // This field is used in `derive(Debug)` only.
    #[allow(dead_code)]
    reason: ReconnectReason,
@@ -825,7 +808,6 @@ enum ReconnectReason {
        new_commit_lsn: Lsn,
        threshold: NonZeroU64,
    },
-    SwitchAvailabilityZone,
    NoWalTimeout {
        current_lsn: Lsn,
        current_commit_lsn: Lsn,
@@ -891,7 +873,6 @@ mod tests {
                peer_horizon_lsn: 0,
                local_start_lsn: 0,
                safekeeper_connstr: safekeeper_connstr.to_owned(),
-                availability_zone: None,
            },
            latest_update,
        }
@@ -952,7 +933,6 @@ mod tests {
        state.wal_connection = Some(WalConnection {
            started_at: now,
            sk_id: connected_sk_id,
-            availability_zone: None,
            status: connection_status,
            connection_task: TaskHandle::spawn(move |sender, _| async move {
                sender
@@ -1115,7 +1095,6 @@ mod tests {
        state.wal_connection = Some(WalConnection {
            started_at: now,
            sk_id: connected_sk_id,
-            availability_zone: None,
            status: connection_status,
            connection_task: TaskHandle::spawn(move |sender, _| async move {
                sender
@@ -1181,7 +1160,6 @@ mod tests {
        state.wal_connection = Some(WalConnection {
            started_at: now,
            sk_id: NodeId(1),
-            availability_zone: None,
            status: connection_status,
            connection_task: TaskHandle::spawn(move |sender, _| async move {
                sender
@@ -1244,7 +1222,6 @@ mod tests {
        state.wal_connection = Some(WalConnection {
            started_at: now,
            sk_id: NodeId(1),
-            availability_zone: None,
            status: connection_status,
            connection_task: TaskHandle::spawn(move |_, _| async move { Ok(()) }),
            discovered_new_wal: Some(NewCommittedWAL {
@@ -1312,74 +1289,4 @@ mod tests {
            availability_zone: None,
        }
    }
-
-    #[tokio::test]
-    async fn switch_to_same_availability_zone() -> anyhow::Result<()> {
-        // Pageserver and one of safekeepers will be in the same availability zone
-        // and pageserver should prefer to connect to it.
-        let test_az = Some("test_az".to_owned());
-
-        let harness = TenantHarness::create("switch_to_same_availability_zone")?;
-        let mut state = dummy_state(&harness).await;
-        state.availability_zone = test_az.clone();
-        let current_lsn = Lsn(100_000).align();
-        let now = Utc::now().naive_utc();
-
-        let connected_sk_id = NodeId(0);
-
-        let connection_status = WalConnectionStatus {
-            is_connected: true,
-            has_processed_wal: true,
-            latest_connection_update: now,
-            latest_wal_update: now,
-            commit_lsn: Some(current_lsn),
-            streaming_lsn: Some(current_lsn),
-        };
-
-        state.wal_connection = Some(WalConnection {
-            started_at: now,
-            sk_id: connected_sk_id,
-            availability_zone: None,
-            status: connection_status,
-            connection_task: TaskHandle::spawn(move |sender, _| async move {
-                sender
-                    .send(TaskStateUpdate::Progress(connection_status))
-                    .ok();
-                Ok(())
-            }),
-            discovered_new_wal: None,
-        });
-
-        // We have another safekeeper with the same commit_lsn, and it have the same availability zone as
-        // the current pageserver.
-        let mut same_az_sk = dummy_broker_sk_timeline(current_lsn.0, "same_az", now);
-        same_az_sk.timeline.availability_zone = test_az.clone();
-
-        state.wal_stream_candidates = HashMap::from([
-            (
-                connected_sk_id,
-                dummy_broker_sk_timeline(current_lsn.0, DUMMY_SAFEKEEPER_HOST, now),
-            ),
-            (NodeId(1), same_az_sk),
-        ]);
-
-        // We expect that pageserver will switch to the safekeeper in the same availability zone,
-        // even if it has the same commit_lsn.
-        let next_candidate = state.next_connection_candidate().expect(
-            "Expected one candidate selected out of multiple valid data options, but got none",
-        );
-
-        assert_eq!(next_candidate.safekeeper_id, NodeId(1));
-        assert_eq!(
-            next_candidate.reason,
-            ReconnectReason::SwitchAvailabilityZone,
-            "Should switch to the safekeeper in the same availability zone, if it has the same commit_lsn"
-        );
-        assert_eq!(
-            next_candidate.wal_source_connconf.host(),
-            &Host::Domain("same_az".to_owned())
-        );
-
-        Ok(())
-    }
 }
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -2,7 +2,6 @@

 use std::{
    error::Error,
-    pin::pin,
    str::FromStr,
    sync::Arc,
    time::{Duration, SystemTime},
@@ -18,7 +17,7 @@ use postgres_ffi::v14::xlog_utils::normalize_lsn;
 use postgres_ffi::WAL_SEGMENT_SIZE;
 use postgres_protocol::message::backend::ReplicationMessage;
 use postgres_types::PgLsn;
-use tokio::{select, sync::watch, time};
+use tokio::{pin, select, sync::watch, time};
 use tokio_postgres::{replication::ReplicationStream, Client};
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, trace, warn};
@@ -37,7 +36,7 @@ use crate::{
 use postgres_backend::is_expected_io_error;
 use postgres_connection::PgConnectionConfig;
 use postgres_ffi::waldecoder::WalStreamDecoder;
-use pq_proto::PageserverFeedback;
+use pq_proto::ReplicationFeedback;
 use utils::lsn::Lsn;

 /// Status of the connection.
@@ -188,7 +187,8 @@ pub async fn handle_walreceiver_connection(
    let query = format!("START_REPLICATION PHYSICAL {startpoint}");

    let copy_stream = replication_client.copy_both_simple(&query).await?;
-    let mut physical_stream = pin!(ReplicationStream::new(copy_stream));
+    let physical_stream = ReplicationStream::new(copy_stream);
+    pin!(physical_stream);

    let mut waldecoder = WalStreamDecoder::new(startpoint, timeline.pg_version);

@@ -319,12 +319,12 @@ pub async fn handle_walreceiver_connection(
                timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));

            // The last LSN we processed. It is not guaranteed to survive pageserver crash.
-            let last_received_lsn = u64::from(last_lsn);
+            let write_lsn = u64::from(last_lsn);
            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
-            let disk_consistent_lsn = u64::from(timeline.get_disk_consistent_lsn());
+            let flush_lsn = u64::from(timeline.get_disk_consistent_lsn());
            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
-            let remote_consistent_lsn = u64::from(timeline_remote_consistent_lsn);
+            let apply_lsn = u64::from(timeline_remote_consistent_lsn);
            let ts = SystemTime::now();

            // Update the status about what we just received. This is shown in the mgmt API.
@@ -343,12 +343,12 @@ pub async fn handle_walreceiver_connection(
            let (timeline_logical_size, _) = timeline
                .get_current_logical_size(&ctx)
                .context("Status update creation failed to get current logical size")?;
-            let status_update = PageserverFeedback {
+            let status_update = ReplicationFeedback {
                current_timeline_size: timeline_logical_size,
-                last_received_lsn,
-                disk_consistent_lsn,
-                remote_consistent_lsn,
-                replytime: ts,
+                ps_writelsn: write_lsn,
+                ps_flushlsn: flush_lsn,
+                ps_applylsn: apply_lsn,
+                ps_replytime: ts,
            };

            debug!("neon_status_update {status_update:?}");
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -14,7 +14,6 @@
 */

 #include <sys/file.h>
-#include <sys/statvfs.h>
 #include <unistd.h>
 #include <fcntl.h>

@@ -35,9 +34,6 @@
 #include "storage/fd.h"
 #include "storage/pg_shmem.h"
 #include "storage/buf_internals.h"
-#include "storage/procsignal.h"
-#include "postmaster/bgworker.h"
-#include "postmaster/interrupt.h"

 /*
 * Local file cache is used to temporary store relations pages in local file system.
@@ -63,9 +59,6 @@

 #define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK))

-#define MAX_MONITOR_INTERVAL_USEC 1000000 /* 1 second */
-#define MAX_DISK_WRITE_RATE       1000 /* MB/sec */
-
 typedef struct FileCacheEntry
 {
 	BufferTag	key;
@@ -78,7 +71,6 @@ typedef struct FileCacheEntry
 typedef struct FileCacheControl
 {
 	uint32 size; /* size of cache file in chunks */
-	uint32 used; /* number of used chunks */
 	dlist_head lru; /* double linked list for LRU replacement algorithm */
 } FileCacheControl;

@@ -87,14 +79,12 @@ static int   lfc_desc;
 static LWLockId lfc_lock;
 static int   lfc_max_size;
 static int   lfc_size_limit;
-static int   lfc_free_space_watermark;
 static char* lfc_path;
 static  FileCacheControl* lfc_ctl;
 static shmem_startup_hook_type prev_shmem_startup_hook;
 #if PG_VERSION_NUM>=150000
 static shmem_request_hook_type prev_shmem_request_hook;
 #endif
-static int   lfc_shrinking_factor; /* power of two by which local cache size will be shrinked when lfc_free_space_watermark is reached */

 static void
 lfc_shmem_startup(void)
@@ -122,7 +112,6 @@ lfc_shmem_startup(void)
 								 &info,
 								 HASH_ELEM | HASH_BLOBS);
 		lfc_ctl->size = 0;
-		lfc_ctl->used = 0;
 		dlist_init(&lfc_ctl->lru);

 		/* Remove file cache on restart */
@@ -176,7 +165,7 @@ lfc_change_limit_hook(int newval, void *extra)
 		}
 	}
 	LWLockAcquire(lfc_lock, LW_EXCLUSIVE);
-	while (new_size < lfc_ctl->used && !dlist_is_empty(&lfc_ctl->lru))
+	while (new_size < lfc_ctl->size && !dlist_is_empty(&lfc_ctl->lru))
 	{
 		/* Shrink cache by throwing away least recently accessed chunks and returning their space to file system */
 		FileCacheEntry* victim = dlist_container(FileCacheEntry, lru_node, dlist_pop_head_node(&lfc_ctl->lru));
@@ -186,86 +175,12 @@ lfc_change_limit_hook(int newval, void *extra)
 			elog(LOG, "Failed to punch hole in file: %m");
 #endif
 		hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL);
-		lfc_ctl->used -= 1;
+		lfc_ctl->size -= 1;
 	}
 	elog(LOG, "set local file cache limit to %d", new_size);
 	LWLockRelease(lfc_lock);
 }

-/*
- * Local file system state monitor check available free space.
- * If it is lower than lfc_free_space_watermark then we shrink size of local cache
- * but throwing away least recently accessed chunks.
- * First time low space watermark is reached cache size is divided by two,
- * second time by four,... Finally we remove all chunks from local cache.
- *
- * Please notice that we are not changing lfc_cache_size: it is used to be adjusted by autoscaler.
- * We only throw away cached chunks but do not prevent from filling cache by new chunks.
- *
- * Interval of poooling cache state is calculated as minimal time needed to consume lfc_free_space_watermark
- * disk space with maximal possible disk write speed (1Gb/sec). But not larger than 1 second.
- * Calling statvfs each second should not add any noticeable overhead.
- */
-void
-FileCacheMonitorMain(Datum main_arg)
-{
-	/*
-	 * Choose file system state monitor interval so that space can not be exosted
-	 * during this period but not longer than  MAX_MONITOR_INTERVAL (10 sec)
-	 */
-	uint64 monitor_interval = Min(MAX_MONITOR_INTERVAL_USEC, lfc_free_space_watermark*MB/MAX_DISK_WRITE_RATE);
-
-	/* Establish signal handlers. */
-	pqsignal(SIGUSR1, procsignal_sigusr1_handler);
-	pqsignal(SIGHUP, SignalHandlerForConfigReload);
-	pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
-	BackgroundWorkerUnblockSignals();
-
-	/* Periodically dump buffers until terminated. */
-	while (!ShutdownRequestPending)
-	{
-		if (lfc_size_limit != 0)
-		{
-			struct statvfs sfs;
-			if (statvfs(lfc_path, &sfs) < 0)
-			{
-				elog(WARNING, "Failed to obtain status of %s: %m", lfc_path);
-			}
-			else
-			{
-				if (sfs.f_bavail*sfs.f_bsize < lfc_free_space_watermark*MB)
-				{
-					if (lfc_shrinking_factor < 31) {
-						lfc_shrinking_factor += 1;
-					}
-					lfc_change_limit_hook(lfc_size_limit >> lfc_shrinking_factor, NULL);
-				}
-				else
-					lfc_shrinking_factor = 0; /* reset to initial value */
-			}
-		}
-		pg_usleep(monitor_interval);
-	}
-}
-
-static void
-lfc_register_free_space_monitor(void)
-{
-	BackgroundWorker bgw;
-	memset(&bgw, 0, sizeof(bgw));
-	bgw.bgw_flags = BGWORKER_SHMEM_ACCESS;
-	bgw.bgw_start_time = BgWorkerStart_RecoveryFinished;
-	snprintf(bgw.bgw_library_name, BGW_MAXLEN, "neon");
-	snprintf(bgw.bgw_function_name, BGW_MAXLEN, "FileCacheMonitorMain");
-	snprintf(bgw.bgw_name, BGW_MAXLEN, "Local free space monitor");
-	snprintf(bgw.bgw_type, BGW_MAXLEN, "Local free space monitor");
-	bgw.bgw_restart_time = 5;
-	bgw.bgw_notify_pid = 0;
-	bgw.bgw_main_arg = (Datum) 0;
-
-	RegisterBackgroundWorker(&bgw);
-}
-
 void
 lfc_init(void)
 {
@@ -302,19 +217,6 @@ lfc_init(void)
 							lfc_change_limit_hook,
 							NULL);

-	DefineCustomIntVariable("neon.free_space_watermark",
-							"Minimal free space in local file system after reaching which local file cache will be truncated",
-							NULL,
-							&lfc_free_space_watermark,
-							1024, /* 1GB */
-							0,
-							INT_MAX,
-							PGC_SIGHUP,
-							GUC_UNIT_MB,
-							NULL,
-							NULL,
-							NULL);
-
 	DefineCustomStringVariable("neon.file_cache_path",
 							   "Path to local file cache (can be raw device)",
 							   NULL,
@@ -329,9 +231,6 @@ lfc_init(void)
 	if (lfc_max_size == 0)
 		return;

-	if (lfc_free_space_watermark != 0)
-		lfc_register_free_space_monitor();
-
 	prev_shmem_startup_hook = shmem_startup_hook;
 	shmem_startup_hook = lfc_shmem_startup;
 #if PG_VERSION_NUM>=150000
@@ -481,7 +380,7 @@ lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
 		 * there are should be very large number of concurrent IO operations and them are limited by max_connections,
 		 * we prefer not to complicate code and use second approach.
 		 */
-		if (lfc_ctl->used >= SIZE_MB_TO_CHUNKS(lfc_size_limit) && !dlist_is_empty(&lfc_ctl->lru))
+		if (lfc_ctl->size >= SIZE_MB_TO_CHUNKS(lfc_size_limit) && !dlist_is_empty(&lfc_ctl->lru))
 		{
 			/* Cache overflow: evict least recently used chunk */
 			FileCacheEntry* victim = dlist_container(FileCacheEntry, lru_node, dlist_pop_head_node(&lfc_ctl->lru));
@@ -491,10 +390,7 @@ lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
 			elog(LOG, "Swap file cache page");
 		}
 		else
-		{
-			lfc_ctl->used += 1;
 			entry->offset = lfc_ctl->size++; /* allocate new chunk at end of file */
-		}
 		entry->access_count = 1;
 		memset(entry->bitmap, 0, sizeof entry->bitmap);
 	}
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -1872,9 +1872,9 @@ RecvAppendResponses(Safekeeper *sk)
 	return sk->state == SS_ACTIVE;
 }

-/* Parse a PageserverFeedback message, or the PageserverFeedback part of an AppendResponse */
+/* Parse a ReplicationFeedback message, or the ReplicationFeedback part of an AppendResponse */
 void
-ParsePageserverFeedbackMessage(StringInfo reply_message, PageserverFeedback * rf)
+ParseReplicationFeedbackMessage(StringInfo reply_message, ReplicationFeedback * rf)
 {
 	uint8		nkeys;
 	int			i;
@@ -1892,45 +1892,45 @@ ParsePageserverFeedbackMessage(StringInfo reply_message, PageserverFeedback * rf
 			pq_getmsgint(reply_message, sizeof(int32));
 			/* read value length */
 			rf->currentClusterSize = pq_getmsgint64(reply_message);
-			elog(DEBUG2, "ParsePageserverFeedbackMessage: current_timeline_size %lu",
+			elog(DEBUG2, "ParseReplicationFeedbackMessage: current_timeline_size %lu",
 				 rf->currentClusterSize);
 		}
-		else if ((strcmp(key, "ps_writelsn") == 0) || (strcmp(key, "last_received_lsn") == 0))
+		else if (strcmp(key, "ps_writelsn") == 0)
 		{
 			pq_getmsgint(reply_message, sizeof(int32));
 			/* read value length */
-			rf->last_received_lsn = pq_getmsgint64(reply_message);
-			elog(DEBUG2, "ParsePageserverFeedbackMessage: last_received_lsn %X/%X",
-				 LSN_FORMAT_ARGS(rf->last_received_lsn));
+			rf->ps_writelsn = pq_getmsgint64(reply_message);
+			elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_writelsn %X/%X",
+				 LSN_FORMAT_ARGS(rf->ps_writelsn));
 		}
-		else if ((strcmp(key, "ps_flushlsn") == 0) || (strcmp(key, "disk_consistent_lsn") == 0))
+		else if (strcmp(key, "ps_flushlsn") == 0)
 		{
 			pq_getmsgint(reply_message, sizeof(int32));
 			/* read value length */
-			rf->disk_consistent_lsn = pq_getmsgint64(reply_message);
-			elog(DEBUG2, "ParsePageserverFeedbackMessage: disk_consistent_lsn %X/%X",
-				 LSN_FORMAT_ARGS(rf->disk_consistent_lsn));
+			rf->ps_flushlsn = pq_getmsgint64(reply_message);
+			elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_flushlsn %X/%X",
+				 LSN_FORMAT_ARGS(rf->ps_flushlsn));
 		}
-		else if ((strcmp(key, "ps_applylsn") == 0) || (strcmp(key, "remote_consistent_lsn") == 0))
+		else if (strcmp(key, "ps_applylsn") == 0)
 		{
 			pq_getmsgint(reply_message, sizeof(int32));
 			/* read value length */
-			rf->remote_consistent_lsn = pq_getmsgint64(reply_message);
-			elog(DEBUG2, "ParsePageserverFeedbackMessage: remote_consistent_lsn %X/%X",
-				 LSN_FORMAT_ARGS(rf->remote_consistent_lsn));
+			rf->ps_applylsn = pq_getmsgint64(reply_message);
+			elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_applylsn %X/%X",
+				 LSN_FORMAT_ARGS(rf->ps_applylsn));
 		}
-		else if ((strcmp(key, "ps_replytime") == 0) || (strcmp(key, "replytime") == 0))
+		else if (strcmp(key, "ps_replytime") == 0)
 		{
 			pq_getmsgint(reply_message, sizeof(int32));
 			/* read value length */
-			rf->replytime = pq_getmsgint64(reply_message);
+			rf->ps_replytime = pq_getmsgint64(reply_message);
 			{
 				char	   *replyTimeStr;

 				/* Copy because timestamptz_to_str returns a static buffer */
-				replyTimeStr = pstrdup(timestamptz_to_str(rf->replytime));
-				elog(DEBUG2, "ParsePageserverFeedbackMessage: replytime %lu reply_time: %s",
-					 rf->replytime, replyTimeStr);
+				replyTimeStr = pstrdup(timestamptz_to_str(rf->ps_replytime));
+				elog(DEBUG2, "ParseReplicationFeedbackMessage: ps_replytime %lu reply_time: %s",
+					 rf->ps_replytime, replyTimeStr);

 				pfree(replyTimeStr);
 			}
@@ -1944,7 +1944,7 @@ ParsePageserverFeedbackMessage(StringInfo reply_message, PageserverFeedback * rf
 			 * Skip unknown keys to support backward compatibile protocol
 			 * changes
 			 */
-			elog(LOG, "ParsePageserverFeedbackMessage: unknown key: %s len %d", key, len);
+			elog(LOG, "ParseReplicationFeedbackMessage: unknown key: %s len %d", key, len);
 			pq_getmsgbytes(reply_message, len);
 		};
 	}
@@ -2024,7 +2024,7 @@ GetAcknowledgedByQuorumWALPosition(void)
 }

 /*
- * WalproposerShmemSize --- report amount of shared memory space needed
+ * WalproposerShmemSize --- report amount of shared memory space needed
 */
 Size
 WalproposerShmemSize(void)
@@ -2054,10 +2054,10 @@ WalproposerShmemInit(void)
 }

 void
-replication_feedback_set(PageserverFeedback * rf)
+replication_feedback_set(ReplicationFeedback * rf)
 {
 	SpinLockAcquire(&walprop_shared->mutex);
-	memcpy(&walprop_shared->feedback, rf, sizeof(PageserverFeedback));
+	memcpy(&walprop_shared->feedback, rf, sizeof(ReplicationFeedback));
 	SpinLockRelease(&walprop_shared->mutex);
 }

@@ -2065,43 +2065,43 @@ void
 replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn)
 {
 	SpinLockAcquire(&walprop_shared->mutex);
-	*writeLsn = walprop_shared->feedback.last_received_lsn;
-	*flushLsn = walprop_shared->feedback.disk_consistent_lsn;
-	*applyLsn = walprop_shared->feedback.remote_consistent_lsn;
+	*writeLsn = walprop_shared->feedback.ps_writelsn;
+	*flushLsn = walprop_shared->feedback.ps_flushlsn;
+	*applyLsn = walprop_shared->feedback.ps_applylsn;
 	SpinLockRelease(&walprop_shared->mutex);
 }

 /*
- * Get PageserverFeedback fields from the most advanced safekeeper
+ * Get ReplicationFeedback fields from the most advanced safekeeper
 */
 static void
-GetLatestNeonFeedback(PageserverFeedback * rf)
+GetLatestNeonFeedback(ReplicationFeedback * rf)
 {
 	int			latest_safekeeper = 0;
-	XLogRecPtr	last_received_lsn = InvalidXLogRecPtr;
+	XLogRecPtr	ps_writelsn = InvalidXLogRecPtr;

 	for (int i = 0; i < n_safekeepers; i++)
 	{
-		if (safekeeper[i].appendResponse.rf.last_received_lsn > last_received_lsn)
+		if (safekeeper[i].appendResponse.rf.ps_writelsn > ps_writelsn)
 		{
 			latest_safekeeper = i;
-			last_received_lsn = safekeeper[i].appendResponse.rf.last_received_lsn;
+			ps_writelsn = safekeeper[i].appendResponse.rf.ps_writelsn;
 		}
 	}

 	rf->currentClusterSize = safekeeper[latest_safekeeper].appendResponse.rf.currentClusterSize;
-	rf->last_received_lsn = safekeeper[latest_safekeeper].appendResponse.rf.last_received_lsn;
-	rf->disk_consistent_lsn = safekeeper[latest_safekeeper].appendResponse.rf.disk_consistent_lsn;
-	rf->remote_consistent_lsn = safekeeper[latest_safekeeper].appendResponse.rf.remote_consistent_lsn;
-	rf->replytime = safekeeper[latest_safekeeper].appendResponse.rf.replytime;
+	rf->ps_writelsn = safekeeper[latest_safekeeper].appendResponse.rf.ps_writelsn;
+	rf->ps_flushlsn = safekeeper[latest_safekeeper].appendResponse.rf.ps_flushlsn;
+	rf->ps_applylsn = safekeeper[latest_safekeeper].appendResponse.rf.ps_applylsn;
+	rf->ps_replytime = safekeeper[latest_safekeeper].appendResponse.rf.ps_replytime;

 	elog(DEBUG2, "GetLatestNeonFeedback: currentClusterSize %lu,"
-		 " last_received_lsn %X/%X, disk_consistent_lsn %X/%X, remote_consistent_lsn %X/%X, replytime %lu",
+		 " ps_writelsn %X/%X, ps_flushlsn %X/%X, ps_applylsn %X/%X, ps_replytime %lu",
 		 rf->currentClusterSize,
-		 LSN_FORMAT_ARGS(rf->last_received_lsn),
-		 LSN_FORMAT_ARGS(rf->disk_consistent_lsn),
-		 LSN_FORMAT_ARGS(rf->remote_consistent_lsn),
-		 rf->replytime);
+		 LSN_FORMAT_ARGS(rf->ps_writelsn),
+		 LSN_FORMAT_ARGS(rf->ps_flushlsn),
+		 LSN_FORMAT_ARGS(rf->ps_applylsn),
+		 rf->ps_replytime);

 	replication_feedback_set(rf);
 }
@@ -2115,16 +2115,16 @@ HandleSafekeeperResponse(void)
 	XLogRecPtr	minFlushLsn;

 	minQuorumLsn = GetAcknowledgedByQuorumWALPosition();
-	diskConsistentLsn = quorumFeedback.rf.disk_consistent_lsn;
+	diskConsistentLsn = quorumFeedback.rf.ps_flushlsn;

 	if (!syncSafekeepers)
 	{
-		/* Get PageserverFeedback fields from the most advanced safekeeper */
+		/* Get ReplicationFeedback fields from the most advanced safekeeper */
 		GetLatestNeonFeedback(&quorumFeedback.rf);
 		SetZenithCurrentClusterSize(quorumFeedback.rf.currentClusterSize);
 	}

-	if (minQuorumLsn > quorumFeedback.flushLsn || diskConsistentLsn != quorumFeedback.rf.disk_consistent_lsn)
+	if (minQuorumLsn > quorumFeedback.flushLsn || diskConsistentLsn != quorumFeedback.rf.ps_flushlsn)
 	{

 		if (minQuorumLsn > quorumFeedback.flushLsn)
@@ -2142,7 +2142,7 @@ HandleSafekeeperResponse(void)
 			 * apply_lsn - This is what processed and durably saved at*
 			 * pageserver.
 			 */
-								quorumFeedback.rf.disk_consistent_lsn,
+								quorumFeedback.rf.ps_flushlsn,
 								GetCurrentTimestamp(), false);
 	}

@@ -2326,7 +2326,7 @@ AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage * anymsg)
 				msg->hs.xmin.value = pq_getmsgint64_le(&s);
 				msg->hs.catalog_xmin.value = pq_getmsgint64_le(&s);
 				if (buf_size > APPENDRESPONSE_FIXEDPART_SIZE)
-					ParsePageserverFeedbackMessage(&s, &msg->rf);
+					ParseReplicationFeedbackMessage(&s, &msg->rf);
 				pq_getmsgend(&s);
 				return true;
 			}
@@ -2462,7 +2462,7 @@ backpressure_lag_impl(void)
 		replication_feedback_get_lsns(&writePtr, &flushPtr, &applyPtr);
 #define MB ((XLogRecPtr)1024 * 1024)

-		elog(DEBUG2, "current flushLsn %X/%X PageserverFeedback: write %X/%X flush %X/%X apply %X/%X",
+		elog(DEBUG2, "current flushLsn %X/%X ReplicationFeedback: write %X/%X flush %X/%X apply %X/%X",
 			 LSN_FORMAT_ARGS(myFlushLsn),
 			 LSN_FORMAT_ARGS(writePtr),
 			 LSN_FORMAT_ARGS(flushPtr),
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -280,21 +280,21 @@ typedef struct HotStandbyFeedback
 	FullTransactionId catalog_xmin;
 }			HotStandbyFeedback;

-typedef struct PageserverFeedback
+typedef struct ReplicationFeedback
 {
 	/* current size of the timeline on pageserver */
 	uint64		currentClusterSize;
 	/* standby_status_update fields that safekeeper received from pageserver */
-	XLogRecPtr	last_received_lsn;
-	XLogRecPtr	disk_consistent_lsn;
-	XLogRecPtr	remote_consistent_lsn;
-	TimestampTz replytime;
-}			PageserverFeedback;
+	XLogRecPtr	ps_writelsn;
+	XLogRecPtr	ps_flushlsn;
+	XLogRecPtr	ps_applylsn;
+	TimestampTz ps_replytime;
+}			ReplicationFeedback;

 typedef struct WalproposerShmemState
 {
 	slock_t		mutex;
-	PageserverFeedback feedback;
+	ReplicationFeedback feedback;
 	term_t		mineLastElectedTerm;
 	pg_atomic_uint64 backpressureThrottlingTime;
 }			WalproposerShmemState;
@@ -320,10 +320,10 @@ typedef struct AppendResponse
 	/* Feedback recieved from pageserver includes standby_status_update fields */
 	/* and custom neon feedback. */
 	/* This part of the message is extensible. */
-	PageserverFeedback rf;
+	ReplicationFeedback rf;
 }			AppendResponse;

-/*  PageserverFeedback is extensible part of the message that is parsed separately */
+/*  ReplicationFeedback is extensible part of the message that is parsed separately */
 /*  Other fields are fixed part */
 #define APPENDRESPONSE_FIXEDPART_SIZE offsetof(AppendResponse, rf)

@@ -383,13 +383,13 @@ extern void WalProposerSync(int argc, char *argv[]);
 extern void WalProposerMain(Datum main_arg);
 extern void WalProposerBroadcast(XLogRecPtr startpos, XLogRecPtr endpos);
 extern void WalProposerPoll(void);
-extern void ParsePageserverFeedbackMessage(StringInfo reply_message,
-											PageserverFeedback *rf);
+extern void ParseReplicationFeedbackMessage(StringInfo reply_message,
+											ReplicationFeedback *rf);
 extern void StartProposerReplication(StartReplicationCmd *cmd);

 extern Size WalproposerShmemSize(void);
 extern bool WalproposerShmemInit(void);
-extern void replication_feedback_set(PageserverFeedback *rf);
+extern void replication_feedback_set(ReplicationFeedback *rf);
 extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);

 /* libpqwalproposer hooks & helper type */
--- a/pgxn/neon_walredo/seccomp.c
+++ b/pgxn/neon_walredo/seccomp.c
@@ -9,14 +9,6 @@
 * To prevent this, it has been decided to limit possible interactions
 * with the outside world using the Secure Computing BPF mode.
 *
- * This code is intended to support both x86_64 and aarch64. The latter
- * doesn't implement some syscalls like open and select. We allow both
- * select (absent on aarch64) and pselect6 (present on both architectures)
- * We call select(2) through libc, and the libc wrapper calls select or pselect6
- * depending on the architecture. You can check which syscalls are present on
- * different architectures with the `scmp_sys_resolver` tool from the
- * seccomp package.
- *
 * We use this mode to disable all syscalls not in the allowlist. This
 * approach has its pros & cons:
 *
@@ -81,6 +73,8 @@
 *    I suspect that certain libc functions might involve slightly
 *    different syscalls, e.g. select/pselect6/pselect6_time64/whatever.
 *
+ *  - Test on any arch other than amd64 to see if it works there.
+ *
 *-------------------------------------------------------------------------
 */

@@ -128,10 +122,9 @@ seccomp_load_rules(PgSeccompRule *rules, int count)

 	/*
 	 * First, check that open of a well-known file works.
-	 * XXX: We use raw syscall() to call the very openat() which is
-	 * present both on x86_64 and on aarch64.
+	 * XXX: We use raw syscall() to invoke the open() syscall itself.
 	 */
-	fd = syscall(SCMP_SYS(openat), AT_FDCWD, "/dev/null", O_RDONLY, 0);
+	fd = syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0);
 	if (seccomp_test_sighandler_done)
 		ereport(FATAL,
 				(errcode(ERRCODE_SYSTEM_ERROR),
@@ -142,15 +135,15 @@ seccomp_load_rules(PgSeccompRule *rules, int count)
 				 errmsg("seccomp: could not open /dev/null for seccomp testing: %m")));
 	close((int) fd);

-	/* Set a trap on openat() to test seccomp bpf */
-	rule = PG_SCMP(openat, SCMP_ACT_TRAP);
+	/* Set a trap on open() to test seccomp bpf */
+	rule = PG_SCMP(open, SCMP_ACT_TRAP);
 	if (do_seccomp_load_rules(&rule, 1, SCMP_ACT_ALLOW) != 0)
 		ereport(FATAL,
 				(errcode(ERRCODE_SYSTEM_ERROR),
 				 errmsg("seccomp: could not load test trap")));

-	/* Finally, check that openat() now raises SIGSYS */
-	(void) syscall(SCMP_SYS(openat), AT_FDCWD, "/dev/null", O_RDONLY, 0);
+	/* Finally, check that open() now raises SIGSYS */
+	(void) syscall(SCMP_SYS(open), "/dev/null", O_RDONLY, 0);
 	if (!seccomp_test_sighandler_done)
 		ereport(FATAL,
 				(errcode(ERRCODE_SYSTEM_ERROR),
@@ -231,7 +224,7 @@ seccomp_test_sighandler(int signum, siginfo_t *info, void *cxt pg_attribute_unus
 		die(1, DIE_PREFIX "bad signal number\n");

 	/* TODO: maybe somehow extract the hardcoded syscall number */
-	if (info->si_syscall != SCMP_SYS(openat))
+	if (info->si_syscall != SCMP_SYS(open))
 		die(1, DIE_PREFIX "bad syscall number\n");

 #undef DIE_PREFIX
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Heikki Linnakangas	7eefad691c	Improve the scaling, add Play/Stop buttons	2023-03-22 19:40:54 +02:00
Heikki Linnakangas	fe59a063ea	WIP: Collect and draw layer trace	2023-03-22 19:40:54 +02:00
Heikki Linnakangas	ae8e5b3a8e	Add test from PR #3673	2023-03-22 19:40:54 +02:00