Clone less

WIP
2026-03-15 06:10:36 +00:00 · 2023-02-06 14:42:17 -05:00 · 2023-02-06 13:55:53 -05:00
174 changed files with 2722 additions and 11847 deletions
--- a/.cargo/config.toml
+++ b/.cargo/config.toml
@@ -14,6 +14,3 @@ opt-level = 1

 [alias]
 build_testing = ["build", "--features", "testing"]
-
-[build]
-rustflags = ["-C", "default-linker-libraries"]
--- a/.dockerignore
+++ b/.dockerignore
@@ -21,4 +21,3 @@
 !workspace_hack/
 !neon_local/
 !scripts/ninstall.sh
-!vm-cgconfig.conf
--- a/.github/ansible/prod.ap-southeast-1.hosts.yaml
+++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-ap-southeast-1
    bucket_region: ap-southeast-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
@@ -32,7 +32,7 @@ storage:
      hosts:
        safekeeper-0.ap-southeast-1.aws.neon.tech:
          ansible_host:  i-0d6f1dc5161eef894
+        safekeeper-1.ap-southeast-1.aws.neon.tech:
+          ansible_host:  i-0e338adda8eb2d19f
        safekeeper-2.ap-southeast-1.aws.neon.tech:
          ansible_host:  i-04fb63634e4679eb9
-        safekeeper-3.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-05481f3bc88cfc2d4
--- a/.github/ansible/prod.eu-central-1.hosts.yaml
+++ b/.github/ansible/prod.eu-central-1.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-eu-central-1
    bucket_region: eu-central-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-us-east-2
    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
@@ -27,8 +27,6 @@ storage:
          ansible_host:  i-062227ba7f119eb8c
        pageserver-1.us-east-2.aws.neon.tech:
          ansible_host:  i-0b3ec0afab5968938
-        pageserver-2.us-east-2.aws.neon.tech:
-          ansible_host:  i-0d7a1c4325e71421d

    safekeepers:
      hosts:
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-us-west-2
    bucket_region: us-west-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
@@ -29,8 +29,6 @@ storage:
          ansible_host: i-0c834be1dddba8b3f
        pageserver-2.us-west-2.aws.neon.tech:
          ansible_host: i-051642d372c0a4f32
-        pageserver-3.us-west-2.aws.neon.tech:
-          ansible_host: i-00c3844beb9ad1c6b

    safekeepers:
      hosts:
--- a/.github/ansible/production.hosts.yaml
+++ b/.github/ansible/production.hosts.yaml
@@ -0,0 +1,40 @@
+---
+storage:
+  vars:
+    console_mgmt_base_url: http://console-release.local
+    bucket_name: zenith-storage-oregon
+    bucket_region: us-west-2
+    broker_endpoint: http://storage-broker.prod.local:50051
+    pageserver_config_stub:
+      pg_distrib_dir: /usr/local
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
+      metric_collection_interval: 10min
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "{{ inventory_hostname }}"
+    safekeeper_s3_prefix: prod-1/wal
+    hostname_suffix: ".local"
+    remote_user: admin
+    sentry_environment: production
+
+  children:
+    pageservers:
+      hosts:
+        zenith-1-ps-2:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-3:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-4:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-5:
+          console_region_id: aws-us-west-2
+
+    safekeepers:
+      hosts:
+        zenith-1-sk-1:
+          console_region_id: aws-us-west-2
+        zenith-1-sk-2:
+          console_region_id: aws-us-west-2
+        zenith-1-sk-4:
+          console_region_id: aws-us-west-2
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -2,17 +2,12 @@ storage:
  vars:
    bucket_name: neon-dev-storage-eu-west-1
    bucket_region: eu-west-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
+    console_mgmt_base_url: http://console-staging.local
    broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-staging.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: "20m"
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -2,17 +2,12 @@ storage:
  vars:
    bucket_name: neon-staging-storage-us-east-2
    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
+    console_mgmt_base_url: http://console-staging.local
    broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-staging.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: "20m"
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
@@ -36,8 +31,6 @@ storage:
          ansible_host: i-01e31cdf7e970586a
        pageserver-3.us-east-2.aws.neon.build:
          ansible_host: i-0602a0291365ef7cc
-        pageserver-99.us-east-2.aws.neon.build:
-          ansible_host: i-0c39491109bb88824

    safekeepers:
      hosts:
@@ -47,5 +40,3 @@ storage:
          ansible_host: i-0171efc3604a7b907
        safekeeper-2.us-east-2.aws.neon.build:
          ansible_host: i-0de0b03a51676a6ce
-        safekeeper-99.us-east-2.aws.neon.build:
-          ansible_host: i-0d61b6a2ea32028d5
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -1,31 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.eu-west-1.aws.neon.build"
  sentryEnvironment: "staging"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
@@ -10,7 +10,7 @@ settings:
  uri: "https://console.stage.neon.tech/psql_session/"
  domain: "pg.neon.build"
  sentryEnvironment: "staging"
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy-link pods
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.cloud.stage.neon.tech"
  sentryEnvironment: "staging"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -1,31 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.build"
  sentryEnvironment: "staging"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.ap-southeast-1.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.eu-central-1.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.cloud.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-west-2.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/production.neon-storage-broker.yaml
+++ b/.github/helm-values/production.neon-storage-broker.yaml
@@ -0,0 +1,56 @@
+# Helm chart values for neon-storage-broker
+podLabels:
+  neon_env: production
+  neon_service: storage-broker
+
+# Use L4 LB
+service:
+  # service.annotations -- Annotations to add to the service
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
+    # assign service to this name at external-dns
+    external-dns.alpha.kubernetes.io/hostname: storage-broker.prod.local
+  # service.type -- Service type
+  type: LoadBalancer
+  # service.port -- broker listen port
+  port: 50051
+
+ingress:
+  enabled: false
+
+metrics:
+  enabled: true
+  serviceMonitor:
+    enabled: true
+    selector:
+      release: kube-prometheus-stack
+
+extraManifests:
+  - apiVersion: operator.victoriametrics.com/v1beta1
+    kind: VMServiceScrape
+    metadata:
+      name: "{{ include \"neon-storage-broker.fullname\" . }}"
+      labels:
+        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
+        app.kubernetes.io/name: neon-storage-broker
+        app.kubernetes.io/instance: neon-storage-broker
+        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
+        app.kubernetes.io/managed-by: Helm
+      namespace: "{{ .Release.Namespace }}"
+    spec:
+      selector:
+        matchLabels:
+          app.kubernetes.io/name: "neon-storage-broker"
+      endpoints:
+        - port: broker
+          path: /metrics
+          interval: 10s
+          scrapeTimeout: 10s
+      namespaceSelector:
+        matchNames:
+          - "{{ .Release.Namespace }}"
+
+settings:
+  sentryEnvironment: "production"
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -611,31 +611,34 @@ jobs:
      run:
        shell: sh -eu {0}
    env:
-      VM_BUILDER_VERSION: v0.4.6
+      VM_INFORMANT_VERSION: 0.1.1

    steps:
-      - name: Checkout
-        uses: actions/checkout@v1
-        with:
-          fetch-depth: 0
-
-      - name: Downloading vm-builder
+      - name: Downloading latest vm-builder
        run: |
-          curl -L https://github.com/neondatabase/neonvm/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
+          curl -L https://github.com/neondatabase/neonvm/releases/latest/download/vm-builder -o vm-builder
          chmod +x vm-builder

      - name: Pulling compute-node image
        run: |
          docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

-      - name: Building VM compute-node rootfs
+      - name: Downloading VM informant version ${{ env.VM_INFORMANT_VERSION }}
        run: |
-          docker build -t temp-vm-compute-node --build-arg SRC_IMAGE=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -f Dockerfile.vm-compute-node .
+          curl -fL https://github.com/neondatabase/autoscaling/releases/download/${{ env.VM_INFORMANT_VERSION }}/vm-informant -o vm-informant
+          chmod +x vm-informant
+
+      - name: Adding VM informant to compute-node image
+        run: |
+          ID=$(docker create 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}})
+          docker cp vm-informant $ID:/bin/vm-informant
+          docker commit $ID temp-vm-compute-node
+          docker rm -f $ID

      - name: Build vm image
        run: |
          # note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
-          ./vm-builder -use-inittab -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+          ./vm-builder -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

      - name: Pushing vm-compute-node image
        run: |
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -67,7 +67,7 @@ jobs:
          ./get_binaries.sh

          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
+          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
          rm -f neon_install.tar.gz .neon_current_version

      - name: Cleanup ansible folder
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -40,9 +40,7 @@ concurrency:
 jobs:
  deploy-prod-new:
    runs-on: prod
-    container:
-      image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-      options: --user root --privileged
+    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    if: inputs.deployStorage && inputs.disclamerAcknowledged
    defaults:
      run:
@@ -68,7 +66,7 @@ jobs:
          ./get_binaries.sh

          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
+          ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
          rm -f neon_install.tar.gz .neon_current_version

  deploy-proxy-prod-new:
@@ -165,3 +163,78 @@ jobs:
      - name: Deploy storage-broker
        run:
          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
+
+  # Deploy to old account below
+
+  deploy:
+    runs-on: prod
+    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
+    if: inputs.deployStorage && inputs.disclamerAcknowledged
+    defaults:
+      run:
+        shell: bash
+    environment:
+      name: prod-old
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Redeploy
+        run: |
+          export DOCKER_TAG=${{ inputs.dockerTag }}
+          cd "$(pwd)/.github/ansible"
+
+          ./get_binaries.sh
+
+          eval $(ssh-agent)
+          echo "${{ secrets.TELEPORT_SSH_KEY }}"  | tr -d '\n'| base64 --decode >ssh-key
+          echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
+          chmod 0600 ssh-key
+          ssh-add ssh-key
+          rm -f ssh-key ssh-key-cert.pub
+          ANSIBLE_CONFIG=./ansible.cfg ansible-galaxy collection install sivel.toiletwater
+          ANSIBLE_CONFIG=./ansible.cfg ansible-playbook deploy.yaml -i production.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
+          rm -f neon_install.tar.gz .neon_current_version
+
+      # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ansible/collections': Permission denied
+      - name: Cleanup ansible folder
+        run: rm -rf ~/.ansible
+
+  deploy-storage-broker:
+    name: deploy storage broker on old staging and old prod
+    runs-on: [ self-hosted, gen3, small ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
+    defaults:
+      run:
+        shell: bash
+    environment:
+      name: prod-old
+    env:
+      KUBECONFIG: .kubeconfig
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Store kubeconfig file
+        run: |
+          echo "${{ secrets.PRODUCTION_KUBECONFIG_DATA }}" | base64 --decode > ${KUBECONFIG}
+          chmod 0600 ${KUBECONFIG}
+
+      - name: Add neon helm chart
+        run: helm repo add neondatabase https://neondatabase.github.io/helm-charts
+
+      - name: Deploy storage-broker
+        run:
+          helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/production.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
+
+      - name: Cleanup helm folder
+        run: rm -rf ~/.cache
--- a/.gitignore
+++ b/.gitignore
@@ -18,5 +18,3 @@ test_output/
 *.o
 *.so
 *.Po
-
-tmp
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -679,25 +679,6 @@ version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"

-[[package]]
-name = "cbindgen"
-version = "0.24.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b922faaf31122819ec80c4047cc684c6979a087366c069611e33649bf98e18d"
-dependencies = [
- "clap 3.2.23",
- "heck",
- "indexmap",
- "log",
- "proc-macro2",
- "quote",
- "serde",
- "serde_json",
- "syn",
- "tempfile",
- "toml",
-]
-
 [[package]]
 name = "cc"
 version = "1.0.79"
@@ -776,12 +757,9 @@ version = "3.2.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
 dependencies = [
- "atty",
 "bitflags",
 "clap_lex 0.2.4",
 "indexmap",
- "strsim",
- "termcolor",
 "textwrap",
 ]

@@ -876,7 +854,6 @@ dependencies = [
 "opentelemetry",
 "postgres",
 "regex",
- "reqwest",
 "serde",
 "serde_json",
 "tar",
@@ -940,7 +917,6 @@ dependencies = [
 "reqwest",
 "safekeeper_api",
 "serde",
- "serde_json",
 "serde_with",
 "storage_broker",
 "tar",
@@ -1036,20 +1012,6 @@ version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52"

-[[package]]
-name = "crossbeam"
-version = "0.8.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c"
-dependencies = [
- "cfg-if",
- "crossbeam-channel",
- "crossbeam-deque",
- "crossbeam-epoch",
- "crossbeam-queue",
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.6"
@@ -1084,16 +1046,6 @@ dependencies = [
 "scopeguard",
 ]

-[[package]]
-name = "crossbeam-queue"
-version = "0.3.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add"
-dependencies = [
- "cfg-if",
- "crossbeam-utils",
-]
-
 [[package]]
 name = "crossbeam-utils"
 version = "0.8.14"
@@ -2157,16 +2109,6 @@ version = "0.3.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"

-[[package]]
-name = "mime_guess"
-version = "2.0.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef"
-dependencies = [
- "mime",
- "unicase",
-]
-
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -2479,7 +2421,6 @@ dependencies = [
 "crc32c",
 "criterion",
 "crossbeam-utils",
- "either",
 "enum-map",
 "enumset",
 "fail",
@@ -2543,7 +2484,6 @@ dependencies = [
 "enum-map",
 "postgres_ffi",
 "serde",
- "serde_json",
 "serde_with",
 "utils",
 "workspace_hack",
@@ -2941,7 +2881,6 @@ dependencies = [
 "md5",
 "metrics",
 "once_cell",
- "opentelemetry",
 "parking_lot",
 "pin-project-lite",
 "pq_proto",
@@ -2950,8 +2889,6 @@ dependencies = [
 "rcgen",
 "regex",
 "reqwest",
- "reqwest-middleware",
- "reqwest-tracing",
 "routerify",
 "rstest",
 "rustls",
@@ -2961,7 +2898,6 @@ dependencies = [
 "serde_json",
 "sha2",
 "socket2",
- "sync_wrapper",
 "thiserror",
 "tls-listener",
 "tokio",
@@ -2969,9 +2905,7 @@ dependencies = [
 "tokio-postgres-rustls",
 "tokio-rustls",
 "tracing",
- "tracing-opentelemetry",
 "tracing-subscriber",
- "tracing-utils",
 "url",
 "utils",
 "uuid",
@@ -3101,7 +3035,6 @@ dependencies = [
 "hyper",
 "metrics",
 "once_cell",
- "pin-project-lite",
 "serde",
 "serde_json",
 "tempfile",
@@ -3113,6 +3046,15 @@ dependencies = [
 "workspace_hack",
 ]

+[[package]]
+name = "remove_dir_all"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "reqwest"
 version = "0.11.14"
@@ -3133,7 +3075,6 @@ dependencies = [
 "js-sys",
 "log",
 "mime",
- "mime_guess",
 "once_cell",
 "percent-encoding",
 "pin-project-lite",
@@ -3153,36 +3094,6 @@ dependencies = [
 "winreg",
 ]

-[[package]]
-name = "reqwest-middleware"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1c03e9011a8c59716ad13115550469e081e2e9892656b0ba6a47c907921894"
-dependencies = [
- "anyhow",
- "async-trait",
- "http",
- "reqwest",
- "serde",
- "task-local-extensions",
- "thiserror",
-]
-
-[[package]]
-name = "reqwest-tracing"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b739d87a6b2cf4743968ad2b4cef648fbe0204c19999509824425babb2097bce"
-dependencies = [
- "async-trait",
- "opentelemetry",
- "reqwest",
- "reqwest-middleware",
- "task-local-extensions",
- "tracing",
- "tracing-opentelemetry",
-]
-
 [[package]]
 name = "ring"
 version = "0.16.20"
@@ -3353,11 +3264,9 @@ dependencies = [
 "async-trait",
 "byteorder",
 "bytes",
- "chrono",
 "clap 4.1.4",
 "const_format",
 "crc32c",
- "crossbeam",
 "fs2",
 "git-version",
 "hex",
@@ -3371,11 +3280,9 @@ dependencies = [
 "postgres-protocol",
 "postgres_ffi",
 "pq_proto",
- "rand",
 "regex",
 "remote_storage",
 "safekeeper_api",
- "scopeguard",
 "serde",
 "serde_json",
 "serde_with",
@@ -3879,26 +3786,18 @@ dependencies = [
 "xattr",
 ]

-[[package]]
-name = "task-local-extensions"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4167afbec18ae012de40f8cf1b9bf48420abb390678c34821caa07d924941cc4"
-dependencies = [
- "tokio",
-]
-
 [[package]]
 name = "tempfile"
-version = "3.4.0"
+version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af18f7ae1acd354b992402e9ec5864359d693cd8a79dcbef59f76891701c1e95"
+checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4"
 dependencies = [
 "cfg-if",
 "fastrand",
+ "libc",
 "redox_syscall",
- "rustix",
- "windows-sys 0.42.0",
+ "remove_dir_all",
+ "winapi",
 ]

 [[package]]
@@ -3906,8 +3805,6 @@ name = "tenant_size_model"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "serde",
- "serde_json",
 "workspace_hack",
 ]

@@ -4457,15 +4354,6 @@ dependencies = [
 "libc",
 ]

-[[package]]
-name = "unicase"
-version = "2.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
-dependencies = [
- "version_check",
-]
-
 [[package]]
 name = "unicode-bidi"
 version = "0.3.10"
@@ -4555,7 +4443,6 @@ dependencies = [
 "byteorder",
 "bytes",
 "criterion",
- "futures",
 "git-version",
 "heapless",
 "hex",
@@ -4585,7 +4472,6 @@ dependencies = [
 "tracing",
 "tracing-subscriber",
 "url",
- "uuid",
 "workspace_hack",
 ]

@@ -4637,38 +4523,6 @@ dependencies = [
 "winapi-util",
 ]

-[[package]]
-name = "walproposer"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "atty",
- "bindgen",
- "byteorder",
- "bytes",
- "cbindgen",
- "crc32c",
- "env_logger",
- "hex",
- "hyper",
- "libc",
- "log",
- "memoffset 0.8.0",
- "once_cell",
- "postgres",
- "postgres_ffi",
- "rand",
- "regex",
- "safekeeper",
- "scopeguard",
- "serde",
- "thiserror",
- "tracing",
- "tracing-subscriber",
- "utils",
- "workspace_hack",
-]
-
 [[package]]
 name = "want"
 version = "0.3.0"
@@ -4924,6 +4778,7 @@ dependencies = [
 "either",
 "fail",
 "futures",
+ "futures-channel",
 "futures-executor",
 "futures-util",
 "hashbrown 0.12.3",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,7 +38,6 @@ comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-utils = "0.8.5"
-either = "1.8"
 enum-map = "2.4.2"
 enumset = "1.0.12"
 fail = "0.5.0"
@@ -69,6 +68,7 @@ once_cell = "1.13"
 opentelemetry = "0.18.0"
 opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions = "0.10.0"
+tracing-opentelemetry = "0.18.0"
 parking_lot = "0.12"
 pin-project-lite = "0.2"
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
@@ -76,8 +76,6 @@ prost = "0.11"
 rand = "0.8"
 regex = "1.4"
 reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
-reqwest-middleware = "0.2.0"
 routerify = "3"
 rpds = "0.12.0"
 rustls = "0.20"
@@ -94,7 +92,6 @@ socket2 = "0.4.4"
 strum = "0.24"
 strum_macros = "0.24"
 svg_fmt = "0.4.1"
-sync_wrapper = "0.1.2"
 tar = "0.4"
 thiserror = "1.0"
 tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
@@ -107,7 +104,6 @@ toml = "0.5"
 toml_edit = { version = "0.17", features = ["easy"] }
 tonic = {version = "0.8", features = ["tls", "tls-roots"]}
 tracing = "0.1"
-tracing-opentelemetry = "0.18.0"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 url = "2.2"
 uuid = { version = "1.2", features = ["v4", "serde"] }
@@ -138,12 +134,10 @@ postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
 remote_storage = { version = "0.1", path = "./libs/remote_storage/" }
 safekeeper_api = { version = "0.1", path = "./libs/safekeeper_api" }
-safekeeper = { path = "./safekeeper/" }
 storage_broker = { version = "0.1", path = "./storage_broker/" } # Note: main broker code is inside the binary crate, so linking with the library shouldn't be heavy.
 tenant_size_model = { version = "0.1", path = "./libs/tenant_size_model/" }
 tracing-utils = { version = "0.1", path = "./libs/tracing-utils/" }
 utils = { version = "0.1", path = "./libs/utils/" }
-walproposer = { version = "0.1", path = "./libs/walproposer/" }

 ## Common library dependency
 workspace_hack = { version = "0.1", path = "./workspace_hack/" }
@@ -152,7 +146,7 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" }
 criterion = "0.4"
 rcgen = "0.10"
 rstest = "0.16"
-tempfile = "3.4"
+tempfile = "3.2"
 tonic-build = "0.8"

 # This is only needed for proxy's tests.
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -1,4 +1,3 @@
-ARG PG_VERSION
 ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
 ARG IMAGE=rust
 ARG TAG=pinned
@@ -12,7 +11,7 @@ FROM debian:bullseye-slim AS build-deps
 RUN apt update &&  \
    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
    zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
-    libicu-dev libxslt1-dev
+    libicu-dev

 #########################################################################################
 #
@@ -24,24 +23,18 @@ FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION} postgres
 RUN cd postgres && \
-    ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu \
-    --with-libxml --with-libxslt && \
+    ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
    # Install headers
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
    # Enable some of contrib extensions
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control

 #########################################################################################
 #
@@ -57,18 +50,17 @@ RUN apt update && \
    libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
    protobuf-c-compiler xsltproc

-# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
-RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
-    mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
-    cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
+RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz && \
+    tar zxvf SFCGAL-v1.3.10.tar.gz && \
+    cd SFCGAL-v1.3.10 && cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
    make clean && cp -R /sfcgal/* /

-ENV PATH "/usr/local/pgsql/bin:$PATH"
-
-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
-    mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
+RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
+    tar xvzf postgis-3.3.1.tar.gz && \
+    cd postgis-3.3.1 && \
    ./autogen.sh && \
+    export PATH="/usr/local/pgsql/bin:$PATH" && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    cd extensions/postgis && \
@@ -82,15 +74,6 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control

-RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
-    mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
-    mkdir build && \
-    cd build && \
-    cmake .. && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
-
 #########################################################################################
 #
 # Layer "plv8-build"
@@ -100,17 +83,30 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
 FROM build-deps AS plv8-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y ninja-build python3-dev libncurses5 binutils clang
+    apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils

-RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
-    mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
+# https://github.com/plv8/plv8/issues/475:
+#   v8 uses gold for linking and sets `--thread-count=4` which breaks
+#   gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
+# Install newer gold version manually as debian-testing binutils version updates
+# libc version, which in turn breaks other extension built against non-testing libc.
+RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
+    tar xvzf binutils-2.38.tar.gz && \
+    cd binutils-2.38 && \
+    cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
+    cd ../bfd && ./configure && make bfdver.h && \
+    cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
+    cp /usr/local/bin/ld.gold /usr/bin/gold
+
+# Sed is used to patch for https://github.com/plv8/plv8/issues/503
+RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
+    tar xvzf v3.1.4.tar.gz && \
+    cd plv8-3.1.4 && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
+    sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
    make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
    rm -rf /plv8-* && \
-    find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control

 #########################################################################################
 #
@@ -128,17 +124,20 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2
      && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
      && rm /tmp/cmake-install.sh

-RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
-    mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
-    mkdir build && cd build && \
+RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
+    tar xvzf h3.tgz  && \
+    cd h3-4.0.1 && \
+    mkdir build && \
+    cd build && \
    cmake .. -DCMAKE_BUILD_TYPE=Release && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/h3 make install && \
    cp -R /h3/usr / && \
    rm -rf build

-RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
-    mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
+    tar xvzf h3-pg.tgz && \
+    cd h3-pg-4.0.1 && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -154,8 +153,9 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3
 FROM build-deps AS unit-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
-    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
+RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz && \
+    tar xvzf 7.7.tar.gz && \
+    cd postgresql-unit-7.7 && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    # unit extension's "create extension" script relies on absolute install path to fill some reference tables.
@@ -165,156 +165,6 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
    find /usr/local/pgsql/share/extension/ -name "unit*.sql" -print0 | xargs -0 sed -i "s|pgsql/||g" && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control

-#########################################################################################
-#
-# Layer "vector-pg-build"
-# compile pgvector extension
-#
-#########################################################################################
-FROM build-deps AS vector-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
-    mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
-
-#########################################################################################
-#
-# Layer "pgjwt-pg-build"
-# compile pgjwt extension
-#
-#########################################################################################
-FROM build-deps AS pgjwt-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
-RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
-    mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
-
-#########################################################################################
-#
-# Layer "hypopg-pg-build"
-# compile hypopg extension
-#
-#########################################################################################
-FROM build-deps AS hypopg-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
-    mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
-
-#########################################################################################
-#
-# Layer "pg-hashids-pg-build"
-# compile pg_hashids extension
-#
-#########################################################################################
-FROM build-deps AS pg-hashids-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
-    mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
-
-#########################################################################################
-#
-# Layer "rum-pg-build"
-# compile rum extension
-#
-#########################################################################################
-FROM build-deps AS rum-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
-    mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/rum.control
-
-#########################################################################################
-#
-# Layer "pgtap-pg-build"
-# compile pgTAP extension
-#
-#########################################################################################
-FROM build-deps AS pgtap-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
-    mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
-
-#########################################################################################
-# 
-# Layer "rust extensions"
-# This layer is used to build `pgx` deps
-#
-#########################################################################################
-FROM build-deps AS rust-extensions-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN apt-get update && \
-    apt-get install -y curl libclang-dev cmake && \
-    useradd -ms /bin/bash nonroot -b /home
-
-ENV HOME=/home/nonroot
-ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
-USER nonroot
-WORKDIR /home/nonroot
-ARG PG_VERSION
-
-RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
-    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
-    rm rustup-init && \
-    cargo install --git https://github.com/vadim2404/pgx --branch neon_abi_v0.6.1 --locked cargo-pgx && \
-    /bin/bash -c 'cargo pgx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
-
-USER root
-
-#########################################################################################
-# 
-# Layer "pg-jsonschema-pg-build"
-# Compile "pg_jsonschema" extension
-#
-#########################################################################################
-
-FROM rust-extensions-build AS pg-jsonschema-pg-build
-
-RUN git clone --depth=1 --single-branch --branch neon_abi_v0.1.4 https://github.com/vadim2404/pg_jsonschema/ && \
-    cd pg_jsonschema && \
-    cargo pgx install --release && \
-    # it's needed to enable extension because it uses untrusted C language
-    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_jsonschema.control && \
-    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
-
-#########################################################################################
-# 
-# Layer "pg-graphql-pg-build"
-# Compile "pg_graphql" extension
-#
-#########################################################################################
-
-FROM rust-extensions-build AS pg-graphql-pg-build
-
-RUN git clone --depth=1 --single-branch --branch neon_abi_v1.1.0 https://github.com/vadim2404/pg_graphql && \
-    cd pg_graphql && \  
-    cargo pgx install --release && \
-    # it's needed to enable extension because it uses untrusted C language
-    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
-    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control
-
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -328,14 +178,6 @@ COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=h3-pg-build /h3/usr /
 COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -386,9 +228,7 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
    mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
    chown -R postgres:postgres /var/db/postgres && \
    chmod 0750 /var/db/postgres/compute && \
-    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
-    # create folder for file cache
-    mkdir -p -m 777 /neon/cache
+    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig

 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl
@@ -398,7 +238,6 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
 # libicu67, locales for collations (including ICU)
 # libossp-uuid16 for extension ossp-uuid
 # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
-# libxml2, libxslt1.1 for xml2
 RUN apt update &&  \
    apt install --no-install-recommends -y \
        locales \
@@ -410,8 +249,6 @@ RUN apt update &&  \
        libproj19 \
        libprotobuf-c1 \
        libsfcgal1 \
-        libxml2 \
-        libxslt1.1 \
        gdb && \
    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
--- a/Dockerfile.vm-compute-node
+++ b/Dockerfile.vm-compute-node
@@ -1,25 +0,0 @@
-# Note: this file *mostly* just builds on Dockerfile.compute-node
-
-ARG SRC_IMAGE
-ARG VM_INFORMANT_VERSION=v0.1.6
-
-# Pull VM informant and set up inittab
-FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
-
-RUN set -e \
-	&& rm -f /etc/inittab \
-	&& touch /etc/inittab
-
-RUN set -e \
-	&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant --auto-restart'" >> /etc/inittab
-
-# Combine, starting from non-VM compute node image.
-FROM $SRC_IMAGE as base
-
-# Temporarily set user back to root so we can run adduser
-USER root
-RUN adduser vm-informant --disabled-password --no-create-home
-USER postgres
-
-COPY --from=informant /etc/inittab /etc/inittab
-COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
--- a/Makefile
+++ b/Makefile
@@ -39,8 +39,6 @@ endif
 # been no changes to the files. Changing the mtime triggers an
 # unnecessary rebuild of 'postgres_ffi'.
 PG_CONFIGURE_OPTS += INSTALL='$(ROOT_PROJECT_DIR)/scripts/ninstall.sh -C'
-PG_CONFIGURE_OPTS += CC=clang
-PG_CONFIGURE_OPTS += CCX=clang++

 # Choose whether we should be silent or verbose
 CARGO_BUILD_FLAGS += --$(if $(filter s,$(MAKEFLAGS)),quiet,verbose)
@@ -136,23 +134,11 @@ neon-pg-ext-%: postgres-%
 		-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
 		-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile install

-.PHONY:
-neon-pg-ext-walproposer:
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/v15/bin/pg_config CFLAGS='$(PG_CFLAGS) $(COPT)' \
-		-C $(POSTGRES_INSTALL_DIR)/build/neon-v15 \
-		-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile install
-
 .PHONY: neon-pg-ext-clean-%
 neon-pg-ext-clean-%:
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-	-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-	-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-	-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-	-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-	-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-	-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/pgxn/neon-$* -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/pgxn/neon_walredo-$* -f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/pgxn/neon_test_utils-$* -f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean

 .PHONY: neon-pg-ext
 neon-pg-ext: \
--- a/README.md
+++ b/README.md
@@ -34,11 +34,6 @@ dnf install flex bison readline-devel zlib-devel openssl-devel \
  libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
  protobuf-devel
 ```
-* On Arch based systems, these packages are needed:
-```bash
-pacman -S base-devel readline zlib libseccomp openssl clang \
-postgresql-libs cmake postgresql protobuf
-```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
 ```
@@ -88,10 +83,9 @@ cd neon

 # The preferred and default is to make a debug build. This will create a
 # demonstrably slower build than a release build. For a release build,
-# use "BUILD_TYPE=release make -j`nproc` -s"
-# Remove -s for the verbose build log
+# use "BUILD_TYPE=release make -j`nproc`"

-make -j`nproc` -s
+make -j`nproc`
 ```

 #### Building on OSX
@@ -105,10 +99,9 @@ cd neon

 # The preferred and default is to make a debug build. This will create a
 # demonstrably slower build than a release build. For a release build,
-# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu` -s"
-# Remove -s for the verbose build log
+# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"

-make -j`sysctl -n hw.logicalcpu` -s
+make -j`sysctl -n hw.logicalcpu`
 ```

 #### Dependency installation notes
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -17,7 +17,6 @@ regex.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tar.workspace = true
-reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tracing.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -44,6 +44,7 @@ use tracing::{error, info};

 use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
 use compute_tools::http::api::launch_http_server;
+use compute_tools::informant::spawn_vm_informant_if_present;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
@@ -65,9 +66,6 @@ fn main() -> Result<()> {
    let spec = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");

-    let compute_id = matches.get_one::<String>("compute-id");
-    let control_plane_uri = matches.get_one::<String>("control-plane-uri");
-
    // Try to use just 'postgres' if no path is provided
    let pgbin = matches.get_one::<String>("pgbin").unwrap();

@@ -80,27 +78,8 @@ fn main() -> Result<()> {
                let path = Path::new(sp);
                let file = File::open(path)?;
                serde_json::from_reader(file)?
-            } else if let Some(id) = compute_id {
-                if let Some(cp_base) = control_plane_uri {
-                    let cp_uri = format!("{cp_base}/management/api/v1/{id}/spec");
-                    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
-                        Ok(v) => v,
-                        Err(_) => "".to_string(),
-                    };
-
-                    reqwest::blocking::Client::new()
-                        .get(cp_uri)
-                        .header("Authorization", jwt)
-                        .send()?
-                        .json()?
-                } else {
-                    panic!(
-                        "must specify --control-plane-uri \"{:#?}\" and --compute-id \"{:#?}\"",
-                        control_plane_uri, compute_id
-                    );
-                }
            } else {
-                panic!("compute spec should be provided via --spec or --spec-path argument");
+                panic!("cluster spec should be provided via --spec or --spec-path argument");
            }
        }
    };
@@ -162,6 +141,8 @@ fn main() -> Result<()> {
    // requests, while configuration is still in progress.
    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
+    // Also spawn the thread responsible for handling the VM informant -- if it's present
+    let _vm_informant_handle = spawn_vm_informant_if_present().expect("cannot launch VM informant");

    // Start Postgres
    let mut delay_exit = false;
@@ -249,18 +230,6 @@ fn cli() -> clap::Command {
                .long("spec-path")
                .value_name("SPEC_PATH"),
        )
-        .arg(
-            Arg::new("compute-id")
-                .short('i')
-                .long("compute-id")
-                .value_name("COMPUTE_ID"),
-        )
-        .arg(
-            Arg::new("control-plane-uri")
-                .short('p')
-                .long("control-plane-uri")
-                .value_name("CONTROL_PLANE"),
-        )
 }

 #[test]
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -3,7 +3,6 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::thread;

-use crate::compute::ComputeNode;
 use anyhow::Result;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
@@ -11,6 +10,8 @@ use serde_json;
 use tracing::{error, info};
 use tracing_utils::http::OtelName;

+use crate::compute::ComputeNode;
+
 // Service function to handle all available routes.
 async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
    //
--- a/compute_tools/src/informant.rs
+++ b/compute_tools/src/informant.rs
@@ -0,0 +1,50 @@
+use std::path::Path;
+use std::process;
+use std::thread;
+use std::time::Duration;
+use tracing::{info, warn};
+
+use anyhow::{Context, Result};
+
+const VM_INFORMANT_PATH: &str = "/bin/vm-informant";
+const RESTART_INFORMANT_AFTER_MILLIS: u64 = 5000;
+
+/// Launch a thread to start the VM informant if it's present (and restart it on failure)
+pub fn spawn_vm_informant_if_present() -> Result<Option<thread::JoinHandle<()>>> {
+    let exists = Path::new(VM_INFORMANT_PATH)
+        .try_exists()
+        .context("could not check if path exists")?;
+
+    if !exists {
+        return Ok(None);
+    }
+
+    Ok(Some(
+        thread::Builder::new()
+            .name("run-vm-informant".into())
+            .spawn(move || run_informant())?,
+    ))
+}
+
+fn run_informant() -> ! {
+    let restart_wait = Duration::from_millis(RESTART_INFORMANT_AFTER_MILLIS);
+
+    info!("starting VM informant");
+
+    loop {
+        let mut cmd = process::Command::new(VM_INFORMANT_PATH);
+        // Block on subprocess:
+        let result = cmd.status();
+
+        match result {
+            Err(e) => warn!("failed to run VM informant at {VM_INFORMANT_PATH:?}: {e}"),
+            Ok(status) if !status.success() => {
+                warn!("{VM_INFORMANT_PATH} exited with code {status:?}, retrying")
+            }
+            Ok(_) => info!("{VM_INFORMANT_PATH} ended gracefully (unexpectedly). Retrying"),
+        }
+
+        // Wait before retrying
+        thread::sleep(restart_wait);
+    }
+}
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -8,6 +8,7 @@ pub mod http;
 #[macro_use]
 pub mod logger;
 pub mod compute;
+pub mod informant;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -15,7 +15,6 @@ postgres.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["blocking", "json"] }
 serde.workspace = true
-serde_json.workspace = true
 serde_with.workspace = true
 tar.workspace = true
 thiserror.workspace = true
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -419,11 +419,6 @@ impl PageServerNode {
                    .map(|x| x.parse::<bool>())
                    .transpose()
                    .context("Failed to parse 'trace_read_requests' as bool")?,
-                eviction_policy: settings
-                    .get("eviction_policy")
-                    .map(|x| serde_json::from_str(x))
-                    .transpose()
-                    .context("Failed to parse 'eviction_policy' json")?,
            })
            .send()?
            .error_from_body()?;
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -16,7 +16,7 @@ listen_http_addr = '127.0.0.1:9898'
 checkpoint_distance = '268435456' # in bytes
 checkpoint_timeout = '10m'

-gc_period = '1 hour'
+gc_period = '100 s'
 gc_horizon = '67108864'

 max_file_descriptors = '100'
@@ -101,7 +101,7 @@ away.

 #### gc_period

-Interval at which garbage collection is triggered. Default is 1 hour.
+Interval at which garbage collection is triggered. Default is 100 s.

 #### image_creation_threshold

@@ -109,7 +109,7 @@ L0 delta layer threshold for L1 image layer creation. Default is 3.

 #### pitr_interval

-WAL retention duration for PITR branching. Default is 7 days.
+WAL retention duration for PITR branching. Default is 30 days.

 #### walreceiver_connect_timeout

--- a/docs/synthetic-size.md
+++ b/docs/synthetic-size.md
@@ -1,335 +0,0 @@
-# Synthetic size
-
-Neon storage has copy-on-write branching, which makes it difficult to
-answer the question "how large is my database"? To give one reasonable
-answer, we calculate _synthetic size_ for a project.
-
-The calculation is called "synthetic", because it is based purely on
-the user-visible logical size, which is the size that you would see on
-a standalone PostgreSQL installation, and the amount of WAL, which is
-also the same as what you'd see on a standalone PostgreSQL, for the
-same set of updates.
-
-The synthetic size does *not* depend on the actual physical size
-consumed in the storage, or implementation details of the Neon storage
-like garbage collection, compaction and compression.  There is a
-strong *correlation* between the physical size and the synthetic size,
-but the synthetic size is designed to be independent of the
-implementation details, so that any improvements we make in the
-storage system simply reduce our COGS. And vice versa: any bugs or bad
-implementation where we keep more data than we would need to, do not
-change the synthetic size or incur any costs to the user.
-
-The synthetic size is calculated for the whole project. It is not
-straighforward to attribute size to individual branches. See "What is
-the size of an individual branch?" for discussion on those
-difficulties.
-
-The synthetic size is designed to:
-
- Take into account the copy-on-write nature of the storage. For
-  example, if you create a branch, it doesn't immediately add anything
-  to the synthetic size. It starts to affect the synthetic size only
-  as it diverges from the parent branch.
-
- Be independent of any implementation details of the storage, like
-  garbage collection, remote storage, or compression.
-
-## Terms & assumptions
-
- logical size is the size of a branch *at a given point in
-  time*. It's the total size of all tables in all databases, as you
-  see with "\l+" in psql for example, plus the Postgres SLRUs and some
-  small amount of metadata. NOTE that currently, Neon does not include
-  the SLRUs and metadata in the logical size. See comment to `get_current_logical_size_non_incremental()`.
-
- a "point in time" is defined as an LSN value. You can convert a
-  timestamp to an LSN, but the storage internally works with LSNs.
-
- PITR horizon can be set per-branch.
-
- PITR horizon can be set as a time interval, e.g. 5 days or hours, or
-  as amount of WAL, in bytes.  If it's given as a time interval, it's
-  converted to an LSN for the calculation.
-
- PITR horizon can be set to 0, if you don't want to retain any history.
-
-## Calculation
-
-Inputs to the calculation are:
- logical size of the database at different points in time,
- amount of WAL generated, and
- the PITR horizon settings
-
-The synthetic size is based on an idealistic model of the storage
-system, where we pretend that the storage consists of two things:
- snapshots, containing a full snapshot of the database, at a given
-  point in time, and
- WAL.
-
-In the simple case that the project contains just one branch (main),
-and a fixed PITR horizon, the synthetic size is the sum of:
-
- the logical size of the branch *at the beginning of the PITR
-  horizon*, i.e. at the oldest point that you can still recover to, and
- the size of the WAL covering the PITR horizon.
-
-The snapshot allows you to recover to the beginning of the PITR
-horizon, and the WAL allows you to recover from that point to any
-point within the horizon.
-
-```
-                             WAL
-   -----------------------#########>
-                          ^
-                       snapshot
-
-Legend:
-  ##### PITR horizon. This is the region that you can still access
-        with Point-in-time query and you can still create branches
-        from.
-  ----- history that has fallen out of the PITR horizon, and can no
-        longer be accessed
-```
-
-NOTE: This is not how the storage system actually works! The actual
-implementation is also based on snapshots and WAL, but the snapshots
-are taken for individual database pages and ranges of pages rather
-than the whole database, and it is much more complicated. This model
-is a reasonable approximation, however, to make the synthetic size a
-useful proxy for the actual storage consumption.
-
-
-## Example: Data is INSERTed
-
-For example, let's assume that your database contained 10 GB of data
-at the beginning of the PITR horizon, and you have since then inserted
-5 GB of additional data into it. The additional insertions of 5 GB of
-data consume roughly 5 GB of WAL. In that case, the synthetic size is:
-
-> 10 GB (snapshot) +  5 GB (WAL) = 15 GB
-
-If you now set the PITR horizon on the project to 0, so that no
-historical data is retained, then the beginning PITR horizon would be
-at the end of the branch, so the size of the snapshot would be
-calculated at the end of the branch, after the insertions. Then the
-synthetic size is:
-
-> 15 GB (snapshot) + 0 GB (WAL) = 15 GB.
-
-In this case, the synthetic size is the same, regardless of the PITR horizon,
-because all the history consists of inserts. The newly inserted data takes
-up the same amount of space, whether it's stored as part of the logical
-snapshot, or as WAL. (*)
-
-(*) This is a rough approximation. In reality, the WAL contains
-headers and other overhead, and on the other hand, the logical
-snapshot includes empty space on pages, so the size of insertions in
-WAL can be smaller or greater than the size of the final table after
-the insertions. But in most cases, it's in the same ballpark.
-
-## Example: Data is DELETEd
-
-Let's look at another example:
-
-Let's start again with a database that contains 10 GB of data. Then,
-you DELETE 5 GB of the data, and run VACUUM to free up the space, so
-that the logical size of the database is now only 5 GB.
-
-Let's assume that the WAL for the deletions and the vacuum take up
-100 MB of space. In that case, the synthetic size of the project is:
-
-> 10 GB (snapshot) + 100 MB (WAL) = 10.1 GB
-
-This is much larger than the logical size of the database after the
-deletions (5 GB). That's because the system still needs to retain the
-deleted data, because it's still accessible to queries and branching
-in the PITR window.
-
-If you now set the PITR horizon to 0 or just wait for time to pass so
-that the data falls out of the PITR horizon, making the deleted data
-inaccessible, the synthetic size shrinks:
-
-> 5 GB (snapshot) + 0 GB (WAL) = 5 GB
-
-
-# Branching
-
-Things get more complicated with branching. Branches in Neon are
-copy-on-write, which is also reflected in the synthetic size.
-
-When you create a branch, it doesn't immediately change the synthetic
-size at all. The branch point is within the PITR horizon, and all the
-data needed to recover to that point in time needs to be retained
-anyway.
-
-However, if you make modifications on the branch, the system needs to
-keep the WAL of those modifications. The WAL is included in the
-synthetic size.
-
-## Example: branch and INSERT
-
-Let's assume that you again start with a 10 GB database.
-On the main branch, you insert 2 GB of data. Then you create
-a branch at that point, and insert another 3 GB of data on the
-main branch, and 1 GB of data on the child branch
-
-```
-  child                 +#####>
-                        |
-                        |    WAL
-  main    ---------###############>
-                   ^
-                snapshot
-```
-
-In this case, the synthetic size consists of:
- the snapshot at the beginning of the PITR horizon (10 GB)
- the WAL on the main branch (2 GB + 3 GB = 5 GB)
- the WAL on the child branch (1 GB)
-
-Total: 16 GB
-
-# Diverging branches
-
-If there is only a small amount of changes in the database on the
-different branches, as in the previous example, the synthetic size
-consists of a snapshot before the branch point, containing all the
-shared data, and the WAL on both branches. However, if the branches
-diverge a lot, it is more efficient to store a separate snapshot of
-branches.
-
-## Example: diverging branches
-
-You start with a 10 GB database. You insert 5 GB of data on the main
-branch. Then you create a branch, and immediately delete all the data
-on the child branch and insert 5 GB of new data to it. Then you do the
-same on the main branch. Let's assume
-that the PITR horizon requires keeping the last 1 GB of WAL on the
-both branches.
-
-```
-                              snapshot
-                                  v     WAL
-  child                 +---------##############>
-                        |
-                        |
-  main     -------------+---------##############>
-                                  ^     WAL
-                              snapshot
-```
-
-In this case, the synthetic size consists of:
- snapshot at the beginning of the PITR horizon on the main branch (4 GB)
- WAL on the main branch (1 GB)
- snapshot at the beginning of the PITR horizon on the child branch (4 GB)
- last 1 GB of WAL on the child branch (1 GB)
-
-Total: 10 GB
-
-The alternative way to store this would be to take only one snapshot
-at the beginning of branch point, and keep all the WAL on both
-branches.  However, the size with that method would be larger, as it
-would require one 10 GB snapshot, and 5 GB + 5 GB of WAL. It depends
-on the amount of changes (WAL) on both branches, and the logical size
-at the branch point, which method would result in a smaller synthetic
-size. On each branch point, the system performs the calculation with
-both methods, and uses the method that is cheaper, i.e. the one that
-results in a smaller synthetic size.
-
-One way to think about this is that when you create a branch, it
-starts out as a thin branch that only stores the WAL since the branch
-point.  As you modify it, and the amount of WAL grows, at some point
-it becomes cheaper to store a completely new snapshot of the branch
-and truncate the WAL.
-
-
-# What is the size of an individual branch?
-
-Synthetic size is calculated for the whole project, and includes all
-branches. There is no such thing as the size of a branch, because it
-is not straighforward to attribute the parts of size to individual
-branches.
-
-## Example: attributing size to branches
-
-(copied from https://github.com/neondatabase/neon/pull/2884#discussion_r1029365278)
-
-Imagine that you create two branches, A and B, at the same point from
-main branch, and do a couple of small updates on both branches. Then
-six months pass, and during those six months the data on the main
-branch churns over completely multiple times. The retention period is,
-say 1 month.
-
-```
-                      +------> A
-                     /
--------------------*-------------------------------> main
-                     \
-                      +--------> B
-```
-
-In that situation, the synthetic tenant size would be calculated based
-on a "logical snapshot" at the branch point, that is, the logical size
-of the database at that point. Plus the WAL on branches A and B. Let's
-say that the snapshot size is 10 GB, and the WAL is 1 MB on both
-branches A and B. So the total synthetic storage size is 10002
-MB. (Let's ignore the main branch for now, that would be just added to
-the sum)
-
-How would you break that down per branch? I can think of three
-different ways to do it, and all of them have their own problems:
-
-### Subtraction method
-
-For each branch, calculate how much smaller the total synthetic size
-would be, if that branch didn't exist. In other words, how much would
-you save if you dropped the branch. With this method, the size of
-branches A and B is 1 MB.
-
-With this method, the 10 GB shared logical snapshot is not included
-for A nor B. So the size of all branches is not equal to the total
-synthetic size of the tenant. If you drop branch A, you save 1 MB as
-you'd expect, but also the size of B suddenly jumps from 1 MB to 10001
-MB, which might feel surprising.
-
-### Division method
-
-Divide the common parts evenly across all branches that need
-them. With this method, the size of branches A and B would be 5001 MB.
-
-With this method, the sum of all branches adds up to the total
-synthetic size. But it's surprising in other ways: if you drop branch
-A, you might think that you save 5001 MB, but in reality you only save
-1 MB, and the size of branch B suddenly grows from 5001 to 10001 MB.
-
-### Addition method
-
-For each branch, include all the snapshots and WAL that it depends on,
-even if some of them are shared by other branches. With this method,
-the size of branches A and B would be 10001 MB.
-
-The surprise with this method is that the sum of all the branches is
-larger than the total synthetic size. And if you drop branch A, the
-total synthetic size doesn't fall by 10001 MB as you might think.
-
-# Alternatives
-
-A sort of cop-out method would be to show the whole tree of branches
-graphically, and for each section of WAL or logical snapshot, display
-the size of that section. You can then see which branches depend on
-which sections, which sections are shared etc. That would be good to
-have in the UI anyway.
-
-Or perhaps calculate per-branch numbers using the subtraction method,
-and in addition to that, one more number for "shared size" that
-includes all the data that is needed by more than one branch.
-
-## Which is the right method?
-
-The bottom line is that it's not straightforward to attribute the
-synthetic size to individual branches. There are things we can do, and
-all of those methods are pretty straightforward to implement, but they
-all have their own problems. What makes sense depends a lot on what
-you want to do with the number, what question you are trying to
-answer.
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -14,6 +14,5 @@ byteorder.workspace = true
 utils.workspace = true
 postgres_ffi.workspace = true
 enum-map.workspace = true
-serde_json.workspace = true

 workspace_hack.workspace = true
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -155,11 +155,6 @@ pub struct TenantConfigRequest {
    pub lagging_wal_timeout: Option<String>,
    pub max_lsn_wal_lag: Option<NonZeroU64>,
    pub trace_read_requests: Option<bool>,
-    // We defer the parsing of the eviction_policy field to the request handler.
-    // Otherwise we'd have to move the types for eviction policy into this package.
-    // We might do that once the eviction feature has stabilizied.
-    // For now, this field is not even documented in the openapi_spec.yml.
-    pub eviction_policy: Option<serde_json::Value>,
 }

 impl TenantConfigRequest {
@@ -179,7 +174,6 @@ impl TenantConfigRequest {
            lagging_wal_timeout: None,
            max_lsn_wal_lag: None,
            trace_read_requests: None,
-            eviction_policy: None,
        }
    }
 }
@@ -269,11 +263,11 @@ pub struct LayerResidenceEvent {
    ///
    #[serde(rename = "timestamp_millis_since_epoch")]
    #[serde_as(as = "serde_with::TimestampMilliSeconds")]
-    pub timestamp: SystemTime,
+    timestamp: SystemTime,
    /// The new residence status of the layer.
-    pub status: LayerResidenceStatus,
+    status: LayerResidenceStatus,
    /// The reason why we had to record this event.
-    pub reason: LayerResidenceEventReason,
+    reason: LayerResidenceEventReason,
 }

 /// The reason for recording a given [`ResidenceEvent`].
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -98,15 +98,6 @@ impl RelTag {

        name
    }
-
-    pub fn with_forknum(&self, forknum: u8) -> Self {
-        RelTag {
-            forknum,
-            spcnode: self.spcnode,
-            dbnode: self.dbnode,
-            relnode: self.relnode,
-        }
-    }
 }

 ///
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -75,36 +75,27 @@ impl StartupMessageParams {
    /// taking into account all escape sequences but leaving them as-is.
    /// [`None`] means that there's no `options` in [`Self`].
    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {
-        self.get("options").map(Self::parse_options_raw)
-    }
-
-    /// Split command-line options according to PostgreSQL's logic,
-    /// applying all escape sequences (using owned strings as needed).
-    /// [`None`] means that there's no `options` in [`Self`].
-    pub fn options_escaped(&self) -> Option<impl Iterator<Item = Cow<'_, str>>> {
-        self.get("options").map(Self::parse_options_escaped)
-    }
-
-    /// Split command-line options according to PostgreSQL's logic,
-    /// taking into account all escape sequences but leaving them as-is.
-    pub fn parse_options_raw(input: &str) -> impl Iterator<Item = &str> {
        // See `postgres: pg_split_opts`.
        let mut last_was_escape = false;
-        input
+        let iter = self
+            .get("options")?
            .split(move |c: char| {
                // We split by non-escaped whitespace symbols.
                let should_split = c.is_ascii_whitespace() && !last_was_escape;
                last_was_escape = c == '\\' && !last_was_escape;
                should_split
            })
-            .filter(|s| !s.is_empty())
+            .filter(|s| !s.is_empty());
+
+        Some(iter)
    }

    /// Split command-line options according to PostgreSQL's logic,
    /// applying all escape sequences (using owned strings as needed).
-    pub fn parse_options_escaped(input: &str) -> impl Iterator<Item = Cow<'_, str>> {
+    /// [`None`] means that there's no `options` in [`Self`].
+    pub fn options_escaped(&self) -> Option<impl Iterator<Item = Cow<'_, str>>> {
        // See `postgres: pg_split_opts`.
-        Self::parse_options_raw(input).map(|s| {
+        let iter = self.options_raw()?.map(|s| {
            let mut preserve_next_escape = false;
            let escape = |c| {
                // We should remove '\\' unless it's preceded by '\\'.
@@ -117,12 +108,9 @@ impl StartupMessageParams {
                true => Cow::Owned(s.replace(escape, "")),
                false => Cow::Borrowed(s),
            }
-        })
-    }
+        });

-    /// Iterate through key-value pairs in an arbitrary order.
-    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
-        self.params.iter().map(|(k, v)| (k.as_str(), v.as_str()))
+        Some(iter)
    }

    // This function is mostly useful in tests.
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -21,7 +21,7 @@ toml_edit.workspace = true
 tracing.workspace = true
 metrics.workspace = true
 utils.workspace = true
-pin-project-lite.workspace = true
+
 workspace_hack.workspace = true

 [dev-dependencies]
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -20,10 +20,7 @@ use aws_sdk_s3::{
 };
 use aws_smithy_http::body::SdkBody;
 use hyper::Body;
-use tokio::{
-    io::{self, AsyncRead},
-    sync::Semaphore,
-};
+use tokio::{io, sync::Semaphore};
 use tokio_util::io::ReaderStream;
 use tracing::debug;

@@ -105,7 +102,7 @@ pub struct S3Bucket {
    // Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
    // Same goes to IAM, which is queried before every S3 request, if enabled. IAM has even lower RPS threshold.
    // The helps to ensure we don't exceed the thresholds.
-    concurrency_limiter: Arc<Semaphore>,
+    concurrency_limiter: Semaphore,
 }

 #[derive(Default)]
@@ -165,7 +162,7 @@ impl S3Bucket {
            client,
            bucket_name: aws_config.bucket_name.clone(),
            prefix_in_bucket,
-            concurrency_limiter: Arc::new(Semaphore::new(aws_config.concurrency_limit.get())),
+            concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
        })
    }

@@ -197,10 +194,9 @@ impl S3Bucket {
    }

    async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
-        let permit = self
+        let _guard = self
            .concurrency_limiter
-            .clone()
-            .acquire_owned()
+            .acquire()
            .await
            .context("Concurrency limiter semaphore got closed during S3 download")
            .map_err(DownloadError::Other)?;
@@ -221,10 +217,9 @@ impl S3Bucket {
                let metadata = object_output.metadata().cloned().map(StorageMetadata);
                Ok(Download {
                    metadata,
-                    download_stream: Box::pin(io::BufReader::new(RatelimitedAsyncRead::new(
-                        permit,
+                    download_stream: Box::pin(io::BufReader::new(
                        object_output.body.into_async_read(),
-                    ))),
+                    )),
                })
            }
            Err(SdkError::ServiceError {
@@ -245,32 +240,6 @@ impl S3Bucket {
    }
 }

-pin_project_lite::pin_project! {
-    /// An `AsyncRead` adapter which carries a permit for the lifetime of the value.
-    struct RatelimitedAsyncRead<S> {
-        permit: tokio::sync::OwnedSemaphorePermit,
-        #[pin]
-        inner: S,
-    }
-}
-
-impl<S: AsyncRead> RatelimitedAsyncRead<S> {
-    fn new(permit: tokio::sync::OwnedSemaphorePermit, inner: S) -> Self {
-        RatelimitedAsyncRead { permit, inner }
-    }
-}
-
-impl<S: AsyncRead> AsyncRead for RatelimitedAsyncRead<S> {
-    fn poll_read(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &mut io::ReadBuf<'_>,
-    ) -> std::task::Poll<std::io::Result<()>> {
-        let this = self.project();
-        this.inner.poll_read(cx, buf)
-    }
-}
-
 #[async_trait::async_trait]
 impl RemoteStorage for S3Bucket {
    async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
--- a/libs/tenant_size_model/Cargo.toml
+++ b/libs/tenant_size_model/Cargo.toml
@@ -7,7 +7,5 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-serde.workspace = true
-serde_json.workspace = true

 workspace_hack.workspace = true
--- a/libs/tenant_size_model/src/calculation.rs
+++ b/libs/tenant_size_model/src/calculation.rs
@@ -1,219 +0,0 @@
-use crate::{SegmentMethod, SegmentSizeResult, SizeResult, StorageModel};
-
-//
-//                 *-g--*---D--->
-//                /
-//               /
-//              /                 *---b----*-B--->
-//             /                 /
-//            /                 /
-//      -----*--e---*-----f----* C
-//           E                  \
-//                               \
-//                                *--a---*---A-->
-//
-// If A and B need to be retained, is it cheaper to store
-// snapshot at C+a+b, or snapshots at A and B ?
-//
-// If D also needs to be retained, which is cheaper:
-//
-// 1. E+g+e+f+a+b
-// 2. D+C+a+b
-// 3. D+A+B
-
-/// [`Segment`] which has had it's size calculated.
-#[derive(Clone, Debug)]
-struct SegmentSize {
-    method: SegmentMethod,
-
-    // calculated size of this subtree, using this method
-    accum_size: u64,
-
-    seg_id: usize,
-    children: Vec<SegmentSize>,
-}
-
-struct SizeAlternatives {
-    // cheapest alternative if parent is available.
-    incremental: SegmentSize,
-
-    // cheapest alternative if parent node is not available
-    non_incremental: Option<SegmentSize>,
-}
-
-impl StorageModel {
-    pub fn calculate(&self) -> SizeResult {
-        // Build adjacency list. 'child_list' is indexed by segment id. Each entry
-        // contains a list of all child segments of the segment.
-        let mut roots: Vec<usize> = Vec::new();
-        let mut child_list: Vec<Vec<usize>> = Vec::new();
-        child_list.resize(self.segments.len(), Vec::new());
-
-        for (seg_id, seg) in self.segments.iter().enumerate() {
-            if let Some(parent_id) = seg.parent {
-                child_list[parent_id].push(seg_id);
-            } else {
-                roots.push(seg_id);
-            }
-        }
-
-        let mut segment_results = Vec::new();
-        segment_results.resize(
-            self.segments.len(),
-            SegmentSizeResult {
-                method: SegmentMethod::Skipped,
-                accum_size: 0,
-            },
-        );
-
-        let mut total_size = 0;
-        for root in roots {
-            if let Some(selected) = self.size_here(root, &child_list).non_incremental {
-                StorageModel::fill_selected_sizes(&selected, &mut segment_results);
-                total_size += selected.accum_size;
-            } else {
-                // Couldn't find any way to get this root. Error?
-            }
-        }
-
-        SizeResult {
-            total_size,
-            segments: segment_results,
-        }
-    }
-
-    fn fill_selected_sizes(selected: &SegmentSize, result: &mut Vec<SegmentSizeResult>) {
-        result[selected.seg_id] = SegmentSizeResult {
-            method: selected.method,
-            accum_size: selected.accum_size,
-        };
-        // recurse to children
-        for child in selected.children.iter() {
-            StorageModel::fill_selected_sizes(child, result);
-        }
-    }
-
-    //
-    // This is the core of the sizing calculation.
-    //
-    // This is a recursive function, that for each Segment calculates the best way
-    // to reach all the Segments that are marked as needed in this subtree, under two
-    // different conditions:
-    // a) when the parent of this segment is available (as a snaphot or through WAL), and
-    // b) when the parent of this segment is not available.
-    //
-    fn size_here(&self, seg_id: usize, child_list: &Vec<Vec<usize>>) -> SizeAlternatives {
-        let seg = &self.segments[seg_id];
-        // First figure out the best way to get each child
-        let mut children = Vec::new();
-        for child_id in &child_list[seg_id] {
-            children.push(self.size_here(*child_id, child_list))
-        }
-
-        // Method 1. If this node is not needed, we can skip it as long as we
-        // take snapshots later in each sub-tree
-        let snapshot_later = if !seg.needed {
-            let mut snapshot_later = SegmentSize {
-                seg_id,
-                method: SegmentMethod::Skipped,
-                accum_size: 0,
-                children: Vec::new(),
-            };
-
-            let mut possible = true;
-            for child in children.iter() {
-                if let Some(non_incremental) = &child.non_incremental {
-                    snapshot_later.accum_size += non_incremental.accum_size;
-                    snapshot_later.children.push(non_incremental.clone())
-                } else {
-                    possible = false;
-                    break;
-                }
-            }
-            if possible {
-                Some(snapshot_later)
-            } else {
-                None
-            }
-        } else {
-            None
-        };
-
-        // Method 2. Get a snapshot here. This assumed to be possible, if the 'size' of
-        // this Segment was given.
-        let snapshot_here = if !seg.needed || seg.parent.is_none() {
-            if let Some(snapshot_size) = seg.size {
-                let mut snapshot_here = SegmentSize {
-                    seg_id,
-                    method: SegmentMethod::SnapshotHere,
-                    accum_size: snapshot_size,
-                    children: Vec::new(),
-                };
-                for child in children.iter() {
-                    snapshot_here.accum_size += child.incremental.accum_size;
-                    snapshot_here.children.push(child.incremental.clone())
-                }
-                Some(snapshot_here)
-            } else {
-                None
-            }
-        } else {
-            None
-        };
-
-        // Method 3. Use WAL to get here from parent
-        let wal_here = {
-            let mut wal_here = SegmentSize {
-                seg_id,
-                method: SegmentMethod::Wal,
-                accum_size: if let Some(parent_id) = seg.parent {
-                    seg.lsn - self.segments[parent_id].lsn
-                } else {
-                    0
-                },
-                children: Vec::new(),
-            };
-            for child in children {
-                wal_here.accum_size += child.incremental.accum_size;
-                wal_here.children.push(child.incremental)
-            }
-            wal_here
-        };
-
-        // If the parent is not available, what's the cheapest method involving
-        // a snapshot here or later?
-        let mut cheapest_non_incremental: Option<SegmentSize> = None;
-        if let Some(snapshot_here) = snapshot_here {
-            cheapest_non_incremental = Some(snapshot_here);
-        }
-        if let Some(snapshot_later) = snapshot_later {
-            // Use <=, to prefer skipping if the size is equal
-            if let Some(parent) = &cheapest_non_incremental {
-                if snapshot_later.accum_size <= parent.accum_size {
-                    cheapest_non_incremental = Some(snapshot_later);
-                }
-            } else {
-                cheapest_non_incremental = Some(snapshot_later);
-            }
-        }
-
-        // And what's the cheapest method, if the parent is available?
-        let cheapest_incremental = if let Some(cheapest_non_incremental) = &cheapest_non_incremental
-        {
-            // Is it cheaper to use a snapshot here or later, anyway?
-            // Use <, to prefer Wal over snapshot if the cost is the same
-            if wal_here.accum_size < cheapest_non_incremental.accum_size {
-                wal_here
-            } else {
-                cheapest_non_incremental.clone()
-            }
-        } else {
-            wal_here
-        };
-
-        SizeAlternatives {
-            incremental: cheapest_incremental,
-            non_incremental: cheapest_non_incremental,
-        }
-    }
-}
--- a/libs/tenant_size_model/src/lib.rs
+++ b/libs/tenant_size_model/src/lib.rs
@@ -1,70 +1,401 @@
-//! Synthetic size calculation
+use std::borrow::Cow;
+use std::collections::HashMap;

-mod calculation;
-pub mod svg;
+use anyhow::Context;

-/// StorageModel is the input to the synthetic size calculation. It represents
-/// a tree of timelines, with just the information that's needed for the
-/// calculation. This doesn't track timeline names or where each timeline
-/// begins and ends, for example. Instead, it consists of "points of interest"
-/// on the timelines. A point of interest could be the timeline start or end point,
-/// the oldest point on a timeline that needs to be retained because of PITR
-/// cutoff, or snapshot points named by the user. For each such point, and the
-/// edge connecting the points (implicit in Segment), we store information about
-/// whether we need to be able to recover to the point, and if known, the logical
-/// size at the point.
+/// Pricing model or history size builder.
 ///
-/// The segments must form a well-formed tree, with no loops.
-#[derive(serde::Serialize)]
-pub struct StorageModel {
-    pub segments: Vec<Segment>,
+/// Maintains knowledge of the branches and their modifications. Generic over the branch name key
+/// type.
+pub struct Storage<K: 'static> {
+    segments: Vec<Segment>,
+
+    /// Mapping from the branch name to the index of a segment describing its latest state.
+    branches: HashMap<K, usize>,
 }

-/// Segment represents one point in the tree of branches, *and* the edge that leads
-/// to it (if any). We don't need separate structs for points and edges, because each
-/// point can have only one parent.
-///
-/// When 'needed' is true, it means that we need to be able to reconstruct
-/// any version between 'parent.lsn' and 'lsn'. If you want to represent that only
-/// a single point is needed, create two Segments with the same lsn, and mark only
-/// the child as needed.
-///
-#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+/// Snapshot of a branch.
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Segment {
    /// Previous segment index into ['Storage::segments`], if any.
-    pub parent: Option<usize>,
+    parent: Option<usize>,

-    /// LSN at this point
-    pub lsn: u64,
+    /// Description of how we got to this state.
+    ///
+    /// Mainly used in the original scenarios 1..=4 with insert, delete and update. Not used when
+    /// modifying a branch directly.
+    pub op: Cow<'static, str>,

-    /// Logical size at this node, if known.
-    pub size: Option<u64>,
+    /// LSN before this state
+    start_lsn: u64,

-    /// If true, the segment from parent to this node is needed by `retention_period`
+    /// LSN at this state
+    pub end_lsn: u64,
+
+    /// Logical size before this state
+    start_size: u64,
+
+    /// Logical size at this state. Can be None in the last Segment of a branch.
+    pub end_size: Option<u64>,
+
+    /// Indices to [`Storage::segments`]
+    ///
+    /// FIXME: this could be an Option<usize>
+    children_after: Vec<usize>,
+
+    /// Determined by `retention_period` given to [`Storage::calculate`]
    pub needed: bool,
 }

-/// Result of synthetic size calculation. Returned by StorageModel::calculate()
-pub struct SizeResult {
-    pub total_size: u64,
+//
+//
+//
+//
+//                 *-g--*---D--->
+//                /
+//               /
+//              /                 *---b----*-B--->
+//             /                 /
+//            /                 /
+//      -----*--e---*-----f----* C
+//           E                  \
+//                               \
+//                                *--a---*---A-->
+//
+// If A and B need to be retained, is it cheaper to store
+// snapshot at C+a+b, or snapshots at A and B ?
+//
+// If D also needs to be retained, which is cheaper:
+//
+// 1. E+g+e+f+a+b
+// 2. D+C+a+b
+// 3. D+A+B

-    // This has same length as the StorageModel::segments vector in the input.
-    // Each entry in this array corresponds to the entry with same index in
-    // StorageModel::segments.
-    pub segments: Vec<SegmentSizeResult>,
+/// [`Segment`] which has had its size calculated.
+pub struct SegmentSize {
+    pub seg_id: usize,
+
+    pub method: SegmentMethod,
+
+    this_size: u64,
+
+    pub children: Vec<SegmentSize>,
 }

-#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
-pub struct SegmentSizeResult {
-    pub method: SegmentMethod,
-    // calculated size of this subtree, using this method
-    pub accum_size: u64,
+impl SegmentSize {
+    fn total(&self) -> u64 {
+        self.this_size + self.children.iter().fold(0, |acc, x| acc + x.total())
+    }
+
+    pub fn total_children(&self) -> u64 {
+        if self.method == SnapshotAfter {
+            self.this_size + self.children.iter().fold(0, |acc, x| acc + x.total())
+        } else {
+            self.children.iter().fold(0, |acc, x| acc + x.total())
+        }
+    }
 }

 /// Different methods to retain history from a particular state
-#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum SegmentMethod {
-    SnapshotHere, // A logical snapshot is needed after this segment
-    Wal,          // Keep WAL leading up to this node
+    SnapshotAfter,
+    Wal,
+    WalNeeded,
    Skipped,
 }
+
+use SegmentMethod::*;
+
+impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
+    /// Creates a new storage with the given default branch name.
+    pub fn new(initial_branch: K) -> Storage<K> {
+        let init_segment = Segment {
+            op: "".into(),
+            needed: false,
+            parent: None,
+            start_lsn: 0,
+            end_lsn: 0,
+            start_size: 0,
+            end_size: Some(0),
+            children_after: Vec::new(),
+        };
+
+        Storage {
+            segments: vec![init_segment],
+            branches: HashMap::from([(initial_branch, 0)]),
+        }
+    }
+
+    /// Advances the branch with a new point, at given LSN.
+    pub fn insert_point<Q: ?Sized>(
+        &mut self,
+        branch: &Q,
+        op: Cow<'static, str>,
+        lsn: u64,
+        size: Option<u64>,
+    ) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        let Some(lastseg_id) = self.branches.get(branch).copied() else { anyhow::bail!("branch not found: {branch:?}") };
+        let newseg_id = self.segments.len();
+        let lastseg = &mut self.segments[lastseg_id];
+
+        assert!(lsn > lastseg.end_lsn);
+
+        let Some(start_size) = lastseg.end_size else { anyhow::bail!("no end_size on latest segment for {branch:?}") };
+
+        let newseg = Segment {
+            op,
+            parent: Some(lastseg_id),
+            start_lsn: lastseg.end_lsn,
+            end_lsn: lsn,
+            start_size,
+            end_size: size,
+            children_after: Vec::new(),
+            needed: false,
+        };
+        lastseg.children_after.push(newseg_id);
+
+        self.segments.push(newseg);
+        *self.branches.get_mut(branch).expect("read already") = newseg_id;
+
+        Ok(())
+    }
+
+    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
+    pub fn modify_branch<Q: ?Sized>(
+        &mut self,
+        branch: &Q,
+        op: Cow<'static, str>,
+        lsn_bytes: u64,
+        size_bytes: i64,
+    ) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        let Some(lastseg_id) = self.branches.get(branch).copied() else { anyhow::bail!("branch not found: {branch:?}") };
+        let newseg_id = self.segments.len();
+        let lastseg = &mut self.segments[lastseg_id];
+
+        let Some(last_end_size) = lastseg.end_size else { anyhow::bail!("no end_size on latest segment for {branch:?}") };
+
+        let newseg = Segment {
+            op,
+            parent: Some(lastseg_id),
+            start_lsn: lastseg.end_lsn,
+            end_lsn: lastseg.end_lsn + lsn_bytes,
+            start_size: last_end_size,
+            end_size: Some((last_end_size as i64 + size_bytes) as u64),
+            children_after: Vec::new(),
+            needed: false,
+        };
+        lastseg.children_after.push(newseg_id);
+
+        self.segments.push(newseg);
+        *self.branches.get_mut(branch).expect("read already") = newseg_id;
+        Ok(())
+    }
+
+    pub fn insert<Q: ?Sized>(&mut self, branch: &Q, bytes: u64) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        self.modify_branch(branch, "insert".into(), bytes, bytes as i64)
+    }
+
+    pub fn update<Q: ?Sized>(&mut self, branch: &Q, bytes: u64) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        self.modify_branch(branch, "update".into(), bytes, 0i64)
+    }
+
+    pub fn delete<Q: ?Sized>(&mut self, branch: &Q, bytes: u64) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        self.modify_branch(branch, "delete".into(), bytes, -(bytes as i64))
+    }
+
+    pub fn branch<Q: ?Sized>(&mut self, parent: &Q, name: K) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q> + std::fmt::Debug,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        // Find the right segment
+        let branchseg_id = *self.branches.get(parent).with_context(|| {
+            format!(
+                "should had found the parent {:?} by key. in branches {:?}",
+                parent, self.branches
+            )
+        })?;
+
+        let _branchseg = &mut self.segments[branchseg_id];
+
+        // Create branch name for it
+        self.branches.insert(name, branchseg_id);
+        Ok(())
+    }
+
+    pub fn calculate(&mut self, retention_period: u64) -> anyhow::Result<SegmentSize> {
+        // Phase 1: Mark all the segments that need to be retained
+        for (_branch, &last_seg_id) in self.branches.iter() {
+            let last_seg = &self.segments[last_seg_id];
+            let cutoff_lsn = last_seg.start_lsn.saturating_sub(retention_period);
+            let mut seg_id = last_seg_id;
+            loop {
+                let seg = &mut self.segments[seg_id];
+                if seg.end_lsn < cutoff_lsn {
+                    break;
+                }
+                seg.needed = true;
+                if let Some(prev_seg_id) = seg.parent {
+                    seg_id = prev_seg_id;
+                } else {
+                    break;
+                }
+            }
+        }
+
+        // Phase 2: For each oldest segment in a chain that needs to be retained,
+        // calculate if we should store snapshot or WAL
+        self.size_from_snapshot_later(0)
+    }
+
+    fn size_from_wal(&self, seg_id: usize) -> anyhow::Result<SegmentSize> {
+        let seg = &self.segments[seg_id];
+
+        let this_size = seg.end_lsn - seg.start_lsn;
+
+        let mut children = Vec::new();
+
+        // try both ways
+        for &child_id in seg.children_after.iter() {
+            // try each child both ways
+            let child = &self.segments[child_id];
+            let p1 = self.size_from_wal(child_id)?;
+
+            let p = if !child.needed {
+                let p2 = self.size_from_snapshot_later(child_id)?;
+                if p1.total() < p2.total() {
+                    p1
+                } else {
+                    p2
+                }
+            } else {
+                p1
+            };
+            children.push(p);
+        }
+        Ok(SegmentSize {
+            seg_id,
+            method: if seg.needed { WalNeeded } else { Wal },
+            this_size,
+            children,
+        })
+    }
+
+    fn size_from_snapshot_later(&self, seg_id: usize) -> anyhow::Result<SegmentSize> {
+        // If this is needed, then it's time to do the snapshot and continue
+        // with wal method.
+        let seg = &self.segments[seg_id];
+        //eprintln!("snap: seg{}: {} needed: {}", seg_id, seg.children_after.len(), seg.needed);
+        if seg.needed {
+            let mut children = Vec::new();
+
+            for &child_id in seg.children_after.iter() {
+                // try each child both ways
+                let child = &self.segments[child_id];
+                let p1 = self.size_from_wal(child_id)?;
+
+                let p = if !child.needed {
+                    let p2 = self.size_from_snapshot_later(child_id)?;
+                    if p1.total() < p2.total() {
+                        p1
+                    } else {
+                        p2
+                    }
+                } else {
+                    p1
+                };
+                children.push(p);
+            }
+            Ok(SegmentSize {
+                seg_id,
+                method: WalNeeded,
+                this_size: seg.start_size,
+                children,
+            })
+        } else {
+            // If any of the direct children are "needed", need to be able to reconstruct here
+            let mut children_needed = false;
+            for &child in seg.children_after.iter() {
+                let seg = &self.segments[child];
+                if seg.needed {
+                    children_needed = true;
+                    break;
+                }
+            }
+
+            let method1 = if !children_needed {
+                let mut children = Vec::new();
+                for child in seg.children_after.iter() {
+                    children.push(self.size_from_snapshot_later(*child)?);
+                }
+                Some(SegmentSize {
+                    seg_id,
+                    method: Skipped,
+                    this_size: 0,
+                    children,
+                })
+            } else {
+                None
+            };
+
+            // If this is a junction, consider snapshotting here
+            let method2 = if children_needed || seg.children_after.len() >= 2 {
+                let mut children = Vec::new();
+                for child in seg.children_after.iter() {
+                    children.push(self.size_from_wal(*child)?);
+                }
+                let Some(this_size) = seg.end_size else { anyhow::bail!("no end_size at junction {seg_id}") };
+                Some(SegmentSize {
+                    seg_id,
+                    method: SnapshotAfter,
+                    this_size,
+                    children,
+                })
+            } else {
+                None
+            };
+
+            Ok(match (method1, method2) {
+                (None, None) => anyhow::bail!(
+                    "neither method was applicable: children_after={}, children_needed={}",
+                    seg.children_after.len(),
+                    children_needed
+                ),
+                (Some(method), None) => method,
+                (None, Some(method)) => method,
+                (Some(method1), Some(method2)) => {
+                    if method1.total() < method2.total() {
+                        method1
+                    } else {
+                        method2
+                    }
+                }
+            })
+        }
+    }
+
+    pub fn into_segments(self) -> Vec<Segment> {
+        self.segments
+    }
+}
--- a/libs/tenant_size_model/src/main.rs
+++ b/libs/tenant_size_model/src/main.rs
@@ -0,0 +1,269 @@
+//! Tenant size model testing ground.
+//!
+//! Has a number of scenarios and a `main` for invoking these by number, calculating the history
+//! size, and printing a graphviz graph. The Makefile in this directory shows how to use graphviz
+//! to turn scenarios into pngs.
+
+use tenant_size_model::{Segment, SegmentSize, Storage};
+
+// Main branch only. Some updates on it.
+fn scenario_1() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+// Main branch with some updates, then a child branch; one more update on each.
+fn scenario_2() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    // Branch
+    storage.branch("main", "child")?;
+    storage.update("child", 1_000)?;
+
+    // More updates on parent
+    storage.update("main", 1_000)?;
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+// Like 2, but more updates on main
+fn scenario_3() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    // Branch
+    storage.branch("main", "child")?;
+    storage.update("child", 1_000)?;
+
+    // More updates on parent
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+// Diverged branches
+fn scenario_4() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    // Branch
+    storage.branch("main", "child")?;
+    storage.update("child", 1_000)?;
+
+    // More updates on parent
+    for _ in 0..8 {
+        storage.update("main", 1_000)?;
+    }
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+fn scenario_5() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    let mut storage = Storage::new("a");
+    storage.insert("a", 5000)?;
+    storage.branch("a", "b")?;
+    storage.update("b", 4000)?;
+    storage.update("a", 2000)?;
+    storage.branch("a", "c")?;
+    storage.insert("c", 4000)?;
+    storage.insert("a", 2000)?;
+
+    let size = storage.calculate(5000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+fn scenario_6() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    use std::borrow::Cow;
+
+    const NO_OP: Cow<'static, str> = Cow::Borrowed("");
+
+    let branches = [
+        Some(0x7ff1edab8182025f15ae33482edb590a_u128),
+        Some(0xb1719e044db05401a05a2ed588a3ad3f),
+        Some(0xb68d6691c895ad0a70809470020929ef),
+    ];
+
+    // compared to other scenarios, this one uses bytes instead of kB
+
+    let mut storage = Storage::new(None);
+
+    storage.branch(&None, branches[0])?; // at 0
+    storage.modify_branch(&branches[0], NO_OP, 108951064, 43696128)?; // at 108951064
+    storage.branch(&branches[0], branches[1])?; // at 108951064
+    storage.modify_branch(&branches[1], NO_OP, 15560408, -1851392)?; // at 124511472
+    storage.modify_branch(&branches[0], NO_OP, 174464360, -1531904)?; // at 283415424
+    storage.branch(&branches[0], branches[2])?; // at 283415424
+    storage.modify_branch(&branches[2], NO_OP, 15906192, 8192)?; // at 299321616
+    storage.modify_branch(&branches[0], NO_OP, 18909976, 32768)?; // at 302325400
+
+    let size = storage.calculate(100_000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+
+    let scenario = if args.len() < 2 { "1" } else { &args[1] };
+
+    let (segments, size) = match scenario {
+        "1" => scenario_1(),
+        "2" => scenario_2(),
+        "3" => scenario_3(),
+        "4" => scenario_4(),
+        "5" => scenario_5(),
+        "6" => scenario_6(),
+        other => {
+            eprintln!("invalid scenario {}", other);
+            std::process::exit(1);
+        }
+    }
+    .unwrap();
+
+    graphviz_tree(&segments, &size);
+}
+
+fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
+    use tenant_size_model::SegmentMethod::*;
+
+    let seg_id = node.seg_id;
+    let seg = segments.get(seg_id).unwrap();
+    let lsn = seg.end_lsn;
+    let size = seg.end_size.unwrap_or(0);
+    let method = node.method;
+
+    println!("  {{");
+    println!("    node [width=0.1 height=0.1 shape=oval]");
+
+    let tenant_size = node.total_children();
+
+    let penwidth = if seg.needed { 6 } else { 3 };
+    let x = match method {
+        SnapshotAfter =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" style=filled penwidth={penwidth}"),
+        Wal =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"black\" penwidth={penwidth}"),
+        WalNeeded =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"black\" penwidth={penwidth}"),
+        Skipped =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"gray\" penwidth={penwidth}"),
+    };
+
+    println!("    \"seg{seg_id}\" [{x}]");
+    println!("  }}");
+
+    // Recurse. Much of the data is actually on the edge
+    for child in node.children.iter() {
+        let child_id = child.seg_id;
+        graphviz_recurse(segments, child);
+
+        let edge_color = match child.method {
+            SnapshotAfter => "gray",
+            Wal => "black",
+            WalNeeded => "black",
+            Skipped => "gray",
+        };
+
+        println!("  {{");
+        println!("    edge [] ");
+        print!("    \"seg{seg_id}\" -> \"seg{child_id}\" [");
+        print!("color={edge_color}");
+        if child.method == WalNeeded {
+            print!(" penwidth=6");
+        }
+        if child.method == Wal {
+            print!(" penwidth=3");
+        }
+
+        let next = segments.get(child_id).unwrap();
+
+        if next.op.is_empty() {
+            print!(
+                " label=\"{} / {}\"",
+                next.end_lsn - seg.end_lsn,
+                (next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
+            );
+        } else {
+            print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);
+        }
+        println!("]");
+        println!("  }}");
+    }
+}
+
+fn graphviz_tree(segments: &[Segment], tree: &SegmentSize) {
+    println!("digraph G {{");
+    println!("  fontname=\"Helvetica,Arial,sans-serif\"");
+    println!("  node [fontname=\"Helvetica,Arial,sans-serif\"]");
+    println!("  edge [fontname=\"Helvetica,Arial,sans-serif\"]");
+    println!("  graph [center=1 rankdir=LR]");
+    println!("  edge [dir=none]");
+
+    graphviz_recurse(segments, tree);
+
+    println!("}}");
+}
+
+#[test]
+fn scenarios_return_same_size() {
+    type ScenarioFn = fn() -> anyhow::Result<(Vec<Segment>, SegmentSize)>;
+    let truths: &[(u32, ScenarioFn, _)] = &[
+        (line!(), scenario_1, 8000),
+        (line!(), scenario_2, 9000),
+        (line!(), scenario_3, 13000),
+        (line!(), scenario_4, 16000),
+        (line!(), scenario_5, 17000),
+        (line!(), scenario_6, 333_792_000),
+    ];
+
+    for (line, scenario, expected) in truths {
+        let (_, size) = scenario().unwrap();
+        assert_eq!(*expected, size.total_children(), "scenario on line {line}");
+    }
+}
--- a/libs/tenant_size_model/src/svg.rs
+++ b/libs/tenant_size_model/src/svg.rs
@@ -1,193 +0,0 @@
-use crate::{SegmentMethod, SegmentSizeResult, SizeResult, StorageModel};
-use std::fmt::Write;
-
-const SVG_WIDTH: f32 = 500.0;
-
-struct SvgDraw<'a> {
-    storage: &'a StorageModel,
-    branches: &'a [String],
-    seg_to_branch: &'a [usize],
-    sizes: &'a [SegmentSizeResult],
-
-    // layout
-    xscale: f32,
-    min_lsn: u64,
-    seg_coordinates: Vec<(f32, f32)>,
-}
-
-fn draw_legend(result: &mut String) -> anyhow::Result<()> {
-    writeln!(
-        result,
-        "<circle cx=\"10\" cy=\"10\" r=\"5\" stroke=\"red\"/>"
-    )?;
-    writeln!(result, "<text x=\"20\" y=\"15\">logical snapshot</text>")?;
-    writeln!(
-        result,
-        "<line x1=\"5\" y1=\"30\" x2=\"15\" y2=\"30\" stroke-width=\"6\" stroke=\"black\" />"
-    )?;
-    writeln!(
-        result,
-        "<text x=\"20\" y=\"35\">WAL within retention period</text>"
-    )?;
-    writeln!(
-        result,
-        "<line x1=\"5\" y1=\"50\" x2=\"15\" y2=\"50\" stroke-width=\"3\" stroke=\"black\" />"
-    )?;
-    writeln!(
-        result,
-        "<text x=\"20\" y=\"55\">WAL retained to avoid copy</text>"
-    )?;
-    writeln!(
-        result,
-        "<line x1=\"5\" y1=\"70\" x2=\"15\" y2=\"70\" stroke-width=\"1\" stroke=\"gray\" />"
-    )?;
-    writeln!(result, "<text x=\"20\" y=\"75\">WAL not retained</text>")?;
-    Ok(())
-}
-
-pub fn draw_svg(
-    storage: &StorageModel,
-    branches: &[String],
-    seg_to_branch: &[usize],
-    sizes: &SizeResult,
-) -> anyhow::Result<String> {
-    let mut draw = SvgDraw {
-        storage,
-        branches,
-        seg_to_branch,
-        sizes: &sizes.segments,
-
-        xscale: 0.0,
-        min_lsn: 0,
-        seg_coordinates: Vec::new(),
-    };
-
-    let mut result = String::new();
-
-    writeln!(result, "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" height=\"300\" width=\"500\">")?;
-
-    draw.calculate_svg_layout();
-
-    // Draw the tree
-    for (seg_id, _seg) in storage.segments.iter().enumerate() {
-        draw.draw_seg_phase1(seg_id, &mut result)?;
-    }
-
-    // Draw snapshots
-    for (seg_id, _seg) in storage.segments.iter().enumerate() {
-        draw.draw_seg_phase2(seg_id, &mut result)?;
-    }
-
-    draw_legend(&mut result)?;
-
-    write!(result, "</svg>")?;
-
-    Ok(result)
-}
-
-impl<'a> SvgDraw<'a> {
-    fn calculate_svg_layout(&mut self) {
-        // Find x scale
-        let segments = &self.storage.segments;
-        let min_lsn = segments.iter().map(|s| s.lsn).fold(u64::MAX, std::cmp::min);
-        let max_lsn = segments.iter().map(|s| s.lsn).fold(0, std::cmp::max);
-
-        // Start with 1 pixel = 1 byte. Double the scale until it fits into the image
-        let mut xscale = 1.0;
-        while (max_lsn - min_lsn) as f32 / xscale > SVG_WIDTH {
-            xscale *= 2.0;
-        }
-
-        // Layout the timelines on Y dimension.
-        // TODO
-        let mut y = 100.0;
-        let mut branch_y_coordinates = Vec::new();
-        for _branch in self.branches {
-            branch_y_coordinates.push(y);
-            y += 40.0;
-        }
-
-        // Calculate coordinates for each point
-        let seg_coordinates = std::iter::zip(segments, self.seg_to_branch)
-            .map(|(seg, branch_id)| {
-                let x = (seg.lsn - min_lsn) as f32 / xscale;
-                let y = branch_y_coordinates[*branch_id];
-                (x, y)
-            })
-            .collect();
-
-        self.xscale = xscale;
-        self.min_lsn = min_lsn;
-        self.seg_coordinates = seg_coordinates;
-    }
-
-    /// Draws lines between points
-    fn draw_seg_phase1(&self, seg_id: usize, result: &mut String) -> anyhow::Result<()> {
-        let seg = &self.storage.segments[seg_id];
-
-        let wal_bytes = if let Some(parent_id) = seg.parent {
-            seg.lsn - self.storage.segments[parent_id].lsn
-        } else {
-            0
-        };
-
-        let style = match self.sizes[seg_id].method {
-            SegmentMethod::SnapshotHere => "stroke-width=\"1\" stroke=\"gray\"",
-            SegmentMethod::Wal if seg.needed && wal_bytes > 0 => {
-                "stroke-width=\"6\" stroke=\"black\""
-            }
-            SegmentMethod::Wal => "stroke-width=\"3\" stroke=\"black\"",
-            SegmentMethod::Skipped => "stroke-width=\"1\" stroke=\"gray\"",
-        };
-        if let Some(parent_id) = seg.parent {
-            let (x1, y1) = self.seg_coordinates[parent_id];
-            let (x2, y2) = self.seg_coordinates[seg_id];
-
-            writeln!(
-                result,
-                "<line x1=\"{x1}\" y1=\"{y1}\" x2=\"{x2}\" y2=\"{y2}\" {style}>",
-            )?;
-            writeln!(
-                result,
-                "  <title>{wal_bytes} bytes of WAL (seg {seg_id})</title>"
-            )?;
-            writeln!(result, "</line>")?;
-        } else {
-            // draw a little dash to mark the starting point of this branch
-            let (x, y) = self.seg_coordinates[seg_id];
-            let (x1, y1) = (x, y - 5.0);
-            let (x2, y2) = (x, y + 5.0);
-
-            writeln!(
-                result,
-                "<line x1=\"{x1}\" y1=\"{y1}\" x2=\"{x2}\" y2=\"{y2}\" {style}>",
-            )?;
-            writeln!(result, "  <title>(seg {seg_id})</title>")?;
-            writeln!(result, "</line>")?;
-        }
-
-        Ok(())
-    }
-
-    /// Draw circles where snapshots are taken
-    fn draw_seg_phase2(&self, seg_id: usize, result: &mut String) -> anyhow::Result<()> {
-        let seg = &self.storage.segments[seg_id];
-
-        // draw a snapshot point if it's needed
-        let (coord_x, coord_y) = self.seg_coordinates[seg_id];
-        if self.sizes[seg_id].method == SegmentMethod::SnapshotHere {
-            writeln!(
-                result,
-                "<circle cx=\"{coord_x}\" cy=\"{coord_y}\" r=\"5\" stroke=\"red\">",
-            )?;
-            writeln!(
-                result,
-                "  <title>logical size {}</title>",
-                seg.size.unwrap()
-            )?;
-            write!(result, "</circle>")?;
-        }
-
-        Ok(())
-    }
-}
--- a/libs/tenant_size_model/tests/tests.rs
+++ b/libs/tenant_size_model/tests/tests.rs
@@ -1,313 +0,0 @@
-//! Tenant size model tests.
-
-use tenant_size_model::{Segment, SizeResult, StorageModel};
-
-use std::collections::HashMap;
-
-struct ScenarioBuilder {
-    segments: Vec<Segment>,
-
-    /// Mapping from the branch name to the index of a segment describing its latest state.
-    branches: HashMap<String, usize>,
-}
-
-impl ScenarioBuilder {
-    /// Creates a new storage with the given default branch name.
-    pub fn new(initial_branch: &str) -> ScenarioBuilder {
-        let init_segment = Segment {
-            parent: None,
-            lsn: 0,
-            size: Some(0),
-            needed: false, // determined later
-        };
-
-        ScenarioBuilder {
-            segments: vec![init_segment],
-            branches: HashMap::from([(initial_branch.into(), 0)]),
-        }
-    }
-
-    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
-    pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
-        let lastseg_id = *self.branches.get(branch).unwrap();
-        let newseg_id = self.segments.len();
-        let lastseg = &mut self.segments[lastseg_id];
-
-        let newseg = Segment {
-            parent: Some(lastseg_id),
-            lsn: lastseg.lsn + lsn_bytes,
-            size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
-            needed: false,
-        };
-
-        self.segments.push(newseg);
-        *self.branches.get_mut(branch).expect("read already") = newseg_id;
-    }
-
-    pub fn insert(&mut self, branch: &str, bytes: u64) {
-        self.modify_branch(branch, bytes, bytes as i64);
-    }
-
-    pub fn update(&mut self, branch: &str, bytes: u64) {
-        self.modify_branch(branch, bytes, 0i64);
-    }
-
-    pub fn _delete(&mut self, branch: &str, bytes: u64) {
-        self.modify_branch(branch, bytes, -(bytes as i64));
-    }
-
-    /// Panics if the parent branch cannot be found.
-    pub fn branch(&mut self, parent: &str, name: &str) {
-        // Find the right segment
-        let branchseg_id = *self
-            .branches
-            .get(parent)
-            .expect("should had found the parent by key");
-        let _branchseg = &mut self.segments[branchseg_id];
-
-        // Create branch name for it
-        self.branches.insert(name.to_string(), branchseg_id);
-    }
-
-    pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
-        // Phase 1: Mark all the segments that need to be retained
-        for (_branch, &last_seg_id) in self.branches.iter() {
-            let last_seg = &self.segments[last_seg_id];
-            let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
-            let mut seg_id = last_seg_id;
-            loop {
-                let seg = &mut self.segments[seg_id];
-                if seg.lsn <= cutoff_lsn {
-                    break;
-                }
-                seg.needed = true;
-                if let Some(prev_seg_id) = seg.parent {
-                    seg_id = prev_seg_id;
-                } else {
-                    break;
-                }
-            }
-        }
-
-        // Perform the calculation
-        let storage_model = StorageModel {
-            segments: self.segments.clone(),
-        };
-        let size_result = storage_model.calculate();
-        (storage_model, size_result)
-    }
-}
-
-// Main branch only. Some updates on it.
-#[test]
-fn scenario_1() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Calculate the synthetic size with retention horizon 1000
-    let (_model, result) = scenario.calculate(1000);
-
-    // The end of the branch is at LSN 10000. Need to retain
-    // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
-    // The logical snapshot has size 5000.
-    assert_eq!(result.total_size, 5000 + 1000);
-}
-
-// Main branch only. Some updates on it.
-#[test]
-fn scenario_2() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Branch
-    scenario.branch("main", "child");
-    scenario.update("child", 1_000);
-
-    // More updates on parent
-    scenario.update("main", 1_000);
-
-    //
-    // The history looks like this now:
-    //
-    //         10000          11000
-    // *----*----*--------------*    main
-    //           |
-    //           |            11000
-    //           +--------------     child
-    //
-    //
-    // With retention horizon 1000, we need to retain logical snapshot
-    // at the branch point, size 5000, and the WAL from 10000-11000 on
-    // both branches.
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 5000 + 1000 + 1000);
-}
-
-// Like 2, but more updates on main
-#[test]
-fn scenario_3() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Branch
-    scenario.branch("main", "child");
-    scenario.update("child", 1_000);
-
-    // More updates on parent
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    //
-    // The history looks like this now:
-    //
-    //         10000                                 15000
-    // *----*----*------------------------------------*    main
-    //           |
-    //           |            11000
-    //           +--------------     child
-    //
-    //
-    // With retention horizon 1000, it's still cheapest to retain
-    // - snapshot at branch point (size 5000)
-    // - WAL on child between 10000-11000
-    // - WAL on main between 10000-15000
-    //
-    // This is in total 5000 + 1000 + 5000
-    //
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 5000 + 1000 + 5000);
-}
-
-// Diverged branches
-#[test]
-fn scenario_4() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Branch
-    scenario.branch("main", "child");
-    scenario.update("child", 1_000);
-
-    // More updates on parent
-    for _ in 0..8 {
-        scenario.update("main", 1_000);
-    }
-
-    //
-    // The history looks like this now:
-    //
-    //         10000                                 18000
-    // *----*----*------------------------------------*    main
-    //           |
-    //           |            11000
-    //           +--------------     child
-    //
-    //
-    // With retention horizon 1000, it's now cheapest to retain
-    // separate snapshots on both branches:
-    // - snapshot on main branch at LSN 17000 (size 5000)
-    // - WAL on main between 17000-18000
-    // - snapshot on child branch at LSN 10000 (size 5000)
-    // - WAL on child between 10000-11000
-    //
-    // This is in total 5000 + 1000 + 5000 + 1000 = 12000
-    //
-    // (If we used the the method from the previous scenario, and
-    // kept only snapshot at the branch point, we'd need to keep
-    // all the WAL between 10000-18000 on the main branch, so
-    // the total size would be 5000 + 1000 + 8000 = 14000. The
-    // calculation always picks the cheapest alternative)
-
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
-}
-
-#[test]
-fn scenario_5() {
-    let mut scenario = ScenarioBuilder::new("a");
-    scenario.insert("a", 5000);
-    scenario.branch("a", "b");
-    scenario.update("b", 4000);
-    scenario.update("a", 2000);
-    scenario.branch("a", "c");
-    scenario.insert("c", 4000);
-    scenario.insert("a", 2000);
-
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 17000);
-}
-
-#[test]
-fn scenario_6() {
-    let branches = [
-        "7ff1edab8182025f15ae33482edb590a",
-        "b1719e044db05401a05a2ed588a3ad3f",
-        "0xb68d6691c895ad0a70809470020929ef",
-    ];
-
-    // compared to other scenarios, this one uses bytes instead of kB
-
-    let mut scenario = ScenarioBuilder::new("");
-
-    scenario.branch("", branches[0]); // at 0
-    scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
-    scenario.branch(branches[0], branches[1]); // at 108951064
-    scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
-    scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
-    scenario.branch(branches[0], branches[2]); // at 283415424
-    scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
-    scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
-
-    let (model, result) = scenario.calculate(100_000);
-
-    // FIXME: We previously calculated 333_792_000. But with this PR, we get
-    // a much lower number. At a quick look at the model output and the
-    // calculations here, the new result seems correct to me.
-    eprintln!(
-        " MODEL: {}",
-        serde_json::to_string(&model.segments).unwrap()
-    );
-    eprintln!(
-        "RESULT: {}",
-        serde_json::to_string(&result.segments).unwrap()
-    );
-
-    assert_eq!(result.total_size, 136_236_928);
-}
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -13,7 +13,6 @@ bincode.workspace = true
 bytes.workspace = true
 heapless.workspace = true
 hyper = { workspace = true, features = ["full"] }
-futures = { workspace = true}
 routerify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -40,7 +39,7 @@ pq_proto.workspace = true

 workspace_hack.workspace = true
 url.workspace = true
-uuid = { version = "1.2", features = ["v4", "serde"] }
+
 [dev-dependencies]
 byteorder.workspace = true
 bytes.workspace = true
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -4,13 +4,13 @@ use anyhow::{anyhow, Context};
 use hyper::header::{HeaderName, AUTHORIZATION};
 use hyper::http::HeaderValue;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
-use hyper::{Method, StatusCode};
 use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
 use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
-use routerify::{Middleware, RequestInfo, Router, RouterBuilder, RouterService};
+use routerify::RequestInfo;
+use routerify::{Middleware, Router, RouterBuilder, RouterService};
 use tokio::task::JoinError;
-use tracing;
+use tracing::info;

 use std::future::Future;
 use std::net::TcpListener;
@@ -26,36 +26,8 @@ static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

-static X_REQUEST_ID_HEADER_STR: &str = "x-request-id";
-
-static X_REQUEST_ID_HEADER: HeaderName = HeaderName::from_static(X_REQUEST_ID_HEADER_STR);
-#[derive(Debug, Default, Clone)]
-struct RequestId(String);
-
 async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
-    let request_id = info.context::<RequestId>().unwrap_or_default().0;
-
-    // cannot factor out the Level to avoid the repetition
-    // because tracing can only work with const Level
-    // which is not the case here
-
-    if info.method() == Method::GET && res.status() == StatusCode::OK {
-        tracing::debug!(
-            "{} {} {} {}",
-            info.method(),
-            info.uri().path(),
-            request_id,
-            res.status()
-        );
-    } else {
-        tracing::info!(
-            "{} {} {} {}",
-            info.method(),
-            info.uri().path(),
-            request_id,
-            res.status()
-        );
-    }
+    info!("{} {} {}", info.method(), info.uri().path(), res.status(),);
    Ok(res)
 }

@@ -83,52 +55,9 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
    Ok(response)
 }

-pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
-) -> Middleware<B, ApiError> {
-    Middleware::pre(move |req| async move {
-        let request_id = match req.headers().get(&X_REQUEST_ID_HEADER) {
-            Some(request_id) => request_id
-                .to_str()
-                .expect("extract request id value")
-                .to_owned(),
-            None => {
-                let request_id = uuid::Uuid::new_v4();
-                request_id.to_string()
-            }
-        };
-
-        if req.method() == Method::GET {
-            tracing::debug!("{} {} {}", req.method(), req.uri().path(), request_id);
-        } else {
-            tracing::info!("{} {} {}", req.method(), req.uri().path(), request_id);
-        }
-        req.set_context(RequestId(request_id));
-
-        Ok(req)
-    })
-}
-
-async fn add_request_id_header_to_response(
-    mut res: Response<Body>,
-    req_info: RequestInfo,
-) -> Result<Response<Body>, ApiError> {
-    if let Some(request_id) = req_info.context::<RequestId>() {
-        if let Ok(request_header_value) = HeaderValue::from_str(&request_id.0) {
-            res.headers_mut()
-                .insert(&X_REQUEST_ID_HEADER, request_header_value);
-        };
-    };
-
-    Ok(res)
-}
-
 pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
    Router::builder()
-        .middleware(add_request_id_middleware())
        .middleware(Middleware::post_with_info(logger))
-        .middleware(Middleware::post_with_info(
-            add_request_id_header_to_response,
-        ))
        .get("/metrics", prometheus_metrics_handler)
        .err_handler(error::handler)
 }
@@ -274,7 +203,7 @@ pub fn serve_thread_main<S>(
 where
    S: Future<Output = ()> + Send + Sync,
 {
-    tracing::info!("Starting an HTTP endpoint at {}", listener.local_addr()?);
+    info!("Starting an HTTP endpoint at {}", listener.local_addr()?);

    // Create a Service from the router above to handle incoming requests.
    let service = RouterService::new(router_builder.build().map_err(|err| anyhow!(err))?).unwrap();
@@ -294,48 +223,3 @@ where

    Ok(())
 }
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use futures::future::poll_fn;
-    use hyper::service::Service;
-    use routerify::RequestServiceBuilder;
-    use std::net::{IpAddr, SocketAddr};
-
-    #[tokio::test]
-    async fn test_request_id_returned() {
-        let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();
-        let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
-        let mut service = builder.build(remote_addr);
-        if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
-            panic!("request service is not ready: {:?}", e);
-        }
-
-        let mut req: Request<Body> = Request::default();
-        req.headers_mut()
-            .append(&X_REQUEST_ID_HEADER, HeaderValue::from_str("42").unwrap());
-
-        let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();
-
-        let header_val = resp.headers().get(&X_REQUEST_ID_HEADER).unwrap();
-
-        assert!(header_val == "42", "response header mismatch");
-    }
-
-    #[tokio::test]
-    async fn test_request_id_empty() {
-        let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();
-        let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
-        let mut service = builder.build(remote_addr);
-        if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
-            panic!("request service is not ready: {:?}", e);
-        }
-
-        let req: Request<Body> = Request::default();
-        let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();
-
-        let header_val = resp.headers().get(&X_REQUEST_ID_HEADER);
-
-        assert_ne!(header_val, None, "response header should NOT be empty");
-    }
-}
--- a/libs/utils/src/http/json.rs
+++ b/libs/utils/src/http/json.rs
@@ -1,9 +1,7 @@
-use std::fmt::Display;
-
 use anyhow::Context;
 use bytes::Buf;
 use hyper::{header, Body, Request, Response, StatusCode};
-use serde::{Deserialize, Serialize, Serializer};
+use serde::{Deserialize, Serialize};

 use super::error::ApiError;

@@ -33,12 +31,3 @@ pub fn json_response<T: Serialize>(
        .map_err(|e| ApiError::InternalServerError(e.into()))?;
    Ok(response)
 }
-
-/// Serialize through Display trait.
-pub fn display_serialize<S, F>(z: &F, s: S) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-    F: Display,
-{
-    s.serialize_str(&format!("{}", z))
-}
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -45,115 +45,3 @@ pub fn init(log_format: LogFormat) -> anyhow::Result<()> {

    Ok(())
 }
-
-/// Disable the default rust panic hook by using `set_hook`.
-///
-/// For neon binaries, the assumption is that tracing is configured before with [`init`], after
-/// that sentry is configured (if needed). sentry will install it's own on top of this, always
-/// processing the panic before we log it.
-///
-/// When the return value is dropped, the hook is reverted to std default hook (prints to stderr).
-/// If the assumptions about the initialization order are not held, use
-/// [`TracingPanicHookGuard::disarm`] but keep in mind, if tracing is stopped, then panics will be
-/// lost.
-#[must_use]
-pub fn replace_panic_hook_with_tracing_panic_hook() -> TracingPanicHookGuard {
-    std::panic::set_hook(Box::new(tracing_panic_hook));
-    TracingPanicHookGuard::new()
-}
-
-/// Drop guard which restores the std panic hook on drop.
-///
-/// Tracing should not be used when it's not configured, but we cannot really latch on to any
-/// imaginary lifetime of tracing.
-pub struct TracingPanicHookGuard {
-    act: bool,
-}
-
-impl TracingPanicHookGuard {
-    fn new() -> Self {
-        TracingPanicHookGuard { act: true }
-    }
-
-    /// Make this hook guard not do anything when dropped.
-    pub fn forget(&mut self) {
-        self.act = false;
-    }
-}
-
-impl Drop for TracingPanicHookGuard {
-    fn drop(&mut self) {
-        if self.act {
-            let _ = std::panic::take_hook();
-        }
-    }
-}
-
-/// Named symbol for our panic hook, which logs the panic.
-fn tracing_panic_hook(info: &std::panic::PanicInfo) {
-    // following rust 1.66.1 std implementation:
-    // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288
-    let location = info.location();
-
-    let msg = match info.payload().downcast_ref::<&'static str>() {
-        Some(s) => *s,
-        None => match info.payload().downcast_ref::<String>() {
-            Some(s) => &s[..],
-            None => "Box<dyn Any>",
-        },
-    };
-
-    let thread = std::thread::current();
-    let thread = thread.name().unwrap_or("<unnamed>");
-    let backtrace = std::backtrace::Backtrace::capture();
-
-    let _entered = if let Some(location) = location {
-        tracing::error_span!("panic", %thread, location = %PrettyLocation(location))
-    } else {
-        // very unlikely to hit here, but the guarantees of std could change
-        tracing::error_span!("panic", %thread)
-    }
-    .entered();
-
-    if backtrace.status() == std::backtrace::BacktraceStatus::Captured {
-        // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really
-        // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to
-        // string, maybe even to a TLS one but tracing already does that.
-        tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}");
-    } else {
-        tracing::error!("{msg}");
-    }
-
-    // ensure that we log something on the panic if this hook is left after tracing has been
-    // unconfigured. worst case when teardown is racing the panic is to log the panic twice.
-    tracing::dispatcher::get_default(|d| {
-        if let Some(_none) = d.downcast_ref::<tracing::subscriber::NoSubscriber>() {
-            let location = location.map(PrettyLocation);
-            log_panic_to_stderr(thread, msg, location, &backtrace);
-        }
-    });
-}
-
-#[cold]
-fn log_panic_to_stderr(
-    thread: &str,
-    msg: &str,
-    location: Option<PrettyLocation<'_, '_>>,
-    backtrace: &std::backtrace::Backtrace,
-) {
-    eprintln!("panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}");
-}
-
-struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>);
-
-impl std::fmt::Display for PrettyLocation<'_, '_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column())
-    }
-}
-
-impl std::fmt::Debug for PrettyLocation<'_, '_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        <Self as std::fmt::Display>::fmt(self, f)
-    }
-}
--- a/libs/walproposer/.gitignore
+++ b/libs/walproposer/.gitignore
@@ -1,4 +0,0 @@
-*.a
-*.o
-*.tmp
-pgdata
--- a/libs/walproposer/Cargo.toml
+++ b/libs/walproposer/Cargo.toml
@@ -1,39 +0,0 @@
-[package]
-name = "walproposer"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-atty.workspace = true
-rand.workspace = true
-regex.workspace = true
-bytes.workspace = true
-byteorder.workspace = true
-anyhow.workspace = true
-crc32c.workspace = true
-hex.workspace = true
-once_cell.workspace = true
-log.workspace = true
-libc.workspace = true
-memoffset.workspace = true
-thiserror.workspace = true
-tracing.workspace = true
-tracing-subscriber = { workspace = true, features = ["json"] }
-serde.workspace = true
-scopeguard.workspace = true
-utils.workspace = true
-safekeeper.workspace = true
-postgres_ffi.workspace = true
-hyper.workspace = true
-
-workspace_hack.workspace = true
-
-[dev-dependencies]
-env_logger.workspace = true
-postgres.workspace = true
-
-[build-dependencies]
-anyhow.workspace = true
-bindgen.workspace = true
-cbindgen = "0.24.0"
--- a/libs/walproposer/README.md
+++ b/libs/walproposer/README.md
@@ -1,16 +0,0 @@
-# walproposer Rust module
-
-## Rust -> C
-
-We compile walproposer as a static library and generate Rust bindings for it using `bindgen`.
-Entrypoint header file is `bindgen_deps.h`.
-
-## C -> Rust
-
-We use `cbindgen` to generate C bindings for the Rust code. They are stored in `rust_bindings.h`.
-
-## How to run the tests
-
-```
-export RUSTFLAGS="-C default-linker-libraries"
-```
--- a/libs/walproposer/bindgen_deps.h
+++ b/libs/walproposer/bindgen_deps.h
@@ -1,30 +0,0 @@
-/*
- * This header file is the input to bindgen. It includes all the
- * PostgreSQL headers that we need to auto-generate Rust structs
- * from. If you need to expose a new struct to Rust code, add the
- * header here, and whitelist the struct in the build.rs file.
- */
-#include "c.h"
-#include "walproposer.h"
-
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-// Calc a sum of two numbers. Used to test Rust->C function calls.
-int TestFunc(int a, int b);
-
-// Run a client for simple simlib test.
-void RunClientC(uint32_t serverId);
-
-void WalProposerRust();
-
-void WalProposerCleanup();
-
-extern bool debug_enabled;
-
-// Initialize global variables before calling any Postgres C code.
-void MyContextInit();
-
-XLogRecPtr MyInsertRecord();
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -1,137 +0,0 @@
-use std::{env, path::PathBuf, process::Command};
-use anyhow::{anyhow, Context};
-use bindgen::CargoCallbacks;
-
-extern crate bindgen;
-
-fn main() -> anyhow::Result<()> {
-    let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
-
-    cbindgen::Builder::new()
-        .with_crate(crate_dir)
-        .with_language(cbindgen::Language::C)
-        .generate()
-        .expect("Unable to generate bindings")
-        .write_to_file("rust_bindings.h");
-
-    // Tell cargo to invalidate the built crate whenever the wrapper changes
-    println!("cargo:rerun-if-changed=bindgen_deps.h,test.c,../../pgxn/neon/walproposer.c,build.sh");
-    println!("cargo:rustc-link-arg=-Wl,--start-group");
-    println!("cargo:rustc-link-arg=-lsim");
-    println!("cargo:rustc-link-arg=-lpgport_srv");
-    println!("cargo:rustc-link-arg=-lpostgres");
-    println!("cargo:rustc-link-arg=-lpgcommon_srv");
-    println!("cargo:rustc-link-arg=-lssl");
-    println!("cargo:rustc-link-arg=-lcrypto");
-    println!("cargo:rustc-link-arg=-lz");
-    println!("cargo:rustc-link-arg=-lpthread");
-    println!("cargo:rustc-link-arg=-lrt");
-    println!("cargo:rustc-link-arg=-ldl");
-    println!("cargo:rustc-link-arg=-lm");
-    println!("cargo:rustc-link-arg=-lwalproposer");
-    println!("cargo:rustc-link-arg=-Wl,--end-group");
-    println!("cargo:rustc-link-search=/home/admin/simulator/libs/walproposer");
-    // disable fPIE
-    println!("cargo:rustc-link-arg=-no-pie");
-
-    // print output of build.sh
-    let output = std::process::Command::new("./build.sh")
-        .output()
-        .expect("could not spawn `clang`");
-    
-    println!("stdout: {}", String::from_utf8(output.stdout).unwrap());
-    println!("stderr: {}", String::from_utf8(output.stderr).unwrap());
-
-    if !output.status.success() {
-        // Panic if the command was not successful.
-        panic!("could not compile object file");
-    }
-
-    // // Finding the location of C headers for the Postgres server:
-    // // - if POSTGRES_INSTALL_DIR is set look into it, otherwise look into `<project_root>/pg_install`
-    // // - if there's a `bin/pg_config` file use it for getting include server, otherwise use `<project_root>/pg_install/{PG_MAJORVERSION}/include/postgresql/server`
-    let pg_install_dir = if let Some(postgres_install_dir) = env::var_os("POSTGRES_INSTALL_DIR") {
-        postgres_install_dir.into()
-    } else {
-        PathBuf::from("pg_install")
-    };
-
-    let pg_version = "v15";
-    let mut pg_install_dir_versioned = pg_install_dir.join(pg_version);
-    if pg_install_dir_versioned.is_relative() {
-        let cwd = env::current_dir().context("Failed to get current_dir")?;
-        pg_install_dir_versioned = cwd.join("..").join("..").join(pg_install_dir_versioned);
-    }
-
-    let pg_config_bin = pg_install_dir_versioned
-        .join(pg_version)
-        .join("bin")
-        .join("pg_config");
-    let inc_server_path: String = if pg_config_bin.exists() {
-        let output = Command::new(pg_config_bin)
-            .arg("--includedir-server")
-            .output()
-            .context("failed to execute `pg_config --includedir-server`")?;
-
-        if !output.status.success() {
-            panic!("`pg_config --includedir-server` failed")
-        }
-
-        String::from_utf8(output.stdout)
-            .context("pg_config output is not UTF-8")?
-            .trim_end()
-            .into()
-    } else {
-        let server_path = pg_install_dir_versioned
-            .join("include")
-            .join("postgresql")
-            .join("server")
-            .into_os_string();
-        server_path
-            .into_string()
-            .map_err(|s| anyhow!("Bad postgres server path {s:?}"))?
-    };
-
-    let inc_pgxn_path = "/home/admin/simulator/pgxn/neon";
-
-    // The bindgen::Builder is the main entry point
-    // to bindgen, and lets you build up options for
-    // the resulting bindings.
-    let bindings = bindgen::Builder::default()
-        // The input header we would like to generate
-        // bindings for.
-        .header("bindgen_deps.h")
-        // Tell cargo to invalidate the built crate whenever any of the
-        // included header files changed.
-        .parse_callbacks(Box::new(CargoCallbacks))
-        .allowlist_function("TestFunc")
-        .allowlist_function("RunClientC")
-        .allowlist_function("WalProposerRust")
-        .allowlist_function("MyContextInit")
-        .allowlist_function("WalProposerCleanup")
-        .allowlist_function("MyInsertRecord")
-        .allowlist_var("wal_acceptors_list")
-        .allowlist_var("wal_acceptor_reconnect_timeout")
-        .allowlist_var("wal_acceptor_connection_timeout")
-        .allowlist_var("am_wal_proposer")
-        .allowlist_var("neon_timeline_walproposer")
-        .allowlist_var("neon_tenant_walproposer")
-        .allowlist_var("syncSafekeepers")
-        .allowlist_var("sim_redo_start_lsn")
-        .allowlist_var("debug_enabled")
-        .clang_arg(format!("-I{inc_server_path}"))
-        .clang_arg(format!("-I{inc_pgxn_path}"))
-        .clang_arg(format!("-DSIMLIB"))
-        // Finish the builder and generate the bindings.
-        .generate()
-        // Unwrap the Result and panic on failure.
-        .expect("Unable to generate bindings");
-
-    // Write the bindings to the $OUT_DIR/bindings.rs file.
-    let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs");
-    bindings
-        .write_to_file(out_path)
-        .expect("Couldn't write bindings!");
-
-    Ok(())
-}
--- a/libs/walproposer/build.sh
+++ b/libs/walproposer/build.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-set -e
-
-cd /home/admin/simulator/libs/walproposer
-
-# TODO: rewrite to Makefile
-
-make -C ../.. neon-pg-ext-walproposer
-make -C ../../pg_install/build/v15/src/backend postgres-lib -s
-cp ../../pg_install/build/v15/src/backend/libpostgres.a .
-cp ../../pg_install/build/v15/src/common/libpgcommon_srv.a .
-cp ../../pg_install/build/v15/src/port/libpgport_srv.a .
-
-clang -g -c libpqwalproposer.c test.c -ferror-limit=1 -I ../../pg_install/v15/include/postgresql/server -I ../../pgxn/neon
-rm -rf libsim.a
-ar rcs libsim.a test.o libpqwalproposer.o
-
-rm -rf libwalproposer.a
-
-PGXN_DIR=../../pg_install/build/neon-v15/
-ar rcs libwalproposer.a $PGXN_DIR/walproposer.o $PGXN_DIR/walproposer_utils.o $PGXN_DIR/neon.o 
--- a/libs/walproposer/libpqwalproposer.c
+++ b/libs/walproposer/libpqwalproposer.c
@@ -1,542 +0,0 @@
-#include "postgres.h"
-#include "neon.h"
-#include "walproposer.h"
-#include "rust_bindings.h"
-#include "replication/message.h"
-#include "access/xlog_internal.h"
-
-// defined in walproposer.h
-uint64 sim_redo_start_lsn;
-XLogRecPtr sim_latest_available_lsn;
-
-/* Header in walproposer.h -- Wrapper struct to abstract away the libpq connection */
-struct WalProposerConn
-{
-	int64_t tcp;
-};
-
-/* Helper function */
-static bool
-ensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)
-{
-	// walprop_log(LOG, "not implemented");
-    return false;
-}
-
-/* Exported function definitions */
-char *
-walprop_error_message(WalProposerConn *conn)
-{
-	// walprop_log(LOG, "not implemented");
-    return NULL;
-}
-
-WalProposerConnStatusType
-walprop_status(WalProposerConn *conn)
-{
-	// walprop_log(LOG, "not implemented: walprop_status");
-    return WP_CONNECTION_OK;
-}
-
-WalProposerConn *
-walprop_connect_start(char *conninfo)
-{
-	WalProposerConn *conn;
-
-	walprop_log(LOG, "walprop_connect_start: %s", conninfo);
-	
-	const char *connstr_prefix = "host=node port=";
-	Assert(strncmp(conninfo, connstr_prefix, strlen(connstr_prefix)) == 0);
-
-	int nodeId = atoi(conninfo + strlen(connstr_prefix));
-
-	conn = palloc(sizeof(WalProposerConn));
-	conn->tcp = sim_open_tcp(nodeId);
-	return conn;
-}
-
-WalProposerConnectPollStatusType
-walprop_connect_poll(WalProposerConn *conn)
-{
-	// walprop_log(LOG, "not implemented: walprop_connect_poll");
-    return WP_CONN_POLLING_OK;
-}
-
-bool
-walprop_send_query(WalProposerConn *conn, char *query)
-{
-	// walprop_log(LOG, "not implemented: walprop_send_query");
-    return true;
-}
-
-WalProposerExecStatusType
-walprop_get_query_result(WalProposerConn *conn)
-{
-	// walprop_log(LOG, "not implemented: walprop_get_query_result");
-    return WP_EXEC_SUCCESS_COPYBOTH;
-}
-
-pgsocket
-walprop_socket(WalProposerConn *conn)
-{
-	return (pgsocket) conn->tcp;
-}
-
-int
-walprop_flush(WalProposerConn *conn)
-{
-	// walprop_log(LOG, "not implemented");
-    return 0;
-}
-
-void
-walprop_finish(WalProposerConn *conn)
-{
-	// walprop_log(LOG, "walprop_finish not implemented");
-}
-
-/*
- * Receive a message from the safekeeper.
- *
- * On success, the data is placed in *buf. It is valid until the next call
- * to this function.
- */
-PGAsyncReadResult
-walprop_async_read(WalProposerConn *conn, char **buf, int *amount)
-{
-	uintptr_t len;
-	char *msg;
-	Event event;
-
-	event = sim_epoll_peek(0);
-	if (event.tcp != conn->tcp || event.tag != Message || event.any_message != Bytes)
-		return PG_ASYNC_READ_TRY_AGAIN;
-
-	event = sim_epoll_rcv(0);
-
-	// walprop_log(LOG, "walprop_async_read, T: %d, tcp: %d, tag: %d", (int) event.tag, (int) event.tcp, (int) event.any_message);
-	Assert(event.tcp == conn->tcp);
-	Assert(event.tag == Message);
-	Assert(event.any_message == Bytes);
-	
-	msg = (char*) sim_msg_get_bytes(&len);
-	*buf = msg;
-	*amount = len;
-	// walprop_log(LOG, "walprop_async_read: %d", (int) len);
-
-    return PG_ASYNC_READ_SUCCESS;
-}
-
-PGAsyncWriteResult
-walprop_async_write(WalProposerConn *conn, void const *buf, size_t size)
-{
-	// walprop_log(LOG, "walprop_async_write");
-	sim_msg_set_bytes(buf, size);
-	sim_tcp_send(conn->tcp);
-    return PG_ASYNC_WRITE_SUCCESS;
-}
-
-/*
- * This function is very similar to walprop_async_write. For more
- * information, refer to the comments there.
- */
-bool
-walprop_blocking_write(WalProposerConn *conn, void const *buf, size_t size)
-{
-	// walprop_log(LOG, "walprop_blocking_write");
-	sim_msg_set_bytes(buf, size);
-	sim_tcp_send(conn->tcp);
-    return true;
-}
-
-void
-sim_start_replication(XLogRecPtr startptr)
-{
-	walprop_log(LOG, "sim_start_replication: %X/%X", LSN_FORMAT_ARGS(startptr));
-	sim_latest_available_lsn = startptr;
-
-	for (;;)
-	{
-		XLogRecPtr endptr = sim_latest_available_lsn;
-
-		Assert(startptr <= endptr);
-		if (endptr > startptr)
-		{
-			WalProposerBroadcast(startptr, endptr);
-			startptr = endptr;
-		}
-
-		WalProposerPoll();
-	}
-}
-
-#define UsableBytesInPage (XLOG_BLCKSZ - SizeOfXLogShortPHD)
-
-static int UsableBytesInSegment =
-		(DEFAULT_XLOG_SEG_SIZE / XLOG_BLCKSZ * UsableBytesInPage) -
-		(SizeOfXLogLongPHD - SizeOfXLogShortPHD);
-
-/*
- * Converts a "usable byte position" to XLogRecPtr. A usable byte position
- * is the position starting from the beginning of WAL, excluding all WAL
- * page headers.
- */
-static XLogRecPtr
-XLogBytePosToRecPtr(uint64 bytepos)
-{
-	uint64		fullsegs;
-	uint64		fullpages;
-	uint64		bytesleft;
-	uint32		seg_offset;
-	XLogRecPtr	result;
-
-	fullsegs = bytepos / UsableBytesInSegment;
-	bytesleft = bytepos % UsableBytesInSegment;
-
-	if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
-	{
-		/* fits on first page of segment */
-		seg_offset = bytesleft + SizeOfXLogLongPHD;
-	}
-	else
-	{
-		/* account for the first page on segment with long header */
-		seg_offset = XLOG_BLCKSZ;
-		bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
-
-		fullpages = bytesleft / UsableBytesInPage;
-		bytesleft = bytesleft % UsableBytesInPage;
-
-		seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
-	}
-
-	XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
-
-	return result;
-}
-
-/*
- * Convert an XLogRecPtr to a "usable byte position".
- */
-static uint64
-XLogRecPtrToBytePos(XLogRecPtr ptr)
-{
-	uint64		fullsegs;
-	uint32		fullpages;
-	uint32		offset;
-	uint64		result;
-
-	XLByteToSeg(ptr, fullsegs, wal_segment_size);
-
-	fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ;
-	offset = ptr % XLOG_BLCKSZ;
-
-	if (fullpages == 0)
-	{
-		result = fullsegs * UsableBytesInSegment;
-		if (offset > 0)
-		{
-			Assert(offset >= SizeOfXLogLongPHD);
-			result += offset - SizeOfXLogLongPHD;
-		}
-	}
-	else
-	{
-		result = fullsegs * UsableBytesInSegment +
-			(XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
-			(fullpages - 1) * UsableBytesInPage;	/* full pages */
-		if (offset > 0)
-		{
-			Assert(offset >= SizeOfXLogShortPHD);
-			result += offset - SizeOfXLogShortPHD;
-		}
-	}
-
-	return result;
-}
-
-#define max_rdatas 16
-
-void InitMyInsert();
-static void MyBeginInsert();
-static void MyRegisterData(char *data, int len);
-static XLogRecPtr MyFinishInsert(RmgrId rmid, uint8 info, uint8 flags);
-static void MyCopyXLogRecordToWAL(int write_len, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos);
-
-/*
- * An array of XLogRecData structs, to hold registered data.
- */
-static XLogRecData rdatas[max_rdatas];
-static int	num_rdatas;			/* entries currently used */
-static uint32 mainrdata_len;	/* total # of bytes in chain */
-static XLogRecData hdr_rdt;
-static char hdr_scratch[16000];
-static XLogRecPtr CurrBytePos;
-static XLogRecPtr PrevBytePos;
-
-void InitMyInsert()
-{
-	CurrBytePos = sim_redo_start_lsn;
-	PrevBytePos = InvalidXLogRecPtr;
-	sim_latest_available_lsn = sim_redo_start_lsn;
-}
-
-static void MyBeginInsert()
-{
-	num_rdatas = 0;
-	mainrdata_len = 0;
-}
-
-static void MyRegisterData(char *data, int len)
-{
-	XLogRecData *rdata;
-
-	if (num_rdatas >= max_rdatas)
-		walprop_log(ERROR, "too much WAL data");
-	rdata = &rdatas[num_rdatas++];
-
-	rdata->data = data;
-	rdata->len = len;
-	rdata->next = NULL;
-
-	if (num_rdatas > 1) {
-		rdatas[num_rdatas - 2].next = rdata;
-	}
-
-	mainrdata_len += len;
-}
-
-static XLogRecPtr
-MyFinishInsert(RmgrId rmid, uint8 info, uint8 flags)
-{
-	XLogRecData *rdt;
-	uint32		total_len = 0;
-	int			block_id;
-	pg_crc32c	rdata_crc;
-	XLogRecord *rechdr;
-	char	   *scratch = hdr_scratch;
-	int         size;
-	XLogRecPtr  StartPos;
-	XLogRecPtr  EndPos;
-	uint64		startbytepos;
-	uint64		endbytepos;
-
-	/*
-	 * Note: this function can be called multiple times for the same record.
-	 * All the modifications we do to the rdata chains below must handle that.
-	 */
-
-	/* The record begins with the fixed-size header */
-	rechdr = (XLogRecord *) scratch;
-	scratch += SizeOfXLogRecord;
-
-	hdr_rdt.data = hdr_scratch;
-	
-	if (num_rdatas > 0)
-	{
-		hdr_rdt.next = &rdatas[0];
-	}
-	else
-	{
-		hdr_rdt.next = NULL;
-	}
-
-	/* followed by main data, if any */
-	if (mainrdata_len > 0)
-	{
-		if (mainrdata_len > 255)
-		{
-			*(scratch++) = (char) XLR_BLOCK_ID_DATA_LONG;
-			memcpy(scratch, &mainrdata_len, sizeof(uint32));
-			scratch += sizeof(uint32);
-		}
-		else
-		{
-			*(scratch++) = (char) XLR_BLOCK_ID_DATA_SHORT;
-			*(scratch++) = (uint8) mainrdata_len;
-		}
-		total_len += mainrdata_len;
-	}
-
-	hdr_rdt.len = (scratch - hdr_scratch);
-	total_len += hdr_rdt.len;
-
-	/*
-	 * Calculate CRC of the data
-	 *
-	 * Note that the record header isn't added into the CRC initially since we
-	 * don't know the prev-link yet.  Thus, the CRC will represent the CRC of
-	 * the whole record in the order: rdata, then backup blocks, then record
-	 * header.
-	 */
-	INIT_CRC32C(rdata_crc);
-	COMP_CRC32C(rdata_crc, hdr_scratch + SizeOfXLogRecord, hdr_rdt.len - SizeOfXLogRecord);
-	for (size_t i = 0; i < num_rdatas; i++)
-	{
-		rdt = &rdatas[i];
-		COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
-	}
-
-	/*
-	 * Fill in the fields in the record header. Prev-link is filled in later,
-	 * once we know where in the WAL the record will be inserted. The CRC does
-	 * not include the record header yet.
-	 */
-	rechdr->xl_xid = 0;
-	rechdr->xl_tot_len = total_len;
-	rechdr->xl_info = info;
-	rechdr->xl_rmid = rmid;
-	rechdr->xl_prev = InvalidXLogRecPtr;
-	rechdr->xl_crc = rdata_crc;
-
-	size = MAXALIGN(rechdr->xl_tot_len);
-
-	/* All (non xlog-switch) records should contain data. */
-	Assert(size > SizeOfXLogRecord);
-
-	startbytepos = XLogRecPtrToBytePos(CurrBytePos);
-	endbytepos = startbytepos + size;
-
-	// Get the position.
-	StartPos = XLogBytePosToRecPtr(startbytepos);
-	EndPos = XLogBytePosToRecPtr(startbytepos + size);
-	rechdr->xl_prev = PrevBytePos;
-
-	Assert(XLogRecPtrToBytePos(StartPos) == startbytepos);
-	Assert(XLogRecPtrToBytePos(EndPos) == endbytepos);
-
-	// Update global pointers.
-	CurrBytePos = EndPos;
-	PrevBytePos = StartPos;
-
-	/*
-	 * Now that xl_prev has been filled in, calculate CRC of the record
-	 * header.
-	 */
-	rdata_crc = rechdr->xl_crc;
-	COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
-	FIN_CRC32C(rdata_crc);
-	rechdr->xl_crc = rdata_crc;
-
-	// Now write it to disk.
-	MyCopyXLogRecordToWAL(rechdr->xl_tot_len, &hdr_rdt, StartPos, EndPos);
-	return EndPos;
-}
-
-#define INSERT_FREESPACE(endptr)	\
-	(((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
-
-static void
-MyCopyXLogRecordToWAL(int write_len, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos)
-{
-	XLogRecPtr	CurrPos;
-	int			written;
-	int			freespace;
-
-	// Write hdr_rdt and `num_rdatas` other datas.
-	CurrPos = StartPos;
-	freespace = INSERT_FREESPACE(CurrPos);
-	written = 0;
-
-	Assert(freespace >= sizeof(uint32));
-
-	while (rdata != NULL)
-	{
-		char	   *rdata_data = rdata->data;
-		int			rdata_len = rdata->len;
-
-		while (rdata_len >= freespace)
-		{
-			char header_buf[SizeOfXLogLongPHD];
-			XLogPageHeader NewPage = (XLogPageHeader) header_buf;
-			
-			Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
-			XLogWalPropWrite(rdata_data, freespace, CurrPos);
-			rdata_data += freespace;
-			rdata_len -= freespace;
-			written += freespace;
-			CurrPos += freespace;
-
-			// Init new page
-			MemSet(header_buf, 0, SizeOfXLogLongPHD);
-			
-			/*
-			 * Fill the new page's header
-			 */
-			NewPage->xlp_magic = XLOG_PAGE_MAGIC;
-
-			/* NewPage->xlp_info = 0; */	/* done by memset */
-			NewPage->xlp_tli = 1;
-			NewPage->xlp_pageaddr = CurrPos;
-
-			/* NewPage->xlp_rem_len = 0; */	/* done by memset */
-			NewPage->xlp_info |= XLP_BKP_REMOVABLE;
-
-			/*
-			 * If first page of an XLOG segment file, make it a long header.
-			 */
-			if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
-			{
-				XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
-
-				NewLongPage->xlp_sysid = 0;
-				NewLongPage->xlp_seg_size = wal_segment_size;
-				NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
-				NewPage->xlp_info |= XLP_LONG_HEADER;
-			}
-
-			NewPage->xlp_rem_len = write_len - written;
-			if (NewPage->xlp_rem_len > 0) {
-				NewPage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
-			}
-
-			/* skip over the page header */
-			if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
-			{
-				XLogWalPropWrite(header_buf, SizeOfXLogLongPHD, CurrPos);
-				CurrPos += SizeOfXLogLongPHD;
-			}
-			else
-			{
-				XLogWalPropWrite(header_buf, SizeOfXLogShortPHD, CurrPos);
-				CurrPos += SizeOfXLogShortPHD;
-			}
-			freespace = INSERT_FREESPACE(CurrPos);
-		}
-
-		Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
-		XLogWalPropWrite(rdata_data, rdata_len, CurrPos);
-		CurrPos += rdata_len;
-		written += rdata_len;
-		freespace -= rdata_len;
-
-		rdata = rdata->next;
-	}
-
-	Assert(written == write_len);
-	CurrPos = MAXALIGN64(CurrPos);
-	Assert(CurrPos == EndPos);
-}
-
-XLogRecPtr MyInsertRecord()
-{
-	const char *prefix = "prefix";
-	const char *message = "message";
-	size_t size = 7;
-	bool transactional = false;
-
-	xl_logical_message xlrec;
-
-	xlrec.dbId = 0;
-	xlrec.transactional = transactional;
-	/* trailing zero is critical; see logicalmsg_desc */
-	xlrec.prefix_size = strlen(prefix) + 1;
-	xlrec.message_size = size;
-
-	MyBeginInsert();
-	MyRegisterData((char *) &xlrec, SizeOfLogicalMessage);
-	MyRegisterData(unconstify(char *, prefix), xlrec.prefix_size);
-	MyRegisterData(unconstify(char *, message), size);
-
-	return MyFinishInsert(RM_LOGICALMSG_ID, XLOG_LOGICAL_MESSAGE, XLOG_INCLUDE_ORIGIN);
-}
--- a/libs/walproposer/rust_bindings.h
+++ b/libs/walproposer/rust_bindings.h
@@ -1,106 +0,0 @@
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdlib.h>
-
-/**
- * List of all possible AnyMessage.
- */
-enum AnyMessageTag {
-  None,
-  InternalConnect,
-  Just32,
-  ReplCell,
-  Bytes,
-  LSN,
-};
-typedef uint8_t AnyMessageTag;
-
-/**
- * List of all possible NodeEvent.
- */
-enum EventTag {
-  Timeout,
-  Accept,
-  Closed,
-  Message,
-  Internal,
-};
-typedef uint8_t EventTag;
-
-/**
- * Event returned by epoll_recv.
- */
-typedef struct Event {
-  EventTag tag;
-  int64_t tcp;
-  AnyMessageTag any_message;
-} Event;
-
-void rust_function(uint32_t a);
-
-/**
- * C API for the node os.
- */
-void sim_sleep(uint64_t ms);
-
-uint64_t sim_random(uint64_t max);
-
-uint32_t sim_id(void);
-
-int64_t sim_open_tcp(uint32_t dst);
-
-int64_t sim_open_tcp_nopoll(uint32_t dst);
-
-/**
- * Send MESSAGE_BUF content to the given tcp.
- */
-void sim_tcp_send(int64_t tcp);
-
-/**
- * Receive a message from the given tcp. Can be used only with tcp opened with
- * `sim_open_tcp_nopoll`.
- */
-struct Event sim_tcp_recv(int64_t tcp);
-
-struct Event sim_epoll_rcv(int64_t timeout);
-
-struct Event sim_epoll_peek(int64_t timeout);
-
-int64_t sim_now(void);
-
-void sim_exit(int32_t code, const uint8_t *msg);
-
-void sim_set_result(int32_t code, const uint8_t *msg);
-
-void sim_log_event(const int8_t *msg);
-
-/**
- * Get tag of the current message.
- */
-AnyMessageTag sim_msg_tag(void);
-
-/**
- * Read AnyMessage::Just32 message.
- */
-void sim_msg_get_just_u32(uint32_t *val);
-
-/**
- * Read AnyMessage::LSN message.
- */
-void sim_msg_get_lsn(uint64_t *val);
-
-/**
- * Write AnyMessage::ReplCell message.
- */
-void sim_msg_set_repl_cell(uint32_t value, uint32_t client_id, uint32_t seqno);
-
-/**
- * Write AnyMessage::Bytes message.
- */
-void sim_msg_set_bytes(const char *bytes, uintptr_t len);
-
-/**
- * Read AnyMessage::Bytes message.
- */
-const char *sim_msg_get_bytes(uintptr_t *len);
--- a/libs/walproposer/src/lib.rs
+++ b/libs/walproposer/src/lib.rs
@@ -1,36 +0,0 @@
-#![allow(non_upper_case_globals)]
-#![allow(non_camel_case_types)]
-#![allow(non_snake_case)]
-
-use safekeeper::simlib::node_os::NodeOs;
-use tracing::info;
-
-pub mod bindings {
-    include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
-}
-
-#[no_mangle]
-pub extern "C" fn rust_function(a: u32) {
-    info!("Hello from Rust!");
-    info!("a: {}", a);
-}
-
-pub mod sim;
-pub mod sim_proto;
-
-#[cfg(test)]
-mod test;
-
-#[cfg(test)]
-pub mod simtest;
-
-pub fn c_context() -> Option<Box<dyn Fn(NodeOs) + Send + Sync>> {
-    Some(Box::new(|os: NodeOs| {
-        sim::c_attach_node_os(os);
-        unsafe { bindings::MyContextInit(); }
-    }))
-}
-
-pub fn enable_debug() {
-    unsafe { bindings::debug_enabled = true; }
-}
--- a/libs/walproposer/src/sim.rs
+++ b/libs/walproposer/src/sim.rs
@@ -1,240 +0,0 @@
-use log::debug;
-use safekeeper::simlib::{network::TCP, node_os::NodeOs, world::NodeEvent};
-use std::{
-    cell::RefCell,
-    collections::HashMap,
-    ffi::{CStr, CString},
-};
-use tracing::trace;
-
-use crate::sim_proto::{anymessage_tag, AnyMessageTag, Event, EventTag, MESSAGE_BUF};
-
-thread_local! {
-    static CURRENT_NODE_OS: RefCell<Option<NodeOs>> = RefCell::new(None);
-    static TCP_CACHE: RefCell<HashMap<i64, TCP>> = RefCell::new(HashMap::new());
-}
-
-/// Get the current node os.
-fn os() -> NodeOs {
-    CURRENT_NODE_OS.with(|cell| cell.borrow().clone().expect("no node os set"))
-}
-
-fn tcp_save(tcp: TCP) -> i64 {
-    TCP_CACHE.with(|cell| {
-        let mut cache = cell.borrow_mut();
-        let id = tcp.id();
-        cache.insert(id, tcp);
-        id
-    })
-}
-
-fn tcp_load(id: i64) -> TCP {
-    TCP_CACHE.with(|cell| {
-        let cache = cell.borrow();
-        cache.get(&id).expect("unknown TCP id").clone()
-    })
-}
-
-/// Should be called before calling any of the C functions.
-pub(crate) fn c_attach_node_os(os: NodeOs) {
-    CURRENT_NODE_OS.with(|cell| {
-        *cell.borrow_mut() = Some(os);
-    });
-    TCP_CACHE.with(|cell| {
-        *cell.borrow_mut() = HashMap::new();
-    });
-}
-
-/// C API for the node os.
-
-#[no_mangle]
-pub extern "C" fn sim_sleep(ms: u64) {
-    os().sleep(ms);
-}
-
-#[no_mangle]
-pub extern "C" fn sim_random(max: u64) -> u64 {
-    os().random(max)
-}
-
-#[no_mangle]
-pub extern "C" fn sim_id() -> u32 {
-    os().id().into()
-}
-
-#[no_mangle]
-pub extern "C" fn sim_open_tcp(dst: u32) -> i64 {
-    tcp_save(os().open_tcp(dst.into()))
-}
-
-#[no_mangle]
-pub extern "C" fn sim_open_tcp_nopoll(dst: u32) -> i64 {
-    tcp_save(os().open_tcp_nopoll(dst.into()))
-}
-
-#[no_mangle]
-/// Send MESSAGE_BUF content to the given tcp.
-pub extern "C" fn sim_tcp_send(tcp: i64) {
-    tcp_load(tcp).send(MESSAGE_BUF.with(|cell| cell.borrow().clone()));
-}
-
-#[no_mangle]
-/// Receive a message from the given tcp. Can be used only with tcp opened with
-/// `sim_open_tcp_nopoll`.
-pub extern "C" fn sim_tcp_recv(tcp: i64) -> Event {
-    let event = tcp_load(tcp).recv();
-    match event {
-        NodeEvent::Accept(_) => unreachable!(),
-        NodeEvent::Closed(_) => Event {
-            tag: EventTag::Closed,
-            tcp: 0,
-            any_message: AnyMessageTag::None,
-        },
-        NodeEvent::Internal(_) => unreachable!(),
-        NodeEvent::Message((message, _)) => {
-            // store message in thread local storage, C code should use
-            // sim_msg_* functions to access it.
-            MESSAGE_BUF.with(|cell| {
-                *cell.borrow_mut() = message.clone();
-            });
-            Event {
-                tag: EventTag::Message,
-                tcp: 0,
-                any_message: anymessage_tag(&message),
-            }
-        }
-        NodeEvent::WakeTimeout(_) => unreachable!(),
-    }
-}
-
-#[no_mangle]
-pub extern "C" fn sim_epoll_rcv(timeout: i64) -> Event {
-    let event = os().epoll_recv(timeout);
-    let event = if let Some(event) = event {
-        event
-    } else {
-        return Event {
-            tag: EventTag::Timeout,
-            tcp: 0,
-            any_message: AnyMessageTag::None,
-        };
-    };
-
-    match event {
-        NodeEvent::Accept(tcp) => Event {
-            tag: EventTag::Accept,
-            tcp: tcp_save(tcp),
-            any_message: AnyMessageTag::None,
-        },
-        NodeEvent::Closed(tcp) => Event {
-            tag: EventTag::Closed,
-            tcp: tcp_save(tcp),
-            any_message: AnyMessageTag::None,
-        },
-        NodeEvent::Message((message, tcp)) => {
-            // store message in thread local storage, C code should use
-            // sim_msg_* functions to access it.
-            MESSAGE_BUF.with(|cell| {
-                *cell.borrow_mut() = message.clone();
-            });
-            Event {
-                tag: EventTag::Message,
-                tcp: tcp_save(tcp),
-                any_message: anymessage_tag(&message),
-            }
-        }
-        NodeEvent::Internal(message) => {
-            // store message in thread local storage, C code should use
-            // sim_msg_* functions to access it.
-            MESSAGE_BUF.with(|cell| {
-                *cell.borrow_mut() = message.clone();
-            });
-            Event {
-                tag: EventTag::Internal,
-                tcp: 0,
-                any_message: anymessage_tag(&message),
-            }
-        }
-        NodeEvent::WakeTimeout(_) => {
-            // can't happen
-            unreachable!()
-        }
-    }
-}
-
-#[no_mangle]
-pub extern "C" fn sim_epoll_peek(timeout: i64) -> Event {
-    let event = os().epoll_peek(timeout);
-    let event = if let Some(event) = event {
-        event
-    } else {
-        return Event {
-            tag: EventTag::Timeout,
-            tcp: 0,
-            any_message: AnyMessageTag::None,
-        };
-    };
-
-    match event {
-        NodeEvent::Accept(tcp) => Event {
-            tag: EventTag::Accept,
-            tcp: tcp_save(tcp),
-            any_message: AnyMessageTag::None,
-        },
-        NodeEvent::Closed(tcp) => Event {
-            tag: EventTag::Closed,
-            tcp: tcp_save(tcp),
-            any_message: AnyMessageTag::None,
-        },
-        NodeEvent::Message((message, tcp)) => Event {
-            tag: EventTag::Message,
-            tcp: tcp_save(tcp),
-            any_message: anymessage_tag(&message),
-        },
-        NodeEvent::Internal(message) => Event {
-            tag: EventTag::Internal,
-            tcp: 0,
-            any_message: anymessage_tag(&message),
-        },
-        NodeEvent::WakeTimeout(_) => {
-            // can't happen
-            unreachable!()
-        }
-    }
-}
-
-#[no_mangle]
-pub extern "C" fn sim_now() -> i64 {
-    os().now() as i64
-}
-
-#[no_mangle]
-pub extern "C" fn sim_exit(code: i32, msg: *const u8) {
-    trace!("sim_exit({}, {:?})", code, msg);
-    sim_set_result(code, msg);
-
-    // I tried to make use of pthread_exit, but it doesn't work.
-    // https://github.com/rust-lang/unsafe-code-guidelines/issues/211
-    // unsafe { libc::pthread_exit(std::ptr::null_mut()) };
-
-    // https://doc.rust-lang.org/nomicon/unwinding.html
-    // Everyone on the internet saying this is UB, but it works for me,
-    // so I'm going to use it for now.
-    panic!("sim_exit() called from C code")
-}
-
-#[no_mangle]
-pub extern "C" fn sim_set_result(code: i32, msg: *const u8) {
-    let msg = unsafe { CStr::from_ptr(msg as *const i8) };
-    let msg = msg.to_string_lossy().into_owned();
-    debug!("sim_set_result({}, {:?})", code, msg);
-    os().set_result(code, msg);
-}
-
-#[no_mangle]
-pub extern "C" fn sim_log_event(msg: *const i8) {
-    let msg = unsafe { CStr::from_ptr(msg) };
-    let msg = msg.to_string_lossy().into_owned();
-    debug!("sim_log_event({:?})", msg);
-    os().log_event(msg);
-}
--- a/libs/walproposer/src/sim_proto.rs
+++ b/libs/walproposer/src/sim_proto.rs
@@ -1,114 +0,0 @@
-use safekeeper::simlib::proto::{AnyMessage, ReplCell};
-use std::{cell::RefCell, ffi::c_char};
-
-pub(crate) fn anymessage_tag(msg: &AnyMessage) -> AnyMessageTag {
-    match msg {
-        AnyMessage::None => AnyMessageTag::None,
-        AnyMessage::InternalConnect => AnyMessageTag::InternalConnect,
-        AnyMessage::Just32(_) => AnyMessageTag::Just32,
-        AnyMessage::ReplCell(_) => AnyMessageTag::ReplCell,
-        AnyMessage::Bytes(_) => AnyMessageTag::Bytes,
-        AnyMessage::LSN(_) => AnyMessageTag::LSN,
-    }
-}
-
-thread_local! {
-    pub static MESSAGE_BUF: RefCell<AnyMessage> = RefCell::new(AnyMessage::None);
-}
-
-#[no_mangle]
-/// Get tag of the current message.
-pub extern "C" fn sim_msg_tag() -> AnyMessageTag {
-    MESSAGE_BUF.with(|cell| anymessage_tag(&*cell.borrow()))
-}
-
-#[no_mangle]
-/// Read AnyMessage::Just32 message.
-pub extern "C" fn sim_msg_get_just_u32(val: &mut u32) {
-    MESSAGE_BUF.with(|cell| match &*cell.borrow() {
-        AnyMessage::Just32(v) => {
-            *val = *v;
-        }
-        _ => panic!("expected Just32 message"),
-    });
-}
-
-#[no_mangle]
-/// Read AnyMessage::LSN message.
-pub extern "C" fn sim_msg_get_lsn(val: &mut u64) {
-    MESSAGE_BUF.with(|cell| match &*cell.borrow() {
-        AnyMessage::LSN(v) => {
-            *val = *v;
-        }
-        _ => panic!("expected LSN message"),
-    });
-}
-
-#[no_mangle]
-/// Write AnyMessage::ReplCell message.
-pub extern "C" fn sim_msg_set_repl_cell(value: u32, client_id: u32, seqno: u32) {
-    MESSAGE_BUF.with(|cell| {
-        *cell.borrow_mut() = AnyMessage::ReplCell(ReplCell {
-            value,
-            client_id,
-            seqno,
-        });
-    });
-}
-
-#[no_mangle]
-/// Write AnyMessage::Bytes message.
-pub extern "C" fn sim_msg_set_bytes(bytes: *const c_char, len: usize) {
-    MESSAGE_BUF.with(|cell| {
-        // copy bytes to a Rust Vec
-        let mut v: Vec<u8> = Vec::with_capacity(len);
-        unsafe {
-            v.set_len(len);
-            std::ptr::copy_nonoverlapping(bytes as *const u8, v.as_mut_ptr(), len);
-        }
-        *cell.borrow_mut() = AnyMessage::Bytes(v.into());
-    });
-}
-
-#[no_mangle]
-/// Read AnyMessage::Bytes message.
-pub extern "C" fn sim_msg_get_bytes(len: *mut usize) -> *const c_char {
-    MESSAGE_BUF.with(|cell| match &*cell.borrow() {
-        AnyMessage::Bytes(v) => {
-            unsafe {
-                *len = v.len();
-                v.as_ptr() as *const i8
-            }
-        }
-        _ => panic!("expected Bytes message"),
-    })
-}
-
-#[repr(C)]
-/// Event returned by epoll_recv.
-pub struct Event {
-    pub tag: EventTag,
-    pub tcp: i64,
-    pub any_message: AnyMessageTag,
-}
-
-#[repr(u8)]
-/// List of all possible NodeEvent.
-pub enum EventTag {
-    Timeout,
-    Accept,
-    Closed,
-    Message,
-    Internal,
-}
-
-#[repr(u8)]
-/// List of all possible AnyMessage.
-pub enum AnyMessageTag {
-    None,
-    InternalConnect,
-    Just32,
-    ReplCell,
-    Bytes,
-    LSN,
-}
--- a/libs/walproposer/src/simtest/disk.rs
+++ b/libs/walproposer/src/simtest/disk.rs
@@ -1,88 +0,0 @@
-use std::collections::HashMap;
-use std::sync::Arc;
-
-use safekeeper::safekeeper::SafeKeeperState;
-use safekeeper::simlib::sync::Mutex;
-use utils::id::TenantTimelineId;
-
-pub struct Disk {
-    pub timelines: Mutex<HashMap<TenantTimelineId, Arc<TimelineDisk>>>,
-}
-
-impl Disk {
-    pub fn new() -> Self {
-        Disk {
-            timelines: Mutex::new(HashMap::new()),
-        }
-    }
-
-    pub fn put_state(&self, ttid: &TenantTimelineId, state: SafeKeeperState) -> Arc<TimelineDisk> {
-        self.timelines
-            .lock()
-            .entry(ttid.clone())
-            .and_modify(|e| {
-                let mut mu = e.state.lock();
-                *mu = state.clone();
-            })
-            .or_insert_with(|| {
-                Arc::new(TimelineDisk {
-                    state: Mutex::new(state),
-                    wal: Mutex::new(BlockStorage::new()),
-                })
-            })
-            .clone()
-    }
-}
-
-pub struct TimelineDisk {
-    pub state: Mutex<SafeKeeperState>,
-    pub wal: Mutex<BlockStorage>,
-}
-
-const BLOCK_SIZE: usize = 8192;
-
-pub struct BlockStorage {
-    blocks: HashMap<u64, [u8; BLOCK_SIZE]>,
-}
-
-impl BlockStorage {
-    pub fn new() -> Self {
-        BlockStorage {
-            blocks: HashMap::new(),
-        }
-    }
-
-    pub fn read(&self, pos: u64, buf: &mut [u8]) {
-        let mut buf_offset = 0;
-        let mut storage_pos = pos;
-        while buf_offset < buf.len() {
-            let block_id = storage_pos / BLOCK_SIZE as u64;
-            let block = self.blocks.get(&block_id).unwrap_or(&[0; BLOCK_SIZE]);
-            let block_offset = storage_pos % BLOCK_SIZE as u64;
-            let block_len = BLOCK_SIZE as u64 - block_offset;
-            let buf_len = buf.len() - buf_offset;
-            let copy_len = std::cmp::min(block_len as usize, buf_len);
-            buf[buf_offset..buf_offset + copy_len]
-                .copy_from_slice(&block[block_offset as usize..block_offset as usize + copy_len]);
-            buf_offset += copy_len;
-            storage_pos += copy_len as u64;
-        }
-    }
-
-    pub fn write(&mut self, pos: u64, buf: &[u8]) {
-        let mut buf_offset = 0;
-        let mut storage_pos = pos;
-        while buf_offset < buf.len() {
-            let block_id = storage_pos / BLOCK_SIZE as u64;
-            let block = self.blocks.entry(block_id).or_insert([0; BLOCK_SIZE]);
-            let block_offset = storage_pos % BLOCK_SIZE as u64;
-            let block_len = BLOCK_SIZE as u64 - block_offset;
-            let buf_len = buf.len() - buf_offset;
-            let copy_len = std::cmp::min(block_len as usize, buf_len);
-            block[block_offset as usize..block_offset as usize + copy_len]
-                .copy_from_slice(&buf[buf_offset..buf_offset + copy_len]);
-            buf_offset += copy_len;
-            storage_pos += copy_len as u64
-        }
-    }
-}
--- a/libs/walproposer/src/simtest/log.rs
+++ b/libs/walproposer/src/simtest/log.rs
@@ -1,61 +0,0 @@
-use std::{sync::Arc, fmt};
-
-use safekeeper::simlib::{world::World, sync::Mutex};
-use tracing_subscriber::fmt::{time::FormatTime, format::Writer};
-use utils::logging;
-
-use crate::bindings;
-
-
-#[derive(Clone)]
-pub struct SimClock {
-    world_ptr: Arc<Mutex<Option<Arc<World>>>>,
-}
-
-impl Default for SimClock {
-    fn default() -> Self {
-        SimClock {
-            world_ptr: Arc::new(Mutex::new(None)),
-        }
-    }
-}
-
-impl SimClock {
-    pub fn set_world(&self, world: Arc<World>) {
-        *self.world_ptr.lock() = Some(world);
-    }
-}
-
-impl FormatTime for SimClock {
-    fn format_time(&self, w: &mut Writer<'_>) -> fmt::Result {
-        let world = self.world_ptr.lock().clone();
-        
-        if let Some(world) = world {
-            let now = world.now();
-            write!(w, "[{}]", now)
-        } else {
-            write!(w, "[?]")
-        }
-    }
-}
-
-pub fn init_logger() -> SimClock {
-    let debug_enabled = unsafe { bindings::debug_enabled };
-
-    let clock = SimClock::default();
-    let base_logger = tracing_subscriber::fmt()
-        .with_target(false)
-        .with_timer(clock.clone())
-        .with_ansi(true)
-        .with_max_level(match debug_enabled {
-            true => tracing::Level::DEBUG,
-            false => tracing::Level::INFO,
-        })
-        .with_writer(std::io::stdout);
-    base_logger.init();
-
-    // logging::replace_panic_hook_with_tracing_panic_hook().forget();
-    std::panic::set_hook(Box::new(|_| {}));
-
-    clock
-}
--- a/libs/walproposer/src/simtest/mod.rs
+++ b/libs/walproposer/src/simtest/mod.rs
@@ -1,11 +0,0 @@
-#[cfg(test)]
-pub mod simple_client;
-
-#[cfg(test)]
-pub mod wp_sk;
-
-pub mod disk;
-pub mod safekeeper;
-pub mod storage;
-pub mod log;
-pub mod util;
--- a/libs/walproposer/src/simtest/safekeeper.rs
+++ b/libs/walproposer/src/simtest/safekeeper.rs
@@ -1,372 +0,0 @@
-//! Safekeeper communication endpoint to WAL proposer (compute node).
-//! Gets messages from the network, passes them down to consensus module and
-//! sends replies back.
-
-use std::{collections::HashMap, path::PathBuf, sync::Arc, time::Duration};
-
-use anyhow::{anyhow, bail, Result};
-use bytes::{Bytes, BytesMut};
-use hyper::Uri;
-use log::info;
-use safekeeper::{
-    safekeeper::{
-        ProposerAcceptorMessage, SafeKeeper, SafeKeeperState, ServerInfo, UNKNOWN_SERVER_VERSION,
-    },
-    simlib::{network::TCP, node_os::NodeOs, proto::AnyMessage, world::NodeEvent},
-    timeline::TimelineError,
-    SafeKeeperConf, wal_storage::Storage,
-};
-use tracing::{debug, info_span};
-use utils::{
-    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
-    lsn::Lsn,
-};
-
-use crate::simtest::storage::DiskStateStorage;
-
-use super::{
-    disk::{Disk, TimelineDisk},
-    storage::DiskWALStorage,
-};
-
-struct ConnState {
-    tcp: TCP,
-
-    greeting: bool,
-    ttid: TenantTimelineId,
-    flush_pending: bool,
-}
-
-struct SharedState {
-    sk: SafeKeeper<DiskStateStorage, DiskWALStorage>,
-    disk: Arc<TimelineDisk>,
-}
-
-struct GlobalMap {
-    timelines: HashMap<TenantTimelineId, SharedState>,
-    conf: SafeKeeperConf,
-    disk: Arc<Disk>,
-}
-
-impl GlobalMap {
-    fn new(disk: Arc<Disk>, conf: SafeKeeperConf) -> Result<Self> {
-        let mut timelines = HashMap::new();
-
-        for (&ttid, disk) in disk.timelines.lock().iter() {
-            debug!("loading timeline {}", ttid);
-            let state = disk.state.lock().clone();
-
-            if state.server.wal_seg_size == 0 {
-                bail!(TimelineError::UninitializedWalSegSize(ttid));
-            }
-
-            if state.server.pg_version == UNKNOWN_SERVER_VERSION {
-                bail!(TimelineError::UninitialinzedPgVersion(ttid));
-            }
-
-            if state.commit_lsn < state.local_start_lsn {
-                bail!(
-                    "commit_lsn {} is higher than local_start_lsn {}",
-                    state.commit_lsn,
-                    state.local_start_lsn
-                );
-            }
-
-            let control_store = DiskStateStorage::new(disk.clone());
-            let wal_store = DiskWALStorage::new(disk.clone(), &control_store)?;
-
-            let sk = SafeKeeper::new(control_store, wal_store, conf.my_id)?;
-            timelines.insert(
-                ttid.clone(),
-                SharedState {
-                    sk,
-                    disk: disk.clone(),
-                },
-            );
-        }
-
-        Ok(Self {
-            timelines,
-            conf,
-            disk,
-        })
-    }
-
-    fn create(&mut self, ttid: TenantTimelineId, server_info: ServerInfo) -> Result<()> {
-        if self.timelines.contains_key(&ttid) {
-            bail!("timeline {} already exists", ttid);
-        }
-
-        debug!("creating new timeline {}", ttid);
-
-        let commit_lsn = Lsn::INVALID;
-        let local_start_lsn = Lsn::INVALID;
-
-        // TODO: load state from in-memory storage
-        let state = SafeKeeperState::new(&ttid, server_info, vec![], commit_lsn, local_start_lsn);
-
-        if state.server.wal_seg_size == 0 {
-            bail!(TimelineError::UninitializedWalSegSize(ttid));
-        }
-
-        if state.server.pg_version == UNKNOWN_SERVER_VERSION {
-            bail!(TimelineError::UninitialinzedPgVersion(ttid));
-        }
-
-        if state.commit_lsn < state.local_start_lsn {
-            bail!(
-                "commit_lsn {} is higher than local_start_lsn {}",
-                state.commit_lsn,
-                state.local_start_lsn
-            );
-        }
-
-        let disk_timeline = self.disk.put_state(&ttid, state);
-        let control_store = DiskStateStorage::new(disk_timeline.clone());
-        let wal_store = DiskWALStorage::new(disk_timeline.clone(), &control_store)?;
-
-        let sk = SafeKeeper::new(control_store, wal_store, self.conf.my_id)?;
-
-        self.timelines.insert(
-            ttid.clone(),
-            SharedState {
-                sk,
-                disk: disk_timeline,
-            },
-        );
-        Ok(())
-    }
-
-    fn get(&mut self, ttid: &TenantTimelineId) -> &mut SharedState {
-        self.timelines.get_mut(ttid).expect("timeline must exist")
-    }
-
-    fn has_tli(&self, ttid: &TenantTimelineId) -> bool {
-        self.timelines.contains_key(ttid)
-    }
-}
-
-pub fn run_server(os: NodeOs, disk: Arc<Disk>) -> Result<()> {
-    let _enter = info_span!("safekeeper", id = os.id()).entered();
-    debug!("started server");
-    os.log_event("started;safekeeper".to_owned());
-    let conf = SafeKeeperConf {
-        workdir: PathBuf::from("."),
-        my_id: NodeId(os.id() as u64),
-        listen_pg_addr: String::new(),
-        listen_http_addr: String::new(),
-        no_sync: false,
-        broker_endpoint: "/".parse::<Uri>().unwrap(),
-        broker_keepalive_interval: Duration::from_secs(0),
-        heartbeat_timeout: Duration::from_secs(0),
-        remote_storage: None,
-        max_offloader_lag_bytes: 0,
-        backup_runtime_threads: None,
-        wal_backup_enabled: false,
-        auth: None,
-    };
-
-    let mut global = GlobalMap::new(disk, conf.clone())?;
-    let mut conns: HashMap<i64, ConnState> = HashMap::new();
-
-    for (&ttid, shared_state) in global.timelines.iter_mut() {
-        let flush_lsn = shared_state.sk.wal_store.flush_lsn();
-        let commit_lsn = shared_state.sk.state.commit_lsn;
-        os.log_event(format!("tli_loaded;{};{}", flush_lsn.0, commit_lsn.0));
-    }
-
-    let epoll = os.epoll();
-    loop {
-        // waiting for the next message
-        let mut next_event = Some(epoll.recv());
-
-        loop {
-            let event = match next_event {
-                Some(event) => event,
-                None => break,
-            };
-
-            match event {
-                NodeEvent::Accept(tcp) => {
-                    conns.insert(
-                        tcp.id(),
-                        ConnState {
-                            tcp,
-                            greeting: false,
-                            ttid: TenantTimelineId::empty(),
-                            flush_pending: false,
-                        },
-                    );
-                }
-                NodeEvent::Message((msg, tcp)) => {
-                    let conn = conns.get_mut(&tcp.id());
-                    if let Some(conn) = conn {
-                        let res = conn.process_any(msg, &mut global);
-                        if res.is_err() {
-                            debug!("conn {:?} error: {:#}", tcp, res.unwrap_err());
-                            conns.remove(&tcp.id());
-                        }
-                    } else {
-                        debug!("conn {:?} was closed, dropping msg {:?}", tcp, msg);
-                    }
-                }
-                NodeEvent::Internal(_) => {}
-                NodeEvent::Closed(_) => {}
-                NodeEvent::WakeTimeout(_) => {}
-            }
-
-            // TODO: make simulator support multiple events per tick
-            next_event = epoll.try_recv();
-        }
-
-        conns.retain(|_, conn| {
-            let res = conn.flush(&mut global);
-            if res.is_err() {
-                debug!("conn {:?} error: {:?}", conn.tcp, res);
-            }
-            res.is_ok()
-        });
-    }
-}
-
-impl ConnState {
-    fn process_any(&mut self, any: AnyMessage, global: &mut GlobalMap) -> Result<()> {
-        if let AnyMessage::Bytes(copy_data) = any {
-            let repl_prefix = b"START_REPLICATION ";
-            if !self.greeting && copy_data.starts_with(repl_prefix) {
-                self.process_start_replication(copy_data.slice(repl_prefix.len()..), global)?;
-                bail!("finished processing START_REPLICATION")
-            }
-
-            let msg = ProposerAcceptorMessage::parse(copy_data)?;
-            debug!("got msg: {:?}", msg);
-            return self.process(msg, global);
-        } else {
-            bail!("unexpected message, expected AnyMessage::Bytes");
-        }
-    }
-
-    fn process_start_replication(
-        &mut self,
-        copy_data: Bytes,
-        global: &mut GlobalMap,
-    ) -> Result<()> {
-        // format is "<tenant_id> <timeline_id> <start_lsn> <end_lsn>"
-        let str = String::from_utf8(copy_data.to_vec())?;
-
-        let mut parts = str.split(' ');
-        let tenant_id = parts.next().unwrap().parse::<TenantId>()?;
-        let timeline_id = parts.next().unwrap().parse::<TimelineId>()?;
-        let start_lsn = parts.next().unwrap().parse::<u64>()?;
-        let end_lsn = parts.next().unwrap().parse::<u64>()?;
-
-        let ttid = TenantTimelineId::new(tenant_id, timeline_id);
-        let shared_state = global.get(&ttid);
-
-        // read bytes from start_lsn to end_lsn
-        let mut buf = vec![0; (end_lsn - start_lsn) as usize];
-        shared_state.disk.wal.lock().read(start_lsn, &mut buf);
-
-        // send bytes to the client
-        self.tcp.send(AnyMessage::Bytes(Bytes::from(buf)));
-        Ok(())
-    }
-
-    fn init_timeline(
-        &mut self,
-        ttid: TenantTimelineId,
-        server_info: ServerInfo,
-        global: &mut GlobalMap,
-    ) -> Result<()> {
-        self.ttid = ttid;
-        if global.has_tli(&ttid) {
-            return Ok(());
-        }
-
-        global.create(ttid, server_info)
-    }
-
-    fn process(&mut self, msg: ProposerAcceptorMessage, global: &mut GlobalMap) -> Result<()> {
-        if !self.greeting {
-            self.greeting = true;
-
-            match msg {
-                ProposerAcceptorMessage::Greeting(ref greeting) => {
-                    debug!(
-                        "start handshake with walproposer {:?}",
-                        self.tcp,
-                    );
-                    let server_info = ServerInfo {
-                        pg_version: greeting.pg_version,
-                        system_id: greeting.system_id,
-                        wal_seg_size: greeting.wal_seg_size,
-                    };
-                    let ttid = TenantTimelineId::new(greeting.tenant_id, greeting.timeline_id);
-                    self.init_timeline(ttid, server_info, global)?
-                }
-                _ => {
-                    bail!("unexpected message {msg:?} instead of greeting");
-                }
-            }
-        }
-
-        let tli = global.get(&self.ttid);
-
-        match msg {
-            ProposerAcceptorMessage::AppendRequest(append_request) => {
-                self.flush_pending = true;
-                self.process_sk_msg(
-                    tli,
-                    &ProposerAcceptorMessage::NoFlushAppendRequest(append_request),
-                )?;
-            }
-            other => {
-                self.process_sk_msg(tli, &other)?;
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Process FlushWAL if needed.
-    // TODO: add extra flushes, to verify that extra flushes don't break anything
-    fn flush(&mut self, global: &mut GlobalMap) -> Result<()> {
-        if !self.flush_pending {
-            return Ok(());
-        }
-        self.flush_pending = false;
-        let shared_state = global.get(&self.ttid);
-        self.process_sk_msg(shared_state, &ProposerAcceptorMessage::FlushWAL)
-    }
-
-    /// Make safekeeper process a message and send a reply to the TCP
-    fn process_sk_msg(
-        &mut self,
-        shared_state: &mut SharedState,
-        msg: &ProposerAcceptorMessage,
-    ) -> Result<()> {
-        let mut reply = shared_state.sk.process_msg(msg)?;
-        if let Some(reply) = &mut reply {
-            // // if this is AppendResponse, fill in proper hot standby feedback and disk consistent lsn
-            // if let AcceptorProposerMessage::AppendResponse(ref mut resp) = reply {
-            //     // TODO:
-            // }
-
-            let mut buf = BytesMut::with_capacity(128);
-            reply.serialize(&mut buf)?;
-
-            self.tcp.send(AnyMessage::Bytes(buf.into()));
-        }
-        Ok(())
-    }
-}
-
-impl Drop for ConnState {
-    fn drop(&mut self) {
-        debug!("dropping conn: {:?}", self.tcp);
-        if !std::thread::panicking() {
-            self.tcp.close();
-        }
-        // TODO: clean up non-fsynced WAL
-    }
-}
--- a/libs/walproposer/src/simtest/simple_client.rs
+++ b/libs/walproposer/src/simtest/simple_client.rs
@@ -1,38 +0,0 @@
-use std::sync::Arc;
-
-use safekeeper::{
-    simlib::{
-        network::{Delay, NetworkOptions},
-        world::World,
-    },
-    simtest::{start_simulation, Options},
-};
-
-use crate::{bindings::RunClientC, c_context};
-
-#[test]
-fn run_rust_c_test() {
-    let delay = Delay {
-        min: 1,
-        max: 5,
-        fail_prob: 0.5,
-    };
-
-    let network = NetworkOptions {
-        keepalive_timeout: Some(50),
-        connect_delay: delay.clone(),
-        send_delay: delay.clone(),
-    };
-
-    let u32_data: [u32; 5] = [1, 2, 3, 4, 5];
-
-    let world = Arc::new(World::new(1337, Arc::new(network), c_context()));
-    start_simulation(Options {
-        world,
-        time_limit: 1_000_000,
-        client_fn: Box::new(move |_, server_id| unsafe {
-            RunClientC(server_id);
-        }),
-        u32_data,
-    });
-}
--- a/libs/walproposer/src/simtest/storage.rs
+++ b/libs/walproposer/src/simtest/storage.rs
@@ -1,234 +0,0 @@
-use std::{ops::Deref, sync::Arc};
-
-use anyhow::Result;
-use bytes::{Buf, BytesMut};
-use log::{debug, info};
-use postgres_ffi::{waldecoder::WalStreamDecoder, XLogSegNo};
-use safekeeper::{control_file, safekeeper::SafeKeeperState, wal_storage};
-use utils::lsn::Lsn;
-
-use super::disk::TimelineDisk;
-
-pub struct DiskStateStorage {
-    persisted_state: SafeKeeperState,
-    disk: Arc<TimelineDisk>,
-}
-
-impl DiskStateStorage {
-    pub fn new(disk: Arc<TimelineDisk>) -> Self {
-        let guard = disk.state.lock();
-        let state = guard.clone();
-        drop(guard);
-        DiskStateStorage {
-            persisted_state: state,
-            disk,
-        }
-    }
-}
-
-impl control_file::Storage for DiskStateStorage {
-    fn persist(&mut self, s: &SafeKeeperState) -> Result<()> {
-        self.persisted_state = s.clone();
-        *self.disk.state.lock() = s.clone();
-        Ok(())
-    }
-}
-
-impl Deref for DiskStateStorage {
-    type Target = SafeKeeperState;
-
-    fn deref(&self) -> &Self::Target {
-        &self.persisted_state
-    }
-}
-
-pub struct DummyWalStore {
-    lsn: Lsn,
-}
-
-impl DummyWalStore {
-    pub fn new() -> Self {
-        DummyWalStore { lsn: Lsn::INVALID }
-    }
-}
-
-impl wal_storage::Storage for DummyWalStore {
-    fn flush_lsn(&self) -> Lsn {
-        self.lsn
-    }
-
-    fn write_wal(&mut self, startpos: Lsn, buf: &[u8]) -> Result<()> {
-        self.lsn = startpos + buf.len() as u64;
-        Ok(())
-    }
-
-    fn truncate_wal(&mut self, end_pos: Lsn) -> Result<()> {
-        self.lsn = end_pos;
-        Ok(())
-    }
-
-    fn flush_wal(&mut self) -> Result<()> {
-        Ok(())
-    }
-
-    fn remove_up_to(&self) -> Box<dyn Fn(XLogSegNo) -> Result<()>> {
-        Box::new(move |_segno_up_to: XLogSegNo| Ok(()))
-    }
-
-    fn get_metrics(&self) -> safekeeper::metrics::WalStorageMetrics {
-        safekeeper::metrics::WalStorageMetrics::default()
-    }
-}
-
-pub struct DiskWALStorage {
-    /// Written to disk, but possibly still in the cache and not fully persisted.
-    /// Also can be ahead of record_lsn, if happen to be in the middle of a WAL record.
-    write_lsn: Lsn,
-
-    /// The LSN of the last WAL record written to disk. Still can be not fully flushed.
-    write_record_lsn: Lsn,
-
-    /// The LSN of the last WAL record flushed to disk.
-    flush_record_lsn: Lsn,
-
-    /// Decoder is required for detecting boundaries of WAL records.
-    decoder: WalStreamDecoder,
-
-    unflushed_bytes: BytesMut,
-
-    disk: Arc<TimelineDisk>,
-}
-
-impl DiskWALStorage {
-    pub fn new(disk: Arc<TimelineDisk>, state: &SafeKeeperState) -> Result<Self> {
-        let write_lsn = if state.commit_lsn == Lsn(0) {
-            Lsn(0)
-        } else {
-            Self::find_end_of_wal(disk.clone(), state.commit_lsn)?
-        };
-
-        let flush_lsn = write_lsn;
-        Ok(DiskWALStorage {
-            write_lsn,
-            write_record_lsn: flush_lsn,
-            flush_record_lsn: flush_lsn,
-            decoder: WalStreamDecoder::new(flush_lsn, 15),
-            unflushed_bytes: BytesMut::new(),
-            disk,
-        })
-    }
-
-    fn find_end_of_wal(disk: Arc<TimelineDisk>, start_lsn: Lsn) -> Result<Lsn> {
-        let mut buf = [0; 8192];
-        let mut pos = start_lsn.0;
-        let mut decoder = WalStreamDecoder::new(start_lsn, 15);
-        let mut result = start_lsn;
-        loop {
-            disk.wal.lock().read(pos, &mut buf);
-            pos += buf.len() as u64;
-            decoder.feed_bytes(&buf);
-
-            loop {
-                match decoder.poll_decode() {
-                    Ok(Some(record)) => result = record.0,
-                    Err(e) => {
-                        debug!(
-                            "find_end_of_wal reached end at {:?}, decode error: {:?}",
-                            result, e
-                        );
-                        return Ok(result);
-                    }
-                    Ok(None) => break, // need more data
-                }
-            }
-        }
-    }
-}
-
-impl wal_storage::Storage for DiskWALStorage {
-    fn flush_lsn(&self) -> Lsn {
-        self.flush_record_lsn
-    }
-
-    fn write_wal(&mut self, startpos: Lsn, buf: &[u8]) -> Result<()> {
-        if self.write_lsn != startpos {
-            panic!("write_wal called with wrong startpos");
-        }
-
-        self.unflushed_bytes.extend_from_slice(buf);
-        self.write_lsn += buf.len() as u64;
-
-        if self.decoder.available() != startpos {
-            info!(
-                "restart decoder from {} to {}",
-                self.decoder.available(),
-                startpos,
-            );
-            self.decoder = WalStreamDecoder::new(startpos, 15);
-        }
-        self.decoder.feed_bytes(buf);
-        loop {
-            match self.decoder.poll_decode()? {
-                None => break, // no full record yet
-                Some((lsn, _rec)) => {
-                    self.write_record_lsn = lsn;
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    fn truncate_wal(&mut self, end_pos: Lsn) -> Result<()> {
-        if self.write_lsn != Lsn(0) && end_pos > self.write_lsn {
-            panic!(
-                "truncate_wal called on non-written WAL, write_lsn={}, end_pos={}",
-                self.write_lsn, end_pos
-            );
-        }
-
-        self.flush_wal()?;
-
-        // write zeroes to disk from end_pos until self.write_lsn
-        let buf = [0; 8192];
-        let mut pos = end_pos.0;
-        while pos < self.write_lsn.0 {
-            self.disk.wal.lock().write(pos, &buf);
-            pos += buf.len() as u64;
-        }
-
-        self.write_lsn = end_pos;
-        self.write_record_lsn = end_pos;
-        self.flush_record_lsn = end_pos;
-        self.unflushed_bytes.clear();
-        self.decoder = WalStreamDecoder::new(end_pos, 15);
-
-        Ok(())
-    }
-
-    fn flush_wal(&mut self) -> Result<()> {
-        if self.flush_record_lsn == self.write_record_lsn {
-            // no need to do extra flush
-            return Ok(());
-        }
-
-        let num_bytes = self.write_record_lsn.0 - self.flush_record_lsn.0;
-
-        self.disk.wal.lock().write(
-            self.flush_record_lsn.0,
-            &self.unflushed_bytes[..num_bytes as usize],
-        );
-        self.unflushed_bytes.advance(num_bytes as usize);
-        self.flush_record_lsn = self.write_record_lsn;
-
-        Ok(())
-    }
-
-    fn remove_up_to(&self) -> Box<dyn Fn(XLogSegNo) -> Result<()>> {
-        Box::new(move |_segno_up_to: XLogSegNo| Ok(()))
-    }
-
-    fn get_metrics(&self) -> safekeeper::metrics::WalStorageMetrics {
-        safekeeper::metrics::WalStorageMetrics::default()
-    }
-}
--- a/libs/walproposer/src/simtest/util.rs
+++ b/libs/walproposer/src/simtest/util.rs
@@ -1,610 +0,0 @@
-use std::{ffi::CString, path::Path, str::FromStr, sync::Arc, collections::HashMap};
-
-use rand::{Rng, SeedableRng};
-use safekeeper::simlib::{
-    network::{Delay, NetworkOptions},
-    proto::AnyMessage,
-    time::EmptyEvent,
-    world::World,
-    world::{Node, NodeEvent, SEvent, NodeId},
-};
-use tracing::{debug, error, info, warn};
-use utils::{id::TenantTimelineId, lsn::Lsn};
-
-use crate::{
-    bindings::{
-        neon_tenant_walproposer, neon_timeline_walproposer, sim_redo_start_lsn, syncSafekeepers,
-        wal_acceptor_connection_timeout, wal_acceptor_reconnect_timeout, wal_acceptors_list,
-        MyInsertRecord, WalProposerCleanup, WalProposerRust,
-    },
-    c_context,
-    simtest::{
-        log::{init_logger, SimClock},
-        safekeeper::run_server,
-    },
-};
-
-use super::disk::Disk;
-
-pub struct SkNode {
-    pub node: Arc<Node>,
-    pub id: u32,
-    pub disk: Arc<Disk>,
-}
-
-impl SkNode {
-    pub fn new(node: Arc<Node>) -> Self {
-        let disk = Arc::new(Disk::new());
-        let res = Self {
-            id: node.id,
-            node,
-            disk,
-        };
-        res.launch();
-        res
-    }
-
-    pub fn launch(&self) {
-        let id = self.id;
-        let disk = self.disk.clone();
-        // start the server thread
-        self.node.launch(move |os| {
-            let res = run_server(os, disk);
-            debug!("server {} finished: {:?}", id, res);
-        });
-    }
-
-    pub fn restart(&self) {
-        self.node.crash_stop();
-        self.launch();
-    }
-}
-
-pub struct TestConfig {
-    pub network: NetworkOptions,
-    pub timeout: u64,
-    pub clock: Option<SimClock>,
-}
-
-impl TestConfig {
-    pub fn new(clock: Option<SimClock>) -> Self {
-        Self {
-            network: NetworkOptions {
-                keepalive_timeout: Some(2000),
-                connect_delay: Delay {
-                    min: 1,
-                    max: 5,
-                    fail_prob: 0.0,
-                },
-                send_delay: Delay {
-                    min: 1,
-                    max: 5,
-                    fail_prob: 0.0,
-                },
-            },
-            timeout: 1_000 * 10,
-            clock,
-        }
-    }
-
-    pub fn start(&self, seed: u64) -> Test {
-        let world = Arc::new(World::new(
-            seed,
-            Arc::new(self.network.clone()),
-            c_context(),
-        ));
-        world.register_world();
-
-        if let Some(clock) = &self.clock {
-            clock.set_world(world.clone());
-        }
-
-        let servers = [
-            SkNode::new(world.new_node()),
-            SkNode::new(world.new_node()),
-            SkNode::new(world.new_node()),
-        ];
-
-        let server_ids = [servers[0].id, servers[1].id, servers[2].id];
-
-        let safekeepers_guc = server_ids.map(|id| format!("node:{}", id)).join(",");
-        let ttid = TenantTimelineId::generate();
-
-        // wait init for all servers
-        world.await_all();
-
-        // clean up pgdata directory
-        self.init_pgdata();
-
-        Test {
-            world,
-            servers,
-            safekeepers_guc,
-            ttid,
-            timeout: self.timeout,
-        }
-    }
-
-    pub fn init_pgdata(&self) {
-        let pgdata = Path::new("/home/admin/simulator/libs/walproposer/pgdata");
-        if pgdata.exists() {
-            std::fs::remove_dir_all(pgdata).unwrap();
-        }
-        std::fs::create_dir(pgdata).unwrap();
-
-        // create empty pg_wal and pg_notify subdirs
-        std::fs::create_dir(pgdata.join("pg_wal")).unwrap();
-        std::fs::create_dir(pgdata.join("pg_notify")).unwrap();
-
-        // write postgresql.conf
-        let mut conf = std::fs::File::create(pgdata.join("postgresql.conf")).unwrap();
-        let content = "
-wal_log_hints=off
-hot_standby=on
-fsync=off
-wal_level=replica
-restart_after_crash=off
-shared_preload_libraries=neon
-neon.pageserver_connstring=''
-neon.tenant_id=cc6e67313d57283bad411600fbf5c142
-neon.timeline_id=de6fa815c1e45aa61491c3d34c4eb33e
-synchronous_standby_names=walproposer
-neon.safekeepers='node:1,node:2,node:3'
-max_connections=100
-";
-
-        std::io::Write::write_all(&mut conf, content.as_bytes()).unwrap();
-    }
-}
-
-pub struct Test {
-    pub world: Arc<World>,
-    pub servers: [SkNode; 3],
-    pub safekeepers_guc: String,
-    pub ttid: TenantTimelineId,
-    pub timeout: u64,
-}
-
-impl Test {
-    fn launch_sync(&self) -> Arc<Node> {
-        let client_node = self.world.new_node();
-        debug!("sync-safekeepers started at node {}", client_node.id);
-
-        // start the client thread
-        let guc = self.safekeepers_guc.clone();
-        let ttid = self.ttid.clone();
-        client_node.launch(move |_| {
-            let list = CString::new(guc).unwrap();
-
-            unsafe {
-                WalProposerCleanup();
-
-                syncSafekeepers = true;
-                wal_acceptors_list = list.into_raw();
-                wal_acceptor_reconnect_timeout = 1000;
-                wal_acceptor_connection_timeout = 5000;
-                neon_tenant_walproposer =
-                    CString::new(ttid.tenant_id.to_string()).unwrap().into_raw();
-                neon_timeline_walproposer = CString::new(ttid.timeline_id.to_string())
-                    .unwrap()
-                    .into_raw();
-                WalProposerRust();
-            }
-        });
-
-        self.world.await_all();
-
-        client_node
-    }
-
-    pub fn sync_safekeepers(&self) -> anyhow::Result<Lsn> {
-        let client_node = self.launch_sync();
-
-        // poll until exit or timeout
-        let time_limit = self.timeout;
-        while self.world.step() && self.world.now() < time_limit && !client_node.is_finished() {}
-
-        if !client_node.is_finished() {
-            anyhow::bail!("timeout or idle stuck");
-        }
-
-        let res = client_node.result.lock().clone();
-        if res.0 != 0 {
-            anyhow::bail!("non-zero exitcode: {:?}", res);
-        }
-        let lsn = Lsn::from_str(&res.1)?;
-        Ok(lsn)
-    }
-
-    pub fn launch_walproposer(&self, lsn: Lsn) -> WalProposer {
-        let client_node = self.world.new_node();
-
-        let lsn = if lsn.0 == 0 {
-            // usual LSN after basebackup
-            Lsn(21623024)
-        } else {
-            lsn
-        };
-
-        // start the client thread
-        let guc = self.safekeepers_guc.clone();
-        let ttid = self.ttid.clone();
-        client_node.launch(move |_| {
-            let list = CString::new(guc).unwrap();
-
-            unsafe {
-                WalProposerCleanup();
-
-                sim_redo_start_lsn = lsn.0;
-                syncSafekeepers = false;
-                wal_acceptors_list = list.into_raw();
-                wal_acceptor_reconnect_timeout = 1000;
-                wal_acceptor_connection_timeout = 5000;
-                neon_tenant_walproposer =
-                    CString::new(ttid.tenant_id.to_string()).unwrap().into_raw();
-                neon_timeline_walproposer = CString::new(ttid.timeline_id.to_string())
-                    .unwrap()
-                    .into_raw();
-                WalProposerRust();
-            }
-        });
-
-        self.world.await_all();
-
-        WalProposer {
-            node: client_node,
-        }
-    }
-
-    pub fn poll_for_duration(&self, duration: u64) {
-        let time_limit = std::cmp::min(self.world.now() + duration, self.timeout);
-        while self.world.step() && self.world.now() < time_limit {}
-    }
-
-    pub fn run_schedule(&self, schedule: &Schedule) -> anyhow::Result<()> {
-        {
-            let empty_event = Box::new(EmptyEvent);
-
-            let now = self.world.now();
-            for (time, _) in schedule {
-                if *time < now {
-                    continue;
-                }
-                self.world.schedule(*time - now, empty_event.clone())
-            }
-        }
-
-        let mut wait_node = self.launch_sync();
-        // fake walproposer
-        let mut wp = WalProposer {
-            node: wait_node.clone(),
-        };
-        let mut sync_in_progress = true;
-
-        let mut skipped_tx = 0;
-        let mut started_tx = 0;
-
-        let mut schedule_ptr = 0;
-
-        loop {
-            if sync_in_progress && wait_node.is_finished() {
-                let res = wait_node.result.lock().clone();
-                if res.0 != 0 {
-                    warn!("sync non-zero exitcode: {:?}", res);
-                    debug!("restarting walproposer");
-                    wait_node = self.launch_sync();
-                    continue;
-                }
-                let lsn = Lsn::from_str(&res.1)?;
-                debug!("sync-safekeepers finished at LSN {}", lsn);
-                wp = self.launch_walproposer(lsn);
-                wait_node = wp.node.clone();
-                debug!("walproposer started at node {}", wait_node.id);
-                sync_in_progress = false;
-            }
-
-            let now = self.world.now();
-            while schedule_ptr < schedule.len() && schedule[schedule_ptr].0 <= now {
-                if now != schedule[schedule_ptr].0 {
-                    warn!("skipped event {:?} at {}", schedule[schedule_ptr], now);
-                }
-
-                let action = &schedule[schedule_ptr].1;
-                match action {
-                    TestAction::WriteTx(size) => {
-                        if !sync_in_progress && !wait_node.is_finished() {
-                            started_tx += *size;
-                            wp.write_tx(*size);
-                            debug!("written {} transactions", size);
-                        } else {
-                            skipped_tx += size;
-                            debug!("skipped {} transactions", size);
-                        }
-                    }
-                    TestAction::RestartSafekeeper(id) => {
-                        debug!("restarting safekeeper {}", id);
-                        self.servers[*id as usize].restart();
-                    }
-                    TestAction::RestartWalProposer => {
-                        debug!("restarting walproposer");
-                        wait_node.crash_stop();
-                        sync_in_progress = true;
-                        wait_node = self.launch_sync();
-                    }
-                }
-                schedule_ptr += 1;
-            }
-
-            if schedule_ptr == schedule.len() {
-                break;
-            }
-            let next_event_time = schedule[schedule_ptr].0;
-
-            // poll until the next event
-            if wait_node.is_finished() {
-                while self.world.step() && self.world.now() < next_event_time {}
-            } else {
-                while self.world.step()
-                    && self.world.now() < next_event_time
-                    && !wait_node.is_finished()
-                {}
-            }
-        }
-
-        debug!("finished schedule");
-        debug!("skipped_tx: {}", skipped_tx);
-        debug!("started_tx: {}", started_tx);
-
-        Ok(())
-    }
-}
-
-pub struct WalProposer {
-    pub node: Arc<Node>,
-}
-
-impl WalProposer {
-    pub fn write_tx(&mut self, cnt: usize) {
-        self.node
-            .network_chan()
-            .send(NodeEvent::Internal(AnyMessage::Just32(cnt as u32)));
-    }
-
-    pub fn stop(&self) {
-        self.node.crash_stop();
-    }
-}
-
-#[derive(Debug, Clone)]
-pub enum TestAction {
-    WriteTx(usize),
-    RestartSafekeeper(usize),
-    RestartWalProposer,
-}
-
-pub type Schedule = Vec<(u64, TestAction)>;
-
-pub fn generate_schedule(seed: u64) -> Schedule {
-    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
-    let mut schedule = Vec::new();
-    let mut time = 0;
-
-    let cnt = rng.gen_range(1..100);
-
-    for _ in 0..cnt {
-        time += rng.gen_range(0..500);
-        let action = match rng.gen_range(0..3) {
-            0 => TestAction::WriteTx(rng.gen_range(1..10)),
-            1 => TestAction::RestartSafekeeper(rng.gen_range(0..3)),
-            2 => TestAction::RestartWalProposer,
-            _ => unreachable!(),
-        };
-        schedule.push((time, action));
-    }
-
-    schedule
-}
-
-pub fn generate_network_opts(seed: u64) -> NetworkOptions {
-    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);
-
-    let timeout = rng.gen_range(100..2000);
-    let max_delay = rng.gen_range(1..2*timeout);
-    let min_delay = rng.gen_range(1..=max_delay);
-
-    let max_fail_prob = rng.gen_range(0.0..0.9);
-    let connect_fail_prob = rng.gen_range(0.0..max_fail_prob);
-    let send_fail_prob = rng.gen_range(0.0..connect_fail_prob);
-
-    NetworkOptions {
-        keepalive_timeout: Some(timeout),
-        connect_delay: Delay {
-            min: min_delay,
-            max: max_delay,
-            fail_prob: connect_fail_prob,
-        },
-        send_delay: Delay {
-            min: min_delay,
-            max: max_delay,
-            fail_prob: send_fail_prob,
-        },
-    }
-}
-
-#[derive(Debug,Clone,PartialEq,Eq)]
-enum NodeKind {
-    Unknown,
-    Safekeeper,
-    WalProposer,
-}
-
-impl Default for NodeKind {
-    fn default() -> Self {
-        Self::Unknown
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-struct NodeInfo {
-    kind: NodeKind,
-
-    // walproposer
-    is_sync: bool,
-    term: u64,
-    epoch_lsn: u64,
-
-    // safekeeper
-    commit_lsn: u64,
-    flush_lsn: u64,
-}
-
-impl NodeInfo {
-    fn init_kind(&mut self, kind: NodeKind) {
-        if self.kind == NodeKind::Unknown {
-            self.kind = kind;
-        } else {
-            assert!(self.kind == kind);
-        }
-    }
-
-    fn started(&mut self, data: &str) {
-        let mut parts = data.split(';');
-        assert!(parts.next().unwrap() == "started");
-        match parts.next().unwrap() {
-            "safekeeper" => {
-                self.init_kind(NodeKind::Safekeeper);
-            }
-            "walproposer" => {
-                self.init_kind(NodeKind::WalProposer);
-                let is_sync: u8 = parts.next().unwrap().parse().unwrap();
-                self.is_sync = is_sync != 0;
-            }
-            _ => unreachable!(),
-        }
-    }
-}
-
-#[derive(Debug,Default)]
-struct GlobalState {
-    nodes: Vec<NodeInfo>,
-    commit_lsn: u64,
-    write_lsn: u64,
-    max_write_lsn: u64,
-
-    written_wal: u64,
-    written_records: u64,
-}
-
-impl GlobalState {
-    fn new() -> Self {
-        Default::default()
-    }
-
-    fn get(&mut self, id: u32) -> &mut NodeInfo {
-        let id = id as usize;
-        if id >= self.nodes.len() {
-            self.nodes.resize(id + 1, NodeInfo::default());
-        }
-        &mut self.nodes[id]
-    }
-}
-
-pub fn validate_events(events: Vec<SEvent>) {
-    const INITDB_LSN: u64 = 21623024;
-
-    let hook = std::panic::take_hook();
-    scopeguard::defer_on_success! {
-        std::panic::set_hook(hook);
-    };
-
-    let mut state = GlobalState::new();
-    state.max_write_lsn = INITDB_LSN;
-
-    for event in events {
-        debug!("{:?}", event);
-
-        let node = state.get(event.node);
-        if event.data.starts_with("started;") {
-            node.started(&event.data);
-            continue;
-        }
-        assert!(node.kind != NodeKind::Unknown);
-
-        // drop reference to unlock state
-        let mut node = node.clone();
-
-        let mut parts = event.data.split(';');
-        match node.kind {
-            NodeKind::Safekeeper => {
-                match parts.next().unwrap() {
-                    "tli_loaded" => {
-                        let flush_lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        let commit_lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        node.flush_lsn = flush_lsn;
-                        node.commit_lsn = commit_lsn;
-                    }
-                    _ => unreachable!(),
-                }
-            }
-            NodeKind::WalProposer => {
-                match parts.next().unwrap() {
-                    "prop_elected" => {
-                        let prop_lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        let prop_term: u64 = parts.next().unwrap().parse().unwrap();
-                        let prev_lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        let prev_term: u64 = parts.next().unwrap().parse().unwrap();
-
-                        assert!(prop_lsn >= prev_lsn);
-                        assert!(prop_term >= prev_term);
-
-                        assert!(prop_lsn >= state.commit_lsn);
-
-                        if prop_lsn > state.write_lsn {
-                            assert!(prop_lsn <= state.max_write_lsn);
-                            debug!("moving write_lsn up from {} to {}", state.write_lsn, prop_lsn);
-                            state.write_lsn = prop_lsn;
-                        }
-                        if prop_lsn < state.write_lsn {
-                            debug!("moving write_lsn down from {} to {}", state.write_lsn, prop_lsn);
-                            state.write_lsn = prop_lsn;
-                        }
-
-                        node.epoch_lsn = prop_lsn;
-                        node.term = prop_term;
-                    }
-                    "write_wal" => {
-                        assert!(!node.is_sync);
-                        let start_lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        let end_lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        let cnt: u64 = parts.next().unwrap().parse().unwrap();
-
-                        let size = end_lsn - start_lsn;
-                        state.written_wal += size;
-                        state.written_records += cnt;
-
-                        // TODO: If we allow writing WAL before winning the election
-
-                        assert!(start_lsn >= state.commit_lsn);
-                        assert!(end_lsn >= start_lsn);
-                        assert!(start_lsn == state.write_lsn);
-                        state.write_lsn = end_lsn;
-
-                        if end_lsn > state.max_write_lsn {
-                            state.max_write_lsn = end_lsn;
-                        }
-                    }
-                    "commit_lsn" => {
-                        let lsn: u64 = parts.next().unwrap().parse().unwrap();
-                        assert!(lsn >= state.commit_lsn);
-                        state.commit_lsn = lsn;
-                    }
-                    _ => unreachable!(),
-                }
-            }
-            _ => unreachable!(),
-        }
-
-        // update the node in the state struct
-        *state.get(event.node) = node;
-    }
-}
--- a/libs/walproposer/src/simtest/wp_sk.rs
+++ b/libs/walproposer/src/simtest/wp_sk.rs
@@ -1,265 +0,0 @@
-use std::{ffi::CString, path::Path, str::FromStr, sync::Arc};
-
-use rand::Rng;
-use safekeeper::simlib::{
-    network::{Delay, NetworkOptions},
-    proto::AnyMessage,
-    world::World,
-    world::{Node, NodeEvent},
-};
-use tracing::{info, warn};
-use utils::{id::TenantTimelineId, lsn::Lsn};
-
-use crate::{
-    bindings::{
-        neon_tenant_walproposer, neon_timeline_walproposer, sim_redo_start_lsn, syncSafekeepers,
-        wal_acceptor_connection_timeout, wal_acceptor_reconnect_timeout, wal_acceptors_list,
-        MyInsertRecord, WalProposerCleanup, WalProposerRust,
-    },
-    c_context,
-    simtest::{
-        log::{init_logger, SimClock},
-        safekeeper::run_server,
-        util::{generate_schedule, TestConfig, generate_network_opts, validate_events},
-    }, enable_debug,
-};
-
-use super::{
-    disk::Disk,
-    util::{Schedule, TestAction},
-};
-
-#[test]
-fn sync_empty_safekeepers() {
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    let test = config.start(1337);
-
-    let lsn = test.sync_safekeepers().unwrap();
-    assert_eq!(lsn, Lsn(0));
-    info!("Sucessfully synced empty safekeepers at 0/0");
-
-    let lsn = test.sync_safekeepers().unwrap();
-    assert_eq!(lsn, Lsn(0));
-    info!("Sucessfully synced (again) empty safekeepers at 0/0");
-}
-
-#[test]
-fn run_walproposer_generate_wal() {
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    // config.network.timeout = Some(250);
-    let test = config.start(1337);
-
-    let lsn = test.sync_safekeepers().unwrap();
-    assert_eq!(lsn, Lsn(0));
-    info!("Sucessfully synced empty safekeepers at 0/0");
-
-    let mut wp = test.launch_walproposer(lsn);
-
-    test.poll_for_duration(30);
-
-    for i in 0..100 {
-        wp.write_tx(1);
-        test.poll_for_duration(5);
-    }
-}
-
-#[test]
-fn crash_safekeeper() {
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    // config.network.timeout = Some(250);
-    let test = config.start(1337);
-
-    let lsn = test.sync_safekeepers().unwrap();
-    assert_eq!(lsn, Lsn(0));
-    info!("Sucessfully synced empty safekeepers at 0/0");
-
-    let mut wp = test.launch_walproposer(lsn);
-
-    test.poll_for_duration(30);
-
-    wp.write_tx(3);
-
-    test.servers[0].restart();
-
-    test.poll_for_duration(100);
-    test.poll_for_duration(1000);
-}
-
-#[test]
-fn test_simple_restart() {
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    // config.network.timeout = Some(250);
-    let test = config.start(1337);
-
-    let lsn = test.sync_safekeepers().unwrap();
-    assert_eq!(lsn, Lsn(0));
-    info!("Sucessfully synced empty safekeepers at 0/0");
-
-    let mut wp = test.launch_walproposer(lsn);
-
-    test.poll_for_duration(30);
-
-    wp.write_tx(3);
-    test.poll_for_duration(100);
-
-    wp.stop();
-    drop(wp);
-
-    let lsn = test.sync_safekeepers().unwrap();
-    info!("Sucessfully synced safekeepers at {}", lsn);
-}
-
-#[test]
-fn test_simple_schedule() -> anyhow::Result<()> {
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    config.network.keepalive_timeout = Some(100);
-    let test = config.start(1337);
-
-    let schedule: Schedule = vec![
-        (0, TestAction::RestartWalProposer),
-        (50, TestAction::WriteTx(5)),
-        (100, TestAction::RestartSafekeeper(0)),
-        (100, TestAction::WriteTx(5)),
-        (110, TestAction::RestartSafekeeper(1)),
-        (110, TestAction::WriteTx(5)),
-        (120, TestAction::RestartSafekeeper(2)),
-        (120, TestAction::WriteTx(5)),
-        (201, TestAction::RestartWalProposer),
-        (251, TestAction::RestartSafekeeper(0)),
-        (251, TestAction::RestartSafekeeper(1)),
-        (251, TestAction::RestartSafekeeper(2)),
-        (251, TestAction::WriteTx(5)),
-        (255, TestAction::WriteTx(5)),
-        (1000, TestAction::WriteTx(5)),
-    ];
-
-    test.run_schedule(&schedule)?;
-    info!("Test finished, stopping all threads");
-    test.world.deallocate();
-
-    Ok(())
-}
-
-#[test]
-fn test_many_tx() -> anyhow::Result<()> {
-    enable_debug();
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    let test = config.start(1337);
-
-    let mut schedule: Schedule = vec![];
-    for i in 0..100 {
-        schedule.push((i * 10, TestAction::WriteTx(10)));
-    }
-
-    test.run_schedule(&schedule)?;
-    info!("Test finished, stopping all threads");
-    test.world.stop_all();
-
-    let events = test.world.take_events();
-    info!("Events: {:?}", events);
-    let last_commit_lsn = events
-        .iter()
-        .filter_map(|event| {
-            if event.data.starts_with("commit_lsn;") {
-                let lsn: u64 = event.data.split(';').nth(1).unwrap().parse().unwrap();
-                return Some(lsn);
-            }
-            None
-        })
-        .last()
-        .unwrap();
-
-    let initdb_lsn = 21623024;
-    let diff = last_commit_lsn - initdb_lsn;
-    info!("Last commit lsn: {}, diff: {}", last_commit_lsn, diff);
-    assert!(diff > 1000 * 8);
-    Ok(())
-}
-
-#[test]
-fn test_random_schedules() -> anyhow::Result<()> {
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    config.network.keepalive_timeout = Some(100);
-
-    for i in 0..30000 {
-        let seed: u64 = rand::thread_rng().gen();
-        config.network = generate_network_opts(seed);
-
-        let test = config.start(seed);
-        warn!("Running test with seed {}", seed);
-
-        let schedule = generate_schedule(seed);
-        test.run_schedule(&schedule).unwrap();
-        validate_events(test.world.take_events());
-        test.world.deallocate();
-    }
-
-    Ok(())
-}
-
-#[test]
-fn test_one_schedule() -> anyhow::Result<()> {
-    enable_debug();
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-    config.network.keepalive_timeout = Some(100);
-
-    // let seed = 6762900106769428342;
-    // let test = config.start(seed);
-    // warn!("Running test with seed {}", seed);
-
-    // let schedule = generate_schedule(seed);
-    // info!("schedule: {:?}", schedule);
-    // test.run_schedule(&schedule)?;
-    // test.world.deallocate();
-
-    let seed = 3649773280641776194;
-    config.network = generate_network_opts(seed);
-    info!("network: {:?}", config.network);
-    let test = config.start(seed);
-    warn!("Running test with seed {}", seed);
-
-    let schedule = generate_schedule(seed);
-    info!("schedule: {:?}", schedule);
-    test.run_schedule(&schedule).unwrap();
-    validate_events(test.world.take_events());
-    test.world.deallocate();
-
-    Ok(())
-}
-
-#[test]
-fn test_res_dealloc() -> anyhow::Result<()> {
-    // enable_debug();
-    let clock = init_logger();
-    let mut config = TestConfig::new(Some(clock));
-
-    // print pid
-    let pid = unsafe { libc::getpid() };
-    info!("pid: {}", pid);
-
-    let seed = 123456;
-    config.network = generate_network_opts(seed);
-    let test = config.start(seed);
-    warn!("Running test with seed {}", seed);
-
-    let schedule = generate_schedule(seed);
-    info!("schedule: {:?}", schedule);
-    test.run_schedule(&schedule).unwrap();
-    test.world.stop_all();
-
-    let world = test.world.clone();
-    drop(test);
-    info!("world strong count: {}", Arc::strong_count(&world));
-    world.deallocate();
-    info!("world strong count: {}", Arc::strong_count(&world));
-
-    Ok(())
-}
--- a/libs/walproposer/src/test.rs
+++ b/libs/walproposer/src/test.rs
@@ -1,31 +0,0 @@
-use tracing::info;
-
-use crate::bindings::{TestFunc, MyContextInit};
-
-#[test]
-fn test_rust_c_calls() {
-    let res = std::thread::spawn(|| {
-        let res = unsafe {
-            MyContextInit();
-            TestFunc(1, 2)
-        };
-        res
-    }).join().unwrap();
-    info!("res: {}", res);
-}
-
-#[test]
-fn test_sim_bindings() {
-    std::thread::spawn(|| {
-        unsafe {
-            MyContextInit();
-            TestFunc(1, 2)
-        }
-    }).join().unwrap();
-    std::thread::spawn(|| {
-        unsafe {
-            MyContextInit();
-            TestFunc(1, 2)
-        }
-    }).join().unwrap();
-}
--- a/libs/walproposer/test.c
+++ b/libs/walproposer/test.c
@@ -1,100 +0,0 @@
-#include "bindgen_deps.h"
-#include "rust_bindings.h"
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include "postgres.h"
-#include "utils/memutils.h"
-#include "utils/guc.h"
-#include "miscadmin.h"
-#include "common/pg_prng.h"
-
-// From src/backend/main/main.c
-const char *progname = "fakepostgres";
-
-int TestFunc(int a, int b) {
-    printf("TestFunc: %d + %d = %d\n", a, b, a + b);
-    rust_function(0);
-    elog(LOG, "postgres elog test");
-    printf("After rust_function\n");
-    return a + b;
-}
-
-// This is a quick experiment with rewriting existing Rust code in C.
-void RunClientC(uint32_t serverId) {
-    uint32_t clientId = sim_id();
-
-    elog(LOG, "started client");
-
-    int data_len = 5;
-
-    int delivered = 0;
-    int tcp = sim_open_tcp(serverId);
-    while (delivered < data_len) {
-        sim_msg_set_repl_cell(delivered+1, clientId, delivered);
-        sim_tcp_send(tcp);
-
-        Event event = sim_epoll_rcv(-1);
-        switch (event.tag)
-        {
-        case Closed:
-            elog(LOG, "connection closed");
-            tcp = sim_open_tcp(serverId);
-            break;
-
-        case Message:
-            Assert(event.any_message == Just32);
-            uint32_t val;
-            sim_msg_get_just_u32(&val);
-            if (val == delivered + 1) {
-                delivered += 1;
-            }
-            break;
-
-        default:
-            Assert(false);
-        }
-    }
-}
-
-bool debug_enabled = false;
-
-bool initializedMemoryContext = false;
-// pthread_mutex_init(&lock, NULL)?
-pthread_mutex_t lock;
-
-void MyContextInit() {
-    // initializes global variables, TODO how to make them thread-local?
-    pthread_mutex_lock(&lock);
-    if (!initializedMemoryContext) {
-        initializedMemoryContext = true;
-        MemoryContextInit();
-        pg_prng_seed(&pg_global_prng_state, 0);
-
-        setenv("PGDATA", "/home/admin/simulator/libs/walproposer/pgdata", 1);
-
-        /*
-         * Set default values for command-line options.
-         */
-        InitializeGUCOptions();
-
-        /* Acquire configuration parameters */
-        if (!SelectConfigFiles(NULL, progname))
-            exit(1);
-
-        if (debug_enabled) {
-            log_min_messages = LOG;
-        } else {
-            log_min_messages = FATAL;
-        }
-        Log_line_prefix = "[%p] ";
-
-        InitializeMaxBackends();
-        ChangeToDataDir();
-        CreateSharedMemoryAndSemaphores();
-        SetInstallXLogFileSegmentActive();
-        // CreateAuxProcessResourceOwner();
-        // StartupXLOG();
-    }
-    pthread_mutex_unlock(&lock);
-}
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -23,7 +23,6 @@ const_format.workspace = true
 consumption_metrics.workspace = true
 crc32c.workspace = true
 crossbeam-utils.workspace = true
-either.workspace = true
 fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
@@ -52,7 +51,7 @@ thiserror.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
-toml_edit = { workspace = true, features = [ "serde" ] }
+toml_edit.workspace = true
 tracing.workspace = true
 url.workspace = true
 walkdir.workspace = true
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -33,7 +33,6 @@ use pageserver_api::reltag::{RelTag, SlruKind};

 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
 use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
-use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
 use postgres_ffi::TransactionId;
 use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
@@ -191,31 +190,14 @@ where
        {
            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;

-            // If full backup is requested, include all relation files.
-            // Otherwise only include init forks of unlogged relations.
-            let rels = self
-                .timeline
-                .list_rels(spcnode, dbnode, self.lsn, self.ctx)
-                .await?;
-            for &rel in rels.iter() {
-                // Send init fork as main fork to provide well formed empty
-                // contents of UNLOGGED relations. Postgres copies it in
-                // `reinit.c` during recovery.
-                if rel.forknum == INIT_FORKNUM {
-                    // I doubt we need _init fork itself, but having it at least
-                    // serves as a marker relation is unlogged.
-                    self.add_rel(rel, rel).await?;
-                    self.add_rel(rel, rel.with_forknum(MAIN_FORKNUM)).await?;
-                    continue;
-                }
-
-                if self.full_backup {
-                    if rel.forknum == MAIN_FORKNUM && rels.contains(&rel.with_forknum(INIT_FORKNUM))
-                    {
-                        // skip this, will include it when we reach the init fork
-                        continue;
-                    }
-                    self.add_rel(rel, rel).await?;
+            // Gather and send relational files in each database if full backup is requested.
+            if self.full_backup {
+                for rel in self
+                    .timeline
+                    .list_rels(spcnode, dbnode, self.lsn, self.ctx)
+                    .await?
+                {
+                    self.add_rel(rel).await?;
                }
            }
        }
@@ -238,16 +220,15 @@ where
        Ok(())
    }

-    /// Add contents of relfilenode `src`, naming it as `dst`.
-    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> anyhow::Result<()> {
+    async fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
        let nblocks = self
            .timeline
-            .get_rel_size(src, self.lsn, false, self.ctx)
+            .get_rel_size(tag, self.lsn, false, self.ctx)
            .await?;

        // If the relation is empty, create an empty file
        if nblocks == 0 {
-            let file_name = dst.to_segfile_name(0);
+            let file_name = tag.to_segfile_name(0);
            let header = new_tar_header(&file_name, 0)?;
            self.ar.append(&header, &mut io::empty()).await?;
            return Ok(());
@@ -263,12 +244,12 @@ where
            for blknum in startblk..endblk {
                let img = self
                    .timeline
-                    .get_rel_page_at_lsn(src, blknum, self.lsn, false, self.ctx)
+                    .get_rel_page_at_lsn(tag, blknum, self.lsn, false, self.ctx)
                    .await?;
                segment_data.extend_from_slice(&img[..]);
            }

-            let file_name = dst.to_segfile_name(seg as u32);
+            let file_name = tag.to_segfile_name(seg as u32);
            let header = new_tar_header(&file_name, segment_data.len() as u64)?;
            self.ar.append(&header, segment_data.as_slice()).await?;

--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -88,13 +88,6 @@ fn main() -> anyhow::Result<()> {
        }
    };

-    // Initialize logging, which must be initialized before the custom panic hook is installed.
-    logging::init(conf.log_format)?;
-
-    // mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
-    // disarming this hook on pageserver, because we never tear down tracing.
-    logging::replace_panic_hook_with_tracing_panic_hook().forget();
-
    // initialize sentry if SENTRY_DSN is provided
    let _sentry_guard = init_sentry(
        Some(GIT_VERSION.into()),
@@ -217,6 +210,9 @@ fn start_pageserver(
    launch_ts: &'static LaunchTimestamp,
    conf: &'static PageServerConf,
 ) -> anyhow::Result<()> {
+    // Initialize logging
+    logging::init(conf.log_format)?;
+
    // Print version and launch timestamp to the log,
    // and expose them as prometheus metrics.
    // A changed version string indicates changed software.
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -731,13 +731,6 @@ impl PageServerConf {
                })?);
        }

-        if let Some(eviction_policy) = item.get("eviction_policy") {
-            t_conf.eviction_policy = Some(
-                toml_edit::de::from_item(eviction_policy.clone())
-                    .context("parse eviction_policy")?,
-            );
-        }
-
        Ok(t_conf)
    }

--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -25,7 +25,7 @@ const REMOTE_STORAGE_SIZE: &str = "remote_storage_size";
 const TIMELINE_LOGICAL_SIZE: &str = "timeline_logical_size";

 #[serde_as]
-#[derive(Serialize, Debug)]
+#[derive(Serialize)]
 struct Ids {
    #[serde_as(as = "DisplayFromStr")]
    tenant_id: TenantId,
@@ -75,7 +75,7 @@ pub async fn collect_metrics(
    // define client here to reuse it for all requests
    let client = reqwest::Client::new();
    let mut cached_metrics: HashMap<PageserverConsumptionMetricsKey, u64> = HashMap::new();
-    let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();
+    let mut prev_iteration_time: Option<std::time::Instant> = None;

    loop {
        tokio::select! {
@@ -86,11 +86,11 @@ pub async fn collect_metrics(
            _ = ticker.tick() => {

                // send cached metrics every cached_metric_collection_interval
-                let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;
+                let send_cached = prev_iteration_time
+                .map(|x| x.elapsed() >= cached_metric_collection_interval)
+                .unwrap_or(false);

-                if send_cached {
-                    prev_iteration_time = std::time::Instant::now();
-                }
+                prev_iteration_time = Some(std::time::Instant::now());

                collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
            }
@@ -287,12 +287,6 @@ pub async fn collect_metrics_iteration(
                    }
                } else {
                    error!("metrics endpoint refused the sent metrics: {:?}", res);
-                    for metric in chunk_to_send.iter() {
-                        // Report if the metric value is suspiciously large
-                        if metric.value > (1u64 << 40) {
-                            error!("potentially abnormal metric value: {:?}", metric);
-                        }
-                    }
                }
            }
            Err(err) => {
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -437,13 +437,6 @@ paths:
          type: boolean
        description: |
          When true, skip calculation and only provide the model inputs (for debugging). Defaults to false.
-      - name: retention_period
-        in: query
-        required: false
-        schema:
-          type: integer
-        description: |
-          Override the default retention period (in bytes) used for size calculation.
    get:
      description: |
        Calculate tenant's size, which is a mixture of WAL (bytes) and logical_size (bytes).
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -7,21 +7,19 @@ use hyper::{Body, Request, Response, Uri};
 use metrics::launch_timestamp::LaunchTimestamp;
 use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
 use remote_storage::GenericRemoteStorage;
-use tenant_size_model::{SizeResult, StorageModel};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};

 use super::models::{
    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
-    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
+    TimelineCreateRequest, TimelineInfo,
 };
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::mgr::TenantMapInsertError;
-use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
 use crate::tenant::{PageReconstructError, Timeline};
 use crate::{config::PageServerConf, tenant::mgr};
@@ -40,7 +38,7 @@ use utils::{

 // Imports only used for testing APIs
 #[cfg(feature = "testing")]
-use super::models::ConfigureFailpointsRequest;
+use super::models::{ConfigureFailpointsRequest, TimelineGcRequest};

 struct State {
    conf: &'static PageServerConf,
@@ -481,19 +479,11 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
 /// to debug any of the calculations. Requires `tenant_id` request parameter, supports
 /// `inputs_only=true|false` (default false) which supports debugging failure to calculate model
 /// values.
-///
-/// 'retention_period' query parameter overrides the cutoff that is used to calculate the size
-/// (only if it is shorter than the real cutoff).
-///
-/// Note: we don't update the cached size and prometheus metric here.
-/// The retention period might be different, and it's nice to have a method to just calculate it
-/// without modifying anything anyway.
 async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
+
    let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
-    let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
-    let headers = request.headers();

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
    let tenant = mgr::get_tenant(tenant_id, true)
@@ -502,29 +492,24 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A

    // this can be long operation
    let inputs = tenant
-        .gather_size_inputs(retention_period, &ctx)
+        .gather_size_inputs(&ctx)
        .await
        .map_err(ApiError::InternalServerError)?;

-    let mut sizes = None;
-    if !inputs_only.unwrap_or(false) {
-        let storage_model = inputs
-            .calculate_model()
-            .map_err(ApiError::InternalServerError)?;
-        let size = storage_model.calculate();
+    let size = if !inputs_only.unwrap_or(false) {
+        Some(
+            tenant
+                .calc_and_update_cached_synthetic_size(&inputs)
+                .map_err(ApiError::InternalServerError)?,
+        )
+    } else {
+        None
+    };

-        // If request header expects html, return html
-        if headers["Accept"] == "text/html" {
-            return synthetic_size_html_response(inputs, storage_model, size);
-        }
-        sizes = Some(size);
-    } else if headers["Accept"] == "text/html" {
-        return Err(ApiError::BadRequest(anyhow!(
-            "inputs_only parameter is incompatible with html output request"
-        )));
-    }
-
-    /// The type resides in the pageserver not to expose `ModelInputs`.
+    /// Private response type with the additional "unstable" `inputs` field.
+    ///
+    /// The type is described with `id` and `size` in the openapi_spec file, but the `inputs` is
+    /// intentionally left out. The type resides in the pageserver not to expose `ModelInputs`.
    #[serde_with::serde_as]
    #[derive(serde::Serialize)]
    struct TenantHistorySize {
@@ -534,9 +519,6 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
        ///
        /// Will be none if `?inputs_only=true` was given.
        size: Option<u64>,
-        /// Size of each segment used in the model.
-        /// Will be null if `?inputs_only=true` was given.
-        segment_sizes: Option<Vec<tenant_size_model::SegmentSizeResult>>,
        inputs: crate::tenant::size::ModelInputs,
    }

@@ -544,8 +526,7 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
        StatusCode::OK,
        TenantHistorySize {
            id: tenant_id,
-            size: sizes.as_ref().map(|x| x.total_size),
-            segment_sizes: sizes.map(|x| x.segments),
+            size,
            inputs,
        },
    )
@@ -610,62 +591,6 @@ async fn evict_timeline_layer_handler(request: Request<Body>) -> Result<Response
    }
 }

-/// Get tenant_size SVG graph along with the JSON data.
-fn synthetic_size_html_response(
-    inputs: ModelInputs,
-    storage_model: StorageModel,
-    sizes: SizeResult,
-) -> Result<Response<Body>, ApiError> {
-    let mut timeline_ids: Vec<String> = Vec::new();
-    let mut timeline_map: HashMap<TimelineId, usize> = HashMap::new();
-    for (index, ti) in inputs.timeline_inputs.iter().enumerate() {
-        timeline_map.insert(ti.timeline_id, index);
-        timeline_ids.push(ti.timeline_id.to_string());
-    }
-    let seg_to_branch: Vec<usize> = inputs
-        .segments
-        .iter()
-        .map(|seg| *timeline_map.get(&seg.timeline_id).unwrap())
-        .collect();
-
-    let svg =
-        tenant_size_model::svg::draw_svg(&storage_model, &timeline_ids, &seg_to_branch, &sizes)
-            .map_err(ApiError::InternalServerError)?;
-
-    let mut response = String::new();
-
-    use std::fmt::Write;
-    write!(response, "<html>\n<body>\n").unwrap();
-    write!(response, "<div>\n{svg}\n</div>").unwrap();
-    writeln!(response, "Project size: {}", sizes.total_size).unwrap();
-    writeln!(response, "<pre>").unwrap();
-    writeln!(
-        response,
-        "{}",
-        serde_json::to_string_pretty(&inputs).unwrap()
-    )
-    .unwrap();
-    writeln!(
-        response,
-        "{}",
-        serde_json::to_string_pretty(&sizes.segments).unwrap()
-    )
-    .unwrap();
-    writeln!(response, "</pre>").unwrap();
-    write!(response, "</body>\n</html>\n").unwrap();
-
-    html_response(StatusCode::OK, response)
-}
-
-pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>, ApiError> {
-    let response = Response::builder()
-        .status(status)
-        .header(hyper::header::CONTENT_TYPE, "text/html")
-        .body(Body::from(data.as_bytes().to_vec()))
-        .map_err(|e| ApiError::InternalServerError(e.into()))?;
-    Ok(response)
-}
-
 // Helper function to standardize the error messages we produce on bad durations
 //
 // Intended to be used with anyhow's `with_context`, e.g.:
@@ -872,14 +797,6 @@ async fn update_tenant_config_handler(
        );
    }

-    if let Some(eviction_policy) = request_data.eviction_policy {
-        tenant_conf.eviction_policy = Some(
-            serde_json::from_value(eviction_policy)
-                .context("parse field `eviction_policy`")
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
    let state = get_state(&request);
    mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
        .instrument(info_span!("tenant_config", tenant = ?tenant_id))
@@ -925,6 +842,7 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
 }

 // Run GC immediately on given timeline.
+#[cfg(feature = "testing")]
 async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -971,22 +889,19 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;
-    async {
-        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-        timeline
-            .freeze_and_flush()
-            .await
-            .map_err(ApiError::InternalServerError)?;
-        timeline
-            .compact(&ctx)
-            .await
-            .map_err(ApiError::InternalServerError)?;

-        json_response(StatusCode::OK, ())
-    }
-    .instrument(info_span!("manual_checkpoint", tenant_id = %tenant_id, timeline_id = %timeline_id))
-    .await
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    timeline
+        .freeze_and_flush()
+        .await
+        .map_err(ApiError::InternalServerError)?;
+    timeline
+        .compact(&ctx)
+        .await
+        .map_err(ApiError::InternalServerError)?;
+
+    json_response(StatusCode::OK, ())
 }

 async fn timeline_download_remote_layers_handler_post(
@@ -1031,17 +946,6 @@ async fn active_timeline_of_active_tenant(
        .map_err(ApiError::NotFound)
 }

-async fn always_panic_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    // Deliberately cause a panic to exercise the panic hook registered via std::panic::set_hook().
-    // For pageserver, the relevant panic hook is `tracing_panic_hook` , and the `sentry` crate's wrapper around it.
-    // Use catch_unwind to ensure that tokio nor hyper are distracted by our panic.
-    let query = req.uri().query();
-    let _ = std::panic::catch_unwind(|| {
-        panic!("unconditional panic for testing panic hook integration; request query: {query:?}")
-    });
-    json_response(StatusCode::NO_CONTENT, ())
-}
-
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(
        StatusCode::NOT_FOUND,
@@ -1107,7 +1011,7 @@ pub fn make_router(
        .get("/v1/tenant", tenant_list_handler)
        .post("/v1/tenant", tenant_create_handler)
        .get("/v1/tenant/:tenant_id", tenant_status)
-        .get("/v1/tenant/:tenant_id/synthetic_size", tenant_size_handler)
+        .get("/v1/tenant/:tenant_id/size", tenant_size_handler)
        .put("/v1/tenant/config", update_tenant_config_handler)
        .get("/v1/tenant/:tenant_id/config", get_tenant_config_handler)
        .get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
@@ -1126,7 +1030,7 @@ pub fn make_router(
        )
        .put(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc",
-            timeline_gc_handler,
+            testing_api!("run timeline GC", timeline_gc_handler),
        )
        .put(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/compact",
@@ -1160,6 +1064,5 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
            evict_timeline_layer_handler,
        )
-        .get("/v1/panic", always_panic_handler)
        .any(handler_404))
 }
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -7,11 +7,11 @@ use std::fmt;
 use std::ops::{AddAssign, Range};
 use std::time::Duration;

+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
 /// Key used in the Repository kv-store.
 ///
 /// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
 /// for what we actually store in these fields.
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
 pub struct Key {
    pub field1: u8,
    pub field2: u32,
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -231,9 +231,6 @@ pub enum TaskKind {
    // Compaction. One per tenant.
    Compaction,

-    // Eviction. One per timeline.
-    Eviction,
-
    // Initial logical size calculation
    InitialLogicalSizeCalculation,

--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -2418,9 +2418,6 @@ impl Tenant {
    #[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
    pub async fn gather_size_inputs(
        &self,
-        // `max_retention_period` overrides the cutoff that is used to calculate the size
-        // (only if it is shorter than the real cutoff).
-        max_retention_period: Option<u64>,
        ctx: &RequestContext,
    ) -> anyhow::Result<size::ModelInputs> {
        let logical_sizes_at_once = self
@@ -2428,41 +2425,32 @@ impl Tenant {
            .concurrent_tenant_size_logical_size_queries
            .inner();

-        // TODO: Having a single mutex block concurrent reads is not great for performance.
-        //
-        // But the only case where we need to run multiple of these at once is when we
-        // request a size for a tenant manually via API, while another background calculation
-        // is in progress (which is not a common case).
+        // TODO: Having a single mutex block concurrent reads is unfortunate, but since the queries
+        // are for testing/experimenting, we tolerate this.
        //
        // See more for on the issue #2748 condenced out of the initial PR review.
        let mut shared_cache = self.cached_logical_sizes.lock().await;

-        size::gather_inputs(
-            self,
-            logical_sizes_at_once,
-            max_retention_period,
-            &mut shared_cache,
-            ctx,
-        )
-        .await
+        size::gather_inputs(self, logical_sizes_at_once, &mut shared_cache, ctx).await
    }

-    /// Calculate synthetic tenant size and cache the result.
+    /// Calculate synthetic tenant size
    /// This is periodically called by background worker.
    /// result is cached in tenant struct
    #[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
    pub async fn calculate_synthetic_size(&self, ctx: &RequestContext) -> anyhow::Result<u64> {
-        let inputs = self.gather_size_inputs(None, ctx).await?;
+        let inputs = self.gather_size_inputs(ctx).await?;

-        let size = inputs.calculate()?;
-
-        self.set_cached_synthetic_size(size);
-
-        Ok(size)
+        self.calc_and_update_cached_synthetic_size(&inputs)
    }

-    /// Cache given synthetic size and update the metric value
-    pub fn set_cached_synthetic_size(&self, size: u64) {
+    /// Calculate synthetic size, cache it and set the metric value.
+    pub fn calc_and_update_cached_synthetic_size(
+        &self,
+        inputs: &size::ModelInputs,
+    ) -> anyhow::Result<u64> {
+        let size = inputs.calculate()?;
+
        self.cached_synthetic_tenant_size
            .store(size, Ordering::Relaxed);

@@ -2470,6 +2458,8 @@ impl Tenant {
            .get_metric_with_label_values(&[&self.tenant_id.to_string()])
            .unwrap()
            .set(size);
+
+        Ok(size)
    }

    pub fn get_cached_synthetic_size(&self) -> u64 {
@@ -2767,7 +2757,6 @@ pub mod harness {
                lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout),
                max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag),
                trace_read_requests: Some(tenant_conf.trace_read_requests),
-                eviction_policy: Some(tenant_conf.eviction_policy),
            }
        }
    }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -91,7 +91,6 @@ pub struct TenantConf {
    /// to avoid eager reconnects.
    pub max_lsn_wal_lag: NonZeroU64,
    pub trace_read_requests: bool,
-    pub eviction_policy: EvictionPolicy,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -103,7 +102,6 @@ pub struct TenantConfOpt {
    pub checkpoint_distance: Option<u64>,

    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(with = "humantime_serde")]
    #[serde(default)]
    pub checkpoint_timeout: Option<Duration>,

@@ -155,34 +153,6 @@ pub struct TenantConfOpt {
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
    pub trace_read_requests: Option<bool>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
-    pub eviction_policy: Option<EvictionPolicy>,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(tag = "kind")]
-pub enum EvictionPolicy {
-    NoEviction,
-    LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
-}
-
-impl EvictionPolicy {
-    pub fn discriminant_str(&self) -> &'static str {
-        match self {
-            EvictionPolicy::NoEviction => "NoEviction",
-            EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub struct EvictionPolicyLayerAccessThreshold {
-    #[serde(with = "humantime_serde")]
-    pub period: Duration,
-    #[serde(with = "humantime_serde")]
-    pub threshold: Duration,
 }

 impl TenantConfOpt {
@@ -219,7 +189,6 @@ impl TenantConfOpt {
            trace_read_requests: self
                .trace_read_requests
                .unwrap_or(global_conf.trace_read_requests),
-            eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),
        }
    }

@@ -292,7 +261,6 @@ impl Default for TenantConf {
            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
                .expect("cannot parse default max walreceiver Lsn wal lag"),
            trace_read_requests: false,
-            eviction_policy: EvictionPolicy::NoEviction,
        }
    }
 }
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -53,6 +53,7 @@ use crate::repository::Key;
 use crate::tenant::storage_layer::InMemoryLayer;
 use crate::tenant::storage_layer::Layer;
 use anyhow::Result;
+use std::collections::HashMap;
 use std::collections::VecDeque;
 use std::ops::Range;
 use std::sync::Arc;
@@ -61,6 +62,8 @@ use utils::lsn::Lsn;
 use historic_layer_coverage::BufferedHistoricLayerCoverage;
 pub use historic_layer_coverage::Replacement;

+use self::historic_layer_coverage::LayerKey;
+
 use super::storage_layer::range_eq;

 ///
@@ -87,11 +90,18 @@ pub struct LayerMap<L: ?Sized> {
    pub frozen_layers: VecDeque<Arc<InMemoryLayer>>,

    /// Index of the historic layers optimized for search
-    historic: BufferedHistoricLayerCoverage<Arc<L>>,
+    historic: BufferedHistoricLayerCoverage<LayerKey>,
+
+    /// All layers accessible by key. Useful for:
+    /// 1. Iterating all layers
+    /// 2. Dereferencing a self.historic search result
+    /// 3. Replacing a layer with a remote/local version without
+    ///    rebuilding the self.historic index.
+    mapping: HashMap<LayerKey, Arc<L>>,

    /// L0 layers have key range Key::MIN..Key::MAX, and locating them using R-Tree search is very inefficient.
    /// So L0 layers are held in l0_delta_layers vector, in addition to the R-tree.
-    l0_delta_layers: Vec<Arc<L>>,
+    l0_delta_layers: HashMap<LayerKey, Arc<L>>,
 }

 impl<L: ?Sized> Default for LayerMap<L> {
@@ -100,8 +110,9 @@ impl<L: ?Sized> Default for LayerMap<L> {
            open_layer: None,
            next_open_layer_at: None,
            frozen_layers: VecDeque::default(),
-            l0_delta_layers: Vec::default(),
+            l0_delta_layers: HashMap::default(),
            historic: BufferedHistoricLayerCoverage::default(),
+            mapping: HashMap::default(),
        }
    }
 }
@@ -139,30 +150,6 @@ where
        self.layer_map.remove_historic_noflush(layer)
    }

-    /// Replaces existing layer iff it is the `expected`.
-    ///
-    /// If the expected layer has been removed it will not be inserted by this function.
-    ///
-    /// Returned `Replacement` describes succeeding in replacement or the reason why it could not
-    /// be done.
-    ///
-    /// TODO replacement can be done without buffering and rebuilding layer map updates.
-    ///      One way to do that is to add a layer of indirection for returned values, so
-    ///      that we can replace values only by updating a hashmap.
-    pub fn replace_historic(
-        &mut self,
-        expected: &Arc<L>,
-        new: Arc<L>,
-    ) -> anyhow::Result<Replacement<Arc<L>>> {
-        fail::fail_point!("layermap-replace-notfound", |_| Ok(
-            // this is not what happens if an L0 layer was not found a anyhow error but perhaps
-            // that should be changed. this is good enough to show a replacement failure.
-            Replacement::NotFound
-        ));
-
-        self.layer_map.replace_historic_noflush(expected, new)
-    }
-
    // We will flush on drop anyway, but this method makes it
    // more explicit that there is some work being done.
    /// Apply all updates
@@ -234,33 +221,38 @@ where
        match (latest_delta, latest_image) {
            (None, None) => None,
            (None, Some(image)) => {
+                let image = self.mapping.get(&image).unwrap();
                let lsn_floor = image.get_lsn_range().start;
                Some(SearchResult {
-                    layer: image,
+                    layer: image.clone(),
                    lsn_floor,
                })
            }
            (Some(delta), None) => {
+                let delta = self.mapping.get(&delta).unwrap();
                let lsn_floor = delta.get_lsn_range().start;
                Some(SearchResult {
-                    layer: delta,
+                    layer: delta.clone(),
                    lsn_floor,
                })
            }
            (Some(delta), Some(image)) => {
+                let image = self.mapping.get(&image).unwrap();
+                let delta = self.mapping.get(&delta).unwrap();
+
                let img_lsn = image.get_lsn_range().start;
                let image_is_newer = image.get_lsn_range().end >= delta.get_lsn_range().end;
                let image_exact_match = img_lsn + 1 == end_lsn;
                if image_is_newer || image_exact_match {
                    Some(SearchResult {
-                        layer: image,
+                        layer: image.clone(),
                        lsn_floor: img_lsn,
                    })
                } else {
                    let lsn_floor =
                        std::cmp::max(delta.get_lsn_range().start, image.get_lsn_range().start + 1);
                    Some(SearchResult {
-                        layer: delta,
+                        layer: delta.clone(),
                        lsn_floor,
                    })
                }
@@ -279,13 +271,12 @@ where
    /// Helper function for BatchedUpdates::insert_historic
    ///
    pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
-        self.historic.insert(
-            historic_layer_coverage::LayerKey::from(&*layer),
-            Arc::clone(&layer),
-        );
+        let key = LayerKey::from(&*layer);
+        self.historic.insert(key.clone(), key.clone());
+        self.mapping.insert(key.clone(), layer.clone());

        if Self::is_l0(&layer) {
-            self.l0_delta_layers.push(layer);
+            self.l0_delta_layers.insert(key, layer.clone());
        }

        NUM_ONDISK_LAYERS.inc();
@@ -297,27 +288,28 @@ where
    /// Helper function for BatchedUpdates::remove_historic
    ///
    pub fn remove_historic_noflush(&mut self, layer: Arc<L>) {
-        self.historic
-            .remove(historic_layer_coverage::LayerKey::from(&*layer));
+        let key = LayerKey::from(&*layer);
+        self.historic.remove(key.clone());
+        self.mapping.remove(&key);

        if Self::is_l0(&layer) {
-            let len_before = self.l0_delta_layers.len();
-            self.l0_delta_layers
-                .retain(|other| !Self::compare_arced_layers(other, &layer));
-            // this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,
-            // there's a chance that the comparison fails at runtime due to it comparing (pointer,
-            // vtable) pairs.
-            assert_eq!(
-                self.l0_delta_layers.len(),
-                len_before - 1,
-                "failed to locate removed historic layer from l0_delta_layers"
-            );
+            self.l0_delta_layers.remove(&key);
        }

        NUM_ONDISK_LAYERS.dec();
    }

-    pub(self) fn replace_historic_noflush(
+    /// Replaces the layer stored under the layer key with `new`.
+    ///
+    /// If the layer has been removed, `new` is not inserted and an error is returned instead.
+    ///
+    /// The `historic` index holds only `LayerKey`s, so swapping the `Arc` in the key-to-layer
+    /// hashmap is enough and no buffering or rebuild of the index is needed.
+    ///
+    /// NOTE(review): the stored layer no longer appears to be compared against `expected`
+    ///      before it is overwritten, and on success the result is always
+    ///      `Replacement::Replaced`. TODO confirm that dropping the identity check is intended.
+    pub fn replace_historic(
        &mut self,
        expected: &Arc<L>,
        new: Arc<L>,
@@ -338,29 +330,23 @@ where
            "expected and new must both be l0 deltas or neither should be: {expected_l0} != {new_l0}"
        );

-        let l0_index = if expected_l0 {
-            // find the index in case replace worked, we need to replace that as well
-            Some(
-                self.l0_delta_layers
-                    .iter()
-                    .position(|slot| Self::compare_arced_layers(slot, expected))
-                    .ok_or_else(|| anyhow::anyhow!("existing l0 delta layer was not found"))?,
-            )
-        } else {
-            None
+        use std::collections::hash_map::Entry;
+        // L0 layers are also held in `l0_delta_layers`, so that entry must be swapped too.
+        if expected_l0 {
+            match self.l0_delta_layers.entry(key.clone()) {
+                Entry::Occupied(mut entry) => entry.insert(new.clone()),
+                Entry::Vacant(_) => anyhow::bail!("layer doesn't exist"),
+            };
        };

-        let replaced = self.historic.replace(&key, new.clone(), |existing| {
-            Self::compare_arced_layers(existing, expected)
-        });
+        match self.mapping.entry(key.clone()) {
+            Entry::Occupied(mut entry) => entry.insert(new.clone()),
+            Entry::Vacant(_) => anyhow::bail!("layer doesn't exist"),
+        };

-        if let Replacement::Replaced { .. } = &replaced {
-            if let Some(index) = l0_index {
-                self.l0_delta_layers[index] = new;
-            }
-        }
-
-        Ok(replaced)
+        Ok(Replacement::Replaced {
+            in_buffered: false,
+        })
    }

    /// Helper function for BatchedUpdates::drop.
@@ -388,8 +374,8 @@ where
        let start = key.start.to_i128();
        let end = key.end.to_i128();

-        let layer_covers = |layer: Option<Arc<L>>| match layer {
-            Some(layer) => layer.get_lsn_range().start >= lsn.start,
+        let layer_covers = |key: Option<&LayerKey>| match key {
+            Some(key) => self.mapping.get(key).unwrap().get_lsn_range().start >= lsn.start,
            None => false,
        };

@@ -409,7 +395,7 @@ where
    }

    pub fn iter_historic_layers(&self) -> impl '_ + Iterator<Item = Arc<L>> {
-        self.historic.iter()
+        self.mapping.values().cloned()
    }

    ///
@@ -436,10 +422,13 @@ where
        // Initialize loop variables
        let mut coverage: Vec<(Range<Key>, Option<Arc<L>>)> = vec![];
        let mut current_key = start;
-        let mut current_val = version.image_coverage.query(start);
+        let mut current_val = version.image_coverage.query(start)
+            .map(|key| self.mapping.get(key).unwrap().clone());

        // Loop through the change events and push intervals
        for (change_key, change_val) in version.image_coverage.range(start..end) {
+            let change_val = change_val.map(|key| self.mapping.get(key).unwrap().clone());
+
            let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
            coverage.push((kr, current_val.take()));
            current_key = change_key;
@@ -533,6 +522,7 @@ where
        for (change_key, change_val) in version.delta_coverage.range(start..end) {
            // If there's a relevant delta in this part, add 1 and recurse down
            if let Some(val) = current_val {
+                let val = self.mapping.get(&val).unwrap().clone();
                if val.get_lsn_range().end > lsn.start {
                    let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
                    let lr = lsn.start..val.get_lsn_range().start;
@@ -555,6 +545,7 @@ where

        // Consider the last part
        if let Some(val) = current_val {
+            let val = self.mapping.get(&val).unwrap().clone();
            if val.get_lsn_range().end > lsn.start {
                let kr = Key::from_i128(current_key)..Key::from_i128(end);
                let lr = lsn.start..val.get_lsn_range().start;
@@ -711,7 +702,7 @@ where

    /// Return all L0 delta layers
    pub fn get_level0_deltas(&self) -> Result<Vec<Arc<L>>> {
-        Ok(self.l0_delta_layers.clone())
+        Ok(self.l0_delta_layers.values().cloned().collect())
    }

    /// debugging function to print out the contents of the layer map
@@ -736,32 +727,6 @@ where
        println!("End dump LayerMap");
        Ok(())
    }
-
-    /// Similar to `Arc::ptr_eq`, but only compares the object pointers, not vtables.
-    ///
-    /// Returns `true` if the two `Arc` point to the same layer, false otherwise.
-    #[inline(always)]
-    pub fn compare_arced_layers(left: &Arc<L>, right: &Arc<L>) -> bool {
-        // "dyn Trait" objects are "fat pointers" in that they have two components:
-        // - pointer to the object
-        // - pointer to the vtable
-        //
-        // rust does not provide a guarantee that these vtables are unique, but however
-        // `Arc::ptr_eq` as of writing (at least up to 1.67) uses a comparison where both the
-        // pointer and the vtable need to be equal.
-        //
-        // See: https://github.com/rust-lang/rust/issues/103763
-        //
-        // A future version of rust will most likely use this form below, where we cast each
-        // pointer into a pointer to unit, which drops the inaccessible vtable pointer, making it
-        // not affect the comparison.
-        //
-        // See: https://github.com/rust-lang/rust/pull/106450
-        let left = Arc::as_ptr(left) as *const ();
-        let right = Arc::as_ptr(right) as *const ();
-
-        left == right
-    }
 }

 #[cfg(test)]
@@ -822,7 +787,6 @@ mod tests {
            assert_eq!(count_layer_in(&map, &remote), expected_in_counts);

            let replaced = map
-                .batch_update()
                .replace_historic(&remote, downloaded.clone())
                .expect("name derived attributes are the same");
            assert!(
--- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
@@ -12,7 +12,7 @@ use super::layer_coverage::LayerCoverageTuple;
 /// These three values are enough to uniquely identify a layer, since
 /// a layer is obligated to contain all contents within range, so two
 /// deltas (or images) with the same range have identical content.
-#[derive(Debug, PartialEq, Eq, Clone)]
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct LayerKey {
    // TODO I use i128 and u64 because it was easy for prototyping,
    //      testing, and benchmarking. If we can use the Lsn and Key
@@ -438,46 +438,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
    ///
    /// Returns a `Replacement` value describing the outcome; only the case of
    /// `Replacement::Replaced` modifies the map and requires a rebuild.
-    pub fn replace<F>(
-        &mut self,
-        layer_key: &LayerKey,
-        new: Value,
-        check_expected: F,
-    ) -> Replacement<Value>
-    where
-        F: FnOnce(&Value) -> bool,
-    {
-        let (slot, in_buffered) = match self.buffer.get(layer_key) {
-            Some(inner @ Some(_)) => {
-                // we compare against the buffered version, because there will be a later
-                // rebuild before querying
-                (inner.as_ref(), true)
-            }
-            Some(None) => {
-                // buffer has removal for this key; it will not be equivalent by any check_expected.
-                return Replacement::RemovalBuffered;
-            }
-            None => {
-                // no pending modification for the key, check layers
-                (self.layers.get(layer_key), false)
-            }
-        };
-
-        match slot {
-            Some(existing) if !check_expected(existing) => {
-                // unfortunate clone here, but otherwise the nll borrowck grows the region of
-                // 'a to cover the whole function, and we could not mutate in the other
-                // Some(existing) branch
-                Replacement::Unexpected(existing.clone())
-            }
-            None => Replacement::NotFound,
-            Some(_existing) => {
-                self.insert(layer_key.to_owned(), new);
-                Replacement::Replaced { in_buffered }
-            }
-        }
-    }
-
    pub fn rebuild(&mut self) {
        // Find the first LSN that needs to be rebuilt
        let rebuild_since: u64 = match self.buffer.iter().next() {
@@ -521,17 +481,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
        )
    }

-    /// Iterate all the layers
-    pub fn iter(&self) -> impl '_ + Iterator<Item = Value> {
-        // NOTE we can actually perform this without rebuilding,
-        //      but it's not necessary for now.
-        if !self.buffer.is_empty() {
-            panic!("rebuild pls")
-        }
-
-        self.layers.values().cloned()
-    }
-
    /// Return a reference to a queryable map, assuming all updates
    /// have already been processed using self.rebuild()
    pub fn get(&self) -> anyhow::Result<&HistoricLayerCoverage<Value>> {
@@ -670,139 +619,3 @@ fn test_retroactive_simple() {
        assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
    }
 }
-
-#[test]
-fn test_retroactive_replacement() {
-    let mut map = BufferedHistoricLayerCoverage::new();
-
-    let keys = [
-        LayerKey {
-            key: 0..5,
-            lsn: 100..101,
-            is_image: true,
-        },
-        LayerKey {
-            key: 3..9,
-            lsn: 110..111,
-            is_image: true,
-        },
-        LayerKey {
-            key: 4..6,
-            lsn: 120..121,
-            is_image: true,
-        },
-    ];
-
-    let layers = [
-        "Image 1".to_string(),
-        "Image 2".to_string(),
-        "Image 3".to_string(),
-    ];
-
-    for (key, layer) in keys.iter().zip(layers.iter()) {
-        map.insert(key.to_owned(), layer.to_owned());
-    }
-
-    // rebuild is not necessary here, because replace works for both buffered updates and existing
-    // layers.
-
-    for (key, orig_layer) in keys.iter().zip(layers.iter()) {
-        let replacement = format!("Remote {orig_layer}");
-
-        // evict
-        let ret = map.replace(key, replacement.clone(), |l| l == orig_layer);
-        assert!(
-            matches!(ret, Replacement::Replaced { .. }),
-            "replace {orig_layer}: {ret:?}"
-        );
-        map.rebuild();
-
-        let at = key.lsn.end + 1;
-
-        let version = map.get().expect("rebuilt").get_version(at).unwrap();
-        assert_eq!(
-            version.image_coverage.query(4).as_deref(),
-            Some(replacement.as_str()),
-            "query for 4 at version {at} after eviction",
-        );
-
-        // download
-        let ret = map.replace(key, orig_layer.clone(), |l| l == &replacement);
-        assert!(
-            matches!(ret, Replacement::Replaced { .. }),
-            "replace {orig_layer} back: {ret:?}"
-        );
-        map.rebuild();
-        let version = map.get().expect("rebuilt").get_version(at).unwrap();
-        assert_eq!(
-            version.image_coverage.query(4).as_deref(),
-            Some(orig_layer.as_str()),
-            "query for 4 at version {at} after download",
-        );
-    }
-}
-
-#[test]
-fn missing_key_is_not_inserted_with_replace() {
-    let mut map = BufferedHistoricLayerCoverage::new();
-    let key = LayerKey {
-        key: 0..5,
-        lsn: 100..101,
-        is_image: true,
-    };
-
-    let ret = map.replace(&key, "should not replace", |_| true);
-    assert!(matches!(ret, Replacement::NotFound), "{ret:?}");
-    map.rebuild();
-    assert!(map
-        .get()
-        .expect("no changes to rebuild")
-        .get_version(102)
-        .is_none());
-}
-
-#[test]
-fn replacing_buffered_insert_and_remove() {
-    let mut map = BufferedHistoricLayerCoverage::new();
-    let key = LayerKey {
-        key: 0..5,
-        lsn: 100..101,
-        is_image: true,
-    };
-
-    map.insert(key.clone(), "Image 1");
-    let ret = map.replace(&key, "Remote Image 1", |&l| l == "Image 1");
-    assert!(
-        matches!(ret, Replacement::Replaced { in_buffered: true }),
-        "{ret:?}"
-    );
-    map.rebuild();
-
-    assert_eq!(
-        map.get()
-            .expect("rebuilt")
-            .get_version(102)
-            .unwrap()
-            .image_coverage
-            .query(4),
-        Some("Remote Image 1")
-    );
-
-    map.remove(key.clone());
-    let ret = map.replace(&key, "should not replace", |_| true);
-    assert!(
-        matches!(ret, Replacement::RemovalBuffered),
-        "cannot replace after scheduled remove: {ret:?}"
-    );
-
-    map.rebuild();
-
-    let ret = map.replace(&key, "should not replace", |_| true);
-    assert!(
-        matches!(ret, Replacement::NotFound),
-        "cannot replace after remove + rebuild: {ret:?}"
-    );
-
-    let at_version = map.get().expect("rebuilt").get_version(102);
-    assert!(at_version.is_none());
-}
--- a/pageserver/src/tenant/layer_map/layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/layer_coverage.rs
@@ -101,24 +101,24 @@ impl<Value: Clone> LayerCoverage<Value> {
    /// Get the latest (by lsn.end) layer at a given key
    ///
    /// Complexity: O(log N)
-    pub fn query(&self, key: i128) -> Option<Value> {
+    pub fn query(&self, key: i128) -> Option<&Value> {
        self.nodes
            .range(..=key)
            .rev()
            .next()?
            .1
            .as_ref()
-            .map(|(_, v)| v.clone())
+            .map(|(_, v)| v)
    }

    /// Iterate the changes in layer coverage in a given range. You will likely
    /// want to start with self.query(key.start), and then follow up with self.range
    ///
    /// Complexity: O(log N + result_size)
-    pub fn range(&self, key: Range<i128>) -> impl '_ + Iterator<Item = (i128, Option<Value>)> {
+    pub fn range(&self, key: Range<i128>) -> impl '_ + Iterator<Item = (i128, Option<&Value>)> {
        self.nodes
            .range(key)
-            .map(|(k, v)| (*k, v.as_ref().map(|x| x.1.clone())))
+            .map(|(k, v)| (*k, v.as_ref().map(|x| &x.1)))
    }

    /// O(1) clone
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -540,11 +540,13 @@ where
    }
 }

+#[cfg(feature = "testing")]
 use {
    crate::repository::GcResult, pageserver_api::models::TimelineGcRequest,
    utils::http::error::ApiError,
 };

+#[cfg(feature = "testing")]
 pub async fn immediate_gc(
    tenant_id: TenantId,
    timeline_id: TimelineId,
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -571,15 +571,14 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    /// Launch a delete operation in the background.
    ///
-    /// The operation does not modify local state but assumes the local files have already been
-    /// deleted, and is used to mirror those changes to remote.
+    /// Launch a delete operation in the background.
    ///
    /// Note: This schedules an index file upload before the deletions.  The
    /// deletion won't actually be performed, until any previously scheduled
    /// upload operations, and the index file upload, have completed
    /// succesfully.
+    ///
    pub fn schedule_layer_file_deletion(
        self: &Arc<Self>,
        names: &[LayerFileName],
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -13,7 +13,6 @@ use crate::task_mgr::TaskKind;
 use crate::walrecord::NeonWalRecord;
 use anyhow::Result;
 use bytes::Bytes;
-use either::Either;
 use enum_map::EnumMap;
 use enumset::EnumSet;
 use pageserver_api::models::LayerAccessKind;
@@ -93,23 +92,7 @@ pub enum ValueReconstructResult {
 }

 #[derive(Debug)]
-pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
-
-/// This struct holds two instances of [`LayerAccessStatsInner`].
-/// Accesses are recorded to both instances.
-/// The `for_scraping_api`instance can be reset from the management API via [`LayerAccessStatsReset`].
-/// The `for_eviction_policy` is never reset.
-#[derive(Debug, Default, Clone)]
-struct LayerAccessStatsLocked {
-    for_scraping_api: LayerAccessStatsInner,
-    for_eviction_policy: LayerAccessStatsInner,
-}
-
-impl LayerAccessStatsLocked {
-    fn iter_mut(&mut self) -> impl Iterator<Item = &mut LayerAccessStatsInner> {
-        [&mut self.for_scraping_api, &mut self.for_eviction_policy].into_iter()
-    }
-}
+pub struct LayerAccessStats(Mutex<LayerAccessStatsInner>);

 #[derive(Debug, Default, Clone)]
 struct LayerAccessStatsInner {
@@ -120,11 +103,11 @@ struct LayerAccessStatsInner {
    last_residence_changes: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
 }

-#[derive(Debug, Clone, Copy)]
-pub(super) struct LayerAccessStatFullDetails {
-    pub(super) when: SystemTime,
-    pub(super) task_kind: TaskKind,
-    pub(super) access_kind: LayerAccessKind,
+#[derive(Debug, Clone)]
+struct LayerAccessStatFullDetails {
+    when: SystemTime,
+    task_kind: TaskKind,
+    access_kind: LayerAccessKind,
 }

 #[derive(Clone, Copy, strum_macros::EnumString)]
@@ -143,7 +126,7 @@ fn system_time_to_millis_since_epoch(ts: &SystemTime) -> u64 {
 }

 impl LayerAccessStatFullDetails {
-    fn as_api_model(&self) -> pageserver_api::models::LayerAccessStatFullDetails {
+    fn to_api_model(&self) -> pageserver_api::models::LayerAccessStatFullDetails {
        let Self {
            when,
            task_kind,
@@ -159,13 +142,13 @@ impl LayerAccessStatFullDetails {

 impl LayerAccessStats {
    pub(crate) fn for_loading_layer(status: LayerResidenceStatus) -> Self {
-        let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
+        let new = LayerAccessStats(Mutex::new(LayerAccessStatsInner::default()));
        new.record_residence_event(status, LayerResidenceEventReason::LayerLoad);
        new
    }

    pub(crate) fn for_new_layer_file() -> Self {
-        let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
+        let new = LayerAccessStats(Mutex::new(LayerAccessStatsInner::default()));
        new.record_residence_event(
            LayerResidenceStatus::Resident,
            LayerResidenceEventReason::LayerCreate,
@@ -193,43 +176,38 @@ impl LayerAccessStats {
        status: LayerResidenceStatus,
        reason: LayerResidenceEventReason,
    ) {
-        let mut locked = self.0.lock().unwrap();
-        locked.iter_mut().for_each(|inner| {
-            inner
-                .last_residence_changes
-                .write(LayerResidenceEvent::new(status, reason))
-        });
+        let mut inner = self.0.lock().unwrap();
+        inner
+            .last_residence_changes
+            .write(LayerResidenceEvent::new(status, reason));
    }

    fn record_access(&self, access_kind: LayerAccessKind, task_kind: TaskKind) {
+        let mut inner = self.0.lock().unwrap();
        let this_access = LayerAccessStatFullDetails {
            when: SystemTime::now(),
            task_kind,
            access_kind,
        };
-
-        let mut locked = self.0.lock().unwrap();
-        locked.iter_mut().for_each(|inner| {
-            inner.first_access.get_or_insert(this_access);
-            inner.count_by_access_kind[access_kind] += 1;
-            inner.task_kind_flag |= task_kind;
-            inner.last_accesses.write(this_access);
-        })
+        inner
+            .first_access
+            .get_or_insert_with(|| this_access.clone());
+        inner.count_by_access_kind[access_kind] += 1;
+        inner.task_kind_flag |= task_kind;
+        inner.last_accesses.write(this_access);
    }
-
-    fn as_api_model(
+    fn to_api_model(
        &self,
        reset: LayerAccessStatsReset,
    ) -> pageserver_api::models::LayerAccessStats {
-        let mut locked = self.0.lock().unwrap();
-        let inner = &mut locked.for_scraping_api;
+        let mut inner = self.0.lock().unwrap();
        let LayerAccessStatsInner {
            first_access,
            count_by_access_kind,
            task_kind_flag,
            last_accesses,
            last_residence_changes,
-        } = inner;
+        } = &*inner;
        let ret = pageserver_api::models::LayerAccessStats {
            access_count_by_access_kind: count_by_access_kind
                .iter()
@@ -239,8 +217,8 @@ impl LayerAccessStats {
                .iter()
                .map(|task_kind| task_kind.into()) // into static str, powered by strum_macros
                .collect(),
-            first: first_access.as_ref().map(|a| a.as_api_model()),
-            accesses_history: last_accesses.map(|m| m.as_api_model()),
+            first: first_access.as_ref().map(|a| a.to_api_model()),
+            accesses_history: last_accesses.map(|m| m.to_api_model()),
            residence_events_history: last_residence_changes.clone(),
        };
        match reset {
@@ -254,20 +232,6 @@ impl LayerAccessStats {
        }
        ret
    }
-
-    pub(super) fn most_recent_access_or_residence_event(
-        &self,
-    ) -> Either<LayerAccessStatFullDetails, LayerResidenceEvent> {
-        let locked = self.0.lock().unwrap();
-        let inner = &locked.for_eviction_policy;
-        match inner.last_accesses.recent() {
-            Some(a) => Either::Left(*a),
-            None => match inner.last_residence_changes.recent() {
-                Some(e) => Either::Right(e.clone()),
-                None => unreachable!("constructors for LayerAccessStats ensure that there's always a residence change event"),
-            }
-        }
-    }
 }

 /// Supertrait of the [`Layer`] trait that captures the bare minimum interface
@@ -364,7 +328,7 @@ pub trait PersistentLayer: Layer {
    }

    /// Permanently remove this layer from disk.
-    fn delete_resident_layer_file(&self) -> Result<()>;
+    fn delete(&self) -> Result<()>;

    fn downcast_remote_layer(self: Arc<Self>) -> Option<std::sync::Arc<RemoteLayer>> {
        None
@@ -485,14 +449,3 @@ enum PathOrConf {
    Path(PathBuf),
    Conf(&'static PageServerConf),
 }
-
-/// Range wrapping newtype, which uses display to render Debug.
-///
-/// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.
-struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
-
-impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}..{}", self.0.start, self.0.end)
-    }
-}
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -194,10 +194,8 @@ pub struct DeltaLayer {

 impl std::fmt::Debug for DeltaLayer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        use super::RangeDisplayDebug;
-
        f.debug_struct("DeltaLayer")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
+            .field("key_range", &self.key_range)
            .field("lsn_range", &self.lsn_range)
            .field("file_size", &self.file_size)
            .field("inner", &self.inner)
@@ -438,7 +436,7 @@ impl PersistentLayer for DeltaLayer {
        ))
    }

-    fn delete_resident_layer_file(&self) -> Result<()> {
+    fn delete(&self) -> Result<()> {
        // delete underlying file
        fs::remove_file(self.path())?;
        Ok(())
@@ -452,7 +450,7 @@ impl PersistentLayer for DeltaLayer {
        let layer_file_name = self.filename().file_name();
        let lsn_range = self.get_lsn_range();

-        let access_stats = self.access_stats.as_api_model(reset);
+        let access_stats = self.access_stats.to_api_model(reset);

        HistoricLayerInfo::Delta {
            layer_file_name,
--- a/pageserver/src/tenant/storage_layer/filename.rs
+++ b/pageserver/src/tenant/storage_layer/filename.rs
@@ -10,23 +10,12 @@ use std::str::FromStr;
 use utils::lsn::Lsn;

 // Note: Timeline::load_layer_map() relies on this sort order
-#[derive(PartialEq, Eq, Clone, Hash)]
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct DeltaFileName {
    pub key_range: Range<Key>,
    pub lsn_range: Range<Lsn>,
 }

-impl std::fmt::Debug for DeltaFileName {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        use super::RangeDisplayDebug;
-
-        f.debug_struct("DeltaFileName")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
-            .field("lsn_range", &self.lsn_range)
-            .finish()
-    }
-}
-
 impl PartialOrd for DeltaFileName {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
@@ -111,23 +100,12 @@ impl fmt::Display for DeltaFileName {
    }
 }

-#[derive(PartialEq, Eq, Clone, Hash)]
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct ImageFileName {
    pub key_range: Range<Key>,
    pub lsn: Lsn,
 }

-impl std::fmt::Debug for ImageFileName {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        use super::RangeDisplayDebug;
-
-        f.debug_struct("ImageFileName")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
-            .field("lsn", &self.lsn)
-            .finish()
-    }
-}
-
 impl PartialOrd for ImageFileName {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -119,10 +119,8 @@ pub struct ImageLayer {

 impl std::fmt::Debug for ImageLayer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        use super::RangeDisplayDebug;
-
        f.debug_struct("ImageLayer")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
+            .field("key_range", &self.key_range)
            .field("file_size", &self.file_size)
            .field("lsn", &self.lsn)
            .field("inner", &self.inner)
@@ -252,7 +250,7 @@ impl PersistentLayer for ImageLayer {
        unimplemented!();
    }

-    fn delete_resident_layer_file(&self) -> Result<()> {
+    fn delete(&self) -> Result<()> {
        // delete underlying file
        fs::remove_file(self.path())?;
        Ok(())
@@ -271,7 +269,7 @@ impl PersistentLayer for ImageLayer {
            layer_file_size: Some(self.file_size),
            lsn_start: lsn_range.start,
            remote: false,
-            access_stats: self.access_stats.as_api_model(reset),
+            access_stats: self.access_stats.to_api_model(reset),
        }
    }

--- a/pageserver/src/tenant/storage_layer/remote_layer.rs
+++ b/pageserver/src/tenant/storage_layer/remote_layer.rs
@@ -49,17 +49,6 @@ pub struct RemoteLayer {
    access_stats: LayerAccessStats,

    pub(crate) ongoing_download: Arc<tokio::sync::Semaphore>,
-
-    /// Has `LayerMap::replace` failed for this (true) or not (false).
-    ///
-    /// Used together with [`ongoing_download`] semaphore in `Timeline::download_remote_layer`.
-    /// The field is used to mark a RemoteLayer permanently (until restart or ignore+load)
-    /// unprocessable, because a LayerMap::replace failed.
-    ///
-    /// It is very unlikely to accumulate these in the Timeline's LayerMap, but having this avoids
-    /// a possible fast loop between `Timeline::get_reconstruct_data` and
-    /// `Timeline::download_remote_layer`, which also logs.
-    pub(crate) download_replacement_failure: std::sync::atomic::AtomicBool,
 }

 impl std::fmt::Debug for RemoteLayer {
@@ -155,8 +144,8 @@ impl PersistentLayer for RemoteLayer {
        bail!("cannot iterate a remote layer");
    }

-    fn delete_resident_layer_file(&self) -> Result<()> {
-        bail!("remote layer has no layer file");
+    fn delete(&self) -> Result<()> {
+        Ok(())
    }

    fn downcast_remote_layer<'a>(self: Arc<Self>) -> Option<std::sync::Arc<RemoteLayer>> {
@@ -182,7 +171,7 @@ impl PersistentLayer for RemoteLayer {
                lsn_start: lsn_range.start,
                lsn_end: lsn_range.end,
                remote: true,
-                access_stats: self.access_stats.as_api_model(reset),
+                access_stats: self.access_stats.to_api_model(reset),
            }
        } else {
            HistoricLayerInfo::Image {
@@ -190,7 +179,7 @@ impl PersistentLayer for RemoteLayer {
                layer_file_size: self.layer_metadata.file_size(),
                lsn_start: lsn_range.start,
                remote: true,
-                access_stats: self.access_stats.as_api_model(reset),
+                access_stats: self.access_stats.to_api_model(reset),
            }
        }
    }
@@ -218,7 +207,6 @@ impl RemoteLayer {
            file_name: fname.to_owned().into(),
            layer_metadata: layer_metadata.clone(),
            ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)),
-            download_replacement_failure: std::sync::atomic::AtomicBool::default(),
            access_stats,
        }
    }
@@ -240,7 +228,6 @@ impl RemoteLayer {
            file_name: fname.to_owned().into(),
            layer_metadata: layer_metadata.clone(),
            ongoing_download: Arc::new(tokio::sync::Semaphore::new(1)),
-            download_replacement_failure: std::sync::atomic::AtomicBool::default(),
            access_stats,
        }
    }
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -3,7 +3,7 @@

 use std::ops::ControlFlow;
 use std::sync::Arc;
-use std::time::{Duration, Instant};
+use std::time::Duration;

 use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::TENANT_TASK_EVENTS;
@@ -11,7 +11,6 @@ use crate::task_mgr;
 use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::mgr;
 use crate::tenant::{Tenant, TenantState};
-use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::id::TenantId;

@@ -54,55 +53,37 @@ async fn compaction_loop(tenant_id: TenantId) {
    info!("starting");
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
-        let cancel = task_mgr::shutdown_token();
        let ctx = RequestContext::todo_child(TaskKind::Compaction, DownloadBehavior::Download);
-        let mut first = true;
        loop {
            trace!("waking up");

            let tenant = tokio::select! {
-                _ = cancel.cancelled() => {
+                _ = task_mgr::shutdown_watcher() => {
                    info!("received cancellation request");
-                    return;
+                return;
                },
                tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result {
                    ControlFlow::Break(()) => return,
                    ControlFlow::Continue(tenant) => tenant,
                },
-            };
+        };

-            let period = tenant.get_compaction_period();
-
-            // TODO: we shouldn't need to await to find tenant and this could be moved outside of
-            // loop, #3501. There are also additional "allowed_errors" in tests.
-            if first {
-                first = false;
-                if random_init_delay(period, &cancel).await.is_err() {
-                    break;
-                }
-            }
-
-            let started_at = Instant::now();
-
-            let sleep_duration = if period == Duration::ZERO {
+            let mut sleep_duration = tenant.get_compaction_period();
+            if sleep_duration == Duration::ZERO {
                info!("automatic compaction is disabled");
                // check again in 10 seconds, in case it's been enabled again.
-                Duration::from_secs(10)
+                sleep_duration = Duration::from_secs(10);
            } else {
                // Run compaction
                if let Err(e) = tenant.compaction_iteration(&ctx).await {
-                    error!("Compaction failed, retrying in {:?}: {e:?}", wait_duration);
-                    wait_duration
-                } else {
-                    period
+                    sleep_duration = wait_duration;
+                    error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
                }
-            };
-
-            warn_when_period_overrun(started_at.elapsed(), period, "compaction");
+            }

            // Sleep
            tokio::select! {
-                _ = cancel.cancelled() => {
+                _ = task_mgr::shutdown_watcher() => {
                    info!("received cancellation request during idling");
                    break;
                },
@@ -124,16 +105,14 @@ async fn gc_loop(tenant_id: TenantId) {
    info!("starting");
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
-        let cancel = task_mgr::shutdown_token();
        // GC might require downloading, to find the cutoff LSN that corresponds to the
        // cutoff specified as time.
        let ctx = RequestContext::todo_child(TaskKind::GarbageCollector, DownloadBehavior::Download);
-        let mut first = true;
        loop {
            trace!("waking up");

            let tenant = tokio::select! {
-                _ = cancel.cancelled() => {
+                _ = task_mgr::shutdown_watcher() => {
                    info!("received cancellation request");
                    return;
                },
@@ -143,38 +122,27 @@ async fn gc_loop(tenant_id: TenantId) {
                },
            };

-            let period = tenant.get_gc_period();
-
-            if first {
-                first = false;
-                if random_init_delay(period, &cancel).await.is_err() {
-                    break;
+            let gc_period = tenant.get_gc_period();
+            let gc_horizon = tenant.get_gc_horizon();
+            let mut sleep_duration = gc_period;
+            if sleep_duration == Duration::ZERO {
+                info!("automatic GC is disabled");
+                // check again in 10 seconds, in case it's been enabled again.
+                sleep_duration = Duration::from_secs(10);
+            } else {
+                // Run gc
+                if gc_horizon > 0 {
+                    if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx).await
+                    {
+                        sleep_duration = wait_duration;
+                        error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
+                    }
                }
            }

-            let started_at = Instant::now();
-
-            let gc_horizon = tenant.get_gc_horizon();
-            let sleep_duration = if period == Duration::ZERO || gc_horizon == 0 {
-                info!("automatic GC is disabled");
-                // check again in 10 seconds, in case it's been enabled again.
-                Duration::from_secs(10)
-            } else {
-                // Run gc
-                let res = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &ctx).await;
-                if let Err(e) = res {
-                    error!("Gc failed, retrying in {:?}: {e:?}", wait_duration);
-                    wait_duration
-                } else {
-                    period
-                }
-            };
-
-            warn_when_period_overrun(started_at.elapsed(), period, "gc");
-
            // Sleep
            tokio::select! {
-                _ = cancel.cancelled() => {
+                _ = task_mgr::shutdown_watcher() => {
                    info!("received cancellation request during idling");
                    break;
                },
@@ -229,49 +197,3 @@ async fn wait_for_active_tenant(
        }
    }
 }
-
-#[derive(thiserror::Error, Debug)]
-#[error("cancelled")]
-pub(crate) struct Cancelled;
-
-/// Provide a random delay for background task initialization.
-///
-/// This delay prevents a thundering herd of background tasks and will likely keep them running on
-/// different periods for more stable load.
-pub(crate) async fn random_init_delay(
-    period: Duration,
-    cancel: &CancellationToken,
-) -> Result<(), Cancelled> {
-    use rand::Rng;
-
-    let d = {
-        let mut rng = rand::thread_rng();
-
-        // gen_range asserts that the range cannot be empty, which it could be because period can
-        // be set to zero to disable gc or compaction, so lets set it to be at least 10s.
-        let period = std::cmp::max(period, Duration::from_secs(10));
-
-        // semi-ok default as the source of jitter
-        rng.gen_range(Duration::ZERO..=period)
-    };
-
-    tokio::select! {
-        _ = cancel.cancelled() => Err(Cancelled),
-        _ = tokio::time::sleep(d) => Ok(()),
-    }
-}
-
-pub(crate) fn warn_when_period_overrun(elapsed: Duration, period: Duration, task: &str) {
-    // Duration::ZERO will happen because it's the "disable [bgtask]" value.
-    if elapsed >= period && period != Duration::ZERO {
-        // humantime does no significant digits clamping whereas Duration's debug is a bit more
-        // intelligent. however it makes sense to keep the "configuration format" for period, even
-        // though there's no way to output the actual config value.
-        warn!(
-            ?elapsed,
-            period = %humantime::format_duration(period),
-            task,
-            "task iteration took longer than the configured period"
-        );
-    }
-}
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1,6 +1,5 @@
 //!

-mod eviction_task;
 mod walreceiver;

 use anyhow::{anyhow, bail, ensure, Context};
@@ -19,7 +18,6 @@ use tracing::*;
 use utils::id::TenantTimelineId;

 use std::cmp::{max, min, Ordering};
-use std::collections::BinaryHeap;
 use std::collections::HashMap;
 use std::fs;
 use std::ops::{Deref, Range};
@@ -49,7 +47,7 @@ use crate::metrics::TimelineMetrics;
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
 use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
-use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
+use crate::tenant::config::TenantConfOpt;
 use pageserver_api::reltag::RelTag;

 use postgres_connection::PgConnectionConfig;
@@ -83,25 +81,6 @@ enum FlushLoopState {
    Exited,
 }

-/// Wrapper for key range to provide reverse ordering by range length for BinaryHeap
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct Hole {
-    key_range: Range<Key>,
-    coverage_size: usize,
-}
-
-impl Ord for Hole {
-    fn cmp(&self, other: &Self) -> Ordering {
-        other.coverage_size.cmp(&self.coverage_size) // inverse order
-    }
-}
-
-impl PartialOrd for Hole {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
 pub struct Timeline {
    conf: &'static PageServerConf,
    tenant_conf: Arc<RwLock<TenantConfOpt>>,
@@ -312,9 +291,18 @@ impl LogicalSize {
        //                  we change the type.
        match self.initial_logical_size.get() {
            Some(initial_size) => {
-                initial_size.checked_add_signed(size_increment)
-                    .with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
-                    .map(CurrentLogicalSize::Exact)
+                let absolute_size_increment = u64::try_from(
+                    size_increment
+                        .checked_abs()
+                        .with_context(|| format!("Size added after initial {size_increment} is not expected to be i64::MIN"))?,
+                    ).expect("casting nonnegative i64 to u64 should not fail");
+
+                if size_increment < 0 {
+                    initial_size.checked_sub(absolute_size_increment)
+                } else {
+                    initial_size.checked_add(absolute_size_increment)
+                }.with_context(|| format!("Overflow during logical size calculation, initial_size: {initial_size}, size_increment: {size_increment}"))
+                .map(CurrentLogicalSize::Exact)
            }
            None => {
                let non_negative_size_increment = u64::try_from(size_increment).unwrap_or(0);
@@ -633,10 +621,7 @@ impl Timeline {
        self.flush_frozen_layers_and_wait().await
    }

-    /// Outermost timeline compaction operation; downloads needed layers.
    pub async fn compact(&self, ctx: &RequestContext) -> anyhow::Result<()> {
-        const ROUNDS: usize = 2;
-
        let last_record_lsn = self.get_last_record_lsn();

        // Last record Lsn could be zero in case the timeline was just created
@@ -645,86 +630,6 @@ impl Timeline {
            return Ok(());
        }

-        // retry two times to allow first round to find layers which need to be downloaded, then
-        // download them, then retry compaction
-        for round in 0..ROUNDS {
-            // should we error out with the most specific error?
-            let last_round = round == ROUNDS - 1;
-
-            let res = self.compact_inner(ctx).await;
-
-            // If `create_image_layers' or `compact_level0` scheduled any
-            // uploads or deletions, but didn't update the index file yet,
-            // do it now.
-            //
-            // This isn't necessary for correctness, the remote state is
-            // consistent without the uploads and deletions, and we would
-            // update the index file on next flush iteration too. But it
-            // could take a while until that happens.
-            //
-            // Additionally, only do this once before we return from this function.
-            if last_round || res.is_ok() {
-                if let Some(remote_client) = &self.remote_client {
-                    remote_client.schedule_index_upload_for_file_changes()?;
-                }
-            }
-
-            let rls = match res {
-                Ok(()) => return Ok(()),
-                Err(CompactionError::DownloadRequired(rls)) if !last_round => {
-                    // this can be done at most one time before exiting, waiting
-                    rls
-                }
-                Err(CompactionError::DownloadRequired(rls)) => {
-                    anyhow::bail!("Compaction requires downloading multiple times (last was {} layers), possibly battling against eviction", rls.len())
-                }
-                Err(CompactionError::Other(e)) => {
-                    return Err(e);
-                }
-            };
-
-            // this path can be visited in the second round of retrying, if first one found that we
-            // must first download some remote layers
-            let total = rls.len();
-
-            let mut downloads = rls
-                .into_iter()
-                .map(|rl| self.download_remote_layer(rl))
-                .collect::<futures::stream::FuturesUnordered<_>>();
-
-            let mut failed = 0;
-
-            let cancelled = task_mgr::shutdown_watcher();
-            tokio::pin!(cancelled);
-
-            loop {
-                tokio::select! {
-                    _ = &mut cancelled => anyhow::bail!("Cancelled while downloading remote layers"),
-                    res = downloads.next() => {
-                        match res {
-                            Some(Ok(())) => {},
-                            Some(Err(e)) => {
-                                warn!("Downloading remote layer for compaction failed: {e:#}");
-                                failed += 1;
-                            }
-                            None => break,
-                        }
-                    }
-                }
-            }
-
-            if failed != 0 {
-                anyhow::bail!("{failed} out of {total} layers failed to download, retrying later");
-            }
-
-            // if everything downloaded fine, lets try again
-        }
-
-        unreachable!("retry loop exits")
-    }
-
-    /// Compaction which might need to be retried after downloading remote layers.
-    async fn compact_inner(&self, ctx: &RequestContext) -> Result<(), CompactionError> {
        //
        // High level strategy for compaction / image creation:
        //
@@ -763,7 +668,7 @@ impl Timeline {
        // Is the timeline being deleted?
        let state = *self.state.borrow();
        if state == TimelineState::Stopping {
-            return Err(anyhow::anyhow!("timeline is Stopping").into());
+            anyhow::bail!("timeline is Stopping");
        }

        let target_file_size = self.get_checkpoint_distance();
@@ -783,8 +688,7 @@ impl Timeline {
                // "enough".
                let layer_paths_to_upload = self
                    .create_image_layers(&partitioning, lsn, false, ctx)
-                    .await
-                    .map_err(anyhow::Error::from)?;
+                    .await?;
                if let Some(remote_client) = &self.remote_client {
                    for (path, layer_metadata) in layer_paths_to_upload {
                        remote_client.schedule_layer_file_upload(&path, &layer_metadata)?;
@@ -796,6 +700,18 @@ impl Timeline {
                self.compact_level0(&layer_removal_cs, target_file_size, ctx)
                    .await?;
                timer.stop_and_record();
+
+                // If `create_image_layers` or `compact_level0` scheduled any
+                // uploads or deletions, but didn't update the index file yet,
+                // do it now.
+                //
+                // This isn't necessary for correctness, the remote state is
+                // consistent without the uploads and deletions, and we would
+                // update the index file on next flush iteration too. But it
+                // could take a while until that happens.
+                if let Some(remote_client) = &self.remote_client {
+                    remote_client.schedule_index_upload_for_file_changes()?;
+                }
            }
            Err(err) => {
                // no partitioning? This is normal, if the timeline was just created
@@ -885,7 +801,6 @@ impl Timeline {
    pub fn activate(self: &Arc<Self>) {
        self.set_state(TimelineState::Active);
        self.launch_wal_receiver();
-        self.launch_eviction_task();
    }

    pub fn set_state(&self, new_state: TimelineState) {
@@ -952,107 +867,24 @@ impl Timeline {
        Ok(Some(true))
    }

-    /// Like [`evict_layer_batch`], but for just one layer.
-    /// Additional case `Ok(None)` covers the case where the layer could not be found by its `layer_file_name`.
    pub async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
        let Some(local_layer) = self.find_layer(layer_file_name) else { return Ok(None) };
-        let remote_client = self
-            .remote_client
-            .as_ref()
-            .ok_or_else(|| anyhow::anyhow!("remote storage not configured; cannot evict"))?;
-
-        let cancel = CancellationToken::new();
-        let results = self
-            .evict_layer_batch(remote_client, &[local_layer], cancel)
-            .await?;
-        assert_eq!(results.len(), 1);
-        let result: Option<anyhow::Result<bool>> = results.into_iter().next().unwrap();
-        match result {
-            None => anyhow::bail!("task_mgr shutdown requested"),
-            Some(Ok(b)) => Ok(Some(b)),
-            Some(Err(e)) => Err(e),
+        if local_layer.is_remote_layer() {
+            return Ok(Some(false));
        }
-    }
+        let Some(remote_client) = &self.remote_client else { return Ok(Some(false)) };

-    /// Evict multiple layers at once, continuing through errors.
-    ///
-    /// Try to evict the given `layers_to_evict` by
-    ///
-    /// 1. Replacing the given layer object in the layer map with a corresponding [`RemoteLayer`] object.
-    /// 2. Deleting the now unreferenced layer file from disk.
-    ///
-    /// The `remote_client` should be this timeline's `self.remote_client`.
-    /// We make the caller provide it so that they are responsible for handling the case
-    /// where someone wants to evict the layer but no remote storage is configured.
-    ///
-    /// Returns either `Err()` or `Ok(results)` where `results.len() == layers_to_evict.len()`.
-    /// If `Err()` is returned, no eviction was attempted.
-    /// Each position of `Ok(results)` corresponds to the layer in `layers_to_evict`.
-    /// Meaning of each `result[i]`:
-    /// - `Some(Err(...))` if layer replacement failed for an unexpected reason
-    /// - `Some(Ok(true))` if everything went well.
-    /// - `Some(Ok(false))` if there was an expected reason why the layer could not be replaced, e.g.:
-    ///    - evictee was not yet downloaded
-    ///    - replacement failed for an expectable reason (e.g., layer removed by GC before we grabbed all locks)
-    /// - `None` if no eviction attempt was made for the layer because `cancel.is_cancelled() == true`.
-    async fn evict_layer_batch(
-        &self,
-        remote_client: &Arc<RemoteTimelineClient>,
-        layers_to_evict: &[Arc<dyn PersistentLayer>],
-        cancel: CancellationToken,
-    ) -> anyhow::Result<Vec<Option<anyhow::Result<bool>>>> {
-        // ensure that the layers have finished uploading
-        // (don't hold the layer_removal_cs while we do it, we're not removing anything yet)
+        // ensure the current layer is uploaded for sure
        remote_client
            .wait_completion()
            .await
            .context("wait for layer upload ops to complete")?;

-        // now lock out layer removal (compaction, gc, timeline deletion)
-        let layer_removal_guard = self.layer_removal_cs.lock().await;
-
-        // start the batch update
-        let mut layer_map = self.layers.write().unwrap();
-        let mut batch_updates = layer_map.batch_update();
-
-        let mut results = Vec::with_capacity(layers_to_evict.len());
-
-        for l in layers_to_evict.iter() {
-            let res = if cancel.is_cancelled() {
-                None
-            } else {
-                Some(self.evict_layer_batch_impl(&layer_removal_guard, l, &mut batch_updates))
-            };
-            results.push(res);
-        }
-
-        // commit the updates & release locks
-        batch_updates.flush();
-        drop(layer_map);
-        drop(layer_removal_guard);
-
-        assert_eq!(results.len(), layers_to_evict.len());
-        Ok(results)
-    }
-
-    fn evict_layer_batch_impl(
-        &self,
-        _layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
-        local_layer: &Arc<dyn PersistentLayer>,
-        batch_updates: &mut BatchedUpdates<'_, dyn PersistentLayer>,
-    ) -> anyhow::Result<bool> {
-        use super::layer_map::Replacement;
-
-        if local_layer.is_remote_layer() {
-            return Ok(false);
-        }
-
-        let layer_file_size = local_layer
-            .file_size()
-            .expect("Local layer should have a file size");
-
-        let layer_metadata = LayerFileMetadata::new(layer_file_size);
-
+        let layer_metadata = LayerFileMetadata::new(
+            local_layer
+                .file_size()
+                .expect("Local layer should have a file size"),
+        );
        let new_remote_layer = Arc::new(match local_layer.filename() {
            LayerFileName::Image(image_name) => RemoteLayer::new_img(
                self.tenant_id,
@@ -1074,45 +906,16 @@ impl Timeline {
            ),
        });

-        let replaced = match batch_updates.replace_historic(local_layer, new_remote_layer)? {
-            Replacement::Replaced { .. } => {
-                if let Err(e) = local_layer.delete_resident_layer_file() {
-                    error!("failed to remove layer file on evict after replacement: {e:#?}");
-                }
-                // Always decrement the physical size gauge, even if we failed to delete the file.
-                // Rationale: we already replaced the layer with a remote layer in the layer map,
-                // and any subsequent download_remote_layer will
-                // 1. overwrite the file on disk and
-                // 2. add the downloaded size to the resident size gauge.
-                //
-                // If there is no re-download, and we restart the pageserver, then load_layer_map
-                // will treat the file as a local layer again, count it towards resident size,
-                // and it'll be like the layer removal never happened.
-                // The bump in resident size is perhaps unexpected but overall a robust behavior.
-                self.metrics
-                    .resident_physical_size_gauge
-                    .sub(layer_file_size);
+        let gc_lock = self.layer_removal_cs.lock().await;
+        let mut layers = self.layers.write().unwrap();
+        let mut updates = layers.batch_update();
+        self.delete_historic_layer(&gc_lock, local_layer, &mut updates)?;
+        updates.insert_historic(new_remote_layer);
+        updates.flush();
+        drop(layers);
+        drop(gc_lock);

-                true
-            }
-            Replacement::NotFound => {
-                debug!(evicted=?local_layer, "layer was no longer in layer map");
-                false
-            }
-            Replacement::RemovalBuffered => {
-                unreachable!("not doing anything else in this batch")
-            }
-            Replacement::Unexpected(other) => {
-                error!(
-                    local_layer.ptr=?Arc::as_ptr(local_layer),
-                    other.ptr=?Arc::as_ptr(&other),
-                    ?other,
-                    "failed to replace");
-                false
-            }
-        };
-
-        Ok(replaced)
+        Ok(Some(true))
    }
 }

@@ -1153,13 +956,6 @@ impl Timeline {
            .unwrap_or(self.conf.default_tenant_conf.image_creation_threshold)
    }

-    fn get_eviction_policy(&self) -> EvictionPolicy {
-        let tenant_conf = self.tenant_conf.read().unwrap();
-        tenant_conf
-            .eviction_policy
-            .unwrap_or(self.conf.default_tenant_conf.eviction_policy)
-    }
-
    /// Open a Timeline handle.
    ///
    /// Loads the metadata for the timeline into memory, but not the layer map.
@@ -1716,31 +1512,13 @@ impl Timeline {
                    }
                    x @ Err(_) => x.context("Failed to calculate logical size")?,
                };
-
-                // we cannot query current_logical_size.current_size() to know the current
-                // *negative* value, only truncated to u64.
-                let added = self_clone
-                    .current_logical_size
-                    .size_added_after_initial
-                    .load(AtomicOrdering::Relaxed);
-
-                let sum = calculated_size.saturating_add_signed(added);
-
-                // set the gauge value before it can be set in `update_current_logical_size`.
-                self_clone.metrics.current_logical_size_gauge.set(sum);
-
                match self_clone
                    .current_logical_size
                    .initial_logical_size
                    .set(calculated_size)
                {
                    Ok(()) => (),
-                    Err(_what_we_just_attempted_to_set) => {
-                        let existing_size = self_clone
-                            .current_logical_size
-                            .initial_logical_size
-                            .get()
-                            .expect("once_cell set was lost, then get failed, impossible.");
+                    Err(existing_size) => {
                        // This shouldn't happen because the semaphore is initialized with 1.
                        // But if it happens, just complain & report success so there are no further retries.
                        error!("Tried to update initial timeline size value to {calculated_size}, but the size was already set to {existing_size}, not changing")
@@ -1798,9 +1576,15 @@ impl Timeline {
        let calculation = async {
            let cancel = cancel.child_token();
            let ctx = ctx.attached_child();
-            self_calculation
-                .calculate_logical_size(init_lsn, cancel, &ctx)
-                .await
+            tokio::task::spawn_blocking(move || {
+                // Run in a separate thread since this can do a lot of
+                // synchronous file IO without .await in between
+                // if there are no RemoteLayers that would require downloading.
+                let h = tokio::runtime::Handle::current();
+                h.block_on(self_calculation.calculate_logical_size(init_lsn, cancel, &ctx))
+            })
+            .await
+            .context("Failed to spawn calculation result task")?
        };
        let timeline_state_cancellation = async {
            loop {
@@ -1833,7 +1617,7 @@ impl Timeline {
        tokio::pin!(calculation);
        loop {
            tokio::select! {
-                res = &mut calculation => { return res }
+                res = &mut calculation =>  { return res }
                reason = timeline_state_cancellation => {
                    debug!(reason = reason, "cancelling calculation");
                    cancel.cancel();
@@ -1917,15 +1701,10 @@ impl Timeline {
        // one value while current_logical_size is set to the
        // other.
        match logical_size.current_size() {
-            Ok(CurrentLogicalSize::Exact(new_current_size)) => self
+            Ok(new_current_size) => self
                .metrics
                .current_logical_size_gauge
-                .set(new_current_size),
-            Ok(CurrentLogicalSize::Approximate(_)) => {
-                // don't update the gauge yet, this allows us not to update the gauge back and
-                // forth between the initial size calculation task.
-            }
-            // this is overflow
+                .set(new_current_size.size()),
            Err(e) => error!("Failed to compute current logical size for metrics update: {e:?}"),
        }
    }
@@ -1950,14 +1729,11 @@ impl Timeline {
        layer: Arc<dyn PersistentLayer>,
        updates: &mut BatchedUpdates<'_, dyn PersistentLayer>,
    ) -> anyhow::Result<()> {
-        if !layer.is_remote_layer() {
-            layer.delete_resident_layer_file()?;
-            let layer_file_size = layer
-                .file_size()
-                .expect("Local layer should have a file size");
-            self.metrics
-                .resident_physical_size_gauge
-                .sub(layer_file_size);
+        let layer_size = layer.file_size();
+
+        layer.delete()?;
+        if let Some(layer_size) = layer_size {
+            self.metrics.resident_physical_size_gauge.sub(layer_size);
        }

        // TODO Removing from the bottom of the layer map is expensive.
@@ -2497,7 +2273,7 @@ impl Timeline {
            // Only one thread may call this function at a time (for this
            // timeline). If two threads tried to flush the same frozen
            // layer to disk at the same time, that would not work.
-            assert!(LayerMap::compare_arced_layers(&l.unwrap(), &frozen_layer));
+            assert!(Arc::ptr_eq(&l.unwrap(), &frozen_layer));

            // release lock on 'layers'
        }
@@ -2633,13 +2409,10 @@ impl Timeline {
    ) -> anyhow::Result<(KeyPartitioning, Lsn)> {
        {
            let partitioning_guard = self.partitioning.lock().unwrap();
-            let distance = lsn.0 - partitioning_guard.1 .0;
-            if partitioning_guard.1 != Lsn(0) && distance <= self.repartition_threshold {
-                debug!(
-                    distance,
-                    threshold = self.repartition_threshold,
-                    "no repartitioning needed"
-                );
+            if partitioning_guard.1 != Lsn(0)
+                && lsn.0 - partitioning_guard.1 .0 <= self.repartition_threshold
+            {
+                // no repartitioning needed
                return Ok((partitioning_guard.0.clone(), partitioning_guard.1));
            }
        }
@@ -2657,12 +2430,8 @@ impl Timeline {

    // Is it time to create a new image layer for the given partition?
    fn time_for_new_image_layer(&self, partition: &KeySpace, lsn: Lsn) -> anyhow::Result<bool> {
-        let threshold = self.get_image_creation_threshold();
-
        let layers = self.layers.read().unwrap();

-        let mut max_deltas = 0;
-
        for part_range in &partition.ranges {
            let image_coverage = layers.image_coverage(part_range, lsn)?;
            for (img_range, last_img) in image_coverage {
@@ -2684,25 +2453,21 @@ impl Timeline {
                // are some delta layers *later* than current 'lsn', if more WAL was processed and flushed
                // after we read last_record_lsn, which is passed here in the 'lsn' argument.
                if img_lsn < lsn {
+                    let threshold = self.get_image_creation_threshold();
                    let num_deltas =
                        layers.count_deltas(&img_range, &(img_lsn..lsn), Some(threshold))?;

-                    max_deltas = max_deltas.max(num_deltas);
+                    debug!(
+                        "key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
+                        img_range.start, img_range.end, num_deltas, img_lsn, lsn
+                    );
                    if num_deltas >= threshold {
-                        debug!(
-                            "key range {}-{}, has {} deltas on this timeline in LSN range {}..{}",
-                            img_range.start, img_range.end, num_deltas, img_lsn, lsn
-                        );
                        return Ok(true);
                    }
                }
            }
        }

-        debug!(
-            max_deltas,
-            "none of the partitioned ranges had >= {threshold} deltas"
-        );
        Ok(false)
    }

@@ -2815,55 +2580,25 @@ impl Timeline {
        Ok(layer_paths_to_upload)
    }
 }
-
 #[derive(Default)]
 struct CompactLevel0Phase1Result {
    new_layers: Vec<DeltaLayer>,
    deltas_to_compact: Vec<Arc<dyn PersistentLayer>>,
 }

-/// Top-level failure to compact.
-#[derive(Debug)]
-enum CompactionError {
-    /// L0 compaction requires layers to be downloaded.
-    ///
-    /// This should not happen repeatedly, but will be retried once by top-level
-    /// `Timeline::compact`.
-    DownloadRequired(Vec<Arc<RemoteLayer>>),
-    /// Compaction cannot be done right now; page reconstruction and so on.
-    Other(anyhow::Error),
-}
-
-impl From<anyhow::Error> for CompactionError {
-    fn from(value: anyhow::Error) -> Self {
-        CompactionError::Other(value)
-    }
-}
-
 impl Timeline {
-    /// Level0 files first phase of compaction, explained in the [`compact_inner`] comment.
-    ///
-    /// This method takes the `_layer_removal_cs` guard to highlight it required downloads are
-    /// returned as an error. If the `layer_removal_cs` boundary is changed not to be taken in the
-    /// start of level0 files compaction, the on-demand download should be revisited as well.
    async fn compact_level0_phase1(
        &self,
-        _layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
        target_file_size: u64,
        ctx: &RequestContext,
-    ) -> Result<CompactLevel0Phase1Result, CompactionError> {
+    ) -> anyhow::Result<CompactLevel0Phase1Result> {
        let layers = self.layers.read().unwrap();
        let mut level0_deltas = layers.get_level0_deltas()?;
        drop(layers);

        // Only compact if enough layers have accumulated.
-        let threshold = self.get_compaction_threshold();
-        if level0_deltas.is_empty() || level0_deltas.len() < threshold {
-            debug!(
-                level0_deltas = level0_deltas.len(),
-                threshold, "too few deltas to compact"
-            );
-            return Ok(CompactLevel0Phase1Result::default());
+        if level0_deltas.is_empty() || level0_deltas.len() < self.get_compaction_threshold() {
+            return Ok(Default::default());
        }

        // Gather the files to compact in this iteration.
@@ -2899,24 +2634,6 @@ impl Timeline {
            end: deltas_to_compact.last().unwrap().get_lsn_range().end,
        };

-        let remotes = deltas_to_compact
-            .iter()
-            .filter(|l| l.is_remote_layer())
-            .inspect(|l| info!("compact requires download of {}", l.filename().file_name()))
-            .map(|l| {
-                l.clone()
-                    .downcast_remote_layer()
-                    .expect("just checked it is remote layer")
-            })
-            .collect::<Vec<_>>();
-
-        if !remotes.is_empty() {
-            // caller is holding the lock to layer_removal_cs, and we don't want to download while
-            // holding that; in future download_remote_layer might take it as well. this is
-            // regardless of earlier image creation downloading on-demand, while holding the lock.
-            return Err(CompactionError::DownloadRequired(remotes));
-        }
-
        info!(
            "Starting Level0 compaction in LSN range {}-{} for {} layers ({} deltas in total)",
            lsn_range.start,
@@ -2924,11 +2641,9 @@ impl Timeline {
            deltas_to_compact.len(),
            level0_deltas.len()
        );
-
        for l in deltas_to_compact.iter() {
            info!("compact includes {}", l.filename().file_name());
        }
-
        // We don't need the original list of layers anymore. Drop it so that
        // we don't accidentally use it later in the function.
        drop(level0_deltas);
@@ -2972,47 +2687,6 @@ impl Timeline {
            },
        )?;

-        // Determine N largest holes where N is number of compacted layers.
-        let max_holes = deltas_to_compact.len();
-        let last_record_lsn = self.get_last_record_lsn();
-        let layers = self.layers.read().unwrap(); // Is'n it better to hold original layers lock till here?
-        let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128;
-        let min_hole_coverage_size = 3; // TODO: something more flexible?
-
-        // min-heap (reserve space for one more element added before eviction)
-        let mut heap: BinaryHeap<Hole> = BinaryHeap::with_capacity(max_holes + 1);
-        let mut prev: Option<Key> = None;
-        for (next_key, _next_lsn, _size) in itertools::process_results(
-            deltas_to_compact.iter().map(|l| l.key_iter(ctx)),
-            |iter_iter| iter_iter.kmerge_by(|a, b| a.0 <= b.0),
-        )? {
-            if let Some(prev_key) = prev {
-                // just first fast filter
-                if next_key.to_i128() - prev_key.to_i128() >= min_hole_range {
-                    let key_range = prev_key..next_key;
-                    // Measuring hole by just subtraction of i128 representation of key range boundaries
-                    // has not so much sense, because largest holes will corresponds field1/field2 changes.
-                    // But we are mostly interested to eliminate holes which cause generation of excessive image layers.
-                    // That is why it is better to measure size of hole as number of covering image layers.
-                    let coverage_size = layers.image_coverage(&key_range, last_record_lsn)?.len();
-                    if coverage_size >= min_hole_coverage_size {
-                        heap.push(Hole {
-                            key_range,
-                            coverage_size,
-                        });
-                        if heap.len() > max_holes {
-                            heap.pop(); // remove smallest hole
-                        }
-                    }
-                }
-            }
-            prev = Some(next_key.next());
-        }
-        drop(layers);
-        let mut holes = heap.into_vec();
-        holes.sort_unstable_by_key(|hole| hole.key_range.start);
-        let mut next_hole = 0; // index of next hole in holes vector
-
        // Merge the contents of all the input delta layers into a new set
        // of delta layers, based on the current partitioning.
        //
@@ -3107,22 +2781,14 @@ impl Timeline {
                }
                if writer.is_some() {
                    let written_size = writer.as_mut().unwrap().size();
-                    let contains_hole =
-                        next_hole < holes.len() && key >= holes[next_hole].key_range.end;
-                    // check if key cause layer overflow or contains hole...
+                    // check if key causes layer overflow...
                    if is_dup_layer
                        || dup_end_lsn.is_valid()
                        || written_size + key_values_total_size > target_file_size
-                        || contains_hole
                    {
                        // ... if so, flush previous layer and prepare to write new one
                        new_layers.push(writer.take().unwrap().finish(prev_key.unwrap().next())?);
                        writer = None;
-
-                        if contains_hole {
-                            // skip hole
-                            next_hole += 1;
-                        }
                    }
                }
                // Remember size of key value because at next iteration we will access next item
@@ -3147,9 +2813,7 @@ impl Timeline {
            }

            fail_point!("delta-layer-writer-fail-before-finish", |_| {
-                return Err(
-                    anyhow::anyhow!("failpoint delta-layer-writer-fail-before-finish").into(),
-                );
+                anyhow::bail!("failpoint delta-layer-writer-fail-before-finish");
            });

            writer.as_mut().unwrap().put_value(key, lsn, value)?;
@@ -3168,7 +2832,7 @@ impl Timeline {

            // Fsync all the layer files and directory using multiple threads to
            // minimize latency.
-            par_fsync::par_fsync(&layer_paths).context("fsync all new layers")?;
+            par_fsync::par_fsync(&layer_paths)?;

            layer_paths.pop().unwrap();
        }
@@ -3190,13 +2854,11 @@ impl Timeline {
        layer_removal_cs: &tokio::sync::MutexGuard<'_, ()>,
        target_file_size: u64,
        ctx: &RequestContext,
-    ) -> Result<(), CompactionError> {
+    ) -> anyhow::Result<()> {
        let CompactLevel0Phase1Result {
            new_layers,
            deltas_to_compact,
-        } = self
-            .compact_level0_phase1(layer_removal_cs, target_file_size, ctx)
-            .await?;
+        } = self.compact_level0_phase1(target_file_size, ctx).await?;

        if new_layers.is_empty() && deltas_to_compact.is_empty() {
            // nothing to do
@@ -3220,12 +2882,7 @@ impl Timeline {
        for l in new_layers {
            let new_delta_path = l.path();

-            let metadata = new_delta_path.metadata().with_context(|| {
-                format!(
-                    "read file metadata for new created layer {}",
-                    new_delta_path.display()
-                )
-            })?;
+            let metadata = new_delta_path.metadata()?;

            if let Some(remote_client) = &self.remote_client {
                remote_client.schedule_layer_file_upload(
@@ -3459,7 +3116,7 @@ impl Timeline {

        let mut layers_to_remove = Vec::new();

-        // Scan all layers in the timeline (remote or on-disk).
+        // Scan all on-disk layers in the timeline.
        //
        // Garbage collect the layer if all conditions are satisfied:
        // 1. it is older than cutoff LSN;
@@ -3698,26 +3355,14 @@ impl Timeline {
        &self,
        remote_layer: Arc<RemoteLayer>,
    ) -> anyhow::Result<()> {
-        use std::sync::atomic::Ordering::Relaxed;
-
        let permit = match Arc::clone(&remote_layer.ongoing_download)
            .acquire_owned()
            .await
        {
            Ok(permit) => permit,
            Err(_closed) => {
-                if remote_layer.download_replacement_failure.load(Relaxed) {
-                    // this path will be hit often, in case there are upper retries. however
-                    // hitting this error will prevent a busy loop between get_reconstruct_data and
-                    // download, so an error is prefered.
-                    //
-                    // TODO: we really should poison the timeline, but panicking is not yet
-                    // supported. Related: https://github.com/neondatabase/neon/issues/3621
-                    anyhow::bail!("an earlier download succeeded but LayerMap::replace failed")
-                } else {
-                    info!("download of layer has already finished");
-                    return Ok(());
-                }
+                info!("download of layer has already finished");
+                return Ok(());
            }
        };

@@ -3749,12 +3394,11 @@ impl Timeline {
                    // Delta- or ImageLayer in the layer map.
                    let new_layer = remote_layer.create_downloaded_layer(self_clone.conf, *size);
                    let mut layers = self_clone.layers.write().unwrap();
-                    let mut updates = layers.batch_update();
                    {
                        use crate::tenant::layer_map::Replacement;
                        let l: Arc<dyn PersistentLayer> = remote_layer.clone();
-                        let failure = match updates.replace_historic(&l, new_layer) {
-                            Ok(Replacement::Replaced { .. }) => false,
+                        match layers.replace_historic(&l, new_layer) {
+                            Ok(Replacement::Replaced { .. }) => { /* expected */ }
                            Ok(Replacement::NotFound) => {
                                // TODO: the downloaded file should probably be removed, otherwise
                                // it will be added to the layermap on next load? we should
@@ -3762,7 +3406,6 @@ impl Timeline {
                                //
                                // See: https://github.com/neondatabase/neon/issues/3533
                                error!("replacing downloaded layer into layermap failed because layer was not found");
-                                true
                            }
                            Ok(Replacement::RemovalBuffered) => {
                                unreachable!("current implementation does not remove anything")
@@ -3778,38 +3421,16 @@ impl Timeline {
                                error!(
                                    expected.ptr = ?Arc::as_ptr(&l),
                                    other.ptr = ?Arc::as_ptr(&other),
-                                    ?other,
                                    "replacing downloaded layer into layermap failed because another layer was found instead of expected"
                                );
-                                true
                            }
                            Err(e) => {
                                // this is a precondition failure, the layer filename derived
                                // attributes didn't match up, which doesn't seem likely.
-                                error!("replacing downloaded layer into layermap failed: {e:#?}");
-                                true
+                                error!("replacing downloaded layer into layermap failed: {e:#?}")
                            }
-                        };
-
-                        if failure {
-                            // mark the remote layer permanently failed; the timeline is most
-                            // likely unusable after this. sadly we cannot just poison the layermap
-                            // lock with panic, because that would create an issue with shutdown.
-                            //
-                            // this does not change the retry semantics on failed downloads.
-                            //
-                            // use of Relaxed is valid because closing of the semaphore gives
-                            // happens-before and wakes up any waiters; we write this value before
-                            // and any waiters (or would be waiters) will load it after closing
-                            // semaphore.
-                            //
-                            // See: https://github.com/neondatabase/neon/issues/3533
-                            remote_layer
-                                .download_replacement_failure
-                                .store(true, Relaxed);
                        }
                    }
-                    updates.flush();
                    drop(layers);

                    // Now that we've inserted the download into the layer map,
@@ -3819,7 +3440,6 @@ impl Timeline {
                    remote_layer.ongoing_download.close();
                } else {
                    // Keep semaphore open. We'll drop the permit at the end of the function.
-                    info!("on-demand download failed: {:?}", result.as_ref().unwrap_err());
                }

                // Don't treat it as an error if the task that triggered the download
@@ -3833,7 +3453,7 @@ impl Timeline {
                drop(permit);

                Ok(())
-            }.in_current_span(),
+            },
        );

        receiver.await.context("download task cancelled")?
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -1,219 +0,0 @@
-//! The per-timeline layer eviction task.
-
-use std::{
-    ops::ControlFlow,
-    sync::Arc,
-    time::{Duration, SystemTime},
-};
-
-use either::Either;
-use tokio::time::Instant;
-use tokio_util::sync::CancellationToken;
-use tracing::{debug, error, info, instrument, warn};
-
-use crate::{
-    task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
-    tenant::{
-        config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
-        storage_layer::PersistentLayer,
-    },
-};
-
-use super::Timeline;
-
-impl Timeline {
-    pub(super) fn launch_eviction_task(self: &Arc<Self>) {
-        let self_clone = Arc::clone(self);
-        task_mgr::spawn(
-            BACKGROUND_RUNTIME.handle(),
-            TaskKind::Eviction,
-            Some(self.tenant_id),
-            Some(self.timeline_id),
-            &format!("layer eviction for {}/{}", self.tenant_id, self.timeline_id),
-            false,
-            async move {
-                self_clone.eviction_task(task_mgr::shutdown_token()).await;
-                info!("eviction task finishing");
-                Ok(())
-            },
-        );
-    }
-
-    #[instrument(skip_all, fields(tenant_id = %self.tenant_id, timeline_id = %self.timeline_id))]
-    async fn eviction_task(self: Arc<Self>, cancel: CancellationToken) {
-        use crate::tenant::tasks::random_init_delay;
-        {
-            let policy = self.get_eviction_policy();
-            let period = match policy {
-                EvictionPolicy::LayerAccessThreshold(lat) => lat.period,
-                EvictionPolicy::NoEviction => Duration::from_secs(10),
-            };
-            if random_init_delay(period, &cancel).await.is_err() {
-                info!("shutting down");
-                return;
-            }
-        }
-
-        loop {
-            let policy = self.get_eviction_policy();
-            let cf = self.eviction_iteration(&policy, cancel.clone()).await;
-
-            match cf {
-                ControlFlow::Break(()) => break,
-                ControlFlow::Continue(sleep_until) => {
-                    tokio::select! {
-                        _ = cancel.cancelled() => {
-                            info!("shutting down");
-                            break;
-                        }
-                        _ = tokio::time::sleep_until(sleep_until) => { }
-                    }
-                }
-            }
-        }
-    }
-
-    #[instrument(skip_all, fields(policy_kind = policy.discriminant_str()))]
-    async fn eviction_iteration(
-        self: &Arc<Self>,
-        policy: &EvictionPolicy,
-        cancel: CancellationToken,
-    ) -> ControlFlow<(), Instant> {
-        debug!("eviction iteration: {policy:?}");
-        match policy {
-            EvictionPolicy::NoEviction => {
-                // check again in 10 seconds; XXX config watch mechanism
-                ControlFlow::Continue(Instant::now() + Duration::from_secs(10))
-            }
-            EvictionPolicy::LayerAccessThreshold(p) => {
-                let start = Instant::now();
-                match self.eviction_iteration_threshold(p, cancel).await {
-                    ControlFlow::Break(()) => return ControlFlow::Break(()),
-                    ControlFlow::Continue(()) => (),
-                }
-                let elapsed = start.elapsed();
-                crate::tenant::tasks::warn_when_period_overrun(elapsed, p.period, "eviction");
-                ControlFlow::Continue(start + p.period)
-            }
-        }
-    }
-
-    async fn eviction_iteration_threshold(
-        self: &Arc<Self>,
-        p: &EvictionPolicyLayerAccessThreshold,
-        cancel: CancellationToken,
-    ) -> ControlFlow<()> {
-        let now = SystemTime::now();
-
-        #[allow(dead_code)]
-        #[derive(Debug, Default)]
-        struct EvictionStats {
-            candidates: usize,
-            evicted: usize,
-            errors: usize,
-            not_evictable: usize,
-            skipped_for_shutdown: usize,
-        }
-        let mut stats = EvictionStats::default();
-        // Gather layers for eviction.
-        // NB: all the checks can be invalidated as soon as we release the layer map lock.
-        // We don't want to hold the layer map lock during eviction.
-        // So, we just need to deal with this.
-        let candidates: Vec<Arc<dyn PersistentLayer>> = {
-            let layers = self.layers.read().unwrap();
-            let mut candidates = Vec::new();
-            for hist_layer in layers.iter_historic_layers() {
-                if hist_layer.is_remote_layer() {
-                    continue;
-                }
-                let last_activity_ts = match hist_layer
-                    .access_stats()
-                    .most_recent_access_or_residence_event()
-                {
-                    Either::Left(mra) => mra.when,
-                    Either::Right(re) => re.timestamp,
-                };
-                let no_activity_for = match now.duration_since(last_activity_ts) {
-                    Ok(d) => d,
-                    Err(_e) => {
-                        // We reach here if `now` < `last_activity_ts`, which can legitimately
-                        // happen if there is an access between us getting `now`, and us getting
-                        // the access stats from the layer.
-                        //
-                        // The other reason why it can happen is system clock skew because
-                        // SystemTime::now() is not monotonic, so, even if there is no access
-                        // to the layer after we get `now` at the beginning of this function,
-                        // it could be that `now`  < `last_activity_ts`.
-                        //
-                        // To distinguish the cases, we would need to record `Instant`s in the
-                        // access stats (i.e., monotonic timestamps), but then, the timestamps
-                        // values in the access stats would need to be `Instant`'s, and hence
-                        // they would be meaningless outside of the pageserver process.
-                        // At the time of writing, the trade-off is that access stats are more
-                        // valuable than detecting clock skew.
-                        continue;
-                    }
-                };
-                if no_activity_for > p.threshold {
-                    candidates.push(hist_layer)
-                }
-            }
-            candidates
-        };
-        stats.candidates = candidates.len();
-
-        let remote_client = match self.remote_client.as_ref() {
-            None => {
-                error!(
-                    num_candidates = candidates.len(),
-                    "no remote storage configured, cannot evict layers"
-                );
-                return ControlFlow::Continue(());
-            }
-            Some(c) => c,
-        };
-
-        let results = match self
-            .evict_layer_batch(remote_client, &candidates[..], cancel)
-            .await
-        {
-            Err(pre_err) => {
-                stats.errors += candidates.len();
-                error!("could not do any evictions: {pre_err:#}");
-                return ControlFlow::Continue(());
-            }
-            Ok(results) => results,
-        };
-        assert_eq!(results.len(), candidates.len());
-        for (l, result) in candidates.iter().zip(results) {
-            match result {
-                None => {
-                    stats.skipped_for_shutdown += 1;
-                }
-                Some(Ok(true)) => {
-                    debug!("evicted layer {l:?}");
-                    stats.evicted += 1;
-                }
-                Some(Ok(false)) => {
-                    debug!("layer is not evictable: {l:?}");
-                    stats.not_evictable += 1;
-                }
-                Some(Err(e)) => {
-                    // This variant is the case where an unexpected error happened during eviction.
-                    // Expected errors that result in non-eviction are `Some(Ok(false))`.
-                    // So, dump Debug here to gather as much info as possible in this rare case.
-                    warn!("failed to evict layer {l:?}: {e:?}");
-                    stats.errors += 1;
-                }
-            }
-        }
-        if stats.candidates == stats.not_evictable {
-            debug!(stats=?stats, "eviction iteration complete");
-        } else if stats.errors > 0 || stats.not_evictable > 0 {
-            warn!(stats=?stats, "eviction iteration complete");
-        } else {
-            info!(stats=?stats, "eviction iteration complete");
-        }
-        ControlFlow::Continue(())
-    }
-}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Bojan Serafimov	c471c25744	Clone less	2023-02-06 14:42:17 -05:00
Bojan Serafimov	e030830397	WIP	2023-02-06 13:55:53 -05:00