RFC about read-only replica

2026-02-13 07:30:38 +00:00 · 2023-02-02 17:06:26 +02:00
173 changed files with 5630 additions and 10357 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -15,10 +15,8 @@
 !proxy/
 !safekeeper/
 !storage_broker/
-!trace/
 !vendor/postgres-v14/
 !vendor/postgres-v15/
 !workspace_hack/
 !neon_local/
 !scripts/ninstall.sh
-!vm-cgconfig.conf
--- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
+++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
--- a/.github/ansible/prod.ap-southeast-1.hosts.yaml
+++ b/.github/ansible/prod.ap-southeast-1.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-ap-southeast-1
    bucket_region: ap-southeast-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.epsilon.ap-southeast-1.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
@@ -32,7 +32,7 @@ storage:
      hosts:
        safekeeper-0.ap-southeast-1.aws.neon.tech:
          ansible_host:  i-0d6f1dc5161eef894
+        safekeeper-1.ap-southeast-1.aws.neon.tech:
+          ansible_host:  i-0e338adda8eb2d19f
        safekeeper-2.ap-southeast-1.aws.neon.tech:
          ansible_host:  i-04fb63634e4679eb9
-        safekeeper-3.ap-southeast-1.aws.neon.tech:
-          ansible_host:  i-05481f3bc88cfc2d4
--- a/.github/ansible/prod.eu-central-1.hosts.yaml
+++ b/.github/ansible/prod.eu-central-1.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-eu-central-1
    bucket_region: eu-central-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.gamma.eu-central-1.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
--- a/.github/ansible/prod.us-east-2.hosts.yaml
+++ b/.github/ansible/prod.us-east-2.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-us-east-2
    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.delta.us-east-2.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
@@ -27,8 +27,6 @@ storage:
          ansible_host:  i-062227ba7f119eb8c
        pageserver-1.us-east-2.aws.neon.tech:
          ansible_host:  i-0b3ec0afab5968938
-        pageserver-2.us-east-2.aws.neon.tech:
-          ansible_host:  i-0d7a1c4325e71421d

    safekeepers:
      hosts:
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -2,11 +2,11 @@ storage:
  vars:
    bucket_name: neon-prod-storage-us-west-2
    bucket_region: us-west-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
+    console_mgmt_base_url: http://console-release.local
    broker_endpoint: http://storage-broker-lb.eta.us-west-2.internal.aws.neon.tech:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
      remote_storage:
        bucket_name: "{{ bucket_name }}"
@@ -29,8 +29,6 @@ storage:
          ansible_host: i-0c834be1dddba8b3f
        pageserver-2.us-west-2.aws.neon.tech:
          ansible_host: i-051642d372c0a4f32
-        pageserver-3.us-west-2.aws.neon.tech:
-          ansible_host: i-00c3844beb9ad1c6b

    safekeepers:
      hosts:
--- a/.github/ansible/production.hosts.yaml
+++ b/.github/ansible/production.hosts.yaml
@@ -0,0 +1,40 @@
+---
+storage:
+  vars:
+    console_mgmt_base_url: http://console-release.local
+    bucket_name: zenith-storage-oregon
+    bucket_region: us-west-2
+    broker_endpoint: http://storage-broker.prod.local:50051
+    pageserver_config_stub:
+      pg_distrib_dir: /usr/local
+      metric_collection_endpoint: http://console-release.local/billing/api/v1/usage_events
+      metric_collection_interval: 10min
+      remote_storage:
+        bucket_name: "{{ bucket_name }}"
+        bucket_region: "{{ bucket_region }}"
+        prefix_in_bucket: "{{ inventory_hostname }}"
+    safekeeper_s3_prefix: prod-1/wal
+    hostname_suffix: ".local"
+    remote_user: admin
+    sentry_environment: production
+
+  children:
+    pageservers:
+      hosts:
+        zenith-1-ps-2:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-3:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-4:
+          console_region_id: aws-us-west-2
+        zenith-1-ps-5:
+          console_region_id: aws-us-west-2
+
+    safekeepers:
+      hosts:
+        zenith-1-sk-1:
+          console_region_id: aws-us-west-2
+        zenith-1-sk-2:
+          console_region_id: aws-us-west-2
+        zenith-1-sk-4:
+          console_region_id: aws-us-west-2
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -2,17 +2,12 @@ storage:
  vars:
    bucket_name: neon-dev-storage-eu-west-1
    bucket_region: eu-west-1
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
+    console_mgmt_base_url: http://console-staging.local
    broker_endpoint: http://storage-broker-lb.zeta.eu-west-1.internal.aws.neon.build:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-staging.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: "20m"
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -2,17 +2,12 @@ storage:
  vars:
    bucket_name: neon-staging-storage-us-east-2
    bucket_region: us-east-2
-    console_mgmt_base_url: http://neon-internal-api.aws.neon.build
+    console_mgmt_base_url: http://console-staging.local
    broker_endpoint: http://storage-broker-lb.beta.us-east-2.internal.aws.neon.build:50051
    pageserver_config_stub:
      pg_distrib_dir: /usr/local
-      metric_collection_endpoint: http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events
+      metric_collection_endpoint: http://console-staging.local/billing/api/v1/usage_events
      metric_collection_interval: 10min
-      tenant_config:
-        eviction_policy:
-          kind: "LayerAccessThreshold"
-          period: "20m"
-          threshold: "20m"
      remote_storage:
        bucket_name: "{{ bucket_name }}"
        bucket_region: "{{ bucket_region }}"
@@ -36,8 +31,6 @@ storage:
          ansible_host: i-01e31cdf7e970586a
        pageserver-3.us-east-2.aws.neon.build:
          ansible_host: i-0602a0291365ef7cc
-        pageserver-99.us-east-2.aws.neon.build:
-          ansible_host: i-0c39491109bb88824

    safekeepers:
      hosts:
@@ -47,5 +40,3 @@ storage:
          ansible_host: i-0171efc3604a7b907
        safekeeper-2.us-east-2.aws.neon.build:
          ansible_host: i-0de0b03a51676a6ce
-        safekeeper-99.us-east-2.aws.neon.build:
-          ansible_host: i-0d61b6a2ea32028d5
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -1,31 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.eu-west-1.aws.neon.build"
  sentryEnvironment: "staging"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-link.yaml
@@ -10,7 +10,7 @@ settings:
  uri: "https://console.stage.neon.tech/psql_session/"
  domain: "pg.neon.build"
  sentryEnvironment: "staging"
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy-link pods
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.cloud.stage.neon.tech"
  sentryEnvironment: "staging"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -1,31 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
+  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.build"
  sentryEnvironment: "staging"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-staging.local/billing/api/v1/usage_events"
  metricCollectionInterval: "1min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.ap-southeast-1.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.eu-central-1.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram-legacy.yaml
@@ -6,11 +6,11 @@ image:

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.cloud.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -1,32 +1,16 @@
 # Helm chart values for neon-proxy-scram.
 # This is a YAML-formatted file.

-deploymentStrategy:
-  type: RollingUpdate
-  rollingUpdate:
-    maxSurge: 100%
-    maxUnavailable: 50%
-
-# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
-# The pod(s) will stay in Terminating, keeps the existing connections
-# but doesn't receive new ones
-containerLifecycle:
-  preStop:
-    exec:
-      command: ["/bin/sh", "-c", "sleep 604800"]
-terminationGracePeriodSeconds: 604800
-
-
 image:
  repository: neondatabase/neon

 settings:
  authBackend: "console"
-  authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
+  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-west-2.aws.neon.tech"
  sentryEnvironment: "production"
  wssPort: 8443
-  metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
  metricCollectionInterval: "10min"

 # -- Additional labels for neon-proxy pods
--- a/.github/helm-values/production.neon-storage-broker.yaml
+++ b/.github/helm-values/production.neon-storage-broker.yaml
@@ -0,0 +1,56 @@
+# Helm chart values for neon-storage-broker
+podLabels:
+  neon_env: production
+  neon_service: storage-broker
+
+# Use L4 LB
+service:
+  # service.annotations -- Annotations to add to the service
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external  # use newer AWS Load Balancer Controller
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internal  # deploy LB to private subnet
+    # assign service to this name at external-dns
+    external-dns.alpha.kubernetes.io/hostname: storage-broker.prod.local
+  # service.type -- Service type
+  type: LoadBalancer
+  # service.port -- broker listen port
+  port: 50051
+
+ingress:
+  enabled: false
+
+metrics:
+  enabled: true
+  serviceMonitor:
+    enabled: true
+    selector:
+      release: kube-prometheus-stack
+
+extraManifests:
+  - apiVersion: operator.victoriametrics.com/v1beta1
+    kind: VMServiceScrape
+    metadata:
+      name: "{{ include \"neon-storage-broker.fullname\" . }}"
+      labels:
+        helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
+        app.kubernetes.io/name: neon-storage-broker
+        app.kubernetes.io/instance: neon-storage-broker
+        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
+        app.kubernetes.io/managed-by: Helm
+      namespace: "{{ .Release.Namespace }}"
+    spec:
+      selector:
+        matchLabels:
+          app.kubernetes.io/name: "neon-storage-broker"
+      endpoints:
+        - port: broker
+          path: /metrics
+          interval: 10s
+          scrapeTimeout: 10s
+      namespaceSelector:
+        matchNames:
+          - "{{ .Release.Namespace }}"
+
+settings:
+  sentryEnvironment: "production"
--- a/.github/helm-values/production.proxy-scram.yaml
+++ b/.github/helm-values/production.proxy-scram.yaml
@@ -0,0 +1,54 @@
+settings:
+  authBackend: "console"
+  authEndpoint: "http://console-release.local/management/api/v2"
+  domain: "*.cloud.neon.tech"
+  sentryEnvironment: "production"
+  wssPort: 8443
+  metricCollectionEndpoint: "http://console-release.local/billing/api/v1/usage_events"
+  metricCollectionInterval: "10min"
+
+podLabels:
+  zenith_service: proxy-scram
+  zenith_env: production
+  zenith_region: us-west-2
+  zenith_region_slug: oregon
+
+exposedService:
+  annotations:
+    service.beta.kubernetes.io/aws-load-balancer-type: external
+    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
+    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
+    external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
+  httpsPort: 443
+
+metrics:
+  enabled: true
+  serviceMonitor:
+    enabled: true
+    selector:
+      release: kube-prometheus-stack
+
+extraManifests:
+  - apiVersion: operator.victoriametrics.com/v1beta1
+    kind: VMServiceScrape
+    metadata:
+      name: "{{ include \"neon-proxy.fullname\" . }}"
+      labels:
+        helm.sh/chart: neon-proxy-{{ .Chart.Version }}
+        app.kubernetes.io/name: neon-proxy
+        app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
+        app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
+        app.kubernetes.io/managed-by: Helm
+      namespace: "{{ .Release.Namespace }}"
+    spec:
+      selector:
+        matchLabels:
+          app.kubernetes.io/name: "neon-proxy"
+      endpoints:
+        - port: http
+          path: /metrics
+          interval: 10s
+          scrapeTimeout: 10s
+      namespaceSelector:
+        matchNames:
+          - "{{ .Release.Namespace }}"
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -611,31 +611,34 @@ jobs:
      run:
        shell: sh -eu {0}
    env:
-      VM_BUILDER_VERSION: v0.4.6
+      VM_INFORMANT_VERSION: 0.1.1

    steps:
-      - name: Checkout
-        uses: actions/checkout@v1
-        with:
-          fetch-depth: 0
-
-      - name: Downloading vm-builder
+      - name: Downloading latest vm-builder
        run: |
-          curl -L https://github.com/neondatabase/neonvm/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder
+          curl -L https://github.com/neondatabase/neonvm/releases/latest/download/vm-builder -o vm-builder
          chmod +x vm-builder

      - name: Pulling compute-node image
        run: |
          docker pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

-      - name: Building VM compute-node rootfs
+      - name: Downloading VM informant version ${{ env.VM_INFORMANT_VERSION }}
        run: |
-          docker build -t temp-vm-compute-node --build-arg SRC_IMAGE=369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}} -f Dockerfile.vm-compute-node .
+          curl -fL https://github.com/neondatabase/autoscaling/releases/download/${{ env.VM_INFORMANT_VERSION }}/vm-informant -o vm-informant
+          chmod +x vm-informant
+
+      - name: Adding VM informant to compute-node image
+        run: |
+          ID=$(docker create 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}})
+          docker cp vm-informant $ID:/bin/vm-informant
+          docker commit $ID temp-vm-compute-node
+          docker rm -f $ID

      - name: Build vm image
        run: |
          # note: as of 2023-01-12, vm-builder requires a trailing ":latest" for local images
-          ./vm-builder -use-inittab -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
+          ./vm-builder -src=temp-vm-compute-node:latest -dst=369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}

      - name: Pushing vm-compute-node image
        run: |
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -67,7 +67,7 @@ jobs:
          ./get_binaries.sh

          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
+          ansible-playbook deploy.yaml -i staging.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_STAGING_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
          rm -f neon_install.tar.gz .neon_current_version

      - name: Cleanup ansible folder
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -40,9 +40,7 @@ concurrency:
 jobs:
  deploy-prod-new:
    runs-on: prod
-    container:
-      image: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-      options: --user root --privileged
+    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
    if: inputs.deployStorage && inputs.disclamerAcknowledged
    defaults:
      run:
@@ -68,7 +66,7 @@ jobs:
          ./get_binaries.sh

          ansible-galaxy collection install sivel.toiletwater
-          ansible-playbook -v deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
+          ansible-playbook deploy.yaml -i prod.${{ matrix.target_region }}.hosts.yaml -e @ssm_config -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
          rm -f neon_install.tar.gz .neon_current_version

  deploy-proxy-prod-new:
@@ -165,3 +163,113 @@ jobs:
      - name: Deploy storage-broker
        run:
          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
+
+  # Deploy to the old account below
+
+  deploy:
+    runs-on: prod
+    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
+    if: inputs.deployStorage && inputs.disclamerAcknowledged
+    defaults:
+      run:
+        shell: bash
+    environment:
+      name: prod-old
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Redeploy
+        run: |
+          export DOCKER_TAG=${{ inputs.dockerTag }}
+          cd "$(pwd)/.github/ansible"
+
+          ./get_binaries.sh
+
+          eval $(ssh-agent)
+          echo "${{ secrets.TELEPORT_SSH_KEY }}"  | tr -d '\n'| base64 --decode >ssh-key
+          echo "${{ secrets.TELEPORT_SSH_CERT }}" | tr -d '\n'| base64 --decode >ssh-key-cert.pub
+          chmod 0600 ssh-key
+          ssh-add ssh-key
+          rm -f ssh-key ssh-key-cert.pub
+          ANSIBLE_CONFIG=./ansible.cfg ansible-galaxy collection install sivel.toiletwater
+          ANSIBLE_CONFIG=./ansible.cfg ansible-playbook deploy.yaml -i production.hosts.yaml -e CONSOLE_API_TOKEN=${{ secrets.NEON_PRODUCTION_API_KEY }} -e SENTRY_URL_PAGESERVER=${{ secrets.SENTRY_URL_PAGESERVER }} -e SENTRY_URL_SAFEKEEPER=${{ secrets.SENTRY_URL_SAFEKEEPER }}
+          rm -f neon_install.tar.gz .neon_current_version
+
+      # Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ansible/collections': Permission denied
+      - name: Cleanup ansible folder
+        run: rm -rf ~/.ansible
+
+  deploy-proxy:
+    runs-on: [ self-hosted, gen3, small ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    if: inputs.deployProxy && inputs.disclamerAcknowledged
+    defaults:
+      run:
+        shell: bash
+    environment:
+      name: prod-old
+    env:
+      KUBECONFIG: .kubeconfig
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Store kubeconfig file
+        run: |
+          echo "${{ secrets.PRODUCTION_KUBECONFIG_DATA }}" | base64 --decode > ${KUBECONFIG}
+          chmod 0600 ${KUBECONFIG}
+
+      - name: Add neon helm chart
+        run: helm repo add neondatabase https://neondatabase.github.io/helm-charts
+
+      - name: Re-deploy proxy
+        run: |
+          DOCKER_TAG=${{ inputs.dockerTag }}
+          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --install --atomic -f .github/helm-values/production.proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+
+      - name: Cleanup helm folder
+        run: rm -rf ~/.cache
+
+  deploy-storage-broker:
+    name: deploy storage broker on old staging and old prod
+    runs-on: [ self-hosted, gen3, small ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    if: inputs.deployStorageBroker && inputs.disclamerAcknowledged
+    defaults:
+      run:
+        shell: bash
+    environment:
+      name: prod-old
+    env:
+      KUBECONFIG: .kubeconfig
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Store kubeconfig file
+        run: |
+          echo "${{ secrets.PRODUCTION_KUBECONFIG_DATA }}" | base64 --decode > ${KUBECONFIG}
+          chmod 0600 ${KUBECONFIG}
+
+      - name: Add neon helm chart
+        run: helm repo add neondatabase https://neondatabase.github.io/helm-charts
+
+      - name: Deploy storage-broker
+        run:
+          helm upgrade neon-storage-broker neondatabase/neon-storage-broker --namespace neon-storage-broker --create-namespace --install --atomic -f .github/helm-values/production.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
+
+      - name: Cleanup helm folder
+        run: rm -rf ~/.cache
--- a/.github/workflows/neon_extra_builds.yml
+++ b/.github/workflows/neon_extra_builds.yml
@@ -4,7 +4,6 @@ on:
  push:
    branches:
    - main
-  pull_request:

 defaults:
  run:
@@ -21,7 +20,6 @@ env:

 jobs:
  check-macos-build:
-    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-macos')
    timeout-minutes: 90
    runs-on: macos-latest

@@ -95,16 +93,11 @@ jobs:
        run: ./run_clippy.sh

  gather-rust-build-stats:
-    if: github.ref_name == 'main' || contains(github.event.pull_request.labels.*.name, 'run-extra-build-stats')
-    runs-on: [ self-hosted, gen3, large ]
-    container:
-      image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
-      options: --init
+    timeout-minutes: 90
+    runs-on: ubuntu-latest

    env:
      BUILD_TYPE: release
-      # remove the cachepot wrapper and build without crate caches
-      RUSTC_WRAPPER: ""
      # builds with incremental compilation produce partial results,
      # so do not attempt to cache this build; also disable incremental compilation
      CARGO_INCREMENTAL: 0
@@ -116,6 +109,11 @@ jobs:
          submodules: true
          fetch-depth: 1

+      - name: Install Ubuntu postgres dependencies
+        run: |
+          sudo apt update
+          sudo apt install build-essential libreadline-dev zlib1g-dev flex bison libseccomp-dev libssl-dev protobuf-compiler
+
      # Some of our rust modules use FFI and need those to be checked
      - name: Get postgres headers
        run: make postgres-headers -j$(nproc)
@@ -124,31 +122,7 @@ jobs:
        run: cargo build --all --release --timings

      - name: Upload the build stats
-        id: upload-stats
-        env:
-          BUCKET: neon-github-public-dev
-          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
-          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_KEY_DEV }}
-        run: |
-          REPORT_URL=https://${BUCKET}.s3.amazonaws.com/build-stats/${SHA}/${GITHUB_RUN_ID}/cargo-timing.html
-          aws s3 cp --only-show-errors ./target/cargo-timings/cargo-timing.html "s3://${BUCKET}/build-stats/${SHA}/${GITHUB_RUN_ID}/"
-          echo "report-url=${REPORT_URL}" >> $GITHUB_OUTPUT
-
-      - name: Publish build stats report
-        uses: actions/github-script@v6
-        env:
-          REPORT_URL: ${{ steps.upload-stats.outputs.report-url }}
-          SHA: ${{ github.event.pull_request.head.sha || github.sha }}
+        uses: actions/upload-artifact@v3
        with:
-          script: |
-            const { REPORT_URL, SHA } = process.env
-
-            await github.rest.repos.createCommitStatus({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              sha: `${SHA}`,
-              state: 'success',
-              target_url: `${REPORT_URL}`,
-              context: `Build stats (release)`,
-            })
+          name: neon-${{ runner.os }}-release-build-stats
+          path: ./target/cargo-timings/
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,6 @@ members = [
    "safekeeper",
    "storage_broker",
    "workspace_hack",
-    "trace",
    "libs/*",
 ]

@@ -32,15 +31,12 @@ bstr = "1.0"
 byteorder = "1.4"
 bytes = "1.0"
 chrono = { version = "0.4", default-features = false, features = ["clock"] }
-clap = { version = "4.0", features = ["derive"] }
+clap = "4.0"
 close_fds = "0.3.2"
 comfy-table = "6.1"
 const_format = "0.2"
 crc32c = "0.6"
 crossbeam-utils = "0.8.5"
-either = "1.8"
-enum-map = "2.4.2"
-enumset = "1.0.12"
 fail = "0.5.0"
 fs2 = "0.4.3"
 futures = "0.3"
@@ -69,6 +65,7 @@ once_cell = "1.13"
 opentelemetry = "0.18.0"
 opentelemetry-otlp = { version = "0.11.0", default_features=false, features = ["http-proto", "trace", "http", "reqwest-client"] }
 opentelemetry-semantic-conventions = "0.10.0"
+tracing-opentelemetry = "0.18.0"
 parking_lot = "0.12"
 pin-project-lite = "0.2"
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
@@ -76,8 +73,6 @@ prost = "0.11"
 rand = "0.8"
 regex = "1.4"
 reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
-reqwest-middleware = "0.2.0"
 routerify = "3"
 rpds = "0.12.0"
 rustls = "0.20"
@@ -94,7 +89,6 @@ socket2 = "0.4.4"
 strum = "0.24"
 strum_macros = "0.24"
 svg_fmt = "0.4.1"
-sync_wrapper = "0.1.2"
 tar = "0.4"
 thiserror = "1.0"
 tls-listener = { version = "0.6", features = ["rustls", "hyper-h1"] }
@@ -107,7 +101,6 @@ toml = "0.5"
 toml_edit = { version = "0.17", features = ["easy"] }
 tonic = {version = "0.8", features = ["tls", "tls-roots"]}
 tracing = "0.1"
-tracing-opentelemetry = "0.18.0"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 url = "2.2"
 uuid = { version = "1.2", features = ["v4", "serde"] }
@@ -126,14 +119,10 @@ postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", re
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
 tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }

-## Other git libraries
-heapless = { default-features=false, features=[], git = "https://github.com/japaric/heapless.git", rev = "644653bf3b831c6bb4963be2de24804acf5e5001" } # upstream release pending
-
 ## Local libraries
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
-postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
 pq_proto = { version = "0.1", path = "./libs/pq_proto/" }
@@ -151,7 +140,7 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" }
 criterion = "0.4"
 rcgen = "0.10"
 rstest = "0.16"
-tempfile = "3.4"
+tempfile = "3.2"
 tonic-build = "0.8"

 # This is only needed for proxy's tests.
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -1,4 +1,3 @@
-ARG PG_VERSION
 ARG REPOSITORY=369495373322.dkr.ecr.eu-central-1.amazonaws.com
 ARG IMAGE=rust
 ARG TAG=pinned
@@ -11,8 +10,7 @@ ARG TAG=pinned
 FROM debian:bullseye-slim AS build-deps
 RUN apt update &&  \
    apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
-    zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
-    libicu-dev libxslt1-dev
+    zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev

 #########################################################################################
 #
@@ -24,24 +22,18 @@ FROM build-deps AS pg-build
 ARG PG_VERSION
 COPY vendor/postgres-${PG_VERSION} postgres
 RUN cd postgres && \
-    ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu \
-    --with-libxml --with-libxslt && \
+    ./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
    # Install headers
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
    # Enable some of contrib extensions
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/autoinc.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/insert_username.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/moddatetime.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/refint.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/xml2.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/earthdistance.control

 #########################################################################################
 #
@@ -57,18 +49,17 @@ RUN apt update && \
    libcgal-dev libgdal-dev libgmp-dev libmpfr-dev libopenscenegraph-dev libprotobuf-c-dev \
    protobuf-c-compiler xsltproc

-# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
-RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
-    mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
-    cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
+RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz && \
+    tar zxvf SFCGAL-v1.3.10.tar.gz && \
+    cd SFCGAL-v1.3.10 && cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
    make clean && cp -R /sfcgal/* /

-ENV PATH "/usr/local/pgsql/bin:$PATH"
-
-RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
-    mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
+RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
+    tar xvzf postgis-3.3.1.tar.gz && \
+    cd postgis-3.3.1 && \
    ./autogen.sh && \
+    export PATH="/usr/local/pgsql/bin:$PATH" && \
    ./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
    cd extensions/postgis && \
@@ -82,15 +73,6 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control

-RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
-    mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
-    mkdir build && \
-    cd build && \
-    cmake .. && \
-    make -j $(getconf _NPROCESSORS_ONLN) && \
-    make -j $(getconf _NPROCESSORS_ONLN) install && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrouting.control
-
 #########################################################################################
 #
 # Layer "plv8-build"
@@ -100,17 +82,30 @@ RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouti
 FROM build-deps AS plv8-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
 RUN apt update && \
-    apt install -y ninja-build python3-dev libncurses5 binutils clang
+    apt install -y ninja-build python3-dev libc++-dev libc++abi-dev libncurses5 binutils

-RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
-    mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
+# https://github.com/plv8/plv8/issues/475:
+#   v8 uses gold for linking and sets `--thread-count=4` which breaks
+#   gold version <= 1.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=23607)
+# Install a newer gold version manually, because the debian-testing binutils package also
+# updates libc, which in turn breaks other extensions built against the non-testing libc.
+RUN wget https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz && \
+    tar xvzf binutils-2.38.tar.gz && \
+    cd binutils-2.38 && \
+    cd libiberty && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && \
+    cd ../bfd && ./configure && make bfdver.h && \
+    cd ../gold && ./configure && make -j $(getconf _NPROCESSORS_ONLN) && make install && \
+    cp /usr/local/bin/ld.gold /usr/bin/gold
+
+# Sed is used to patch for https://github.com/plv8/plv8/issues/503
+RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.4.tar.gz && \
+    tar xvzf v3.1.4.tar.gz && \
+    cd plv8-3.1.4 && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
+    sed -i 's/MemoryContextAlloc(/MemoryContextAllocZero(/' plv8.cc && \
    make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
    rm -rf /plv8-* && \
-    find /usr/local/pgsql/ -name "plv8-*.so" | xargs strip && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plcoffee.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plls.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/plv8.control

 #########################################################################################
 #
@@ -128,17 +123,20 @@ RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2
      && /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
      && rm /tmp/cmake-install.sh

-RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
-    mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
-    mkdir build && cd build && \
+RUN wget https://github.com/uber/h3/archive/refs/tags/v4.0.1.tar.gz -O h3.tgz && \
+    tar xvzf h3.tgz  && \
+    cd h3-4.0.1 && \
+    mkdir build && \
+    cd build && \
    cmake .. -DCMAKE_BUILD_TYPE=Release && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    DESTDIR=/h3 make install && \
    cp -R /h3/usr / && \
    rm -rf build

-RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
-    mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
+RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.0.1.tar.gz -O h3-pg.tgz && \
+    tar xvzf h3-pg.tgz && \
+    cd h3-pg-4.0.1 && \
    export PATH="/usr/local/pgsql/bin:$PATH" && \
    make -j $(getconf _NPROCESSORS_ONLN) && \
    make -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -154,8 +152,9 @@ RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3
 FROM build-deps AS unit-pg-build
 COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/

-RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
-    mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
+RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz && \
+    tar xvzf 7.7.tar.gz && \
+    cd postgresql-unit-7.7 && \
    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
    # unit extension's "create extension" script relies on absolute install path to fill some reference tables.
@@ -165,126 +164,6 @@ RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -
    find /usr/local/pgsql/share/extension/ -name "unit*.sql" -print0 | xargs -0 sed -i "s|pgsql/||g" && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/unit.control

-#########################################################################################
-#
-# Layer "vector-pg-build"
-# compile pgvector extension
-#
-#########################################################################################
-FROM build-deps AS vector-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
-    mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/vector.control
-
-#########################################################################################
-#
-# Layer "pgjwt-pg-build"
-# compile pgjwt extension
-#
-#########################################################################################
-FROM build-deps AS pgjwt-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
-RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
-    mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
-
-#########################################################################################
-#
-# Layer "hypopg-pg-build"
-# compile hypopg extension
-#
-#########################################################################################
-FROM build-deps AS hypopg-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
-    mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/hypopg.control
-
-#########################################################################################
-#
-# Layer "pg-hashids-pg-build"
-# compile pg_hashids extension
-#
-#########################################################################################
-FROM build-deps AS pg-hashids-pg-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
-    mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
-    make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
-    make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_hashids.control
-
-#########################################################################################
-# 
-# Layer "rust extensions"
-# This layer is used to build `pgx` deps
-#
-#########################################################################################
-FROM build-deps AS rust-extensions-build
-COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
-
-RUN apt-get update && \
-    apt-get install -y curl libclang-dev cmake && \
-    useradd -ms /bin/bash nonroot -b /home
-
-ENV HOME=/home/nonroot
-ENV PATH="/home/nonroot/.cargo/bin:/usr/local/pgsql/bin/:$PATH"
-USER nonroot
-WORKDIR /home/nonroot
-ARG PG_VERSION
-
-RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && \
-    chmod +x rustup-init && \
-    ./rustup-init -y --no-modify-path --profile minimal --default-toolchain stable && \
-    rm rustup-init && \
-    cargo install --git https://github.com/vadim2404/pgx --branch neon_abi_v0.6.1 --locked cargo-pgx && \
-    /bin/bash -c 'cargo pgx init --pg${PG_VERSION:1}=/usr/local/pgsql/bin/pg_config'
-
-USER root
-
-#########################################################################################
-# 
-# Layer "pg-jsonschema-pg-build"
-# Compile "pg_jsonschema" extension
-#
-#########################################################################################
-
-FROM rust-extensions-build AS pg-jsonschema-pg-build
-
-RUN git clone --depth=1 --single-branch --branch neon_abi_v0.1.4 https://github.com/vadim2404/pg_jsonschema/ && \
-    cd pg_jsonschema && \
-    cargo pgx install --release && \
-    # it's needed to enable extension because it uses untrusted C language
-    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_jsonschema.control && \
-    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
-
-#########################################################################################
-# 
-# Layer "pg-graphql-pg-build"
-# Compile "pg_graphql" extension
-#
-#########################################################################################
-
-FROM rust-extensions-build AS pg-graphql-pg-build
-
-RUN git clone --depth=1 --single-branch --branch neon_abi_v1.1.0 https://github.com/vadim2404/pg_graphql && \
-    cd pg_graphql && \  
-    cargo pgx install --release && \
-    # it's needed to enable extension because it uses untrusted C language
-    sed -i 's/superuser = false/superuser = true/g' /usr/local/pgsql/share/extension/pg_graphql.control && \
-    echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_graphql.control
-
 #########################################################################################
 #
 # Layer "neon-pg-ext-build"
@@ -298,12 +177,6 @@ COPY --from=plv8-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=h3-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=h3-pg-build /h3/usr /
 COPY --from=unit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=vector-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pgjwt-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-jsonschema-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-graphql-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
-COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY pgxn/ pgxn/

 RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -354,23 +227,17 @@ RUN mkdir /var/db && useradd -m -d /var/db/postgres postgres && \
    mkdir /var/db/postgres/compute && mkdir /var/db/postgres/specs && \
    chown -R postgres:postgres /var/db/postgres && \
    chmod 0750 /var/db/postgres/compute && \
-    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig && \
-    # create folder for file cache
-    mkdir -p -m 777 /neon/cache
+    echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig

 COPY --from=postgres-cleanup-layer --chown=postgres /usr/local/pgsql /usr/local
 COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-debug-size-lto/compute_ctl /usr/local/bin/compute_ctl

 # Install:
 # libreadline8 for psql
-# libicu67, locales for collations (including ICU)
 # libossp-uuid16 for extension ossp-uuid
 # libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
-# libxml2, libxslt1.1 for xml2
 RUN apt update &&  \
    apt install --no-install-recommends -y \
-        locales \
-        libicu67 \
        libreadline8 \
        libossp-uuid16 \
        libgeos-c1v5 \
@@ -378,12 +245,8 @@ RUN apt update &&  \
        libproj19 \
        libprotobuf-c1 \
        libsfcgal1 \
-        libxml2 \
-        libxslt1.1 \
        gdb && \
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
-    localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8
+    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

-ENV LANG en_US.utf8
 USER postgres
 ENTRYPOINT ["/usr/local/bin/compute_ctl"]
--- a/Dockerfile.vm-compute-node
+++ b/Dockerfile.vm-compute-node
@@ -1,32 +0,0 @@
-# Note: this file *mostly* just builds on Dockerfile.compute-node
-
-ARG SRC_IMAGE
-ARG VM_INFORMANT_VERSION=v0.1.6
-
-# Pull VM informant and set up inittab
-FROM neondatabase/vm-informant:$VM_INFORMANT_VERSION as informant
-
-RUN set -e \
-	&& rm -f /etc/inittab \
-	&& touch /etc/inittab
-
-RUN set -e \
-	&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
-	&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant --auto-restart --cgroup=neon-postgres'" >> /etc/inittab
-
-# Combine, starting from non-VM compute node image.
-FROM $SRC_IMAGE as base
-
-# Temporarily set user back to root so we can run apt update and adduser
-USER root
-RUN apt update && \
-	apt install --no-install-recommends -y \
-        cgroup-tools
-RUN adduser vm-informant --disabled-password --no-create-home
-USER postgres
-
-ADD vm-cgconfig.conf /etc/cgconfig.conf
-COPY --from=informant /etc/inittab /etc/inittab
-COPY --from=informant /usr/bin/vm-informant /usr/local/bin/vm-informant
-
-ENTRYPOINT ["/usr/sbin/cgexec", "-g", "*:neon-postgres", "/usr/local/bin/compute_ctl"]
--- a/12
+++ b/12
@@ -136,15 +136,9 @@ neon-pg-ext-%: postgres-%

 .PHONY: neon-pg-ext-clean-%
 neon-pg-ext-clean-%:
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-	-C $(POSTGRES_INSTALL_DIR)/build/neon-$* \
-	-f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-	-C $(POSTGRES_INSTALL_DIR)/build/neon-walredo-$* \
-	-f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
-	$(MAKE) PG_CONFIG=$(POSTGRES_INSTALL_DIR)/$*/bin/pg_config \
-	-C $(POSTGRES_INSTALL_DIR)/build/neon-test-utils-$* \
-	-f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/pgxn/neon-$* -f $(ROOT_PROJECT_DIR)/pgxn/neon/Makefile clean
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/pgxn/neon_walredo-$* -f $(ROOT_PROJECT_DIR)/pgxn/neon_walredo/Makefile clean
+	$(MAKE) -C $(POSTGRES_INSTALL_DIR)/pgxn/neon_test_utils-$* -f $(ROOT_PROJECT_DIR)/pgxn/neon_test_utils/Makefile clean

 .PHONY: neon-pg-ext
 neon-pg-ext: \
--- a/README.md
+++ b/README.md
@@ -34,11 +34,6 @@ dnf install flex bison readline-devel zlib-devel openssl-devel \
  libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
  protobuf-devel
 ```
-* On Arch based systems, these packages are needed:
-```bash
-pacman -S base-devel readline zlib libseccomp openssl clang \
-postgresql-libs cmake postgresql protobuf
-```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
 ```
@@ -88,10 +83,9 @@ cd neon

 # The preferred and default is to make a debug build. This will create a
 # demonstrably slower build than a release build. For a release build,
-# use "BUILD_TYPE=release make -j`nproc` -s"
-# Remove -s for the verbose build log
+# use "BUILD_TYPE=release make -j`nproc`"

-make -j`nproc` -s
+make -j`nproc`
 ```

 #### Building on OSX
@@ -105,10 +99,9 @@ cd neon

 # The preferred and default is to make a debug build. This will create a
 # demonstrably slower build than a release build. For a release build,
-# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu` -s"
-# Remove -s for the verbose build log
+# use "BUILD_TYPE=release make -j`sysctl -n hw.logicalcpu`"

-make -j`sysctl -n hw.logicalcpu` -s
+make -j`sysctl -n hw.logicalcpu`
 ```

 #### Dependency installation notes
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -17,7 +17,6 @@ regex.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 tar.workspace = true
-reqwest = { workspace = true, features = ["json"] }
 tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
 tokio-postgres.workspace = true
 tracing.workspace = true
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -44,6 +44,7 @@ use tracing::{error, info};

 use compute_tools::compute::{ComputeMetrics, ComputeNode, ComputeState, ComputeStatus};
 use compute_tools::http::api::launch_http_server;
+use compute_tools::informant::spawn_vm_informant_if_present;
 use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
@@ -65,9 +66,6 @@ fn main() -> Result<()> {
    let spec = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");

-    let compute_id = matches.get_one::<String>("compute-id");
-    let control_plane_uri = matches.get_one::<String>("control-plane-uri");
-
    // Try to use just 'postgres' if no path is provided
    let pgbin = matches.get_one::<String>("pgbin").unwrap();

@@ -80,27 +78,8 @@ fn main() -> Result<()> {
                let path = Path::new(sp);
                let file = File::open(path)?;
                serde_json::from_reader(file)?
-            } else if let Some(id) = compute_id {
-                if let Some(cp_base) = control_plane_uri {
-                    let cp_uri = format!("{cp_base}/management/api/v1/{id}/spec");
-                    let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
-                        Ok(v) => v,
-                        Err(_) => "".to_string(),
-                    };
-
-                    reqwest::blocking::Client::new()
-                        .get(cp_uri)
-                        .header("Authorization", jwt)
-                        .send()?
-                        .json()?
-                } else {
-                    panic!(
-                        "must specify --control-plane-uri \"{:#?}\" and --compute-id \"{:#?}\"",
-                        control_plane_uri, compute_id
-                    );
-                }
            } else {
-                panic!("compute spec should be provided via --spec or --spec-path argument");
+                panic!("cluster spec should be provided via --spec or --spec-path argument");
            }
        }
    };
@@ -162,6 +141,8 @@ fn main() -> Result<()> {
    // requests, while configuration is still in progress.
    let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
    let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
+    // Also spawn the thread responsible for handling the VM informant -- if it's present
+    let _vm_informant_handle = spawn_vm_informant_if_present().expect("cannot launch VM informant");

    // Start Postgres
    let mut delay_exit = false;
@@ -249,18 +230,6 @@ fn cli() -> clap::Command {
                .long("spec-path")
                .value_name("SPEC_PATH"),
        )
-        .arg(
-            Arg::new("compute-id")
-                .short('i')
-                .long("compute-id")
-                .value_name("COMPUTE_ID"),
-        )
-        .arg(
-            Arg::new("control-plane-uri")
-                .short('p')
-                .long("control-plane-uri")
-                .value_name("CONTROL_PLANE"),
-        )
 }

 #[test]
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -3,7 +3,6 @@ use std::net::SocketAddr;
 use std::sync::Arc;
 use std::thread;

-use crate::compute::ComputeNode;
 use anyhow::Result;
 use hyper::service::{make_service_fn, service_fn};
 use hyper::{Body, Method, Request, Response, Server, StatusCode};
@@ -11,6 +10,8 @@ use serde_json;
 use tracing::{error, info};
 use tracing_utils::http::OtelName;

+use crate::compute::ComputeNode;
+
 // Service function to handle all available routes.
 async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body> {
    //
--- a/compute_tools/src/informant.rs
+++ b/compute_tools/src/informant.rs
@@ -0,0 +1,50 @@
+use std::path::Path;
+use std::process;
+use std::thread;
+use std::time::Duration;
+use tracing::{info, warn};
+
+use anyhow::{Context, Result};
+
+const VM_INFORMANT_PATH: &str = "/bin/vm-informant";
+const RESTART_INFORMANT_AFTER_MILLIS: u64 = 5000;
+
+/// Starts a background thread that runs the VM informant (restarting it whenever it exits).
+/// Returns `Ok(None)` without spawning anything when `VM_INFORMANT_PATH` does not exist.
+pub fn spawn_vm_informant_if_present() -> Result<Option<thread::JoinHandle<()>>> {
+    let informant_path = Path::new(VM_INFORMANT_PATH);
+    let is_present = informant_path
+        .try_exists()
+        .context("could not check if path exists")?;
+    if !is_present {
+        return Ok(None);
+    }
+
+    // The spawned closure never returns because `run_informant` loops forever.
+    let builder = thread::Builder::new().name("run-vm-informant".into());
+    let handle = builder.spawn(move || run_informant())?;
+    Ok(Some(handle))
+}
+
+/// Supervises the VM informant forever; this function never returns.
+///
+/// Each iteration runs `VM_INFORMANT_PATH`, blocks until it exits, logs how it ended, and
+/// sleeps `RESTART_INFORMANT_AFTER_MILLIS` milliseconds before launching it again.
+fn run_informant() -> ! {
+    info!("starting VM informant");
+
+    let backoff = Duration::from_millis(RESTART_INFORMANT_AFTER_MILLIS);
+    loop {
+        // `status()` waits for the child to finish, so each iteration covers one full run.
+        match process::Command::new(VM_INFORMANT_PATH).status() {
+            Ok(status) if status.success() => {
+                info!("{VM_INFORMANT_PATH} ended gracefully (unexpectedly). Retrying")
+            }
+            Ok(status) => warn!("{VM_INFORMANT_PATH} exited with code {status:?}, retrying"),
+            Err(e) => warn!("failed to run VM informant at {VM_INFORMANT_PATH:?}: {e}"),
+        }
+
+        // Pause before relaunching the informant.
+        thread::sleep(backoff);
+    }
+}
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -8,6 +8,7 @@ pub mod http;
 #[macro_use]
 pub mod logger;
 pub mod compute;
+pub mod informant;
 pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -15,7 +15,6 @@ postgres.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["blocking", "json"] }
 serde.workspace = true
-serde_json.workspace = true
 serde_with.workspace = true
 tar.workspace = true
 thiserror.workspace = true
@@ -24,7 +23,6 @@ url.workspace = true
 # Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
 # instead, so that recompile times are better.
 pageserver_api.workspace = true
-postgres_backend.workspace = true
 safekeeper_api.workspace = true
 postgres_connection.workspace = true
 storage_broker.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -17,7 +17,6 @@ use pageserver_api::{
    DEFAULT_HTTP_LISTEN_ADDR as DEFAULT_PAGESERVER_HTTP_ADDR,
    DEFAULT_PG_LISTEN_ADDR as DEFAULT_PAGESERVER_PG_ADDR,
 };
-use postgres_backend::AuthType;
 use safekeeper_api::{
    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT,
    DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT,
@@ -31,6 +30,7 @@ use utils::{
    auth::{Claims, Scope},
    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
    lsn::Lsn,
+    postgres_backend::AuthType,
    project_git_version,
 };

--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -11,10 +11,10 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, Result};
-use postgres_backend::AuthType;
 use utils::{
    id::{TenantId, TimelineId},
    lsn::Lsn,
+    postgres_backend::AuthType,
 };

 use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -5,7 +5,6 @@

 use anyhow::{bail, ensure, Context};

-use postgres_backend::AuthType;
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
@@ -20,6 +19,7 @@ use std::process::{Command, Stdio};
 use utils::{
    auth::{encode_from_key_file, Claims, Scope},
    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
+    postgres_backend::AuthType,
 };

 use crate::safekeeper::SafekeeperNode;
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -11,7 +11,6 @@ use anyhow::{bail, Context};
 use pageserver_api::models::{
    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
 };
-use postgres_backend::AuthType;
 use postgres_connection::{parse_host_port, PgConnectionConfig};
 use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
@@ -21,6 +20,7 @@ use utils::{
    http::error::HttpErrorBody,
    id::{TenantId, TimelineId},
    lsn::Lsn,
+    postgres_backend::AuthType,
 };

 use crate::{background_process, local_env::LocalEnv};
@@ -419,11 +419,6 @@ impl PageServerNode {
                    .map(|x| x.parse::<bool>())
                    .transpose()
                    .context("Failed to parse 'trace_read_requests' as bool")?,
-                eviction_policy: settings
-                    .get("eviction_policy")
-                    .map(|x| serde_json::from_str(x))
-                    .transpose()
-                    .context("Failed to parse 'eviction_policy' json")?,
            })
            .send()?
            .error_from_body()?;
--- a/docs/rfcs/read-only-replica.md
+++ b/docs/rfcs/read-only-replica.md
@@ -0,0 +1,60 @@
+# Read-only replicas
+
+We want to be able to spin-up read-only compute nodes.
+
+## Scope of this feature
+
+- We want to be able to spin up multiple read-only replicas.
+- We should be able to spin up an RO node if the primary is inactive.
+
+- We don't want "snapshot" read-only computes at fixed LSN. Users can use branches for that.
+- We don't need replica promotion. For failover we rely on quick primary node restarts.
+
+- Implement it for v15 first.
+
+## Design
+
+### Replica creation
+
+1. At the moment of endpoint creation, user should be able to specify that they want to create a read-only endpoint.
+This setting will be passed via compute spec to `compute_ctl`. The replica will be created in the same region as primary (and all project's branches).
+
+Q: Should we allow specifying a region for the replica? It will affect latency and costs.
+
+2. `compute_ctl` will spin up a read-only replica.
+
+3. The replica then connects to the safekeepers and starts streaming WAL from them.
+It would be good to connect to the safekeeper in the same AZ to reduce network latency.
+
+### Configuration & UI
+
+We take the same approach as with primary nodes and don't expose any configuration knobs to users.
+It may be useful to add a dashboard to show replica lag.
+
+### Replication
+
+- Replica receives WAL stream from safekeeper to keep up with primary.
+- Replica is connected to pageserver to receive page images.
+
+There are two cases of WAL apply:
+
+- page is already in buffer cache.
+In this case, replica should update page's `LastWriteLSN` and apply the WAL record to the page.
+
+NOTE: We must either hold the buffer lock (pin) or update `LastWriteLSN` before applying the WAL record. Otherwise, a concurrent process may request a stale page from the pageserver.
+
+- page is not in buffer cache
+In this case, replica should update `LastWriteLSN` and ignore the WAL record.
+
+NOTE: We must be careful about atomicity of operations that modify multiple pages. See buffer lock coupling/crabbing (https://github.com/neondatabase/neon/issues/383) for details. Most likely everything will work fine, but we need to test it properly.
+
+
+To request a page, the replica should use its current WAL applyLSN to make sure that we don't get pages from the future.
+
+
+## Safekeepers
+
+- safekeepers should be able to stream WAL to multiple replicas and handle their feedback.
+
+- A lagging replica can force safekeepers to keep a lot of WAL.
+If they can offload WAL to S3 and download it back on demand, it will be fine.
--- a/docs/settings.md
+++ b/docs/settings.md
@@ -16,7 +16,7 @@ listen_http_addr = '127.0.0.1:9898'
 checkpoint_distance = '268435456' # in bytes
 checkpoint_timeout = '10m'

-gc_period = '1 hour'
+gc_period = '100 s'
 gc_horizon = '67108864'

 max_file_descriptors = '100'
@@ -101,7 +101,7 @@ away.

 #### gc_period

-Interval at which garbage collection is triggered. Default is 1 hour.
+Interval at which garbage collection is triggered. Default is 100 s.

 #### image_creation_threshold

@@ -109,7 +109,7 @@ L0 delta layer threshold for L1 image layer creation. Default is 3.

 #### pitr_interval

-WAL retention duration for PITR branching. Default is 7 days.
+WAL retention duration for PITR branching. Default is 30 days.

 #### walreceiver_connect_timeout

--- a/docs/synthetic-size.md
+++ b/docs/synthetic-size.md
@@ -1,335 +0,0 @@
-# Synthetic size
-
-Neon storage has copy-on-write branching, which makes it difficult to
-answer the question "how large is my database"? To give one reasonable
-answer, we calculate _synthetic size_ for a project.
-
-The calculation is called "synthetic", because it is based purely on
-the user-visible logical size, which is the size that you would see on
-a standalone PostgreSQL installation, and the amount of WAL, which is
-also the same as what you'd see on a standalone PostgreSQL, for the
-same set of updates.
-
-The synthetic size does *not* depend on the actual physical size
-consumed in the storage, or implementation details of the Neon storage
-like garbage collection, compaction and compression.  There is a
-strong *correlation* between the physical size and the synthetic size,
-but the synthetic size is designed to be independent of the
-implementation details, so that any improvements we make in the
-storage system simply reduce our COGS. And vice versa: any bugs or bad
-implementation where we keep more data than we would need to, do not
-change the synthetic size or incur any costs to the user.
-
-The synthetic size is calculated for the whole project. It is not
-straighforward to attribute size to individual branches. See "What is
-the size of an individual branch?" for discussion on those
-difficulties.
-
-The synthetic size is designed to:
-
- Take into account the copy-on-write nature of the storage. For
-  example, if you create a branch, it doesn't immediately add anything
-  to the synthetic size. It starts to affect the synthetic size only
-  as it diverges from the parent branch.
-
- Be independent of any implementation details of the storage, like
-  garbage collection, remote storage, or compression.
-
-## Terms & assumptions
-
- logical size is the size of a branch *at a given point in
-  time*. It's the total size of all tables in all databases, as you
-  see with "\l+" in psql for example, plus the Postgres SLRUs and some
-  small amount of metadata. NOTE that currently, Neon does not include
-  the SLRUs and metadata in the logical size. See comment to `get_current_logical_size_non_incremental()`.
-
- a "point in time" is defined as an LSN value. You can convert a
-  timestamp to an LSN, but the storage internally works with LSNs.
-
- PITR horizon can be set per-branch.
-
- PITR horizon can be set as a time interval, e.g. 5 days or hours, or
-  as amount of WAL, in bytes.  If it's given as a time interval, it's
-  converted to an LSN for the calculation.
-
- PITR horizon can be set to 0, if you don't want to retain any history.
-
-## Calculation
-
-Inputs to the calculation are:
- logical size of the database at different points in time,
- amount of WAL generated, and
- the PITR horizon settings
-
-The synthetic size is based on an idealistic model of the storage
-system, where we pretend that the storage consists of two things:
- snapshots, containing a full snapshot of the database, at a given
-  point in time, and
- WAL.
-
-In the simple case that the project contains just one branch (main),
-and a fixed PITR horizon, the synthetic size is the sum of:
-
- the logical size of the branch *at the beginning of the PITR
-  horizon*, i.e. at the oldest point that you can still recover to, and
- the size of the WAL covering the PITR horizon.
-
-The snapshot allows you to recover to the beginning of the PITR
-horizon, and the WAL allows you to recover from that point to any
-point within the horizon.
-
-```
-                             WAL
-   -----------------------#########>
-                          ^
-                       snapshot
-
-Legend:
-  ##### PITR horizon. This is the region that you can still access
-        with Point-in-time query and you can still create branches
-        from.
-  ----- history that has fallen out of the PITR horizon, and can no
-        longer be accessed
-```
-
-NOTE: This is not how the storage system actually works! The actual
-implementation is also based on snapshots and WAL, but the snapshots
-are taken for individual database pages and ranges of pages rather
-than the whole database, and it is much more complicated. This model
-is a reasonable approximation, however, to make the synthetic size a
-useful proxy for the actual storage consumption.
-
-
-## Example: Data is INSERTed
-
-For example, let's assume that your database contained 10 GB of data
-at the beginning of the PITR horizon, and you have since then inserted
-5 GB of additional data into it. The additional insertions of 5 GB of
-data consume roughly 5 GB of WAL. In that case, the synthetic size is:
-
-> 10 GB (snapshot) +  5 GB (WAL) = 15 GB
-
-If you now set the PITR horizon on the project to 0, so that no
-historical data is retained, then the beginning PITR horizon would be
-at the end of the branch, so the size of the snapshot would be
-calculated at the end of the branch, after the insertions. Then the
-synthetic size is:
-
-> 15 GB (snapshot) + 0 GB (WAL) = 15 GB.
-
-In this case, the synthetic size is the same, regardless of the PITR horizon,
-because all the history consists of inserts. The newly inserted data takes
-up the same amount of space, whether it's stored as part of the logical
-snapshot, or as WAL. (*)
-
-(*) This is a rough approximation. In reality, the WAL contains
-headers and other overhead, and on the other hand, the logical
-snapshot includes empty space on pages, so the size of insertions in
-WAL can be smaller or greater than the size of the final table after
-the insertions. But in most cases, it's in the same ballpark.
-
-## Example: Data is DELETEd
-
-Let's look at another example:
-
-Let's start again with a database that contains 10 GB of data. Then,
-you DELETE 5 GB of the data, and run VACUUM to free up the space, so
-that the logical size of the database is now only 5 GB.
-
-Let's assume that the WAL for the deletions and the vacuum take up
-100 MB of space. In that case, the synthetic size of the project is:
-
-> 10 GB (snapshot) + 100 MB (WAL) = 10.1 GB
-
-This is much larger than the logical size of the database after the
-deletions (5 GB). That's because the system still needs to retain the
-deleted data, because it's still accessible to queries and branching
-in the PITR window.
-
-If you now set the PITR horizon to 0 or just wait for time to pass so
-that the data falls out of the PITR horizon, making the deleted data
-inaccessible, the synthetic size shrinks:
-
-> 5 GB (snapshot) + 0 GB (WAL) = 5 GB
-
-
-# Branching
-
-Things get more complicated with branching. Branches in Neon are
-copy-on-write, which is also reflected in the synthetic size.
-
-When you create a branch, it doesn't immediately change the synthetic
-size at all. The branch point is within the PITR horizon, and all the
-data needed to recover to that point in time needs to be retained
-anyway.
-
-However, if you make modifications on the branch, the system needs to
-keep the WAL of those modifications. The WAL is included in the
-synthetic size.
-
-## Example: branch and INSERT
-
-Let's assume that you again start with a 10 GB database.
-On the main branch, you insert 2 GB of data. Then you create
-a branch at that point, and insert another 3 GB of data on the
-main branch, and 1 GB of data on the child branch
-
-```
-  child                 +#####>
-                        |
-                        |    WAL
-  main    ---------###############>
-                   ^
-                snapshot
-```
-
-In this case, the synthetic size consists of:
- the snapshot at the beginning of the PITR horizon (10 GB)
- the WAL on the main branch (2 GB + 3 GB = 5 GB)
- the WAL on the child branch (1 GB)
-
-Total: 16 GB
-
-# Diverging branches
-
-If there is only a small amount of changes in the database on the
-different branches, as in the previous example, the synthetic size
-consists of a snapshot before the branch point, containing all the
-shared data, and the WAL on both branches. However, if the branches
-diverge a lot, it is more efficient to store a separate snapshot of
-branches.
-
-## Example: diverging branches
-
-You start with a 10 GB database. You insert 5 GB of data on the main
-branch. Then you create a branch, and immediately delete all the data
-on the child branch and insert 5 GB of new data to it. Then you do the
-same on the main branch. Let's assume
-that the PITR horizon requires keeping the last 1 GB of WAL on the
-both branches.
-
-```
-                              snapshot
-                                  v     WAL
-  child                 +---------##############>
-                        |
-                        |
-  main     -------------+---------##############>
-                                  ^     WAL
-                              snapshot
-```
-
-In this case, the synthetic size consists of:
- snapshot at the beginning of the PITR horizon on the main branch (4 GB)
- WAL on the main branch (1 GB)
- snapshot at the beginning of the PITR horizon on the child branch (4 GB)
- last 1 GB of WAL on the child branch (1 GB)
-
-Total: 10 GB
-
-The alternative way to store this would be to take only one snapshot
-at the beginning of branch point, and keep all the WAL on both
-branches.  However, the size with that method would be larger, as it
-would require one 10 GB snapshot, and 5 GB + 5 GB of WAL. It depends
-on the amount of changes (WAL) on both branches, and the logical size
-at the branch point, which method would result in a smaller synthetic
-size. On each branch point, the system performs the calculation with
-both methods, and uses the method that is cheaper, i.e. the one that
-results in a smaller synthetic size.
-
-One way to think about this is that when you create a branch, it
-starts out as a thin branch that only stores the WAL since the branch
-point.  As you modify it, and the amount of WAL grows, at some point
-it becomes cheaper to store a completely new snapshot of the branch
-and truncate the WAL.
-
-
-# What is the size of an individual branch?
-
-Synthetic size is calculated for the whole project, and includes all
-branches. There is no such thing as the size of a branch, because it
-is not straighforward to attribute the parts of size to individual
-branches.
-
-## Example: attributing size to branches
-
-(copied from https://github.com/neondatabase/neon/pull/2884#discussion_r1029365278)
-
-Imagine that you create two branches, A and B, at the same point from
-main branch, and do a couple of small updates on both branches. Then
-six months pass, and during those six months the data on the main
-branch churns over completely multiple times. The retention period is,
-say 1 month.
-
-```
-                      +------> A
-                     /
--------------------*-------------------------------> main
-                     \
-                      +--------> B
-```
-
-In that situation, the synthetic tenant size would be calculated based
-on a "logical snapshot" at the branch point, that is, the logical size
-of the database at that point. Plus the WAL on branches A and B. Let's
-say that the snapshot size is 10 GB, and the WAL is 1 MB on both
-branches A and B. So the total synthetic storage size is 10002
-MB. (Let's ignore the main branch for now, that would be just added to
-the sum)
-
-How would you break that down per branch? I can think of three
-different ways to do it, and all of them have their own problems:
-
-### Subtraction method
-
-For each branch, calculate how much smaller the total synthetic size
-would be, if that branch didn't exist. In other words, how much would
-you save if you dropped the branch. With this method, the size of
-branches A and B is 1 MB.
-
-With this method, the 10 GB shared logical snapshot is not included
-for A nor B. So the size of all branches is not equal to the total
-synthetic size of the tenant. If you drop branch A, you save 1 MB as
-you'd expect, but also the size of B suddenly jumps from 1 MB to 10001
-MB, which might feel surprising.
-
-### Division method
-
-Divide the common parts evenly across all branches that need
-them. With this method, the size of branches A and B would be 5001 MB.
-
-With this method, the sum of all branches adds up to the total
-synthetic size. But it's surprising in other ways: if you drop branch
-A, you might think that you save 5001 MB, but in reality you only save
-1 MB, and the size of branch B suddenly grows from 5001 to 10001 MB.
-
-### Addition method
-
-For each branch, include all the snapshots and WAL that it depends on,
-even if some of them are shared by other branches. With this method,
-the size of branches A and B would be 10001 MB.
-
-The surprise with this method is that the sum of all the branches is
-larger than the total synthetic size. And if you drop branch A, the
-total synthetic size doesn't fall by 10001 MB as you might think.
-
-# Alternatives
-
-A sort of cop-out method would be to show the whole tree of branches
-graphically, and for each section of WAL or logical snapshot, display
-the size of that section. You can then see which branches depend on
-which sections, which sections are shared etc. That would be good to
-have in the UI anyway.
-
-Or perhaps calculate per-branch numbers using the subtraction method,
-and in addition to that, one more number for "shared size" that
-includes all the data that is needed by more than one branch.
-
-## Which is the right method?
-
-The bottom line is that it's not straightforward to attribute the
-synthetic size to individual branches. There are things we can do, and
-all of those methods are pretty straightforward to implement, but they
-all have their own problems. What makes sense depends a lot on what
-you want to do with the number, what question you are trying to
-answer.
--- a/libs/metrics/Cargo.toml
+++ b/libs/metrics/Cargo.toml
@@ -8,6 +8,5 @@ license.workspace = true
 prometheus.workspace = true
 libc.workspace = true
 once_cell.workspace = true
-chrono.workspace = true

 workspace_hack.workspace = true
--- a/libs/metrics/src/launch_timestamp.rs
+++ b/libs/metrics/src/launch_timestamp.rs
@@ -1,34 +0,0 @@
-//! A timestamp captured at process startup to identify restarts of the process, e.g., in logs and metrics.
-
-use chrono::Utc;
-
-use super::register_uint_gauge;
-use std::fmt::Display;
-
-pub struct LaunchTimestamp(chrono::DateTime<Utc>);
-
-impl LaunchTimestamp {
-    pub fn generate() -> Self {
-        LaunchTimestamp(Utc::now())
-    }
-}
-
-impl Display for LaunchTimestamp {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.0)
-    }
-}
-
-pub fn set_launch_timestamp_metric(launch_ts: &'static LaunchTimestamp) {
-    let millis_since_epoch: u64 = launch_ts
-        .0
-        .timestamp_millis()
-        .try_into()
-        .expect("we're after the epoch, this should be positive");
-    let metric = register_uint_gauge!(
-        "libmetrics_launch_timestamp",
-        "Timestamp (millis since epoch) at wich the process launched."
-    )
-    .unwrap();
-    metric.set(millis_since_epoch);
-}
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -20,7 +20,6 @@ pub use prometheus::{register_int_gauge_vec, IntGaugeVec};
 pub use prometheus::{Encoder, TextEncoder};
 use prometheus::{Registry, Result};

-pub mod launch_timestamp;
 mod wrappers;
 pub use wrappers::{CountedReader, CountedWriter};

@@ -35,14 +34,6 @@ macro_rules! register_uint_gauge_vec {
    }};
 }

-#[macro_export]
-macro_rules! register_uint_gauge {
-    ($NAME:expr, $HELP:expr $(,)?) => {{
-        let gauge = $crate::UIntGauge::new($NAME, $HELP).unwrap();
-        $crate::register(Box::new(gauge.clone())).map(|_| gauge)
-    }};
-}
-
 /// Special internal registry, to collect metrics independently from the default registry.
 /// Was introduced to fix deadlock with lazy registration of metrics in the default registry.
 static INTERNAL_REGISTRY: Lazy<Registry> = Lazy::new(Registry::new);
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -13,7 +13,5 @@ bytes.workspace = true
 byteorder.workspace = true
 utils.workspace = true
 postgres_ffi.workspace = true
-enum-map.workspace = true
-serde_json.workspace = true

 workspace_hack.workspace = true
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1,14 +1,9 @@
-use std::{
-    collections::HashMap,
-    num::{NonZeroU64, NonZeroUsize},
-    time::SystemTime,
-};
+use std::num::{NonZeroU64, NonZeroUsize};

 use byteorder::{BigEndian, ReadBytesExt};
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use utils::{
-    history_buffer::HistoryBufferWithDropCounter,
    id::{NodeId, TenantId, TimelineId},
    lsn::Lsn,
 };
@@ -142,6 +137,7 @@ pub struct TenantConfigRequest {
    #[serde_as(as = "DisplayFromStr")]
    pub tenant_id: TenantId,
    #[serde(default)]
+    #[serde_as(as = "Option<DisplayFromStr>")]
    pub checkpoint_distance: Option<u64>,
    pub checkpoint_timeout: Option<String>,
    pub compaction_target_size: Option<u64>,
@@ -155,11 +151,6 @@ pub struct TenantConfigRequest {
    pub lagging_wal_timeout: Option<String>,
    pub max_lsn_wal_lag: Option<NonZeroU64>,
    pub trace_read_requests: Option<bool>,
-    // We defer the parsing of the eviction_policy field to the request handler.
-    // Otherwise we'd have to move the types for eviction policy into this package.
-    // We might do that once the eviction feature has stabilizied.
-    // For now, this field is not even documented in the openapi_spec.yml.
-    pub eviction_policy: Option<serde_json::Value>,
 }

 impl TenantConfigRequest {
@@ -179,7 +170,6 @@ impl TenantConfigRequest {
            lagging_wal_timeout: None,
            max_lsn_wal_lag: None,
            trace_read_requests: None,
-            eviction_policy: None,
        }
    }
 }
@@ -237,130 +227,6 @@ pub struct TimelineInfo {
    pub state: TimelineState,
 }

-#[derive(Debug, Clone, Serialize)]
-pub struct LayerMapInfo {
-    pub in_memory_layers: Vec<InMemoryLayerInfo>,
-    pub historic_layers: Vec<HistoricLayerInfo>,
-}
-
-#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy, Serialize, Deserialize, enum_map::Enum)]
-#[repr(usize)]
-pub enum LayerAccessKind {
-    GetValueReconstructData,
-    Iter,
-    KeyIter,
-    Dump,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct LayerAccessStatFullDetails {
-    pub when_millis_since_epoch: u64,
-    pub task_kind: &'static str,
-    pub access_kind: LayerAccessKind,
-}
-
-/// An event that impacts the layer's residence status.
-#[serde_as]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct LayerResidenceEvent {
-    /// The time when the event occurred.
-    /// NB: this timestamp is captured while the residence status changes.
-    /// So, it might be behind/ahead of the actual residence change by a short amount of time.
-    ///
-    #[serde(rename = "timestamp_millis_since_epoch")]
-    #[serde_as(as = "serde_with::TimestampMilliSeconds")]
-    pub timestamp: SystemTime,
-    /// The new residence status of the layer.
-    pub status: LayerResidenceStatus,
-    /// The reason why we had to record this event.
-    pub reason: LayerResidenceEventReason,
-}
-
-/// The reason for recording a given [`ResidenceEvent`].
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
-pub enum LayerResidenceEventReason {
-    /// The layer map is being populated, e.g. during timeline load or attach.
-    /// This includes [`RemoteLayer`] objects created in [`reconcile_with_remote`].
-    /// We need to record such events because there is no persistent storage for the events.
-    LayerLoad,
-    /// We just created the layer (e.g., freeze_and_flush or compaction).
-    /// Such layers are always [`LayerResidenceStatus::Resident`].
-    LayerCreate,
-    /// We on-demand downloaded or evicted the given layer.
-    ResidenceChange,
-}
-
-/// The residence status of the layer, after the given [`LayerResidenceEvent`].
-#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
-pub enum LayerResidenceStatus {
-    /// Residence status for a layer file that exists locally.
-    /// It may also exist on the remote, we don't care here.
-    Resident,
-    /// Residence status for a layer file that only exists on the remote.
-    Evicted,
-}
-
-impl LayerResidenceEvent {
-    pub fn new(status: LayerResidenceStatus, reason: LayerResidenceEventReason) -> Self {
-        Self {
-            status,
-            reason,
-            timestamp: SystemTime::now(),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Serialize)]
-pub struct LayerAccessStats {
-    pub access_count_by_access_kind: HashMap<LayerAccessKind, u64>,
-    pub task_kind_access_flag: Vec<&'static str>,
-    pub first: Option<LayerAccessStatFullDetails>,
-    pub accesses_history: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
-    pub residence_events_history: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
-}
-
-#[serde_as]
-#[derive(Debug, Clone, Serialize)]
-#[serde(tag = "kind")]
-pub enum InMemoryLayerInfo {
-    Open {
-        #[serde_as(as = "DisplayFromStr")]
-        lsn_start: Lsn,
-    },
-    Frozen {
-        #[serde_as(as = "DisplayFromStr")]
-        lsn_start: Lsn,
-        #[serde_as(as = "DisplayFromStr")]
-        lsn_end: Lsn,
-    },
-}
-
-#[serde_as]
-#[derive(Debug, Clone, Serialize)]
-#[serde(tag = "kind")]
-pub enum HistoricLayerInfo {
-    Delta {
-        layer_file_name: String,
-        layer_file_size: Option<u64>,
-
-        #[serde_as(as = "DisplayFromStr")]
-        lsn_start: Lsn,
-        #[serde_as(as = "DisplayFromStr")]
-        lsn_end: Lsn,
-        remote: bool,
-        access_stats: LayerAccessStats,
-    },
-    Image {
-        layer_file_name: String,
-        layer_file_size: Option<u64>,
-
-        #[serde_as(as = "DisplayFromStr")]
-        lsn_start: Lsn,
-        remote: bool,
-        access_stats: LayerAccessStats,
-    },
-}
-
 #[derive(Debug, Serialize, Deserialize)]
 pub struct DownloadRemoteLayersTaskSpawnRequest {
    pub max_concurrent_downloads: NonZeroUsize,
@@ -401,7 +267,7 @@ pub struct TimelineGcRequest {
 }

 // Wrapped in libpq CopyData
-#[derive(PartialEq, Eq, Debug)]
+#[derive(PartialEq, Eq)]
 pub enum PagestreamFeMessage {
    Exists(PagestreamExistsRequest),
    Nblocks(PagestreamNblocksRequest),
--- a/libs/pageserver_api/src/reltag.rs
+++ b/libs/pageserver_api/src/reltag.rs
@@ -98,15 +98,6 @@ impl RelTag {

        name
    }
-
-    pub fn with_forknum(&self, forknum: u8) -> Self {
-        RelTag {
-            forknum,
-            spcnode: self.spcnode,
-            dbnode: self.dbnode,
-            relnode: self.relnode,
-        }
-    }
 }

 ///
--- a/libs/postgres_backend/Cargo.toml
+++ b/libs/postgres_backend/Cargo.toml
@@ -1,26 +0,0 @@
-[package]
-name = "postgres_backend"
-version = "0.1.0"
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-async-trait.workspace = true
-anyhow.workspace = true
-bytes.workspace = true
-futures.workspace = true
-rustls.workspace = true
-serde.workspace = true
-thiserror.workspace = true
-tokio.workspace = true
-tokio-rustls.workspace = true
-tracing.workspace = true
-
-pq_proto.workspace = true
-workspace_hack.workspace = true
-
-[dev-dependencies]
-once_cell.workspace = true
-rustls-pemfile.workspace = true
-tokio-postgres.workspace = true
-tokio-postgres-rustls.workspace = true
--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -1,911 +0,0 @@
-//! Server-side asynchronous Postgres connection, as limited as we need.
-//! To use, create PostgresBackend and run() it, passing the Handler
-//! implementation determining how to process the queries. Currently its API
-//! is rather narrow, but we can extend it once required.
-use anyhow::Context;
-use bytes::Bytes;
-use futures::pin_mut;
-use serde::{Deserialize, Serialize};
-use std::io::ErrorKind;
-use std::net::SocketAddr;
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{ready, Poll};
-use std::{fmt, io};
-use std::{future::Future, str::FromStr};
-use tokio::io::{AsyncRead, AsyncWrite, ReadHalf, WriteHalf};
-use tokio_rustls::TlsAcceptor;
-
-use tracing::{debug, error, info, trace};
-
-use pq_proto::framed::{ConnectionError, Framed, FramedReader, FramedWriter};
-use pq_proto::{
-    BeMessage, FeMessage, FeStartupPacket, ProtocolError, SQLSTATE_INTERNAL_ERROR,
-    SQLSTATE_SUCCESSFUL_COMPLETION,
-};
-
-/// An error, occurred during query processing:
-/// either during the connection ([`ConnectionError`]) or before/after it.
-#[derive(thiserror::Error, Debug)]
-pub enum QueryError {
-    /// The connection was lost while processing the query.
-    #[error(transparent)]
-    Disconnected(#[from] ConnectionError),
-    /// Some other error
-    #[error(transparent)]
-    Other(#[from] anyhow::Error),
-}
-
-impl From<io::Error> for QueryError {
-    fn from(e: io::Error) -> Self {
-        Self::Disconnected(ConnectionError::Io(e))
-    }
-}
-
-impl QueryError {
-    pub fn pg_error_code(&self) -> &'static [u8; 5] {
-        match self {
-            Self::Disconnected(_) => b"08006",         // connection failure
-            Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error
-        }
-    }
-}
-
-pub fn is_expected_io_error(e: &io::Error) -> bool {
-    use io::ErrorKind::*;
-    matches!(
-        e.kind(),
-        ConnectionRefused | ConnectionAborted | ConnectionReset
-    )
-}
-
-#[async_trait::async_trait]
-pub trait Handler {
-    /// Handle single query.
-    /// postgres_backend will issue ReadyForQuery after calling this (this
-    /// might be not what we want after CopyData streaming, but currently we don't
-    /// care). It will also flush out the output buffer.
-    async fn process_query(
-        &mut self,
-        pgb: &mut PostgresBackend,
-        query_string: &str,
-    ) -> Result<(), QueryError>;
-
-    /// Called on startup packet receival, allows to process params.
-    ///
-    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new users
-    /// creation is the proxy code. That is quite hacky and ad-hoc solution, may be we could allow
-    /// to override whole init logic in implementations.
-    fn startup(
-        &mut self,
-        _pgb: &mut PostgresBackend,
-        _sm: &FeStartupPacket,
-    ) -> Result<(), QueryError> {
-        Ok(())
-    }
-
-    /// Check auth jwt
-    fn check_auth_jwt(
-        &mut self,
-        _pgb: &mut PostgresBackend,
-        _jwt_response: &[u8],
-    ) -> Result<(), QueryError> {
-        Err(QueryError::Other(anyhow::anyhow!("JWT auth failed")))
-    }
-}
-
-/// PostgresBackend protocol state.
-/// XXX: The order of the constructors matters.
-#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
-pub enum ProtoState {
-    /// Nothing happened yet.
-    Initialization,
-    /// Encryption handshake is done; waiting for encrypted Startup message.
-    Encrypted,
-    /// Waiting for password (auth token).
-    Authentication,
-    /// Performed handshake and auth, ReadyForQuery is issued.
-    Established,
-    Closed,
-}
-
-#[derive(Clone, Copy)]
-pub enum ProcessMsgResult {
-    Continue,
-    Break,
-}
-
-/// Either plain TCP stream or encrypted one, implementing AsyncRead + AsyncWrite.
-pub enum MaybeTlsStream {
-    Unencrypted(tokio::net::TcpStream),
-    Tls(Box<tokio_rustls::server::TlsStream<tokio::net::TcpStream>>),
-}
-
-impl AsyncWrite for MaybeTlsStream {
-    fn poll_write(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &[u8],
-    ) -> Poll<io::Result<usize>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_write(cx, buf),
-            Self::Tls(stream) => Pin::new(stream).poll_write(cx, buf),
-        }
-    }
-    fn poll_flush(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<io::Result<()>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_flush(cx),
-            Self::Tls(stream) => Pin::new(stream).poll_flush(cx),
-        }
-    }
-    fn poll_shutdown(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<io::Result<()>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_shutdown(cx),
-            Self::Tls(stream) => Pin::new(stream).poll_shutdown(cx),
-        }
-    }
-}
-impl AsyncRead for MaybeTlsStream {
-    fn poll_read(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &mut tokio::io::ReadBuf<'_>,
-    ) -> Poll<io::Result<()>> {
-        match self.get_mut() {
-            Self::Unencrypted(stream) => Pin::new(stream).poll_read(cx, buf),
-            Self::Tls(stream) => Pin::new(stream).poll_read(cx, buf),
-        }
-    }
-}
-
-#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]
-pub enum AuthType {
-    Trust,
-    // This mimics postgres's AuthenticationCleartextPassword but instead of password expects JWT
-    NeonJWT,
-}
-
-impl FromStr for AuthType {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "Trust" => Ok(Self::Trust),
-            "NeonJWT" => Ok(Self::NeonJWT),
-            _ => anyhow::bail!("invalid value \"{s}\" for auth type"),
-        }
-    }
-}
-
-impl fmt::Display for AuthType {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.write_str(match self {
-            AuthType::Trust => "Trust",
-            AuthType::NeonJWT => "NeonJWT",
-        })
-    }
-}
-
-/// Either full duplex Framed or write only half; the latter is left in
-/// PostgresBackend after call to `split`. In principle we could always store a
-/// pair of splitted handles, but that would force to to pay splitting price
-/// (Arc and kinda mutex inside polling) for all uses (e.g. pageserver).
-enum MaybeWriteOnly {
-    Full(Framed<MaybeTlsStream>),
-    WriteOnly(FramedWriter<WriteHalf<MaybeTlsStream>>),
-    Broken, // temporary value palmed off during the split
-}
-
-impl MaybeWriteOnly {
-    async fn read_startup_message(&mut self) -> Result<Option<FeStartupPacket>, ConnectionError> {
-        match self {
-            MaybeWriteOnly::Full(framed) => framed.read_startup_message().await,
-            MaybeWriteOnly::WriteOnly(_) => {
-                Err(io::Error::new(ErrorKind::Other, "reading from write only half").into())
-            }
-            MaybeWriteOnly::Broken => panic!("IO on invalid MaybeWriteOnly"),
-        }
-    }
-
-    async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {
-        match self {
-            MaybeWriteOnly::Full(framed) => framed.read_message().await,
-            MaybeWriteOnly::WriteOnly(_) => {
-                Err(io::Error::new(ErrorKind::Other, "reading from write only half").into())
-            }
-            MaybeWriteOnly::Broken => panic!("IO on invalid MaybeWriteOnly"),
-        }
-    }
-
-    fn write_message_noflush(&mut self, msg: &BeMessage<'_>) -> Result<(), ProtocolError> {
-        match self {
-            MaybeWriteOnly::Full(framed) => framed.write_message(msg),
-            MaybeWriteOnly::WriteOnly(framed_writer) => framed_writer.write_message_noflush(msg),
-            MaybeWriteOnly::Broken => panic!("IO on invalid MaybeWriteOnly"),
-        }
-    }
-
-    async fn flush(&mut self) -> io::Result<()> {
-        match self {
-            MaybeWriteOnly::Full(framed) => framed.flush().await,
-            MaybeWriteOnly::WriteOnly(framed_writer) => framed_writer.flush().await,
-            MaybeWriteOnly::Broken => panic!("IO on invalid MaybeWriteOnly"),
-        }
-    }
-
-    async fn shutdown(&mut self) -> io::Result<()> {
-        match self {
-            MaybeWriteOnly::Full(framed) => framed.shutdown().await,
-            MaybeWriteOnly::WriteOnly(framed_writer) => framed_writer.shutdown().await,
-            MaybeWriteOnly::Broken => panic!("IO on invalid MaybeWriteOnly"),
-        }
-    }
-}
-
-pub struct PostgresBackend {
-    framed: MaybeWriteOnly,
-
-    pub state: ProtoState,
-
-    auth_type: AuthType,
-
-    peer_addr: SocketAddr,
-    pub tls_config: Option<Arc<rustls::ServerConfig>>,
-}
-
-pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
-    let mut query_string = query_string.to_vec();
-    if let Some(ch) = query_string.last() {
-        if *ch == 0 {
-            query_string.pop();
-        }
-    }
-    query_string
-}
-
-/// Cast a byte slice to a string slice, dropping null terminator if there's one.
-fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
-    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
-    std::str::from_utf8(without_null).map_err(|e| e.into())
-}
-
-impl PostgresBackend {
-    pub fn new(
-        socket: tokio::net::TcpStream,
-        auth_type: AuthType,
-        tls_config: Option<Arc<rustls::ServerConfig>>,
-    ) -> io::Result<Self> {
-        let peer_addr = socket.peer_addr()?;
-        let stream = MaybeTlsStream::Unencrypted(socket);
-
-        Ok(Self {
-            framed: MaybeWriteOnly::Full(Framed::new(stream)),
-            state: ProtoState::Initialization,
-            auth_type,
-            tls_config,
-            peer_addr,
-        })
-    }
-
-    pub fn get_peer_addr(&self) -> &SocketAddr {
-        &self.peer_addr
-    }
-
-    /// Read full message or return None if connection is cleanly closed with no
-    /// unprocessed data.
-    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {
-        if let ProtoState::Closed = self.state {
-            Ok(None)
-        } else {
-            let m = self.framed.read_message().await?;
-            trace!("read msg {:?}", m);
-            Ok(m)
-        }
-    }
-
-    /// Write message into internal output buffer, doesn't flush it. Technically
-    /// error type can be only ProtocolError here (if, unlikely, serialization
-    /// fails), but callers typically wrap it anyway.
-    pub fn write_message_noflush(
-        &mut self,
-        message: &BeMessage<'_>,
-    ) -> Result<&mut Self, ConnectionError> {
-        self.framed.write_message_noflush(message)?;
-        trace!("wrote msg {:?}", message);
-        Ok(self)
-    }
-
-    /// Flush output buffer into the socket.
-    pub async fn flush(&mut self) -> io::Result<()> {
-        self.framed.flush().await
-    }
-
-    /// Polling version of `flush()`, saves the caller need to pin.
-    pub fn poll_flush(
-        &mut self,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
-        let flush_fut = self.flush();
-        pin_mut!(flush_fut);
-        flush_fut.poll(cx)
-    }
-
-    /// Write message into internal output buffer and flush it to the stream.
-    pub async fn write_message(
-        &mut self,
-        message: &BeMessage<'_>,
-    ) -> Result<&mut Self, ConnectionError> {
-        self.write_message_noflush(message)?;
-        self.flush().await?;
-        Ok(self)
-    }
-
-    /// Returns an AsyncWrite implementation that wraps all the data written
-    /// to it in CopyData messages, and writes them to the connection
-    ///
-    /// The caller is responsible for sending CopyOutResponse and CopyDone messages.
-    pub fn copyout_writer(&mut self) -> CopyDataWriter {
-        CopyDataWriter { pgb: self }
-    }
-
-    /// Wrapper for run_message_loop() that shuts down socket when we are done
-    pub async fn run<F, S>(
-        mut self,
-        handler: &mut impl Handler,
-        shutdown_watcher: F,
-    ) -> Result<(), QueryError>
-    where
-        F: Fn() -> S,
-        S: Future,
-    {
-        let ret = self.run_message_loop(handler, shutdown_watcher).await;
-        // socket might be already closed, e.g. if previously received error,
-        // so ignore result.
-        self.framed.shutdown().await.ok();
-        ret
-    }
-
-    async fn run_message_loop<F, S>(
-        &mut self,
-        handler: &mut impl Handler,
-        shutdown_watcher: F,
-    ) -> Result<(), QueryError>
-    where
-        F: Fn() -> S,
-        S: Future,
-    {
-        trace!("postgres backend to {:?} started", self.peer_addr);
-
-        tokio::select!(
-            biased;
-
-            _ = shutdown_watcher() => {
-                // We were requested to shut down.
-                tracing::info!("shutdown request received during handshake");
-                return Ok(())
-            },
-
-            result = self.handshake(handler) => {
-                // Handshake complete.
-                result?;
-                if self.state == ProtoState::Closed {
-                    return Ok(()); // EOF during handshake
-                }
-            }
-        );
-
-        // Authentication completed
-        let mut query_string = Bytes::new();
-        while let Some(msg) = tokio::select!(
-            biased;
-            _ = shutdown_watcher() => {
-                // We were requested to shut down.
-                tracing::info!("shutdown request received in run_message_loop");
-                Ok(None)
-            },
-            msg = self.read_message() => { msg },
-        )? {
-            trace!("got message {:?}", msg);
-
-            let result = self.process_message(handler, msg, &mut query_string).await;
-            self.flush().await?;
-            match result? {
-                ProcessMsgResult::Continue => {
-                    self.flush().await?;
-                    continue;
-                }
-                ProcessMsgResult::Break => break,
-            }
-        }
-
-        trace!("postgres backend to {:?} exited", self.peer_addr);
-        Ok(())
-    }
-
-    /// Try to upgrade MaybeTlsStream into actual TLS one, performing handshake.
-    async fn tls_upgrade(
-        src: MaybeTlsStream,
-        tls_config: Arc<rustls::ServerConfig>,
-    ) -> anyhow::Result<MaybeTlsStream> {
-        match src {
-            MaybeTlsStream::Unencrypted(s) => {
-                let acceptor = TlsAcceptor::from(tls_config);
-                let tls_stream = acceptor.accept(s).await?;
-                Ok(MaybeTlsStream::Tls(Box::new(tls_stream)))
-            }
-            MaybeTlsStream::Tls(_) => {
-                anyhow::bail!("TLS already started");
-            }
-        }
-    }
-
-    async fn start_tls(&mut self) -> anyhow::Result<()> {
-        // temporary replace stream with fake to cook TLS one, Indiana Jones style
-        match std::mem::replace(&mut self.framed, MaybeWriteOnly::Broken) {
-            MaybeWriteOnly::Full(framed) => {
-                let tls_config = self
-                    .tls_config
-                    .as_ref()
-                    .context("start_tls called without conf")?
-                    .clone();
-                let tls_framed = framed
-                    .map_stream(|s| PostgresBackend::tls_upgrade(s, tls_config))
-                    .await?;
-                // push back ready TLS stream
-                self.framed = MaybeWriteOnly::Full(tls_framed);
-                Ok(())
-            }
-            MaybeWriteOnly::WriteOnly(_) => {
-                anyhow::bail!("TLS upgrade attempt in split state")
-            }
-            MaybeWriteOnly::Broken => panic!("TLS upgrade on framed in invalid state"),
-        }
-    }
-
-    /// Split off owned read part from which messages can be read in different
-    /// task/thread.
-    pub fn split(&mut self) -> anyhow::Result<PostgresBackendReader> {
-        // temporary replace stream with fake to cook split one, Indiana Jones style
-        match std::mem::replace(&mut self.framed, MaybeWriteOnly::Broken) {
-            MaybeWriteOnly::Full(framed) => {
-                let (reader, writer) = framed.split();
-                self.framed = MaybeWriteOnly::WriteOnly(writer);
-                Ok(PostgresBackendReader(reader))
-            }
-            MaybeWriteOnly::WriteOnly(_) => {
-                anyhow::bail!("PostgresBackend is already split")
-            }
-            MaybeWriteOnly::Broken => panic!("split on framed in invalid state"),
-        }
-    }
-
-    /// Join read part back.
-    pub fn unsplit(&mut self, reader: PostgresBackendReader) -> anyhow::Result<()> {
-        // temporary replace stream with fake to cook joined one, Indiana Jones style
-        match std::mem::replace(&mut self.framed, MaybeWriteOnly::Broken) {
-            MaybeWriteOnly::Full(_) => {
-                anyhow::bail!("PostgresBackend is not split")
-            }
-            MaybeWriteOnly::WriteOnly(writer) => {
-                let joined = Framed::unsplit(reader.0, writer);
-                self.framed = MaybeWriteOnly::Full(joined);
-                Ok(())
-            }
-            MaybeWriteOnly::Broken => panic!("unsplit on framed in invalid state"),
-        }
-    }
-
-    /// Perform handshake with the client, transitioning to Established.
-    /// In case of EOF during handshake logs this, sets state to Closed and returns Ok(()).
-    async fn handshake(&mut self, handler: &mut impl Handler) -> Result<(), QueryError> {
-        while self.state < ProtoState::Authentication {
-            match self.framed.read_startup_message().await? {
-                Some(msg) => {
-                    self.process_startup_message(handler, msg).await?;
-                }
-                None => {
-                    trace!(
-                        "postgres backend to {:?} received EOF during handshake",
-                        self.peer_addr
-                    );
-                    self.state = ProtoState::Closed;
-                    return Ok(());
-                }
-            }
-        }
-
-        // Perform auth, if needed.
-        if self.state == ProtoState::Authentication {
-            match self.framed.read_message().await? {
-                Some(FeMessage::PasswordMessage(m)) => {
-                    assert!(self.auth_type == AuthType::NeonJWT);
-
-                    let (_, jwt_response) = m.split_last().context("protocol violation")?;
-
-                    if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
-                        self.write_message_noflush(&BeMessage::ErrorResponse(
-                            &e.to_string(),
-                            Some(e.pg_error_code()),
-                        ))?;
-                        return Err(e);
-                    }
-
-                    self.write_message_noflush(&BeMessage::AuthenticationOk)?
-                        .write_message_noflush(&BeMessage::CLIENT_ENCODING)?
-                        .write_message(&BeMessage::ReadyForQuery)
-                        .await?;
-                    self.state = ProtoState::Established;
-                }
-                Some(m) => {
-                    return Err(QueryError::Other(anyhow::anyhow!(
-                        "Unexpected message {:?} while waiting for handshake",
-                        m
-                    )));
-                }
-                None => {
-                    trace!(
-                        "postgres backend to {:?} received EOF during auth",
-                        self.peer_addr
-                    );
-                    self.state = ProtoState::Closed;
-                    return Ok(());
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Process startup packet:
-    /// - transition to Established if auth type is trust
-    /// - transition to Authentication if auth type is NeonJWT.
-    /// - or perform TLS handshake -- then need to call this again to receive
-    ///   actual startup packet.
-    async fn process_startup_message(
-        &mut self,
-        handler: &mut impl Handler,
-        msg: FeStartupPacket,
-    ) -> Result<(), QueryError> {
-        assert!(self.state < ProtoState::Authentication);
-        let have_tls = self.tls_config.is_some();
-        match msg {
-            FeStartupPacket::SslRequest => {
-                debug!("SSL requested");
-
-                self.write_message(&BeMessage::EncryptionResponse(have_tls))
-                    .await?;
-
-                if have_tls {
-                    self.start_tls().await?;
-                    self.state = ProtoState::Encrypted;
-                }
-            }
-            FeStartupPacket::GssEncRequest => {
-                debug!("GSS requested");
-                self.write_message(&BeMessage::EncryptionResponse(false))
-                    .await?;
-            }
-            FeStartupPacket::StartupMessage { .. } => {
-                if have_tls && !matches!(self.state, ProtoState::Encrypted) {
-                    self.write_message(&BeMessage::ErrorResponse("must connect with TLS", None))
-                        .await?;
-                    return Err(QueryError::Other(anyhow::anyhow!(
-                        "client did not connect with TLS"
-                    )));
-                }
-
-                // NB: startup() may change self.auth_type -- we are using that in proxy code
-                // to bypass auth for new users.
-                handler.startup(self, &msg)?;
-
-                match self.auth_type {
-                    AuthType::Trust => {
-                        self.write_message_noflush(&BeMessage::AuthenticationOk)?
-                            .write_message_noflush(&BeMessage::CLIENT_ENCODING)?
-                            .write_message_noflush(&BeMessage::INTEGER_DATETIMES)?
-                            // The async python driver requires a valid server_version
-                            .write_message_noflush(&BeMessage::server_version("14.1"))?
-                            .write_message(&BeMessage::ReadyForQuery)
-                            .await?;
-                        self.state = ProtoState::Established;
-                    }
-                    AuthType::NeonJWT => {
-                        self.write_message(&BeMessage::AuthenticationCleartextPassword)
-                            .await?;
-                        self.state = ProtoState::Authentication;
-                    }
-                }
-            }
-            FeStartupPacket::CancelRequest { .. } => {
-                return Err(QueryError::Other(anyhow::anyhow!(
-                    "Unexpected CancelRequest message during handshake"
-                )));
-            }
-        }
-        Ok(())
-    }
-
-    async fn process_message(
-        &mut self,
-        handler: &mut impl Handler,
-        msg: FeMessage,
-        unnamed_query_string: &mut Bytes,
-    ) -> Result<ProcessMsgResult, QueryError> {
-        // Allow only startup and password messages during auth. Otherwise client would be able to bypass auth
-        // TODO: change that to proper top-level match of protocol state with separate message handling for each state
-        assert!(self.state == ProtoState::Established);
-
-        match msg {
-            FeMessage::Query(body) => {
-                // remove null terminator
-                let query_string = cstr_to_str(&body)?;
-
-                trace!("got query {query_string:?}");
-                if let Err(e) = handler.process_query(self, query_string).await {
-                    log_query_error(query_string, &e);
-                    let short_error = short_error(&e);
-                    self.write_message_noflush(&BeMessage::ErrorResponse(
-                        &short_error,
-                        Some(e.pg_error_code()),
-                    ))?;
-                }
-                self.write_message_noflush(&BeMessage::ReadyForQuery)?;
-            }
-
-            FeMessage::Parse(m) => {
-                *unnamed_query_string = m.query_string;
-                self.write_message_noflush(&BeMessage::ParseComplete)?;
-            }
-
-            FeMessage::Describe(_) => {
-                self.write_message_noflush(&BeMessage::ParameterDescription)?
-                    .write_message_noflush(&BeMessage::NoData)?;
-            }
-
-            FeMessage::Bind(_) => {
-                self.write_message_noflush(&BeMessage::BindComplete)?;
-            }
-
-            FeMessage::Close(_) => {
-                self.write_message_noflush(&BeMessage::CloseComplete)?;
-            }
-
-            FeMessage::Execute(_) => {
-                let query_string = cstr_to_str(unnamed_query_string)?;
-                trace!("got execute {query_string:?}");
-                if let Err(e) = handler.process_query(self, query_string).await {
-                    log_query_error(query_string, &e);
-                    self.write_message_noflush(&BeMessage::ErrorResponse(
-                        &e.to_string(),
-                        Some(e.pg_error_code()),
-                    ))?;
-                }
-                // NOTE there is no ReadyForQuery message. This handler is used
-                // for basebackup and it uses CopyOut which doesn't require
-                // ReadyForQuery message and backend just switches back to
-                // processing mode after sending CopyDone or ErrorResponse.
-            }
-
-            FeMessage::Sync => {
-                self.write_message_noflush(&BeMessage::ReadyForQuery)?;
-            }
-
-            FeMessage::Terminate => {
-                return Ok(ProcessMsgResult::Break);
-            }
-
-            // We prefer explicit pattern matching to wildcards, because
-            // this helps us spot the places where new variants are missing
-            FeMessage::CopyData(_)
-            | FeMessage::CopyDone
-            | FeMessage::CopyFail
-            | FeMessage::PasswordMessage(_) => {
-                return Err(QueryError::Other(anyhow::anyhow!(
-                    "unexpected message type: {msg:?}",
-                )));
-            }
-        }
-
-        Ok(ProcessMsgResult::Continue)
-    }
-
-    /// Log as info/error result of handling COPY stream and send back
-    /// ErrorResponse if that makes sense. Shutdown the stream if we got
-    /// Terminate. TODO: transition into waiting for Sync msg if we initiate the
-    /// close.
-    pub async fn handle_copy_stream_end(&mut self, end: CopyStreamHandlerEnd) {
-        use CopyStreamHandlerEnd::*;
-
-        let expected_end = match &end {
-            ServerInitiated(_) | CopyDone | CopyFail | Terminate | EOF => true,
-            CopyStreamHandlerEnd::Disconnected(ConnectionError::Io(io_error))
-                if is_expected_io_error(io_error) =>
-            {
-                true
-            }
-            _ => false,
-        };
-        if expected_end {
-            info!("terminated: {:#}", end);
-        } else {
-            error!("terminated: {:?}", end);
-        }
-
-        // Note: no current usages ever send this
-        if let CopyDone = &end {
-            if let Err(e) = self.write_message(&BeMessage::CopyDone).await {
-                error!("failed to send CopyDone: {}", e);
-            }
-        }
-
-        if let Terminate = &end {
-            self.state = ProtoState::Closed;
-        }
-
-        let err_to_send_and_errcode = match &end {
-            ServerInitiated(_) => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)),
-            Other(_) => Some((end.to_string(), SQLSTATE_INTERNAL_ERROR)),
-            // Note: CopyFail in duplex copy is somewhat unexpected (at least to
-            // PG walsender; evidently and per my docs reading client should
-            // finish it with CopyDone). It is not a problem to recover from it
-            // finishing the stream in both directions like we do, but note that
-            // sync rust-postgres client (which we don't use anymore) hangs if
-            // socket is not closed here.
-            // https://github.com/sfackler/rust-postgres/issues/755
-            // https://github.com/neondatabase/neon/issues/935
-            //
-            // Currently, the version of tokio_postgres replication patch we use
-            // sends this when it closes the stream (e.g. pageserver decided to
-            // switch conn to another safekeeper and client gets dropped).
-            // Moreover, seems like 'connection' task errors with 'unexpected
-            // message from server' when it receives ErrorResponse (anything but
-            // CopyData/CopyDone) back.
-            CopyFail => Some((end.to_string(), SQLSTATE_SUCCESSFUL_COMPLETION)),
-            _ => None,
-        };
-        if let Some((err, errcode)) = err_to_send_and_errcode {
-            if let Err(ee) = self
-                .write_message(&BeMessage::ErrorResponse(&err, Some(errcode)))
-                .await
-            {
-                error!("failed to send ErrorResponse: {}", ee);
-            }
-        }
-    }
-}
-
-pub struct PostgresBackendReader(FramedReader<ReadHalf<MaybeTlsStream>>);
-
-impl PostgresBackendReader {
-    /// Read full message or return None if connection is cleanly closed with no
-    /// unprocessed data.
-    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {
-        let m = self.0.read_message().await?;
-        trace!("read msg {:?}", m);
-        Ok(m)
-    }
-
-    /// Get CopyData contents of the next message in COPY stream or error
-    /// closing it. The error type is wider than actual errors which can happen
-    /// here -- it includes 'Other' and 'ServerInitiated', but that's ok for
-    /// current callers.
-    pub async fn read_copy_message(&mut self) -> Result<Bytes, CopyStreamHandlerEnd> {
-        match self.read_message().await? {
-            Some(msg) => match msg {
-                FeMessage::CopyData(m) => Ok(m),
-                FeMessage::CopyDone => Err(CopyStreamHandlerEnd::CopyDone),
-                FeMessage::CopyFail => Err(CopyStreamHandlerEnd::CopyFail),
-                FeMessage::Terminate => Err(CopyStreamHandlerEnd::Terminate),
-                _ => Err(CopyStreamHandlerEnd::from(ConnectionError::Protocol(
-                    ProtocolError::Protocol(format!("unexpected message in COPY stream {:?}", msg)),
-                ))),
-            },
-            None => Err(CopyStreamHandlerEnd::EOF),
-        }
-    }
-}
-
-///
-/// A futures::AsyncWrite implementation that wraps all data written to it in CopyData
-/// messages.
-///
-
-pub struct CopyDataWriter<'a> {
-    pgb: &'a mut PostgresBackend,
-}
-
-impl<'a> AsyncWrite for CopyDataWriter<'a> {
-    fn poll_write(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &[u8],
-    ) -> Poll<Result<usize, std::io::Error>> {
-        let this = self.get_mut();
-
-        // It's not strictly required to flush between each message, but makes it easier
-        // to view in wireshark, and usually the messages that the callers write are
-        // decently-sized anyway.
-        if let Err(err) = ready!(this.pgb.poll_flush(cx)) {
-            return Poll::Ready(Err(err));
-        }
-
-        // CopyData
-        // XXX: if the input is large, we should split it into multiple messages.
-        // Not sure what the threshold should be, but the ultimate hard limit is that
-        // the length cannot exceed u32.
-        this.pgb
-            .write_message_noflush(&BeMessage::CopyData(buf))
-            // write_message only writes to the buffer, so it can fail iff the
-            // message is invaid, but CopyData can't be invalid.
-            .map_err(|_| io::Error::new(ErrorKind::Other, "failed to serialize CopyData"))?;
-
-        Poll::Ready(Ok(buf.len()))
-    }
-
-    fn poll_flush(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
-        let this = self.get_mut();
-        this.pgb.poll_flush(cx)
-    }
-
-    fn poll_shutdown(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
-        let this = self.get_mut();
-        this.pgb.poll_flush(cx)
-    }
-}
-
-pub fn short_error(e: &QueryError) -> String {
-    match e {
-        QueryError::Disconnected(connection_error) => connection_error.to_string(),
-        QueryError::Other(e) => format!("{e:#}"),
-    }
-}
-
-fn log_query_error(query: &str, e: &QueryError) {
-    match e {
-        QueryError::Disconnected(ConnectionError::Io(io_error)) => {
-            if is_expected_io_error(io_error) {
-                info!("query handler for '{query}' failed with expected io error: {io_error}");
-            } else {
-                error!("query handler for '{query}' failed with io error: {io_error}");
-            }
-        }
-        QueryError::Disconnected(other_connection_error) => {
-            error!("query handler for '{query}' failed with connection error: {other_connection_error:?}")
-        }
-        QueryError::Other(e) => {
-            error!("query handler for '{query}' failed: {e:?}");
-        }
-    }
-}
-
-/// Something finishing handling of COPY stream, see handle_copy_stream_end.
-/// This is not always a real error, but it allows to use ? and thiserror impls.
-#[derive(thiserror::Error, Debug)]
-pub enum CopyStreamHandlerEnd {
-    /// Handler initiates the end of streaming.
-    #[error("{0}")]
-    ServerInitiated(String),
-    #[error("received CopyDone")]
-    CopyDone,
-    #[error("received CopyFail")]
-    CopyFail,
-    #[error("received Terminate")]
-    Terminate,
-    #[error("EOF on COPY stream")]
-    EOF,
-    /// The connection was lost
-    #[error(transparent)]
-    Disconnected(#[from] ConnectionError),
-    /// Some other error
-    #[error(transparent)]
-    Other(#[from] anyhow::Error),
-}
--- a/libs/postgres_backend/tests/simple_select.rs
+++ b/libs/postgres_backend/tests/simple_select.rs
@@ -1,139 +0,0 @@
-/// Test postgres_backend_async with tokio_postgres
-use once_cell::sync::Lazy;
-use postgres_backend::{AuthType, Handler, PostgresBackend, QueryError};
-use pq_proto::{BeMessage, RowDescriptor};
-use std::io::Cursor;
-use std::{future, sync::Arc};
-use tokio::net::{TcpListener, TcpStream};
-use tokio_postgres::config::SslMode;
-use tokio_postgres::tls::MakeTlsConnect;
-use tokio_postgres::{Config, NoTls, SimpleQueryMessage};
-use tokio_postgres_rustls::MakeRustlsConnect;
-
-// generate client, server test streams
-async fn make_tcp_pair() -> (TcpStream, TcpStream) {
-    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
-    let addr = listener.local_addr().unwrap();
-    let client_stream = TcpStream::connect(addr).await.unwrap();
-    let (server_stream, _) = listener.accept().await.unwrap();
-    (client_stream, server_stream)
-}
-
-struct TestHandler {}
-
-#[async_trait::async_trait]
-impl Handler for TestHandler {
-    // return single col 'hey' for any query
-    async fn process_query(
-        &mut self,
-        pgb: &mut PostgresBackend,
-        _query_string: &str,
-    ) -> Result<(), QueryError> {
-        pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor::text_col(
-            b"hey",
-        )]))?
-        .write_message_noflush(&BeMessage::DataRow(&[Some("hey".as_bytes())]))?
-        .write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
-        Ok(())
-    }
-}
-
-// test that basic select works
-#[tokio::test]
-async fn simple_select() {
-    let (client_sock, server_sock) = make_tcp_pair().await;
-
-    // create and run pgbackend
-    let pgbackend =
-        PostgresBackend::new(server_sock, AuthType::Trust, None).expect("pgbackend creation");
-
-    tokio::spawn(async move {
-        let mut handler = TestHandler {};
-        pgbackend.run(&mut handler, future::pending::<()>).await
-    });
-
-    let conf = Config::new();
-    let (client, connection) = conf.connect_raw(client_sock, NoTls).await.expect("connect");
-    // The connection object performs the actual communication with the database,
-    // so spawn it off to run on its own.
-    tokio::spawn(async move {
-        if let Err(e) = connection.await {
-            eprintln!("connection error: {}", e);
-        }
-    });
-
-    let first_val = &(client.simple_query("SELECT 42;").await.expect("select"))[0];
-    if let SimpleQueryMessage::Row(row) = first_val {
-        let first_col = row.get(0).expect("first column");
-        assert_eq!(first_col, "hey");
-    } else {
-        panic!("expected SimpleQueryMessage::Row");
-    }
-}
-
-static KEY: Lazy<rustls::PrivateKey> = Lazy::new(|| {
-    let mut cursor = Cursor::new(include_bytes!("key.pem"));
-    rustls::PrivateKey(rustls_pemfile::rsa_private_keys(&mut cursor).unwrap()[0].clone())
-});
-
-static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
-    let mut cursor = Cursor::new(include_bytes!("cert.pem"));
-    rustls::Certificate(rustls_pemfile::certs(&mut cursor).unwrap()[0].clone())
-});
-
-// test that basic select with ssl works
-#[tokio::test]
-async fn simple_select_ssl() {
-    let (client_sock, server_sock) = make_tcp_pair().await;
-
-    let server_cfg = rustls::ServerConfig::builder()
-        .with_safe_defaults()
-        .with_no_client_auth()
-        .with_single_cert(vec![CERT.clone()], KEY.clone())
-        .unwrap();
-    let tls_config = Some(Arc::new(server_cfg));
-    let pgbackend =
-        PostgresBackend::new(server_sock, AuthType::Trust, tls_config).expect("pgbackend creation");
-
-    tokio::spawn(async move {
-        let mut handler = TestHandler {};
-        pgbackend.run(&mut handler, future::pending::<()>).await
-    });
-
-    let client_cfg = rustls::ClientConfig::builder()
-        .with_safe_defaults()
-        .with_root_certificates({
-            let mut store = rustls::RootCertStore::empty();
-            store.add(&CERT).unwrap();
-            store
-        })
-        .with_no_client_auth();
-    let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg);
-    let tls_connect = <MakeRustlsConnect as MakeTlsConnect<TcpStream>>::make_tls_connect(
-        &mut make_tls_connect,
-        "localhost",
-    )
-    .expect("make_tls_connect");
-
-    let mut conf = Config::new();
-    conf.ssl_mode(SslMode::Require);
-    let (client, connection) = conf
-        .connect_raw(client_sock, tls_connect)
-        .await
-        .expect("connect");
-    // The connection object performs the actual communication with the database,
-    // so spawn it off to run on its own.
-    tokio::spawn(async move {
-        if let Err(e) = connection.await {
-            eprintln!("connection error: {}", e);
-        }
-    });
-
-    let first_val = &(client.simple_query("SELECT 42;").await.expect("select"))[0];
-    if let SimpleQueryMessage::Row(row) = first_val {
-        let first_col = row.get(0).expect("first column");
-        assert_eq!(first_col, "hey");
-    } else {
-        panic!("expected SimpleQueryMessage::Row");
-    }
-}
--- a/libs/pq_proto/Cargo.toml
+++ b/libs/pq_proto/Cargo.toml
@@ -5,8 +5,8 @@ edition.workspace = true
 license.workspace = true

 [dependencies]
+anyhow.workspace = true
 bytes.workspace = true
-byteorder.workspace = true
 pin-project-lite.workspace = true
 postgres-protocol.workspace = true
 rand.workspace = true
--- a/libs/pq_proto/src/framed.rs
+++ b/libs/pq_proto/src/framed.rs
@@ -1,251 +0,0 @@
-//! Provides `Framed` -- writing/flushing and reading Postgres messages to/from
-//! the async stream based on (and buffered with) BytesMut. All functions are
-//! cancellation safe.
-//!
-//! It is similar to what tokio_util::codec::Framed with appropriate codec
-//! provides, but `FramedReader` and `FramedWriter` read/write parts can be used
-//! separately without using split from futures::stream::StreamExt (which
-//! allocates box[1] in polling internally). tokio::io::split is used for splitting
-//! instead. Plus we customize error messages more than a single type for all io
-//! calls.
-//!
-//! [1] https://docs.rs/futures-util/0.3.26/src/futures_util/lock/bilock.rs.html#107
-use bytes::{Buf, BytesMut};
-use std::{
-    future::Future,
-    io::{self, ErrorKind},
-};
-use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, ReadHalf, WriteHalf};
-
-use crate::{BeMessage, FeMessage, FeStartupPacket, ProtocolError};
-
-const INITIAL_CAPACITY: usize = 8 * 1024;
-
-/// Error on postgres connection: either IO (physical transport error) or
-/// protocol violation.
-#[derive(thiserror::Error, Debug)]
-pub enum ConnectionError {
-    #[error(transparent)]
-    Io(#[from] io::Error),
-    #[error(transparent)]
-    Protocol(#[from] ProtocolError),
-}
-
-impl ConnectionError {
-    /// Proxy stream.rs uses only io::Error; provide it.
-    pub fn into_io_error(self) -> io::Error {
-        match self {
-            ConnectionError::Io(io) => io,
-            ConnectionError::Protocol(pe) => io::Error::new(io::ErrorKind::Other, pe.to_string()),
-        }
-    }
-}
-
-/// Wraps async io `stream`, providing messages to write/flush + read Postgres
-/// messages.
-pub struct Framed<S> {
-    stream: S,
-    read_buf: BytesMut,
-    write_buf: BytesMut,
-}
-
-impl<S> Framed<S> {
-    pub fn new(stream: S) -> Self {
-        Self {
-            stream,
-            read_buf: BytesMut::with_capacity(INITIAL_CAPACITY),
-            write_buf: BytesMut::with_capacity(INITIAL_CAPACITY),
-        }
-    }
-
-    /// Get a shared reference to the underlying stream.
-    pub fn get_ref(&self) -> &S {
-        &self.stream
-    }
-
-    /// Extract the underlying stream.
-    pub fn into_inner(self) -> S {
-        self.stream
-    }
-
-    /// Return new Framed with stream type transformed by async f, for TLS
-    /// upgrade.
-    pub async fn map_stream<S2, E, F, Fut>(self, f: F) -> Result<Framed<S2>, E>
-    where
-        F: FnOnce(S) -> Fut,
-        Fut: Future<Output = Result<S2, E>>,
-    {
-        let stream = f(self.stream).await?;
-        Ok(Framed {
-            stream,
-            read_buf: self.read_buf,
-            write_buf: self.write_buf,
-        })
-    }
-}
-
-impl<S: AsyncRead + Unpin> Framed<S> {
-    pub async fn read_startup_message(
-        &mut self,
-    ) -> Result<Option<FeStartupPacket>, ConnectionError> {
-        read_message(&mut self.stream, &mut self.read_buf, FeStartupPacket::parse).await
-    }
-
-    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {
-        read_message(&mut self.stream, &mut self.read_buf, FeMessage::parse).await
-    }
-}
-
-impl<S: AsyncWrite + Unpin> Framed<S> {
-    /// Write next message to the output buffer; doesn't flush.
-    pub fn write_message(&mut self, msg: &BeMessage<'_>) -> Result<(), ProtocolError> {
-        BeMessage::write(&mut self.write_buf, msg)
-    }
-
-    /// Flush out the buffer. This function is cancellation safe: it can be
-    /// interrupted and flushing will be continued in the next call.
-    pub async fn flush(&mut self) -> Result<(), io::Error> {
-        flush(&mut self.stream, &mut self.write_buf).await
-    }
-
-    /// Flush out the buffer and shutdown the stream.
-    pub async fn shutdown(&mut self) -> Result<(), io::Error> {
-        shutdown(&mut self.stream, &mut self.write_buf).await
-    }
-}
-
-impl<S: AsyncRead + AsyncWrite + Unpin> Framed<S> {
-    /// Split into owned read and write parts. Beware of potential issues with
-    /// using halves in different tasks on TLS stream:
-    /// https://github.com/tokio-rs/tls/issues/40
-    pub fn split(self) -> (FramedReader<ReadHalf<S>>, FramedWriter<WriteHalf<S>>) {
-        let (read_half, write_half) = tokio::io::split(self.stream);
-        let reader = FramedReader {
-            stream: read_half,
-            read_buf: self.read_buf,
-        };
-        let writer = FramedWriter {
-            stream: write_half,
-            write_buf: self.write_buf,
-        };
-        (reader, writer)
-    }
-
-    /// Join read and write parts back.
-    pub fn unsplit(reader: FramedReader<ReadHalf<S>>, writer: FramedWriter<WriteHalf<S>>) -> Self {
-        Self {
-            stream: reader.stream.unsplit(writer.stream),
-            read_buf: reader.read_buf,
-            write_buf: writer.write_buf,
-        }
-    }
-}
-
-/// Read-only version of `Framed`.
-pub struct FramedReader<S> {
-    stream: S,
-    read_buf: BytesMut,
-}
-
-impl<S: AsyncRead + Unpin> FramedReader<S> {
-    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, ConnectionError> {
-        read_message(&mut self.stream, &mut self.read_buf, FeMessage::parse).await
-    }
-}
-
-/// Write-only version of `Framed`.
-pub struct FramedWriter<S> {
-    stream: S,
-    write_buf: BytesMut,
-}
-
-impl<S> FramedWriter<S> {
-    /// Get a mut reference to the underlying stream.
-    pub fn get_mut(&mut self) -> &mut S {
-        &mut self.stream
-    }
-}
-
-impl<S: AsyncWrite + Unpin> FramedWriter<S> {
-    /// Write next message to the output buffer; doesn't flush.
-    pub fn write_message_noflush(&mut self, msg: &BeMessage<'_>) -> Result<(), ProtocolError> {
-        BeMessage::write(&mut self.write_buf, msg)
-    }
-
-    /// Flush out the buffer. This function is cancellation safe: it can be
-    /// interrupted and flushing will be continued in the next call.
-    pub async fn flush(&mut self) -> Result<(), io::Error> {
-        flush(&mut self.stream, &mut self.write_buf).await
-    }
-
-    /// Flush out the buffer and shutdown the stream.
-    pub async fn shutdown(&mut self) -> Result<(), io::Error> {
-        shutdown(&mut self.stream, &mut self.write_buf).await
-    }
-}
-
-/// Read next message from the stream. Returns Ok(None), if EOF happened and we
-/// don't have remaining data in the buffer. This function is cancellation safe:
-/// you can drop future which is not yet complete and finalize reading message
-/// with the next call.
-///
-/// Parametrized to allow reading startup or usual message, having different
-/// format.
-async fn read_message<S: AsyncRead + Unpin, M, P>(
-    stream: &mut S,
-    read_buf: &mut BytesMut,
-    parse: P,
-) -> Result<Option<M>, ConnectionError>
-where
-    P: Fn(&mut BytesMut) -> Result<Option<M>, ProtocolError>,
-{
-    loop {
-        if let Some(msg) = parse(read_buf)? {
-            return Ok(Some(msg));
-        }
-        // If we can't build a frame yet, try to read more data and try again.
-        // Make sure we've got room for at least one byte to read to ensure
-        // that we don't get a spurious 0 that looks like EOF.
-        read_buf.reserve(1);
-        if stream.read_buf(read_buf).await? == 0 {
-            if read_buf.has_remaining() {
-                return Err(io::Error::new(
-                    ErrorKind::UnexpectedEof,
-                    "EOF with unprocessed data in the buffer",
-                )
-                .into());
-            } else {
-                return Ok(None); // clean EOF
-            }
-        }
-    }
-}
-
-async fn flush<S: AsyncWrite + Unpin>(
-    stream: &mut S,
-    write_buf: &mut BytesMut,
-) -> Result<(), io::Error> {
-    while write_buf.has_remaining() {
-        let bytes_written = stream.write(write_buf.chunk()).await?;
-        if bytes_written == 0 {
-            return Err(io::Error::new(
-                ErrorKind::WriteZero,
-                "failed to write message",
-            ));
-        }
-        // The advanced part will be garbage collected, likely during shifting
-        // data left on next attempt to write to buffer when free space is not
-        // enough.
-        write_buf.advance(bytes_written);
-    }
-    write_buf.clear();
-    stream.flush().await
-}
-
-async fn shutdown<S: AsyncWrite + Unpin>(
-    stream: &mut S,
-    write_buf: &mut BytesMut,
-) -> Result<(), io::Error> {
-    flush(stream, write_buf).await?;
-    stream.shutdown().await
-}
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -2,18 +2,24 @@
 //! <https://www.postgresql.org/docs/devel/protocol-message-formats.html>
 //! on message formats.

-pub mod framed;
+// Tools for calling certain async methods in sync contexts.
+pub mod sync;

-use byteorder::{BigEndian, ReadBytesExt};
+use anyhow::{ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use postgres_protocol::PG_EPOCH;
 use serde::{Deserialize, Serialize};
 use std::{
    borrow::Cow,
    collections::HashMap,
-    fmt, io, str,
+    fmt,
+    future::Future,
+    io::{self, Cursor},
+    str,
    time::{Duration, SystemTime},
 };
+use sync::{AsyncishRead, SyncFuture};
+use tokio::io::AsyncReadExt;
 use tracing::{trace, warn};

 pub type Oid = u32;
@@ -25,6 +31,7 @@ pub const TEXT_OID: Oid = 25;

 #[derive(Debug)]
 pub enum FeMessage {
+    StartupPacket(FeStartupPacket),
    // Simple query.
    Query(Bytes),
    // Extended query protocol.
@@ -68,36 +75,27 @@ impl StartupMessageParams {
    /// taking into account all escape sequences but leaving them as-is.
    /// [`None`] means that there's no `options` in [`Self`].
    pub fn options_raw(&self) -> Option<impl Iterator<Item = &str>> {
-        self.get("options").map(Self::parse_options_raw)
-    }
-
-    /// Split command-line options according to PostgreSQL's logic,
-    /// applying all escape sequences (using owned strings as needed).
-    /// [`None`] means that there's no `options` in [`Self`].
-    pub fn options_escaped(&self) -> Option<impl Iterator<Item = Cow<'_, str>>> {
-        self.get("options").map(Self::parse_options_escaped)
-    }
-
-    /// Split command-line options according to PostgreSQL's logic,
-    /// taking into account all escape sequences but leaving them as-is.
-    pub fn parse_options_raw(input: &str) -> impl Iterator<Item = &str> {
        // See `postgres: pg_split_opts`.
        let mut last_was_escape = false;
-        input
+        let iter = self
+            .get("options")?
            .split(move |c: char| {
                // We split by non-escaped whitespace symbols.
                let should_split = c.is_ascii_whitespace() && !last_was_escape;
                last_was_escape = c == '\\' && !last_was_escape;
                should_split
            })
-            .filter(|s| !s.is_empty())
+            .filter(|s| !s.is_empty());
+
+        Some(iter)
    }

    /// Split command-line options according to PostgreSQL's logic,
    /// applying all escape sequences (using owned strings as needed).
-    pub fn parse_options_escaped(input: &str) -> impl Iterator<Item = Cow<'_, str>> {
+    /// [`None`] means that there's no `options` in [`Self`].
+    pub fn options_escaped(&self) -> Option<impl Iterator<Item = Cow<'_, str>>> {
        // See `postgres: pg_split_opts`.
-        Self::parse_options_raw(input).map(|s| {
+        let iter = self.options_raw()?.map(|s| {
            let mut preserve_next_escape = false;
            let escape = |c| {
                // We should remove '\\' unless it's preceded by '\\'.
@@ -110,12 +108,9 @@ impl StartupMessageParams {
                true => Cow::Owned(s.replace(escape, "")),
                false => Cow::Borrowed(s),
            }
-        })
-    }
+        });

-    /// Iterate through key-value pairs in an arbitrary order.
-    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
-        self.params.iter().map(|(k, v)| (k.as_str(), v.as_str()))
+        Some(iter)
    }

    // This function is mostly useful in tests.
@@ -184,207 +179,260 @@ pub struct FeExecuteMessage {
 #[derive(Debug)]
 pub struct FeCloseMessage;

-/// An error occured while parsing or serializing raw stream into Postgres
-/// messages.
-#[derive(thiserror::Error, Debug)]
-pub enum ProtocolError {
-    /// Invalid packet was received from the client (e.g. unexpected message
-    /// type or broken len).
-    #[error("Protocol error: {0}")]
-    Protocol(String),
-    /// Failed to parse or, (unlikely), serialize a protocol message.
-    #[error("Message parse error: {0}")]
-    BadMessage(String),
+/// Retry a read on EINTR
+///
+/// This runs the enclosed expression, and if it returns
+/// Err(io::ErrorKind::Interrupted), retries it.
+macro_rules! retry_read {
+    ( $x:expr ) => {
+        loop {
+            match $x {
+                Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
+                res => break res,
+            }
+        }
+    };
 }

-impl ProtocolError {
-    /// Proxy stream.rs uses only io::Error; provide it.
+/// An error occurred while the connection was open.
+#[derive(thiserror::Error, Debug)]
+pub enum ConnectionError {
+    /// IO error during writing to or reading from the connection socket.
+    #[error("Socket IO error: {0}")]
+    Socket(std::io::Error),
+    /// Invalid packet was received from client
+    #[error("Protocol error: {0}")]
+    Protocol(String),
+    /// Failed to parse a protocol message
+    #[error("Message parse error: {0}")]
+    MessageParse(anyhow::Error),
+}
+
+impl From<anyhow::Error> for ConnectionError {
+    fn from(e: anyhow::Error) -> Self {
+        Self::MessageParse(e)
+    }
+}
+
+impl ConnectionError {
    pub fn into_io_error(self) -> io::Error {
-        io::Error::new(io::ErrorKind::Other, self.to_string())
+        match self {
+            ConnectionError::Socket(io) => io,
+            other => io::Error::new(io::ErrorKind::Other, other.to_string()),
+        }
    }
 }

 impl FeMessage {
-    /// Read and parse one message from the `buf` input buffer. If there is at
-    /// least one valid message, returns it, advancing `buf`; redundant copies
-    /// are avoided, as thanks to `bytes` crate ptrs in parsed message point
-    /// directly into the `buf` (processed data is garbage collected after
-    /// parsed message is dropped).
+    /// Read one message from the stream.
+    /// This function returns `Ok(None)` in case of EOF.
+    /// One way to handle this properly:
    ///
-    /// Returns None if `buf` doesn't contain enough data for a single message.
-    /// For efficiency, tries to reserve large enough space in `buf` for the
-    /// next message in this case to save the repeated calls.
+    /// ```
+    /// # use std::io;
+    /// # use pq_proto::FeMessage;
+    /// #
+    /// # fn process_message(msg: FeMessage) -> anyhow::Result<()> {
+    /// #     Ok(())
+    /// # };
+    /// #
+    /// fn do_the_job(stream: &mut (impl io::Read + Unpin)) -> anyhow::Result<()> {
+    ///     while let Some(msg) = FeMessage::read(stream)? {
+    ///         process_message(msg)?;
+    ///     }
    ///
-    /// Returns Error if message is malformed, the only possible ErrorKind is
-    /// InvalidInput.
-    //
-    // Inspired by rust-postgres Message::parse.
-    pub fn parse(buf: &mut BytesMut) -> Result<Option<FeMessage>, ProtocolError> {
-        // Every message contains message type byte and 4 bytes len; can't do
-        // much without them.
-        if buf.len() < 5 {
-            let to_read = 5 - buf.len();
-            buf.reserve(to_read);
-            return Ok(None);
-        }
+    ///     Ok(())
+    /// }
+    /// ```
+    #[inline(never)]
+    pub fn read(
+        stream: &mut (impl io::Read + Unpin),
+    ) -> Result<Option<FeMessage>, ConnectionError> {
+        Self::read_fut(&mut AsyncishRead(stream)).wait()
+    }

-        // We shouldn't advance `buf` as probably full message is not there yet,
-        // so can't directly use Bytes::get_u32 etc.
-        let tag = buf[0];
-        let len = (&buf[1..5]).read_u32::<BigEndian>().unwrap();
-        if len < 4 {
-            return Err(ProtocolError::Protocol(format!(
-                "invalid message length {}",
-                len
-            )));
-        }
+    /// Read one message from the stream.
+    /// See documentation for `Self::read`.
+    pub fn read_fut<Reader>(
+        stream: &mut Reader,
+    ) -> SyncFuture<Reader, impl Future<Output = Result<Option<FeMessage>, ConnectionError>> + '_>
+    where
+        Reader: tokio::io::AsyncRead + Unpin,
+    {
+        // We return a Future that's sync (has a `wait` method) if and only if the provided stream is SyncProof.
+        // SyncFuture contract: we are only allowed to await on sync-proof futures, the AsyncRead and
+        // AsyncReadExt methods of the stream.
+        SyncFuture::new(async move {
+            // Each libpq message begins with a message type byte, followed by message length
+            // If the client closes the connection, return None. But if the client closes the
+            // connection in the middle of a message, we will return an error.
+            let tag = match retry_read!(stream.read_u8().await) {
+                Ok(b) => b,
+                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
+                Err(e) => return Err(ConnectionError::Socket(e)),
+            };

-        // length field includes itself, but not message type.
-        let total_len = len as usize + 1;
-        if buf.len() < total_len {
-            // Don't have full message yet.
-            let to_read = total_len - buf.len();
-            buf.reserve(to_read);
-            return Ok(None);
-        }
+            // The message length includes itself, so it better be at least 4.
+            let len = retry_read!(stream.read_u32().await)
+                .map_err(ConnectionError::Socket)?
+                .checked_sub(4)
+                .ok_or_else(|| ConnectionError::Protocol("invalid message length".to_string()))?;

-        // got the message, advance buffer
-        let mut msg = buf.split_to(total_len).freeze();
-        msg.advance(5); // consume message type and len
+            let body = {
+                let mut buffer = vec![0u8; len as usize];
+                stream
+                    .read_exact(&mut buffer)
+                    .await
+                    .map_err(ConnectionError::Socket)?;
+                Bytes::from(buffer)
+            };

-        match tag {
-            b'Q' => Ok(Some(FeMessage::Query(msg))),
-            b'P' => Ok(Some(FeParseMessage::parse(msg)?)),
-            b'D' => Ok(Some(FeDescribeMessage::parse(msg)?)),
-            b'E' => Ok(Some(FeExecuteMessage::parse(msg)?)),
-            b'B' => Ok(Some(FeBindMessage::parse(msg)?)),
-            b'C' => Ok(Some(FeCloseMessage::parse(msg)?)),
-            b'S' => Ok(Some(FeMessage::Sync)),
-            b'X' => Ok(Some(FeMessage::Terminate)),
-            b'd' => Ok(Some(FeMessage::CopyData(msg))),
-            b'c' => Ok(Some(FeMessage::CopyDone)),
-            b'f' => Ok(Some(FeMessage::CopyFail)),
-            b'p' => Ok(Some(FeMessage::PasswordMessage(msg))),
-            tag => {
-                return Err(ProtocolError::Protocol(format!(
-                    "unknown message tag: {tag},'{msg:?}'"
-                )))
+            match tag {
+                b'Q' => Ok(Some(FeMessage::Query(body))),
+                b'P' => Ok(Some(FeParseMessage::parse(body)?)),
+                b'D' => Ok(Some(FeDescribeMessage::parse(body)?)),
+                b'E' => Ok(Some(FeExecuteMessage::parse(body)?)),
+                b'B' => Ok(Some(FeBindMessage::parse(body)?)),
+                b'C' => Ok(Some(FeCloseMessage::parse(body)?)),
+                b'S' => Ok(Some(FeMessage::Sync)),
+                b'X' => Ok(Some(FeMessage::Terminate)),
+                b'd' => Ok(Some(FeMessage::CopyData(body))),
+                b'c' => Ok(Some(FeMessage::CopyDone)),
+                b'f' => Ok(Some(FeMessage::CopyFail)),
+                b'p' => Ok(Some(FeMessage::PasswordMessage(body))),
+                tag => {
+                    return Err(ConnectionError::Protocol(format!(
+                        "unknown message tag: {tag},'{body:?}'"
+                    )))
+                }
            }
-        }
+        })
    }
 }

 impl FeStartupPacket {
-    /// Read and parse startup message from the `buf` input buffer. It is
-    /// different from [`FeMessage::parse`] because startup messages don't have
-    /// message type byte; otherwise, its comments apply.
-    pub fn parse(buf: &mut BytesMut) -> Result<Option<FeStartupPacket>, ProtocolError> {
+    /// Read startup message from the stream.
+    // XXX: It's tempting yet undesirable to accept `stream` by value,
+    // since such a change will cause user-supplied &mut references to be consumed
+    pub fn read(
+        stream: &mut (impl io::Read + Unpin),
+    ) -> Result<Option<FeMessage>, ConnectionError> {
+        Self::read_fut(&mut AsyncishRead(stream)).wait()
+    }
+
+    /// Read startup message from the stream.
+    // XXX: It's tempting yet undesirable to accept `stream` by value,
+    // since such a change will cause user-supplied &mut references to be consumed
+    pub fn read_fut<Reader>(
+        stream: &mut Reader,
+    ) -> SyncFuture<Reader, impl Future<Output = Result<Option<FeMessage>, ConnectionError>> + '_>
+    where
+        Reader: tokio::io::AsyncRead + Unpin,
+    {
        const MAX_STARTUP_PACKET_LENGTH: usize = 10000;
        const RESERVED_INVALID_MAJOR_VERSION: u32 = 1234;
        const CANCEL_REQUEST_CODE: u32 = 5678;
        const NEGOTIATE_SSL_CODE: u32 = 5679;
        const NEGOTIATE_GSS_CODE: u32 = 5680;

-        // need at least 4 bytes with packet len
-        if buf.len() < 4 {
-            let to_read = 4 - buf.len();
-            buf.reserve(to_read);
-            return Ok(None);
-        }
+        SyncFuture::new(async move {
+            // Read length. If the connection is closed before reading anything (or before
+            // reading 4 bytes, to be precise), return None to indicate that the connection
+            // was closed. This matches the PostgreSQL server's behavior, which avoids noise
+            // in the log if the client opens connection but closes it immediately.
+            let len = match retry_read!(stream.read_u32().await) {
+                Ok(len) => len as usize,
+                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
+                Err(e) => return Err(ConnectionError::Socket(e)),
+            };

-        // We shouldn't advance `buf` as probably full message is not there yet,
-        // so can't directly use Bytes::get_u32 etc.
-        let len = (&buf[0..4]).read_u32::<BigEndian>().unwrap() as usize;
-        if len < 4 || len > MAX_STARTUP_PACKET_LENGTH {
-            return Err(ProtocolError::Protocol(format!(
-                "invalid startup packet message length {}",
-                len
-            )));
-        }
-
-        if buf.len() < len {
-            // Don't have full message yet.
-            let to_read = len - buf.len();
-            buf.reserve(to_read);
-            return Ok(None);
-        }
-
-        // got the message, advance buffer
-        let mut msg = buf.split_to(len).freeze();
-        msg.advance(4); // consume len
-
-        let request_code = msg.get_u32();
-        let req_hi = request_code >> 16;
-        let req_lo = request_code & ((1 << 16) - 1);
-        // StartupMessage, CancelRequest, SSLRequest etc are differentiated by request code.
-        let message = match (req_hi, req_lo) {
-            (RESERVED_INVALID_MAJOR_VERSION, CANCEL_REQUEST_CODE) => {
-                if msg.remaining() != 8 {
-                    return Err(ProtocolError::BadMessage(
-                        "CancelRequest message is malformed, backend PID / secret key missing"
-                            .to_owned(),
-                    ));
-                }
-                FeStartupPacket::CancelRequest(CancelKeyData {
-                    backend_pid: msg.get_i32(),
-                    cancel_key: msg.get_i32(),
-                })
-            }
-            (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => {
-                // Requested upgrade to SSL (aka TLS)
-                FeStartupPacket::SslRequest
-            }
-            (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_GSS_CODE) => {
-                // Requested upgrade to GSSAPI
-                FeStartupPacket::GssEncRequest
-            }
-            (RESERVED_INVALID_MAJOR_VERSION, unrecognized_code) => {
-                return Err(ProtocolError::Protocol(format!(
-                    "Unrecognized request code {unrecognized_code}"
+            #[allow(clippy::manual_range_contains)]
+            if len < 4 || len > MAX_STARTUP_PACKET_LENGTH {
+                return Err(ConnectionError::Protocol(format!(
+                    "invalid message length {len}"
                )));
            }
-            // TODO bail if protocol major_version is not 3?
-            (major_version, minor_version) => {
-                // StartupMessage

-                // Parse pairs of null-terminated strings (key, value).
-                // See `postgres: ProcessStartupPacket, build_startup_packet`.
-                let mut tokens = str::from_utf8(&msg)
-                    .map_err(|_e| {
-                        ProtocolError::BadMessage("StartupMessage params: invalid utf-8".to_owned())
-                    })?
-                    .strip_suffix('\0') // drop packet's own null
-                    .ok_or_else(|| {
-                        ProtocolError::Protocol(
-                            "StartupMessage params: missing null terminator".to_string(),
-                        )
-                    })?
-                    .split_terminator('\0');
+            let request_code =
+                retry_read!(stream.read_u32().await).map_err(ConnectionError::Socket)?;

-                let mut params = HashMap::new();
-                while let Some(name) = tokens.next() {
-                    let value = tokens.next().ok_or_else(|| {
-                        ProtocolError::Protocol(
-                            "StartupMessage params: key without value".to_string(),
-                        )
-                    })?;
+            // The rest of the startup packet is the params.
+            let params_len = len - 8;
+            let mut params_bytes = vec![0u8; params_len];
+            stream
+                .read_exact(params_bytes.as_mut())
+                .await
+                .map_err(ConnectionError::Socket)?;

-                    params.insert(name.to_owned(), value.to_owned());
+            // Parse params depending on request code
+            let req_hi = request_code >> 16;
+            let req_lo = request_code & ((1 << 16) - 1);
+            let message = match (req_hi, req_lo) {
+                (RESERVED_INVALID_MAJOR_VERSION, CANCEL_REQUEST_CODE) => {
+                    if params_len != 8 {
+                        return Err(ConnectionError::Protocol(
+                            "expected 8 bytes for CancelRequest params".to_string(),
+                        ));
+                    }
+                    let mut cursor = Cursor::new(params_bytes);
+                    FeStartupPacket::CancelRequest(CancelKeyData {
+                        backend_pid: cursor.read_i32().await.map_err(ConnectionError::Socket)?,
+                        cancel_key: cursor.read_i32().await.map_err(ConnectionError::Socket)?,
+                    })
                }
-
-                FeStartupPacket::StartupMessage {
-                    major_version,
-                    minor_version,
-                    params: StartupMessageParams { params },
+                (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => {
+                    // Requested upgrade to SSL (aka TLS)
+                    FeStartupPacket::SslRequest
                }
-            }
-        };
-        Ok(Some(message))
+                (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_GSS_CODE) => {
+                    // Requested upgrade to GSSAPI
+                    FeStartupPacket::GssEncRequest
+                }
+                (RESERVED_INVALID_MAJOR_VERSION, unrecognized_code) => {
+                    return Err(ConnectionError::Protocol(format!(
+                        "Unrecognized request code {unrecognized_code}"
+                    )));
+                }
+                // TODO bail if protocol major_version is not 3?
+                (major_version, minor_version) => {
+                    // Parse pairs of null-terminated strings (key, value).
+                    // See `postgres: ProcessStartupPacket, build_startup_packet`.
+                    let mut tokens = str::from_utf8(&params_bytes)
+                        .context("StartupMessage params: invalid utf-8")?
+                        .strip_suffix('\0') // drop packet's own null
+                        .ok_or_else(|| {
+                            ConnectionError::Protocol(
+                                "StartupMessage params: missing null terminator".to_string(),
+                            )
+                        })?
+                        .split_terminator('\0');
+
+                    let mut params = HashMap::new();
+                    while let Some(name) = tokens.next() {
+                        let value = tokens.next().ok_or_else(|| {
+                            ConnectionError::Protocol(
+                                "StartupMessage params: key without value".to_string(),
+                            )
+                        })?;
+
+                        params.insert(name.to_owned(), value.to_owned());
+                    }
+
+                    FeStartupPacket::StartupMessage {
+                        major_version,
+                        minor_version,
+                        params: StartupMessageParams { params },
+                    }
+                }
+            };
+
+            Ok(Some(FeMessage::StartupPacket(message)))
+        })
    }
 }

 impl FeParseMessage {
-    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {
+    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        // FIXME: the rust-postgres driver uses a named prepared statement
        // for copy_out(). We're not prepared to handle that correctly. For
        // now, just ignore the statement name, assuming that the client never
@@ -392,82 +440,55 @@ impl FeParseMessage {

        let _pstmt_name = read_cstr(&mut buf)?;
        let query_string = read_cstr(&mut buf)?;
-        if buf.remaining() < 2 {
-            return Err(ProtocolError::BadMessage(
-                "Parse message is malformed, nparams missing".to_string(),
-            ));
-        }
        let nparams = buf.get_i16();

-        if nparams != 0 {
-            return Err(ProtocolError::BadMessage(
-                "query params not implemented".to_string(),
-            ));
-        }
+        ensure!(nparams == 0, "query params not implemented");

        Ok(FeMessage::Parse(FeParseMessage { query_string }))
    }
 }

 impl FeDescribeMessage {
-    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {
+    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        let kind = buf.get_u8();
        let _pstmt_name = read_cstr(&mut buf)?;

        // FIXME: see FeParseMessage::parse
-        if kind != b'S' {
-            return Err(ProtocolError::BadMessage(
-                "only prepared statemement Describe is implemented".to_string(),
-            ));
-        }
+        ensure!(
+            kind == b'S',
+            "only prepared statemement Describe is implemented"
+        );

        Ok(FeMessage::Describe(FeDescribeMessage { kind }))
    }
 }

 impl FeExecuteMessage {
-    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {
+    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        let portal_name = read_cstr(&mut buf)?;
-        if buf.remaining() < 4 {
-            return Err(ProtocolError::BadMessage(
-                "FeExecuteMessage message is malformed, maxrows missing".to_string(),
-            ));
-        }
        let maxrows = buf.get_i32();

-        if !portal_name.is_empty() {
-            return Err(ProtocolError::BadMessage(
-                "named portals not implemented".to_string(),
-            ));
-        }
-        if maxrows != 0 {
-            return Err(ProtocolError::BadMessage(
-                "row limit in Execute message not implemented".to_string(),
-            ));
-        }
+        ensure!(portal_name.is_empty(), "named portals not implemented");
+        ensure!(maxrows == 0, "row limit in Execute message not implemented");

        Ok(FeMessage::Execute(FeExecuteMessage { maxrows }))
    }
 }

 impl FeBindMessage {
-    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {
+    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        let portal_name = read_cstr(&mut buf)?;
        let _pstmt_name = read_cstr(&mut buf)?;

        // FIXME: see FeParseMessage::parse
-        if !portal_name.is_empty() {
-            return Err(ProtocolError::BadMessage(
-                "named portals not implemented".to_string(),
-            ));
-        }
+        ensure!(portal_name.is_empty(), "named portals not implemented");

        Ok(FeMessage::Bind(FeBindMessage))
    }
 }

 impl FeCloseMessage {
-    fn parse(mut buf: Bytes) -> Result<FeMessage, ProtocolError> {
+    fn parse(mut buf: Bytes) -> anyhow::Result<FeMessage> {
        let _kind = buf.get_u8();
        let _pstmt_or_portal_name = read_cstr(&mut buf)?;

@@ -496,7 +517,6 @@ pub enum BeMessage<'a> {
    CloseComplete,
    // None means column is NULL
    DataRow(&'a [Option<&'a [u8]>]),
-    // None errcode means internal_error will be sent.
    ErrorResponse(&'a str, Option<&'a [u8; 5]>),
    /// Single byte - used in response to SSLRequest/GSSENCRequest.
    EncryptionResponse(bool),
@@ -527,11 +547,6 @@ impl<'a> BeMessage<'a> {
        value: b"UTF8",
    };

-    pub const INTEGER_DATETIMES: Self = Self::ParameterStatus {
-        name: b"integer_datetimes",
-        value: b"on",
-    };
-
    /// Build a [`BeMessage::ParameterStatus`] holding the server version.
    pub fn server_version(version: &'a str) -> Self {
        Self::ParameterStatus {
@@ -610,7 +625,7 @@ impl RowDescriptor<'_> {
 #[derive(Debug)]
 pub struct XLogDataBody<'a> {
    pub wal_start: u64,
-    pub wal_end: u64, // current end of WAL on the server
+    pub wal_end: u64,
    pub timestamp: i64,
    pub data: &'a [u8],
 }
@@ -650,11 +665,12 @@ fn write_body<R>(buf: &mut BytesMut, f: impl FnOnce(&mut BytesMut) -> R) -> R {
 }

 /// Safe write of s into buf as cstring (String in the protocol).
-fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> Result<(), ProtocolError> {
+fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> io::Result<()> {
    let bytes = s.as_ref();
    if bytes.contains(&0) {
-        return Err(ProtocolError::BadMessage(
-            "string contains embedded null".to_owned(),
+        return Err(io::Error::new(
+            io::ErrorKind::InvalidInput,
+            "string contains embedded null",
        ));
    }
    buf.put_slice(bytes);
@@ -662,27 +678,22 @@ fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> Result<(), ProtocolErr
    Ok(())
 }

-/// Read cstring from buf, advancing it.
-fn read_cstr(buf: &mut Bytes) -> Result<Bytes, ProtocolError> {
-    let pos = buf
-        .iter()
-        .position(|x| *x == 0)
-        .ok_or_else(|| ProtocolError::BadMessage("missing cstring terminator".to_owned()))?;
-    let result = buf.split_to(pos);
+fn read_cstr(buf: &mut Bytes) -> anyhow::Result<Bytes> {
+    let pos = buf.iter().position(|x| *x == 0);
+    let result = buf.split_to(pos.context("missing terminator")?);
    buf.advance(1); // drop the null terminator
    Ok(result)
 }

 pub const SQLSTATE_INTERNAL_ERROR: &[u8; 5] = b"XX000";
-pub const SQLSTATE_SUCCESSFUL_COMPLETION: &[u8; 5] = b"00000";

 impl<'a> BeMessage<'a> {
-    /// Serialize `message` to the given `buf`.
-    /// Apart from smart memory managemet, BytesMut is good here as msg len
-    /// precedes its body and it is handy to write it down first and then fill
-    /// the length. With Write we would have to either calc it manually or have
-    /// one more buffer.
-    pub fn write(buf: &mut BytesMut, message: &BeMessage) -> Result<(), ProtocolError> {
+    /// Write message to the given buf.
+    // Unlike the reading side, we use BytesMut
+    // here as msg len precedes its body and it is handy to write it down first
+    // and then fill the length. With Write we would have to either calc it
+    // manually or have one more buffer.
+    pub fn write(buf: &mut BytesMut, message: &BeMessage) -> io::Result<()> {
        match message {
            BeMessage::AuthenticationOk => {
                buf.put_u8(b'R');
@@ -727,7 +738,7 @@ impl<'a> BeMessage<'a> {
                            buf.put_slice(extra);
                        }
                    }
-                    Ok(())
+                    Ok::<_, io::Error>(())
                })?;
            }

@@ -818,7 +829,7 @@ impl<'a> BeMessage<'a> {
            BeMessage::ErrorResponse(error_msg, pg_error_code) => {
                // 'E' signalizes ErrorResponse messages
                buf.put_u8(b'E');
-                write_body(buf, |buf| -> Result<(), ProtocolError> {
+                write_body(buf, |buf| {
                    buf.put_u8(b'S'); // severity
                    buf.put_slice(b"ERROR\0");

@@ -831,7 +842,7 @@ impl<'a> BeMessage<'a> {
                    write_cstr(error_msg, buf)?;

                    buf.put_u8(0); // terminator
-                    Ok(())
+                    Ok::<_, io::Error>(())
                })?;
            }

@@ -843,7 +854,7 @@ impl<'a> BeMessage<'a> {

                // 'N' signalizes NoticeResponse messages
                buf.put_u8(b'N');
-                write_body(buf, |buf| -> Result<(), ProtocolError> {
+                write_body(buf, |buf| {
                    buf.put_u8(b'S'); // severity
                    buf.put_slice(b"NOTICE\0");

@@ -854,7 +865,7 @@ impl<'a> BeMessage<'a> {
                    write_cstr(error_msg.as_bytes(), buf)?;

                    buf.put_u8(0); // terminator
-                    Ok(())
+                    Ok::<_, io::Error>(())
                })?;
            }

@@ -898,7 +909,7 @@ impl<'a> BeMessage<'a> {

            BeMessage::RowDescription(rows) => {
                buf.put_u8(b'T');
-                write_body(buf, |buf| -> Result<(), ProtocolError> {
+                write_body(buf, |buf| {
                    buf.put_i16(rows.len() as i16); // # of fields
                    for row in rows.iter() {
                        write_cstr(row.name, buf)?;
@@ -909,7 +920,7 @@ impl<'a> BeMessage<'a> {
                        buf.put_i32(-1); /* typmod */
                        buf.put_i16(0); /* format code */
                    }
-                    Ok(())
+                    Ok::<_, io::Error>(())
                })?;
            }

@@ -976,7 +987,7 @@ impl ReplicationFeedback {
    // null-terminated string - key,
    // uint32 - value length in bytes
    // value itself
-    pub fn serialize(&self, buf: &mut BytesMut) {
+    pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> {
        buf.put_u8(REPLICATION_FEEDBACK_FIELDS_NUMBER); // # of keys
        buf.put_slice(b"current_timeline_size\0");
        buf.put_i32(8);
@@ -1001,6 +1012,7 @@ impl ReplicationFeedback {
        buf.put_slice(b"ps_replytime\0");
        buf.put_i32(8);
        buf.put_i64(timestamp);
+        Ok(())
    }

    // Deserialize ReplicationFeedback message
@@ -1068,7 +1080,7 @@ mod tests {
        // because it is rounded up to microseconds during serialization.
        rf.ps_replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
        let mut data = BytesMut::new();
-        rf.serialize(&mut data);
+        rf.serialize(&mut data).unwrap();

        let rf_parsed = ReplicationFeedback::parse(data.freeze());
        assert_eq!(rf, rf_parsed);
@@ -1083,7 +1095,7 @@ mod tests {
        // because it is rounded up to microseconds during serialization.
        rf.ps_replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
        let mut data = BytesMut::new();
-        rf.serialize(&mut data);
+        rf.serialize(&mut data).unwrap();

        // Add an extra field to the buffer and adjust number of keys
        if let Some(first) = data.first_mut() {
@@ -1125,6 +1137,15 @@ mod tests {
        let params = make_params("foo\\ bar \\ \\\\ baz\\  lol");
        assert_eq!(split_options(&params), ["foo bar", " \\", "baz ", "lol"]);
    }
+
+    // Make sure that `read` is sync/async callable
+    async fn _assert(stream: &mut (impl tokio::io::AsyncRead + Unpin)) {
+        let _ = FeMessage::read(&mut [].as_ref());
+        let _ = FeMessage::read_fut(stream).await;
+
+        let _ = FeStartupPacket::read(&mut [].as_ref());
+        let _ = FeStartupPacket::read_fut(stream).await;
+    }
 }

 fn terminate_code(code: &[u8; 5]) -> [u8; 6] {
--- a/libs/pq_proto/src/sync.rs
+++ b/libs/pq_proto/src/sync.rs
@@ -0,0 +1,179 @@
+use pin_project_lite::pin_project;
+use std::future::Future;
+use std::marker::PhantomData;
+use std::pin::Pin;
+use std::{io, task};
+
+pin_project! {
+    /// We use this future to mark certain methods
+    /// as callable in both sync and async modes.
+    #[repr(transparent)]
+    pub struct SyncFuture<S, T: Future> {
+        #[pin]
+        inner: T,
+        _marker: PhantomData<S>,
+    }
+}
+
+/// This wrapper lets us synchronously wait for inner future's completion
+/// (see [`SyncFuture::wait`]) **provided that `S` implements [`SyncPostulate`]**.
+/// For instance, `S` may be substituted with types implementing
+/// [`tokio::io::AsyncRead`], but it's not the only viable option.
+impl<S, T: Future> SyncFuture<S, T> {
+    /// NOTE: caller should carefully pick a type for `S`,
+    /// because we don't want to enable [`SyncFuture::wait`] when
+    /// it's in fact impossible to run the future synchronously.
+    /// Violation of this contract will not cause UB, but
+    /// panics and async event loop freezes won't please you.
+    ///
+    /// Example:
+    ///
+    /// ```
+    /// # use pq_proto::sync::SyncFuture;
+    /// # use std::future::Future;
+    /// # use tokio::io::AsyncReadExt;
+    /// #
+    /// // Parse a pair of numbers from a stream
+    /// pub fn parse_pair<Reader>(
+    ///     stream: &mut Reader,
+    /// ) -> SyncFuture<Reader, impl Future<Output = anyhow::Result<(u32, u64)>> + '_>
+    /// where
+    ///     Reader: tokio::io::AsyncRead + Unpin,
+    /// {
+    ///     // If `Reader` is a `SyncPostulate`, this will give the caller
+    ///     // an opportunity to use `SyncFuture::wait`, because
+    ///     // `.await` will always result in `Poll::Ready`.
+    ///     SyncFuture::new(async move {
+    ///         let x = stream.read_u32().await?;
+    ///         let y = stream.read_u64().await?;
+    ///         Ok((x, y))
+    ///     })
+    /// }
+    /// ```
+    pub fn new(inner: T) -> Self {
+        Self {
+            inner,
+            _marker: PhantomData,
+        }
+    }
+}
+
+impl<S, T: Future> Future for SyncFuture<S, T> {
+    type Output = T::Output;
+
+    /// In async code, [`SyncFuture`] behaves like a regular wrapper.
+    #[inline(always)]
+    fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> task::Poll<Self::Output> {
+        self.project().inner.poll(cx)
+    }
+}
+
+/// Postulates that we can call [`SyncFuture::wait`].
+/// If implementer is also a [`Future`], it should always
+/// return [`task::Poll::Ready`] from [`Future::poll`].
+///
+/// Each implementation should document which futures
+/// specifically are being declared sync-proof.
+pub trait SyncPostulate {}
+
+impl<T: SyncPostulate> SyncPostulate for &T {}
+impl<T: SyncPostulate> SyncPostulate for &mut T {}
+
+impl<P: SyncPostulate, T: Future> SyncFuture<P, T> {
+    /// Synchronously wait for future completion.
+    pub fn wait(mut self) -> T::Output {
+        const RAW_WAKER: task::RawWaker = task::RawWaker::new(
+            std::ptr::null(),
+            &task::RawWakerVTable::new(
+                |_| RAW_WAKER,
+                |_| panic!("SyncFuture: failed to wake"),
+                |_| panic!("SyncFuture: failed to wake by ref"),
+                |_| { /* drop is no-op */ },
+            ),
+        );
+
+        // SAFETY: We never move `self` during this call;
+        // furthermore, it will be dropped in the end regardless of panics
+        let this = unsafe { Pin::new_unchecked(&mut self) };
+
+        // SAFETY: This waker doesn't do anything apart from panicking
+        let waker = unsafe { task::Waker::from_raw(RAW_WAKER) };
+        let context = &mut task::Context::from_waker(&waker);
+
+        match this.poll(context) {
+            task::Poll::Ready(res) => res,
+            _ => panic!("SyncFuture: unexpected pending!"),
+        }
+    }
+}
+
+/// This wrapper turns any [`std::io::Read`] into a blocking [`tokio::io::AsyncRead`],
+/// which lets us abstract over sync & async readers in methods returning [`SyncFuture`].
+/// NOTE: you **should not** use this in async code.
+#[repr(transparent)]
+pub struct AsyncishRead<T: io::Read + Unpin>(pub T);
+
+/// This lets us call [`SyncFuture::wait`] on `SyncFuture<AsyncishRead<_>, _>`,
+/// and allows the future to await on any of the [`tokio::io::AsyncRead`]
+/// and [`tokio::io::AsyncReadExt`] methods on `AsyncishRead`.
+impl<T: io::Read + Unpin> SyncPostulate for AsyncishRead<T> {}
+
+impl<T: io::Read + Unpin> tokio::io::AsyncRead for AsyncishRead<T> {
+    #[inline(always)]
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        _cx: &mut task::Context<'_>,
+        buf: &mut tokio::io::ReadBuf<'_>,
+    ) -> task::Poll<io::Result<()>> {
+        task::Poll::Ready(
+            // `Read::read` will block, meaning we don't need a real event loop!
+            self.0
+                .read(buf.initialize_unfilled())
+                .map(|sz| buf.advance(sz)),
+        )
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tokio::io::{AsyncReadExt, AsyncWriteExt};
+
+    // async helper(stream: &mut impl AsyncRead) -> io::Result<u32>
+    fn bytes_add<Reader>(
+        stream: &mut Reader,
+    ) -> SyncFuture<Reader, impl Future<Output = io::Result<u32>> + '_>
+    where
+        Reader: tokio::io::AsyncRead + Unpin,
+    {
+        SyncFuture::new(async move {
+            let a = stream.read_u32().await?;
+            let b = stream.read_u32().await?;
+            Ok(a + b)
+        })
+    }
+
+    #[test]
+    fn test_sync() {
+        let bytes = [100u32.to_be_bytes(), 200u32.to_be_bytes()].concat();
+        let res = bytes_add(&mut AsyncishRead(&mut &bytes[..]))
+            .wait()
+            .unwrap();
+        assert_eq!(res, 300);
+    }
+
+    // We need a single-threaded executor for this test
+    #[tokio::test(flavor = "current_thread")]
+    async fn test_async() {
+        let (mut tx, mut rx) = tokio::net::UnixStream::pair().unwrap();
+
+        let write = async move {
+            tx.write_u32(100).await?;
+            tx.write_u32(200).await?;
+            Ok(())
+        };
+
+        let (res, ()) = tokio::try_join!(bytes_add(&mut rx), write).unwrap();
+        assert_eq!(res, 300);
+    }
+}
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -21,7 +21,7 @@ toml_edit.workspace = true
 tracing.workspace = true
 metrics.workspace = true
 utils.workspace = true
-pin-project-lite.workspace = true
+
 workspace_hack.workspace = true

 [dev-dependencies]
--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -111,7 +111,7 @@ pub trait RemoteStorage: Send + Sync + 'static {
 }

 pub struct Download {
-    pub download_stream: Pin<Box<dyn io::AsyncRead + Unpin + Send + Sync>>,
+    pub download_stream: Pin<Box<dyn io::AsyncRead + Unpin + Send>>,
    /// Extra key-value data, associated with the current remote file.
    pub metadata: Option<StorageMetadata>,
 }
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -20,10 +20,7 @@ use aws_sdk_s3::{
 };
 use aws_smithy_http::body::SdkBody;
 use hyper::Body;
-use tokio::{
-    io::{self, AsyncRead},
-    sync::Semaphore,
-};
+use tokio::{io, sync::Semaphore};
 use tokio_util::io::ReaderStream;
 use tracing::debug;

@@ -105,7 +102,7 @@ pub struct S3Bucket {
    // Every request to S3 can be throttled or cancelled, if a certain number of requests per second is exceeded.
    // Same goes to IAM, which is queried before every S3 request, if enabled. IAM has even lower RPS threshold.
    // The helps to ensure we don't exceed the thresholds.
-    concurrency_limiter: Arc<Semaphore>,
+    concurrency_limiter: Semaphore,
 }

 #[derive(Default)]
@@ -165,7 +162,7 @@ impl S3Bucket {
            client,
            bucket_name: aws_config.bucket_name.clone(),
            prefix_in_bucket,
-            concurrency_limiter: Arc::new(Semaphore::new(aws_config.concurrency_limit.get())),
+            concurrency_limiter: Semaphore::new(aws_config.concurrency_limit.get()),
        })
    }

@@ -197,10 +194,9 @@ impl S3Bucket {
    }

    async fn download_object(&self, request: GetObjectRequest) -> Result<Download, DownloadError> {
-        let permit = self
+        let _guard = self
            .concurrency_limiter
-            .clone()
-            .acquire_owned()
+            .acquire()
            .await
            .context("Concurrency limiter semaphore got closed during S3 download")
            .map_err(DownloadError::Other)?;
@@ -221,10 +217,9 @@ impl S3Bucket {
                let metadata = object_output.metadata().cloned().map(StorageMetadata);
                Ok(Download {
                    metadata,
-                    download_stream: Box::pin(io::BufReader::new(RatelimitedAsyncRead::new(
-                        permit,
+                    download_stream: Box::pin(io::BufReader::new(
                        object_output.body.into_async_read(),
-                    ))),
+                    )),
                })
            }
            Err(SdkError::ServiceError {
@@ -245,32 +240,6 @@ impl S3Bucket {
    }
 }

-pin_project_lite::pin_project! {
-    /// An `AsyncRead` adapter which carries a permit for the lifetime of the value.
-    struct RatelimitedAsyncRead<S> {
-        permit: tokio::sync::OwnedSemaphorePermit,
-        #[pin]
-        inner: S,
-    }
-}
-
-impl<S: AsyncRead> RatelimitedAsyncRead<S> {
-    fn new(permit: tokio::sync::OwnedSemaphorePermit, inner: S) -> Self {
-        RatelimitedAsyncRead { permit, inner }
-    }
-}
-
-impl<S: AsyncRead> AsyncRead for RatelimitedAsyncRead<S> {
-    fn poll_read(
-        self: std::pin::Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-        buf: &mut io::ReadBuf<'_>,
-    ) -> std::task::Poll<std::io::Result<()>> {
-        let this = self.project();
-        this.inner.poll_read(cx, buf)
-    }
-}
-
 #[async_trait::async_trait]
 impl RemoteStorage for S3Bucket {
    async fn list(&self) -> anyhow::Result<Vec<RemotePath>> {
--- a/libs/tenant_size_model/Cargo.toml
+++ b/libs/tenant_size_model/Cargo.toml
@@ -7,7 +7,5 @@ license.workspace = true

 [dependencies]
 anyhow.workspace = true
-serde.workspace = true
-serde_json.workspace = true

 workspace_hack.workspace = true
--- a/libs/tenant_size_model/src/calculation.rs
+++ b/libs/tenant_size_model/src/calculation.rs
@@ -1,219 +0,0 @@
-use crate::{SegmentMethod, SegmentSizeResult, SizeResult, StorageModel};
-
-//
-//                 *-g--*---D--->
-//                /
-//               /
-//              /                 *---b----*-B--->
-//             /                 /
-//            /                 /
-//      -----*--e---*-----f----* C
-//           E                  \
-//                               \
-//                                *--a---*---A-->
-//
-// If A and B need to be retained, is it cheaper to store
-// snapshot at C+a+b, or snapshots at A and B ?
-//
-// If D also needs to be retained, which is cheaper:
-//
-// 1. E+g+e+f+a+b
-// 2. D+C+a+b
-// 3. D+A+B
-
-/// [`Segment`] which has had it's size calculated.
-#[derive(Clone, Debug)]
-struct SegmentSize {
-    method: SegmentMethod,
-
-    // calculated size of this subtree, using this method
-    accum_size: u64,
-
-    seg_id: usize,
-    children: Vec<SegmentSize>,
-}
-
-struct SizeAlternatives {
-    // cheapest alternative if parent is available.
-    incremental: SegmentSize,
-
-    // cheapest alternative if parent node is not available
-    non_incremental: Option<SegmentSize>,
-}
-
-impl StorageModel {
-    pub fn calculate(&self) -> SizeResult {
-        // Build adjacency list. 'child_list' is indexed by segment id. Each entry
-        // contains a list of all child segments of the segment.
-        let mut roots: Vec<usize> = Vec::new();
-        let mut child_list: Vec<Vec<usize>> = Vec::new();
-        child_list.resize(self.segments.len(), Vec::new());
-
-        for (seg_id, seg) in self.segments.iter().enumerate() {
-            if let Some(parent_id) = seg.parent {
-                child_list[parent_id].push(seg_id);
-            } else {
-                roots.push(seg_id);
-            }
-        }
-
-        let mut segment_results = Vec::new();
-        segment_results.resize(
-            self.segments.len(),
-            SegmentSizeResult {
-                method: SegmentMethod::Skipped,
-                accum_size: 0,
-            },
-        );
-
-        let mut total_size = 0;
-        for root in roots {
-            if let Some(selected) = self.size_here(root, &child_list).non_incremental {
-                StorageModel::fill_selected_sizes(&selected, &mut segment_results);
-                total_size += selected.accum_size;
-            } else {
-                // Couldn't find any way to get this root. Error?
-            }
-        }
-
-        SizeResult {
-            total_size,
-            segments: segment_results,
-        }
-    }
-
-    fn fill_selected_sizes(selected: &SegmentSize, result: &mut Vec<SegmentSizeResult>) {
-        result[selected.seg_id] = SegmentSizeResult {
-            method: selected.method,
-            accum_size: selected.accum_size,
-        };
-        // recurse to children
-        for child in selected.children.iter() {
-            StorageModel::fill_selected_sizes(child, result);
-        }
-    }
-
-    //
-    // This is the core of the sizing calculation.
-    //
-    // This is a recursive function, that for each Segment calculates the best way
-    // to reach all the Segments that are marked as needed in this subtree, under two
-    // different conditions:
-    // a) when the parent of this segment is available (as a snaphot or through WAL), and
-    // b) when the parent of this segment is not available.
-    //
-    fn size_here(&self, seg_id: usize, child_list: &Vec<Vec<usize>>) -> SizeAlternatives {
-        let seg = &self.segments[seg_id];
-        // First figure out the best way to get each child
-        let mut children = Vec::new();
-        for child_id in &child_list[seg_id] {
-            children.push(self.size_here(*child_id, child_list))
-        }
-
-        // Method 1. If this node is not needed, we can skip it as long as we
-        // take snapshots later in each sub-tree
-        let snapshot_later = if !seg.needed {
-            let mut snapshot_later = SegmentSize {
-                seg_id,
-                method: SegmentMethod::Skipped,
-                accum_size: 0,
-                children: Vec::new(),
-            };
-
-            let mut possible = true;
-            for child in children.iter() {
-                if let Some(non_incremental) = &child.non_incremental {
-                    snapshot_later.accum_size += non_incremental.accum_size;
-                    snapshot_later.children.push(non_incremental.clone())
-                } else {
-                    possible = false;
-                    break;
-                }
-            }
-            if possible {
-                Some(snapshot_later)
-            } else {
-                None
-            }
-        } else {
-            None
-        };
-
-        // Method 2. Get a snapshot here. This assumed to be possible, if the 'size' of
-        // this Segment was given.
-        let snapshot_here = if !seg.needed || seg.parent.is_none() {
-            if let Some(snapshot_size) = seg.size {
-                let mut snapshot_here = SegmentSize {
-                    seg_id,
-                    method: SegmentMethod::SnapshotHere,
-                    accum_size: snapshot_size,
-                    children: Vec::new(),
-                };
-                for child in children.iter() {
-                    snapshot_here.accum_size += child.incremental.accum_size;
-                    snapshot_here.children.push(child.incremental.clone())
-                }
-                Some(snapshot_here)
-            } else {
-                None
-            }
-        } else {
-            None
-        };
-
-        // Method 3. Use WAL to get here from parent
-        let wal_here = {
-            let mut wal_here = SegmentSize {
-                seg_id,
-                method: SegmentMethod::Wal,
-                accum_size: if let Some(parent_id) = seg.parent {
-                    seg.lsn - self.segments[parent_id].lsn
-                } else {
-                    0
-                },
-                children: Vec::new(),
-            };
-            for child in children {
-                wal_here.accum_size += child.incremental.accum_size;
-                wal_here.children.push(child.incremental)
-            }
-            wal_here
-        };
-
-        // If the parent is not available, what's the cheapest method involving
-        // a snapshot here or later?
-        let mut cheapest_non_incremental: Option<SegmentSize> = None;
-        if let Some(snapshot_here) = snapshot_here {
-            cheapest_non_incremental = Some(snapshot_here);
-        }
-        if let Some(snapshot_later) = snapshot_later {
-            // Use <=, to prefer skipping if the size is equal
-            if let Some(parent) = &cheapest_non_incremental {
-                if snapshot_later.accum_size <= parent.accum_size {
-                    cheapest_non_incremental = Some(snapshot_later);
-                }
-            } else {
-                cheapest_non_incremental = Some(snapshot_later);
-            }
-        }
-
-        // And what's the cheapest method, if the parent is available?
-        let cheapest_incremental = if let Some(cheapest_non_incremental) = &cheapest_non_incremental
-        {
-            // Is it cheaper to use a snapshot here or later, anyway?
-            // Use <, to prefer Wal over snapshot if the cost is the same
-            if wal_here.accum_size < cheapest_non_incremental.accum_size {
-                wal_here
-            } else {
-                cheapest_non_incremental.clone()
-            }
-        } else {
-            wal_here
-        };
-
-        SizeAlternatives {
-            incremental: cheapest_incremental,
-            non_incremental: cheapest_non_incremental,
-        }
-    }
-}
--- a/libs/tenant_size_model/src/lib.rs
+++ b/libs/tenant_size_model/src/lib.rs
@@ -1,70 +1,401 @@
-//! Synthetic size calculation
+use std::borrow::Cow;
+use std::collections::HashMap;

-mod calculation;
-pub mod svg;
+use anyhow::Context;

-/// StorageModel is the input to the synthetic size calculation. It represents
-/// a tree of timelines, with just the information that's needed for the
-/// calculation. This doesn't track timeline names or where each timeline
-/// begins and ends, for example. Instead, it consists of "points of interest"
-/// on the timelines. A point of interest could be the timeline start or end point,
-/// the oldest point on a timeline that needs to be retained because of PITR
-/// cutoff, or snapshot points named by the user. For each such point, and the
-/// edge connecting the points (implicit in Segment), we store information about
-/// whether we need to be able to recover to the point, and if known, the logical
-/// size at the point.
+/// Pricing model or history size builder.
 ///
-/// The segments must form a well-formed tree, with no loops.
-#[derive(serde::Serialize)]
-pub struct StorageModel {
-    pub segments: Vec<Segment>,
+/// Maintains knowledge of the branches and their modifications. Generic over the branch name key
+/// type.
+pub struct Storage<K: 'static> {
+    segments: Vec<Segment>,
+
+    /// Mapping from the branch name to the index of a segment describing its latest state.
+    branches: HashMap<K, usize>,
 }

-/// Segment represents one point in the tree of branches, *and* the edge that leads
-/// to it (if any). We don't need separate structs for points and edges, because each
-/// point can have only one parent.
-///
-/// When 'needed' is true, it means that we need to be able to reconstruct
-/// any version between 'parent.lsn' and 'lsn'. If you want to represent that only
-/// a single point is needed, create two Segments with the same lsn, and mark only
-/// the child as needed.
-///
-#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+/// Snapshot of a branch.
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub struct Segment {
    /// Previous segment index into ['Storage::segments`], if any.
-    pub parent: Option<usize>,
+    parent: Option<usize>,

-    /// LSN at this point
-    pub lsn: u64,
+    /// Description of how we got to this state.
+    ///
+    /// Mainly used in the original scenarios 1..=4 with insert, delete and update. Not used when
+    /// modifying a branch directly.
+    pub op: Cow<'static, str>,

-    /// Logical size at this node, if known.
-    pub size: Option<u64>,
+    /// LSN before this state
+    start_lsn: u64,

-    /// If true, the segment from parent to this node is needed by `retention_period`
+    /// LSN at this state
+    pub end_lsn: u64,
+
+    /// Logical size before this state
+    start_size: u64,
+
+    /// Logical size at this state. Can be None in the last Segment of a branch.
+    pub end_size: Option<u64>,
+
+    /// Indices to [`Storage::segments`]
+    ///
+    /// FIXME: this could be an Option<usize>
+    children_after: Vec<usize>,
+
+    /// Determined by `retention_period` given to [`Storage::calculate`]
    pub needed: bool,
 }

-/// Result of synthetic size calculation. Returned by StorageModel::calculate()
-pub struct SizeResult {
-    pub total_size: u64,
+//
+//
+//
+//
+//                 *-g--*---D--->
+//                /
+//               /
+//              /                 *---b----*-B--->
+//             /                 /
+//            /                 /
+//      -----*--e---*-----f----* C
+//           E                  \
+//                               \
+//                                *--a---*---A-->
+//
+// If A and B need to be retained, is it cheaper to store
+// snapshot at C+a+b, or snapshots at A and B ?
+//
+// If D also needs to be retained, which is cheaper:
+//
+// 1. E+g+e+f+a+b
+// 2. D+C+a+b
+// 3. D+A+B

-    // This has same length as the StorageModel::segments vector in the input.
-    // Each entry in this array corresponds to the entry with same index in
-    // StorageModel::segments.
-    pub segments: Vec<SegmentSizeResult>,
+/// [`Segment`] which has had its size calculated.
+pub struct SegmentSize {
+    pub seg_id: usize,
+
+    pub method: SegmentMethod,
+
+    this_size: u64,
+
+    pub children: Vec<SegmentSize>,
 }

-#[derive(Clone, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
-pub struct SegmentSizeResult {
-    pub method: SegmentMethod,
-    // calculated size of this subtree, using this method
-    pub accum_size: u64,
+impl SegmentSize {
+    fn total(&self) -> u64 {
+        self.this_size + self.children.iter().fold(0, |acc, x| acc + x.total())
+    }
+
+    pub fn total_children(&self) -> u64 {
+        if self.method == SnapshotAfter {
+            self.this_size + self.children.iter().fold(0, |acc, x| acc + x.total())
+        } else {
+            self.children.iter().fold(0, |acc, x| acc + x.total())
+        }
+    }
 }

 /// Different methods to retain history from a particular state
-#[derive(Clone, Copy, Debug, Eq, PartialEq, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub enum SegmentMethod {
-    SnapshotHere, // A logical snapshot is needed after this segment
-    Wal,          // Keep WAL leading up to this node
+    SnapshotAfter,
+    Wal,
+    WalNeeded,
    Skipped,
 }
+
+use SegmentMethod::*;
+
+impl<K: std::hash::Hash + Eq + 'static> Storage<K> {
+    /// Creates a new storage with the given default branch name.
+    pub fn new(initial_branch: K) -> Storage<K> {
+        let init_segment = Segment {
+            op: "".into(),
+            needed: false,
+            parent: None,
+            start_lsn: 0,
+            end_lsn: 0,
+            start_size: 0,
+            end_size: Some(0),
+            children_after: Vec::new(),
+        };
+
+        Storage {
+            segments: vec![init_segment],
+            branches: HashMap::from([(initial_branch, 0)]),
+        }
+    }
+
+    /// Advances the branch with a new point, at given LSN.
+    pub fn insert_point<Q: ?Sized>(
+        &mut self,
+        branch: &Q,
+        op: Cow<'static, str>,
+        lsn: u64,
+        size: Option<u64>,
+    ) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        let Some(lastseg_id) = self.branches.get(branch).copied() else { anyhow::bail!("branch not found: {branch:?}") };
+        let newseg_id = self.segments.len();
+        let lastseg = &mut self.segments[lastseg_id];
+
+        assert!(lsn > lastseg.end_lsn);
+
+        let Some(start_size) = lastseg.end_size else { anyhow::bail!("no end_size on latest segment for {branch:?}") };
+
+        let newseg = Segment {
+            op,
+            parent: Some(lastseg_id),
+            start_lsn: lastseg.end_lsn,
+            end_lsn: lsn,
+            start_size,
+            end_size: size,
+            children_after: Vec::new(),
+            needed: false,
+        };
+        lastseg.children_after.push(newseg_id);
+
+        self.segments.push(newseg);
+        *self.branches.get_mut(branch).expect("read already") = newseg_id;
+
+        Ok(())
+    }
+
+    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
+    pub fn modify_branch<Q: ?Sized>(
+        &mut self,
+        branch: &Q,
+        op: Cow<'static, str>,
+        lsn_bytes: u64,
+        size_bytes: i64,
+    ) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        let Some(lastseg_id) = self.branches.get(branch).copied() else { anyhow::bail!("branch not found: {branch:?}") };
+        let newseg_id = self.segments.len();
+        let lastseg = &mut self.segments[lastseg_id];
+
+        let Some(last_end_size) = lastseg.end_size else { anyhow::bail!("no end_size on latest segment for {branch:?}") };
+
+        let newseg = Segment {
+            op,
+            parent: Some(lastseg_id),
+            start_lsn: lastseg.end_lsn,
+            end_lsn: lastseg.end_lsn + lsn_bytes,
+            start_size: last_end_size,
+            end_size: Some((last_end_size as i64 + size_bytes) as u64),
+            children_after: Vec::new(),
+            needed: false,
+        };
+        lastseg.children_after.push(newseg_id);
+
+        self.segments.push(newseg);
+        *self.branches.get_mut(branch).expect("read already") = newseg_id;
+        Ok(())
+    }
+
+    pub fn insert<Q: ?Sized>(&mut self, branch: &Q, bytes: u64) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        self.modify_branch(branch, "insert".into(), bytes, bytes as i64)
+    }
+
+    pub fn update<Q: ?Sized>(&mut self, branch: &Q, bytes: u64) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        self.modify_branch(branch, "update".into(), bytes, 0i64)
+    }
+
+    pub fn delete<Q: ?Sized>(&mut self, branch: &Q, bytes: u64) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q>,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        self.modify_branch(branch, "delete".into(), bytes, -(bytes as i64))
+    }
+
+    pub fn branch<Q: ?Sized>(&mut self, parent: &Q, name: K) -> anyhow::Result<()>
+    where
+        K: std::borrow::Borrow<Q> + std::fmt::Debug,
+        Q: std::hash::Hash + Eq + std::fmt::Debug,
+    {
+        // Find the right segment
+        let branchseg_id = *self.branches.get(parent).with_context(|| {
+            format!(
+                "should had found the parent {:?} by key. in branches {:?}",
+                parent, self.branches
+            )
+        })?;
+
+        let _branchseg = &mut self.segments[branchseg_id];
+
+        // Create branch name for it
+        self.branches.insert(name, branchseg_id);
+        Ok(())
+    }
+
+    pub fn calculate(&mut self, retention_period: u64) -> anyhow::Result<SegmentSize> {
+        // Phase 1: Mark all the segments that need to be retained
+        for (_branch, &last_seg_id) in self.branches.iter() {
+            let last_seg = &self.segments[last_seg_id];
+            let cutoff_lsn = last_seg.start_lsn.saturating_sub(retention_period);
+            let mut seg_id = last_seg_id;
+            loop {
+                let seg = &mut self.segments[seg_id];
+                if seg.end_lsn < cutoff_lsn {
+                    break;
+                }
+                seg.needed = true;
+                if let Some(prev_seg_id) = seg.parent {
+                    seg_id = prev_seg_id;
+                } else {
+                    break;
+                }
+            }
+        }
+
+        // Phase 2: For each oldest segment in a chain that needs to be retained,
+        // calculate if we should store snapshot or WAL
+        self.size_from_snapshot_later(0)
+    }
+
+    fn size_from_wal(&self, seg_id: usize) -> anyhow::Result<SegmentSize> {
+        let seg = &self.segments[seg_id];
+
+        let this_size = seg.end_lsn - seg.start_lsn;
+
+        let mut children = Vec::new();
+
+        // try both ways
+        for &child_id in seg.children_after.iter() {
+            // try each child both ways
+            let child = &self.segments[child_id];
+            let p1 = self.size_from_wal(child_id)?;
+
+            let p = if !child.needed {
+                let p2 = self.size_from_snapshot_later(child_id)?;
+                if p1.total() < p2.total() {
+                    p1
+                } else {
+                    p2
+                }
+            } else {
+                p1
+            };
+            children.push(p);
+        }
+        Ok(SegmentSize {
+            seg_id,
+            method: if seg.needed { WalNeeded } else { Wal },
+            this_size,
+            children,
+        })
+    }
+
+    fn size_from_snapshot_later(&self, seg_id: usize) -> anyhow::Result<SegmentSize> {
+        // If this is needed, then it's time to do the snapshot and continue
+        // with wal method.
+        let seg = &self.segments[seg_id];
+        //eprintln!("snap: seg{}: {} needed: {}", seg_id, seg.children_after.len(), seg.needed);
+        if seg.needed {
+            let mut children = Vec::new();
+
+            for &child_id in seg.children_after.iter() {
+                // try each child both ways
+                let child = &self.segments[child_id];
+                let p1 = self.size_from_wal(child_id)?;
+
+                let p = if !child.needed {
+                    let p2 = self.size_from_snapshot_later(child_id)?;
+                    if p1.total() < p2.total() {
+                        p1
+                    } else {
+                        p2
+                    }
+                } else {
+                    p1
+                };
+                children.push(p);
+            }
+            Ok(SegmentSize {
+                seg_id,
+                method: WalNeeded,
+                this_size: seg.start_size,
+                children,
+            })
+        } else {
+            // If any of the direct children are "needed", need to be able to reconstruct here
+            let mut children_needed = false;
+            for &child in seg.children_after.iter() {
+                let seg = &self.segments[child];
+                if seg.needed {
+                    children_needed = true;
+                    break;
+                }
+            }
+
+            let method1 = if !children_needed {
+                let mut children = Vec::new();
+                for child in seg.children_after.iter() {
+                    children.push(self.size_from_snapshot_later(*child)?);
+                }
+                Some(SegmentSize {
+                    seg_id,
+                    method: Skipped,
+                    this_size: 0,
+                    children,
+                })
+            } else {
+                None
+            };
+
+            // If this is a junction, consider snapshotting here
+            let method2 = if children_needed || seg.children_after.len() >= 2 {
+                let mut children = Vec::new();
+                for child in seg.children_after.iter() {
+                    children.push(self.size_from_wal(*child)?);
+                }
+                let Some(this_size) = seg.end_size else { anyhow::bail!("no end_size at junction {seg_id}") };
+                Some(SegmentSize {
+                    seg_id,
+                    method: SnapshotAfter,
+                    this_size,
+                    children,
+                })
+            } else {
+                None
+            };
+
+            Ok(match (method1, method2) {
+                (None, None) => anyhow::bail!(
+                    "neither method was applicable: children_after={}, children_needed={}",
+                    seg.children_after.len(),
+                    children_needed
+                ),
+                (Some(method), None) => method,
+                (None, Some(method)) => method,
+                (Some(method1), Some(method2)) => {
+                    if method1.total() < method2.total() {
+                        method1
+                    } else {
+                        method2
+                    }
+                }
+            })
+        }
+    }
+
+    pub fn into_segments(self) -> Vec<Segment> {
+        self.segments
+    }
+}
--- a/libs/tenant_size_model/src/main.rs
+++ b/libs/tenant_size_model/src/main.rs
@@ -0,0 +1,269 @@
+//! Tenant size model testing ground.
+//!
+//! Has a number of scenarios and a `main` for invoking these by number, calculating the history
+//! size, and outputting a graphviz graph. The Makefile in the directory shows how to use graphviz
+//! to turn scenarios into pngs.
+
+use tenant_size_model::{Segment, SegmentSize, Storage};
+
+// Main branch only. Some updates on it.
+fn scenario_1() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+// Main branch with one child branch. Some updates on both.
+fn scenario_2() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    // Branch
+    storage.branch("main", "child")?;
+    storage.update("child", 1_000)?;
+
+    // More updates on parent
+    storage.update("main", 1_000)?;
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+// Like 2, but more updates on main
+fn scenario_3() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    // Branch
+    storage.branch("main", "child")?;
+    storage.update("child", 1_000)?;
+
+    // More updates on parent
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+// Diverged branches
+fn scenario_4() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    // Create main branch
+    let mut storage = Storage::new("main");
+
+    // Bulk load 5 GB of data to it
+    storage.insert("main", 5_000)?;
+
+    // Stream of updates
+    for _ in 0..5 {
+        storage.update("main", 1_000)?;
+    }
+
+    // Branch
+    storage.branch("main", "child")?;
+    storage.update("child", 1_000)?;
+
+    // More updates on parent
+    for _ in 0..8 {
+        storage.update("main", 1_000)?;
+    }
+
+    let size = storage.calculate(1000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+fn scenario_5() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    let mut storage = Storage::new("a");
+    storage.insert("a", 5000)?;
+    storage.branch("a", "b")?;
+    storage.update("b", 4000)?;
+    storage.update("a", 2000)?;
+    storage.branch("a", "c")?;
+    storage.insert("c", 4000)?;
+    storage.insert("a", 2000)?;
+
+    let size = storage.calculate(5000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+fn scenario_6() -> anyhow::Result<(Vec<Segment>, SegmentSize)> {
+    use std::borrow::Cow;
+
+    const NO_OP: Cow<'static, str> = Cow::Borrowed("");
+
+    let branches = [
+        Some(0x7ff1edab8182025f15ae33482edb590a_u128),
+        Some(0xb1719e044db05401a05a2ed588a3ad3f),
+        Some(0xb68d6691c895ad0a70809470020929ef),
+    ];
+
+    // compared to other scenarios, this one uses bytes instead of kB
+
+    let mut storage = Storage::new(None);
+
+    storage.branch(&None, branches[0])?; // at 0
+    storage.modify_branch(&branches[0], NO_OP, 108951064, 43696128)?; // at 108951064
+    storage.branch(&branches[0], branches[1])?; // at 108951064
+    storage.modify_branch(&branches[1], NO_OP, 15560408, -1851392)?; // at 124511472
+    storage.modify_branch(&branches[0], NO_OP, 174464360, -1531904)?; // at 283415424
+    storage.branch(&branches[0], branches[2])?; // at 283415424
+    storage.modify_branch(&branches[2], NO_OP, 15906192, 8192)?; // at 299321616
+    storage.modify_branch(&branches[0], NO_OP, 18909976, 32768)?; // at 302325400
+
+    let size = storage.calculate(100_000)?;
+
+    Ok((storage.into_segments(), size))
+}
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+
+    let scenario = if args.len() < 2 { "1" } else { &args[1] };
+
+    let (segments, size) = match scenario {
+        "1" => scenario_1(),
+        "2" => scenario_2(),
+        "3" => scenario_3(),
+        "4" => scenario_4(),
+        "5" => scenario_5(),
+        "6" => scenario_6(),
+        other => {
+            eprintln!("invalid scenario {}", other);
+            std::process::exit(1);
+        }
+    }
+    .unwrap();
+
+    graphviz_tree(&segments, &size);
+}
+
+fn graphviz_recurse(segments: &[Segment], node: &SegmentSize) {
+    use tenant_size_model::SegmentMethod::*;
+
+    let seg_id = node.seg_id;
+    let seg = segments.get(seg_id).unwrap();
+    let lsn = seg.end_lsn;
+    let size = seg.end_size.unwrap_or(0);
+    let method = node.method;
+
+    println!("  {{");
+    println!("    node [width=0.1 height=0.1 shape=oval]");
+
+    let tenant_size = node.total_children();
+
+    let penwidth = if seg.needed { 6 } else { 3 };
+    let x = match method {
+        SnapshotAfter =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" style=filled penwidth={penwidth}"),
+        Wal =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"black\" penwidth={penwidth}"),
+        WalNeeded =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"black\" penwidth={penwidth}"),
+        Skipped =>
+            format!("label=\"lsn: {lsn}\\nsize: {size}\\ntenant_size: {tenant_size}\" color=\"gray\" penwidth={penwidth}"),
+    };
+
+    println!("    \"seg{seg_id}\" [{x}]");
+    println!("  }}");
+
+    // Recurse. Much of the data is actually on the edge
+    for child in node.children.iter() {
+        let child_id = child.seg_id;
+        graphviz_recurse(segments, child);
+
+        let edge_color = match child.method {
+            SnapshotAfter => "gray",
+            Wal => "black",
+            WalNeeded => "black",
+            Skipped => "gray",
+        };
+
+        println!("  {{");
+        println!("    edge [] ");
+        print!("    \"seg{seg_id}\" -> \"seg{child_id}\" [");
+        print!("color={edge_color}");
+        if child.method == WalNeeded {
+            print!(" penwidth=6");
+        }
+        if child.method == Wal {
+            print!(" penwidth=3");
+        }
+
+        let next = segments.get(child_id).unwrap();
+
+        if next.op.is_empty() {
+            print!(
+                " label=\"{} / {}\"",
+                next.end_lsn - seg.end_lsn,
+                (next.end_size.unwrap_or(0) as i128 - seg.end_size.unwrap_or(0) as i128)
+            );
+        } else {
+            print!(" label=\"{}: {}\"", next.op, next.end_lsn - seg.end_lsn);
+        }
+        println!("]");
+        println!("  }}");
+    }
+}
+
+fn graphviz_tree(segments: &[Segment], tree: &SegmentSize) {
+    println!("digraph G {{");
+    println!("  fontname=\"Helvetica,Arial,sans-serif\"");
+    println!("  node [fontname=\"Helvetica,Arial,sans-serif\"]");
+    println!("  edge [fontname=\"Helvetica,Arial,sans-serif\"]");
+    println!("  graph [center=1 rankdir=LR]");
+    println!("  edge [dir=none]");
+
+    graphviz_recurse(segments, tree);
+
+    println!("}}");
+}
+
+#[test]
+fn scenarios_return_same_size() {
+    type ScenarioFn = fn() -> anyhow::Result<(Vec<Segment>, SegmentSize)>;
+    let truths: &[(u32, ScenarioFn, _)] = &[
+        (line!(), scenario_1, 8000),
+        (line!(), scenario_2, 9000),
+        (line!(), scenario_3, 13000),
+        (line!(), scenario_4, 16000),
+        (line!(), scenario_5, 17000),
+        (line!(), scenario_6, 333_792_000),
+    ];
+
+    for (line, scenario, expected) in truths {
+        let (_, size) = scenario().unwrap();
+        assert_eq!(*expected, size.total_children(), "scenario on line {line}");
+    }
+}
--- a/libs/tenant_size_model/src/svg.rs
+++ b/libs/tenant_size_model/src/svg.rs
@@ -1,193 +0,0 @@
-use crate::{SegmentMethod, SegmentSizeResult, SizeResult, StorageModel};
-use std::fmt::Write;
-
-const SVG_WIDTH: f32 = 500.0;
-
-struct SvgDraw<'a> {
-    storage: &'a StorageModel,
-    branches: &'a [String],
-    seg_to_branch: &'a [usize],
-    sizes: &'a [SegmentSizeResult],
-
-    // layout
-    xscale: f32,
-    min_lsn: u64,
-    seg_coordinates: Vec<(f32, f32)>,
-}
-
-fn draw_legend(result: &mut String) -> anyhow::Result<()> {
-    writeln!(
-        result,
-        "<circle cx=\"10\" cy=\"10\" r=\"5\" stroke=\"red\"/>"
-    )?;
-    writeln!(result, "<text x=\"20\" y=\"15\">logical snapshot</text>")?;
-    writeln!(
-        result,
-        "<line x1=\"5\" y1=\"30\" x2=\"15\" y2=\"30\" stroke-width=\"6\" stroke=\"black\" />"
-    )?;
-    writeln!(
-        result,
-        "<text x=\"20\" y=\"35\">WAL within retention period</text>"
-    )?;
-    writeln!(
-        result,
-        "<line x1=\"5\" y1=\"50\" x2=\"15\" y2=\"50\" stroke-width=\"3\" stroke=\"black\" />"
-    )?;
-    writeln!(
-        result,
-        "<text x=\"20\" y=\"55\">WAL retained to avoid copy</text>"
-    )?;
-    writeln!(
-        result,
-        "<line x1=\"5\" y1=\"70\" x2=\"15\" y2=\"70\" stroke-width=\"1\" stroke=\"gray\" />"
-    )?;
-    writeln!(result, "<text x=\"20\" y=\"75\">WAL not retained</text>")?;
-    Ok(())
-}
-
-pub fn draw_svg(
-    storage: &StorageModel,
-    branches: &[String],
-    seg_to_branch: &[usize],
-    sizes: &SizeResult,
-) -> anyhow::Result<String> {
-    let mut draw = SvgDraw {
-        storage,
-        branches,
-        seg_to_branch,
-        sizes: &sizes.segments,
-
-        xscale: 0.0,
-        min_lsn: 0,
-        seg_coordinates: Vec::new(),
-    };
-
-    let mut result = String::new();
-
-    writeln!(result, "<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" height=\"300\" width=\"500\">")?;
-
-    draw.calculate_svg_layout();
-
-    // Draw the tree
-    for (seg_id, _seg) in storage.segments.iter().enumerate() {
-        draw.draw_seg_phase1(seg_id, &mut result)?;
-    }
-
-    // Draw snapshots
-    for (seg_id, _seg) in storage.segments.iter().enumerate() {
-        draw.draw_seg_phase2(seg_id, &mut result)?;
-    }
-
-    draw_legend(&mut result)?;
-
-    write!(result, "</svg>")?;
-
-    Ok(result)
-}
-
-impl<'a> SvgDraw<'a> {
-    fn calculate_svg_layout(&mut self) {
-        // Find x scale
-        let segments = &self.storage.segments;
-        let min_lsn = segments.iter().map(|s| s.lsn).fold(u64::MAX, std::cmp::min);
-        let max_lsn = segments.iter().map(|s| s.lsn).fold(0, std::cmp::max);
-
-        // Start with 1 pixel = 1 byte. Double the scale until it fits into the image
-        let mut xscale = 1.0;
-        while (max_lsn - min_lsn) as f32 / xscale > SVG_WIDTH {
-            xscale *= 2.0;
-        }
-
-        // Layout the timelines on Y dimension.
-        // TODO
-        let mut y = 100.0;
-        let mut branch_y_coordinates = Vec::new();
-        for _branch in self.branches {
-            branch_y_coordinates.push(y);
-            y += 40.0;
-        }
-
-        // Calculate coordinates for each point
-        let seg_coordinates = std::iter::zip(segments, self.seg_to_branch)
-            .map(|(seg, branch_id)| {
-                let x = (seg.lsn - min_lsn) as f32 / xscale;
-                let y = branch_y_coordinates[*branch_id];
-                (x, y)
-            })
-            .collect();
-
-        self.xscale = xscale;
-        self.min_lsn = min_lsn;
-        self.seg_coordinates = seg_coordinates;
-    }
-
-    /// Draws lines between points
-    fn draw_seg_phase1(&self, seg_id: usize, result: &mut String) -> anyhow::Result<()> {
-        let seg = &self.storage.segments[seg_id];
-
-        let wal_bytes = if let Some(parent_id) = seg.parent {
-            seg.lsn - self.storage.segments[parent_id].lsn
-        } else {
-            0
-        };
-
-        let style = match self.sizes[seg_id].method {
-            SegmentMethod::SnapshotHere => "stroke-width=\"1\" stroke=\"gray\"",
-            SegmentMethod::Wal if seg.needed && wal_bytes > 0 => {
-                "stroke-width=\"6\" stroke=\"black\""
-            }
-            SegmentMethod::Wal => "stroke-width=\"3\" stroke=\"black\"",
-            SegmentMethod::Skipped => "stroke-width=\"1\" stroke=\"gray\"",
-        };
-        if let Some(parent_id) = seg.parent {
-            let (x1, y1) = self.seg_coordinates[parent_id];
-            let (x2, y2) = self.seg_coordinates[seg_id];
-
-            writeln!(
-                result,
-                "<line x1=\"{x1}\" y1=\"{y1}\" x2=\"{x2}\" y2=\"{y2}\" {style}>",
-            )?;
-            writeln!(
-                result,
-                "  <title>{wal_bytes} bytes of WAL (seg {seg_id})</title>"
-            )?;
-            writeln!(result, "</line>")?;
-        } else {
-            // draw a little dash to mark the starting point of this branch
-            let (x, y) = self.seg_coordinates[seg_id];
-            let (x1, y1) = (x, y - 5.0);
-            let (x2, y2) = (x, y + 5.0);
-
-            writeln!(
-                result,
-                "<line x1=\"{x1}\" y1=\"{y1}\" x2=\"{x2}\" y2=\"{y2}\" {style}>",
-            )?;
-            writeln!(result, "  <title>(seg {seg_id})</title>")?;
-            writeln!(result, "</line>")?;
-        }
-
-        Ok(())
-    }
-
-    /// Draw circles where snapshots are taken
-    fn draw_seg_phase2(&self, seg_id: usize, result: &mut String) -> anyhow::Result<()> {
-        let seg = &self.storage.segments[seg_id];
-
-        // draw a snapshot point if it's needed
-        let (coord_x, coord_y) = self.seg_coordinates[seg_id];
-        if self.sizes[seg_id].method == SegmentMethod::SnapshotHere {
-            writeln!(
-                result,
-                "<circle cx=\"{coord_x}\" cy=\"{coord_y}\" r=\"5\" stroke=\"red\">",
-            )?;
-            writeln!(
-                result,
-                "  <title>logical size {}</title>",
-                seg.size.unwrap()
-            )?;
-            write!(result, "</circle>")?;
-        }
-
-        Ok(())
-    }
-}
--- a/libs/tenant_size_model/tests/tests.rs
+++ b/libs/tenant_size_model/tests/tests.rs
@@ -1,313 +0,0 @@
-//! Tenant size model tests.
-
-use tenant_size_model::{Segment, SizeResult, StorageModel};
-
-use std::collections::HashMap;
-
-struct ScenarioBuilder {
-    segments: Vec<Segment>,
-
-    /// Mapping from the branch name to the index of a segment describing its latest state.
-    branches: HashMap<String, usize>,
-}
-
-impl ScenarioBuilder {
-    /// Creates a new storage with the given default branch name.
-    pub fn new(initial_branch: &str) -> ScenarioBuilder {
-        let init_segment = Segment {
-            parent: None,
-            lsn: 0,
-            size: Some(0),
-            needed: false, // determined later
-        };
-
-        ScenarioBuilder {
-            segments: vec![init_segment],
-            branches: HashMap::from([(initial_branch.into(), 0)]),
-        }
-    }
-
-    /// Advances the branch with the named operation, by the relative LSN and logical size bytes.
-    pub fn modify_branch(&mut self, branch: &str, lsn_bytes: u64, size_bytes: i64) {
-        let lastseg_id = *self.branches.get(branch).unwrap();
-        let newseg_id = self.segments.len();
-        let lastseg = &mut self.segments[lastseg_id];
-
-        let newseg = Segment {
-            parent: Some(lastseg_id),
-            lsn: lastseg.lsn + lsn_bytes,
-            size: Some((lastseg.size.unwrap() as i64 + size_bytes) as u64),
-            needed: false,
-        };
-
-        self.segments.push(newseg);
-        *self.branches.get_mut(branch).expect("read already") = newseg_id;
-    }
-
-    pub fn insert(&mut self, branch: &str, bytes: u64) {
-        self.modify_branch(branch, bytes, bytes as i64);
-    }
-
-    pub fn update(&mut self, branch: &str, bytes: u64) {
-        self.modify_branch(branch, bytes, 0i64);
-    }
-
-    pub fn _delete(&mut self, branch: &str, bytes: u64) {
-        self.modify_branch(branch, bytes, -(bytes as i64));
-    }
-
-    /// Panics if the parent branch cannot be found.
-    pub fn branch(&mut self, parent: &str, name: &str) {
-        // Find the right segment
-        let branchseg_id = *self
-            .branches
-            .get(parent)
-            .expect("should had found the parent by key");
-        let _branchseg = &mut self.segments[branchseg_id];
-
-        // Create branch name for it
-        self.branches.insert(name.to_string(), branchseg_id);
-    }
-
-    pub fn calculate(&mut self, retention_period: u64) -> (StorageModel, SizeResult) {
-        // Phase 1: Mark all the segments that need to be retained
-        for (_branch, &last_seg_id) in self.branches.iter() {
-            let last_seg = &self.segments[last_seg_id];
-            let cutoff_lsn = last_seg.lsn.saturating_sub(retention_period);
-            let mut seg_id = last_seg_id;
-            loop {
-                let seg = &mut self.segments[seg_id];
-                if seg.lsn <= cutoff_lsn {
-                    break;
-                }
-                seg.needed = true;
-                if let Some(prev_seg_id) = seg.parent {
-                    seg_id = prev_seg_id;
-                } else {
-                    break;
-                }
-            }
-        }
-
-        // Perform the calculation
-        let storage_model = StorageModel {
-            segments: self.segments.clone(),
-        };
-        let size_result = storage_model.calculate();
-        (storage_model, size_result)
-    }
-}
-
-// Main branch only. Some updates on it.
-#[test]
-fn scenario_1() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Calculate the synthetic size with retention horizon 1000
-    let (_model, result) = scenario.calculate(1000);
-
-    // The end of the branch is at LSN 10000. Need to retain
-    // a logical snapshot at LSN 9000, plus the WAL between 9000-10000.
-    // The logical snapshot has size 5000.
-    assert_eq!(result.total_size, 5000 + 1000);
-}
-
-// Main branch only. Some updates on it.
-#[test]
-fn scenario_2() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Branch
-    scenario.branch("main", "child");
-    scenario.update("child", 1_000);
-
-    // More updates on parent
-    scenario.update("main", 1_000);
-
-    //
-    // The history looks like this now:
-    //
-    //         10000          11000
-    // *----*----*--------------*    main
-    //           |
-    //           |            11000
-    //           +--------------     child
-    //
-    //
-    // With retention horizon 1000, we need to retain logical snapshot
-    // at the branch point, size 5000, and the WAL from 10000-11000 on
-    // both branches.
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 5000 + 1000 + 1000);
-}
-
-// Like 2, but more updates on main
-#[test]
-fn scenario_3() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Branch
-    scenario.branch("main", "child");
-    scenario.update("child", 1_000);
-
-    // More updates on parent
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    //
-    // The history looks like this now:
-    //
-    //         10000                                 15000
-    // *----*----*------------------------------------*    main
-    //           |
-    //           |            11000
-    //           +--------------     child
-    //
-    //
-    // With retention horizon 1000, it's still cheapest to retain
-    // - snapshot at branch point (size 5000)
-    // - WAL on child between 10000-11000
-    // - WAL on main between 10000-15000
-    //
-    // This is in total 5000 + 1000 + 5000
-    //
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 5000 + 1000 + 5000);
-}
-
-// Diverged branches
-#[test]
-fn scenario_4() {
-    // Create main branch
-    let mut scenario = ScenarioBuilder::new("main");
-
-    // Bulk load 5 GB of data to it
-    scenario.insert("main", 5_000);
-
-    // Stream of updates
-    for _ in 0..5 {
-        scenario.update("main", 1_000);
-    }
-
-    // Branch
-    scenario.branch("main", "child");
-    scenario.update("child", 1_000);
-
-    // More updates on parent
-    for _ in 0..8 {
-        scenario.update("main", 1_000);
-    }
-
-    //
-    // The history looks like this now:
-    //
-    //         10000                                 18000
-    // *----*----*------------------------------------*    main
-    //           |
-    //           |            11000
-    //           +--------------     child
-    //
-    //
-    // With retention horizon 1000, it's now cheapest to retain
-    // separate snapshots on both branches:
-    // - snapshot on main branch at LSN 17000 (size 5000)
-    // - WAL on main between 17000-18000
-    // - snapshot on child branch at LSN 10000 (size 5000)
-    // - WAL on child between 10000-11000
-    //
-    // This is in total 5000 + 1000 + 5000 + 1000 = 12000
-    //
-    // (If we used the the method from the previous scenario, and
-    // kept only snapshot at the branch point, we'd need to keep
-    // all the WAL between 10000-18000 on the main branch, so
-    // the total size would be 5000 + 1000 + 8000 = 14000. The
-    // calculation always picks the cheapest alternative)
-
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 5000 + 1000 + 5000 + 1000);
-}
-
-#[test]
-fn scenario_5() {
-    let mut scenario = ScenarioBuilder::new("a");
-    scenario.insert("a", 5000);
-    scenario.branch("a", "b");
-    scenario.update("b", 4000);
-    scenario.update("a", 2000);
-    scenario.branch("a", "c");
-    scenario.insert("c", 4000);
-    scenario.insert("a", 2000);
-
-    let (_model, result) = scenario.calculate(1000);
-
-    assert_eq!(result.total_size, 17000);
-}
-
-#[test]
-fn scenario_6() {
-    let branches = [
-        "7ff1edab8182025f15ae33482edb590a",
-        "b1719e044db05401a05a2ed588a3ad3f",
-        "0xb68d6691c895ad0a70809470020929ef",
-    ];
-
-    // compared to other scenarios, this one uses bytes instead of kB
-
-    let mut scenario = ScenarioBuilder::new("");
-
-    scenario.branch("", branches[0]); // at 0
-    scenario.modify_branch(branches[0], 108951064, 43696128); // at 108951064
-    scenario.branch(branches[0], branches[1]); // at 108951064
-    scenario.modify_branch(branches[1], 15560408, -1851392); // at 124511472
-    scenario.modify_branch(branches[0], 174464360, -1531904); // at 283415424
-    scenario.branch(branches[0], branches[2]); // at 283415424
-    scenario.modify_branch(branches[2], 15906192, 8192); // at 299321616
-    scenario.modify_branch(branches[0], 18909976, 32768); // at 302325400
-
-    let (model, result) = scenario.calculate(100_000);
-
-    // FIXME: We previously calculated 333_792_000. But with this PR, we get
-    // a much lower number. At a quick look at the model output and the
-    // calculations here, the new result seems correct to me.
-    eprintln!(
-        " MODEL: {}",
-        serde_json::to_string(&model.segments).unwrap()
-    );
-    eprintln!(
-        "RESULT: {}",
-        serde_json::to_string(&result.segments).unwrap()
-    );
-
-    assert_eq!(result.total_size, 136_236_928);
-}
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -11,39 +11,41 @@ async-trait.workspace = true
 anyhow.workspace = true
 bincode.workspace = true
 bytes.workspace = true
-heapless.workspace = true
-hex = { workspace = true, features = ["serde"] }
 hyper = { workspace = true, features = ["full"] }
-futures = { workspace = true}
-jsonwebtoken.workspace = true
-nix.workspace = true
-once_cell.workspace = true
 routerify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
-signal-hook.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
 tokio-rustls.workspace = true
 tracing.workspace = true
 tracing-subscriber = { workspace = true, features = ["json"] }
+nix.workspace = true
+signal-hook.workspace = true
 rand.workspace = true
+jsonwebtoken.workspace = true
+hex = { workspace = true, features = ["serde"] }
 rustls.workspace = true
+rustls-split.workspace = true
+git-version.workspace = true
 serde_with.workspace = true
+once_cell.workspace = true
 strum.workspace = true
 strum_macros.workspace = true
-url.workspace = true
-uuid = { version = "1.2", features = ["v4", "serde"] }

 metrics.workspace = true
+pq_proto.workspace = true
+
 workspace_hack.workspace = true
+url.workspace = true

 [dev-dependencies]
 byteorder.workspace = true
 bytes.workspace = true
-criterion.workspace = true
 hex-literal.workspace = true
 tempfile.workspace = true
+criterion.workspace = true
+rustls-pemfile.workspace = true

 [[bench]]
 name = "benchmarks"
--- a/libs/utils/src/history_buffer.rs
+++ b/libs/utils/src/history_buffer.rs
@@ -1,161 +0,0 @@
-//! A heapless buffer for events of sorts.
-
-use std::ops;
-
-use heapless::HistoryBuffer;
-
-#[derive(Debug, Clone)]
-pub struct HistoryBufferWithDropCounter<T, const L: usize> {
-    buffer: HistoryBuffer<T, L>,
-    drop_count: u64,
-}
-
-impl<T, const L: usize> HistoryBufferWithDropCounter<T, L> {
-    pub fn write(&mut self, data: T) {
-        let len_before = self.buffer.len();
-        self.buffer.write(data);
-        let len_after = self.buffer.len();
-        self.drop_count += u64::from(len_before == len_after);
-    }
-    pub fn drop_count(&self) -> u64 {
-        self.drop_count
-    }
-    pub fn map<U, F: Fn(&T) -> U>(&self, f: F) -> HistoryBufferWithDropCounter<U, L> {
-        let mut buffer = HistoryBuffer::new();
-        buffer.extend(self.buffer.oldest_ordered().map(f));
-        HistoryBufferWithDropCounter::<U, L> {
-            buffer,
-            drop_count: self.drop_count,
-        }
-    }
-}
-
-impl<T, const L: usize> Default for HistoryBufferWithDropCounter<T, L> {
-    fn default() -> Self {
-        Self {
-            buffer: HistoryBuffer::default(),
-            drop_count: 0,
-        }
-    }
-}
-
-impl<T, const L: usize> ops::Deref for HistoryBufferWithDropCounter<T, L> {
-    type Target = HistoryBuffer<T, L>;
-
-    fn deref(&self) -> &Self::Target {
-        &self.buffer
-    }
-}
-
-#[derive(serde::Serialize)]
-struct SerdeRepr<T> {
-    buffer: Vec<T>,
-    drop_count: u64,
-}
-
-impl<'a, T, const L: usize> From<&'a HistoryBufferWithDropCounter<T, L>> for SerdeRepr<T>
-where
-    T: Clone + serde::Serialize,
-{
-    fn from(value: &'a HistoryBufferWithDropCounter<T, L>) -> Self {
-        let HistoryBufferWithDropCounter { buffer, drop_count } = value;
-        SerdeRepr {
-            buffer: buffer.iter().cloned().collect(),
-            drop_count: *drop_count,
-        }
-    }
-}
-
-impl<T, const L: usize> serde::Serialize for HistoryBufferWithDropCounter<T, L>
-where
-    T: Clone + serde::Serialize,
-{
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        SerdeRepr::from(self).serialize(serializer)
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::HistoryBufferWithDropCounter;
-
-    #[test]
-    fn test_basics() {
-        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();
-        b.write(1);
-        b.write(2);
-        b.write(3);
-        assert!(b.iter().any(|e| *e == 2));
-        assert!(b.iter().any(|e| *e == 3));
-        assert!(!b.iter().any(|e| *e == 1));
-    }
-
-    #[test]
-    fn test_drop_count_works() {
-        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();
-        b.write(1);
-        assert_eq!(b.drop_count(), 0);
-        b.write(2);
-        assert_eq!(b.drop_count(), 0);
-        b.write(3);
-        assert_eq!(b.drop_count(), 1);
-        b.write(4);
-        assert_eq!(b.drop_count(), 2);
-    }
-
-    #[test]
-    fn test_clone_works() {
-        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();
-        b.write(1);
-        b.write(2);
-        b.write(3);
-        assert_eq!(b.drop_count(), 1);
-        let mut c = b.clone();
-        assert_eq!(c.drop_count(), 1);
-        assert!(c.iter().any(|e| *e == 2));
-        assert!(c.iter().any(|e| *e == 3));
-        assert!(!c.iter().any(|e| *e == 1));
-
-        c.write(4);
-        assert!(c.iter().any(|e| *e == 4));
-        assert!(!b.iter().any(|e| *e == 4));
-    }
-
-    #[test]
-    fn test_map() {
-        let mut b = HistoryBufferWithDropCounter::<_, 2>::default();
-
-        b.write(1);
-        assert_eq!(b.drop_count(), 0);
-        {
-            let c = b.map(|i| i + 10);
-            assert_eq!(c.oldest_ordered().cloned().collect::<Vec<_>>(), vec![11]);
-            assert_eq!(c.drop_count(), 0);
-        }
-
-        b.write(2);
-        assert_eq!(b.drop_count(), 0);
-        {
-            let c = b.map(|i| i + 10);
-            assert_eq!(
-                c.oldest_ordered().cloned().collect::<Vec<_>>(),
-                vec![11, 12]
-            );
-            assert_eq!(c.drop_count(), 0);
-        }
-
-        b.write(3);
-        assert_eq!(b.drop_count(), 1);
-        {
-            let c = b.map(|i| i + 10);
-            assert_eq!(
-                c.oldest_ordered().cloned().collect::<Vec<_>>(),
-                vec![12, 13]
-            );
-            assert_eq!(c.drop_count(), 1);
-        }
-    }
-}
--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -1,20 +1,18 @@
 use crate::auth::{Claims, JwtAuth};
 use crate::http::error;
-use anyhow::{anyhow, Context};
-use hyper::header::{HeaderName, AUTHORIZATION};
-use hyper::http::HeaderValue;
+use anyhow::anyhow;
+use hyper::header::AUTHORIZATION;
 use hyper::{header::CONTENT_TYPE, Body, Request, Response, Server};
-use hyper::{Method, StatusCode};
 use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder};
 use once_cell::sync::Lazy;
 use routerify::ext::RequestExt;
-use routerify::{Middleware, RequestInfo, Router, RouterBuilder, RouterService};
+use routerify::RequestInfo;
+use routerify::{Middleware, Router, RouterBuilder, RouterService};
 use tokio::task::JoinError;
-use tracing;
+use tracing::info;

 use std::future::Future;
 use std::net::TcpListener;
-use std::str::FromStr;

 use super::error::ApiError;

@@ -26,36 +24,8 @@ static SERVE_METRICS_COUNT: Lazy<IntCounter> = Lazy::new(|| {
    .expect("failed to define a metric")
 });

-static X_REQUEST_ID_HEADER_STR: &str = "x-request-id";
-
-static X_REQUEST_ID_HEADER: HeaderName = HeaderName::from_static(X_REQUEST_ID_HEADER_STR);
-#[derive(Debug, Default, Clone)]
-struct RequestId(String);
-
 async fn logger(res: Response<Body>, info: RequestInfo) -> Result<Response<Body>, ApiError> {
-    let request_id = info.context::<RequestId>().unwrap_or_default().0;
-
-    // cannot factor out the Level to avoid the repetition
-    // because tracing can only work with const Level
-    // which is not the case here
-
-    if info.method() == Method::GET && res.status() == StatusCode::OK {
-        tracing::debug!(
-            "{} {} {} {}",
-            info.method(),
-            info.uri().path(),
-            request_id,
-            res.status()
-        );
-    } else {
-        tracing::info!(
-            "{} {} {} {}",
-            info.method(),
-            info.uri().path(),
-            request_id,
-            res.status()
-        );
-    }
+    info!("{} {} {}", info.method(), info.uri().path(), res.status(),);
    Ok(res)
 }

@@ -83,52 +53,9 @@ async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body
    Ok(response)
 }

-pub fn add_request_id_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
-) -> Middleware<B, ApiError> {
-    Middleware::pre(move |req| async move {
-        let request_id = match req.headers().get(&X_REQUEST_ID_HEADER) {
-            Some(request_id) => request_id
-                .to_str()
-                .expect("extract request id value")
-                .to_owned(),
-            None => {
-                let request_id = uuid::Uuid::new_v4();
-                request_id.to_string()
-            }
-        };
-
-        if req.method() == Method::GET {
-            tracing::debug!("{} {} {}", req.method(), req.uri().path(), request_id);
-        } else {
-            tracing::info!("{} {} {}", req.method(), req.uri().path(), request_id);
-        }
-        req.set_context(RequestId(request_id));
-
-        Ok(req)
-    })
-}
-
-async fn add_request_id_header_to_response(
-    mut res: Response<Body>,
-    req_info: RequestInfo,
-) -> Result<Response<Body>, ApiError> {
-    if let Some(request_id) = req_info.context::<RequestId>() {
-        if let Ok(request_header_value) = HeaderValue::from_str(&request_id.0) {
-            res.headers_mut()
-                .insert(&X_REQUEST_ID_HEADER, request_header_value);
-        };
-    };
-
-    Ok(res)
-}
-
 pub fn make_router() -> RouterBuilder<hyper::Body, ApiError> {
    Router::builder()
-        .middleware(add_request_id_middleware())
        .middleware(Middleware::post_with_info(logger))
-        .middleware(Middleware::post_with_info(
-            add_request_id_header_to_response,
-        ))
        .get("/metrics", prometheus_metrics_handler)
        .err_handler(error::handler)
 }
@@ -216,38 +143,6 @@ pub fn auth_middleware<B: hyper::body::HttpBody + Send + Sync + 'static>(
    })
 }

-pub fn add_response_header_middleware<B>(
-    header: &str,
-    value: &str,
-) -> anyhow::Result<Middleware<B, ApiError>>
-where
-    B: hyper::body::HttpBody + Send + Sync + 'static,
-{
-    let name =
-        HeaderName::from_str(header).with_context(|| format!("invalid header name: {header}"))?;
-    let value =
-        HeaderValue::from_str(value).with_context(|| format!("invalid header value: {value}"))?;
-    Ok(Middleware::post_with_info(
-        move |mut response, request_info| {
-            let name = name.clone();
-            let value = value.clone();
-            async move {
-                let headers = response.headers_mut();
-                if headers.contains_key(&name) {
-                    tracing::warn!(
-                        "{} response already contains header {:?}",
-                        request_info.uri(),
-                        &name,
-                    );
-                } else {
-                    headers.insert(name, value);
-                }
-                Ok(response)
-            }
-        },
-    ))
-}
-
 pub fn check_permission_with(
    req: &Request<Body>,
    check_permission: impl Fn(&Claims) -> Result<(), anyhow::Error>,
@@ -274,7 +169,7 @@ pub fn serve_thread_main<S>(
 where
    S: Future<Output = ()> + Send + Sync,
 {
-    tracing::info!("Starting an HTTP endpoint at {}", listener.local_addr()?);
+    info!("Starting an HTTP endpoint at {}", listener.local_addr()?);

    // Create a Service from the router above to handle incoming requests.
    let service = RouterService::new(router_builder.build().map_err(|err| anyhow!(err))?).unwrap();
@@ -294,48 +189,3 @@ where

    Ok(())
 }
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use futures::future::poll_fn;
-    use hyper::service::Service;
-    use routerify::RequestServiceBuilder;
-    use std::net::{IpAddr, SocketAddr};
-
-    #[tokio::test]
-    async fn test_request_id_returned() {
-        let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();
-        let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
-        let mut service = builder.build(remote_addr);
-        if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
-            panic!("request service is not ready: {:?}", e);
-        }
-
-        let mut req: Request<Body> = Request::default();
-        req.headers_mut()
-            .append(&X_REQUEST_ID_HEADER, HeaderValue::from_str("42").unwrap());
-
-        let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();
-
-        let header_val = resp.headers().get(&X_REQUEST_ID_HEADER).unwrap();
-
-        assert!(header_val == "42", "response header mismatch");
-    }
-
-    #[tokio::test]
-    async fn test_request_id_empty() {
-        let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap();
-        let remote_addr = SocketAddr::new(IpAddr::from_str("127.0.0.1").unwrap(), 80);
-        let mut service = builder.build(remote_addr);
-        if let Err(e) = poll_fn(|ctx| service.poll_ready(ctx)).await {
-            panic!("request service is not ready: {:?}", e);
-        }
-
-        let req: Request<Body> = Request::default();
-        let resp: Response<hyper::body::Body> = service.call(req).await.unwrap();
-
-        let header_val = resp.headers().get(&X_REQUEST_ID_HEADER);
-
-        assert_ne!(header_val, None, "response header should NOT be empty");
-    }
-}
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -13,6 +13,8 @@ pub mod simple_rcu;
 pub mod vec_map;

 pub mod bin_ser;
+pub mod postgres_backend;
+pub mod postgres_backend_async;

 // helper functions for creating and fsyncing
 pub mod crashsafe;
@@ -25,6 +27,9 @@ pub mod id;
 // http endpoint utils
 pub mod http;

+// socket splitting utils
+pub mod sock_split;
+
 // common log initialisation routine
 pub mod logging;

@@ -47,8 +52,6 @@ pub mod signals;

 pub mod fs_ext;

-pub mod history_buffer;
-
 /// use with fail::cfg("$name", "return(2000)")
 #[macro_export]
 macro_rules! failpoint_sleep_millis_async {
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -45,115 +45,3 @@ pub fn init(log_format: LogFormat) -> anyhow::Result<()> {

    Ok(())
 }
-
-/// Disable the default rust panic hook by using `set_hook`.
-///
-/// For neon binaries, the assumption is that tracing is configured before with [`init`], after
-/// that sentry is configured (if needed). sentry will install it's own on top of this, always
-/// processing the panic before we log it.
-///
-/// When the return value is dropped, the hook is reverted to std default hook (prints to stderr).
-/// If the assumptions about the initialization order are not held, use
-/// [`TracingPanicHookGuard::disarm`] but keep in mind, if tracing is stopped, then panics will be
-/// lost.
-#[must_use]
-pub fn replace_panic_hook_with_tracing_panic_hook() -> TracingPanicHookGuard {
-    std::panic::set_hook(Box::new(tracing_panic_hook));
-    TracingPanicHookGuard::new()
-}
-
-/// Drop guard which restores the std panic hook on drop.
-///
-/// Tracing should not be used when it's not configured, but we cannot really latch on to any
-/// imaginary lifetime of tracing.
-pub struct TracingPanicHookGuard {
-    act: bool,
-}
-
-impl TracingPanicHookGuard {
-    fn new() -> Self {
-        TracingPanicHookGuard { act: true }
-    }
-
-    /// Make this hook guard not do anything when dropped.
-    pub fn forget(&mut self) {
-        self.act = false;
-    }
-}
-
-impl Drop for TracingPanicHookGuard {
-    fn drop(&mut self) {
-        if self.act {
-            let _ = std::panic::take_hook();
-        }
-    }
-}
-
-/// Named symbol for our panic hook, which logs the panic.
-fn tracing_panic_hook(info: &std::panic::PanicInfo) {
-    // following rust 1.66.1 std implementation:
-    // https://github.com/rust-lang/rust/blob/90743e7298aca107ddaa0c202a4d3604e29bfeb6/library/std/src/panicking.rs#L235-L288
-    let location = info.location();
-
-    let msg = match info.payload().downcast_ref::<&'static str>() {
-        Some(s) => *s,
-        None => match info.payload().downcast_ref::<String>() {
-            Some(s) => &s[..],
-            None => "Box<dyn Any>",
-        },
-    };
-
-    let thread = std::thread::current();
-    let thread = thread.name().unwrap_or("<unnamed>");
-    let backtrace = std::backtrace::Backtrace::capture();
-
-    let _entered = if let Some(location) = location {
-        tracing::error_span!("panic", %thread, location = %PrettyLocation(location))
-    } else {
-        // very unlikely to hit here, but the guarantees of std could change
-        tracing::error_span!("panic", %thread)
-    }
-    .entered();
-
-    if backtrace.status() == std::backtrace::BacktraceStatus::Captured {
-        // this has an annoying extra '\n' in the end which anyhow doesn't do, but we cannot really
-        // get rid of it as we cannot get in between of std::fmt::Formatter<'_>; we could format to
-        // string, maybe even to a TLS one but tracing already does that.
-        tracing::error!("{msg}\n\nStack backtrace:\n{backtrace}");
-    } else {
-        tracing::error!("{msg}");
-    }
-
-    // ensure that we log something on the panic if this hook is left after tracing has been
-    // unconfigured. worst case when teardown is racing the panic is to log the panic twice.
-    tracing::dispatcher::get_default(|d| {
-        if let Some(_none) = d.downcast_ref::<tracing::subscriber::NoSubscriber>() {
-            let location = location.map(PrettyLocation);
-            log_panic_to_stderr(thread, msg, location, &backtrace);
-        }
-    });
-}
-
-#[cold]
-fn log_panic_to_stderr(
-    thread: &str,
-    msg: &str,
-    location: Option<PrettyLocation<'_, '_>>,
-    backtrace: &std::backtrace::Backtrace,
-) {
-    eprintln!("panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}");
-}
-
-struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>);
-
-impl std::fmt::Display for PrettyLocation<'_, '_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}:{}", self.0.file(), self.0.line(), self.0.column())
-    }
-}
-
-impl std::fmt::Debug for PrettyLocation<'_, '_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        <Self as std::fmt::Display>::fmt(self, f)
-    }
-}
--- a/libs/utils/src/postgres_backend.rs
+++ b/libs/utils/src/postgres_backend.rs
@@ -0,0 +1,485 @@
+//! Server-side synchronous Postgres connection, as limited as we need.
+//! To use, create PostgresBackend and run() it, passing the Handler
+//! implementation determining how to process the queries. Currently its API
+//! is rather narrow, but we can extend it once required.
+
+use crate::postgres_backend_async::{log_query_error, short_error, QueryError};
+use crate::sock_split::{BidiStream, ReadStream, WriteStream};
+use anyhow::Context;
+use bytes::{Bytes, BytesMut};
+use pq_proto::{BeMessage, FeMessage, FeStartupPacket};
+use serde::{Deserialize, Serialize};
+use std::fmt;
+use std::io::{self, Write};
+use std::net::{Shutdown, SocketAddr, TcpStream};
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::Duration;
+use tracing::*;
+
+pub trait Handler {
+    /// Handle single query.
+    /// postgres_backend will issue ReadyForQuery after calling this (this
+    /// might not be what we want after CopyData streaming, but currently we don't
+    /// care).
+    fn process_query(
+        &mut self,
+        pgb: &mut PostgresBackend,
+        query_string: &str,
+    ) -> Result<(), QueryError>;
+
+    /// Called when the startup packet is received; allows processing of its params.
+    ///
+    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new user
+    /// creation in the proxy code. That is quite a hacky and ad-hoc solution; maybe we could
+    /// allow implementations to override the whole init logic.
+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _sm: &FeStartupPacket,
+    ) -> Result<(), QueryError> {
+        Ok(())
+    }
+
+    /// Check auth jwt
+    fn check_auth_jwt(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _jwt_response: &[u8],
+    ) -> Result<(), QueryError> {
+        Err(QueryError::Other(anyhow::anyhow!("JWT auth failed")))
+    }
+
+    fn is_shutdown_requested(&self) -> bool {
+        false
+    }
+}
+
+/// PostgresBackend protocol state.
+/// XXX: The order of the constructors matters.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
+pub enum ProtoState {
+    Initialization,
+    Encrypted,
+    Authentication,
+    Established,
+}
+
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]
+pub enum AuthType {
+    Trust,
+    // This mimics postgres's AuthenticationCleartextPassword but instead of password expects JWT
+    NeonJWT,
+}
+
+impl FromStr for AuthType {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "Trust" => Ok(Self::Trust),
+            "NeonJWT" => Ok(Self::NeonJWT),
+            _ => anyhow::bail!("invalid value \"{s}\" for auth type"),
+        }
+    }
+}
+
+impl fmt::Display for AuthType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            AuthType::Trust => "Trust",
+            AuthType::NeonJWT => "NeonJWT",
+        })
+    }
+}
+
+#[derive(Clone, Copy)]
+pub enum ProcessMsgResult {
+    Continue,
+    Break,
+}
+
+/// Always-writeable sock_split stream.
+/// May not be readable. See [`PostgresBackend::take_stream_in`]
+pub enum Stream {
+    Bidirectional(BidiStream),
+    WriteOnly(WriteStream),
+}
+
+impl Stream {
+    fn shutdown(&mut self, how: Shutdown) -> io::Result<()> {
+        match self {
+            Self::Bidirectional(bidi_stream) => bidi_stream.shutdown(how),
+            Self::WriteOnly(write_stream) => write_stream.shutdown(how),
+        }
+    }
+}
+
+impl io::Write for Stream {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        match self {
+            Self::Bidirectional(bidi_stream) => bidi_stream.write(buf),
+            Self::WriteOnly(write_stream) => write_stream.write(buf),
+        }
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        match self {
+            Self::Bidirectional(bidi_stream) => bidi_stream.flush(),
+            Self::WriteOnly(write_stream) => write_stream.flush(),
+        }
+    }
+}
+
+pub struct PostgresBackend {
+    stream: Option<Stream>,
+    // Output buffer. See BeMessage::write for why we are using BytesMut here.
+    buf_out: BytesMut,
+
+    pub state: ProtoState,
+
+    auth_type: AuthType,
+
+    peer_addr: SocketAddr,
+    pub tls_config: Option<Arc<rustls::ServerConfig>>,
+}
+
+pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
+    let mut query_string = query_string.to_vec();
+    if let Some(ch) = query_string.last() {
+        if *ch == 0 {
+            query_string.pop();
+        }
+    }
+    query_string
+}
+
+// Helper function for socket read loops
+pub fn is_socket_read_timed_out(error: &anyhow::Error) -> bool {
+    for cause in error.chain() {
+        if let Some(io_error) = cause.downcast_ref::<io::Error>() {
+            if io_error.kind() == std::io::ErrorKind::WouldBlock {
+                return true;
+            }
+        }
+    }
+    false
+}
+
+// Cast a byte slice to a string slice, dropping null terminator if there's one.
+fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
+    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
+    std::str::from_utf8(without_null).map_err(|e| e.into())
+}
+
+impl PostgresBackend {
+    pub fn new(
+        socket: TcpStream,
+        auth_type: AuthType,
+        tls_config: Option<Arc<rustls::ServerConfig>>,
+        set_read_timeout: bool,
+    ) -> io::Result<Self> {
+        let peer_addr = socket.peer_addr()?;
+        if set_read_timeout {
+            socket
+                .set_read_timeout(Some(Duration::from_secs(5)))
+                .unwrap();
+        }
+
+        Ok(Self {
+            stream: Some(Stream::Bidirectional(BidiStream::from_tcp(socket))),
+            buf_out: BytesMut::with_capacity(10 * 1024),
+            state: ProtoState::Initialization,
+            auth_type,
+            tls_config,
+            peer_addr,
+        })
+    }
+
+    pub fn into_stream(self) -> Stream {
+        self.stream.unwrap()
+    }
+
+    /// Get direct reference (into the Option) to the read stream.
+    fn get_stream_in(&mut self) -> anyhow::Result<&mut BidiStream> {
+        match &mut self.stream {
+            Some(Stream::Bidirectional(stream)) => Ok(stream),
+            _ => anyhow::bail!("reader taken"),
+        }
+    }
+
+    pub fn get_peer_addr(&self) -> &SocketAddr {
+        &self.peer_addr
+    }
+
+    pub fn take_stream_in(&mut self) -> Option<ReadStream> {
+        let stream = self.stream.take();
+        match stream {
+            Some(Stream::Bidirectional(bidi_stream)) => {
+                let (read, write) = bidi_stream.split();
+                self.stream = Some(Stream::WriteOnly(write));
+                Some(read)
+            }
+            stream => {
+                self.stream = stream;
+                None
+            }
+        }
+    }
+
+    /// Read full message or return None if connection is closed.
+    pub fn read_message(&mut self) -> Result<Option<FeMessage>, QueryError> {
+        let (state, stream) = (self.state, self.get_stream_in()?);
+
+        use ProtoState::*;
+        match state {
+            Initialization | Encrypted => FeStartupPacket::read(stream),
+            Authentication | Established => FeMessage::read(stream),
+        }
+        .map_err(QueryError::from)
+    }
+
+    /// Write message into internal output buffer.
+    pub fn write_message_noflush(&mut self, message: &BeMessage) -> io::Result<&mut Self> {
+        BeMessage::write(&mut self.buf_out, message)?;
+        Ok(self)
+    }
+
+    /// Flush output buffer into the socket.
+    pub fn flush(&mut self) -> io::Result<&mut Self> {
+        let stream = self.stream.as_mut().unwrap();
+        stream.write_all(&self.buf_out)?;
+        self.buf_out.clear();
+        Ok(self)
+    }
+
+    /// Write message into internal buffer and flush it.
+    pub fn write_message(&mut self, message: &BeMessage) -> io::Result<&mut Self> {
+        self.write_message_noflush(message)?;
+        self.flush()
+    }
+
+    // Wrapper for run_message_loop() that shuts down socket when we are done
+    pub fn run(mut self, handler: &mut impl Handler) -> Result<(), QueryError> {
+        let ret = self.run_message_loop(handler);
+        if let Some(stream) = self.stream.as_mut() {
+            let _ = stream.shutdown(Shutdown::Both);
+        }
+        ret
+    }
+
+    fn run_message_loop(&mut self, handler: &mut impl Handler) -> Result<(), QueryError> {
+        trace!("postgres backend to {:?} started", self.peer_addr);
+
+        let mut unnamed_query_string = Bytes::new();
+
+        while !handler.is_shutdown_requested() {
+            match self.read_message() {
+                Ok(message) => {
+                    if let Some(msg) = message {
+                        trace!("got message {msg:?}");
+
+                        match self.process_message(handler, msg, &mut unnamed_query_string)? {
+                            ProcessMsgResult::Continue => continue,
+                            ProcessMsgResult::Break => break,
+                        }
+                    } else {
+                        break;
+                    }
+                }
+                Err(e) => {
+                    if let QueryError::Other(e) = &e {
+                        if is_socket_read_timed_out(e) {
+                            continue;
+                        }
+                    }
+                    return Err(e);
+                }
+            }
+        }
+
+        trace!("postgres backend to {:?} exited", self.peer_addr);
+        Ok(())
+    }
+
+    pub fn start_tls(&mut self) -> anyhow::Result<()> {
+        match self.stream.take() {
+            Some(Stream::Bidirectional(bidi_stream)) => {
+                let conn = rustls::ServerConnection::new(self.tls_config.clone().unwrap())?;
+                self.stream = Some(Stream::Bidirectional(bidi_stream.start_tls(conn)?));
+                Ok(())
+            }
+            stream => {
+                self.stream = stream;
+                anyhow::bail!("can't start TLs without bidi stream");
+            }
+        }
+    }
+
+    fn process_message(
+        &mut self,
+        handler: &mut impl Handler,
+        msg: FeMessage,
+        unnamed_query_string: &mut Bytes,
+    ) -> Result<ProcessMsgResult, QueryError> {
+        // Allow only startup and password messages during auth. Otherwise client would be able to bypass auth
+        // TODO: change that to proper top-level match of protocol state with separate message handling for each state
+        if self.state < ProtoState::Established
+            && !matches!(
+                msg,
+                FeMessage::PasswordMessage(_) | FeMessage::StartupPacket(_)
+            )
+        {
+            return Err(QueryError::Other(anyhow::anyhow!("protocol violation")));
+        }
+
+        let have_tls = self.tls_config.is_some();
+        match msg {
+            FeMessage::StartupPacket(m) => {
+                trace!("got startup message {m:?}");
+
+                match m {
+                    FeStartupPacket::SslRequest => {
+                        debug!("SSL requested");
+
+                        self.write_message(&BeMessage::EncryptionResponse(have_tls))?;
+                        if have_tls {
+                            self.start_tls()?;
+                            self.state = ProtoState::Encrypted;
+                        }
+                    }
+                    FeStartupPacket::GssEncRequest => {
+                        debug!("GSS requested");
+                        self.write_message(&BeMessage::EncryptionResponse(false))?;
+                    }
+                    FeStartupPacket::StartupMessage { .. } => {
+                        if have_tls && !matches!(self.state, ProtoState::Encrypted) {
+                            self.write_message(&BeMessage::ErrorResponse(
+                                "must connect with TLS",
+                                None,
+                            ))?;
+                            return Err(QueryError::Other(anyhow::anyhow!(
+                                "client did not connect with TLS"
+                            )));
+                        }
+
+                        // NB: startup() may change self.auth_type -- we are using that in proxy code
+                        // to bypass auth for new users.
+                        handler.startup(self, &m)?;
+
+                        match self.auth_type {
+                            AuthType::Trust => {
+                                self.write_message_noflush(&BeMessage::AuthenticationOk)?
+                                    .write_message_noflush(&BeMessage::CLIENT_ENCODING)?
+                                    // The async python driver requires a valid server_version
+                                    .write_message_noflush(&BeMessage::server_version("14.1"))?
+                                    .write_message(&BeMessage::ReadyForQuery)?;
+                                self.state = ProtoState::Established;
+                            }
+                            AuthType::NeonJWT => {
+                                self.write_message(&BeMessage::AuthenticationCleartextPassword)?;
+                                self.state = ProtoState::Authentication;
+                            }
+                        }
+                    }
+                    FeStartupPacket::CancelRequest { .. } => {
+                        return Ok(ProcessMsgResult::Break);
+                    }
+                }
+            }
+
+            FeMessage::PasswordMessage(m) => {
+                trace!("got password message '{:?}'", m);
+
+                assert!(self.state == ProtoState::Authentication);
+
+                match self.auth_type {
+                    AuthType::Trust => unreachable!(),
+                    AuthType::NeonJWT => {
+                        let (_, jwt_response) = m.split_last().context("protocol violation")?;
+
+                        if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
+                            self.write_message(&BeMessage::ErrorResponse(
+                                &e.to_string(),
+                                Some(e.pg_error_code()),
+                            ))?;
+                            return Err(e);
+                        }
+                    }
+                }
+                self.write_message_noflush(&BeMessage::AuthenticationOk)?
+                    .write_message_noflush(&BeMessage::CLIENT_ENCODING)?
+                    .write_message(&BeMessage::ReadyForQuery)?;
+                self.state = ProtoState::Established;
+            }
+
+            FeMessage::Query(body) => {
+                // remove null terminator
+                let query_string = cstr_to_str(&body)?;
+
+                trace!("got query {query_string:?}");
+                if let Err(e) = handler.process_query(self, query_string) {
+                    log_query_error(query_string, &e);
+                    let short_error = short_error(&e);
+                    self.write_message_noflush(&BeMessage::ErrorResponse(
+                        &short_error,
+                        Some(e.pg_error_code()),
+                    ))?;
+                }
+                self.write_message(&BeMessage::ReadyForQuery)?;
+            }
+
+            FeMessage::Parse(m) => {
+                *unnamed_query_string = m.query_string;
+                self.write_message(&BeMessage::ParseComplete)?;
+            }
+
+            FeMessage::Describe(_) => {
+                self.write_message_noflush(&BeMessage::ParameterDescription)?
+                    .write_message(&BeMessage::NoData)?;
+            }
+
+            FeMessage::Bind(_) => {
+                self.write_message(&BeMessage::BindComplete)?;
+            }
+
+            FeMessage::Close(_) => {
+                self.write_message(&BeMessage::CloseComplete)?;
+            }
+
+            FeMessage::Execute(_) => {
+                let query_string = cstr_to_str(unnamed_query_string)?;
+                trace!("got execute {query_string:?}");
+                if let Err(e) = handler.process_query(self, query_string) {
+                    log_query_error(query_string, &e);
+                    self.write_message(&BeMessage::ErrorResponse(
+                        &e.to_string(),
+                        Some(e.pg_error_code()),
+                    ))?;
+                }
+                // NOTE there is no ReadyForQuery message. This handler is used
+                // for basebackup and it uses CopyOut which doesn't require
+                // ReadyForQuery message and backend just switches back to
+                // processing mode after sending CopyDone or ErrorResponse.
+            }
+
+            FeMessage::Sync => {
+                self.write_message(&BeMessage::ReadyForQuery)?;
+            }
+
+            FeMessage::Terminate => {
+                return Ok(ProcessMsgResult::Break);
+            }
+
+            // We prefer explicit pattern matching to wildcards, because
+            // this helps us spot the places where new variants are missing
+            FeMessage::CopyData(_) | FeMessage::CopyDone | FeMessage::CopyFail => {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "unexpected message type: {msg:?}"
+                )));
+            }
+        }
+
+        Ok(ProcessMsgResult::Continue)
+    }
+}
--- a/libs/utils/src/postgres_backend_async.rs
+++ b/libs/utils/src/postgres_backend_async.rs
@@ -0,0 +1,634 @@
+//! Server-side asynchronous Postgres connection, as limited as we need.
+//! To use, create PostgresBackend and run() it, passing the Handler
+//! implementation determining how to process the queries. Currently its API
+//! is rather narrow, but we can extend it once required.
+
+use crate::postgres_backend::AuthType;
+use anyhow::Context;
+use bytes::{Buf, Bytes, BytesMut};
+use pq_proto::{BeMessage, ConnectionError, FeMessage, FeStartupPacket, SQLSTATE_INTERNAL_ERROR};
+use std::io;
+use std::net::SocketAddr;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::Poll;
+use std::{future::Future, task::ready};
+use tracing::{debug, error, info, trace};
+
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
+use tokio_rustls::TlsAcceptor;
+
+pub fn is_expected_io_error(e: &io::Error) -> bool {
+    use io::ErrorKind::*;
+    matches!(
+        e.kind(),
+        ConnectionRefused | ConnectionAborted | ConnectionReset
+    )
+}
+
+/// An error that occurred during query processing:
+/// either during the connection ([`ConnectionError`]) or before/after it.
+#[derive(thiserror::Error, Debug)]
+pub enum QueryError {
+    /// The connection was lost while processing the query.
+    #[error(transparent)]
+    Disconnected(#[from] ConnectionError),
+    /// Some other error
+    #[error(transparent)]
+    Other(#[from] anyhow::Error),
+}
+
+impl From<io::Error> for QueryError {
+    fn from(e: io::Error) -> Self {
+        Self::Disconnected(ConnectionError::Socket(e))
+    }
+}
+
+impl QueryError {
+    pub fn pg_error_code(&self) -> &'static [u8; 5] {
+        match self {
+            Self::Disconnected(_) => b"08006",         // connection failure
+            Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error
+        }
+    }
+}
+
+#[async_trait::async_trait]
+pub trait Handler {
+    /// Handle single query.
+    /// postgres_backend will issue ReadyForQuery after calling this (this
+    /// might not be what we want after CopyData streaming, but currently we don't
+    /// care).
+    async fn process_query(
+        &mut self,
+        pgb: &mut PostgresBackend,
+        query_string: &str,
+    ) -> Result<(), QueryError>;
+
+    /// Called when the startup packet is received; allows processing of its params.
+    ///
+    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new user
+    /// creation in the proxy code. That is quite a hacky and ad-hoc solution; maybe we could
+    /// allow implementations to override the whole init logic.
+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _sm: &FeStartupPacket,
+    ) -> Result<(), QueryError> {
+        Ok(())
+    }
+
+    /// Check auth jwt
+    fn check_auth_jwt(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _jwt_response: &[u8],
+    ) -> Result<(), QueryError> {
+        Err(QueryError::Other(anyhow::anyhow!("JWT auth failed")))
+    }
+}
+
+/// PostgresBackend protocol state.
+/// XXX: The order of the constructors matters.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd)]
+pub enum ProtoState {
+    Initialization,
+    Encrypted,
+    Authentication,
+    Established,
+    Closed,
+}
+
+#[derive(Clone, Copy)]
+pub enum ProcessMsgResult {
+    Continue,
+    Break,
+}
+
+/// Always-writeable sock_split stream.
+/// May not be readable. See [`PostgresBackend::take_stream_in`]
+pub enum Stream {
+    Unencrypted(BufReader<tokio::net::TcpStream>),
+    Tls(Box<tokio_rustls::server::TlsStream<BufReader<tokio::net::TcpStream>>>),
+    Broken,
+}
+
+impl AsyncWrite for Stream {
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &[u8],
+    ) -> Poll<io::Result<usize>> {
+        match self.get_mut() {
+            Self::Unencrypted(stream) => Pin::new(stream).poll_write(cx, buf),
+            Self::Tls(stream) => Pin::new(stream).poll_write(cx, buf),
+            Self::Broken => unreachable!(),
+        }
+    }
+    fn poll_flush(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<io::Result<()>> {
+        match self.get_mut() {
+            Self::Unencrypted(stream) => Pin::new(stream).poll_flush(cx),
+            Self::Tls(stream) => Pin::new(stream).poll_flush(cx),
+            Self::Broken => unreachable!(),
+        }
+    }
+    fn poll_shutdown(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<io::Result<()>> {
+        match self.get_mut() {
+            Self::Unencrypted(stream) => Pin::new(stream).poll_shutdown(cx),
+            Self::Tls(stream) => Pin::new(stream).poll_shutdown(cx),
+            Self::Broken => unreachable!(),
+        }
+    }
+}
+impl AsyncRead for Stream {
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &mut tokio::io::ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        match self.get_mut() {
+            Self::Unencrypted(stream) => Pin::new(stream).poll_read(cx, buf),
+            Self::Tls(stream) => Pin::new(stream).poll_read(cx, buf),
+            Self::Broken => unreachable!(),
+        }
+    }
+}
+
+pub struct PostgresBackend {
+    stream: Stream,
+
+    // Output buffer. See BeMessage::write for why we are using BytesMut here.
+    // The data between 0 and the "current position", as tracked by the bytes::Buf
+    // implementation of BytesMut, has already been written.
+    buf_out: BytesMut,
+
+    pub state: ProtoState,
+
+    auth_type: AuthType,
+
+    peer_addr: SocketAddr,
+    pub tls_config: Option<Arc<rustls::ServerConfig>>,
+}
+
+pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
+    let mut query_string = query_string.to_vec();
+    if let Some(ch) = query_string.last() {
+        if *ch == 0 {
+            query_string.pop();
+        }
+    }
+    query_string
+}
+
+// Cast a byte slice to a string slice, dropping null terminator if there's one.
+fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
+    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
+    std::str::from_utf8(without_null).map_err(|e| e.into())
+}
+
+impl PostgresBackend {
+    pub fn new(
+        socket: tokio::net::TcpStream,
+        auth_type: AuthType,
+        tls_config: Option<Arc<rustls::ServerConfig>>,
+    ) -> io::Result<Self> {
+        let peer_addr = socket.peer_addr()?;
+
+        Ok(Self {
+            stream: Stream::Unencrypted(BufReader::new(socket)),
+            buf_out: BytesMut::with_capacity(10 * 1024),
+            state: ProtoState::Initialization,
+            auth_type,
+            tls_config,
+            peer_addr,
+        })
+    }
+
+    pub fn get_peer_addr(&self) -> &SocketAddr {
+        &self.peer_addr
+    }
+
+    /// Read full message or return None if connection is closed.
+    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, QueryError> {
+        use ProtoState::*;
+        match self.state {
+            Initialization | Encrypted => FeStartupPacket::read_fut(&mut self.stream).await,
+            Authentication | Established => FeMessage::read_fut(&mut self.stream).await,
+            Closed => Ok(None),
+        }
+        .map_err(QueryError::from)
+    }
+
+    /// Flush output buffer into the socket.
+    pub async fn flush(&mut self) -> io::Result<()> {
+        while self.buf_out.has_remaining() {
+            let bytes_written = self.stream.write(self.buf_out.chunk()).await?;
+            self.buf_out.advance(bytes_written);
+        }
+        self.buf_out.clear();
+        Ok(())
+    }
+
+    /// Write message into internal output buffer.
+    pub fn write_message(&mut self, message: &BeMessage<'_>) -> io::Result<&mut Self> {
+        BeMessage::write(&mut self.buf_out, message)?;
+        Ok(self)
+    }
+
+    /// Returns an AsyncWrite implementation that wraps all the data written
+    /// to it in CopyData messages, and writes them to the connection
+    ///
+    /// The caller is responsible for sending CopyOutResponse and CopyDone messages.
+    pub fn copyout_writer(&mut self) -> CopyDataWriter {
+        CopyDataWriter { pgb: self }
+    }
+
+    /// A polling function that tries to write all the data from 'buf_out' to the
+    /// underlying stream.
+    fn poll_write_buf(
+        &mut self,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Result<(), std::io::Error>> {
+        while self.buf_out.has_remaining() {
+            match ready!(Pin::new(&mut self.stream).poll_write(cx, self.buf_out.chunk())) {
+                Ok(bytes_written) => self.buf_out.advance(bytes_written),
+                Err(err) => return Poll::Ready(Err(err)),
+            }
+        }
+        Poll::Ready(Ok(()))
+    }
+
+    fn poll_flush(&mut self, cx: &mut std::task::Context<'_>) -> Poll<Result<(), std::io::Error>> {
+        Pin::new(&mut self.stream).poll_flush(cx)
+    }
+
+    // Wrapper for run_message_loop() that shuts down socket when we are done
+    pub async fn run<F, S>(
+        mut self,
+        handler: &mut impl Handler,
+        shutdown_watcher: F,
+    ) -> Result<(), QueryError>
+    where
+        F: Fn() -> S,
+        S: Future,
+    {
+        let ret = self.run_message_loop(handler, shutdown_watcher).await;
+        let _ = self.stream.shutdown();
+        ret
+    }
+
+    async fn run_message_loop<F, S>(
+        &mut self,
+        handler: &mut impl Handler,
+        shutdown_watcher: F,
+    ) -> Result<(), QueryError>
+    where
+        F: Fn() -> S,
+        S: Future,
+    {
+        trace!("postgres backend to {:?} started", self.peer_addr);
+
+        tokio::select!(
+            biased;
+
+            _ = shutdown_watcher() => {
+                // We were requested to shut down.
+                tracing::info!("shutdown request received during handshake");
+                return Ok(())
+            },
+
+            result = async {
+                while self.state < ProtoState::Established {
+                    if let Some(msg) = self.read_message().await? {
+                        trace!("got message {msg:?} during handshake");
+
+                        match self.process_handshake_message(handler, msg).await? {
+                            ProcessMsgResult::Continue => {
+                                self.flush().await?;
+                                continue;
+                            }
+                            ProcessMsgResult::Break => {
+                                trace!("postgres backend to {:?} exited during handshake", self.peer_addr);
+                                return Ok(());
+                            }
+                        }
+                    } else {
+                        trace!("postgres backend to {:?} exited during handshake", self.peer_addr);
+                        return Ok(());
+                    }
+                }
+                Ok::<(), QueryError>(())
+            } => {
+                // Handshake complete.
+                result?;
+            }
+        );
+
+        // Authentication completed
+        let mut query_string = Bytes::new();
+        while let Some(msg) = tokio::select!(
+            biased;
+            _ = shutdown_watcher() => {
+                // We were requested to shut down.
+                tracing::info!("shutdown request received in run_message_loop");
+                Ok(None)
+            },
+            msg = self.read_message() => { msg },
+        )? {
+            trace!("got message {:?}", msg);
+
+            let result = self.process_message(handler, msg, &mut query_string).await;
+            self.flush().await?;
+            match result? {
+                ProcessMsgResult::Continue => {
+                    self.flush().await?;
+                    continue;
+                }
+                ProcessMsgResult::Break => break,
+            }
+        }
+
+        trace!("postgres backend to {:?} exited", self.peer_addr);
+        Ok(())
+    }
+
+    async fn start_tls(&mut self) -> anyhow::Result<()> {
+        if let Stream::Unencrypted(plain_stream) =
+            std::mem::replace(&mut self.stream, Stream::Broken)
+        {
+            let acceptor = TlsAcceptor::from(self.tls_config.clone().unwrap());
+            let tls_stream = acceptor.accept(plain_stream).await?;
+
+            self.stream = Stream::Tls(Box::new(tls_stream));
+            return Ok(());
+        };
+        anyhow::bail!("TLS already started");
+    }
+
+    /// Handles a single frontend message received before the connection reaches
+    /// `ProtoState::Established`: encryption negotiation, the startup packet and
+    /// password (JWT) authentication.
+    ///
+    /// Returns `ProcessMsgResult::Continue` if the caller should go on reading
+    /// messages. Returns `ProcessMsgResult::Break`, after switching to
+    /// `ProtoState::Closed`, for a cancel request and for any message that is
+    /// neither a startup packet nor a password message.
+    ///
+    /// # Errors
+    ///
+    /// Fails if writing a response or starting TLS fails, if a startup message
+    /// arrives unencrypted although TLS is configured, if `handler.startup` or
+    /// `handler.check_auth_jwt` rejects the client, or if the password message is
+    /// empty.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the state is not strictly before `ProtoState::Established`, or if
+    /// a password message arrives while the state is not
+    /// `ProtoState::Authentication`.
+    async fn process_handshake_message(
+        &mut self,
+        handler: &mut impl Handler,
+        msg: FeMessage,
+    ) -> Result<ProcessMsgResult, QueryError> {
+        assert!(self.state < ProtoState::Established);
+        // TLS is offered, and required for the startup message, only when a
+        // server TLS config is present.
+        let have_tls = self.tls_config.is_some();
+        match msg {
+            FeMessage::StartupPacket(m) => {
+                trace!("got startup message {m:?}");
+
+                match m {
+                    FeStartupPacket::SslRequest => {
+                        debug!("SSL requested");
+
+                        // Tell the client whether TLS will be used before
+                        // switching the stream over.
+                        self.write_message(&BeMessage::EncryptionResponse(have_tls))?;
+                        if have_tls {
+                            self.start_tls().await?;
+                            self.state = ProtoState::Encrypted;
+                        }
+                    }
+                    FeStartupPacket::GssEncRequest => {
+                        debug!("GSS requested");
+                        // GSS encryption is always refused.
+                        self.write_message(&BeMessage::EncryptionResponse(false))?;
+                    }
+                    FeStartupPacket::StartupMessage { .. } => {
+                        // With TLS configured, a startup message on a connection
+                        // that has not been upgraded is rejected.
+                        if have_tls && !matches!(self.state, ProtoState::Encrypted) {
+                            self.write_message(&BeMessage::ErrorResponse(
+                                "must connect with TLS",
+                                None,
+                            ))?;
+                            return Err(QueryError::Other(anyhow::anyhow!(
+                                "client did not connect with TLS"
+                            )));
+                        }
+
+                        // NB: startup() may change self.auth_type -- we are using that in proxy code
+                        // to bypass auth for new users.
+                        handler.startup(self, &m)?;
+
+                        match self.auth_type {
+                            // No authentication: the connection is usable right away.
+                            AuthType::Trust => {
+                                self.write_message(&BeMessage::AuthenticationOk)?
+                                    .write_message(&BeMessage::CLIENT_ENCODING)?
+                                    // The async python driver requires a valid server_version
+                                    .write_message(&BeMessage::server_version("14.1"))?
+                                    .write_message(&BeMessage::ReadyForQuery)?;
+                                self.state = ProtoState::Established;
+                            }
+                            // Ask for a cleartext password; it is checked as a JWT
+                            // in the `PasswordMessage` arm below.
+                            AuthType::NeonJWT => {
+                                self.write_message(&BeMessage::AuthenticationCleartextPassword)?;
+                                self.state = ProtoState::Authentication;
+                            }
+                        }
+                    }
+                    FeStartupPacket::CancelRequest { .. } => {
+                        // Cancel requests are not acted upon here; the connection
+                        // is simply closed.
+                        self.state = ProtoState::Closed;
+                        return Ok(ProcessMsgResult::Break);
+                    }
+                }
+            }
+
+            FeMessage::PasswordMessage(m) => {
+                trace!("got password message '{:?}'", m);
+
+                assert!(self.state == ProtoState::Authentication);
+
+                match self.auth_type {
+                    // The `Authentication` state is only entered for `NeonJWT` above.
+                    AuthType::Trust => unreachable!(),
+                    AuthType::NeonJWT => {
+                        // Drop the final byte of the payload (presumably the C-string
+                        // NUL terminator -- TODO confirm); an empty payload is a
+                        // protocol violation.
+                        let (_, jwt_response) = m.split_last().context("protocol violation")?;
+
+                        // Report the failure to the client before propagating it.
+                        if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
+                            self.write_message(&BeMessage::ErrorResponse(
+                                &e.to_string(),
+                                Some(e.pg_error_code()),
+                            ))?;
+                            return Err(e);
+                        }
+                    }
+                }
+                self.write_message(&BeMessage::AuthenticationOk)?
+                    .write_message(&BeMessage::CLIENT_ENCODING)?
+                    .write_message(&BeMessage::ReadyForQuery)?;
+                self.state = ProtoState::Established;
+            }
+
+            // Anything else is not allowed before the connection is established.
+            _ => {
+                self.state = ProtoState::Closed;
+                return Ok(ProcessMsgResult::Break);
+            }
+        }
+        Ok(ProcessMsgResult::Continue)
+    }
+
+    /// Handles a single frontend message on an established (authenticated)
+    /// connection.
+    ///
+    /// `unnamed_query_string` carries the query text of the most recent `Parse`
+    /// message over to the following `Execute`.
+    ///
+    /// Returns `ProcessMsgResult::Break` on `Terminate` and
+    /// `ProcessMsgResult::Continue` after every other handled message.
+    ///
+    /// # Errors
+    ///
+    /// Fails on startup, password or copy messages, on a query string rejected
+    /// by `cstr_to_str`, and when a response cannot be written. Errors returned
+    /// by `handler.process_query` are logged and reported to the client as an
+    /// `ErrorResponse`; they do not fail this function.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the state is not `ProtoState::Established`.
+    async fn process_message(
+        &mut self,
+        handler: &mut impl Handler,
+        msg: FeMessage,
+        unnamed_query_string: &mut Bytes,
+    ) -> Result<ProcessMsgResult, QueryError> {
+        // This function must only run once the connection is established; startup and
+        // password messages are rejected below. Otherwise client would be able to bypass auth
+        // TODO: change that to proper top-level match of protocol state with separate message handling for each state
+        assert!(self.state == ProtoState::Established);
+
+        match msg {
+            FeMessage::StartupPacket(_) | FeMessage::PasswordMessage(_) => {
+                return Err(QueryError::Other(anyhow::anyhow!("protocol violation")));
+            }
+
+            FeMessage::Query(body) => {
+                // remove null terminator
+                let query_string = cstr_to_str(&body)?;
+
+                trace!("got query {query_string:?}");
+                // A failed query is reported to the client; the connection stays open.
+                if let Err(e) = handler.process_query(self, query_string).await {
+                    log_query_error(query_string, &e);
+                    let short_error = short_error(&e);
+                    self.write_message(&BeMessage::ErrorResponse(
+                        &short_error,
+                        Some(e.pg_error_code()),
+                    ))?;
+                }
+                self.write_message(&BeMessage::ReadyForQuery)?;
+            }
+
+            FeMessage::Parse(m) => {
+                // Remember the query text; it is run by a later `Execute`.
+                *unnamed_query_string = m.query_string;
+                self.write_message(&BeMessage::ParseComplete)?;
+            }
+
+            // Describe, Bind and Close are acknowledged with fixed responses.
+            FeMessage::Describe(_) => {
+                self.write_message(&BeMessage::ParameterDescription)?
+                    .write_message(&BeMessage::NoData)?;
+            }
+
+            FeMessage::Bind(_) => {
+                self.write_message(&BeMessage::BindComplete)?;
+            }
+
+            FeMessage::Close(_) => {
+                self.write_message(&BeMessage::CloseComplete)?;
+            }
+
+            FeMessage::Execute(_) => {
+                let query_string = cstr_to_str(unnamed_query_string)?;
+                trace!("got execute {query_string:?}");
+                // NOTE(review): the Query arm reports `short_error(&e)` whereas this arm
+                // reports `e.to_string()` -- confirm the difference is intended.
+                if let Err(e) = handler.process_query(self, query_string).await {
+                    log_query_error(query_string, &e);
+                    self.write_message(&BeMessage::ErrorResponse(
+                        &e.to_string(),
+                        Some(e.pg_error_code()),
+                    ))?;
+                }
+                // NOTE there is no ReadyForQuery message. This handler is used
+                // for basebackup and it uses CopyOut which doesn't require
+                // ReadyForQuery message and backend just switches back to
+                // processing mode after sending CopyDone or ErrorResponse.
+            }
+
+            FeMessage::Sync => {
+                self.write_message(&BeMessage::ReadyForQuery)?;
+            }
+
+            FeMessage::Terminate => {
+                return Ok(ProcessMsgResult::Break);
+            }
+
+            // We prefer explicit pattern matching to wildcards, because
+            // this helps us spot the places where new variants are missing
+            FeMessage::CopyData(_) | FeMessage::CopyDone | FeMessage::CopyFail => {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "unexpected message type: {:?}",
+                    msg
+                )));
+            }
+        }
+
+        Ok(ProcessMsgResult::Continue)
+    }
+}
+
+/// Adapter that exposes a [`PostgresBackend`] as an `AsyncWrite` sink: every
+/// buffer passed to `poll_write` is sent as one CopyData message.
+///
+/// NOTE(review): the trait implemented below has `poll_shutdown`, which looks
+/// like tokio's `AsyncWrite` rather than the `futures` one -- confirm against
+/// the file's imports.
+pub struct CopyDataWriter<'a> {
+    pgb: &'a mut PostgresBackend,
+}
+
+impl<'a> AsyncWrite for CopyDataWriter<'a> {
+    /// Queues `buf` as a single CopyData message and reports it as fully
+    /// written.
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &[u8],
+    ) -> Poll<Result<usize, std::io::Error>> {
+        let writer = self.get_mut();
+
+        // Flushing between messages is not strictly required, but it keeps the
+        // traffic easy to follow in wireshark, and callers usually write
+        // decently-sized chunks anyway.
+        if let Err(err) = ready!(writer.pgb.poll_write_buf(cx)) {
+            return Poll::Ready(Err(err));
+        }
+
+        // XXX: a large input should be split into several CopyData messages.
+        // The right threshold is unclear, but the hard limit is that the
+        // length cannot exceed u32.
+        writer.pgb.write_message(&BeMessage::CopyData(buf))?;
+
+        Poll::Ready(Ok(buf.len()))
+    }
+
+    /// Writes out the backend's pending buffer, then flushes the backend.
+    fn poll_flush(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Result<(), std::io::Error>> {
+        let writer = self.get_mut();
+        if let Err(err) = ready!(writer.pgb.poll_write_buf(cx)) {
+            return Poll::Ready(Err(err));
+        }
+        writer.pgb.poll_flush(cx)
+    }
+
+    /// Shutting down performs exactly the same steps as flushing.
+    fn poll_shutdown(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Result<(), std::io::Error>> {
+        self.poll_flush(cx)
+    }
+}
+
+/// Produces the message text that is sent back to the client for a failed query.
+///
+/// Connection errors use their `Display` output; any other error is rendered
+/// with the alternate (`{:#}`) formatter.
+pub fn short_error(e: &QueryError) -> String {
+    match e {
+        QueryError::Other(e) => format!("{e:#}"),
+        QueryError::Disconnected(conn_err) => conn_err.to_string(),
+    }
+}
+
+/// Logs a failed query handler invocation.
+///
+/// Socket errors that `is_expected_io_error` accepts are logged at `info`
+/// level; every other failure is logged at `error` level.
+pub(super) fn log_query_error(query: &str, e: &QueryError) {
+    match e {
+        QueryError::Other(e) => {
+            error!("query handler for '{query}' failed: {e:?}");
+        }
+        QueryError::Disconnected(ConnectionError::Socket(io_error))
+            if is_expected_io_error(io_error) =>
+        {
+            info!("query handler for '{query}' failed with expected io error: {io_error}");
+        }
+        QueryError::Disconnected(ConnectionError::Socket(io_error)) => {
+            error!("query handler for '{query}' failed with io error: {io_error}");
+        }
+        QueryError::Disconnected(other_connection_error) => {
+            error!("query handler for '{query}' failed with connection error: {other_connection_error:?}")
+        }
+    }
+}
--- a/libs/utils/src/sock_split.rs
+++ b/libs/utils/src/sock_split.rs
@@ -0,0 +1,206 @@
+use std::{
+    io::{self, BufReader, Write},
+    net::{Shutdown, TcpStream},
+    sync::Arc,
+};
+
+use rustls::Connection;
+
+/// Read-side handle to a [`TcpStream`] that is held behind an [`Arc`].
+pub struct ArcTcpRead(Arc<TcpStream>);
+
+impl io::Read for ArcTcpRead {
+    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+        // `Read` is implemented for `&TcpStream`, so a shared reference suffices.
+        let mut sock: &TcpStream = &self.0;
+        sock.read(buf)
+    }
+}
+
+impl std::ops::Deref for ArcTcpRead {
+    type Target = TcpStream;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+/// A TCP stream with buffered reads; writes go to the socket directly.
+pub struct BufStream(BufReader<ArcTcpRead>);
+
+impl io::Read for BufStream {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let Self(buffered) = self;
+        buffered.read(buf)
+    }
+}
+
+impl io::Write for BufStream {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        // `Write` is implemented for `&TcpStream`.
+        let mut sock = self.get_ref();
+        sock.write(buf)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        let mut sock = self.get_ref();
+        sock.flush()
+    }
+}
+
+impl BufStream {
+    /// Consumes the wrapper and hands back the inner `BufReader`.
+    fn into_reader(self) -> BufReader<ArcTcpRead> {
+        let Self(buffered) = self;
+        buffered
+    }
+
+    /// Borrows the `TcpStream` underneath the buffered reader.
+    fn get_ref(&self) -> &TcpStream {
+        let ArcTcpRead(shared) = self.0.get_ref();
+        &**shared
+    }
+}
+
+/// Owned read half of a connection, either plain TCP or TLS.
+pub enum ReadStream {
+    Tcp(BufReader<ArcTcpRead>),
+    Tls(rustls_split::ReadHalf),
+}
+
+impl io::Read for ReadStream {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match self {
+            Self::Tls(tls_reader) => tls_reader.read(buf),
+            Self::Tcp(tcp_reader) => tcp_reader.read(buf),
+        }
+    }
+}
+
+impl ReadStream {
+    /// Shuts down the connection in the direction(s) given by `how`.
+    pub fn shutdown(&mut self, how: Shutdown) -> io::Result<()> {
+        match self {
+            Self::Tls(read_half) => read_half.shutdown(how),
+            Self::Tcp(tcp_reader) => {
+                let sock: &TcpStream = tcp_reader.get_ref();
+                sock.shutdown(how)
+            }
+        }
+    }
+}
+
+/// Owned write half of a connection, either plain TCP or TLS.
+pub enum WriteStream {
+    Tcp(Arc<TcpStream>),
+    Tls(rustls_split::WriteHalf),
+}
+
+impl WriteStream {
+    /// Shuts down the connection in the direction(s) given by `how`.
+    pub fn shutdown(&mut self, how: Shutdown) -> io::Result<()> {
+        match self {
+            Self::Tls(tls_writer) => tls_writer.shutdown(how),
+            Self::Tcp(sock) => sock.shutdown(how),
+        }
+    }
+}
+
+impl io::Write for WriteStream {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        match self {
+            Self::Tls(tls_writer) => tls_writer.write(buf),
+            // `Write` is implemented for `&TcpStream`, so the shared socket can be used.
+            Self::Tcp(sock) => (&**sock).write(buf),
+        }
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        match self {
+            Self::Tls(tls_writer) => tls_writer.flush(),
+            Self::Tcp(sock) => (&**sock).flush(),
+        }
+    }
+}
+
+/// Server-side TLS session that owns its transport `T`.
+type TlsStream<T> = rustls::StreamOwned<rustls::ServerConnection, T>;
+
+/// Bi-directional connection stream: plain TCP, or TLS layered over TCP.
+pub enum BidiStream {
+    Tcp(BufStream),
+    /// This variant is boxed, because [`rustls::ServerConnection`] is much larger than [`BufStream`].
+    Tls(Box<TlsStream<BufStream>>),
+}
+
+impl BidiStream {
+    /// Wraps a freshly accepted `TcpStream` as a plain (non-TLS) stream.
+    pub fn from_tcp(stream: TcpStream) -> Self {
+        Self::Tcp(BufStream(BufReader::new(ArcTcpRead(Arc::new(stream)))))
+    }
+
+    /// Shuts down the connection in the direction(s) given by `how`.
+    ///
+    /// For TLS, anything other than `Shutdown::Read` first queues a
+    /// `close_notify` alert and flushes it before the socket is shut down.
+    pub fn shutdown(&mut self, how: Shutdown) -> io::Result<()> {
+        match self {
+            Self::Tcp(stream) => stream.get_ref().shutdown(how),
+            Self::Tls(tls_boxed) => {
+                if how == Shutdown::Read {
+                    tls_boxed.sock.get_ref().shutdown(how)
+                } else {
+                    tls_boxed.conn.send_close_notify();
+                    // Keep the flush result for later: the socket is shut down even
+                    // if the flush failed, and a shutdown error takes precedence.
+                    let res = tls_boxed.flush();
+                    tls_boxed.sock.get_ref().shutdown(how)?;
+                    res
+                }
+            }
+        }
+    }
+
+    /// Split the bi-directional stream into two owned read and write halves.
+    ///
+    /// # Panics
+    ///
+    /// In the TLS case, panics if the inner `Arc<TcpStream>` has more than one
+    /// owner at the time of the call.
+    pub fn split(self) -> (ReadStream, WriteStream) {
+        match self {
+            Self::Tcp(stream) => {
+                let reader = stream.into_reader();
+                // Both halves share the same socket through the `Arc`.
+                let stream: Arc<TcpStream> = reader.get_ref().0.clone();
+
+                (ReadStream::Tcp(reader), WriteStream::Tcp(stream))
+            }
+            Self::Tls(tls_boxed) => {
+                let reader = tls_boxed.sock.into_reader();
+                // Copy the bytes currently held in the `BufReader`'s buffer and pass
+                // them to the read half's buffer config (presumably so that they are
+                // not lost when the reader is dismantled -- TODO confirm).
+                let buffer_data = reader.buffer().to_owned();
+                let read_buf_cfg = rustls_split::BufCfg::with_data(buffer_data, 8192);
+                let write_buf_cfg = rustls_split::BufCfg::with_capacity(8192);
+
+                // TODO would be nice to avoid the Arc here
+                let socket = Arc::try_unwrap(reader.into_inner().0).unwrap();
+
+                let (read_half, write_half) = rustls_split::split(
+                    socket,
+                    Connection::Server(tls_boxed.conn),
+                    read_buf_cfg,
+                    write_buf_cfg,
+                );
+                (ReadStream::Tls(read_half), WriteStream::Tls(write_half))
+            }
+        }
+    }
+
+    /// Upgrades a plain TCP stream to TLS by driving `conn` with `complete_io`
+    /// on the calling thread.
+    ///
+    /// # Errors
+    ///
+    /// Returns the I/O error from `complete_io`, or an `InvalidInput` error if
+    /// the stream is already a TLS stream.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `conn` is still handshaking after `complete_io` returned
+    /// successfully.
+    pub fn start_tls(self, mut conn: rustls::ServerConnection) -> io::Result<Self> {
+        match self {
+            Self::Tcp(mut stream) => {
+                conn.complete_io(&mut stream)?;
+                assert!(!conn.is_handshaking());
+                Ok(Self::Tls(Box::new(TlsStream::new(conn, stream))))
+            }
+            Self::Tls { .. } => Err(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "TLS is already started on this stream",
+            )),
+        }
+    }
+}
+
+/// Reads are forwarded to whichever transport is active.
+impl io::Read for BidiStream {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match self {
+            Self::Tls(tls) => tls.read(buf),
+            Self::Tcp(plain) => plain.read(buf),
+        }
+    }
+}
+
+/// Writes and flushes are forwarded to whichever transport is active.
+impl io::Write for BidiStream {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        match self {
+            Self::Tls(tls) => tls.write(buf),
+            Self::Tcp(plain) => plain.write(buf),
+        }
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        match self {
+            Self::Tls(tls) => tls.flush(),
+            Self::Tcp(plain) => plain.flush(),
+        }
+    }
+}
--- a/libs/postgres_backend/tests/cert.pem
+++ b/libs/postgres_backend/tests/cert.pem
--- a/libs/postgres_backend/tests/key.pem
+++ b/libs/postgres_backend/tests/key.pem
--- a/libs/utils/tests/ssl_test.rs
+++ b/libs/utils/tests/ssl_test.rs
@@ -0,0 +1,238 @@
+use std::{
+    collections::HashMap,
+    io::{Cursor, Read, Write},
+    net::{TcpListener, TcpStream},
+    sync::Arc,
+};
+
+use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
+use bytes::{Buf, BufMut, Bytes, BytesMut};
+use once_cell::sync::Lazy;
+
+use utils::{
+    postgres_backend::{AuthType, Handler, PostgresBackend},
+    postgres_backend_async::QueryError,
+};
+
+/// Creates a connected pair of loopback TCP sockets.
+///
+/// The first element is the accepted socket, the second the connecting one.
+fn make_tcp_pair() -> (TcpStream, TcpStream) {
+    // Port 0 lets the OS pick a free port.
+    let acceptor = TcpListener::bind("127.0.0.1:0").unwrap();
+    let bound_addr = acceptor.local_addr().unwrap();
+    let connecting_end = TcpStream::connect(bound_addr).unwrap();
+    let (accepted_end, _peer_addr) = acceptor.accept().unwrap();
+    (accepted_end, connecting_end)
+}
+
+/// Private key for the test server: the first RSA key found in `key.pem`.
+static KEY: Lazy<rustls::PrivateKey> = Lazy::new(|| {
+    let mut pem = Cursor::new(include_bytes!("key.pem"));
+    let keys = rustls_pemfile::rsa_private_keys(&mut pem).unwrap();
+    let first_key = keys[0].clone();
+    rustls::PrivateKey(first_key)
+});
+
+/// Certificate for the test server: the first certificate found in `cert.pem`.
+static CERT: Lazy<rustls::Certificate> = Lazy::new(|| {
+    let mut pem = Cursor::new(include_bytes!("cert.pem"));
+    let certs = rustls_pemfile::certs(&mut pem).unwrap();
+    let first_cert = certs[0].clone();
+    rustls::Certificate(first_cert)
+});
+
+/// End-to-end check of the TLS path: the client negotiates TLS with an
+/// SSLRequest, completes the handshake, sends a startup message and one query,
+/// and the server-side handler must see exactly that query.
+#[test]
+// [false-positive](https://github.com/rust-lang/rust-clippy/issues/9274),
+// we resize the vector so doing some modifications after all
+#[allow(clippy::read_zero_byte_vec)]
+fn ssl() {
+    // NB: `make_tcp_pair` returns the accepted socket first, so `client_sock` is
+    // the accepted end here. Either end of the connected pair can act as client.
+    let (mut client_sock, server_sock) = make_tcp_pair();
+
+    const QUERY: &str = "hello world";
+
+    // The client side runs on its own thread; the server side runs below on
+    // the test thread.
+    let client_jh = std::thread::spawn(move || {
+        // SSLRequest
+        // (length 8, followed by the SSLRequest code)
+        client_sock.write_u32::<BigEndian>(8).unwrap();
+        client_sock.write_u32::<BigEndian>(80877103).unwrap();
+
+        // The server is configured with TLS, so it must accept with 'S'.
+        let ssl_response = client_sock.read_u8().unwrap();
+        assert_eq!(b'S', ssl_response);
+
+        // Trust exactly the test certificate.
+        let cfg = rustls::ClientConfig::builder()
+            .with_safe_defaults()
+            .with_root_certificates({
+                let mut store = rustls::RootCertStore::empty();
+                store.add(&CERT).unwrap();
+                store
+            })
+            .with_no_client_auth();
+        let client_config = Arc::new(cfg);
+
+        let dns_name = "localhost".try_into().unwrap();
+        let mut conn = rustls::ClientConnection::new(client_config, dns_name).unwrap();
+
+        // Drive the TLS handshake to completion.
+        conn.complete_io(&mut client_sock).unwrap();
+        assert!(!conn.is_handshaking());
+
+        let mut stream = rustls::Stream::new(&mut conn, &mut client_sock);
+
+        // StartupMessage
+        // (length 9, protocol version 196608 = 3 << 16, and a single zero byte
+        // ending the empty parameter list)
+        stream.write_u32::<BigEndian>(9).unwrap();
+        stream.write_u32::<BigEndian>(196608).unwrap();
+        stream.write_u8(0).unwrap();
+        stream.flush().unwrap();
+
+        // wait for ReadyForQuery
+        // Each message is a one-byte tag and a four-byte length that counts itself,
+        // followed by the body; bodies are read and discarded.
+        let mut msg_buf = Vec::new();
+        loop {
+            let msg = stream.read_u8().unwrap();
+            let size = stream.read_u32::<BigEndian>().unwrap() - 4;
+            msg_buf.resize(size as usize, 0);
+            stream.read_exact(&mut msg_buf).unwrap();
+
+            if msg == b'Z' {
+                // ReadyForQuery
+                break;
+            }
+        }
+
+        // Query
+        // (tag 'Q', then a length covering the length field itself plus the text)
+        stream.write_u8(b'Q').unwrap();
+        stream
+            .write_u32::<BigEndian>(4u32 + QUERY.len() as u32)
+            .unwrap();
+        stream.write_all(QUERY.as_ref()).unwrap();
+        stream.flush().unwrap();
+
+        // ReadyForQuery
+        // Only the tag byte is checked; the rest of the message is left unread.
+        let msg = stream.read_u8().unwrap();
+        assert_eq!(msg, b'Z');
+    });
+
+    // Records whether the query received by the server equals `QUERY`.
+    struct TestHandler {
+        got_query: bool,
+    }
+    impl Handler for TestHandler {
+        fn process_query(
+            &mut self,
+            _pgb: &mut PostgresBackend,
+            query_string: &str,
+        ) -> Result<(), QueryError> {
+            self.got_query = query_string == QUERY;
+            Ok(())
+        }
+    }
+    let mut handler = TestHandler { got_query: false };
+
+    let cfg = rustls::ServerConfig::builder()
+        .with_safe_defaults()
+        .with_no_client_auth()
+        .with_single_cert(vec![CERT.clone()], KEY.clone())
+        .unwrap();
+    let tls_config = Some(Arc::new(cfg));
+
+    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, tls_config, true).unwrap();
+    pgb.run(&mut handler).unwrap();
+    assert!(handler.got_query);
+
+    client_jh.join().unwrap();
+
+    // TODO consider shutdown behavior
+}
+
+/// A server without TLS configuration must answer an SSLRequest with 'N',
+/// and `run` must return successfully when the client then goes away.
+#[test]
+fn no_ssl() {
+    let (mut client_sock, server_sock) = make_tcp_pair();
+
+    let client_jh = std::thread::spawn(move || {
+        let mut buf = BytesMut::new();
+
+        // SSLRequest
+        // (length 8, followed by the SSLRequest code), sent in a single write
+        buf.put_u32(8);
+        buf.put_u32(80877103);
+        client_sock.write_all(&buf).unwrap();
+        buf.clear();
+
+        let ssl_response = client_sock.read_u8().unwrap();
+        assert_eq!(b'N', ssl_response);
+        // The socket is dropped when this closure returns.
+    });
+
+    // No query is ever sent in this test, so the handler must not be invoked.
+    struct TestHandler;
+
+    impl Handler for TestHandler {
+        fn process_query(
+            &mut self,
+            _pgb: &mut PostgresBackend,
+            _query_string: &str,
+        ) -> Result<(), QueryError> {
+            panic!()
+        }
+    }
+
+    let mut handler = TestHandler;
+
+    // `None`: the server has no TLS configuration.
+    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, None, true).unwrap();
+    pgb.run(&mut handler).unwrap();
+
+    client_jh.join().unwrap();
+}
+
+/// A server configured with TLS must reject a client that sends a startup
+/// message without negotiating TLS first: the client gets an ErrorResponse
+/// and `run` fails.
+#[test]
+fn server_forces_ssl() {
+    let (mut client_sock, server_sock) = make_tcp_pair();
+
+    let client_jh = std::thread::spawn(move || {
+        // StartupMessage
+        // (sent in plain text, without a preceding SSLRequest)
+        client_sock.write_u32::<BigEndian>(9).unwrap();
+        client_sock.write_u32::<BigEndian>(196608).unwrap();
+        client_sock.write_u8(0).unwrap();
+        client_sock.flush().unwrap();
+
+        // ErrorResponse
+        // (tag 'E', then a length that counts itself, then the body)
+        assert_eq!(client_sock.read_u8().unwrap(), b'E');
+        let len = client_sock.read_u32::<BigEndian>().unwrap() - 4;
+
+        let mut body = vec![0; len as usize];
+        client_sock.read_exact(&mut body).unwrap();
+        let mut body = Bytes::from(body);
+
+        // The body is parsed as a sequence of fields, each a one-byte type followed
+        // by a NUL-terminated value; a zero type byte ends the list.
+        let mut errors = HashMap::new();
+        loop {
+            let field_type = body.get_u8();
+            if field_type == 0u8 {
+                break;
+            }
+
+            let end_idx = body.iter().position(|&b| b == 0u8).unwrap();
+            // Take the value together with its terminator, then drop the terminator.
+            let mut value = body.split_to(end_idx + 1);
+            assert_eq!(value[end_idx], 0u8);
+            value.truncate(end_idx);
+            // Each field type may appear only once.
+            let old = errors.insert(field_type, value);
+            assert!(old.is_none());
+        }
+
+        // Nothing may follow the terminating zero byte.
+        assert!(!body.has_remaining());
+
+        // The 'M' field must carry the expected message.
+        assert_eq!("must connect with TLS", errors.get(&b'M').unwrap());
+
+        // TODO read failure
+    });
+
+    // The connection is rejected before any query, so the handler must not be invoked.
+    struct TestHandler;
+    impl Handler for TestHandler {
+        fn process_query(
+            &mut self,
+            _pgb: &mut PostgresBackend,
+            _query_string: &str,
+        ) -> Result<(), QueryError> {
+            panic!()
+        }
+    }
+    let mut handler = TestHandler;
+
+    let cfg = rustls::ServerConfig::builder()
+        .with_safe_defaults()
+        .with_no_client_auth()
+        .with_single_cert(vec![CERT.clone()], KEY.clone())
+        .unwrap();
+    let tls_config = Some(Arc::new(cfg));
+
+    let pgb = PostgresBackend::new(server_sock, AuthType::Trust, tls_config, true).unwrap();
+    let res = pgb.run(&mut handler).unwrap_err();
+    assert_eq!("client did not connect with TLS", format!("{}", res));
+
+    client_jh.join().unwrap();
+
+    // TODO consider shutdown behavior
+}
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -23,7 +23,6 @@ const_format.workspace = true
 consumption_metrics.workspace = true
 crc32c.workspace = true
 crossbeam-utils.workspace = true
-either.workspace = true
 fail.workspace = true
 futures.workspace = true
 git-version.workspace = true
@@ -37,7 +36,6 @@ num-traits.workspace = true
 once_cell.workspace = true
 pin-project-lite.workspace = true
 postgres.workspace = true
-postgres_backend.workspace = true
 postgres-protocol.workspace = true
 postgres-types.workspace = true
 rand.workspace = true
@@ -53,7 +51,7 @@ thiserror.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
 tokio-postgres.workspace = true
 tokio-util.workspace = true
-toml_edit = { workspace = true, features = [ "serde" ] }
+toml_edit.workspace = true
 tracing.workspace = true
 url.workspace = true
 walkdir.workspace = true
@@ -69,10 +67,6 @@ utils.workspace = true
 workspace_hack.workspace = true
 reqwest.workspace = true
 rpds.workspace = true
-enum-map.workspace = true
-enumset.workspace = true
-strum.workspace = true
-strum_macros.workspace = true

 [dev-dependencies]
 criterion.workspace = true
--- a/pageserver/benches/bench_layer_map.rs
+++ b/pageserver/benches/bench_layer_map.rs
@@ -1,7 +1,8 @@
 use pageserver::keyspace::{KeyPartitioning, KeySpace};
 use pageserver::repository::Key;
 use pageserver::tenant::layer_map::LayerMap;
-use pageserver::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
+use pageserver::tenant::storage_layer::Layer;
+use pageserver::tenant::storage_layer::{DeltaFileName, ImageFileName, LayerDescriptor};
 use rand::prelude::{SeedableRng, SliceRandom, StdRng};
 use std::cmp::{max, min};
 use std::fs::File;
@@ -25,15 +26,30 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {

    let mut updates = layer_map.batch_update();
    for fname in filenames {
-        let fname = fname.unwrap();
-        let fname = LayerFileName::from_str(&fname).unwrap();
-        let layer = LayerDescriptor::from(fname);
-
-        let lsn_range = layer.get_lsn_range();
-        min_lsn = min(min_lsn, lsn_range.start);
-        max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
-
-        updates.insert_historic(Arc::new(layer));
+        let fname = &fname.unwrap();
+        if let Some(imgfilename) = ImageFileName::parse_str(fname) {
+            let layer = LayerDescriptor {
+                key: imgfilename.key_range,
+                lsn: imgfilename.lsn..(imgfilename.lsn + 1),
+                is_incremental: false,
+                short_id: fname.to_string(),
+            };
+            updates.insert_historic(Arc::new(layer));
+            min_lsn = min(min_lsn, imgfilename.lsn);
+            max_lsn = max(max_lsn, imgfilename.lsn);
+        } else if let Some(deltafilename) = DeltaFileName::parse_str(fname) {
+            let layer = LayerDescriptor {
+                key: deltafilename.key_range.clone(),
+                lsn: deltafilename.lsn_range.clone(),
+                is_incremental: true,
+                short_id: fname.to_string(),
+            };
+            updates.insert_historic(Arc::new(layer));
+            min_lsn = min(min_lsn, deltafilename.lsn_range.start);
+            max_lsn = max(max_lsn, deltafilename.lsn_range.end);
+        } else {
+            panic!("unexpected filename {fname}");
+        }
    }

    println!("min: {min_lsn}, max: {max_lsn}");
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -33,7 +33,6 @@ use pageserver_api::reltag::{RelTag, SlruKind};

 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
 use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PGDATA_SUBDIRS, PG_HBA};
-use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM};
 use postgres_ffi::TransactionId;
 use postgres_ffi::XLogFileName;
 use postgres_ffi::PG_TLI;
@@ -191,31 +190,14 @@ where
        {
            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;

-            // If full backup is requested, include all relation files.
-            // Otherwise only include init forks of unlogged relations.
-            let rels = self
-                .timeline
-                .list_rels(spcnode, dbnode, self.lsn, self.ctx)
-                .await?;
-            for &rel in rels.iter() {
-                // Send init fork as main fork to provide well formed empty
-                // contents of UNLOGGED relations. Postgres copies it in
-                // `reinit.c` during recovery.
-                if rel.forknum == INIT_FORKNUM {
-                    // I doubt we need _init fork itself, but having it at least
-                    // serves as a marker relation is unlogged.
-                    self.add_rel(rel, rel).await?;
-                    self.add_rel(rel, rel.with_forknum(MAIN_FORKNUM)).await?;
-                    continue;
-                }
-
-                if self.full_backup {
-                    if rel.forknum == MAIN_FORKNUM && rels.contains(&rel.with_forknum(INIT_FORKNUM))
-                    {
-                        // skip this, will include it when we reach the init fork
-                        continue;
-                    }
-                    self.add_rel(rel, rel).await?;
+            // Gather and send relational files in each database if full backup is requested.
+            if self.full_backup {
+                for rel in self
+                    .timeline
+                    .list_rels(spcnode, dbnode, self.lsn, self.ctx)
+                    .await?
+                {
+                    self.add_rel(rel).await?;
                }
            }
        }
@@ -238,16 +220,15 @@ where
        Ok(())
    }

-    /// Add contents of relfilenode `src`, naming it as `dst`.
-    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> anyhow::Result<()> {
+    async fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
        let nblocks = self
            .timeline
-            .get_rel_size(src, self.lsn, false, self.ctx)
+            .get_rel_size(tag, self.lsn, false, self.ctx)
            .await?;

        // If the relation is empty, create an empty file
        if nblocks == 0 {
-            let file_name = dst.to_segfile_name(0);
+            let file_name = tag.to_segfile_name(0);
            let header = new_tar_header(&file_name, 0)?;
            self.ar.append(&header, &mut io::empty()).await?;
            return Ok(());
@@ -263,12 +244,12 @@ where
            for blknum in startblk..endblk {
                let img = self
                    .timeline
-                    .get_rel_page_at_lsn(src, blknum, self.lsn, false, self.ctx)
+                    .get_rel_page_at_lsn(tag, blknum, self.lsn, false, self.ctx)
                    .await?;
                segment_data.extend_from_slice(&img[..]);
            }

-            let file_name = dst.to_segfile_name(seg as u32);
+            let file_name = tag.to_segfile_name(seg as u32);
            let header = new_tar_header(&file_name, segment_data.len() as u64)?;
            self.ar.append(&header, segment_data.as_slice()).await?;

--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -7,7 +7,6 @@ use std::{env, ops::ControlFlow, path::Path, str::FromStr};
 use anyhow::{anyhow, Context};
 use clap::{Arg, ArgAction, Command};
 use fail::FailScenario;
-use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp};
 use remote_storage::GenericRemoteStorage;
 use tracing::*;

@@ -23,10 +22,11 @@ use pageserver::{
    tenant::mgr,
    virtual_file,
 };
-use postgres_backend::AuthType;
 use utils::{
    auth::JwtAuth,
-    logging, project_git_version,
+    logging,
+    postgres_backend::AuthType,
+    project_git_version,
    sentry_init::init_sentry,
    signals::{self, Signal},
    tcp_listener,
@@ -52,8 +52,6 @@ fn version() -> String {
 }

 fn main() -> anyhow::Result<()> {
-    let launch_ts = Box::leak(Box::new(LaunchTimestamp::generate()));
-
    let arg_matches = cli().get_matches();

    if arg_matches.get_flag("enabled-features") {
@@ -87,13 +85,6 @@ fn main() -> anyhow::Result<()> {
        }
    };

-    // Initialize logging, which must be initialized before the custom panic hook is installed.
-    logging::init(conf.log_format)?;
-
-    // mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
-    // disarming this hook on pageserver, because we never tear down tracing.
-    logging::replace_panic_hook_with_tracing_panic_hook().forget();
-
    // initialize sentry if SENTRY_DSN is provided
    let _sentry_guard = init_sentry(
        Some(GIT_VERSION.into()),
@@ -117,7 +108,7 @@ fn main() -> anyhow::Result<()> {
    virtual_file::init(conf.max_file_descriptors);
    page_cache::init(conf.page_cache_size);

-    start_pageserver(launch_ts, conf).context("Failed to start pageserver")?;
+    start_pageserver(conf).context("Failed to start pageserver")?;

    scenario.teardown();
    Ok(())
@@ -212,21 +203,13 @@ fn initialize_config(
    })
 }

-fn start_pageserver(
-    launch_ts: &'static LaunchTimestamp,
-    conf: &'static PageServerConf,
-) -> anyhow::Result<()> {
-    // Print version and launch timestamp to the log,
-    // and expose them as prometheus metrics.
-    // A changed version string indicates changed software.
-    // A changed launch timestamp indicates a pageserver restart.
-    info!(
-        "version: {} launch_timestamp: {}",
-        version(),
-        launch_ts.to_string()
-    );
+fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
+    // Initialize logging
+    logging::init(conf.log_format)?;
+
+    // Print version to the log, and expose it as a prometheus metric too.
+    info!("version: {}", version());
    set_build_info_metric(GIT_VERSION);
-    set_launch_timestamp_metric(launch_ts);

    // If any failpoints were set from FAILPOINTS environment variable,
    // print them to the log for debugging purposes
@@ -324,7 +307,7 @@ fn start_pageserver(
    {
        let _rt_guard = MGMT_REQUEST_RUNTIME.enter();

-        let router = http::make_router(conf, launch_ts, auth.clone(), remote_storage)?
+        let router = http::make_router(conf, auth.clone(), remote_storage)?
            .build()
            .map_err(|err| anyhow!(err))?;
        let service = utils::http::RouterService::new(router).unwrap();
@@ -364,7 +347,6 @@ fn start_pageserver(
                    pageserver::consumption_metrics::collect_metrics(
                        metric_collection_endpoint,
                        conf.metric_collection_interval,
-                        conf.cached_metric_collection_interval,
                        conf.synthetic_size_calculation_interval,
                        conf.id,
                        metrics_ctx,
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -21,10 +21,10 @@ use std::time::Duration;
 use toml_edit;
 use toml_edit::{Document, Item};

-use postgres_backend::AuthType;
 use utils::{
    id::{NodeId, TenantId, TimelineId},
    logging::LogFormat,
+    postgres_backend::AuthType,
 };

 use crate::tenant::config::TenantConf;
@@ -58,7 +58,6 @@ pub mod defaults {
        super::ConfigurableSemaphore::DEFAULT_INITIAL.get();

    pub const DEFAULT_METRIC_COLLECTION_INTERVAL: &str = "10 min";
-    pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "1 hour";
    pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
    pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";

@@ -86,7 +85,6 @@ pub mod defaults {
 #concurrent_tenant_size_logical_size_queries = '{DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES}'

 #metric_collection_interval = '{DEFAULT_METRIC_COLLECTION_INTERVAL}'
-#cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}'
 #synthetic_size_calculation_interval = '{DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL}'

 # [tenant_config]
@@ -156,8 +154,6 @@ pub struct PageServerConf {

    // How often to collect metrics and send them to the metrics endpoint.
    pub metric_collection_interval: Duration,
-    // How often to send unchanged cached metrics to the metrics endpoint.
-    pub cached_metric_collection_interval: Duration,
    pub metric_collection_endpoint: Option<Url>,
    pub synthetic_size_calculation_interval: Duration,

@@ -224,7 +220,6 @@ struct PageServerConfigBuilder {
    concurrent_tenant_size_logical_size_queries: BuilderValue<ConfigurableSemaphore>,

    metric_collection_interval: BuilderValue<Duration>,
-    cached_metric_collection_interval: BuilderValue<Duration>,
    metric_collection_endpoint: BuilderValue<Option<Url>>,
    synthetic_size_calculation_interval: BuilderValue<Duration>,

@@ -269,10 +264,6 @@ impl Default for PageServerConfigBuilder {
                DEFAULT_METRIC_COLLECTION_INTERVAL,
            )
            .expect("cannot parse default metric collection interval")),
-            cached_metric_collection_interval: Set(humantime::parse_duration(
-                DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL,
-            )
-            .expect("cannot parse default cached_metric_collection_interval")),
            synthetic_size_calculation_interval: Set(humantime::parse_duration(
                DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL,
            )
@@ -362,14 +353,6 @@ impl PageServerConfigBuilder {
        self.metric_collection_interval = BuilderValue::Set(metric_collection_interval)
    }

-    pub fn cached_metric_collection_interval(
-        &mut self,
-        cached_metric_collection_interval: Duration,
-    ) {
-        self.cached_metric_collection_interval =
-            BuilderValue::Set(cached_metric_collection_interval)
-    }
-
    pub fn metric_collection_endpoint(&mut self, metric_collection_endpoint: Option<Url>) {
        self.metric_collection_endpoint = BuilderValue::Set(metric_collection_endpoint)
    }
@@ -444,9 +427,6 @@ impl PageServerConfigBuilder {
            metric_collection_interval: self
                .metric_collection_interval
                .ok_or(anyhow!("missing metric_collection_interval"))?,
-            cached_metric_collection_interval: self
-                .cached_metric_collection_interval
-                .ok_or(anyhow!("missing cached_metric_collection_interval"))?,
            metric_collection_endpoint: self
                .metric_collection_endpoint
                .ok_or(anyhow!("missing metric_collection_endpoint"))?,
@@ -632,7 +612,6 @@ impl PageServerConf {
                    ConfigurableSemaphore::new(permits)
                }),
                "metric_collection_interval" => builder.metric_collection_interval(parse_toml_duration(key, item)?),
-                "cached_metric_collection_interval" => builder.cached_metric_collection_interval(parse_toml_duration(key, item)?),
                "metric_collection_endpoint" => {
                    let endpoint = parse_toml_string(key, item)?.parse().context("failed to parse metric_collection_endpoint")?;
                    builder.metric_collection_endpoint(Some(endpoint));
@@ -731,13 +710,6 @@ impl PageServerConf {
                })?);
        }

-        if let Some(eviction_policy) = item.get("eviction_policy") {
-            t_conf.eviction_policy = Some(
-                toml_edit::de::from_item(eviction_policy.clone())
-                    .context("parse eviction_policy")?,
-            );
-        }
-
        Ok(t_conf)
    }

@@ -769,7 +741,6 @@ impl PageServerConf {
            log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
            concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
            metric_collection_interval: Duration::from_secs(60),
-            cached_metric_collection_interval: Duration::from_secs(60 * 60),
            metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
            synthetic_size_calculation_interval: Duration::from_secs(60),
            test_remote_failures: 0,
@@ -910,7 +881,6 @@ initial_superuser_name = 'zzzz'
 id = 10

 metric_collection_interval = '222 s'
-cached_metric_collection_interval = '22200 s'
 metric_collection_endpoint = 'http://localhost:80/metrics'
 synthetic_size_calculation_interval = '333 s'
 log_format = 'json'
@@ -958,9 +928,6 @@ log_format = 'json'
                metric_collection_interval: humantime::parse_duration(
                    defaults::DEFAULT_METRIC_COLLECTION_INTERVAL
                )?,
-                cached_metric_collection_interval: humantime::parse_duration(
-                    defaults::DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL
-                )?,
                metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
                synthetic_size_calculation_interval: humantime::parse_duration(
                    defaults::DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL
@@ -1011,7 +978,6 @@ log_format = 'json'
                log_format: LogFormat::Json,
                concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
                metric_collection_interval: Duration::from_secs(222),
-                cached_metric_collection_interval: Duration::from_secs(22200),
                metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?),
                synthetic_size_calculation_interval: Duration::from_secs(333),
                test_remote_failures: 0,
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -25,7 +25,7 @@ const REMOTE_STORAGE_SIZE: &str = "remote_storage_size";
 const TIMELINE_LOGICAL_SIZE: &str = "timeline_logical_size";

 #[serde_as]
-#[derive(Serialize, Debug)]
+#[derive(Serialize)]
 struct Ids {
    #[serde_as(as = "DisplayFromStr")]
    tenant_id: TenantId,
@@ -46,12 +46,12 @@ pub struct PageserverConsumptionMetricsKey {
 pub async fn collect_metrics(
    metric_collection_endpoint: &Url,
    metric_collection_interval: Duration,
-    cached_metric_collection_interval: Duration,
    synthetic_size_calculation_interval: Duration,
    node_id: NodeId,
    ctx: RequestContext,
 ) -> anyhow::Result<()> {
    let mut ticker = tokio::time::interval(metric_collection_interval);
+
    info!("starting collect_metrics");

    // spin up background worker that caclulates tenant sizes
@@ -75,7 +75,6 @@ pub async fn collect_metrics(
    // define client here to reuse it for all requests
    let client = reqwest::Client::new();
    let mut cached_metrics: HashMap<PageserverConsumptionMetricsKey, u64> = HashMap::new();
-    let mut prev_iteration_time: std::time::Instant = std::time::Instant::now();

    loop {
        tokio::select! {
@@ -84,15 +83,10 @@ pub async fn collect_metrics(
                return Ok(());
            },
            _ = ticker.tick() => {
-
-                // send cached metrics every cached_metric_collection_interval
-                let send_cached = prev_iteration_time.elapsed() >= cached_metric_collection_interval;
-
-                if send_cached {
-                    prev_iteration_time = std::time::Instant::now();
+                if let Err(err) = collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx).await
+                {
+                    error!("metrics collection failed: {err:?}");
                }
-
-                collect_metrics_iteration(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &ctx, send_cached).await;
            }
        }
    }
@@ -103,19 +97,17 @@ pub async fn collect_metrics(
 /// Gather per-tenant and per-timeline metrics and send them to the `metric_collection_endpoint`.
 /// Cache metrics to avoid sending the same metrics multiple times.
 ///
-/// This function handles all errors internally
-/// and doesn't break iteration if just one tenant fails.
-///
 /// TODO
 /// - refactor this function (chunking+sending part) to reuse it in proxy module;
+/// - improve error handling. Now if one tenant fails to collect metrics,
+/// the whole iteration fails and metrics for other tenants are not collected.
 pub async fn collect_metrics_iteration(
    client: &reqwest::Client,
    cached_metrics: &mut HashMap<PageserverConsumptionMetricsKey, u64>,
    metric_collection_endpoint: &reqwest::Url,
    node_id: NodeId,
    ctx: &RequestContext,
-    send_cached: bool,
-) {
+) -> anyhow::Result<()> {
    let mut current_metrics: Vec<(PageserverConsumptionMetricsKey, u64)> = Vec::new();
    trace!(
        "starting collect_metrics_iteration. metric_collection_endpoint: {}",
@@ -123,13 +115,7 @@ pub async fn collect_metrics_iteration(
    );

    // get list of tenants
-    let tenants = match mgr::list_tenants().await {
-        Ok(tenants) => tenants,
-        Err(err) => {
-            error!("failed to list tenants: {:?}", err);
-            return;
-        }
-    };
+    let tenants = mgr::list_tenants().await?;

    // iterate through list of Active tenants and collect metrics
    for (tenant_id, tenant_state) in tenants {
@@ -137,15 +123,7 @@ pub async fn collect_metrics_iteration(
            continue;
        }

-        let tenant = match mgr::get_tenant(tenant_id, true).await {
-            Ok(tenant) => tenant,
-            Err(err) => {
-                // It is possible that tenant was deleted between
-                // `list_tenants` and `get_tenant`, so just warn about it.
-                warn!("failed to get tenant {tenant_id:?}: {err:?}");
-                continue;
-            }
-        };
+        let tenant = mgr::get_tenant(tenant_id, true).await?;

        let mut tenant_resident_size = 0;

@@ -164,51 +142,29 @@ pub async fn collect_metrics_iteration(
                    timeline_written_size,
                ));

-                match timeline.get_current_logical_size(ctx) {
-                    // Only send timeline logical size when it is fully calculated.
-                    Ok((size, is_exact)) if is_exact => {
-                        current_metrics.push((
-                            PageserverConsumptionMetricsKey {
-                                tenant_id,
-                                timeline_id: Some(timeline.timeline_id),
-                                metric: TIMELINE_LOGICAL_SIZE,
-                            },
-                            size,
-                        ));
-                    }
-                    Ok((_, _)) => {}
-                    Err(err) => {
-                        error!(
-                            "failed to get current logical size for timeline {}: {err:?}",
-                            timeline.timeline_id
-                        );
-                        continue;
-                    }
-                };
+                let (timeline_logical_size, is_exact) = timeline.get_current_logical_size(ctx)?;
+                // Only send timeline logical size when it is fully calculated.
+                if is_exact {
+                    current_metrics.push((
+                        PageserverConsumptionMetricsKey {
+                            tenant_id,
+                            timeline_id: Some(timeline.timeline_id),
+                            metric: TIMELINE_LOGICAL_SIZE,
+                        },
+                        timeline_logical_size,
+                    ));
+                }
            }

            let timeline_resident_size = timeline.get_resident_physical_size();
            tenant_resident_size += timeline_resident_size;
        }

-        match tenant.get_remote_size().await {
-            Ok(tenant_remote_size) => {
-                current_metrics.push((
-                    PageserverConsumptionMetricsKey {
-                        tenant_id,
-                        timeline_id: None,
-                        metric: REMOTE_STORAGE_SIZE,
-                    },
-                    tenant_remote_size,
-                ));
-            }
-            Err(err) => {
-                error!(
-                    "failed to get remote size for tenant {}: {err:?}",
-                    tenant_id
-                );
-            }
-        }
+        let tenant_remote_size = tenant.get_remote_size().await?;
+        debug!(
+            "collected current metrics for tenant: {}: state={:?} resident_size={} remote_size={}",
+            tenant_id, tenant_state, tenant_resident_size, tenant_remote_size
+        );

        current_metrics.push((
            PageserverConsumptionMetricsKey {
@@ -219,6 +175,15 @@ pub async fn collect_metrics_iteration(
            tenant_resident_size,
        ));

+        current_metrics.push((
+            PageserverConsumptionMetricsKey {
+                tenant_id,
+                timeline_id: None,
+                metric: REMOTE_STORAGE_SIZE,
+            },
+            tenant_remote_size,
+        ));
+
        // Note that this metric is calculated in a separate bgworker
        // Here we only use cached value, which may lag behind the real latest one
        let tenant_synthetic_size = tenant.get_cached_synthetic_size();
@@ -232,18 +197,15 @@ pub async fn collect_metrics_iteration(
        ));
    }

-    // Filter metrics, unless we want to send all metrics, including cached ones.
-    // See: https://github.com/neondatabase/neon/issues/3485
-    if !send_cached {
-        current_metrics.retain(|(curr_key, curr_val)| match cached_metrics.get(curr_key) {
-            Some(val) => val != curr_val,
-            None => true,
-        });
-    }
+    // Filter metrics
+    current_metrics.retain(|(curr_key, curr_val)| match cached_metrics.get(curr_key) {
+        Some(val) => val != curr_val,
+        None => true,
+    });

    if current_metrics.is_empty() {
        trace!("no new metrics to send");
-        return;
+        return Ok(());
    }

    // Send metrics.
@@ -287,12 +249,6 @@ pub async fn collect_metrics_iteration(
                    }
                } else {
                    error!("metrics endpoint refused the sent metrics: {:?}", res);
-                    for metric in chunk_to_send.iter() {
-                        // Report if the metric value is suspiciously large
-                        if metric.value > (1u64 << 40) {
-                            error!("potentially abnormal metric value: {:?}", metric);
-                        }
-                    }
                }
            }
            Err(err) => {
@@ -300,6 +256,8 @@ pub async fn collect_metrics_iteration(
            }
        }
    }
+
+    Ok(())
 }

 /// Caclculate synthetic size for each active tenant
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -437,13 +437,6 @@ paths:
          type: boolean
        description: |
          When true, skip calculation and only provide the model inputs (for debugging). Defaults to false.
-      - name: retention_period
-        in: query
-        required: false
-        schema:
-          type: integer
-        description: |
-          Override the default retention period (in bytes) used for size calculation.
    get:
      description: |
        Calculate tenant's size, which is a mixture of WAL (bytes) and logical_size (bytes).
@@ -671,55 +664,6 @@ paths:
            application/json:
              schema:
                $ref: "#/components/schemas/Error"
-  /v1/tenant/{tenant_id}/config/:
-    parameters:
-      - name: tenant_id
-        in: path
-        required: true
-        schema:
-          type: string
-          format: hex
-    get:
-      description: |
-        Returns tenant's config description: specific config overrides a tenant has
-        and the effective config.
-      responses:
-        "200":
-          description: Tenant config, specific and effective
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/TenantConfig"
-        "400":
-          description: Malformed get tenanant config request
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
-        "401":
-          description: Unauthorized Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/UnauthorizedError"
-        "403":
-          description: Forbidden Error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/ForbiddenError"
-        "404":
-          description: Tenand or timeline were not found
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/NotFoundError"
-        "500":
-          description: Generic operation error
-          content:
-            application/json:
-              schema:
-                $ref: "#/components/schemas/Error"
 components:
  securitySchemes:
    JWT:
@@ -780,33 +724,10 @@ components:
          type: integer
        checkpoint_timeout:
          type: string
-        compaction_target_size:
-          type: integer
        compaction_period:
          type: string
        compaction_threshold:
          type: string
-        image_creation_threshold:
-          type: integer
-        walreceiver_connect_timeout:
-          type: string
-        lagging_wal_timeout:
-          type: string
-        max_lsn_wal_lag:
-          type: integer
-        trace_read_requests:
-          type: boolean
-    TenantConfig:
-      type: object
-      properties:
-        tenant_specific_overrides:
-          type: object
-          schema:
-            $ref: "#/components/schemas/TenantConfigInfo"
-        effective_config:
-          type: object
-          schema:
-            $ref: "#/components/schemas/TenantConfigInfo"
    TimelineInfo:
      type: object
      required:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1,28 +1,23 @@
-use std::collections::HashMap;
 use std::sync::Arc;

 use anyhow::{anyhow, Context, Result};
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
-use metrics::launch_timestamp::LaunchTimestamp;
 use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest;
 use remote_storage::GenericRemoteStorage;
-use tenant_size_model::{SizeResult, StorageModel};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
-use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
+use utils::http::request::{must_get_query_param, parse_query_param};

 use super::models::{
    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
-    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
+    TimelineCreateRequest, TimelineInfo,
 };
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::mgr::TenantMapInsertError;
-use crate::tenant::size::ModelInputs;
-use crate::tenant::storage_layer::LayerAccessStatsReset;
 use crate::tenant::{PageReconstructError, Timeline};
 use crate::{config::PageServerConf, tenant::mgr};
 use utils::{
@@ -40,7 +35,7 @@ use utils::{

 // Imports only used for testing APIs
 #[cfg(feature = "testing")]
-use super::models::ConfigureFailpointsRequest;
+use super::models::{ConfigureFailpointsRequest, TimelineGcRequest};

 struct State {
    conf: &'static PageServerConf,
@@ -322,7 +317,10 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
    let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = mgr::get_tenant(tenant_id, true)
+        .await
+        .and_then(|tenant| tenant.get_timeline(timeline_id, true))
+        .map_err(ApiError::NotFound)?;
    let result = timeline
        .find_lsn_for_timestamp(timestamp_pg, &ctx)
        .await
@@ -481,19 +479,11 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
 /// to debug any of the calculations. Requires `tenant_id` request parameter, supports
 /// `inputs_only=true|false` (default false) which supports debugging failure to calculate model
 /// values.
-///
-/// 'retention_period' query parameter overrides the cutoff that is used to calculate the size
-/// (only if it is shorter than the real cutoff).
-///
-/// Note: we don't update the cached size and prometheus metric here.
-/// The retention period might be different, and it's nice to have a method to just calculate it
-/// without modifying anything anyway.
 async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;
+
    let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
-    let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
-    let headers = request.headers();

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
    let tenant = mgr::get_tenant(tenant_id, true)
@@ -502,29 +492,20 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A

    // this can be long operation
    let inputs = tenant
-        .gather_size_inputs(retention_period, &ctx)
+        .gather_size_inputs(&ctx)
        .await
        .map_err(ApiError::InternalServerError)?;

-    let mut sizes = None;
-    if !inputs_only.unwrap_or(false) {
-        let storage_model = inputs
-            .calculate_model()
-            .map_err(ApiError::InternalServerError)?;
-        let size = storage_model.calculate();
+    let size = if !inputs_only.unwrap_or(false) {
+        Some(inputs.calculate().map_err(ApiError::InternalServerError)?)
+    } else {
+        None
+    };

-        // If request header expects html, return html
-        if headers["Accept"] == "text/html" {
-            return synthetic_size_html_response(inputs, storage_model, size);
-        }
-        sizes = Some(size);
-    } else if headers["Accept"] == "text/html" {
-        return Err(ApiError::BadRequest(anyhow!(
-            "inputs_only parameter is incompatible with html output request"
-        )));
-    }
-
-    /// The type resides in the pageserver not to expose `ModelInputs`.
+    /// Private response type with the additional "unstable" `inputs` field.
+    ///
+    /// The type is described with `id` and `size` in the openapi_spec file, but the `inputs` is
+    /// intentionally left out. The type resides in the pageserver not to expose `ModelInputs`.
    #[serde_with::serde_as]
    #[derive(serde::Serialize)]
    struct TenantHistorySize {
@@ -534,9 +515,6 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
        ///
        /// Will be none if `?inputs_only=true` was given.
        size: Option<u64>,
-        /// Size of each segment used in the model.
-        /// Will be null if `?inputs_only=true` was given.
-        segment_sizes: Option<Vec<tenant_size_model::SegmentSizeResult>>,
        inputs: crate::tenant::size::ModelInputs,
    }

@@ -544,128 +522,12 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
        StatusCode::OK,
        TenantHistorySize {
            id: tenant_id,
-            size: sizes.as_ref().map(|x| x.total_size),
-            segment_sizes: sizes.map(|x| x.segments),
+            size,
            inputs,
        },
    )
 }

-async fn layer_map_info_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    let reset: LayerAccessStatsReset =
-        parse_query_param(&request, "reset")?.unwrap_or(LayerAccessStatsReset::NoReset);
-
-    check_permission(&request, Some(tenant_id))?;
-
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-    let layer_map_info = timeline.layer_map_info(reset);
-
-    json_response(StatusCode::OK, layer_map_info)
-}
-
-async fn layer_download_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    let layer_file_name = get_request_param(&request, "layer_file_name")?;
-    check_permission(&request, Some(tenant_id))?;
-
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-    let downloaded = timeline
-        .download_layer(layer_file_name)
-        .await
-        .map_err(ApiError::InternalServerError)?;
-
-    match downloaded {
-        Some(true) => json_response(StatusCode::OK, ()),
-        Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
-        None => json_response(
-            StatusCode::BAD_REQUEST,
-            format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
-        ),
-    }
-}
-
-async fn evict_timeline_layer_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    let layer_file_name = get_request_param(&request, "layer_file_name")?;
-
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
-    let evicted = timeline
-        .evict_layer(layer_file_name)
-        .await
-        .map_err(ApiError::InternalServerError)?;
-
-    match evicted {
-        Some(true) => json_response(StatusCode::OK, ()),
-        Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
-        None => json_response(
-            StatusCode::BAD_REQUEST,
-            format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
-        ),
-    }
-}
-
-/// Get tenant_size SVG graph along with the JSON data.
-fn synthetic_size_html_response(
-    inputs: ModelInputs,
-    storage_model: StorageModel,
-    sizes: SizeResult,
-) -> Result<Response<Body>, ApiError> {
-    let mut timeline_ids: Vec<String> = Vec::new();
-    let mut timeline_map: HashMap<TimelineId, usize> = HashMap::new();
-    for (index, ti) in inputs.timeline_inputs.iter().enumerate() {
-        timeline_map.insert(ti.timeline_id, index);
-        timeline_ids.push(ti.timeline_id.to_string());
-    }
-    let seg_to_branch: Vec<usize> = inputs
-        .segments
-        .iter()
-        .map(|seg| *timeline_map.get(&seg.timeline_id).unwrap())
-        .collect();
-
-    let svg =
-        tenant_size_model::svg::draw_svg(&storage_model, &timeline_ids, &seg_to_branch, &sizes)
-            .map_err(ApiError::InternalServerError)?;
-
-    let mut response = String::new();
-
-    use std::fmt::Write;
-    write!(response, "<html>\n<body>\n").unwrap();
-    write!(response, "<div>\n{svg}\n</div>").unwrap();
-    writeln!(response, "Project size: {}", sizes.total_size).unwrap();
-    writeln!(response, "<pre>").unwrap();
-    writeln!(
-        response,
-        "{}",
-        serde_json::to_string_pretty(&inputs).unwrap()
-    )
-    .unwrap();
-    writeln!(
-        response,
-        "{}",
-        serde_json::to_string_pretty(&sizes.segments).unwrap()
-    )
-    .unwrap();
-    writeln!(response, "</pre>").unwrap();
-    write!(response, "</body>\n</html>\n").unwrap();
-
-    html_response(StatusCode::OK, response)
-}
-
-pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>, ApiError> {
-    let response = Response::builder()
-        .status(status)
-        .header(hyper::header::CONTENT_TYPE, "text/html")
-        .body(Body::from(data.as_bytes().to_vec()))
-        .map_err(|e| ApiError::InternalServerError(e.into()))?;
-    Ok(response)
-}
-
 // Helper function to standardize the error messages we produce on bad durations
 //
 // Intended to be used with anyhow's `with_context`, e.g.:
@@ -782,40 +644,12 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
    )
 }

-async fn get_tenant_config_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
-
-    let tenant = mgr::get_tenant(tenant_id, false)
-        .await
-        .map_err(ApiError::NotFound)?;
-
-    let response = HashMap::from([
-        (
-            "tenant_specific_overrides",
-            serde_json::to_value(tenant.tenant_specific_overrides())
-                .context("serializing tenant specific overrides")
-                .map_err(ApiError::InternalServerError)?,
-        ),
-        (
-            "effective_config",
-            serde_json::to_value(tenant.effective_config())
-                .context("serializing effective config")
-                .map_err(ApiError::InternalServerError)?,
-        ),
-    ]);
-
-    json_response(StatusCode::OK, response)
-}
-
-async fn update_tenant_config_handler(
-    mut request: Request<Body>,
-) -> Result<Response<Body>, ApiError> {
+async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let request_data: TenantConfigRequest = json_request(&mut request).await?;
    let tenant_id = request_data.tenant_id;
    check_permission(&request, Some(tenant_id))?;

-    let mut tenant_conf = TenantConfOpt::default();
+    let mut tenant_conf: TenantConfOpt = Default::default();
    if let Some(gc_period) = request_data.gc_period {
        tenant_conf.gc_period = Some(
            humantime::parse_duration(&gc_period)
@@ -850,8 +684,12 @@ async fn update_tenant_config_handler(
                .map_err(ApiError::BadRequest)?,
        );
    }
-    tenant_conf.max_lsn_wal_lag = request_data.max_lsn_wal_lag;
-    tenant_conf.trace_read_requests = request_data.trace_read_requests;
+    if let Some(max_lsn_wal_lag) = request_data.max_lsn_wal_lag {
+        tenant_conf.max_lsn_wal_lag = Some(max_lsn_wal_lag);
+    }
+    if let Some(trace_read_requests) = request_data.trace_read_requests {
+        tenant_conf.trace_read_requests = Some(trace_read_requests);
+    }

    tenant_conf.checkpoint_distance = request_data.checkpoint_distance;
    if let Some(checkpoint_timeout) = request_data.checkpoint_timeout {
@@ -872,16 +710,8 @@ async fn update_tenant_config_handler(
        );
    }

-    if let Some(eviction_policy) = request_data.eviction_policy {
-        tenant_conf.eviction_policy = Some(
-            serde_json::from_value(eviction_policy)
-                .context("parse field `eviction_policy`")
-                .map_err(ApiError::BadRequest)?,
-        );
-    }
-
    let state = get_state(&request);
-    mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
+    mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
        .instrument(info_span!("tenant_config", tenant = ?tenant_id))
        .await
        // FIXME: `update_tenant_config` can fail because of both user and internal errors.
@@ -925,6 +755,7 @@ async fn failpoints_handler(mut request: Request<Body>) -> Result<Response<Body>
 }

 // Run GC immediately on given timeline.
+#[cfg(feature = "testing")]
 async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
@@ -973,7 +804,12 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
    check_permission(&request, Some(tenant_id))?;

    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let tenant = mgr::get_tenant(tenant_id, true)
+        .await
+        .map_err(ApiError::NotFound)?;
+    let timeline = tenant
+        .get_timeline(timeline_id, true)
+        .map_err(ApiError::NotFound)?;
    timeline
        .freeze_and_flush()
        .await
@@ -994,7 +830,12 @@ async fn timeline_download_remote_layers_handler_post(
    let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
    check_permission(&request, Some(tenant_id))?;

-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let tenant = mgr::get_tenant(tenant_id, true)
+        .await
+        .map_err(ApiError::NotFound)?;
+    let timeline = tenant
+        .get_timeline(timeline_id, true)
+        .map_err(ApiError::NotFound)?;
    match timeline.spawn_download_all_remote_layers(body).await {
        Ok(st) => json_response(StatusCode::ACCEPTED, st),
        Err(st) => json_response(StatusCode::CONFLICT, st),
@@ -1005,10 +846,15 @@ async fn timeline_download_remote_layers_handler_get(
    request: Request<Body>,
 ) -> Result<Response<Body>, ApiError> {
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
+    check_permission(&request, Some(tenant_id))?;

-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let tenant = mgr::get_tenant(tenant_id, true)
+        .await
+        .map_err(ApiError::NotFound)?;
+    let timeline = tenant
+        .get_timeline(timeline_id, true)
+        .map_err(ApiError::NotFound)?;
    let info = timeline
        .get_download_all_remote_layers_task_info()
        .context("task never started since last pageserver process start")
@@ -1016,29 +862,6 @@ async fn timeline_download_remote_layers_handler_get(
    json_response(StatusCode::OK, info)
 }

-async fn active_timeline_of_active_tenant(
-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-) -> Result<Arc<Timeline>, ApiError> {
-    let tenant = mgr::get_tenant(tenant_id, true)
-        .await
-        .map_err(ApiError::NotFound)?;
-    tenant
-        .get_timeline(timeline_id, true)
-        .map_err(ApiError::NotFound)
-}
-
-async fn always_panic_handler(req: Request<Body>) -> Result<Response<Body>, ApiError> {
-    // Deliberately cause a panic to exercise the panic hook registered via std::panic::set_hook().
-    // For pageserver, the relevant panic hook is `tracing_panic_hook` , and the `sentry` crate's wrapper around it.
-    // Use catch_unwind to ensure that tokio nor hyper are distracted by our panic.
-    let query = req.uri().query();
-    let _ = std::panic::catch_unwind(|| {
-        panic!("unconditional panic for testing panic hook integration; request query: {query:?}")
-    });
-    json_response(StatusCode::NO_CONTENT, ())
-}
-
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(
        StatusCode::NOT_FOUND,
@@ -1048,7 +871,6 @@ async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {

 pub fn make_router(
    conf: &'static PageServerConf,
-    launch_ts: &'static LaunchTimestamp,
    auth: Option<Arc<JwtAuth>>,
    remote_storage: Option<GenericRemoteStorage>,
 ) -> anyhow::Result<RouterBuilder<hyper::Body, ApiError>> {
@@ -1065,14 +887,6 @@ pub fn make_router(
        }))
    }

-    router = router.middleware(
-        endpoint::add_response_header_middleware(
-            "PAGESERVER_LAUNCH_TIMESTAMP",
-            &launch_ts.to_string(),
-        )
-        .expect("construct launch timestamp header middleware"),
-    );
-
    macro_rules! testing_api {
        ($handler_desc:literal, $handler:path $(,)?) => {{
            #[cfg(not(feature = "testing"))]
@@ -1104,9 +918,8 @@ pub fn make_router(
        .get("/v1/tenant", tenant_list_handler)
        .post("/v1/tenant", tenant_create_handler)
        .get("/v1/tenant/:tenant_id", tenant_status)
-        .get("/v1/tenant/:tenant_id/synthetic_size", tenant_size_handler)
-        .put("/v1/tenant/config", update_tenant_config_handler)
-        .get("/v1/tenant/:tenant_id/config", get_tenant_config_handler)
+        .get("/v1/tenant/:tenant_id/size", tenant_size_handler)
+        .put("/v1/tenant/config", tenant_config_handler)
        .get("/v1/tenant/:tenant_id/timeline", timeline_list_handler)
        .post("/v1/tenant/:tenant_id/timeline", timeline_create_handler)
        .post("/v1/tenant/:tenant_id/attach", tenant_attach_handler)
@@ -1123,7 +936,7 @@ pub fn make_router(
        )
        .put(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc",
-            timeline_gc_handler,
+            testing_api!("run timeline GC", timeline_gc_handler),
        )
        .put(
            "/v1/tenant/:tenant_id/timeline/:timeline_id/compact",
@@ -1145,18 +958,5 @@ pub fn make_router(
            "/v1/tenant/:tenant_id/timeline/:timeline_id",
            timeline_delete_handler,
        )
-        .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer",
-            layer_map_info_handler,
-        )
-        .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
-            layer_download_handler,
-        )
-        .delete(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
-            evict_timeline_layer_handler,
-        )
-        .get("/v1/panic", always_panic_handler)
        .any(handler_404))
 }
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -150,15 +150,6 @@ pub static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
    .expect("Failed to register pageserver_tenant_states_count metric")
 });

-pub static TENANT_SYNTHETIC_SIZE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
-    register_uint_gauge_vec!(
-        "pageserver_tenant_synthetic_size",
-        "Synthetic size of each tenant",
-        &["tenant_id"]
-    )
-    .expect("Failed to register pageserver_tenant_synthetic_size metric")
-});
-
 // Metrics for cloud upload. These metrics reflect data uploaded to cloud storage,
 // or in testing they estimate how much we would upload if we did.
 static NUM_PERSISTENT_FILES_CREATED: Lazy<IntCounterVec> = Lazy::new(|| {
@@ -602,7 +593,6 @@ impl Drop for TimelineMetrics {

 pub fn remove_tenant_metrics(tenant_id: &TenantId) {
    let tid = tenant_id.to_string();
-    let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
    for state in TENANT_STATE_OPTIONS {
        let _ = TENANT_STATE_METRIC.remove_label_values(&[&tid, state]);
    }
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -20,8 +20,7 @@ use pageserver_api::models::{
    PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
    PagestreamNblocksRequest, PagestreamNblocksResponse,
 };
-use postgres_backend::{self, is_expected_io_error, AuthType, PostgresBackend, QueryError};
-use pq_proto::framed::ConnectionError;
+use pq_proto::ConnectionError;
 use pq_proto::FeStartupPacket;
 use pq_proto::{BeMessage, FeMessage, RowDescriptor};
 use std::io;
@@ -36,6 +35,8 @@ use utils::{
    auth::{Claims, JwtAuth, Scope},
    id::{TenantId, TimelineId},
    lsn::Lsn,
+    postgres_backend::AuthType,
+    postgres_backend_async::{self, is_expected_io_error, PostgresBackend, QueryError},
    simple_rcu::RcuReadGuard,
 };

@@ -63,11 +64,11 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                _ = task_mgr::shutdown_watcher() => {
                    // We were requested to shut down.
                    let msg = format!("pageserver is shutting down");
-                    let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None));
+                    let _ = pgb.write_message(&BeMessage::ErrorResponse(&msg, None));
                    Err(QueryError::Other(anyhow::anyhow!(msg)))
                }

-                msg = pgb.read_message() => { msg.map_err(QueryError::from)}
+                msg = pgb.read_message() => { msg }
            };

            match msg {
@@ -78,16 +79,14 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                        FeMessage::Sync => continue,
                        FeMessage::Terminate => {
                            let msg = "client terminated connection with Terminate message during COPY";
-                            let query_error = QueryError::Disconnected(ConnectionError::Io(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
-                            // error can't happen here, ErrorResponse serialization should be always ok
-                            pgb.write_message_noflush(&BeMessage::ErrorResponse(msg, Some(query_error.pg_error_code()))).map_err(|e| e.into_io_error())?;
+                            let query_error_error = QueryError::Disconnected(ConnectionError::Socket(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
+                            pgb.write_message(&BeMessage::ErrorResponse(msg, Some(query_error_error.pg_error_code())))?;
                            Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
                            break;
                        }
                        m => {
                            let msg = format!("unexpected message {m:?}");
-                            // error can't happen here, ErrorResponse serialization should be always ok
-                            pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None)).map_err(|e| e.into_io_error())?;
+                            pgb.write_message(&BeMessage::ErrorResponse(&msg, None))?;
                            Err(io::Error::new(io::ErrorKind::Other, msg))?;
                            break;
                        }
@@ -97,17 +96,16 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                }
                Ok(None) => {
                    let msg = "client closed connection during COPY";
-                    let query_error = QueryError::Disconnected(ConnectionError::Io(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
-                    // error can't happen here, ErrorResponse serialization should be always ok
-                    pgb.write_message_noflush(&BeMessage::ErrorResponse(msg, Some(query_error.pg_error_code()))).map_err(|e| e.into_io_error())?;
+                    let query_error_error = QueryError::Disconnected(ConnectionError::Socket(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
+                    pgb.write_message(&BeMessage::ErrorResponse(msg, Some(query_error_error.pg_error_code())))?;
                    pgb.flush().await?;
                    Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
                }
-                Err(QueryError::Disconnected(ConnectionError::Io(io_error))) => {
+                Err(QueryError::Disconnected(ConnectionError::Socket(io_error))) => {
                    Err(io_error)?;
                }
                Err(other) => {
-                    Err(io::Error::new(io::ErrorKind::Other, other.to_string()))?;
+                    Err(io::Error::new(io::ErrorKind::Other, other))?;
                }
            };
        }
@@ -214,7 +212,7 @@ async fn page_service_conn_main(
            // we've been requested to shut down
            Ok(())
        }
-        Err(QueryError::Disconnected(ConnectionError::Io(io_error))) => {
+        Err(QueryError::Disconnected(ConnectionError::Socket(io_error))) => {
            if is_expected_io_error(&io_error) {
                info!("Postgres client disconnected ({io_error})");
                Ok(())
@@ -313,7 +311,7 @@ impl PageServerHandler {
        let timeline = tenant.get_timeline(timeline_id, true)?;

        // switch client to COPYBOTH
-        pgb.write_message_noflush(&BeMessage::CopyBothResponse)?;
+        pgb.write_message(&BeMessage::CopyBothResponse)?;
        pgb.flush().await?;

        let metrics = PageRequestMetrics::new(&tenant_id, &timeline_id);
@@ -382,7 +380,7 @@ impl PageServerHandler {
                })
            });

-            pgb.write_message_noflush(&BeMessage::CopyData(&response.serialize()))?;
+            pgb.write_message(&BeMessage::CopyData(&response.serialize()))?;
            pgb.flush().await?;
        }
        Ok(())
@@ -418,7 +416,7 @@ impl PageServerHandler {

        // Import basebackup provided via CopyData
        info!("importing basebackup");
-        pgb.write_message_noflush(&BeMessage::CopyInResponse)?;
+        pgb.write_message(&BeMessage::CopyInResponse)?;
        pgb.flush().await?;

        let mut copyin_stream = Box::pin(copyin_stream(pgb));
@@ -470,7 +468,7 @@ impl PageServerHandler {

        // Import wal provided via CopyData
        info!("importing wal");
-        pgb.write_message_noflush(&BeMessage::CopyInResponse)?;
+        pgb.write_message(&BeMessage::CopyInResponse)?;
        pgb.flush().await?;
        let mut copyin_stream = Box::pin(copyin_stream(pgb));
        let mut reader = tokio_util::io::StreamReader::new(&mut copyin_stream);
@@ -680,7 +678,7 @@ impl PageServerHandler {
        }

        // switch client to COPYOUT
-        pgb.write_message_noflush(&BeMessage::CopyOutResponse)?;
+        pgb.write_message(&BeMessage::CopyOutResponse)?;
        pgb.flush().await?;

        // Send a tarball of the latest layer on the timeline
@@ -697,7 +695,7 @@ impl PageServerHandler {
            .await?;
        }

-        pgb.write_message_noflush(&BeMessage::CopyDone)?;
+        pgb.write_message(&BeMessage::CopyDone)?;
        pgb.flush().await?;
        info!("basebackup complete");

@@ -723,7 +721,7 @@ impl PageServerHandler {
 }

 #[async_trait::async_trait]
-impl postgres_backend::Handler for PageServerHandler {
+impl postgres_backend_async::Handler for PageServerHandler {
    fn check_auth_jwt(
        &mut self,
        _pgb: &mut PostgresBackend,
@@ -814,7 +812,7 @@ impl postgres_backend::Handler for PageServerHandler {
            // Check that the timeline exists
            self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, None, false, ctx)
                .await?;
-            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+            pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        }
        // return pair of prev_lsn and last_lsn
        else if query_string.starts_with("get_last_record_rlsn ") {
@@ -837,15 +835,15 @@ impl postgres_backend::Handler for PageServerHandler {

            let end_of_timeline = timeline.get_last_record_rlsn();

-            pgb.write_message_noflush(&BeMessage::RowDescription(&[
+            pgb.write_message(&BeMessage::RowDescription(&[
                RowDescriptor::text_col(b"prev_lsn"),
                RowDescriptor::text_col(b"last_lsn"),
            ]))?
-            .write_message_noflush(&BeMessage::DataRow(&[
+            .write_message(&BeMessage::DataRow(&[
                Some(end_of_timeline.prev.to_string().as_bytes()),
                Some(end_of_timeline.last.to_string().as_bytes()),
            ]))?
-            .write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+            .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        }
        // same as basebackup, but result includes relational data as well
        else if query_string.starts_with("fullbackup ") {
@@ -886,7 +884,7 @@ impl postgres_backend::Handler for PageServerHandler {
            // Check that the timeline exists
            self.handle_basebackup_request(pgb, tenant_id, timeline_id, lsn, prev_lsn, true, ctx)
                .await?;
-            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+            pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.starts_with("import basebackup ") {
            // Import the `base` section (everything but the wal) of a basebackup.
            // Assumes the tenant already exists on this pageserver.
@@ -931,10 +929,10 @@ impl postgres_backend::Handler for PageServerHandler {
                )
                .await
            {
-                Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
+                Ok(()) => pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?,
                Err(e) => {
                    error!("error importing base backup between {base_lsn} and {end_lsn}: {e:?}");
-                    pgb.write_message_noflush(&BeMessage::ErrorResponse(
+                    pgb.write_message(&BeMessage::ErrorResponse(
                        &e.to_string(),
                        Some(e.pg_error_code()),
                    ))?
@@ -967,10 +965,10 @@ impl postgres_backend::Handler for PageServerHandler {
                .handle_import_wal(pgb, tenant_id, timeline_id, start_lsn, end_lsn, ctx)
                .await
            {
-                Ok(()) => pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?,
+                Ok(()) => pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?,
                Err(e) => {
                    error!("error importing WAL between {start_lsn} and {end_lsn}: {e:?}");
-                    pgb.write_message_noflush(&BeMessage::ErrorResponse(
+                    pgb.write_message(&BeMessage::ErrorResponse(
                        &e.to_string(),
                        Some(e.pg_error_code()),
                    ))?
@@ -979,7 +977,7 @@ impl postgres_backend::Handler for PageServerHandler {
        } else if query_string.to_ascii_lowercase().starts_with("set ") {
            // important because psycopg2 executes "SET datestyle TO 'ISO'"
            // on connect
-            pgb.write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+            pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else if query_string.starts_with("show ") {
            // show <tenant_id>
            let (_, params_raw) = query_string.split_at("show ".len());
@@ -995,7 +993,7 @@ impl postgres_backend::Handler for PageServerHandler {
            self.check_permission(Some(tenant_id))?;

            let tenant = get_active_tenant_with_timeout(tenant_id, &ctx).await?;
-            pgb.write_message_noflush(&BeMessage::RowDescription(&[
+            pgb.write_message(&BeMessage::RowDescription(&[
                RowDescriptor::int8_col(b"checkpoint_distance"),
                RowDescriptor::int8_col(b"checkpoint_timeout"),
                RowDescriptor::int8_col(b"compaction_target_size"),
@@ -1006,7 +1004,7 @@ impl postgres_backend::Handler for PageServerHandler {
                RowDescriptor::int8_col(b"image_creation_threshold"),
                RowDescriptor::int8_col(b"pitr_interval"),
            ]))?
-            .write_message_noflush(&BeMessage::DataRow(&[
+            .write_message(&BeMessage::DataRow(&[
                Some(tenant.get_checkpoint_distance().to_string().as_bytes()),
                Some(
                    tenant
@@ -1029,7 +1027,7 @@ impl postgres_backend::Handler for PageServerHandler {
                Some(tenant.get_image_creation_threshold().to_string().as_bytes()),
                Some(tenant.get_pitr_interval().as_secs().to_string().as_bytes()),
            ]))?
-            .write_message_noflush(&BeMessage::CommandComplete(b"SELECT 1"))?;
+            .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else {
            return Err(QueryError::Other(anyhow::anyhow!(
                "unknown command {query_string}"
@@ -1057,7 +1055,7 @@ impl From<GetActiveTenantError> for QueryError {
    fn from(e: GetActiveTenantError) -> Self {
        match e {
            GetActiveTenantError::WaitForActiveTimeout { .. } => QueryError::Disconnected(
-                ConnectionError::Io(io::Error::new(io::ErrorKind::TimedOut, e.to_string())),
+                ConnectionError::Socket(io::Error::new(io::ErrorKind::TimedOut, e.to_string())),
            ),
            GetActiveTenantError::Other(e) => QueryError::Other(e),
        }
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -7,11 +7,11 @@ use std::fmt;
 use std::ops::{AddAssign, Range};
 use std::time::Duration;

+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
 /// Key used in the Repository kv-store.
 ///
 /// The Repository treats this as an opaque struct, but see the code in pgdatadir_mapping.rs
 /// for what we actually store in these fields.
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Ord, PartialOrd, Serialize, Deserialize)]
 pub struct Key {
    pub field1: u8,
    pub field2: u32,
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -169,14 +169,7 @@ task_local! {
 /// Note that we don't try to limit how many task of a certain kind can be running
 /// at the same time.
 ///
-#[derive(
-    Debug,
-    // NB: enumset::EnumSetType derives PartialEq, Eq, Clone, Copy
-    enumset::EnumSetType,
-    serde::Serialize,
-    serde::Deserialize,
-    strum_macros::IntoStaticStr,
-)]
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
 pub enum TaskKind {
    // Pageserver startup, i.e., `main`
    Startup,
@@ -231,9 +224,6 @@ pub enum TaskKind {
    // Compaction. One per tenant.
    Compaction,

-    // Eviction. One per timeline.
-    Eviction,
-
    // Initial logical size calculation
    InitialLogicalSizeCalculation,

--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -45,14 +45,13 @@ use std::sync::MutexGuard;
 use std::sync::{Mutex, RwLock};
 use std::time::{Duration, Instant};

-use self::config::TenantConf;
 use self::metadata::TimelineMetadata;
 use self::remote_timeline_client::RemoteTimelineClient;
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::import_datadir;
 use crate::is_uninit_mark;
-use crate::metrics::{remove_tenant_metrics, TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC};
+use crate::metrics::{remove_tenant_metrics, TENANT_STATE_METRIC};
 use crate::repository::GcResult;
 use crate::task_mgr;
 use crate::task_mgr::TaskKind;
@@ -1619,16 +1618,8 @@ fn tree_sort_timelines(
    Ok(result)
 }

+/// Private functions
 impl Tenant {
-    pub fn tenant_specific_overrides(&self) -> TenantConfOpt {
-        *self.tenant_conf.read().unwrap()
-    }
-
-    pub fn effective_config(&self) -> TenantConf {
-        self.tenant_specific_overrides()
-            .merge(self.conf.default_tenant_conf)
-    }
-
    pub fn get_checkpoint_distance(&self) -> u64 {
        let tenant_conf = self.tenant_conf.read().unwrap();
        tenant_conf
@@ -1699,8 +1690,8 @@ impl Tenant {
            .unwrap_or(self.conf.default_tenant_conf.trace_read_requests)
    }

-    pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
-        *self.tenant_conf.write().unwrap() = new_tenant_conf;
+    pub fn update_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
+        self.tenant_conf.write().unwrap().update(&new_tenant_conf);
    }

    fn create_timeline_data(
@@ -2418,9 +2409,6 @@ impl Tenant {
    #[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
    pub async fn gather_size_inputs(
        &self,
-        // `max_retention_period` overrides the cutoff that is used to calculate the size
-        // (only if it is shorter than the real cutoff).
-        max_retention_period: Option<u64>,
        ctx: &RequestContext,
    ) -> anyhow::Result<size::ModelInputs> {
        let logical_sizes_at_once = self
@@ -2428,50 +2416,29 @@ impl Tenant {
            .concurrent_tenant_size_logical_size_queries
            .inner();

-        // TODO: Having a single mutex block concurrent reads is not great for performance.
-        //
-        // But the only case where we need to run multiple of these at once is when we
-        // request a size for a tenant manually via API, while another background calculation
-        // is in progress (which is not a common case).
+        // TODO: Having a single mutex block concurrent reads is unfortunate, but since the queries
+        // are for testing/experimenting, we tolerate this.
        //
        // See more for on the issue #2748 condenced out of the initial PR review.
        let mut shared_cache = self.cached_logical_sizes.lock().await;

-        size::gather_inputs(
-            self,
-            logical_sizes_at_once,
-            max_retention_period,
-            &mut shared_cache,
-            ctx,
-        )
-        .await
+        size::gather_inputs(self, logical_sizes_at_once, &mut shared_cache, ctx).await
    }

-    /// Calculate synthetic tenant size and cache the result.
+    /// Calculate synthetic tenant size
    /// This is periodically called by background worker.
    /// result is cached in tenant struct
    #[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
    pub async fn calculate_synthetic_size(&self, ctx: &RequestContext) -> anyhow::Result<u64> {
-        let inputs = self.gather_size_inputs(None, ctx).await?;
+        let inputs = self.gather_size_inputs(ctx).await?;

        let size = inputs.calculate()?;

-        self.set_cached_synthetic_size(size);
-
-        Ok(size)
-    }
-
-    /// Cache given synthetic size and update the metric value
-    pub fn set_cached_synthetic_size(&self, size: u64) {
        self.cached_synthetic_tenant_size
            .store(size, Ordering::Relaxed);

-        TENANT_SYNTHETIC_SIZE_METRIC
-            .get_metric_with_label_values(&[&self.tenant_id.to_string()])
-            .unwrap()
-            .set(size);
+        Ok(size)
    }
-
    pub fn get_cached_synthetic_size(&self) -> u64 {
        self.cached_synthetic_tenant_size.load(Ordering::Relaxed)
    }
@@ -2767,7 +2734,6 @@ pub mod harness {
                lagging_wal_timeout: Some(tenant_conf.lagging_wal_timeout),
                max_lsn_wal_lag: Some(tenant_conf.max_lsn_wal_lag),
                trace_read_requests: Some(tenant_conf.trace_read_requests),
-                eviction_policy: Some(tenant_conf.eviction_policy),
            }
        }
    }
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -51,7 +51,6 @@ pub struct TenantConf {
    pub checkpoint_distance: u64,
    // Inmemory layer is also flushed at least once in checkpoint_timeout to
    // eventually upload WAL after activity is stopped.
-    #[serde(with = "humantime_serde")]
    pub checkpoint_timeout: Duration,
    // Target file size, when creating image and delta layers.
    // This parameter determines L1 layer file size.
@@ -91,97 +90,30 @@ pub struct TenantConf {
    /// to avoid eager reconnects.
    pub max_lsn_wal_lag: NonZeroU64,
    pub trace_read_requests: bool,
-    pub eviction_policy: EvictionPolicy,
 }

 /// Same as TenantConf, but this struct preserves the information about
 /// which parameters are set and which are not.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
 pub struct TenantConfOpt {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub checkpoint_distance: Option<u64>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub checkpoint_timeout: Option<Duration>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub compaction_target_size: Option<u64>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
-    #[serde(default)]
    pub compaction_period: Option<Duration>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub compaction_threshold: Option<usize>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub gc_horizon: Option<u64>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
-    #[serde(default)]
    pub gc_period: Option<Duration>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub image_creation_threshold: Option<usize>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
-    #[serde(default)]
    pub pitr_interval: Option<Duration>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
-    #[serde(default)]
    pub walreceiver_connect_timeout: Option<Duration>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(with = "humantime_serde")]
-    #[serde(default)]
    pub lagging_wal_timeout: Option<Duration>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub max_lsn_wal_lag: Option<NonZeroU64>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
    pub trace_read_requests: Option<bool>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(default)]
-    pub eviction_policy: Option<EvictionPolicy>,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(tag = "kind")]
-pub enum EvictionPolicy {
-    NoEviction,
-    LayerAccessThreshold(EvictionPolicyLayerAccessThreshold),
-}
-
-impl EvictionPolicy {
-    pub fn discriminant_str(&self) -> &'static str {
-        match self {
-            EvictionPolicy::NoEviction => "NoEviction",
-            EvictionPolicy::LayerAccessThreshold(_) => "LayerAccessThreshold",
-        }
-    }
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub struct EvictionPolicyLayerAccessThreshold {
-    #[serde(with = "humantime_serde")]
-    pub period: Duration,
-    #[serde(with = "humantime_serde")]
-    pub threshold: Duration,
 }

 impl TenantConfOpt {
@@ -218,7 +150,6 @@ impl TenantConfOpt {
            trace_read_requests: self
                .trace_read_requests
                .unwrap_or(global_conf.trace_read_requests),
-            eviction_policy: self.eviction_policy.unwrap_or(global_conf.eviction_policy),
        }
    }

@@ -291,28 +222,6 @@ impl Default for TenantConf {
            max_lsn_wal_lag: NonZeroU64::new(DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG)
                .expect("cannot parse default max walreceiver Lsn wal lag"),
            trace_read_requests: false,
-            eviction_policy: EvictionPolicy::NoEviction,
        }
    }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn de_serializing_pageserver_config_omits_empty_values() {
-        let small_conf = TenantConfOpt {
-            gc_horizon: Some(42),
-            ..TenantConfOpt::default()
-        };
-
-        let toml_form = toml_edit::easy::to_string(&small_conf).unwrap();
-        assert_eq!(toml_form, "gc_horizon = 42\n");
-        assert_eq!(small_conf, toml_edit::easy::from_str(&toml_form).unwrap());
-
-        let json_form = serde_json::to_string(&small_conf).unwrap();
-        assert_eq!(json_form, "{\"gc_horizon\":42}");
-        assert_eq!(small_conf, serde_json::from_str(&json_form).unwrap());
-    }
-}
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -59,7 +59,6 @@ use std::sync::Arc;
 use utils::lsn::Lsn;

 use historic_layer_coverage::BufferedHistoricLayerCoverage;
-pub use historic_layer_coverage::Replacement;

 use super::storage_layer::range_eq;

@@ -139,30 +138,6 @@ where
        self.layer_map.remove_historic_noflush(layer)
    }

-    /// Replaces existing layer iff it is the `expected`.
-    ///
-    /// If the expected layer has been removed it will not be inserted by this function.
-    ///
-    /// Returned `Replacement` describes succeeding in replacement or the reason why it could not
-    /// be done.
-    ///
-    /// TODO replacement can be done without buffering and rebuilding layer map updates.
-    ///      One way to do that is to add a layer of indirection for returned values, so
-    ///      that we can replace values only by updating a hashmap.
-    pub fn replace_historic(
-        &mut self,
-        expected: &Arc<L>,
-        new: Arc<L>,
-    ) -> anyhow::Result<Replacement<Arc<L>>> {
-        fail::fail_point!("layermap-replace-notfound", |_| Ok(
-            // this is not what happens if an L0 layer was not found a anyhow error but perhaps
-            // that should be changed. this is good enough to show a replacement failure.
-            Replacement::NotFound
-        ));
-
-        self.layer_map.replace_historic_noflush(expected, new)
-    }
-
    // We will flush on drop anyway, but this method makes it
    // more explicit that there is some work being done.
    /// Apply all updates
@@ -279,8 +254,14 @@ where
    /// Helper function for BatchedUpdates::insert_historic
    ///
    pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
+        let kr = layer.get_key_range();
+        let lr = layer.get_lsn_range();
        self.historic.insert(
-            historic_layer_coverage::LayerKey::from(&*layer),
+            historic_layer_coverage::LayerKey {
+                key: kr.start.to_i128()..kr.end.to_i128(),
+                lsn: lr.start.0..lr.end.0,
+                is_image: !layer.is_incremental(),
+            },
            Arc::clone(&layer),
        );

@@ -297,72 +278,30 @@ where
    /// Helper function for BatchedUpdates::remove_historic
    ///
    pub fn remove_historic_noflush(&mut self, layer: Arc<L>) {
-        self.historic
-            .remove(historic_layer_coverage::LayerKey::from(&*layer));
+        let kr = layer.get_key_range();
+        let lr = layer.get_lsn_range();
+        self.historic.remove(historic_layer_coverage::LayerKey {
+            key: kr.start.to_i128()..kr.end.to_i128(),
+            lsn: lr.start.0..lr.end.0,
+            is_image: !layer.is_incremental(),
+        });

        if Self::is_l0(&layer) {
            let len_before = self.l0_delta_layers.len();
+
+            // FIXME: ptr_eq might fail to return true for 'dyn'
+            // references.  Clippy complains about this. In practice it
+            // seems to work, the assertion below would be triggered
+            // otherwise but this ought to be fixed.
+            #[allow(clippy::vtable_address_comparisons)]
            self.l0_delta_layers
-                .retain(|other| !Self::compare_arced_layers(other, &layer));
-            // this assertion is related to use of Arc::ptr_eq in Self::compare_arced_layers,
-            // there's a chance that the comparison fails at runtime due to it comparing (pointer,
-            // vtable) pairs.
-            assert_eq!(
-                self.l0_delta_layers.len(),
-                len_before - 1,
-                "failed to locate removed historic layer from l0_delta_layers"
-            );
+                .retain(|other| !Arc::ptr_eq(other, &layer));
+            assert_eq!(self.l0_delta_layers.len(), len_before - 1);
        }

        NUM_ONDISK_LAYERS.dec();
    }

-    pub(self) fn replace_historic_noflush(
-        &mut self,
-        expected: &Arc<L>,
-        new: Arc<L>,
-    ) -> anyhow::Result<Replacement<Arc<L>>> {
-        let key = historic_layer_coverage::LayerKey::from(&**expected);
-        let other = historic_layer_coverage::LayerKey::from(&*new);
-
-        let expected_l0 = Self::is_l0(expected);
-        let new_l0 = Self::is_l0(&new);
-
-        anyhow::ensure!(
-            key == other,
-            "expected and new must have equal LayerKeys: {key:?} != {other:?}"
-        );
-
-        anyhow::ensure!(
-            expected_l0 == new_l0,
-            "expected and new must both be l0 deltas or neither should be: {expected_l0} != {new_l0}"
-        );
-
-        let l0_index = if expected_l0 {
-            // find the index in case replace worked, we need to replace that as well
-            Some(
-                self.l0_delta_layers
-                    .iter()
-                    .position(|slot| Self::compare_arced_layers(slot, expected))
-                    .ok_or_else(|| anyhow::anyhow!("existing l0 delta layer was not found"))?,
-            )
-        } else {
-            None
-        };
-
-        let replaced = self.historic.replace(&key, new.clone(), |existing| {
-            Self::compare_arced_layers(existing, expected)
-        });
-
-        if let Replacement::Replaced { .. } = &replaced {
-            if let Some(index) = l0_index {
-                self.l0_delta_layers[index] = new;
-            }
-        }
-
-        Ok(replaced)
-    }
-
    /// Helper function for BatchedUpdates::drop.
    pub(self) fn flush_updates(&mut self) {
        self.historic.rebuild();
@@ -736,119 +675,4 @@ where
        println!("End dump LayerMap");
        Ok(())
    }
-
-    /// Similar to `Arc::ptr_eq`, but only compares the object pointers, not vtables.
-    ///
-    /// Returns `true` if the two `Arc` point to the same layer, false otherwise.
-    #[inline(always)]
-    pub fn compare_arced_layers(left: &Arc<L>, right: &Arc<L>) -> bool {
-        // "dyn Trait" objects are "fat pointers" in that they have two components:
-        // - pointer to the object
-        // - pointer to the vtable
-        //
-        // rust does not provide a guarantee that these vtables are unique, but however
-        // `Arc::ptr_eq` as of writing (at least up to 1.67) uses a comparison where both the
-        // pointer and the vtable need to be equal.
-        //
-        // See: https://github.com/rust-lang/rust/issues/103763
-        //
-        // A future version of rust will most likely use this form below, where we cast each
-        // pointer into a pointer to unit, which drops the inaccessible vtable pointer, making it
-        // not affect the comparison.
-        //
-        // See: https://github.com/rust-lang/rust/pull/106450
-        let left = Arc::as_ptr(left) as *const ();
-        let right = Arc::as_ptr(right) as *const ();
-
-        left == right
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::{LayerMap, Replacement};
-    use crate::tenant::storage_layer::{Layer, LayerDescriptor, LayerFileName};
-    use std::str::FromStr;
-    use std::sync::Arc;
-
-    mod l0_delta_layers_updated {
-
-        use super::*;
-
-        #[test]
-        fn for_full_range_delta() {
-            // l0_delta_layers are used by compaction, and should observe all buffered updates
-            l0_delta_layers_updated_scenario(
-                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000053423C21-0000000053424D69",
-                true
-            )
-        }
-
-        #[test]
-        fn for_non_full_range_delta() {
-            // has minimal uncovered areas compared to l0_delta_layers_updated_on_insert_replace_remove_for_full_range_delta
-            l0_delta_layers_updated_scenario(
-                "000000000000000000000000000000000001-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE__0000000053423C21-0000000053424D69",
-                // because not full range
-                false
-            )
-        }
-
-        #[test]
-        fn for_image() {
-            l0_delta_layers_updated_scenario(
-                "000000000000000000000000000000000000-000000000000000000000000000000010000__0000000053424D69",
-                // code only checks if it is a full range layer, doesn't care about images, which must
-                // mean we should in practice never have full range images
-                false
-            )
-        }
-
-        fn l0_delta_layers_updated_scenario(layer_name: &str, expected_l0: bool) {
-            let name = LayerFileName::from_str(layer_name).unwrap();
-            let skeleton = LayerDescriptor::from(name);
-
-            let remote: Arc<dyn Layer> = Arc::new(skeleton.clone());
-            let downloaded: Arc<dyn Layer> = Arc::new(skeleton);
-
-            let mut map = LayerMap::default();
-
-            // two disjoint Arcs in different lifecycle phases.
-            assert!(!LayerMap::compare_arced_layers(&remote, &downloaded));
-
-            let expected_in_counts = (1, usize::from(expected_l0));
-
-            map.batch_update().insert_historic(remote.clone());
-            assert_eq!(count_layer_in(&map, &remote), expected_in_counts);
-
-            let replaced = map
-                .batch_update()
-                .replace_historic(&remote, downloaded.clone())
-                .expect("name derived attributes are the same");
-            assert!(
-                matches!(replaced, Replacement::Replaced { .. }),
-                "{replaced:?}"
-            );
-            assert_eq!(count_layer_in(&map, &downloaded), expected_in_counts);
-
-            map.batch_update().remove_historic(downloaded.clone());
-            assert_eq!(count_layer_in(&map, &downloaded), (0, 0));
-        }
-
-        fn count_layer_in(map: &LayerMap<dyn Layer>, layer: &Arc<dyn Layer>) -> (usize, usize) {
-            let historic = map
-                .iter_historic_layers()
-                .filter(|x| LayerMap::compare_arced_layers(x, layer))
-                .count();
-            let l0s = map
-                .get_level0_deltas()
-                .expect("why does this return a result");
-            let l0 = l0s
-                .iter()
-                .filter(|x| LayerMap::compare_arced_layers(x, layer))
-                .count();
-
-            (historic, l0)
-        }
-    }
 }
--- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
@@ -41,18 +41,6 @@ impl Ord for LayerKey {
    }
 }

-impl<'a, L: crate::tenant::storage_layer::Layer + ?Sized> From<&'a L> for LayerKey {
-    fn from(layer: &'a L) -> Self {
-        let kr = layer.get_key_range();
-        let lr = layer.get_lsn_range();
-        LayerKey {
-            key: kr.start.to_i128()..kr.end.to_i128(),
-            lsn: lr.start.0..lr.end.0,
-            is_image: !layer.is_incremental(),
-        }
-    }
-}
-
 /// Efficiently queryable layer coverage for each LSN.
 ///
 /// Allows answering layer map queries very efficiently,
@@ -94,13 +82,15 @@ impl<Value: Clone> HistoricLayerCoverage<Value> {
        }

        // Insert into data structure
-        let target = if layer_key.is_image {
-            &mut self.head.image_coverage
+        if layer_key.is_image {
+            self.head
+                .image_coverage
+                .insert(layer_key.key, layer_key.lsn.clone(), value);
        } else {
-            &mut self.head.delta_coverage
-        };
-
-        target.insert(layer_key.key, layer_key.lsn.clone(), value);
+            self.head
+                .delta_coverage
+                .insert(layer_key.key, layer_key.lsn.clone(), value);
+        }

        // Remember history. Clone is O(1)
        self.historic.insert(layer_key.lsn.start, self.head.clone());
@@ -425,59 +415,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
        self.buffer.insert(layer_key, None);
    }

-    /// Replaces a previous layer with a new layer value.
-    ///
-    /// The replacement is conditional on:
-    /// - there is an existing `LayerKey` record
-    /// - there is no buffered removal for the given `LayerKey`
-    /// - the given closure returns true for the current `Value`
-    ///
-    /// The closure is used to compare the latest value (buffered insert, or existing layer)
-    /// against some expectation. This allows to use `Arc::ptr_eq` or similar which would be
-    /// inaccessible via `PartialEq` trait.
-    ///
-    /// Returns a `Replacement` value describing the outcome; only the case of
-    /// `Replacement::Replaced` modifies the map and requires a rebuild.
-    pub fn replace<F>(
-        &mut self,
-        layer_key: &LayerKey,
-        new: Value,
-        check_expected: F,
-    ) -> Replacement<Value>
-    where
-        F: FnOnce(&Value) -> bool,
-    {
-        let (slot, in_buffered) = match self.buffer.get(layer_key) {
-            Some(inner @ Some(_)) => {
-                // we compare against the buffered version, because there will be a later
-                // rebuild before querying
-                (inner.as_ref(), true)
-            }
-            Some(None) => {
-                // buffer has removal for this key; it will not be equivalent by any check_expected.
-                return Replacement::RemovalBuffered;
-            }
-            None => {
-                // no pending modification for the key, check layers
-                (self.layers.get(layer_key), false)
-            }
-        };
-
-        match slot {
-            Some(existing) if !check_expected(existing) => {
-                // unfortunate clone here, but otherwise the nll borrowck grows the region of
-                // 'a to cover the whole function, and we could not mutate in the other
-                // Some(existing) branch
-                Replacement::Unexpected(existing.clone())
-            }
-            None => Replacement::NotFound,
-            Some(_existing) => {
-                self.insert(layer_key.to_owned(), new);
-                Replacement::Replaced { in_buffered }
-            }
-        }
-    }
-
    pub fn rebuild(&mut self) {
        // Find the first LSN that needs to be rebuilt
        let rebuild_since: u64 = match self.buffer.iter().next() {
@@ -546,22 +483,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
    }
 }

-/// Outcome of the replace operation.
-#[derive(Debug)]
-pub enum Replacement<Value> {
-    /// Previous value was replaced with the new value.
-    Replaced {
-        /// Replacement happened for a scheduled insert.
-        in_buffered: bool,
-    },
-    /// Key was not found buffered updates or existing layers.
-    NotFound,
-    /// Key has been scheduled for removal, it was not replaced.
-    RemovalBuffered,
-    /// Previous value was rejected by the closure.
-    Unexpected(Value),
-}
-
 #[test]
 fn test_retroactive_regression_1() {
    let mut map = BufferedHistoricLayerCoverage::new();
@@ -627,7 +548,7 @@ fn test_retroactive_simple() {
        LayerKey {
            key: 2..5,
            lsn: 105..106,
-            is_image: false,
+            is_image: true,
        },
        "Delta 1".to_string(),
    );
@@ -635,24 +556,17 @@ fn test_retroactive_simple() {
    // Rebuild so we can start querying
    map.rebuild();

-    {
-        let map = map.get().expect("rebuilt");
-
-        let version = map.get_version(90);
-        assert!(version.is_none());
-        let version = map.get_version(102).unwrap();
-        assert_eq!(version.image_coverage.query(4), Some("Image 1".to_string()));
-
-        let version = map.get_version(107).unwrap();
-        assert_eq!(version.image_coverage.query(4), Some("Image 1".to_string()));
-        assert_eq!(version.delta_coverage.query(4), Some("Delta 1".to_string()));
-
-        let version = map.get_version(115).unwrap();
-        assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
-
-        let version = map.get_version(125).unwrap();
-        assert_eq!(version.image_coverage.query(4), Some("Image 3".to_string()));
-    }
+    // Query key 4
+    let version = map.get().unwrap().get_version(90);
+    assert!(version.is_none());
+    let version = map.get().unwrap().get_version(102).unwrap();
+    assert_eq!(version.image_coverage.query(4), Some("Image 1".to_string()));
+    let version = map.get().unwrap().get_version(107).unwrap();
+    assert_eq!(version.image_coverage.query(4), Some("Delta 1".to_string()));
+    let version = map.get().unwrap().get_version(115).unwrap();
+    assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
+    let version = map.get().unwrap().get_version(125).unwrap();
+    assert_eq!(version.image_coverage.query(4), Some("Image 3".to_string()));

    // Remove Image 3
    map.remove(LayerKey {
@@ -662,147 +576,8 @@ fn test_retroactive_simple() {
    });
    map.rebuild();

-    {
-        // Check deletion worked
-        let map = map.get().expect("rebuilt");
-        let version = map.get_version(125).unwrap();
-        assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
-        assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
-    }
-}
-
-#[test]
-fn test_retroactive_replacement() {
-    let mut map = BufferedHistoricLayerCoverage::new();
-
-    let keys = [
-        LayerKey {
-            key: 0..5,
-            lsn: 100..101,
-            is_image: true,
-        },
-        LayerKey {
-            key: 3..9,
-            lsn: 110..111,
-            is_image: true,
-        },
-        LayerKey {
-            key: 4..6,
-            lsn: 120..121,
-            is_image: true,
-        },
-    ];
-
-    let layers = [
-        "Image 1".to_string(),
-        "Image 2".to_string(),
-        "Image 3".to_string(),
-    ];
-
-    for (key, layer) in keys.iter().zip(layers.iter()) {
-        map.insert(key.to_owned(), layer.to_owned());
-    }
-
-    // rebuild is not necessary here, because replace works for both buffered updates and existing
-    // layers.
-
-    for (key, orig_layer) in keys.iter().zip(layers.iter()) {
-        let replacement = format!("Remote {orig_layer}");
-
-        // evict
-        let ret = map.replace(key, replacement.clone(), |l| l == orig_layer);
-        assert!(
-            matches!(ret, Replacement::Replaced { .. }),
-            "replace {orig_layer}: {ret:?}"
-        );
-        map.rebuild();
-
-        let at = key.lsn.end + 1;
-
-        let version = map.get().expect("rebuilt").get_version(at).unwrap();
-        assert_eq!(
-            version.image_coverage.query(4).as_deref(),
-            Some(replacement.as_str()),
-            "query for 4 at version {at} after eviction",
-        );
-
-        // download
-        let ret = map.replace(key, orig_layer.clone(), |l| l == &replacement);
-        assert!(
-            matches!(ret, Replacement::Replaced { .. }),
-            "replace {orig_layer} back: {ret:?}"
-        );
-        map.rebuild();
-        let version = map.get().expect("rebuilt").get_version(at).unwrap();
-        assert_eq!(
-            version.image_coverage.query(4).as_deref(),
-            Some(orig_layer.as_str()),
-            "query for 4 at version {at} after download",
-        );
-    }
-}
-
-#[test]
-fn missing_key_is_not_inserted_with_replace() {
-    let mut map = BufferedHistoricLayerCoverage::new();
-    let key = LayerKey {
-        key: 0..5,
-        lsn: 100..101,
-        is_image: true,
-    };
-
-    let ret = map.replace(&key, "should not replace", |_| true);
-    assert!(matches!(ret, Replacement::NotFound), "{ret:?}");
-    map.rebuild();
-    assert!(map
-        .get()
-        .expect("no changes to rebuild")
-        .get_version(102)
-        .is_none());
-}
-
-#[test]
-fn replacing_buffered_insert_and_remove() {
-    let mut map = BufferedHistoricLayerCoverage::new();
-    let key = LayerKey {
-        key: 0..5,
-        lsn: 100..101,
-        is_image: true,
-    };
-
-    map.insert(key.clone(), "Image 1");
-    let ret = map.replace(&key, "Remote Image 1", |&l| l == "Image 1");
-    assert!(
-        matches!(ret, Replacement::Replaced { in_buffered: true }),
-        "{ret:?}"
-    );
-    map.rebuild();
-
-    assert_eq!(
-        map.get()
-            .expect("rebuilt")
-            .get_version(102)
-            .unwrap()
-            .image_coverage
-            .query(4),
-        Some("Remote Image 1")
-    );
-
-    map.remove(key.clone());
-    let ret = map.replace(&key, "should not replace", |_| true);
-    assert!(
-        matches!(ret, Replacement::RemovalBuffered),
-        "cannot replace after scheduled remove: {ret:?}"
-    );
-
-    map.rebuild();
-
-    let ret = map.replace(&key, "should not replace", |_| true);
-    assert!(
-        matches!(ret, Replacement::NotFound),
-        "cannot replace after remove + rebuild: {ret:?}"
-    );
-
-    let at_version = map.get().expect("rebuilt").get_version(102);
-    assert!(at_version.is_none());
+    // Check deletion worked
+    let version = map.get().unwrap().get_version(125).unwrap();
+    assert_eq!(version.image_coverage.query(4), Some("Image 2".to_string()));
+    assert_eq!(version.image_coverage.query(8), Some("Image 4".to_string()));
 }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -285,22 +285,17 @@ pub async fn create_tenant(
    }).await
 }

-pub async fn set_new_tenant_config(
+pub async fn update_tenant_config(
    conf: &'static PageServerConf,
-    new_tenant_conf: TenantConfOpt,
+    tenant_conf: TenantConfOpt,
    tenant_id: TenantId,
 ) -> anyhow::Result<()> {
    info!("configuring tenant {tenant_id}");
    let tenant = get_tenant(tenant_id, true).await?;

+    tenant.update_tenant_config(tenant_conf);
    let tenant_config_path = conf.tenant_config_path(tenant_id);
-    Tenant::persist_tenant_config(
-        &tenant.tenant_id(),
-        &tenant_config_path,
-        new_tenant_conf,
-        false,
-    )?;
-    tenant.set_new_tenant_config(new_tenant_conf);
+    Tenant::persist_tenant_config(&tenant.tenant_id(), &tenant_config_path, tenant_conf, false)?;
    Ok(())
 }

@@ -540,11 +535,13 @@ where
    }
 }

+#[cfg(feature = "testing")]
 use {
    crate::repository::GcResult, pageserver_api::models::TimelineGcRequest,
    utils::http::error::ApiError,
 };

+#[cfg(feature = "testing")]
 pub async fn immediate_gc(
    tenant_id: TenantId,
    timeline_id: TimelineId,
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -571,15 +571,14 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    /// Launch a delete operation in the background.
    ///
-    /// The operation does not modify local state but assumes the local files have already been
-    /// deleted, and is used to mirror those changes to remote.
+    /// Launch a delete operation in the background.
    ///
    /// Note: This schedules an index file upload before the deletions.  The
    /// deletion won't actually be performed, until any previously scheduled
    /// upload operations, and the index file upload, have completed
    /// succesfully.
+    ///
    pub fn schedule_layer_file_deletion(
        self: &Arc<Self>,
        names: &[LayerFileName],
@@ -1136,29 +1135,18 @@ mod tests {
        client.init_upload_queue_for_empty_remote(&metadata)?;

        // Create a couple of dummy files,  schedule upload for them
-        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
-        let layer_file_name_2: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D9-00000000016B5A52".parse().unwrap();
-        let layer_file_name_3: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59DA-00000000016B5A53".parse().unwrap();
-        let content_1 = dummy_contents("foo");
-        let content_2 = dummy_contents("bar");
-        let content_3 = dummy_contents("baz");
-        std::fs::write(
-            timeline_path.join(layer_file_name_1.file_name()),
-            &content_1,
-        )?;
-        std::fs::write(
-            timeline_path.join(layer_file_name_2.file_name()),
-            &content_2,
-        )?;
-        std::fs::write(timeline_path.join(layer_file_name_3.file_name()), content_3)?;
+        let content_foo = dummy_contents("foo");
+        let content_bar = dummy_contents("bar");
+        std::fs::write(timeline_path.join("foo"), &content_foo)?;
+        std::fs::write(timeline_path.join("bar"), &content_bar)?;

        client.schedule_layer_file_upload(
-            &layer_file_name_1,
-            &LayerFileMetadata::new(content_1.len() as u64),
+            &LayerFileName::Test("foo".to_owned()),
+            &LayerFileMetadata::new(content_foo.len() as u64),
        )?;
        client.schedule_layer_file_upload(
-            &layer_file_name_2,
-            &LayerFileMetadata::new(content_2.len() as u64),
+            &LayerFileName::Test("bar".to_owned()),
+            &LayerFileMetadata::new(content_bar.len() as u64),
        )?;

        // Check that they are started immediately, not queued
@@ -1195,13 +1183,7 @@ mod tests {

        // Download back the index.json, and check that the list of files is correct
        let index_part = runtime.block_on(client.download_index_file())?;
-        assert_file_list(
-            &index_part.timeline_layers,
-            &[
-                &layer_file_name_1.file_name(),
-                &layer_file_name_2.file_name(),
-            ],
-        );
+        assert_file_list(&index_part.timeline_layers, &["foo", "bar"]);
        let downloaded_metadata = index_part.parse_metadata()?;
        assert_eq!(downloaded_metadata, metadata);

@@ -1209,10 +1191,10 @@ mod tests {
        let content_baz = dummy_contents("baz");
        std::fs::write(timeline_path.join("baz"), &content_baz)?;
        client.schedule_layer_file_upload(
-            &layer_file_name_3,
+            &LayerFileName::Test("baz".to_owned()),
            &LayerFileMetadata::new(content_baz.len() as u64),
        )?;
-        client.schedule_layer_file_deletion(&[layer_file_name_1.clone()])?;
+        client.schedule_layer_file_deletion(&[LayerFileName::Test("foo".to_owned())])?;
        {
            let mut guard = client.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut().unwrap();
@@ -1224,26 +1206,12 @@ mod tests {
            assert!(upload_queue.num_inprogress_deletions == 0);
            assert!(upload_queue.latest_files_changes_since_metadata_upload_scheduled == 0);
        }
-        assert_remote_files(
-            &[
-                &layer_file_name_1.file_name(),
-                &layer_file_name_2.file_name(),
-                "index_part.json",
-            ],
-            &remote_timeline_dir,
-        );
+        assert_remote_files(&["foo", "bar", "index_part.json"], &remote_timeline_dir);

        // Finish them
        runtime.block_on(client.wait_completion())?;

-        assert_remote_files(
-            &[
-                &layer_file_name_2.file_name(),
-                &layer_file_name_3.file_name(),
-                "index_part.json",
-            ],
-            &remote_timeline_dir,
-        );
+        assert_remote_files(&["bar", "baz", "index_part.json"], &remote_timeline_dir);

        Ok(())
    }
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -8,8 +8,7 @@ use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use tracing::warn;

-use crate::tenant::metadata::TimelineMetadata;
-use crate::tenant::storage_layer::LayerFileName;
+use crate::tenant::{metadata::TimelineMetadata, storage_layer::LayerFileName};

 use utils::lsn::Lsn;

@@ -275,7 +274,7 @@ mod tests {
            "timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
            "layer_metadata":{
                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
-                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
+                "LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
            },
            "disk_consistent_lsn":"0/16960E8",
            "metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
@@ -289,7 +288,7 @@ mod tests {
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: Some(25600000),
                }),
-                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
+                (LayerFileName::new_test("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
                    // serde_json should always parse this but this might be a double with jq for
                    // example.
                    file_size: Some(9007199254741001),
@@ -313,7 +312,7 @@ mod tests {
            "missing_layers":["This shouldn't fail deserialization"],
            "layer_metadata":{
                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
-                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51": { "file_size": 9007199254741001 }
+                "LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
            },
            "disk_consistent_lsn":"0/16960E8",
            "metadata_bytes":[112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
@@ -327,7 +326,7 @@ mod tests {
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: Some(25600000),
                }),
-                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap(), IndexLayerMetadata {
+                (LayerFileName::new_test("not_a_real_layer_but_adding_coverage"), IndexLayerMetadata {
                    // serde_json should always parse this but this might be a double with jq for
                    // example.
                    file_size: Some(9007199254741001),
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -6,25 +6,14 @@ mod image_layer;
 mod inmemory_layer;
 mod remote_layer;

-use crate::config::PageServerConf;
 use crate::context::RequestContext;
 use crate::repository::{Key, Value};
-use crate::task_mgr::TaskKind;
 use crate::walrecord::NeonWalRecord;
 use anyhow::Result;
 use bytes::Bytes;
-use either::Either;
-use enum_map::EnumMap;
-use enumset::EnumSet;
-use pageserver_api::models::LayerAccessKind;
-use pageserver_api::models::{
-    HistoricLayerInfo, LayerResidenceEvent, LayerResidenceEventReason, LayerResidenceStatus,
-};
 use std::ops::Range;
 use std::path::PathBuf;
-use std::sync::{Arc, Mutex};
-use std::time::{SystemTime, UNIX_EPOCH};
-use utils::history_buffer::HistoryBufferWithDropCounter;
+use std::sync::Arc;

 use utils::{
    id::{TenantId, TimelineId},
@@ -32,7 +21,7 @@ use utils::{
 };

 pub use delta_layer::{DeltaLayer, DeltaLayerWriter};
-pub use filename::{DeltaFileName, ImageFileName, LayerFileName};
+pub use filename::{DeltaFileName, ImageFileName, LayerFileName, PathOrConf};
 pub use image_layer::{ImageLayer, ImageLayerWriter};
 pub use inmemory_layer::InMemoryLayer;
 pub use remote_layer::RemoteLayer;
@@ -92,191 +81,9 @@ pub enum ValueReconstructResult {
    Missing,
 }

-#[derive(Debug)]
-pub struct LayerAccessStats(Mutex<LayerAccessStatsLocked>);
-
-/// This struct holds two instances of [`LayerAccessStatsInner`].
-/// Accesses are recorded to both instances.
-/// The `for_scraping_api`instance can be reset from the management API via [`LayerAccessStatsReset`].
-/// The `for_eviction_policy` is never reset.
-#[derive(Debug, Default, Clone)]
-struct LayerAccessStatsLocked {
-    for_scraping_api: LayerAccessStatsInner,
-    for_eviction_policy: LayerAccessStatsInner,
-}
-
-impl LayerAccessStatsLocked {
-    fn iter_mut(&mut self) -> impl Iterator<Item = &mut LayerAccessStatsInner> {
-        [&mut self.for_scraping_api, &mut self.for_eviction_policy].into_iter()
-    }
-}
-
-#[derive(Debug, Default, Clone)]
-struct LayerAccessStatsInner {
-    first_access: Option<LayerAccessStatFullDetails>,
-    count_by_access_kind: EnumMap<LayerAccessKind, u64>,
-    task_kind_flag: EnumSet<TaskKind>,
-    last_accesses: HistoryBufferWithDropCounter<LayerAccessStatFullDetails, 16>,
-    last_residence_changes: HistoryBufferWithDropCounter<LayerResidenceEvent, 16>,
-}
-
-#[derive(Debug, Clone, Copy)]
-pub(super) struct LayerAccessStatFullDetails {
-    pub(super) when: SystemTime,
-    pub(super) task_kind: TaskKind,
-    pub(super) access_kind: LayerAccessKind,
-}
-
-#[derive(Clone, Copy, strum_macros::EnumString)]
-pub enum LayerAccessStatsReset {
-    NoReset,
-    JustTaskKindFlags,
-    AllStats,
-}
-
-fn system_time_to_millis_since_epoch(ts: &SystemTime) -> u64 {
-    ts.duration_since(UNIX_EPOCH)
-        .expect("better to die in this unlikely case than report false stats")
-        .as_millis()
-        .try_into()
-        .expect("64 bits is enough for few more years")
-}
-
-impl LayerAccessStatFullDetails {
-    fn as_api_model(&self) -> pageserver_api::models::LayerAccessStatFullDetails {
-        let Self {
-            when,
-            task_kind,
-            access_kind,
-        } = self;
-        pageserver_api::models::LayerAccessStatFullDetails {
-            when_millis_since_epoch: system_time_to_millis_since_epoch(when),
-            task_kind: task_kind.into(), // into static str, powered by strum_macros
-            access_kind: *access_kind,
-        }
-    }
-}
-
-impl LayerAccessStats {
-    pub(crate) fn for_loading_layer(status: LayerResidenceStatus) -> Self {
-        let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
-        new.record_residence_event(status, LayerResidenceEventReason::LayerLoad);
-        new
-    }
-
-    pub(crate) fn for_new_layer_file() -> Self {
-        let new = LayerAccessStats(Mutex::new(LayerAccessStatsLocked::default()));
-        new.record_residence_event(
-            LayerResidenceStatus::Resident,
-            LayerResidenceEventReason::LayerCreate,
-        );
-        new
-    }
-
-    /// Creates a clone of `self` and records `new_status` in the clone.
-    /// The `new_status` is not recorded in `self`
-    pub(crate) fn clone_for_residence_change(
-        &self,
-        new_status: LayerResidenceStatus,
-    ) -> LayerAccessStats {
-        let clone = {
-            let inner = self.0.lock().unwrap();
-            inner.clone()
-        };
-        let new = LayerAccessStats(Mutex::new(clone));
-        new.record_residence_event(new_status, LayerResidenceEventReason::ResidenceChange);
-        new
-    }
-
-    fn record_residence_event(
-        &self,
-        status: LayerResidenceStatus,
-        reason: LayerResidenceEventReason,
-    ) {
-        let mut locked = self.0.lock().unwrap();
-        locked.iter_mut().for_each(|inner| {
-            inner
-                .last_residence_changes
-                .write(LayerResidenceEvent::new(status, reason))
-        });
-    }
-
-    fn record_access(&self, access_kind: LayerAccessKind, task_kind: TaskKind) {
-        let this_access = LayerAccessStatFullDetails {
-            when: SystemTime::now(),
-            task_kind,
-            access_kind,
-        };
-
-        let mut locked = self.0.lock().unwrap();
-        locked.iter_mut().for_each(|inner| {
-            inner.first_access.get_or_insert(this_access);
-            inner.count_by_access_kind[access_kind] += 1;
-            inner.task_kind_flag |= task_kind;
-            inner.last_accesses.write(this_access);
-        })
-    }
-
-    fn as_api_model(
-        &self,
-        reset: LayerAccessStatsReset,
-    ) -> pageserver_api::models::LayerAccessStats {
-        let mut locked = self.0.lock().unwrap();
-        let inner = &mut locked.for_scraping_api;
-        let LayerAccessStatsInner {
-            first_access,
-            count_by_access_kind,
-            task_kind_flag,
-            last_accesses,
-            last_residence_changes,
-        } = inner;
-        let ret = pageserver_api::models::LayerAccessStats {
-            access_count_by_access_kind: count_by_access_kind
-                .iter()
-                .map(|(kind, count)| (kind, *count))
-                .collect(),
-            task_kind_access_flag: task_kind_flag
-                .iter()
-                .map(|task_kind| task_kind.into()) // into static str, powered by strum_macros
-                .collect(),
-            first: first_access.as_ref().map(|a| a.as_api_model()),
-            accesses_history: last_accesses.map(|m| m.as_api_model()),
-            residence_events_history: last_residence_changes.clone(),
-        };
-        match reset {
-            LayerAccessStatsReset::NoReset => (),
-            LayerAccessStatsReset::JustTaskKindFlags => {
-                inner.task_kind_flag.clear();
-            }
-            LayerAccessStatsReset::AllStats => {
-                *inner = LayerAccessStatsInner::default();
-            }
-        }
-        ret
-    }
-
-    pub(super) fn most_recent_access_or_residence_event(
-        &self,
-    ) -> Either<LayerAccessStatFullDetails, LayerResidenceEvent> {
-        let locked = self.0.lock().unwrap();
-        let inner = &locked.for_eviction_policy;
-        match inner.last_accesses.recent() {
-            Some(a) => Either::Left(*a),
-            None => match inner.last_residence_changes.recent() {
-                Some(e) => Either::Right(e.clone()),
-                None => unreachable!("constructors for LayerAccessStats ensure that there's always a residence change event"),
-            }
-        }
-    }
-}
-
 /// Supertrait of the [`Layer`] trait that captures the bare minimum interface
 /// required by [`LayerMap`].
-///
-/// All layers should implement a minimal `std::fmt::Debug` without tenant or
-/// timeline names, because those are known in the context of which the layers
-/// are used in (timeline).
-pub trait Layer: std::fmt::Debug + Send + Sync {
+pub trait Layer: Send + Sync {
    /// Range of keys that this layer covers
    fn get_key_range(&self) -> Range<Key>;

@@ -339,7 +146,8 @@ pub type LayerKeyIter<'i> = Box<dyn Iterator<Item = (Key, Lsn, u64)> + 'i>;
 /// Furthermore, there are two kinds of on-disk layers: delta and image layers.
 /// A delta layer contains all modifications within a range of LSNs and keys.
 /// An image layer is a snapshot of all the data in a key-range, at a single
-/// LSN.
+/// LSN
+///
 pub trait PersistentLayer: Layer {
    fn get_tenant_id(&self) -> TenantId;

@@ -379,10 +187,6 @@ pub trait PersistentLayer: Layer {
    /// Should not change over the lifetime of the layer object because
    /// current_physical_size is computed as the som of this value.
    fn file_size(&self) -> Option<u64>;
-
-    fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo;
-
-    fn access_stats(&self) -> &LayerAccessStats;
 }

 pub fn downcast_remote_layer(
@@ -395,11 +199,15 @@ pub fn downcast_remote_layer(
    }
 }

+impl std::fmt::Debug for dyn Layer {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Layer")
+            .field("short_id", &self.short_id())
+            .finish()
+    }
+}
+
 /// Holds metadata about a layer without any content. Used mostly for testing.
-///
-/// To use filenames as fixtures, parse them as [`LayerFileName`] then convert from that to a
-/// LayerDescriptor.
-#[derive(Clone, Debug)]
 pub struct LayerDescriptor {
    pub key: Range<Key>,
    pub lsn: Range<Lsn>,
@@ -438,61 +246,3 @@ impl Layer for LayerDescriptor {
        todo!()
    }
 }
-
-impl From<DeltaFileName> for LayerDescriptor {
-    fn from(value: DeltaFileName) -> Self {
-        let short_id = value.to_string();
-        LayerDescriptor {
-            key: value.key_range,
-            lsn: value.lsn_range,
-            is_incremental: true,
-            short_id,
-        }
-    }
-}
-
-impl From<ImageFileName> for LayerDescriptor {
-    fn from(value: ImageFileName) -> Self {
-        let short_id = value.to_string();
-        let lsn = value.lsn_as_range();
-        LayerDescriptor {
-            key: value.key_range,
-            lsn,
-            is_incremental: false,
-            short_id,
-        }
-    }
-}
-
-impl From<LayerFileName> for LayerDescriptor {
-    fn from(value: LayerFileName) -> Self {
-        match value {
-            LayerFileName::Delta(d) => Self::from(d),
-            LayerFileName::Image(i) => Self::from(i),
-        }
-    }
-}
-
-/// Helper enum to hold a PageServerConf, or a path
-///
-/// This is used by DeltaLayer and ImageLayer. Normally, this holds a reference to the
-/// global config, and paths to layer files are constructed using the tenant/timeline
-/// path from the config. But in the 'pageserver_binutils' binary, we need to construct a Layer
-/// struct for a file on disk, without having a page server running, so that we have no
-/// config. In that case, we use the Path variant to hold the full path to the file on
-/// disk.
-enum PathOrConf {
-    Path(PathBuf),
-    Conf(&'static PageServerConf),
-}
-
-/// Range wrapping newtype, which uses display to render Debug.
-///
-/// Useful with `Key`, which has too verbose `{:?}` for printing multiple layers.
-struct RangeDisplayDebug<'a, T: std::fmt::Display>(&'a Range<T>);
-
-impl<'a, T: std::fmt::Display> std::fmt::Debug for RangeDisplayDebug<'a, T> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}..{}", self.0.start, self.0.end)
-    }
-}
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -37,7 +37,6 @@ use crate::virtual_file::VirtualFile;
 use crate::{walrecord, TEMP_FILE_SUFFIX};
 use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION};
 use anyhow::{bail, ensure, Context, Result};
-use pageserver_api::models::{HistoricLayerInfo, LayerAccessKind};
 use rand::{distributions::Alphanumeric, Rng};
 use serde::{Deserialize, Serialize};
 use std::fs::{self, File};
@@ -55,10 +54,7 @@ use utils::{
    lsn::Lsn,
 };

-use super::{
-    DeltaFileName, Layer, LayerAccessStats, LayerAccessStatsReset, LayerFileName, LayerIter,
-    LayerKeyIter, LayerResidenceStatus, PathOrConf,
-};
+use super::{DeltaFileName, Layer, LayerFileName, LayerIter, LayerKeyIter, PathOrConf};

 ///
 /// Header stored in the beginning of the file
@@ -170,13 +166,14 @@ impl DeltaKey {
    }
 }

-/// DeltaLayer is the in-memory data structure associated with an on-disk delta
-/// file.
 ///
-/// We keep a DeltaLayer in memory for each file, in the LayerMap. If a layer
-/// is in "loaded" state, we have a copy of the index in memory, in 'inner'.
-/// Otherwise the struct is just a placeholder for a file that exists on disk,
-/// and it needs to be loaded before using it in queries.
+/// DeltaLayer is the in-memory data structure associated with an
+/// on-disk delta file.  We keep a DeltaLayer in memory for each
+/// file, in the LayerMap. If a layer is in "loaded" state, we have a
+/// copy of the index in memory, in 'inner'. Otherwise the struct is
+/// just a placeholder for a file that exists on disk, and it needs to
+/// be loaded before using it in queries.
+///
 pub struct DeltaLayer {
    path_or_conf: PathOrConf,

@@ -187,24 +184,9 @@ pub struct DeltaLayer {

    pub file_size: u64,

-    access_stats: LayerAccessStats,
-
    inner: RwLock<DeltaLayerInner>,
 }

-impl std::fmt::Debug for DeltaLayer {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        use super::RangeDisplayDebug;
-
-        f.debug_struct("DeltaLayer")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
-            .field("lsn_range", &self.lsn_range)
-            .field("file_size", &self.file_size)
-            .field("inner", &self.inner)
-            .finish()
-    }
-}
-
 pub struct DeltaLayerInner {
    /// If false, the fields below have not been loaded into memory yet.
    loaded: bool,
@@ -217,16 +199,6 @@ pub struct DeltaLayerInner {
    file: Option<FileBlockReader<VirtualFile>>,
 }

-impl std::fmt::Debug for DeltaLayerInner {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("DeltaLayerInner")
-            .field("loaded", &self.loaded)
-            .field("index_start_blk", &self.index_start_blk)
-            .field("index_root_blk", &self.index_root_blk)
-            .finish()
-    }
-}
-
 impl Layer for DeltaLayer {
    fn get_key_range(&self) -> Range<Key> {
        self.key_range.clone()
@@ -258,7 +230,7 @@ impl Layer for DeltaLayer {
            return Ok(());
        }

-        let inner = self.load(LayerAccessKind::Dump, ctx)?;
+        let inner = self.load(ctx)?;

        println!(
            "index_start_blk: {}, root {}",
@@ -331,7 +303,7 @@ impl Layer for DeltaLayer {

        {
            // Open the file and lock the metadata in memory
-            let inner = self.load(LayerAccessKind::GetValueReconstructData, ctx)?;
+            let inner = self.load(ctx)?;

            // Scan the page versions backwards, starting from `lsn`.
            let file = inner.file.as_ref().unwrap();
@@ -422,9 +394,7 @@ impl PersistentLayer for DeltaLayer {
    }

    fn iter(&self, ctx: &RequestContext) -> Result<LayerIter<'_>> {
-        let inner = self
-            .load(LayerAccessKind::KeyIter, ctx)
-            .context("load delta layer")?;
+        let inner = self.load(ctx).context("load delta layer")?;
        Ok(match DeltaValueIter::new(inner) {
            Ok(iter) => Box::new(iter),
            Err(err) => Box::new(std::iter::once(Err(err))),
@@ -432,7 +402,7 @@ impl PersistentLayer for DeltaLayer {
    }

    fn key_iter(&self, ctx: &RequestContext) -> Result<LayerKeyIter<'_>> {
-        let inner = self.load(LayerAccessKind::KeyIter, ctx)?;
+        let inner = self.load(ctx)?;
        Ok(Box::new(
            DeltaKeyIter::new(inner).context("Layer index is corrupted")?,
        ))
@@ -447,26 +417,6 @@ impl PersistentLayer for DeltaLayer {
    fn file_size(&self) -> Option<u64> {
        Some(self.file_size)
    }
-
-    fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
-        let layer_file_name = self.filename().file_name();
-        let lsn_range = self.get_lsn_range();
-
-        let access_stats = self.access_stats.as_api_model(reset);
-
-        HistoricLayerInfo::Delta {
-            layer_file_name,
-            layer_file_size: Some(self.file_size),
-            lsn_start: lsn_range.start,
-            lsn_end: lsn_range.end,
-            remote: false,
-            access_stats,
-        }
-    }
-
-    fn access_stats(&self) -> &LayerAccessStats {
-        &self.access_stats
-    }
 }

 impl DeltaLayer {
@@ -511,13 +461,7 @@ impl DeltaLayer {
    /// Open the underlying file and read the metadata into memory, if it's
    /// not loaded already.
    ///
-    fn load(
-        &self,
-        access_kind: LayerAccessKind,
-        ctx: &RequestContext,
-    ) -> Result<RwLockReadGuard<DeltaLayerInner>> {
-        self.access_stats
-            .record_access(access_kind, ctx.task_kind());
+    fn load(&self, _ctx: &RequestContext) -> Result<RwLockReadGuard<DeltaLayerInner>> {
        loop {
            // Quick exit if already loaded
            let inner = self.inner.read().unwrap();
@@ -598,7 +542,6 @@ impl DeltaLayer {
        tenant_id: TenantId,
        filename: &DeltaFileName,
        file_size: u64,
-        access_stats: LayerAccessStats,
    ) -> DeltaLayer {
        DeltaLayer {
            path_or_conf: PathOrConf::Conf(conf),
@@ -607,7 +550,6 @@ impl DeltaLayer {
            key_range: filename.key_range.clone(),
            lsn_range: filename.lsn_range.clone(),
            file_size,
-            access_stats,
            inner: RwLock::new(DeltaLayerInner {
                loaded: false,
                file: None,
@@ -637,7 +579,6 @@ impl DeltaLayer {
            key_range: summary.key_range,
            lsn_range: summary.lsn_range,
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident),
            inner: RwLock::new(DeltaLayerInner {
                loaded: false,
                file: None,
@@ -808,7 +749,6 @@ impl DeltaLayerWriterInner {
            key_range: self.key_start..key_end,
            lsn_range: self.lsn_range.clone(),
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_new_layer_file(),
            inner: RwLock::new(DeltaLayerInner {
                loaded: false,
                file: None,
--- a/pageserver/src/tenant/storage_layer/filename.rs
+++ b/pageserver/src/tenant/storage_layer/filename.rs
@@ -1,32 +1,23 @@
 //!
 //! Helper functions for dealing with filenames of the image and delta layer files.
 //!
+use crate::config::PageServerConf;
 use crate::repository::Key;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops::Range;
+use std::path::PathBuf;
 use std::str::FromStr;

 use utils::lsn::Lsn;

 // Note: Timeline::load_layer_map() relies on this sort order
-#[derive(PartialEq, Eq, Clone, Hash)]
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct DeltaFileName {
    pub key_range: Range<Key>,
    pub lsn_range: Range<Lsn>,
 }

-impl std::fmt::Debug for DeltaFileName {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        use super::RangeDisplayDebug;
-
-        f.debug_struct("DeltaFileName")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
-            .field("lsn_range", &self.lsn_range)
-            .finish()
-    }
-}
-
 impl PartialOrd for DeltaFileName {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
@@ -111,23 +102,12 @@ impl fmt::Display for DeltaFileName {
    }
 }

-#[derive(PartialEq, Eq, Clone, Hash)]
+#[derive(Debug, PartialEq, Eq, Clone, Hash)]
 pub struct ImageFileName {
    pub key_range: Range<Key>,
    pub lsn: Lsn,
 }

-impl std::fmt::Debug for ImageFileName {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        use super::RangeDisplayDebug;
-
-        f.debug_struct("ImageFileName")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
-            .field("lsn", &self.lsn)
-            .finish()
-    }
-}
-
 impl PartialOrd for ImageFileName {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
@@ -150,13 +130,6 @@ impl Ord for ImageFileName {
    }
 }

-impl ImageFileName {
-    pub fn lsn_as_range(&self) -> Range<Lsn> {
-        // Saves from having to copypaste this all over
-        self.lsn..(self.lsn + 1)
-    }
-}
-
 ///
 /// Represents the filename of an ImageLayer
 ///
@@ -204,32 +177,49 @@ impl fmt::Display for ImageFileName {
 pub enum LayerFileName {
    Image(ImageFileName),
    Delta(DeltaFileName),
+    #[cfg(test)]
+    Test(String),
 }

 impl LayerFileName {
    pub fn file_name(&self) -> String {
        match self {
-            Self::Image(fname) => fname.to_string(),
-            Self::Delta(fname) => fname.to_string(),
+            LayerFileName::Image(fname) => format!("{fname}"),
+            LayerFileName::Delta(fname) => format!("{fname}"),
+            #[cfg(test)]
+            LayerFileName::Test(fname) => fname.to_string(),
        }
    }
+    #[cfg(test)]
+    pub(crate) fn new_test(name: &str) -> LayerFileName {
+        LayerFileName::Test(name.to_owned())
+    }
 }

 impl From<ImageFileName> for LayerFileName {
    fn from(fname: ImageFileName) -> Self {
-        Self::Image(fname)
+        LayerFileName::Image(fname)
    }
 }
 impl From<DeltaFileName> for LayerFileName {
    fn from(fname: DeltaFileName) -> Self {
-        Self::Delta(fname)
+        LayerFileName::Delta(fname)
    }
 }

+// Include a `/` in the prefix as an additional layer of robustness:
+// `/` cannot appear in a UNIX file name, so no real layer file name can collide with it.
+#[cfg(test)]
+const LAYER_FILE_NAME_TEST_PREFIX: &str = "LAYER_FILE_NAME::test/";
+
 impl FromStr for LayerFileName {
    type Err = String;

    fn from_str(value: &str) -> Result<Self, Self::Err> {
+        #[cfg(test)]
+        if let Some(value) = value.strip_prefix(LAYER_FILE_NAME_TEST_PREFIX) {
+            return Ok(LayerFileName::Test(value.to_owned()));
+        }
        let delta = DeltaFileName::parse_str(value);
        let image = ImageFileName::parse_str(value);
        let ok = match (delta, image) {
@@ -238,8 +228,8 @@ impl FromStr for LayerFileName {
                    "neither delta nor image layer file name: {value:?}"
                ))
            }
-            (Some(delta), None) => Self::Delta(delta),
-            (None, Some(image)) => Self::Image(image),
+            (Some(delta), None) => LayerFileName::Delta(delta),
+            (None, Some(image)) => LayerFileName::Image(image),
            (Some(_), Some(_)) => unreachable!(),
        };
        Ok(ok)
@@ -252,8 +242,12 @@ impl serde::Serialize for LayerFileName {
        S: serde::Serializer,
    {
        match self {
-            Self::Image(fname) => serializer.serialize_str(&fname.to_string()),
-            Self::Delta(fname) => serializer.serialize_str(&fname.to_string()),
+            LayerFileName::Image(fname) => serializer.serialize_str(&format!("{}", fname)),
+            LayerFileName::Delta(fname) => serializer.serialize_str(&format!("{}", fname)),
+            #[cfg(test)]
+            LayerFileName::Test(t) => {
+                serializer.serialize_str(&format!("{LAYER_FILE_NAME_TEST_PREFIX}{t}"))
+            }
        }
    }
 }
@@ -276,3 +270,16 @@ impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor {
        v.parse().map_err(|e| E::custom(e))
    }
 }
+
+/// Helper enum to hold a PageServerConf, or a path
+///
+/// This is used by DeltaLayer and ImageLayer. Normally, this holds a reference to the
+/// global config, and paths to layer files are constructed using the tenant/timeline
+/// path from the config. But in the 'pageserver_binutils' binary, we need to construct a Layer
+/// struct for a file on disk, without having a page server running, so that we have no
+/// config. In that case, we use the Path variant to hold the full path to the file on
+/// disk.
+pub enum PathOrConf {
+    Path(PathBuf),
+    Conf(&'static PageServerConf),
+}
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -27,14 +27,13 @@ use crate::tenant::blob_io::{BlobCursor, BlobWriter, WriteBlobWriter};
 use crate::tenant::block_io::{BlockBuf, BlockReader, FileBlockReader};
 use crate::tenant::disk_btree::{DiskBtreeBuilder, DiskBtreeReader, VisitDirection};
 use crate::tenant::storage_layer::{
-    LayerAccessStats, PersistentLayer, ValueReconstructResult, ValueReconstructState,
+    PersistentLayer, ValueReconstructResult, ValueReconstructState,
 };
 use crate::virtual_file::VirtualFile;
 use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX};
 use anyhow::{bail, ensure, Context, Result};
 use bytes::Bytes;
 use hex;
-use pageserver_api::models::{HistoricLayerInfo, LayerAccessKind};
 use rand::{distributions::Alphanumeric, Rng};
 use serde::{Deserialize, Serialize};
 use std::fs::{self, File};
@@ -52,8 +51,8 @@ use utils::{
    lsn::Lsn,
 };

-use super::filename::{ImageFileName, LayerFileName};
-use super::{Layer, LayerAccessStatsReset, LayerIter, LayerResidenceStatus, PathOrConf};
+use super::filename::{ImageFileName, LayerFileName, PathOrConf};
+use super::{Layer, LayerIter};

 ///
 /// Header stored in the beginning of the file
@@ -95,13 +94,13 @@ impl From<&ImageLayer> for Summary {
    }
 }

-/// ImageLayer is the in-memory data structure associated with an on-disk image
-/// file.
 ///
-/// We keep an ImageLayer in memory for each file, in the LayerMap. If a layer
-/// is in "loaded" state, we have a copy of the index in memory, in 'inner'.
+/// ImageLayer is the in-memory data structure associated with an on-disk image
+/// file.  We keep an ImageLayer in memory for each file, in the LayerMap. If a
+/// layer is in "loaded" state, we have a copy of the index in memory, in 'inner'.
 /// Otherwise the struct is just a placeholder for a file that exists on disk,
 /// and it needs to be loaded before using it in queries.
+///
 pub struct ImageLayer {
    path_or_conf: PathOrConf,
    pub tenant_id: TenantId,
@@ -112,24 +111,9 @@ pub struct ImageLayer {
    // This entry contains an image of all pages as of this LSN
    pub lsn: Lsn,

-    access_stats: LayerAccessStats,
-
    inner: RwLock<ImageLayerInner>,
 }

-impl std::fmt::Debug for ImageLayer {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        use super::RangeDisplayDebug;
-
-        f.debug_struct("ImageLayer")
-            .field("key_range", &RangeDisplayDebug(&self.key_range))
-            .field("file_size", &self.file_size)
-            .field("lsn", &self.lsn)
-            .field("inner", &self.inner)
-            .finish()
-    }
-}
-
 pub struct ImageLayerInner {
    /// If false, the 'index' has not been loaded into memory yet.
    loaded: bool,
@@ -142,16 +126,6 @@ pub struct ImageLayerInner {
    file: Option<FileBlockReader<VirtualFile>>,
 }

-impl std::fmt::Debug for ImageLayerInner {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_struct("ImageLayerInner")
-            .field("loaded", &self.loaded)
-            .field("index_start_blk", &self.index_start_blk)
-            .field("index_root_blk", &self.index_root_blk)
-            .finish()
-    }
-}
-
 impl Layer for ImageLayer {
    fn get_key_range(&self) -> Range<Key> {
        self.key_range.clone()
@@ -180,7 +154,7 @@ impl Layer for ImageLayer {
            return Ok(());
        }

-        let inner = self.load(LayerAccessKind::Dump, ctx)?;
+        let inner = self.load(ctx)?;
        let file = inner.file.as_ref().unwrap();
        let tree_reader =
            DiskBtreeReader::<_, KEY_SIZE>::new(inner.index_start_blk, inner.index_root_blk, file);
@@ -207,7 +181,7 @@ impl Layer for ImageLayer {
        assert!(lsn_range.start >= self.lsn);
        assert!(lsn_range.end >= self.lsn);

-        let inner = self.load(LayerAccessKind::GetValueReconstructData, ctx)?;
+        let inner = self.load(ctx)?;

        let file = inner.file.as_ref().unwrap();
        let tree_reader = DiskBtreeReader::new(inner.index_start_blk, inner.index_root_blk, file);
@@ -261,23 +235,6 @@ impl PersistentLayer for ImageLayer {
    fn file_size(&self) -> Option<u64> {
        Some(self.file_size)
    }
-
-    fn info(&self, reset: LayerAccessStatsReset) -> HistoricLayerInfo {
-        let layer_file_name = self.filename().file_name();
-        let lsn_range = self.get_lsn_range();
-
-        HistoricLayerInfo::Image {
-            layer_file_name,
-            layer_file_size: Some(self.file_size),
-            lsn_start: lsn_range.start,
-            remote: false,
-            access_stats: self.access_stats.as_api_model(reset),
-        }
-    }
-
-    fn access_stats(&self) -> &LayerAccessStats {
-        &self.access_stats
-    }
 }

 impl ImageLayer {
@@ -315,13 +272,7 @@ impl ImageLayer {
    /// Open the underlying file and read the metadata into memory, if it's
    /// not loaded already.
    ///
-    fn load(
-        &self,
-        access_kind: LayerAccessKind,
-        ctx: &RequestContext,
-    ) -> Result<RwLockReadGuard<ImageLayerInner>> {
-        self.access_stats
-            .record_access(access_kind, ctx.task_kind());
+    fn load(&self, _ctx: &RequestContext) -> Result<RwLockReadGuard<ImageLayerInner>> {
        loop {
            // Quick exit if already loaded
            let inner = self.inner.read().unwrap();
@@ -401,7 +352,6 @@ impl ImageLayer {
        tenant_id: TenantId,
        filename: &ImageFileName,
        file_size: u64,
-        access_stats: LayerAccessStats,
    ) -> ImageLayer {
        ImageLayer {
            path_or_conf: PathOrConf::Conf(conf),
@@ -410,7 +360,6 @@ impl ImageLayer {
            key_range: filename.key_range.clone(),
            lsn: filename.lsn,
            file_size,
-            access_stats,
            inner: RwLock::new(ImageLayerInner {
                loaded: false,
                file: None,
@@ -438,7 +387,6 @@ impl ImageLayer {
            key_range: summary.key_range,
            lsn: summary.lsn,
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_loading_layer(LayerResidenceStatus::Resident),
            inner: RwLock::new(ImageLayerInner {
                file: None,
                loaded: false,
@@ -598,7 +546,6 @@ impl ImageLayerWriterInner {
            key_range: self.key_range.clone(),
            lsn: self.lsn,
            file_size: metadata.len(),
-            access_stats: LayerAccessStats::for_new_layer_file(),
            inner: RwLock::new(ImageLayerInner {
                loaded: false,
                file: None,
--- a/Show More
+++ b/Show More