Compare commits

..

61 Commits

Author SHA1 Message Date
Stas Kelvich
57f51169ef Hacky support for a new connection router in link-proxy
Add a hack to support a new way of accessing compute without using a
NodePort. Now, to access compute in a cross-k8s setup (console->compute
and link-proxy->compute) we need to connect to the pg_sni_router service
using TLS. The destination compute address is encoded in the domain/SNI.

However, for link-proxy it is hard to add support for outgoing TLS connections,
as our trick of stealing the stream from tokio-postgres doesn't work with TLS.
So set the sni_host option and use an unencrypted connection instead. Once we
add encryption support for outgoing connections to the proxy, we can remove
this hack.
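
For illustration, a hypothetical sketch of the SNI encoding idea (the exact
scheme pg_sni_router uses is not shown here; the format below is an assumption):

```
// Purely hypothetical encoding, for illustration only -- the real scheme
// is defined by pg_sni_router, not shown in this commit message.
fn encode_compute_sni(host: &str, port: u16, router_domain: &str) -> String {
    format!("{}--{}.{}", host.replace('.', "-"), port, router_domain)
}
```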
2023-04-27 14:50:52 +03:00
Christian Schwarz
9ea7b5dd38 clean up logging around on-demand downloads (#4030)
- Remove repeated tenant & timeline from span
- Demote logging of the path to debug level
- Log completion at info level, in the same function where we log errors
- Distinguish between layer file download success and the on-demand download
  succeeding as a whole in the log message wording
- Assert that the span contains a tenant id and a timeline id

fixes https://github.com/neondatabase/neon/issues/3945

Before:

```
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{tenant_id=$TENANT_ID timeline_id=$TIMELINE_ID layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: download complete: /storage/pageserver/data/tenants/$TENANT_ID/timelines/$TIMELINE_ID/000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{tenant_id=$TENANT_ID timeline_id=$TIMELINE_ID layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: Rebuilt layer map. Did 9 insertions to process a batch of 1 updates.
```

After:

```
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: layer file download finished
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: Rebuilt layer map. Did 9 insertions to process a batch of 1 updates.
  INFO compaction_loop{tenant_id=$TENANT_ID}:compact_timeline{timeline=$TIMELINE_ID}:download_remote_layer{layer=000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000020C8A71-00000000020CAF91}: on-demand download successful
```
2023-04-27 11:54:48 +02:00
Arseny Sher
0112a602e1 Add timeout on proxy -> compute connection establishment.
Otherwise we wait up to the default tcp_syn_retries (about 2+ min) before
getting OS error 110 if the compute has been migrated to another pod.
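
A minimal sketch of the fix's shape, assuming tokio; the 10-second value is
illustrative, not necessarily the timeout this commit picks:

```
use std::time::Duration;
use tokio::net::TcpStream;
use tokio::time::timeout;

async fn connect_to_compute(addr: &str) -> std::io::Result<TcpStream> {
    // Bound the connect attempt instead of waiting out tcp_syn_retries.
    match timeout(Duration::from_secs(10), TcpStream::connect(addr)).await {
        Ok(res) => res,
        Err(_elapsed) => Err(std::io::Error::new(
            std::io::ErrorKind::TimedOut,
            "timed out connecting to compute",
        )),
    }
}
```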
2023-04-27 09:50:52 +04:00
Anastasia Lubennikova
92214578af Fix proxy_io_bytes_per_client metric: use branch_id identifier properly. (#4084)
It fixes the miscalculation of the metric for projects that use multiple
branches for the same endpoint.
We were under-billing users with such projects, so we need to
communicate the change in the Release Notes.
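
For context, a sketch of what a per-client byte counter keyed by both endpoint
and branch could look like with the prometheus crate; the label names are
illustrative, not the proxy's exact label set:

```
use once_cell::sync::Lazy;
use prometheus::{register_int_counter_vec, IntCounterVec};

// Label names are illustrative, not the proxy's exact label set.
static PROXY_IO_BYTES_PER_CLIENT: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "proxy_io_bytes_per_client",
        "Bytes proxied, per client",
        &["endpoint_id", "branch_id"]
    )
    .unwrap()
});

fn record_io(endpoint_id: &str, branch_id: &str, bytes: u64) {
    PROXY_IO_BYTES_PER_CLIENT
        .with_label_values(&[endpoint_id, branch_id])
        .inc_by(bytes);
}
```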
2023-04-26 17:47:54 +03:00
Christian Schwarz
6861259be7 add global metric for unexpected on-demand downloads (#4069)
Until we have toned down the prod logs to zero WARN and ERROR, we want a
dedicated metric for which we can have a dedicated alert.

fixes https://github.com/neondatabase/neon/issues/3924
2023-04-26 15:18:26 +02:00
Sergey Melnikov
11df2ee5d7 Add safekeeper-3.us-east-2.aws.neon.build (#4085) 2023-04-26 14:40:36 +03:00
Arseny Sher
31a3910fd9 Remove wait_for_sk_commit_lsn_to_reach_remote_storage.
It had a couple of inherent races:

1) Even if the compute is killed before the call, some more data might still
arrive at the safekeepers after commit_lsn on them is polled, advancing it. Then
the checkpoint on the pageserver might not include this tail, so the upload of
the expected LSN won't happen until one more checkpoint.

2) commit_lsn is updated asynchronously -- the compute can commit a transaction
before communicating commit_lsn to even a single safekeeper (sync-safekeepers
can be used to force the advancement). This makes the semantics of
wait_for_sk_commit_lsn_to_reach_remote_storage quite complicated.

Replace it with last_flush_lsn_upload, which
1) Learns the last flush LSN on the compute;
2) Waits for it to arrive at the pageserver;
3) Checkpoints it;
4) Waits for the upload.

In some tests this keeps compute alive longer than before, but this doesn't seem
to be important.

There is a chance this fixes https://github.com/neondatabase/neon/issues/3209
2023-04-26 13:46:33 +04:00
Joonas Koivunen
381c8fca4f feat: log how long tenant activation takes (#4080)
Adds just a counter measuring the time from the creation of the tenant, logged
after activation. Might help guide us in the investigation of #4025.
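
A minimal sketch of the idea; the field name is illustrative:

```
use std::time::Instant;

struct Tenant {
    created_at: Instant, // recorded when the Tenant object is constructed
}

impl Tenant {
    fn activate(&self) {
        // ... activation work ...
        tracing::info!(
            "activation took {} ms since tenant creation",
            self.created_at.elapsed().as_millis()
        );
    }
}
```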
2023-04-26 12:39:17 +03:00
Joonas Koivunen
4625da3164 build: remove busted sk-1.us-east-2 from staging hosts (#4082)
this should give us complete deployments while a new one is being
brought up.
2023-04-26 09:07:45 +00:00
Joonas Koivunen
850f6b1cb9 refactor: drop pageserver_ondisk_layers (#4071)
I didn't get through #3775 fast enough so we wanted to remove this
metric.

Fixes #3705.
2023-04-26 11:49:29 +03:00
Sergey Melnikov
f19b70b379 Configure extra domain for us-east-1 (#4078) 2023-04-26 09:36:26 +02:00
Sergey Melnikov
9d0cf08d5f Fix new storage-broker deploy for eu-central-1 (#4079) 2023-04-26 10:29:44 +03:00
Alexander Bayandin
2d6fd72177 GitHub Workflows: Fix crane for several registries (#4076)
Follow-up fix after https://github.com/neondatabase/neon/pull/4067

```
+ crane tag neondatabase/vm-compute-node-v14:3064 latest
Error: fetching "neondatabase/vm-compute-node-v14:3064": GET https://index.docker.io/v2/neondatabase/vm-compute-node-v14/manifests/3064: MANIFEST_UNKNOWN: manifest unknown; unknown tag=3064
```

I reverted to the previous approach for promoting images
(log in to one registry, save images to the local fs, log out and log in to
another registry, and push images from the local fs). It turns out that what
works for one Google project (kaniko) doesn't work for another (crane)
[sigh]
2023-04-25 23:58:59 +01:00
Heikki Linnakangas
8945fbdb31 Enable OpenTelemetry tracing in proxy in staging. (#4065)
Depends on https://github.com/neondatabase/helm-charts/pull/32

Co-authored-by: Lassi Pölönen <lassi.polonen@iki.fi>
2023-04-25 20:45:36 +03:00
Alexander Bayandin
05ac0e2493 Login to ECR and Docker Hub at once (#4067)
- Update kaniko to 1.9.2 (from 1.7.0); the problem with reproducible builds is fixed
- Log in to ECR and Docker Hub at once, so we can push to several
registries; this makes the `push-docker-hub` job unneeded
- `push-docker-hub` is replaced with `promote-images` in the `needs:` clause,
and pushing images to the production ECR moved to the `promote-images` job
2023-04-25 17:54:10 +01:00
Joonas Koivunen
bfd45dd671 test_tenant_config: allow ERROR from eviction task (#4074) 2023-04-25 18:41:09 +03:00
Joonas Koivunen
7f80230fd2 fix: stop dead_code rustc lint (#4070)
It only happens without `--all-features`, which is what `./run_clippy.sh`
uses.
2023-04-25 17:07:04 +02:00
Sergey Melnikov
78bbbccadb Deploy proxies for preview environments (#4052)
## Describe your changes
Deploy `main` proxies to the preview environments
We don't deploy storage there yet, as it's tricky.

## Issue ticket number and link
https://github.com/neondatabase/cloud/issues/4737
2023-04-25 16:46:52 +02:00
Christian Schwarz
dbbe032c39 neon_local: fix tenant create -c eviction_policy:... (#4004)
And add corresponding unit test.

The fix is to use `.remove()` instead of `.get()` when processing the
arguments hash map.
The code uses the emptiness of the hash map to determine whether all
arguments have been processed.
This was likely a copy-paste error.
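
A sketch of the pattern (names illustrative): every handled argument must be
consumed with `.remove()`, because leftover keys are treated as unrecognized
options:

```
use std::collections::HashMap;

fn parse_config(mut args: HashMap<String, String>) -> Result<(), String> {
    // `.get()` here would leave the key in the map and trip the
    // emptiness check below even for a valid invocation.
    let _eviction_policy = args.remove("eviction_policy");

    if !args.is_empty() {
        return Err(format!("unrecognized options: {:?}", args.keys()));
    }
    Ok(())
}
```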

refs https://github.com/neondatabase/neon/issues/3942
2023-04-25 15:33:30 +02:00
Joonas Koivunen
cb9473928d feat: add rough timings for basebackup (#4062)
Just record the time spent waiting for the LSN and then for the basebackup,
in a log message, in millis. This is related to ongoing investigations into
cold start performance.

This could also be a counter. It cannot be added next to the smgr
histograms, because we don't want another histogram per timeline.

The aim is to allow drilling deeper into which timelines were slow, and
to understand why some need two basebackups.
2023-04-25 13:22:16 +00:00
Christian Schwarz
fa20e37574 add gauge for in-flight layer uploads (#3951)
For the "worst-case /storage usage panel", we need to compute
```
remote size + local-only size
```

We currently don't have a metric for local-only layers.

The number of in-flight layers in the upload queue is just that, so, let
Prometheus scrape it.

The metric is two counters (started and finished).
The delta is the amount of in-flight uploads in the queue.
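
A sketch of the two-counter shape; the metric names are illustrative:

```
use once_cell::sync::Lazy;
use prometheus::{register_int_counter, IntCounter};

// Metric names are illustrative, not the exact ones this PR adds.
static UPLOADS_STARTED: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!("layer_uploads_started_total", "uploads enqueued").unwrap()
});
static UPLOADS_FINISHED: Lazy<IntCounter> = Lazy::new(|| {
    register_int_counter!("layer_uploads_finished_total", "uploads completed").unwrap()
});
```

On the Prometheus side, the in-flight count is then the difference
`layer_uploads_started_total - layer_uploads_finished_total`.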

The metrics are incremented in the respective `call_unfinished_metric_*`
functions.
These track ongoing operations by file_kind and op_kind.
We only need this metric for layer uploads, so there's the new
RemoteTimelineClientMetricsCallTrackSize type that forces all call sites
to decide whether they want the size tracked or not.
If we find that other file_kinds or op_kinds (metadata uploads, layer
downloads, layer deletes) are interesting, we can just enable them, and
they'll be just another label combination within the metrics that this PR
adds.

fixes https://github.com/neondatabase/neon/issues/3922
2023-04-25 14:22:48 +02:00
Joonas Koivunen
4911d7ce6f feat: warn when requests get cancelled (#4064)
Add a simple disarmable drop guard to log if a request is cancelled before
it is completed. We currently don't have this, which makes it difficult to
know when a request was dropped.
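
A minimal sketch of such a disarmable guard, assuming tracing for the logging:

```
// `path` is just illustrative context for the log line.
struct RequestGuard {
    armed: bool,
    path: String,
}

impl RequestGuard {
    fn new(path: impl Into<String>) -> Self {
        Self { armed: true, path: path.into() }
    }

    /// Call on successful completion so the Drop impl stays silent.
    fn disarm(mut self) {
        self.armed = false;
    }
}

impl Drop for RequestGuard {
    fn drop(&mut self) {
        if self.armed {
            tracing::warn!("request {} was dropped before completing", self.path);
        }
    }
}
```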
2023-04-25 15:22:23 +03:00
Christian Schwarz
e83684b868 add libmetrics metric for each logged log message (#4055)
This patch extends the libmetrics logging setup functionality with a
`tracing` layer that increments a Prometheus counter each time we log a
message. There is one counter per tracing event level. This allows
monitoring WARN and ERROR log volume without parsing the log. It would
also allow cross-checking whether logs got dropped on the way into
Loki.

It would be nicer if we could hook deeper into the tracing logging
layer, to avoid evaluating the filter twice.
But I don't know how to do it.
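
A sketch of such a layer, assuming tracing-subscriber and the prometheus crate;
the metric name is illustrative:

```
use once_cell::sync::Lazy;
use prometheus::{register_int_counter_vec, IntCounterVec};
use tracing::{Event, Subscriber};
use tracing_subscriber::layer::{Context, Layer};

static LOG_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!("log_messages_total", "logged events by level", &["level"])
        .unwrap()
});

struct MetricsLayer;

impl<S: Subscriber> Layer<S> for MetricsLayer {
    fn on_event(&self, event: &Event<'_>, _ctx: Context<'_, S>) {
        // Bump one counter per event, labeled by its level.
        let level = event.metadata().level().to_string();
        LOG_EVENTS.with_label_values(&[level.as_str()]).inc();
    }
}
```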
2023-04-25 14:10:18 +02:00
Eduard Dyckman
afbbc61036 Adding synthetic size to pageserver swagger (#4049)
## Describe your changes

I added the synthetic size response to the console swagger. Now I am syncing
it back to neon.
2023-04-24 16:19:25 +03:00
Alexey Kondratov
7ba5c286b7 [compute_ctl] Improve 'empty' compute startup sequence (#4034)
Make several attempts to get the spec from the control plane, retrying
network errors and all reasonable HTTP response codes. Do not hang waiting
for the spec without confirmation from the control plane that the compute
is known and is in the `Empty` state.

Adjust the way we track the `total_startup_ms` metric: it should be
calculated from the moment we received the spec, not from the moment
`compute_ctl` started. Also introduce a new `wait_for_spec_ms` metric
to track the time spent sleeping and waiting for the spec to be delivered
from the control plane.
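
A sketch of the retry shape, with the control-plane request abstracted away;
the attempt limit and backoff are illustrative:

```
use std::{thread, time::Duration};

// `fetch_spec` stands in for the real control-plane request.
fn get_spec_with_retries(
    fetch_spec: impl Fn() -> Result<String, String>,
) -> Result<String, String> {
    let mut last_err = String::new();
    for attempt in 1..=10 {
        match fetch_spec() {
            Ok(spec) => return Ok(spec),
            Err(e) => {
                last_err = e;
                // Simple linear backoff; the real policy may differ.
                thread::sleep(Duration::from_secs(attempt));
            }
        }
    }
    Err(format!("giving up after 10 attempts: {last_err}"))
}
```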

Part of neondatabase/cloud#3533
2023-04-21 11:10:48 +02:00
sharnoff
02b28ae0b1 fix vm-informant dbname: "neondb" -> "postgres" (#4046)
Changes the vm-informant's postgres connection string's dbname from
"neondb" (which sometimes doesn't exist) to "postgres" (which
_hopefully_ should exist more often?).

Currently there are a handful of VMs in prod that aren't working with
autoscaling because they don't have the "neondb" database.

The vm-informant doesn't require any database in particular; it's just
connecting as `cloud_admin` to be able to adjust the file cache
settings.
2023-04-18 18:54:32 +03:00
Cihan Demirci
0bfbae2d73 Add storage broker deployment to us-east-1 (#4048) 2023-04-18 18:41:09 +03:00
fcdm
f1b7dc4064 Update pageserver instances in us-east-1 2023-04-18 14:08:12 +01:00
Alexander Bayandin
e2a5177e89 Bump h2 from 0.3.17 to 0.3.18 (#4045) 2023-04-18 16:04:10 +03:00
Cihan Demirci
0c083564ce Add us-east-1 hosts file and update regions (#4042)
2023-04-17 15:25:27 +03:00
fcdm
d8dd60dc81 Add helm values for us-east-1 2023-04-17 11:59:38 +01:00
Arthur Petukhovsky
73f34eaa5e Send AppendResponse keepalive once per second (#4036)
Walproposer sends AppendRequest at least once per second. This patch
adds a response to these requests once per second.
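
A minimal sketch of the rate limiting:

```
use std::time::{Duration, Instant};

struct Keepalive {
    last_response: Instant,
}

impl Keepalive {
    fn new() -> Self {
        Self { last_response: Instant::now() }
    }

    /// Returns true at most once per second, however often it is polled.
    fn should_respond(&mut self) -> bool {
        if self.last_response.elapsed() >= Duration::from_secs(1) {
            self.last_response = Instant::now();
            true
        } else {
            false
        }
    }
}
```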

Fixes https://github.com/neondatabase/neon/issues/4017
2023-04-17 11:24:57 +03:00
Matt Nappo
c2496c7ef2 Added black_box in layer_map benches (fix #3396) 2023-04-16 16:33:37 +03:00
Kirill Bulatov
ebea298415 Update most of the dependencies to their latest versions (#4026)
See https://github.com/neondatabase/neon/pull/3991

Brings the changes back with the right way to use the new `toml_edit` to
deserialize values, and a unit test for this.

All non-trivial updates are extracted into separate commits; also, the
`cargo hakari` data and its manifest format were updated.

3 sets of crates remain unupdated:

* `base64` — touches proxy in a lot of places and its API changed quite strongly between our version (0.13) and 0.21.
* `opentelemetry` and `opentelemetry-*` crates

```
error[E0308]: mismatched types
  --> libs/tracing-utils/src/http.rs:65:21
   |
65 |     span.set_parent(parent_ctx);
   |          ---------- ^^^^^^^^^^ expected struct `opentelemetry_api::context::Context`, found struct `opentelemetry::Context`
   |          |
   |          arguments to this method are incorrect
   |
   = note: struct `opentelemetry::Context` and struct `opentelemetry_api::context::Context` have similar names, but are actually distinct types
note: struct `opentelemetry::Context` is defined in crate `opentelemetry_api`
  --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.19.0/src/context.rs:77:1
   |
77 | pub struct Context {
   | ^^^^^^^^^^^^^^^^^^
note: struct `opentelemetry_api::context::Context` is defined in crate `opentelemetry_api`
  --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.18.0/src/context.rs:77:1
   |
77 | pub struct Context {
   | ^^^^^^^^^^^^^^^^^^
   = note: perhaps two different versions of crate `opentelemetry_api` are being used?
note: associated function defined here
  --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-opentelemetry-0.18.0/src/span_ext.rs:43:8
   |
43 |     fn set_parent(&self, cx: Context);
   |        ^^^^^^^^^^

For more information about this error, try `rustc --explain E0308`.
error: could not compile `tracing-utils` due to previous error
warning: build failed, waiting for other jobs to finish...
error: could not compile `tracing-utils` due to previous error
```

`tracing-opentelemetry` version `0.19`, which is supposed to have the update we need, is not yet released.

* similarly, the `rustls`, `tokio-rustls`, `rustls-*` and `tls-listener` crates have the same issue:

```
error[E0308]: mismatched types
   --> libs/postgres_backend/tests/simple_select.rs:112:78
    |
112 |     let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg);
    |                                --------------------------------------------- ^^^^^^^^^^ expected struct `rustls::client::client_conn::ClientConfig`, found struct `ClientConfig`
    |                                |
    |                                arguments to this function are incorrect
    |
    = note: struct `ClientConfig` and struct `rustls::client::client_conn::ClientConfig` have similar names, but are actually distinct types
note: struct `ClientConfig` is defined in crate `rustls`
   --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.21.0/src/client/client_conn.rs:125:1
    |
125 | pub struct ClientConfig {
    | ^^^^^^^^^^^^^^^^^^^^^^^
note: struct `rustls::client::client_conn::ClientConfig` is defined in crate `rustls`
   --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.20.8/src/client/client_conn.rs:91:1
    |
91  | pub struct ClientConfig {
    | ^^^^^^^^^^^^^^^^^^^^^^^
    = note: perhaps two different versions of crate `rustls` are being used?
note: associated function defined here
   --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-postgres-rustls-0.9.0/src/lib.rs:23:12
    |
23  |     pub fn new(config: ClientConfig) -> Self {
    |            ^^^

For more information about this error, try `rustc --explain E0308`.
error: could not compile `postgres_backend` due to previous error
warning: build failed, waiting for other jobs to finish...
```

* aws crates: I could not make the new API work with the bucket endpoint override, and console e2e tests failed.
Our other tests passed; further investigation is worth doing in https://github.com/neondatabase/neon/issues/4008
2023-04-14 18:28:54 +03:00
Vadim Kharitonov
5ffa20dd82 [proxy] adjust proxy sleep timeout 2023-04-14 15:08:07 +03:00
Vadim Kharitonov
75ea8106ec Add procps into compute containers 2023-04-14 15:02:26 +03:00
Vadim Kharitonov
017d3a390d Compile postgres with lz4 and zstd support 2023-04-14 15:02:26 +03:00
Alexey Kondratov
589cf1ed21 [compute_ctl] Do not create availability checker data on each start (#4019)
Initially, the idea was to ensure that when we come to check data
availability, the special service table already contains one row, so if we
lose it for some reason, we will error out.

Yet, to do the availability check we start the compute first anyway! So it
doesn't really add value, but we affect each compute start as we
update at least one row in the database. Also, this writes some WAL, so
if the timeline is close to `neon.max_cluster_size` it could prevent the
compute from starting up.

That said, do CREATE TABLE IF NOT EXISTS + UPSERT right in the
`/check_writability` handler.
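
A sketch of the handler's database work with the postgres crate; the table and
column names are illustrative:

```
fn check_writability(client: &mut postgres::Client) -> Result<(), postgres::Error> {
    // Table and column names are illustrative.
    client.batch_execute(
        "CREATE TABLE IF NOT EXISTS health_check (id int PRIMARY KEY, updated_at timestamptz);
         INSERT INTO health_check VALUES (1, now())
         ON CONFLICT (id) DO UPDATE SET updated_at = now();",
    )
}
```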
2023-04-14 13:05:07 +02:00
Alexander Bayandin
0c82ff3d98 test_runner: add Timeline Inspector to Grafana links (#4021) 2023-04-14 11:46:47 +01:00
Christian Schwarz
8895f28dae make evictions_low_residence_duration_metric_threshold per-tenant (#3949)
Before this patch, if a tenant overrode its eviction_policy
setting to use a lower LayerAccessThreshold::threshold than
`evictions_low_residence_duration_metric_threshold`, the evictions done
for that tenant would count towards the
`evictions_with_low_residence_duration` metric.

That metric is used to identify premature evictions, commonly triggered
by disk-usage-based eviction under disk pressure.

We don't want that to happen for the legitimate evictions of the tenant
that overrides its eviction_policy.

So, this patch
- moves the setting into TenantConf
- adds test coverage
- updates the staging & prod yamls

Forward Compatibility:
Software before this patch will ignore the new tenant conf field and use
the global one instead.
So we can roll back safely.

Backward Compatibility:
Parsing old configs with software as of this patch will fail in
`PageServerConf::parse_and_validate` with error 
`unrecognized pageserver option 'evictions_low_residence_duration_metric_threshold'`
if the option is still present in the global section.
We deal with this by updating the configs in Ansible.

fixes https://github.com/neondatabase/neon/issues/3940
2023-04-14 13:25:45 +03:00
dependabot[bot]
b6c7c3290f Bump h2 from 0.3.15 to 0.3.17 (#4020) 2023-04-13 20:03:24 +01:00
Sasha Krassovsky
fd31fafeee Make proxy shutdown when all connections are closed (#3764)
## Describe your changes
Makes Proxy start draining connections on SIGTERM.
## Issue ticket number and link
#3333
2023-04-13 19:31:30 +03:00
Alexey Kondratov
db8dd6f380 [compute_ctl] Implement live reconfiguration (#3980)
With this commit one can request compute reconfiguration
from the running `compute_ctl` with compute in `Running` state
by sending a new spec:
```shell
curl -d "{\"spec\": $(cat ./compute-spec-new.json)}" http://localhost:3080/configure
```

Internally, we start a separate configurator thread that waits on a
`Condvar` for the `ConfigurationPending` compute state in a loop. Then it does
the reconfiguration, sets the compute back to the `Running` state and notifies
other waiters.
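
A minimal sketch of that wait loop with std's Mutex and Condvar; the state enum
is abridged:

```
use std::sync::{Arc, Condvar, Mutex};

#[derive(PartialEq)]
enum ComputeStatus {
    Running,
    ConfigurationPending,
}

fn configurator_loop(state: Arc<(Mutex<ComputeStatus>, Condvar)>) {
    let (lock, cvar) = &*state;
    loop {
        let mut status = lock.lock().unwrap();
        // Sleep until the /configure handler flips the state.
        while *status != ComputeStatus::ConfigurationPending {
            status = cvar.wait(status).unwrap();
        }
        // ... apply the new spec here ...
        *status = ComputeStatus::Running;
        cvar.notify_all();
    }
}
```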

It will need some follow-ups, e.g. for retry logic for control-plane
requests, but should be useful for testing in the current state. This
shouldn't affect any existing environment, since computes are configured
in a different way there.

Resolves neondatabase/cloud#4433
2023-04-13 18:07:29 +02:00
Alexander Bayandin
36c20946b4 Verify extensions checksums (#4014)
To not be taken by surprise by an upstream git re-tag or by malicious
activity, let's verify the checksums for the extensions we download.

Also, unify the installation of `pg_graphql` and `pg_tiktoken`
with the other extensions.
2023-04-13 15:25:09 +01:00
Heikki Linnakangas
89b5589b1b Tenant size should never be zero. Simplify test.
Looking at the git history of this test, I think "size == 0" used to
have a special meaning earlier, but now it should never happen.
2023-04-13 16:57:31 +03:00
Heikki Linnakangas
53f438a8a8 Rename "Postgres nodes" in control_plane to endpoints.
We use the term "endpoint" in for compute Postgres nodes in the web UI
and user-facing documentation now. Adjust the nomenclature in the code.

This changes the name of the "neon_local pg" command to "neon_local
endpoint". Also adjust names of classes, variables etc. in the python
tests accordingly.

This also changes the directory structure so that endpoints are now
stored in:

    .neon/endpoints/<endpoint id>

instead of:

    .neon/pgdatadirs/tenants/<tenant_id>/<endpoint (node) name>

The tenant ID is no longer part of the path. That means that you
cannot have two endpoints with the same name/ID in two different
tenants anymore. That's consistent with how we treat endpoints in the
real control plane and proxy: the endpoint ID must be globally unique.
2023-04-13 14:34:29 +03:00
Vadim Kharitonov
356439aa33 Add note about manual_release_instructions label (#4015)
## Describe your changes
Do not forget to process required manual stuff after release

---------

Co-authored-by: Dmitry Rodionov <dmitry@neon.tech>
2023-04-13 13:13:24 +03:00
Vadim Kharitonov
c237a2f5fb Compile pg_hint_plan extension 2023-04-13 12:59:46 +03:00
Dmitry Rodionov
15d1f85552 Add reason to TenantState::Broken (#3954)
Reason and backtrace are added to the Broken state. The backtrace is
automatically collected when a tenant enters the broken state. The format for
the API, CLI and metrics is changed and unified to return the tenant state name
in camel case; previously, snake case was used for metrics and camel case for
everything else. The tenant state field in the TenantInfo swagger spec now
contains the state name in a "slug" field, with the other fields (currently
only reason and backtrace for the Broken variant) in a "data" field. To allow
for this breaking change, state was removed from the TenantInfo swagger spec,
because it was not used anywhere.
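
A sketch of that representation with serde's adjacent tagging (variant set
abridged):

```
use serde::Serialize;

#[derive(Serialize)]
#[serde(tag = "slug", content = "data")]
enum TenantState {
    Active,
    Broken { reason: String, backtrace: String },
}
```

Here `Active` serializes to just `{"slug": "Active"}`, while `Broken` carries
its fields under `"data"`.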

Please note that the tenant's broken reason is not persisted on disk, so the reason is lost when the pageserver is restarted.

Requires changes to the grafana dashboard that monitors tenant states.

Closes #3001

---------

Co-authored-by: theirix <theirix@gmail.com>
2023-04-13 12:11:43 +03:00
Konstantin Knizhnik
732acc54c1 Add check for duplicates of generated image layers (#3869)
## Issue ticket number and link

#3673

---------

Co-authored-by: Heikki Linnakangas <heikki@neon.tech>
2023-04-13 10:19:34 +03:00
Stas Kelvich
5d0ecadf7c Add support for non-SNI case in multi-cert proxy
When no SNI is provided, use the default certificate; otherwise we can't
get to the options parameter, which can also be used to set the endpoint name.
That means the non-SNI flow will not work for CNAME domains in verify-full
mode.
2023-04-12 18:16:49 +03:00
Kirill Bulatov
f7995b3c70 Revert "Update most of the dependencies to their latest versions (#3991)" (#4013)
This reverts commit a64044a7a9.

See https://neondb.slack.com/archives/C03H1K0PGKH/p1681306682795559
2023-04-12 14:51:59 +00:00
Alexander Bayandin
13e53e5dc8 GitHub Workflows: use '!cancelled' instead of 'success or failure' 2023-04-12 15:22:18 +01:00
Alexander Bayandin
c94b8998be GitHub Workflows: print error messages to stderr 2023-04-12 15:22:18 +01:00
Alexander Bayandin
218062ceba GitHub Workflows: use ref_name instead of ref 2023-04-12 15:22:18 +01:00
Sam Gaw
8d295780cb Add support for ip4r extension 2023-04-12 16:40:02 +03:00
Kirill Bulatov
a64044a7a9 Update most of the dependencies to their latest versions (#3991)
All non-trivial updates are extracted into separate commits; also, the
`cargo hakari` data and its manifest format were updated.

3 sets of crates remain unupdated:

* `base64` — touches proxy in a lot of places and its API changed quite
strongly between our version (0.13) and 0.21.
* `opentelemetry` and `opentelemetry-*` crates

```
error[E0308]: mismatched types
  --> libs/tracing-utils/src/http.rs:65:21
   |
65 |     span.set_parent(parent_ctx);
   |          ---------- ^^^^^^^^^^ expected struct `opentelemetry_api::context::Context`, found struct `opentelemetry::Context`
   |          |
   |          arguments to this method are incorrect
   |
   = note: struct `opentelemetry::Context` and struct `opentelemetry_api::context::Context` have similar names, but are actually distinct types
note: struct `opentelemetry::Context` is defined in crate `opentelemetry_api`
  --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.19.0/src/context.rs:77:1
   |
77 | pub struct Context {
   | ^^^^^^^^^^^^^^^^^^
note: struct `opentelemetry_api::context::Context` is defined in crate `opentelemetry_api`
  --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/opentelemetry_api-0.18.0/src/context.rs:77:1
   |
77 | pub struct Context {
   | ^^^^^^^^^^^^^^^^^^
   = note: perhaps two different versions of crate `opentelemetry_api` are being used?
note: associated function defined here
  --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tracing-opentelemetry-0.18.0/src/span_ext.rs:43:8
   |
43 |     fn set_parent(&self, cx: Context);
   |        ^^^^^^^^^^

For more information about this error, try `rustc --explain E0308`.
error: could not compile `tracing-utils` due to previous error
warning: build failed, waiting for other jobs to finish...
error: could not compile `tracing-utils` due to previous error
```

`tracing-opentelemetry` version `0.19`, which is supposed to have the
update we need, is not yet released.

* similarly, the `rustls`, `tokio-rustls`, `rustls-*` and `tls-listener`
crates have the same issue:

```
error[E0308]: mismatched types
   --> libs/postgres_backend/tests/simple_select.rs:112:78
    |
112 |     let mut make_tls_connect = tokio_postgres_rustls::MakeRustlsConnect::new(client_cfg);
    |                                --------------------------------------------- ^^^^^^^^^^ expected struct `rustls::client::client_conn::ClientConfig`, found struct `ClientConfig`
    |                                |
    |                                arguments to this function are incorrect
    |
    = note: struct `ClientConfig` and struct `rustls::client::client_conn::ClientConfig` have similar names, but are actually distinct types
note: struct `ClientConfig` is defined in crate `rustls`
   --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.21.0/src/client/client_conn.rs:125:1
    |
125 | pub struct ClientConfig {
    | ^^^^^^^^^^^^^^^^^^^^^^^
note: struct `rustls::client::client_conn::ClientConfig` is defined in crate `rustls`
   --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/rustls-0.20.8/src/client/client_conn.rs:91:1
    |
91  | pub struct ClientConfig {
    | ^^^^^^^^^^^^^^^^^^^^^^^
    = note: perhaps two different versions of crate `rustls` are being used?
note: associated function defined here
   --> /Users/someonetoignore/.cargo/registry/src/github.com-1ecc6299db9ec823/tokio-postgres-rustls-0.9.0/src/lib.rs:23:12
    |
23  |     pub fn new(config: ClientConfig) -> Self {
    |            ^^^

For more information about this error, try `rustc --explain E0308`.
error: could not compile `postgres_backend` due to previous error
warning: build failed, waiting for other jobs to finish...
```

* aws crates: I could not make the new API work with the bucket endpoint
override, and console e2e tests failed.
Our other tests passed; further investigation is worth doing in
https://github.com/neondatabase/neon/issues/4008
2023-04-12 15:32:38 +03:00
Kirill Bulatov
d8939d4162 Move walreceiver start and stop behind a struct (#3973)
The PR replaces the module-function-based walreceiver interface with a
`WalReceiver` struct that exposes a few public methods: `new`, `start`
and `stop` for now.

Later, the same struct is planned to be used for getting walreceiver
stats (and, maybe, other extra data) to display during missing wal
errors for https://github.com/neondatabase/neon/issues/2106

The change required extra logic changes, though:

* with the `WalReceiver` struct added, it became easier to pass `ctx`
and later do a `detached_child` instead of

bfee412701/pageserver/src/tenant/timeline.rs (L1379-L1381)

* `WalReceiver::start`, which is now the public API to start the
walreceiver, can return an `Err`, which may now turn a tenant
`Broken`, same as for the timeline that it tries to load during startup.

* `WalReceiverConf` was added to group walreceiver parameters from
pageserver's tenant config
2023-04-12 12:39:02 +03:00
Heikki Linnakangas
06ce83c912 Tolerate missing 'operation_uuid' field in spec file.
'compute_ctl' doesn't use the operation_uuid for anything; it just prints
it to the log.
2023-04-12 12:11:22 +03:00
Heikki Linnakangas
8ace7a7515 Remove unused 'timestamp' field from ComputeSpec struct. 2023-04-12 12:11:22 +03:00
Heikki Linnakangas
ef68321b31 Use Lsn, TenantId, TimelineId types in compute_ctl.
Stronger types are generally nicer.
2023-04-12 12:11:22 +03:00
130 changed files with 5203 additions and 2401 deletions

View File

@@ -4,7 +4,7 @@
hakari-package = "workspace_hack"
# Format for `workspace-hack = ...` lines in other Cargo.tomls. Requires cargo-hakari 0.9.8 or above.
dep-format-version = "3"
dep-format-version = "4"
# Setting workspace.resolver = "2" in the root Cargo.toml is HIGHLY recommended.
# Hakari works much better with the new feature resolver.

View File

@@ -10,6 +10,7 @@
<!-- List everything that should be done **before** release, any issues / setting changes / etc -->
### Checklist after release
- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them).
- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files))
- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel
- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true)

View File

@@ -45,12 +45,12 @@ runs:
shell: bash -euxo pipefail {0}
run: |
if [ "${{ inputs.action }}" != "store" ] && [ "${{ inputs.action }}" != "generate" ]; then
echo 2>&1 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
echo >&2 "Unknown inputs.action type '${{ inputs.action }}'; allowed 'generate' or 'store' only"
exit 1
fi
if [ -z "${{ inputs.test_selection }}" ] && [ "${{ inputs.action }}" == "store" ]; then
echo 2>&1 "inputs.test_selection must be set for 'store' action"
echo >&2 "inputs.test_selection must be set for 'store' action"
exit 2
fi

View File

@@ -37,7 +37,7 @@ runs:
echo 'SKIPPED=true' >> $GITHUB_OUTPUT
exit 0
else
echo 2>&1 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
echo >&2 "Neither s3://${BUCKET}/${PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi
fi

View File

@@ -58,7 +58,7 @@ runs:
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo 2>&1 "Failed to create branch after 10 attempts, the latest response was: ${branch}"
echo >&2 "Failed to create branch after 10 attempts, the latest response was: ${branch}"
exit 1
fi
@@ -122,7 +122,7 @@ runs:
done
if [ -z "${password}" ] || [ "${password}" == "null" ]; then
echo 2>&1 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}"
echo >&2 "Failed to reset password after 10 attempts, the latest response was: ${reset_password}"
exit 1
fi

View File

@@ -48,7 +48,7 @@ runs:
done
if [ -z "${branch_id}" ] || [ "${branch_id}" == "null" ]; then
echo 2>&1 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}"
echo >&2 "Failed to delete branch after 10 attempts, the latest response was: ${deleted_branch}"
exit 1
fi
env:

View File

@@ -202,7 +202,7 @@ runs:
prefix: latest
- name: Create Allure report
if: success() || failure()
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: store

View File

@@ -23,7 +23,7 @@ runs:
mkdir -p $(dirname $ARCHIVE)
if [ -f ${ARCHIVE} ]; then
echo 2>&1 "File ${ARCHIVE} already exist. Something went wrong before"
echo >&2 "File ${ARCHIVE} already exist. Something went wrong before"
exit 1
fi
@@ -33,10 +33,10 @@ runs:
elif [ -f ${SOURCE} ]; then
time tar -cf ${ARCHIVE} --zstd ${SOURCE}
elif ! ls ${SOURCE} > /dev/null 2>&1; then
echo 2>&1 "${SOURCE} does not exist"
echo >&2 "${SOURCE} does not exist"
exit 2
else
echo 2>&1 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
echo >&2 "${SOURCE} is neither a directory nor a file, do not know how to handle it"
exit 3
fi

View File

@@ -17,7 +17,7 @@ storage:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"

View File

@@ -17,7 +17,7 @@ storage:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"

View File

@@ -0,0 +1,50 @@
storage:
vars:
bucket_name: neon-prod-storage-us-east-1
bucket_region: us-east-1
console_mgmt_base_url: http://neon-internal-api.aws.neon.tech
broker_endpoint: http://storage-broker-lb.theta.us-east-1.internal.aws.neon.tech:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 85 # TODO: decrease to 80 after all pageservers are below 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: us-east-1
ansible_aws_ssm_bucket_name: neon-prod-storage-us-east-1
console_region_id: aws-us-east-1
sentry_environment: production
children:
pageservers:
hosts:
pageserver-0.us-east-1.aws.neon.tech:
ansible_host: i-085222088b0d2e0c7
pageserver-1.us-east-1.aws.neon.tech:
ansible_host: i-0969d4f684d23a21e
pageserver-2.us-east-1.aws.neon.tech:
ansible_host: i-05dee87895da58dad
safekeepers:
hosts:
safekeeper-0.us-east-1.aws.neon.tech:
ansible_host: i-04ce739e88793d864
safekeeper-1.us-east-1.aws.neon.tech:
ansible_host: i-0e9e6c9227fb81410
safekeeper-2.us-east-1.aws.neon.tech:
ansible_host: i-072f4dd86a327d52f

View File

@@ -17,7 +17,7 @@ storage:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"

View File

@@ -17,7 +17,7 @@ storage:
kind: "LayerAccessThreshold"
period: "10m"
threshold: &default_eviction_threshold "24h"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
@@ -34,7 +34,7 @@ storage:
pageservers:
hosts:
pageserver-0.us-west-2.aws.neon.tech:
ansible_host: i-0d9f6dfae0e1c780d
ansible_host: i-0d9f6dfae0e1c780d
pageserver-1.us-west-2.aws.neon.tech:
ansible_host: i-0c834be1dddba8b3f
pageserver-2.us-west-2.aws.neon.tech:
@@ -49,5 +49,5 @@ storage:
safekeeper-1.us-west-2.aws.neon.tech:
ansible_host: i-074682f9d3c712e7c
safekeeper-2.us-west-2.aws.neon.tech:
ansible_host: i-042b7efb1729d7966
ansible_host: i-042b7efb1729d7966

View File

@@ -0,0 +1,47 @@
storage:
vars:
bucket_name: neon-dev-storage-eu-central-1
bucket_region: eu-central-1
# We only register/update storage in one preview console and manually copy to other instances
console_mgmt_base_url: http://neon-internal-api.helium.aws.neon.build
broker_endpoint: http://storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build:50051
pageserver_config_stub:
pg_distrib_dir: /usr/local
metric_collection_endpoint: http://neon-internal-api.helium.aws.neon.build/billing/api/v1/usage_events
metric_collection_interval: 10min
disk_usage_based_eviction:
max_usage_pct: 80
min_avail_bytes: 0
period: "10s"
tenant_config:
eviction_policy:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
prefix_in_bucket: "pageserver/v1"
safekeeper_s3_prefix: safekeeper/v1/wal
hostname_suffix: ""
remote_user: ssm-user
ansible_aws_ssm_region: eu-central-1
ansible_aws_ssm_bucket_name: neon-dev-storage-eu-central-1
console_region_id: aws-eu-central-1
sentry_environment: staging
children:
pageservers:
hosts:
pageserver-0.eu-central-1.aws.neon.build:
ansible_host: i-011f93ec26cfba2d4
safekeepers:
hosts:
safekeeper-0.eu-central-1.aws.neon.build:
ansible_host: i-0ff026d27babf8ddd
safekeeper-1.eu-central-1.aws.neon.build:
ansible_host: i-03983a49ee54725d9
safekeeper-2.eu-central-1.aws.neon.build:
ansible_host: i-0bd025ecdb61b0db3

View File

@@ -17,7 +17,7 @@ storage:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"

View File

@@ -17,7 +17,7 @@ storage:
kind: "LayerAccessThreshold"
period: "20m"
threshold: &default_eviction_threshold "20m"
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
evictions_low_residence_duration_metric_threshold: *default_eviction_threshold
remote_storage:
bucket_name: "{{ bucket_name }}"
bucket_region: "{{ bucket_region }}"
@@ -48,9 +48,9 @@ storage:
hosts:
safekeeper-0.us-east-2.aws.neon.build:
ansible_host: i-027662bd552bf5db0
safekeeper-1.us-east-2.aws.neon.build:
ansible_host: i-0171efc3604a7b907
safekeeper-2.us-east-2.aws.neon.build:
ansible_host: i-0de0b03a51676a6ce
safekeeper-3.us-east-2.aws.neon.build:
ansible_host: i-05f8ba2cda243bd18
safekeeper-99.us-east-2.aws.neon.build:
ansible_host: i-0d61b6a2ea32028d5

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: staging
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.alpha.eu-central-1.internal.aws.neon.build
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "staging"

View File

@@ -7,13 +7,13 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
@@ -23,6 +23,7 @@ settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.eu-west-1.aws.neon.build"
otelExporterOtlpEndpoint: "https://otel-collector.zeta.eu-west-1.internal.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"

View File

@@ -9,6 +9,7 @@ settings:
authEndpoint: "https://console.stage.neon.tech/authenticate_proxy_request/"
uri: "https://console.stage.neon.tech/psql_session/"
domain: "pg.neon.build"
otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
sentryEnvironment: "staging"
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"

View File

@@ -1,6 +1,22 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
@@ -8,6 +24,7 @@ settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.cloud.stage.neon.tech"
otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"

View File

@@ -7,15 +7,16 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
@@ -24,6 +25,7 @@ settings:
authEndpoint: "http://neon-internal-api.aws.neon.build/management/api/v2"
domain: "*.us-east-2.aws.neon.build"
extraDomains: ["*.us-east-2.postgres.zenith.tech", "*.us-east-2.retooldb-staging.com"]
otelExporterOtlpEndpoint: "https://otel-collector.beta.us-east-2.internal.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.build/billing/api/v1/usage_events"

View File

@@ -0,0 +1,67 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/management/api/v2"
domain: "*.cloud.${PREVIEW_NAME}.aws.neon.build"
sentryEnvironment: "staging"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.${PREVIEW_NAME}.aws.neon.build/billing/api/v1/usage_events"
metricCollectionInterval: "1min"
# -- Additional labels for neon-proxy pods
podLabels:
neon_service: proxy-scram
neon_env: test
neon_region: ${PREVIEW_NAME}.eu-central-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: cloud.${PREVIEW_NAME}.aws.neon.build
httpsPort: 443
#metrics:
# enabled: true
# serviceMonitor:
# enabled: true
# selector:
# release: kube-prometheus-stack
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -7,13 +7,13 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800

View File

@@ -7,13 +7,13 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800

View File

@@ -0,0 +1,69 @@
# Helm chart values for neon-proxy-scram.
# This is a YAML-formatted file.
deploymentStrategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800
image:
repository: neondatabase/neon
settings:
authBackend: "console"
authEndpoint: "http://neon-internal-api.aws.neon.tech/management/api/v2"
domain: "*.us-east-1.aws.neon.tech"
# *.us-east-1.retooldb.com hasn't been delegated yet.
extraDomains: ["*.us-east-1.postgres.vercel-storage.com"]
sentryEnvironment: "production"
wssPort: 8443
metricCollectionEndpoint: "http://neon-internal-api.aws.neon.tech/billing/api/v1/usage_events"
metricCollectionInterval: "10min"
podLabels:
neon_service: proxy-scram
neon_env: prod
neon_region: us-east-1
exposedService:
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
external-dns.alpha.kubernetes.io/hostname: us-east-1.aws.neon.tech
httpsPort: 443
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-proxy.fullname\" . }}"
labels:
helm.sh/chart: neon-proxy-{{ .Chart.Version }}
app.kubernetes.io/name: neon-proxy
app.kubernetes.io/instance: "{{ include \"neon-proxy.fullname\" . }}"
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-proxy"
endpoints:
- port: http
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"

View File

@@ -0,0 +1,52 @@
# Helm chart values for neon-storage-broker
podLabels:
neon_env: production
neon_service: storage-broker
# Use L4 LB
service:
# service.annotations -- Annotations to add to the service
annotations:
service.beta.kubernetes.io/aws-load-balancer-type: external # use newer AWS Load Balancer Controller
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-scheme: internal # deploy LB to private subnet
# assign service to this name at external-dns
external-dns.alpha.kubernetes.io/hostname: storage-broker-lb.theta.us-east-1.internal.aws.neon.tech
# service.type -- Service type
type: LoadBalancer
# service.port -- broker listen port
port: 50051
ingress:
enabled: false
metrics:
enabled: false
extraManifests:
- apiVersion: operator.victoriametrics.com/v1beta1
kind: VMServiceScrape
metadata:
name: "{{ include \"neon-storage-broker.fullname\" . }}"
labels:
helm.sh/chart: neon-storage-broker-{{ .Chart.Version }}
app.kubernetes.io/name: neon-storage-broker
app.kubernetes.io/instance: neon-storage-broker
app.kubernetes.io/version: "{{ .Chart.AppVersion }}"
app.kubernetes.io/managed-by: Helm
namespace: "{{ .Release.Namespace }}"
spec:
selector:
matchLabels:
app.kubernetes.io/name: "neon-storage-broker"
endpoints:
- port: broker
path: /metrics
interval: 10s
scrapeTimeout: 10s
namespaceSelector:
matchNames:
- "{{ .Release.Namespace }}"
settings:
sentryEnvironment: "production"

View File

@@ -7,13 +7,13 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800

View File

@@ -7,13 +7,13 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800

View File

@@ -7,13 +7,13 @@ deploymentStrategy:
maxSurge: 100%
maxUnavailable: 50%
# Delay the kill signal by 7 days (7 * 24 * 60 * 60)
# Delay the kill signal by 5 minutes (5 * 60)
# The pod(s) will stay in Terminating, keeps the existing connections
# but doesn't receive new ones
containerLifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "sleep 604800"]
command: ["/bin/sh", "-c", "sleep 300"]
terminationGracePeriodSeconds: 604800

View File

@@ -30,7 +30,7 @@ defaults:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:
@@ -42,7 +42,7 @@ jobs:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: "neon-staging"
runs-on: [ self-hosted, us-east-2, x64 ]
@@ -92,7 +92,7 @@ jobs:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
@@ -174,7 +174,7 @@ jobs:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
@@ -226,7 +226,7 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'neon-captest-new', 'neon-captest-freetier', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
@@ -282,7 +282,7 @@ jobs:
api_key: ${{ secrets.NEON_STAGING_API_KEY }}
- name: Create Allure report
if: success() || failure()
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
@@ -305,7 +305,7 @@ jobs:
#
# *_CLICKBENCH_CONNSTR: Genuine ClickBench DB with ~100M rows
# *_CLICKBENCH_10M_CONNSTR: DB with the first 10M rows of ClickBench DB
if: success() || failure()
if: ${{ !cancelled() }}
needs: [ generate-matrices, pgbench-compare ]
strategy:
@@ -317,7 +317,7 @@ jobs:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
@@ -356,7 +356,7 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
@@ -379,7 +379,7 @@ jobs:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
@@ -401,7 +401,7 @@ jobs:
# We might change it after https://github.com/neondatabase/neon/issues/2900.
#
# *_TPCH_S10_CONNSTR: DB generated with scale factor 10 (~10 GB)
if: success() || failure()
if: ${{ !cancelled() }}
needs: [ generate-matrices, clickbench-compare ]
strategy:
@@ -413,7 +413,7 @@ jobs:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
@@ -452,7 +452,7 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_TPCH_S10_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
@@ -475,7 +475,7 @@ jobs:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate
@@ -491,7 +491,7 @@ jobs:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
user-examples-compare:
if: success() || failure()
if: ${{ !cancelled() }}
needs: [ generate-matrices, tpch-compare ]
strategy:
@@ -503,7 +503,7 @@ jobs:
DEFAULT_PG_VERSION: 14
TEST_OUTPUT: /tmp/test_output
BUILD_TYPE: remote
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref == 'refs/heads/main' ) }}
SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
PLATFORM: ${{ matrix.platform }}
runs-on: [ self-hosted, us-east-2, x64 ]
@@ -542,7 +542,7 @@ jobs:
CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }}
;;
*)
echo 2>&1 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'"
exit 1
;;
esac
@@ -565,7 +565,7 @@ jobs:
BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
- name: Create Allure report
if: success() || failure()
if: ${{ !cancelled() }}
uses: ./.github/actions/allure-report
with:
action: generate

View File

@@ -13,7 +13,7 @@ defaults:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:
@@ -368,7 +368,7 @@ jobs:
build_type: ${{ matrix.build_type }}
test_selection: performance
run_in_parallel: false
save_perf_report: ${{ github.ref == 'refs/heads/main' }}
save_perf_report: ${{ github.ref_name == 'main' }}
env:
VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
@@ -541,7 +541,7 @@ jobs:
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
options: --init
needs: [ push-docker-hub, tag ]
needs: [ promote-images, tag ]
steps:
- name: Set PR's status to pending and request a remote CI test
run: |
@@ -584,8 +584,7 @@ jobs:
neon-image:
runs-on: [ self-hosted, gen3, large ]
needs: [ tag ]
# https://github.com/GoogleContainerTools/kaniko/issues/2005
container: gcr.io/kaniko-project/executor:v1.7.0-debug
container: gcr.io/kaniko-project/executor:v1.9.2-debug
defaults:
run:
shell: sh -eu {0}
@@ -597,11 +596,32 @@ jobs:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Configure ECR and Docker Hub login
run: |
DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64)
echo "::add-mask::${DOCKERHUB_AUTH}"
cat <<-EOF > /kaniko/.docker/config.json
{
"auths": {
"https://index.docker.io/v1/": {
"auth": "${DOCKERHUB_AUTH}"
}
},
"credHelpers": {
"369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login"
}
}
EOF
- name: Kaniko build neon
run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
run:
/kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
--context .
--build-arg GIT_VERSION=${{ github.sha }}
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}}
--destination neondatabase/neon:${{needs.tag.outputs.build-tag}}
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
- name: Cleanup ECR folder
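The generated /kaniko/.docker/config.json combines two auth mechanisms that Docker-style clients understand: a static base64 user:password entry under auths for Docker Hub, and a credential helper under credHelpers for ECR. Shape only, values are placeholders:

{
  "auths":       { "https://index.docker.io/v1/": { "auth": "<base64 of user:password>" } },
  "credHelpers": { "<account>.dkr.ecr.<region>.amazonaws.com": "ecr-login" }
}

With both configured, a single kaniko invocation can push the image to both registries by repeating --destination, which is what the multi-line executor commands do.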
@@ -652,7 +672,7 @@ jobs:
compute-tools-image:
runs-on: [ self-hosted, gen3, large ]
needs: [ tag ]
container: gcr.io/kaniko-project/executor:v1.7.0-debug
container: gcr.io/kaniko-project/executor:v1.9.2-debug
defaults:
run:
shell: sh -eu {0}
@@ -661,18 +681,41 @@ jobs:
- name: Checkout
uses: actions/checkout@v1 # v3 won't work with kaniko
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Configure ECR and Docker Hub login
run: |
DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64)
echo "::add-mask::${DOCKERHUB_AUTH}"
cat <<-EOF > /kaniko/.docker/config.json
{
"auths": {
"https://index.docker.io/v1/": {
"auth": "${DOCKERHUB_AUTH}"
}
},
"credHelpers": {
"369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login"
}
}
EOF
- name: Kaniko build compute tools
run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --dockerfile Dockerfile.compute-tools --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
run:
/kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
--context .
--build-arg GIT_VERSION=${{ github.sha }}
--dockerfile Dockerfile.compute-tools
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}}
--destination neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
- name: Cleanup ECR folder
run: rm -rf ~/.ecr
compute-node-image:
runs-on: [ self-hosted, gen3, large ]
container: gcr.io/kaniko-project/executor:v1.7.0-debug
container: gcr.io/kaniko-project/executor:v1.9.2-debug
needs: [ tag ]
strategy:
fail-fast: false
@@ -689,12 +732,36 @@ jobs:
submodules: true
fetch-depth: 0
- name: Configure ECR login
run: echo "{\"credsStore\":\"ecr-login\"}" > /kaniko/.docker/config.json
- name: Configure ECR and Docker Hub login
run: |
DOCKERHUB_AUTH=$(echo -n "${{ secrets.NEON_DOCKERHUB_USERNAME }}:${{ secrets.NEON_DOCKERHUB_PASSWORD }}" | base64)
echo "::add-mask::${DOCKERHUB_AUTH}"
cat <<-EOF > /kaniko/.docker/config.json
{
"auths": {
"https://index.docker.io/v1/": {
"auth": "${DOCKERHUB_AUTH}"
}
},
"credHelpers": {
"369495373322.dkr.ecr.eu-central-1.amazonaws.com": "ecr-login"
}
}
EOF
- name: Kaniko build compute node with extensions
run: /kaniko/executor --reproducible --snapshotMode=redo --skip-unused-stages --cache=true --cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache --context . --build-arg GIT_VERSION=${{ github.sha }} --build-arg PG_VERSION=${{ matrix.version }} --dockerfile Dockerfile.compute-node --destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
run:
/kaniko/executor --reproducible --snapshot-mode=redo --skip-unused-stages --cache=true
--cache-repo 369495373322.dkr.ecr.eu-central-1.amazonaws.com/cache
--context .
--build-arg GIT_VERSION=${{ github.sha }}
--build-arg PG_VERSION=${{ matrix.version }}
--dockerfile Dockerfile.compute-node
--destination 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
--destination neondatabase/compute-node-${{ matrix.version }}:${{needs.tag.outputs.build-tag}}
# Cleanup script fails otherwise - rm: cannot remove '/nvme/actions-runner/_work/_temp/_github_home/.ecr': Permission denied
- name: Cleanup ECR folder
run: rm -rf ~/.ecr
@@ -786,13 +853,11 @@ jobs:
runs-on: [ self-hosted, gen3, small ]
needs: [ tag, test-images, vm-compute-node-image ]
container: golang:1.19-bullseye
if: github.event_name != 'workflow_dispatch'
# Don't add if-condition here.
# The job should always run because other jobs depend on it and shouldn't be skipped
steps:
- name: Install Crane & ECR helper
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
@@ -802,10 +867,15 @@ jobs:
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
- name: Copy vm-compute-node images to Docker Hub
run: |
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
- name: Add latest tag to images
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} latest
@@ -814,50 +884,10 @@ jobs:
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} latest
crane tag 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} latest
- name: Cleanup ECR folder
run: rm -rf ~/.ecr
push-docker-hub:
runs-on: [ self-hosted, dev, x64 ]
needs: [ promote-images, tag ]
container: golang:1.19-bullseye
steps:
- name: Install Crane & ECR helper
run: |
go install github.com/google/go-containerregistry/cmd/crane@31786c6cbb82d6ec4fb8eb79cd9387905130534e # v0.11.0
go install github.com/awslabs/amazon-ecr-credential-helper/ecr-login/cli/docker-credential-ecr-login@69c85dc22db6511932bbf119e1a0cc5c90c69a7f # v0.6.0
- name: Configure ECR login
run: |
mkdir /github/home/.docker/
echo "{\"credsStore\":\"ecr-login\"}" > /github/home/.docker/config.json
- name: Pull neon image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} neon
- name: Pull compute tools image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} compute-tools
- name: Pull compute node v14 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v14:${{needs.tag.outputs.build-tag}} compute-node-v14
- name: Pull vm compute node v14 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v14:${{needs.tag.outputs.build-tag}} vm-compute-node-v14
- name: Pull compute node v15 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-node-v15:${{needs.tag.outputs.build-tag}} compute-node-v15
- name: Pull vm compute node v15 image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/vm-compute-node-v15:${{needs.tag.outputs.build-tag}} vm-compute-node-v15
- name: Pull rust image from ECR
run: crane pull 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned rust
- name: Push images to production ECR
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
run: |
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/neon:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/neon:latest
crane copy 369495373322.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:${{needs.tag.outputs.build-tag}} 093970136003.dkr.ecr.eu-central-1.amazonaws.com/compute-tools:latest
@@ -872,28 +902,12 @@ jobs:
echo "" > /github/home/.docker/config.json
crane auth login -u ${{ secrets.NEON_DOCKERHUB_USERNAME }} -p ${{ secrets.NEON_DOCKERHUB_PASSWORD }} index.docker.io
- name: Push neon image to Docker Hub
run: crane push neon neondatabase/neon:${{needs.tag.outputs.build-tag}}
- name: Push vm-compute-node to Docker Hub
run: |
crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}
- name: Push compute tools image to Docker Hub
run: crane push compute-tools neondatabase/compute-tools:${{needs.tag.outputs.build-tag}}
- name: Push compute node v14 image to Docker Hub
run: crane push compute-node-v14 neondatabase/compute-node-v14:${{needs.tag.outputs.build-tag}}
- name: Push vm compute node v14 image to Docker Hub
run: crane push vm-compute-node-v14 neondatabase/vm-compute-node-v14:${{needs.tag.outputs.build-tag}}
- name: Push compute node v15 image to Docker Hub
run: crane push compute-node-v15 neondatabase/compute-node-v15:${{needs.tag.outputs.build-tag}}
- name: Push vm compute node v15 image to Docker Hub
run: crane push vm-compute-node-v15 neondatabase/vm-compute-node-v15:${{needs.tag.outputs.build-tag}}
- name: Push rust image to Docker Hub
run: crane push rust neondatabase/rust:pinned
- name: Add latest tag to images in Docker Hub
- name: Push latest tags to Docker Hub
if: |
(github.ref_name == 'main' || github.ref_name == 'release') &&
github.event_name != 'workflow_dispatch'
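For orientation, the crane subcommands these jobs lean on (registry names illustrative):

crane pull <ecr>/neon:<tag> neon               # download an image into a local tarball 'neon'
crane push neon neondatabase/neon:<tag>        # upload that tarball to another registry
crane tag  <ecr>/neon:<tag> latest             # add a tag to an image in the same registry
crane copy <src>/neon:<tag> <dst>/neon:latest  # copy an image between registries directly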
@@ -913,7 +927,7 @@ jobs:
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
# We need both storage **and** compute images for deploy, because control plane picks the compute version based on the storage version.
# If it notices fresh storage, it may bump the compute version, and if the compute image failed to build, that may break things badly
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ promote-images, tag, regress-tests ]
if: |
contains(github.event.pull_request.labels.*.name, 'deploy-test-storage') &&
github.event_name != 'workflow_dispatch'
@@ -947,7 +961,7 @@ jobs:
deploy:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ promote-images, tag, regress-tests ]
if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
steps:
- name: Fix git ownership
@@ -984,7 +998,7 @@ jobs:
container:
image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/rust:pinned
options: --init
needs: [ push-docker-hub, tag, regress-tests ]
needs: [ promote-images, tag, regress-tests ]
if: github.ref_name == 'release' && github.event_name != 'workflow_dispatch'
steps:
- name: Promote compatibility snapshot for the release
@@ -1007,7 +1021,7 @@ jobs:
S3_KEY=$(aws s3api list-objects-v2 --bucket ${BUCKET} --prefix ${OLD_PREFIX} | jq -r '.Contents[].Key' | grep ${FILENAME} | sort --version-sort | tail -1 || true)
if [ -z "${S3_KEY}" ]; then
echo 2>&1 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist"
echo >&2 "Neither s3://${BUCKET}/${OLD_PREFIX}/${FILENAME} nor its version from previous attempts exist"
exit 1
fi

View File

@@ -48,7 +48,8 @@ jobs:
shell: bash
strategy:
matrix:
target_region: [ eu-west-1, us-east-2 ]
# TODO(sergey): Fix storage deploy in eu-central-1
target_region: [ eu-west-1, us-east-2]
environment:
name: dev-${{ matrix.target_region }}
steps:
@@ -133,6 +134,53 @@ jobs:
- name: Cleanup helm folder
run: rm -rf ~/.cache
deploy-preview-proxy-new:
runs-on: [ self-hosted, gen3, small ]
container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
if: inputs.deployProxy
defaults:
run:
shell: bash
strategy:
matrix:
include:
- target_region: eu-central-1
target_cluster: dev-eu-central-1-alpha
environment:
name: dev-${{ matrix.target_region }}
steps:
- name: Checkout
uses: actions/checkout@v3
with:
submodules: true
fetch-depth: 0
ref: ${{ inputs.branch }}
- name: Configure AWS Credentials
uses: aws-actions/configure-aws-credentials@v1-node16
with:
role-to-assume: arn:aws:iam::369495373322:role/github-runner
aws-region: eu-central-1
role-skip-session-tagging: true
role-duration-seconds: 1800
- name: Configure environment
run: |
helm repo add neondatabase https://neondatabase.github.io/helm-charts
aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
- name: Re-deploy preview proxies
run: |
DOCKER_TAG=${{ inputs.dockerTag }}
for PREVIEW_NAME in helium argon krypton xenon radon oganesson hydrogen nitrogen oxygen fluorine chlorine; do
export PREVIEW_NAME
envsubst <.github/helm-values/preview-template.neon-proxy-scram.yaml >preview-${PREVIEW_NAME}.neon-proxy-scram.yaml
helm upgrade neon-proxy-scram-${PREVIEW_NAME} neondatabase/neon-proxy --namespace neon-proxy-${PREVIEW_NAME} --create-namespace --install --atomic -f preview-${PREVIEW_NAME}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
done
- name: Cleanup helm folder
run: rm -rf ~/.cache
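The per-preview rollout is a plain envsubst-plus-helm pattern: export the variable, render the template into a concrete values file, then upgrade-or-install the release from it. Reduced form (names illustrative):

export PREVIEW_NAME=helium
envsubst < preview-template.yaml > preview-helium.yaml   # substitutes ${PREVIEW_NAME}
helm upgrade --install neon-proxy-scram-helium neondatabase/neon-proxy -f preview-helium.yaml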
deploy-storage-broker-new:
runs-on: [ self-hosted, gen3, small ]
@@ -148,6 +196,8 @@ jobs:
target_cluster: dev-us-east-2-beta
- target_region: eu-west-1
target_cluster: dev-eu-west-1-zeta
- target_region: eu-central-1
target_cluster: dev-eu-central-1-alpha
environment:
name: dev-${{ matrix.target_region }}
steps:

View File

@@ -49,7 +49,7 @@ jobs:
shell: bash
strategy:
matrix:
target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ]
target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1, us-east-1 ]
environment:
name: prod-${{ matrix.target_region }}
steps:
@@ -97,6 +97,10 @@ jobs:
target_cluster: prod-ap-southeast-1-epsilon
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
- target_region: us-east-1
target_cluster: prod-us-east-1-theta
deploy_link_proxy: false
deploy_legacy_scram_proxy: false
environment:
name: prod-${{ matrix.target_region }}
steps:
@@ -147,6 +151,8 @@ jobs:
target_cluster: prod-eu-central-1-gamma
- target_region: ap-southeast-1
target_cluster: prod-ap-southeast-1-epsilon
- target_region: us-east-1
target_cluster: prod-us-east-1-theta
environment:
name: prod-${{ matrix.target_region }}
steps:

View File

@@ -12,7 +12,7 @@ defaults:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
env:

View File

@@ -14,7 +14,7 @@ on:
concurrency:
# Allow only one workflow per any non-`main` branch.
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }}
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true
jobs:

Cargo.lock (generated)

File diff suppressed because it is too large

View File

@@ -24,10 +24,10 @@ atty = "0.2.14"
aws-config = { version = "0.51.0", default-features = false, features=["rustls"] }
aws-sdk-s3 = "0.21.0"
aws-smithy-http = "0.51.0"
aws-types = "0.51.0"
aws-types = "0.55"
base64 = "0.13.0"
bincode = "1.3"
bindgen = "0.61"
bindgen = "0.65"
bstr = "1.0"
byteorder = "1.4"
bytes = "1.0"
@@ -50,7 +50,7 @@ git-version = "0.3"
hashbrown = "0.13"
hashlink = "0.8.1"
hex = "0.4"
hex-literal = "0.3"
hex-literal = "0.4"
hmac = "0.12.1"
hostname = "0.3.1"
humantime = "2.1"
@@ -80,18 +80,18 @@ reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"
reqwest-tracing = { version = "0.4.0", features = ["opentelemetry_0_18"] }
reqwest-middleware = "0.2.0"
routerify = "3"
rpds = "0.12.0"
rpds = "0.13"
rustls = "0.20"
rustls-pemfile = "1"
rustls-split = "0.3"
scopeguard = "1.1"
sentry = { version = "0.29", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
sentry = { version = "0.30", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"
serde_with = "2.0"
sha2 = "0.10.2"
signal-hook = "0.3"
socket2 = "0.4.4"
socket2 = "0.5"
strum = "0.24"
strum_macros = "0.24"
svg_fmt = "0.4.1"
@@ -106,17 +106,18 @@ tokio-postgres-rustls = "0.9.0"
tokio-rustls = "0.23"
tokio-stream = "0.1"
tokio-util = { version = "0.7", features = ["io"] }
toml = "0.5"
toml_edit = { version = "0.17", features = ["easy"] }
tonic = {version = "0.8", features = ["tls", "tls-roots"]}
toml = "0.7"
toml_edit = "0.19"
tonic = {version = "0.9", features = ["tls", "tls-roots"]}
tracing = "0.1"
tracing-error = "0.2.0"
tracing-opentelemetry = "0.18.0"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
url = "2.2"
uuid = { version = "1.2", features = ["v4", "serde"] }
walkdir = "2.3.2"
webpki-roots = "0.22.5"
x509-parser = "0.14"
webpki-roots = "0.23"
x509-parser = "0.15"
## TODO replace this with tracing
env_logger = "0.10"
@@ -154,9 +155,9 @@ workspace_hack = { version = "0.1", path = "./workspace_hack/" }
## Build dependencies
criterion = "0.4"
rcgen = "0.10"
rstest = "0.16"
rstest = "0.17"
tempfile = "3.4"
tonic-build = "0.8"
tonic-build = "0.9"
# This is only needed for proxy's tests.
# TODO: we should probably fork `tokio-postgres-rustls` instead.

View File

@@ -12,7 +12,7 @@ FROM debian:bullseye-slim AS build-deps
RUN apt update && \
apt install -y git autoconf automake libtool build-essential bison flex libreadline-dev \
zlib1g-dev libxml2-dev libcurl4-openssl-dev libossp-uuid-dev wget pkg-config libssl-dev \
libicu-dev libxslt1-dev
libicu-dev libxslt1-dev liblz4-dev libzstd-dev
#########################################################################################
#
@@ -24,8 +24,13 @@ FROM build-deps AS pg-build
ARG PG_VERSION
COPY vendor/postgres-${PG_VERSION} postgres
RUN cd postgres && \
./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp --with-icu \
--with-libxml --with-libxslt && \
export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3' --enable-debug --with-openssl --with-uuid=ossp \
--with-icu --with-libxml --with-libxslt --with-lz4" && \
if [ "${PG_VERSION}" != "v14" ]; then \
# zstd is available only from PG15
export CONFIGURE_CMD="${CONFIGURE_CMD} --with-zstd"; \
fi && \
eval $CONFIGURE_CMD && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s install && \
make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
# Install headers
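The CONFIGURE_CMD string-and-eval juggling is the usual workaround for POSIX sh lacking arrays: accumulate flags in a string, gate the version-specific ones, then eval the whole command. Reduced to its core (variable names assumed):

CMD="./configure --with-lz4"
if [ "$PG_VERSION" != "v14" ]; then
    CMD="$CMD --with-zstd"   # --with-zstd is only available from PostgreSQL 15 onwards
fi
eval "$CMD"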
@@ -60,6 +65,7 @@ RUN apt update && \
# SFCGAL > 1.3 requires CGAL > 5.2, Bullseye's libcgal-dev is 5.2
RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar.gz -O SFCGAL.tar.gz && \
echo "4e39b3b2adada6254a7bdba6d297bb28e1a9835a9f879b74f37e2dab70203232 SFCGAL.tar.gz" | sha256sum --check && \
mkdir sfcgal-src && cd sfcgal-src && tar xvzf ../SFCGAL.tar.gz --strip-components=1 -C . && \
cmake . && make -j $(getconf _NPROCESSORS_ONLN) && \
DESTDIR=/sfcgal make install -j $(getconf _NPROCESSORS_ONLN) && \
@@ -68,6 +74,7 @@ RUN wget https://gitlab.com/Oslandia/SFCGAL/-/archive/v1.3.10/SFCGAL-v1.3.10.tar
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postgis.tar.gz && \
echo "9a2a219da005a1730a39d1959a1c7cec619b1efb009b65be80ffc25bad299068 postgis.tar.gz" | sha256sum --check && \
mkdir postgis-src && cd postgis-src && tar xvzf ../postgis.tar.gz --strip-components=1 -C . && \
./autogen.sh && \
./configure --with-sfcgal=/usr/local/bin/sfcgal-config && \
@@ -84,6 +91,7 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.2.tar.gz -O postg
echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control
RUN wget https://github.com/pgRouting/pgrouting/archive/v3.4.2.tar.gz -O pgrouting.tar.gz && \
echo "cac297c07d34460887c4f3b522b35c470138760fe358e351ad1db4edb6ee306e pgrouting.tar.gz" | sha256sum --check && \
mkdir pgrouting-src && cd pgrouting-src && tar xvzf ../pgrouting.tar.gz --strip-components=1 -C . && \
mkdir build && \
cd build && \
@@ -104,6 +112,7 @@ RUN apt update && \
apt install -y ninja-build python3-dev libncurses5 binutils clang
RUN wget https://github.com/plv8/plv8/archive/refs/tags/v3.1.5.tar.gz -O plv8.tar.gz && \
echo "1e108d5df639e4c189e1c5bdfa2432a521c126ca89e7e5a969d46899ca7bf106 plv8.tar.gz" | sha256sum --check && \
mkdir plv8-src && cd plv8-src && tar xvzf ../plv8.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make DOCKER=1 -j $(getconf _NPROCESSORS_ONLN) install && \
@@ -125,11 +134,13 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# packaged cmake is too old
RUN wget https://github.com/Kitware/CMake/releases/download/v3.24.2/cmake-3.24.2-linux-x86_64.sh \
-q -O /tmp/cmake-install.sh \
&& echo "739d372726cb23129d57a539ce1432453448816e345e1545f6127296926b6754 /tmp/cmake-install.sh" | sha256sum --check \
&& chmod u+x /tmp/cmake-install.sh \
&& /tmp/cmake-install.sh --skip-license --prefix=/usr/local/ \
&& rm /tmp/cmake-install.sh
RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz && \
echo "ec99f1f5974846bde64f4513cf8d2ea1b8d172d2218ab41803bf6a63532272bc h3.tar.gz" | sha256sum --check && \
mkdir h3-src && cd h3-src && tar xvzf ../h3.tar.gz --strip-components=1 -C . && \
mkdir build && cd build && \
cmake .. -DCMAKE_BUILD_TYPE=Release && \
@@ -139,6 +150,7 @@ RUN wget https://github.com/uber/h3/archive/refs/tags/v4.1.0.tar.gz -O h3.tar.gz
rm -rf build
RUN wget https://github.com/zachasme/h3-pg/archive/refs/tags/v4.1.2.tar.gz -O h3-pg.tar.gz && \
echo "c135aa45999b2ad1326d2537c1cadef96d52660838e4ca371706c08fdea1a956 h3-pg.tar.gz" | sha256sum --check && \
mkdir h3-pg-src && cd h3-pg-src && tar xvzf ../h3-pg.tar.gz --strip-components=1 -C . && \
export PATH="/usr/local/pgsql/bin:$PATH" && \
make -j $(getconf _NPROCESSORS_ONLN) && \
@@ -156,6 +168,7 @@ FROM build-deps AS unit-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/df7cb/postgresql-unit/archive/refs/tags/7.7.tar.gz -O postgresql-unit.tar.gz && \
echo "411d05beeb97e5a4abf17572bfcfbb5a68d98d1018918feff995f6ee3bb03e79 postgresql-unit.tar.gz" | sha256sum --check && \
mkdir postgresql-unit-src && cd postgresql-unit-src && tar xvzf ../postgresql-unit.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -176,6 +189,7 @@ FROM build-deps AS vector-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/pgvector/pgvector/archive/refs/tags/v0.4.0.tar.gz -O pgvector.tar.gz && \
echo "b76cf84ddad452cc880a6c8c661d137ddd8679c000a16332f4f03ecf6e10bcc8 pgvector.tar.gz" | sha256sum --check && \
mkdir pgvector-src && cd pgvector-src && tar xvzf ../pgvector.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -192,6 +206,7 @@ COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
# 9742dab1b2f297ad3811120db7b21451bca2d3c9 made on 13/11/2021
RUN wget https://github.com/michelp/pgjwt/archive/9742dab1b2f297ad3811120db7b21451bca2d3c9.tar.gz -O pgjwt.tar.gz && \
echo "cfdefb15007286f67d3d45510f04a6a7a495004be5b3aecb12cda667e774203f pgjwt.tar.gz" | sha256sum --check && \
mkdir pgjwt-src && cd pgjwt-src && tar xvzf ../pgjwt.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgjwt.control
@@ -206,6 +221,7 @@ FROM build-deps AS hypopg-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/HypoPG/hypopg/archive/refs/tags/1.3.1.tar.gz -O hypopg.tar.gz && \
echo "e7f01ee0259dc1713f318a108f987663d60f3041948c2ada57a94b469565ca8e hypopg.tar.gz" | sha256sum --check && \
mkdir hypopg-src && cd hypopg-src && tar xvzf ../hypopg.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -221,6 +237,7 @@ FROM build-deps AS pg-hashids-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/iCyberon/pg_hashids/archive/refs/tags/v1.2.1.tar.gz -O pg_hashids.tar.gz && \
echo "74576b992d9277c92196dd8d816baa2cc2d8046fe102f3dcd7f3c3febed6822a pg_hashids.tar.gz" | sha256sum --check && \
mkdir pg_hashids-src && cd pg_hashids-src && tar xvzf ../pg_hashids.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -236,6 +253,7 @@ FROM build-deps AS rum-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/postgrespro/rum/archive/refs/tags/1.3.13.tar.gz -O rum.tar.gz && \
echo "6ab370532c965568df6210bd844ac6ba649f53055e48243525b0b7e5c4d69a7d rum.tar.gz" | sha256sum --check && \
mkdir rum-src && cd rum-src && tar xvzf ../rum.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -251,11 +269,28 @@ FROM build-deps AS pgtap-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/theory/pgtap/archive/refs/tags/v1.2.0.tar.gz -O pgtap.tar.gz && \
echo "9c7c3de67ea41638e14f06da5da57bac6f5bd03fea05c165a0ec862205a5c052 pgtap.tar.gz" | sha256sum --check && \
mkdir pgtap-src && cd pgtap-src && tar xvzf ../pgtap.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgtap.control
#########################################################################################
#
# Layer "ip4r-pg-build"
# compile ip4r extension
#
#########################################################################################
FROM build-deps AS ip4r-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/RhodiumToad/ip4r/archive/refs/tags/2.4.1.tar.gz -O ip4r.tar.gz && \
echo "78b9f0c1ae45c22182768fe892a32d533c82281035e10914111400bf6301c726 ip4r.tar.gz" | sha256sum --check && \
mkdir ip4r-src && cd ip4r-src && tar xvzf ../ip4r.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
echo 'trusted = true' >> /usr/local/pgsql/share/extension/ip4r.control
#########################################################################################
#
# Layer "prefix-pg-build"
@@ -266,6 +301,7 @@ FROM build-deps AS prefix-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/dimitri/prefix/archive/refs/tags/v1.2.9.tar.gz -O prefix.tar.gz && \
echo "38d30a08d0241a8bbb8e1eb8f0152b385051665a8e621c8899e7c5068f8b511e prefix.tar.gz" | sha256sum --check && \
mkdir prefix-src && cd prefix-src && tar xvzf ../prefix.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -281,6 +317,7 @@ FROM build-deps AS hll-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/citusdata/postgresql-hll/archive/refs/tags/v2.17.tar.gz -O hll.tar.gz && \
echo "9a18288e884f197196b0d29b9f178ba595b0dfc21fbf7a8699380e77fa04c1e9 hll.tar.gz" | sha256sum --check && \
mkdir hll-src && cd hll-src && tar xvzf ../hll.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config && \
@@ -296,6 +333,7 @@ FROM build-deps AS plpgsql-check-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
RUN wget https://github.com/okbob/plpgsql_check/archive/refs/tags/v2.3.2.tar.gz -O plpgsql_check.tar.gz && \
echo "9d81167c4bbeb74eebf7d60147b21961506161addc2aee537f95ad8efeae427b plpgsql_check.tar.gz" | sha256sum --check && \
mkdir plpgsql_check-src && cd plpgsql_check-src && tar xvzf ../plpgsql_check.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
make -j $(getconf _NPROCESSORS_ONLN) install PG_CONFIG=/usr/local/pgsql/bin/pg_config USE_PGXS=1 && \
@@ -315,6 +353,7 @@ ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN apt-get update && \
apt-get install -y cmake && \
wget https://github.com/timescale/timescaledb/archive/refs/tags/2.10.1.tar.gz -O timescaledb.tar.gz && \
echo "6fca72a6ed0f6d32d2b3523951ede73dc5f9b0077b38450a029a5f411fdb8c73 timescaledb.tar.gz" | sha256sum --check && \
mkdir timescaledb-src && cd timescaledb-src && tar xvzf ../timescaledb.tar.gz --strip-components=1 -C . && \
./bootstrap -DSEND_TELEMETRY_DEFAULT:BOOL=OFF -DUSE_TELEMETRY:BOOL=OFF -DAPACHE_ONLY:BOOL=ON && \
cd build && \
@@ -323,7 +362,39 @@ RUN apt-get update && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/timescaledb.control
#########################################################################################
#
# Layer "pg-hint-plan-pg-build"
# compile pg_hint_plan extension
#
#########################################################################################
FROM build-deps AS pg-hint-plan-pg-build
COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
ARG PG_VERSION
ENV PATH "/usr/local/pgsql/bin:$PATH"
RUN case "${PG_VERSION}" in \
"v14") \
export PG_HINT_PLAN_VERSION=14_1_4_1 \
export PG_HINT_PLAN_CHECKSUM=c3501becf70ead27f70626bce80ea401ceac6a77e2083ee5f3ff1f1444ec1ad1 \
;; \
"v15") \
export PG_HINT_PLAN_VERSION=15_1_5_0 \
export PG_HINT_PLAN_CHECKSUM=564cbbf4820973ffece63fbf76e3c0af62c4ab23543142c7caaa682bc48918be \
;; \
*) \
echo "Export the valid PG_HINT_PLAN_VERSION variable" && exit 1 \
;; \
esac && \
wget https://github.com/ossc-db/pg_hint_plan/archive/refs/tags/REL${PG_HINT_PLAN_VERSION}.tar.gz -O pg_hint_plan.tar.gz && \
echo "${PG_HINT_PLAN_CHECKSUM} pg_hint_plan.tar.gz" | sha256sum --check && \
mkdir pg_hint_plan-src && cd pg_hint_plan-src && tar xvzf ../pg_hint_plan.tar.gz --strip-components=1 -C . && \
make -j $(getconf _NPROCESSORS_ONLN) && \
make install -j $(getconf _NPROCESSORS_ONLN) && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_hint_plan.control
#########################################################################################
#
# Layer "rust extensions"
# This layer is used to build `pgx` deps
#
@@ -351,7 +422,7 @@ RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux
USER root
#########################################################################################
#
# Layer "pg-jsonschema-pg-build"
# Compile "pg_jsonschema" extension
#
@@ -359,15 +430,17 @@ USER root
FROM rust-extensions-build AS pg-jsonschema-pg-build
# there is no release tag yet, but we need it due to the superuser fix in the control file
# caeab60d70b2fd3ae421ec66466a3abbb37b7ee6 made on 06/03/2023
# there is no release tag yet, but we need it due to the superuser fix in the control file, switch to git tag after release >= 0.1.5
RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421ec66466a3abbb37b7ee6.tar.gz -O pg_jsonschema.tar.gz && \
echo "54129ce2e7ee7a585648dbb4cef6d73f795d94fe72f248ac01119992518469a4 pg_jsonschema.tar.gz" | sha256sum --check && \
mkdir pg_jsonschema-src && cd pg_jsonschema-src && tar xvzf ../pg_jsonschema.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_jsonschema.control
#########################################################################################
#
# Layer "pg-graphql-pg-build"
# Compile "pg_graphql" extension
#
@@ -375,11 +448,13 @@ RUN wget https://github.com/supabase/pg_jsonschema/archive/caeab60d70b2fd3ae421e
FROM rust-extensions-build AS pg-graphql-pg-build
# b4988843647450a153439be367168ed09971af85 made on 22/02/2023 (from remove-pgx-contrib-spiext branch)
# Currently pgx version bump to >= 0.7.2 causes "call to unsafe function" compilation errors in
# pgx-contrib-spiext. There is a branch that removes that dependency, so use it. It is on the
# same 1.1 version we've used before.
RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yrashk/pg_graphql && \
cd pg_graphql && \
RUN wget https://github.com/yrashk/pg_graphql/archive/b4988843647450a153439be367168ed09971af85.tar.gz -O pg_graphql.tar.gz && \
echo "0c7b0e746441b2ec24187d0e03555faf935c2159e2839bddd14df6dafbc8c9bd pg_graphql.tar.gz" | sha256sum --check && \
mkdir pg_graphql-src && cd pg_graphql-src && tar xvzf ../pg_graphql.tar.gz --strip-components=1 -C . && \
sed -i 's/pgx = "~0.7.1"/pgx = { version = "0.7.3", features = [ "unsafe-postgres" ] }/g' Cargo.toml && \
sed -i 's/pgx-tests = "~0.7.1"/pgx-tests = "0.7.3"/g' Cargo.toml && \
cargo pgx install --release && \
@@ -396,8 +471,10 @@ RUN git clone -b remove-pgx-contrib-spiext --single-branch https://github.com/yr
FROM rust-extensions-build AS pg-tiktoken-pg-build
RUN git clone --depth=1 --single-branch https://github.com/kelvich/pg_tiktoken && \
cd pg_tiktoken && \
# 801f84f08c6881c8aa30f405fafbf00eec386a72 made on 10/03/2023
RUN wget https://github.com/kelvich/pg_tiktoken/archive/801f84f08c6881c8aa30f405fafbf00eec386a72.tar.gz -O pg_tiktoken.tar.gz && \
echo "52f60ac800993a49aa8c609961842b611b6b1949717b69ce2ec9117117e16e4a pg_tiktoken.tar.gz" | sha256sum --check && \
mkdir pg_tiktoken-src && cd pg_tiktoken-src && tar xvzf ../pg_tiktoken.tar.gz --strip-components=1 -C . && \
cargo pgx install --release && \
echo "trusted = true" >> /usr/local/pgsql/share/extension/pg_tiktoken.control
@@ -423,10 +500,12 @@ COPY --from=hypopg-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hashids-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=rum-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pgtap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=ip4r-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=prefix-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=hll-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=plpgsql-check-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=timescaledb-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY --from=pg-hint-plan-pg-build /usr/local/pgsql/ /usr/local/pgsql/
COPY pgxn/ pgxn/
RUN make -j $(getconf _NPROCESSORS_ONLN) \
@@ -491,13 +570,17 @@ COPY --from=compute-tools --chown=postgres /home/nonroot/target/release-line-deb
# Install:
# libreadline8 for psql
# libicu67, locales for collations (including ICU and plpgsql_check)
# liblz4-1 for lz4
# libossp-uuid16 for extension ossp-uuid
# libgeos, libgdal, libsfcgal1, libproj and libprotobuf-c1 for PostGIS
# libxml2, libxslt1.1 for xml2
# libzstd1 for zstd
RUN apt update && \
apt install --no-install-recommends -y \
gdb \
locales \
libicu67 \
liblz4-1 \
libreadline8 \
libossp-uuid16 \
libgeos-c1v5 \
@@ -507,7 +590,8 @@ RUN apt update && \
libsfcgal1 \
libxml2 \
libxslt1.1 \
gdb && \
libzstd1 \
procps && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \
localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8

View File

@@ -54,7 +54,7 @@ RUN set -e \
RUN set -e \
&& echo "::sysinit:cgconfigparser -l /etc/cgconfig.conf -s 1664" >> /etc/inittab \
&& CONNSTR="dbname=neondb user=cloud_admin sslmode=disable" \
&& CONNSTR="dbname=postgres user=cloud_admin sslmode=disable" \
&& ARGS="--auto-restart --cgroup=neon-postgres --pgconnstr=\"$CONNSTR\"" \
&& echo "::respawn:su vm-informant -c '/usr/local/bin/vm-informant $ARGS'" >> /etc/inittab

View File

@@ -44,9 +44,9 @@ use tracing::{error, info};
use url::Url;
use compute_api::responses::ComputeStatus;
use compute_api::spec::{ComputeSpecAnyVersion, ComputeSpecV2};
use compute_tools::compute::{ComputeNode, ComputeState};
use compute_tools::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_tools::configurator::launch_configurator;
use compute_tools::http::api::launch_http_server;
use compute_tools::logger::*;
use compute_tools::monitor::launch_monitor;
@@ -58,9 +58,6 @@ fn main() -> Result<()> {
let matches = cli().get_matches();
let http_port = *matches
.get_one::<u16>("http-port")
.expect("http-port is required");
let pgdata = matches
.get_one::<String>("pgdata")
.expect("PGDATA path is required");
@@ -76,7 +73,7 @@ fn main() -> Result<()> {
// Try to use just 'postgres' if no path is provided
let pgbin = matches.get_one::<String>("pgbin").unwrap();
let mut spec: Option<ComputeSpecAnyVersion> = None;
let spec;
let mut live_config_allowed = false;
match spec_json {
// First, try to get cluster spec from the cli argument
@@ -92,9 +89,13 @@ fn main() -> Result<()> {
} else if let Some(id) = compute_id {
if let Some(cp_base) = control_plane_uri {
live_config_allowed = true;
if let Ok(s) = get_spec_from_control_plane(cp_base, id) {
spec = Some(s);
}
spec = match get_spec_from_control_plane(cp_base, id) {
Ok(s) => s,
Err(e) => {
error!("cannot get response from control plane: {}", e);
panic!("neither spec nor confirmation that compute is in the Empty state was received");
}
};
} else {
panic!("must specify both --control-plane-uri and --compute-id or none");
}
@@ -110,15 +111,13 @@ fn main() -> Result<()> {
let mut new_state = ComputeState::new();
let spec_set;
if let Some(spec) = spec {
// Parse the spec file, upgrading it from older format if necessary
let spec: ComputeSpecV2 = ComputeSpecV2::try_from(spec)?;
new_state.spec = Some(spec);
let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
new_state.pspec = Some(pspec);
spec_set = true;
} else {
spec_set = false;
}
let compute_node = ComputeNode {
start_time: Utc::now(),
connstr: Url::parse(connstr).context("cannot parse connstr as a URL")?,
pgdata: pgdata.to_string(),
pgbin: pgbin.to_string(),
@@ -130,8 +129,7 @@ fn main() -> Result<()> {
// Launch the http service first, so that we can serve control-plane
// requests while configuration is still in progress.
let _http_handle =
launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");
let _http_handle = launch_http_server(&compute).expect("cannot launch http endpoint thread");
if !spec_set {
// No spec provided, hang waiting for it.
@@ -150,8 +148,19 @@ fn main() -> Result<()> {
// We got all we need, update the state.
let mut state = compute.state.lock().unwrap();
let spec = state.spec.as_ref().expect("spec must be set");
let startup_tracing_context = spec.startup_tracing_context.clone();
let pspec = state.pspec.as_ref().expect("spec must be set");
let startup_tracing_context = pspec.spec.startup_tracing_context.clone();
// Record how long we slept waiting for the spec.
state.metrics.wait_for_spec_ms = Utc::now()
.signed_duration_since(state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;
// Reset start time to the actual start of the configuration, so that
// total startup time is properly measured at the end.
state.start_time = Utc::now();
state.status = ComputeStatus::Init;
compute.state_changed.notify_all();
drop(state);
@@ -181,6 +190,8 @@ fn main() -> Result<()> {
// Launch remaining service threads
let _monitor_handle = launch_monitor(&compute).expect("cannot launch compute monitor thread");
let _configurator_handle =
launch_configurator(&compute).expect("cannot launch configurator thread");
// Start Postgres
let mut delay_exit = false;
@@ -251,14 +262,6 @@ fn cli() -> clap::Command {
let version = option_env!("CARGO_PKG_VERSION").unwrap_or("unknown");
clap::Command::new("compute_ctl")
.version(version)
.arg(
Arg::new("http-port")
.long("http-port")
.value_name("HTTP_PORT")
.default_value("3080")
.value_parser(clap::value_parser!(u16))
.required(false),
)
.arg(
Arg::new("connstr")
.short('C')

View File

@@ -1,12 +1,28 @@
use anyhow::{anyhow, Result};
use postgres::Client;
use tokio_postgres::NoTls;
use tracing::{error, instrument};
use crate::compute::ComputeNode;
/// Update the timestamp in a row of a special service table to check
/// that we can actually write some data on this particular timeline.
/// Create the table if it is missing.
#[instrument(skip_all)]
pub fn create_writability_check_data(client: &mut Client) -> Result<()> {
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
// Connect to the database.
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
// The connection object performs the actual communication with the database,
// so spawn it off to run on its own.
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
let query = "
CREATE TABLE IF NOT EXISTS health_check (
id serial primary key,
@@ -15,31 +31,15 @@ pub fn create_writability_check_data(client: &mut Client) -> Result<()> {
INSERT INTO health_check VALUES (1, now())
ON CONFLICT (id) DO UPDATE
SET updated_at = now();";
let result = client.simple_query(query)?;
if result.len() < 2 {
return Err(anyhow::format_err!("executed {} queries", result.len()));
}
Ok(())
}
#[instrument(skip_all)]
pub async fn check_writability(compute: &ComputeNode) -> Result<()> {
let (client, connection) = tokio_postgres::connect(compute.connstr.as_str(), NoTls).await?;
if client.is_closed() {
return Err(anyhow!("connection to postgres closed"));
}
tokio::spawn(async move {
if let Err(e) = connection.await {
error!("connection error: {}", e);
}
});
let result = client
.simple_query("UPDATE health_check SET updated_at = now() WHERE id = 1;")
.await?;
if result.len() != 1 {
return Err(anyhow!("statement can't be executed"));
let result = client.simple_query(query).await?;
if result.len() != 2 {
return Err(anyhow::format_err!(
"expected 2 query results, but got {}",
result.len()
));
}
Ok(())
}
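The connect-then-spawn dance above is the standard tokio-postgres pattern: connect() returns a client plus a connection future, and that future must be polled for the client to make any progress, so it gets its own task. A minimal sketch (connection string hypothetical; assumes the same NoTls and tracing imports, inside an async fn returning anyhow::Result):

let (client, connection) = tokio_postgres::connect("host=localhost user=postgres", NoTls).await?;
tokio::spawn(async move {
    // the connection drives all database I/O; run it until it finishes or errors
    if let Err(e) = connection.await {
        error!("connection error: {}", e);
    }
});
let _rows = client.simple_query("SELECT 1;").await?;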

View File

@@ -26,19 +26,18 @@ use chrono::{DateTime, Utc};
use postgres::{Client, NoTls};
use tokio_postgres;
use tracing::{info, instrument, warn};
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
use compute_api::responses::{ComputeMetrics, ComputeStatus};
use compute_api::spec::ComputeSpecV2;
use compute_api::spec::ComputeSpec;
use crate::checker::create_writability_check_data;
use crate::config;
use crate::pg_helpers::*;
use crate::spec::*;
/// Compute node info shared across several `compute_ctl` threads.
pub struct ComputeNode {
pub start_time: DateTime<Utc>,
// Url type maintains proper escaping
pub connstr: url::Url,
pub pgdata: String,
@@ -66,21 +65,23 @@ pub struct ComputeNode {
#[derive(Clone, Debug)]
pub struct ComputeState {
pub start_time: DateTime<Utc>,
pub status: ComputeStatus,
/// Timestamp of the last Postgres activity
pub last_active: DateTime<Utc>,
pub error: Option<String>,
pub spec: Option<ComputeSpecV2>,
pub pspec: Option<ParsedSpec>,
pub metrics: ComputeMetrics,
}
impl ComputeState {
pub fn new() -> Self {
Self {
start_time: Utc::now(),
status: ComputeStatus::Empty,
last_active: Utc::now(),
error: None,
spec: None,
pspec: None,
metrics: ComputeMetrics::default(),
}
}
@@ -92,6 +93,49 @@ impl Default for ComputeState {
}
}
#[derive(Clone, Debug)]
pub struct ParsedSpec {
pub spec: ComputeSpec,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
pub pageserver_connstr: String,
pub storage_auth_token: Option<String>,
}
impl TryFrom<ComputeSpec> for ParsedSpec {
type Error = String;
fn try_from(spec: ComputeSpec) -> Result<Self, String> {
let pageserver_connstr = spec
.cluster
.settings
.find("neon.pageserver_connstring")
.ok_or("pageserver connstr should be provided")?;
let storage_auth_token = spec.storage_auth_token.clone();
let tenant_id: TenantId = spec
.cluster
.settings
.find("neon.tenant_id")
.ok_or("tenant id should be provided")
.map(|s| TenantId::from_str(&s))?
.or(Err("invalid tenant id"))?;
let timeline_id: TimelineId = spec
.cluster
.settings
.find("neon.timeline_id")
.ok_or("timeline id should be provided")
.map(|s| TimelineId::from_str(&s))?
.or(Err("invalid timeline id"))?;
Ok(ParsedSpec {
spec,
pageserver_connstr,
storage_auth_token,
tenant_id,
timeline_id,
})
}
}
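Because the conversion's Error type is a plain String, anyhow-based callers convert it at the call site, which is exactly what main.rs does in this change. Expected usage (spec value assumed):

let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?;
info!("compute for tenant {} timeline {}", pspec.tenant_id, pspec.timeline_id);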
impl ComputeNode {
pub fn set_status(&self, status: ComputeStatus) {
let mut state = self.state.lock().unwrap();
@@ -118,10 +162,10 @@ impl ComputeNode {
// unarchive it to `pgdata` directory overriding all its previous content.
#[instrument(skip(self, compute_state))]
fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> {
let spec = compute_state.spec.as_ref().expect("spec must be set");
let spec = compute_state.pspec.as_ref().expect("spec must be set");
let start_time = Utc::now();
let mut config = postgres::Config::from_str(&spec.pageserver_connstring)?;
let mut config = postgres::Config::from_str(&spec.pageserver_connstr)?;
// Use the storage auth token from the config file, if given.
// Note: this overrides any password set in the connection string.
@@ -205,34 +249,27 @@ impl ComputeNode {
/// safekeepers sync, basebackup, etc.
#[instrument(skip(self, compute_state))]
pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> {
let spec = compute_state.spec.as_ref().expect("spec must be set");
let pspec = compute_state.pspec.as_ref().expect("spec must be set");
let pgdata_path = Path::new(&self.pgdata);
// Remove/create an empty pgdata directory and put configuration there.
self.create_pgdata()?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?;
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &pspec.spec)?;
let lsn = if let Some(lsn) = spec.lsn {
// Read-only node, anchored at 'lsn'
lsn
} else {
// Primary that continues to write at end of the timeline
info!("starting safekeepers syncing");
let last_lsn = self
.sync_safekeepers(spec.storage_auth_token.clone())
.with_context(|| "failed to sync safekeepers")?;
info!("safekeepers synced at LSN {}", last_lsn);
last_lsn
};
info!("starting safekeepers syncing");
let lsn = self
.sync_safekeepers(pspec.storage_auth_token.clone())
.with_context(|| "failed to sync safekeepers")?;
info!("safekeepers synced at LSN {}", lsn);
info!(
"getting basebackup@{} from pageserver {}",
lsn, &spec.pageserver_connstring
lsn, &pspec.pageserver_connstr
);
self.get_basebackup(compute_state, lsn).with_context(|| {
format!(
"failed to get basebackup@{} from pageserver {}",
lsn, &spec.pageserver_connstring
lsn, &pspec.pageserver_connstr
)
})?;
@@ -300,18 +337,62 @@ impl ComputeNode {
};
// Proceed with post-startup configuration. Note that the order of operations is important.
let spec = &compute_state.spec.as_ref().expect("spec must be set");
let spec = &compute_state.pspec.as_ref().expect("spec must be set").spec;
handle_roles(spec, &mut client)?;
handle_databases(spec, &mut client)?;
handle_role_deletions(spec, self.connstr.as_str(), &mut client)?;
handle_grants(spec, self.connstr.as_str(), &mut client)?;
create_writability_check_data(&mut client)?;
handle_extensions(spec, &mut client)?;
// 'Close' connection
drop(client);
info!("finished configuration of compute");
info!(
"finished configuration of compute for project {}",
spec.cluster.cluster_id
);
Ok(())
}
// We could have wrapped this in `pg_ctl reload`, but right now we don't use
// `pg_ctl` for start / stop, so it is much easier to do this over the already
// open superuser connection to Postgres.
#[instrument(skip(self, client))]
fn pg_reload_conf(&self, client: &mut Client) -> Result<()> {
client.simple_query("SELECT pg_reload_conf()")?;
Ok(())
}
/// Similar to `apply_config()`, but does a bit different sequence of operations,
/// as it's used to reconfigure a previously started and configured Postgres node.
#[instrument(skip(self))]
pub fn reconfigure(&self) -> Result<()> {
let spec = self.state.lock().unwrap().pspec.clone().unwrap().spec;
// Write new config
let pgdata_path = Path::new(&self.pgdata);
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), &spec)?;
let mut client = Client::connect(self.connstr.as_str(), NoTls)?;
self.pg_reload_conf(&mut client)?;
// Proceed with post-startup configuration. Note that the order of operations is important.
handle_roles(&spec, &mut client)?;
handle_databases(&spec, &mut client)?;
handle_role_deletions(&spec, self.connstr.as_str(), &mut client)?;
handle_grants(&spec, self.connstr.as_str(), &mut client)?;
handle_extensions(&spec, &mut client)?;
// 'Close' connection
drop(client);
let unknown_op = "unknown".to_string();
let op_id = spec.operation_uuid.as_ref().unwrap_or(&unknown_op);
info!(
"finished reconfiguration of compute node for operation {}",
op_id
);
Ok(())
}
@@ -319,11 +400,11 @@ impl ComputeNode {
#[instrument(skip(self))]
pub fn start_compute(&self) -> Result<std::process::Child> {
let compute_state = self.state.lock().unwrap().clone();
let spec = compute_state.spec.as_ref().expect("spec must be set");
let spec = compute_state.pspec.as_ref().expect("spec must be set");
info!(
"starting compute for project {}, operation {}, tenant {}, timeline {}",
spec.project_id.as_deref().unwrap_or("None"),
spec.operation_uuid.as_deref().unwrap_or("None"),
spec.spec.cluster.cluster_id,
spec.spec.operation_uuid.as_deref().unwrap_or("None"),
spec.tenant_id,
spec.timeline_id,
);
@@ -345,7 +426,7 @@ impl ComputeNode {
.unwrap()
.as_millis() as u64;
state.metrics.total_startup_ms = startup_end_time
.signed_duration_since(self.start_time)
.signed_duration_since(compute_state.start_time)
.to_std()
.unwrap()
.as_millis() as u64;

View File

@@ -5,8 +5,8 @@ use std::path::Path;
use anyhow::Result;
use crate::pg_helpers::escape_conf_value;
use compute_api::spec::ComputeSpecV2;
use crate::pg_helpers::PgOptionsSerialize;
use compute_api::spec::ComputeSpec;
/// Check that `line` is inside a text file and put it there if it is not.
/// Create file if it doesn't exist.
@@ -32,54 +32,20 @@ pub fn line_in_file(path: &Path, line: &str) -> Result<bool> {
}
/// Create or completely rewrite configuration file specified by `path`
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpecV2) -> Result<()> {
pub fn write_postgres_conf(path: &Path, spec: &ComputeSpec) -> Result<()> {
// File::create() destroys the file content if it exists.
let mut file = File::create(path)?;
let mut postgres_conf = File::create(path)?;
// Write the postgresql.conf content from the spec file as is.
if let Some(conf) = &spec.postgresql_conf {
writeln!(file, "{}", conf)?;
}
// Append any extra options from the spec file
if let Some(settings) = &spec.settings {
writeln!(file, "\n# Extra settings from spec document")?;
for setting in settings {
if let Some(value) = &setting.value {
let escaped_value: String = value.replace('\'', "''").replace('\\', "\\\\");
writeln!(file, "{} = '{}'", setting.name, escaped_value)?;
} else {
// If there is no value, then just append the line verbatim
writeln!(file, "{}", setting.name)?;
}
}
}
// Append options for connecting to storage
writeln!(file, "\n# Neon storage settings")?;
writeln!(
file,
"neon.pageserver_connstring='{}'",
escape_conf_value(&spec.pageserver_connstring)
)?;
if !spec.safekeeper_connstrings.is_empty() {
writeln!(
file,
"neon.safekeepers='{}'",
escape_conf_value(&spec.safekeeper_connstrings.join(","))
)?;
}
writeln!(
file,
"neon.tenant_id='{}'",
escape_conf_value(&spec.tenant_id.to_string())
)?;
writeln!(
file,
"neon.timeline_id='{}'",
escape_conf_value(&spec.timeline_id.to_string())
)?;
write_auto_managed_block(&mut postgres_conf, &spec.cluster.settings.as_pg_settings())?;
Ok(())
}
// Write a Postgres config block wrapped in a generated comment section
fn write_auto_managed_block(file: &mut File, buf: &str) -> Result<()> {
writeln!(file, "# Managed by compute_ctl: begin")?;
writeln!(file, "{}", buf)?;
writeln!(file, "# Managed by compute_ctl: end")?;
Ok(())
}
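Combined with `as_pg_settings()` (see the pg_helpers changes below), the generated section of `postgresql.conf` looks like this (settings abridged from the test fixture later in this diff):

```
# Managed by compute_ctl: begin
fsync = off
wal_level = replica
shared_buffers = 32768
neon.pageserver_connstring = 'host=127.0.0.1 port=6400'
# Managed by compute_ctl: end
```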

View File

@@ -0,0 +1,54 @@
use std::sync::Arc;
use std::thread;
use anyhow::Result;
use tracing::{error, info, instrument};
use compute_api::responses::ComputeStatus;
use crate::compute::ComputeNode;
#[instrument(skip(compute))]
fn configurator_main_loop(compute: &Arc<ComputeNode>) {
info!("waiting for reconfiguration requests");
loop {
let state = compute.state.lock().unwrap();
let mut state = compute.state_changed.wait(state).unwrap();
if state.status == ComputeStatus::ConfigurationPending {
info!("got configuration request");
state.status = ComputeStatus::Configuration;
compute.state_changed.notify_all();
drop(state);
let mut new_status = ComputeStatus::Failed;
if let Err(e) = compute.reconfigure() {
error!("could not configure compute node: {}", e);
} else {
new_status = ComputeStatus::Running;
info!("compute node configured");
}
// XXX: used to test that API is blocking
// std::thread::sleep(std::time::Duration::from_millis(10000));
compute.set_status(new_status);
} else if state.status == ComputeStatus::Failed {
info!("compute node is now in Failed state, exiting");
break;
} else {
info!("woken up for compute status: {:?}, sleeping", state.status);
}
}
}
pub fn launch_configurator(compute: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let compute = Arc::clone(compute);
Ok(thread::Builder::new()
.name("compute-configurator".into())
.spawn(move || {
configurator_main_loop(&compute);
info!("configurator thread is exited");
})?)
}
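The loop above is the standard `Mutex` + `Condvar` handshake: the HTTP thread updates the shared state and calls `notify_all()`; the configurator wakes, re-checks the status, and acts. A self-contained illustration of the same pattern (generic example, not compute_ctl code):

```rust
use std::sync::{Arc, Condvar, Mutex};
use std::thread;

// One thread waits for a state change, another flips the state and calls
// notify_all(), mirroring compute.state / compute.state_changed above.
fn main() {
    let pair = Arc::new((Mutex::new(false), Condvar::new()));
    let waiter = Arc::clone(&pair);
    let handle = thread::spawn(move || {
        let (lock, cvar) = &*waiter;
        let mut ready = lock.lock().unwrap();
        // Re-check the condition after every wakeup, like the status check above.
        while !*ready {
            ready = cvar.wait(ready).unwrap();
        }
        println!("got configuration request");
    });
    let (lock, cvar) = &*pair;
    *lock.lock().unwrap() = true;
    cvar.notify_all();
    handle.join().unwrap();
}
```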

View File

@@ -3,10 +3,9 @@ use std::net::SocketAddr;
use std::sync::Arc;
use std::thread;
use crate::compute::{ComputeNode, ComputeState};
use crate::compute::{ComputeNode, ComputeState, ParsedSpec};
use compute_api::requests::ConfigurationRequest;
use compute_api::responses::{ComputeStatus, ComputeStatusResponse, GenericAPIError};
use compute_api::spec::ComputeSpecV2;
use anyhow::Result;
use hyper::service::{make_service_fn, service_fn};
@@ -19,8 +18,15 @@ use tracing_utils::http::OtelName;
fn status_response_from_state(state: &ComputeState) -> ComputeStatusResponse {
ComputeStatusResponse {
tenant: state.spec.as_ref().map(|spec| spec.tenant_id.to_string()),
timeline: state.spec.as_ref().map(|spec| spec.timeline_id.to_string()),
start_time: state.start_time,
tenant: state
.pspec
.as_ref()
.map(|pspec| pspec.tenant_id.to_string()),
timeline: state
.pspec
.as_ref()
.map(|pspec| pspec.timeline_id.to_string()),
status: state.status,
last_active: state.last_active,
error: state.error.clone(),
@@ -80,7 +86,10 @@ async fn routes(req: Request<Body>, compute: &Arc<ComputeNode>) -> Response<Body
let res = crate::checker::check_writability(compute).await;
match res {
Ok(_) => Response::new(Body::from("true")),
Err(e) => Response::new(Body::from(e.to_string())),
Err(e) => {
error!("check_writability failed: {}", e);
Response::new(Body::from(e.to_string()))
}
}
}
@@ -135,9 +144,11 @@ async fn handle_configure_request(
let body_bytes = hyper::body::to_bytes(req.into_body()).await.unwrap();
let spec_raw = String::from_utf8(body_bytes.to_vec()).unwrap();
if let Ok(request) = serde_json::from_str::<ConfigurationRequest>(&spec_raw) {
let specv2 = match ComputeSpecV2::try_from(request.spec) {
let spec = request.spec;
let parsed_spec = match ParsedSpec::try_from(spec) {
Ok(ps) => ps,
Err(err) => return Err((err.to_string(), StatusCode::PRECONDITION_FAILED)),
Err(msg) => return Err((msg, StatusCode::PRECONDITION_FAILED)),
};
// XXX: wrap state update under lock in code blocks. Otherwise,
@@ -148,14 +159,14 @@ async fn handle_configure_request(
// ```
{
let mut state = compute.state.lock().unwrap();
if state.status != ComputeStatus::Empty {
if state.status != ComputeStatus::Empty && state.status != ComputeStatus::Running {
let msg = format!(
"invalid compute status for configuration request: {:?}",
state.status.clone()
);
return Err((msg, StatusCode::PRECONDITION_FAILED));
}
state.spec = Some(specv2);
state.pspec = Some(parsed_spec);
state.status = ComputeStatus::ConfigurationPending;
compute.state_changed.notify_all();
drop(state);
@@ -209,8 +220,8 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
// Main Hyper HTTP server function that runs it and blocks waiting on it forever.
#[tokio::main]
async fn serve(port: u16, state: Arc<ComputeNode>) {
let addr = SocketAddr::from(([0, 0, 0, 0], port));
async fn serve(state: Arc<ComputeNode>) {
let addr = SocketAddr::from(([0, 0, 0, 0], 3080));
let make_service = make_service_fn(move |_conn| {
let state = state.clone();
@@ -245,10 +256,10 @@ async fn serve(port: u16, state: Arc<ComputeNode>) {
}
/// Launch a separate Hyper HTTP API server thread and return its `JoinHandle`.
pub fn launch_http_server(port: u16, state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
pub fn launch_http_server(state: &Arc<ComputeNode>) -> Result<thread::JoinHandle<()>> {
let state = Arc::clone(state);
Ok(thread::Builder::new()
.name("http-endpoint".into())
.spawn(move || serve(port, state))?)
.spawn(move || serve(state))?)
}

View File

@@ -152,11 +152,14 @@ components:
type: object
description: Compute startup metrics.
required:
- wait_for_spec_ms
- sync_safekeepers_ms
- basebackup_ms
- config_ms
- total_startup_ms
properties:
wait_for_spec_ms:
type: integer
sync_safekeepers_ms:
type: integer
basebackup_ms:
@@ -181,6 +184,13 @@ components:
- status
- last_active
properties:
start_time:
type: string
description: |
Time when the compute was started. If the compute was initially started in the
`empty` state and only later provided with a valid spec, `start_time` will be
reset to the moment when the spec was received.
example: "2022-10-12T07:20:50.52Z"
status:
$ref: '#/components/schemas/ComputeStatus'
last_active:

View File

@@ -4,6 +4,7 @@
//!
pub mod checker;
pub mod config;
pub mod configurator;
pub mod http;
#[macro_use]
pub mod logger;

View File

@@ -23,12 +23,13 @@ fn escape_literal(s: &str) -> String {
/// Escape a string so that it can be used in postgresql.conf.
/// Same as escape_literal, currently.
pub fn escape_conf_value(s: &str) -> String {
fn escape_conf_value(s: &str) -> String {
s.replace('\'', "''").replace('\\', "\\\\")
}
trait GenericOptionExt {
fn to_pg_option(&self) -> String;
fn to_pg_setting(&self) -> String;
}
impl GenericOptionExt for GenericOption {
@@ -43,10 +44,23 @@ impl GenericOptionExt for GenericOption {
self.name.to_owned()
}
}
/// Represent `GenericOption` as a configuration option.
fn to_pg_setting(&self) -> String {
if let Some(val) = &self.value {
match self.vartype.as_ref() {
"string" => format!("{} = '{}'", self.name, escape_conf_value(val)),
_ => format!("{} = {}", self.name, val),
}
} else {
self.name.to_owned()
}
}
}
pub trait PgOptionsSerialize {
fn as_pg_options(&self) -> String;
fn as_pg_settings(&self) -> String;
}
impl PgOptionsSerialize for GenericOptions {
@@ -62,6 +76,20 @@ impl PgOptionsSerialize for GenericOptions {
"".to_string()
}
}
/// Serialize an optional collection of `GenericOption`s to
/// a `postgresql.conf`-compatible format.
fn as_pg_settings(&self) -> String {
if let Some(ops) = &self {
ops.iter()
.map(|op| op.to_pg_setting())
.collect::<Vec<String>>()
.join("\n")
+ "\n" // newline after last setting
} else {
"".to_string()
}
}
}
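A unit-test-style sketch of the output format (assuming `GenericOption`'s fields are public, as in the spec types later in this diff): string-typed values get quoted and escaped, other vartypes are written verbatim, and the result carries a trailing newline:

```rust
#[test]
fn as_pg_settings_format() {
    let opts: GenericOptions = Some(vec![
        GenericOption {
            name: "neon.safekeepers".to_string(),
            value: Some("127.0.0.1:6502".to_string()),
            vartype: "string".to_string(),
        },
        GenericOption {
            name: "max_connections".to_string(),
            value: Some("100".to_string()),
            vartype: "integer".to_string(),
        },
    ]);
    assert_eq!(
        opts.as_pg_settings(),
        "neon.safekeepers = '127.0.0.1:6502'\nmax_connections = 100\n"
    );
}
```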
pub trait GenericOptionsSearch {

View File

@@ -1,47 +1,126 @@
//! Functions to reconcile the Postgres cluster with the spec file
use std::path::Path;
use std::str::FromStr;
use anyhow::Result;
use anyhow::{anyhow, bail, Result};
use postgres::config::Config;
use postgres::{Client, NoTls};
use tracing::{info, info_span, instrument, span_enabled, warn, Level};
use reqwest::StatusCode;
use tracing::{error, info, info_span, instrument, span_enabled, warn, Level};
use crate::config;
use crate::params::PG_HBA_ALL_MD5;
use crate::pg_helpers::*;
use compute_api::spec::{ComputeSpecAnyVersion, ComputeSpecV2, Database, PgIdent, Role};
use compute_api::responses::{ControlPlaneComputeStatus, ControlPlaneSpecResponse};
use compute_api::spec::{ComputeSpec, Database, PgIdent, Role};
// Perform a control plane request and return the response, if any. On error,
// return a bool flag indicating whether it makes sense to retry the request,
// plus a string with the error message.
fn do_control_plane_request(
uri: &str,
jwt: &str,
) -> Result<ControlPlaneSpecResponse, (bool, String)> {
let resp = reqwest::blocking::Client::new()
.get(uri)
.header("Authorization", jwt)
.send()
.map_err(|e| {
(
true,
format!("could not perform spec request to control plane: {}", e),
)
})?;
match resp.status() {
StatusCode::OK => match resp.json::<ControlPlaneSpecResponse>() {
Ok(spec_resp) => Ok(spec_resp),
Err(e) => Err((
true,
format!("could not deserialize control plane response: {}", e),
)),
},
StatusCode::SERVICE_UNAVAILABLE => {
Err((true, "control plane is temporarily unavailable".to_string()))
}
StatusCode::BAD_GATEWAY => {
// We have a problem with intermittent 502 errors now
// https://github.com/neondatabase/cloud/issues/2353
// It's fine to retry GET request in this case.
Err((true, "control plane request failed with 502".to_string()))
}
// Another code, likely 500 or 404, means that compute is unknown to the control plane
// or some internal failure happened. Doesn't make much sense to retry in this case.
_ => Err((
false,
format!(
"unexpected control plane response status code: {}",
resp.status()
),
)),
}
}
/// Request spec from the control-plane by compute_id. If `NEON_CONTROL_PLANE_TOKEN`
/// env variable is set, it will be used for authorization.
pub fn get_spec_from_control_plane(
base_uri: &str,
compute_id: &str,
) -> Result<ComputeSpecAnyVersion> {
) -> Result<Option<ComputeSpec>> {
let cp_uri = format!("{base_uri}/management/api/v2/computes/{compute_id}/spec");
let jwt: String = match std::env::var("NEON_CONSOLE_JWT") {
let jwt: String = match std::env::var("NEON_CONTROL_PLANE_TOKEN") {
Ok(v) => v,
Err(_) => "".to_string(),
};
let mut attempt = 1;
let mut spec: Result<Option<ComputeSpec>> = Ok(None);
info!("getting spec from control plane: {}", cp_uri);
// TODO: check the response. We should distinguish cases when it's
// - network error, then retry
// - no spec for compute yet, then wait
// - compute id is unknown or any other error, then bail out
let json = reqwest::blocking::Client::new()
.get(cp_uri)
.header("Authorization", jwt)
.send()?
.json()?;
Ok(ComputeSpecAnyVersion(json))
// Make up to 3 attempts to get the spec from the control plane using the following logic:
// - network error -> then retry
// - compute id is unknown or any other error -> bail out
// - no spec for compute yet (Empty state) -> return Ok(None)
// - got spec -> return Ok(Some(spec))
while attempt < 4 {
spec = match do_control_plane_request(&cp_uri, &jwt) {
Ok(spec_resp) => match spec_resp.status {
ControlPlaneComputeStatus::Empty => Ok(None),
ControlPlaneComputeStatus::Attached => {
if let Some(spec) = spec_resp.spec {
Ok(Some(spec))
} else {
bail!("compute is attached, but spec is empty")
}
}
},
Err((retry, msg)) => {
if retry {
Err(anyhow!(msg))
} else {
bail!(msg);
}
}
};
if let Err(e) = &spec {
error!("attempt {} to get spec failed with: {}", attempt, e);
} else {
return spec;
}
attempt += 1;
std::thread::sleep(std::time::Duration::from_millis(100));
}
// All attempts failed, return error.
spec
}
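A sketch of the caller side (hypothetical helper names, for illustration only): `Ok(None)` means the compute stays `Empty` and waits for a `/configure` request, while an error means giving up:

```rust
// Hypothetical wiring (start_with_spec / wait_for_configure_request are
// illustrative names, not part of this PR):
match get_spec_from_control_plane(&base_uri, &compute_id) {
    // Attached: we already have a spec, configure and start Postgres.
    Ok(Some(spec)) => start_with_spec(spec),
    // Empty: no spec yet; serve the HTTP API and wait for /configure.
    Ok(None) => wait_for_configure_request(),
    // Unknown compute id or repeated failures: give up and exit.
    Err(e) => {
        eprintln!("could not get spec: {e}");
        std::process::exit(1);
    }
}
```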
/// It takes the cluster specification and does the following:
/// - Serialize cluster config and put it into `postgresql.conf` completely rewriting the file.
/// - Update `pg_hba.conf` to allow external connections.
pub fn handle_configuration(spec: &ComputeSpecV2, pgdata_path: &Path) -> Result<()> {
pub fn handle_configuration(spec: &ComputeSpec, pgdata_path: &Path) -> Result<()> {
// File `postgresql.conf` is no longer included in the `basebackup`, so just
// always write the full config into it, creating a new file.
config::write_postgres_conf(&pgdata_path.join("postgresql.conf"), spec)?;
@@ -69,7 +148,7 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> {
/// Given a cluster spec json and open transaction it handles roles creation,
/// deletion and update.
#[instrument(skip_all)]
pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let mut xact = client.transaction()?;
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
@@ -125,7 +204,7 @@ pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
let existing_roles: Vec<Role> = get_existing_roles(&mut xact)?;
info!("cluster spec roles:");
for role in &spec.roles {
for role in &spec.cluster.roles {
let name = &role.name;
// XXX: with a limited number of roles it is fine, but consider making it a HashMap
let pg_role = existing_roles.iter().find(|r| r.name == *name);
@@ -210,11 +289,7 @@ pub fn handle_roles(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
/// Reassign all dependent objects and delete requested roles.
#[instrument(skip_all)]
pub fn handle_role_deletions(
spec: &ComputeSpecV2,
connstr: &str,
client: &mut Client,
) -> Result<()> {
pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
if let Some(ops) = &spec.delta_operations {
// First, reassign all dependent objects to db owners.
info!("reassigning dependent objects of to-be-deleted roles");
@@ -256,8 +331,8 @@ pub fn handle_role_deletions(
}
// Reassign all owned objects in all databases to the owner of the database.
fn reassign_owned_objects(spec: &ComputeSpecV2, connstr: &str, role_name: &PgIdent) -> Result<()> {
for db in &spec.databases {
fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> {
for db in &spec.cluster.databases {
if db.owner != *role_name {
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
@@ -291,7 +366,7 @@ fn reassign_owned_objects(spec: &ComputeSpecV2, connstr: &str, role_name: &PgIde
/// atomicity should be enough here due to the order of operations and various checks,
/// which together provide us idempotency.
#[instrument(skip_all)]
pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
// Print a list of existing Postgres databases (only in debug mode)
@@ -339,7 +414,7 @@ pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()>
let existing_dbs: Vec<Database> = get_existing_dbs(client)?;
info!("cluster spec databases:");
for db in &spec.databases {
for db in &spec.cluster.databases {
let name = &db.name;
// XXX: with a limited number of databases it is fine, but consider making it a HashMap
@@ -404,7 +479,7 @@ pub fn handle_databases(spec: &ComputeSpecV2, client: &mut Client) -> Result<()>
/// Grant CREATE ON DATABASE to the database owner and do some other alters and grants
/// to allow users creating trusted extensions and re-creating `public` schema, for example.
#[instrument(skip_all)]
pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -> Result<()> {
pub fn handle_grants(spec: &ComputeSpec, connstr: &str, client: &mut Client) -> Result<()> {
info!("cluster spec grants:");
// We now have a separate `web_access` role to connect to the database
@@ -414,12 +489,13 @@ pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -
// XXX: later we should stop messing with Postgres ACL in such horrible
// ways.
let roles = spec
.cluster
.roles
.iter()
.map(|r| r.name.pg_quote())
.collect::<Vec<_>>();
for db in &spec.databases {
for db in &spec.cluster.databases {
let dbname = &db.name;
let query: String = format!(
@@ -435,7 +511,7 @@ pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -
// Do some per-database access adjustments. Ideally we'd do this at database
// creation time, but CREATE DATABASE isn't transactional. So we cannot
// create a db + do some grants atomically.
for db in &spec.databases {
for db in &spec.cluster.databases {
let mut conf = Config::from_str(connstr)?;
conf.dbname(&db.name);
@@ -505,11 +581,14 @@ pub fn handle_grants(spec: &ComputeSpecV2, connstr: &str, client: &mut Client) -
/// Create required system extensions
#[instrument(skip_all)]
pub fn handle_extensions(spec: &ComputeSpecV2, client: &mut Client) -> Result<()> {
for extension in &spec.extensions {
let query = format!("CREATE EXTENSION IF NOT EXISTS {}", extension.pg_quote());
info!("creating system extensions with query: {}", query);
client.simple_query(&query)?;
pub fn handle_extensions(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") {
if libs.contains("pg_stat_statements") {
// Create extension only if this compute really needs it
let query = "CREATE EXTENSION IF NOT EXISTS pg_stat_statements";
info!("creating system extensions with query: {}", query);
client.simple_query(query)?;
}
}
Ok(())
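The `find` call comes from the `GenericOptionsSearch` trait in pg_helpers.rs, which this diff doesn't show; a minimal sketch of the assumed lookup:

```rust
// Assumed sketch, not the real trait implementation: scan the optional
// settings vector and return the value of the first option whose name matches.
fn find_setting(settings: &GenericOptions, name: &str) -> Option<String> {
    settings
        .as_ref()?
        .iter()
        .find(|opt| opt.name == name)
        .and_then(|opt| opt.value.clone())
}
```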

View File

@@ -1,24 +1,57 @@
#[cfg(test)]
mod pg_helpers_tests {
use anyhow::Result;
use compute_api::spec::{ComputeSpecV2, GenericOption, GenericOptions, PgIdent};
use std::fs::File;
use compute_api::spec::{ComputeSpec, GenericOption, GenericOptions, PgIdent};
use compute_tools::pg_helpers::*;
#[test]
fn params_serialize() -> Result<()> {
let spec_v1_str =
std::fs::read_to_string("../libs/compute_api/tests/spec-v1.json").unwrap();
let spec = ComputeSpecV2::parse_and_upgrade(&spec_v1_str)?;
fn params_serialize() {
let file = File::open("../libs/compute_api/tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
assert_eq!(
spec.databases.first().unwrap().to_pg_options(),
spec.cluster.databases.first().unwrap().to_pg_options(),
"LC_COLLATE 'C' LC_CTYPE 'C' TEMPLATE template0 OWNER \"alexk\""
);
assert_eq!(
spec.roles.first().unwrap().to_pg_options(),
spec.cluster.roles.first().unwrap().to_pg_options(),
"LOGIN PASSWORD 'md56b1d16b78004bbd51fa06af9eda75972'"
);
Ok(())
}
#[test]
fn settings_serialize() {
let file = File::open("../libs/compute_api/tests/cluster_spec.json").unwrap();
let spec: ComputeSpec = serde_json::from_reader(file).unwrap();
assert_eq!(
spec.cluster.settings.as_pg_settings(),
r#"fsync = off
wal_level = replica
hot_standby = on
neon.safekeepers = '127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501'
wal_log_hints = on
log_connections = on
shared_buffers = 32768
port = 55432
max_connections = 100
max_wal_senders = 10
listen_addresses = '0.0.0.0'
wal_sender_timeout = 0
password_encryption = md5
maintenance_work_mem = 65536
max_parallel_workers = 8
max_worker_processes = 8
neon.tenant_id = 'b0554b632bd4d547a63b86c3630317e8'
max_replication_slots = 10
neon.timeline_id = '2414a61ffc94e428f14b5758fe308e13'
shared_preload_libraries = 'neon'
synchronous_standby_names = 'walproposer'
neon.pageserver_connstring = 'host=127.0.0.1 port=6400'
test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hooray'
"#
);
}
#[test]

View File

@@ -6,7 +6,6 @@ license.workspace = true
[dependencies]
anyhow.workspace = true
chrono.workspace = true
clap.workspace = true
comfy-table.workspace = true
git-version.workspace = true
@@ -27,7 +26,6 @@ url.workspace = true
pageserver_api.workspace = true
postgres_backend.workspace = true
safekeeper_api.workspace = true
compute_api.workspace = true
postgres_connection.workspace = true
storage_broker.workspace = true
utils.workspace = true

View File

@@ -108,6 +108,7 @@ fn main() -> Result<()> {
"pageserver" => handle_pageserver(sub_args, &env),
"safekeeper" => handle_safekeeper(sub_args, &env),
"endpoint" => handle_endpoint(sub_args, &env),
"pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
_ => bail!("unexpected subcommand {sub_name}"),
};
@@ -473,7 +474,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
println!("Creating endpoint for imported timeline ...");
cplane.new_endpoint(name, tenant_id, timeline_id, None, None, None, pg_version)?;
cplane.new_endpoint(tenant_id, name, timeline_id, None, None, pg_version)?;
println!("Done");
}
Some(("branch", branch_match)) => {
@@ -582,7 +583,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
table.add_row([
endpoint_id.as_str(),
&endpoint.pg_address.to_string(),
&endpoint.address.to_string(),
&endpoint.timeline_id.to_string(),
branch_name,
lsn_str.as_str(),
@@ -611,45 +612,21 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
.get_branch_timeline_id(branch_name, tenant_id)
.ok_or_else(|| anyhow!("Found no timeline id for branch name '{branch_name}'"))?;
let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
let pg_version = sub_args
.get_one::<u32>("pg-version")
.copied()
.context("Failed to parse postgres version from the argument string")?;
cplane.new_endpoint(
&endpoint_id,
tenant_id,
timeline_id,
lsn,
pg_port,
http_port,
pg_version,
)?;
cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, lsn, port, pg_version)?;
}
"start" => {
let pg_port: Option<u16> = sub_args.get_one::<u16>("pg-port").copied();
let http_port: Option<u16> = sub_args.get_one::<u16>("http-port").copied();
let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
let endpoint_id = sub_args
.get_one::<String>("endpoint_id")
.ok_or_else(|| anyhow!("No endpoint ID was provided to start"))?;
// If --safekeepers argument is given, use only the listed safekeeper nodes.
let safekeepers =
if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
let mut safekeepers: Vec<NodeId> = Vec::new();
for sk_id in safekeepers_str.split(',').map(str::trim) {
let sk_id = NodeId(u64::from_str(sk_id).map_err(|_| {
anyhow!("invalid node ID \"{sk_id}\" in --safekeepers list")
})?);
safekeepers.push(sk_id);
}
safekeepers
} else {
env.safekeepers.iter().map(|sk| sk.id).collect()
};
let endpoint = cplane.endpoints.get(endpoint_id.as_str());
let auth_token = if matches!(env.pageserver.pg_auth_type, AuthType::NeonJWT) {
@@ -662,7 +639,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
if let Some(endpoint) = endpoint {
println!("Starting existing endpoint {endpoint_id}...");
endpoint.start(&auth_token, safekeepers)?;
endpoint.start(&auth_token)?;
} else {
let branch_name = sub_args
.get_one::<String>("branch-name")
@@ -690,15 +667,14 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
println!("Starting new endpoint {endpoint_id} (PostgreSQL v{pg_version}) on timeline {timeline_id} ...");
let ep = cplane.new_endpoint(
endpoint_id,
tenant_id,
endpoint_id,
timeline_id,
lsn,
pg_port,
http_port,
port,
pg_version,
)?;
ep.start(&auth_token, safekeepers)?;
ep.start(&auth_token)?;
}
}
"stop" => {
@@ -926,22 +902,11 @@ fn cli() -> Command {
.value_parser(value_parser!(u32))
.default_value(DEFAULT_PG_VERSION);
let pg_port_arg = Arg::new("pg-port")
.long("pg-port")
let port_arg = Arg::new("port")
.long("port")
.required(false)
.value_parser(value_parser!(u16))
.value_name("pg-port");
let http_port_arg = Arg::new("http-port")
.long("http-port")
.required(false)
.value_parser(value_parser!(u16))
.value_name("http-port");
let safekeepers_arg = Arg::new("safekeepers")
.long("safekeepers")
.required(false)
.value_name("safekeepers");
.value_name("port");
let stop_mode_arg = Arg::new("stop-mode")
.short('m')
@@ -1080,8 +1045,7 @@ fn cli() -> Command {
.arg(branch_name_arg.clone())
.arg(tenant_id_arg.clone())
.arg(lsn_arg.clone())
.arg(pg_port_arg.clone())
.arg(http_port_arg.clone())
.arg(port_arg.clone())
.arg(
Arg::new("config-only")
.help("Don't do basebackup, create endpoint directory with only config files")
@@ -1096,10 +1060,8 @@ fn cli() -> Command {
.arg(branch_name_arg)
.arg(timeline_id_arg)
.arg(lsn_arg)
.arg(pg_port_arg)
.arg(http_port_arg)
.arg(port_arg)
.arg(pg_version_arg)
.arg(safekeepers_arg)
)
.subcommand(
Command::new("stop")
@@ -1115,6 +1077,13 @@ fn cli() -> Command {
)
)
// Obsolete old name for 'endpoint'. We now just print an error if it's used.
.subcommand(
Command::new("pg")
.hide(true)
.arg(Arg::new("ignore-rest").allow_hyphen_values(true).num_args(0..).required(false))
.trailing_var_arg(true)
)
.subcommand(
Command::new("start")
.about("Start page server and safekeepers")

View File

@@ -1,9 +1,3 @@
//! Code to manage the storage broker
//!
//! In the local test environment, the data for each safekeeper is stored in
//!
//! .neon/safekeepers/<safekeeper id>
//!
use anyhow::Context;
use std::path::PathBuf;

View File

@@ -1,78 +1,25 @@
//! Code to manage compute endpoints
//!
//! In the local test environment, the data for each endpoint is stored in
//!
//! .neon/endpoints/<endpoint id>
//!
//! Some basic information about the endpoint, like the tenant and timeline IDs,
//! is stored in the `endpoint.json` file. The `endpoint.json` file is created
//! when the endpoint is created, and doesn't change afterwards.
//!
//! The endpoint is managed by the `compute_ctl` binary. When an endpoint is
//! started, we launch `compute_ctl`. It synchronizes the safekeepers, downloads
//! the basebackup from the pageserver to initialize the data directory, and
//! finally launches the PostgreSQL process. It watches the PostgreSQL process
//! until it exits.
//!
//! When an endpoint is created, a `postgresql.conf` file is also created in
//! the endpoint's directory. The file can be modified before starting PostgreSQL.
//! However, the `postgresql.conf` file in the endpoint directory is not used directly
//! by PostgreSQL. It is passed to `compute_ctl`, and `compute_ctl` writes another
//! copy of it in the data directory.
//!
//! Directory contents:
//!
//! ```ignore
//! .neon/endpoints/main/
//! compute.log - log output of `compute_ctl` and `postgres`
//! endpoint.json - serialized `EndpointConf` struct
//! postgresql.conf - postgresql settings
//! spec.json - passed to `compute_ctl`
//! pgdata/
//! postgresql.conf - copy of postgresql.conf created by `compute_ctl`
//! zenith.signal
//! <other PostgreSQL files>
//! ```
//!
use std::collections::BTreeMap;
use std::fs::{self, File};
use std::io::Write;
use std::net::SocketAddr;
use std::net::TcpStream;
use std::os::unix::fs::PermissionsExt;
use std::path::PathBuf;
use std::process::Command;
use std::process::{Command, Stdio};
use std::str::FromStr;
use std::sync::Arc;
use std::time::Duration;
use anyhow::{anyhow, bail, Context, Result};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use anyhow::{Context, Result};
use utils::{
id::{NodeId, TenantId, TimelineId},
id::{TenantId, TimelineId},
lsn::Lsn,
};
use crate::local_env::LocalEnv;
use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
use crate::pageserver::PageServerNode;
use crate::postgresql_conf::PostgresConf;
use compute_api::responses::{ComputeState, ComputeStatus};
use compute_api::spec::ComputeSpecV2;
// contents of a endpoint.json file
#[serde_as]
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct EndpointConf {
endpoint_id: String,
#[serde_as(as = "DisplayFromStr")]
tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
timeline_id: TimelineId,
#[serde_as(as = "Option<DisplayFromStr>")]
lsn: Option<Lsn>,
pg_port: u16,
http_port: u16,
pg_version: u32,
}
//
// ComputeControlPlane
//
@@ -92,11 +39,11 @@ impl ComputeControlPlane {
let pageserver = Arc::new(PageServerNode::from_env(&env));
let mut endpoints = BTreeMap::default();
for endpoint_dir in std::fs::read_dir(env.endpoints_path())
for endpoint_dir in fs::read_dir(env.endpoints_path())
.with_context(|| format!("failed to list {}", env.endpoints_path().display()))?
{
let ep = Endpoint::from_dir_entry(endpoint_dir?, &env, &pageserver)?;
endpoints.insert(ep.endpoint_id.clone(), Arc::new(ep));
endpoints.insert(ep.name.clone(), Arc::new(ep));
}
Ok(ComputeControlPlane {
@@ -111,28 +58,24 @@ impl ComputeControlPlane {
1 + self
.endpoints
.values()
.map(|ep| std::cmp::max(ep.pg_address.port(), ep.http_address.port()))
.map(|ep| ep.address.port())
.max()
.unwrap_or(self.base_port)
}
#[allow(clippy::too_many_arguments)]
pub fn new_endpoint(
&mut self,
endpoint_id: &str,
tenant_id: TenantId,
name: &str,
timeline_id: TimelineId,
lsn: Option<Lsn>,
pg_port: Option<u16>,
http_port: Option<u16>,
port: Option<u16>,
pg_version: u32,
) -> Result<Arc<Endpoint>> {
let pg_port = pg_port.unwrap_or_else(|| self.get_port());
let http_port = http_port.unwrap_or_else(|| self.get_port() + 1);
let port = port.unwrap_or_else(|| self.get_port());
let ep = Arc::new(Endpoint {
endpoint_id: endpoint_id.to_owned(),
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), pg_port),
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), http_port),
name: name.to_owned(),
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
env: self.env.clone(),
pageserver: Arc::clone(&self.pageserver),
timeline_id,
@@ -141,26 +84,10 @@ impl ComputeControlPlane {
pg_version,
});
ep.create_endpoint_dir()?;
std::fs::write(
ep.endpoint_path().join("endpoint.json"),
serde_json::to_string_pretty(&EndpointConf {
endpoint_id: endpoint_id.to_string(),
tenant_id,
timeline_id,
lsn,
http_port,
pg_port,
pg_version,
})?,
)?;
std::fs::write(
ep.endpoint_path().join("postgresql.conf"),
ep.setup_pg_conf()?.to_string(),
)?;
ep.create_pgdata()?;
ep.setup_pg_conf()?;
self.endpoints
.insert(ep.endpoint_id.clone(), Arc::clone(&ep));
self.endpoints.insert(ep.name.clone(), Arc::clone(&ep));
Ok(ep)
}
@@ -171,15 +98,14 @@ impl ComputeControlPlane {
#[derive(Debug)]
pub struct Endpoint {
/// used as the directory name
endpoint_id: String,
name: String,
pub tenant_id: TenantId,
pub timeline_id: TimelineId,
// Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
pub lsn: Option<Lsn>,
// port and address of the Postgres server and `compute_ctl`'s HTTP API
pub pg_address: SocketAddr,
pub http_address: SocketAddr,
// port and address of the Postgres server
pub address: SocketAddr,
pg_version: u32,
// These are not part of the endpoint as such, but the environment
@@ -203,36 +129,145 @@ impl Endpoint {
// parse data directory name
let fname = entry.file_name();
let endpoint_id = fname.to_str().unwrap().to_string();
let name = fname.to_str().unwrap().to_string();
// Read the endpoint.json file
let conf: EndpointConf =
serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;
// Read config file into memory
let cfg_path = entry.path().join("pgdata").join("postgresql.conf");
let cfg_path_str = cfg_path.to_string_lossy();
let mut conf_file = File::open(&cfg_path)
.with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
let conf = PostgresConf::read(&mut conf_file)
.with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
// Read a few options from the config file
let context = format!("in config file {}", cfg_path_str);
let port: u16 = conf.parse_field("port", &context)?;
let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
// Read postgres version from PG_VERSION file to determine which postgres version binary to use.
// If it doesn't exist, assume broken data directory and use default pg version.
let pg_version_path = entry.path().join("PG_VERSION");
let pg_version_str =
fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
let pg_version = u32::from_str(&pg_version_str)?;
// parse recovery_target_lsn, if any
let recovery_target_lsn: Option<Lsn> =
conf.parse_field_optional("recovery_target_lsn", &context)?;
// All fields parsed; construct the Endpoint
Ok(Endpoint {
pg_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.pg_port),
http_address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.http_port),
endpoint_id,
address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
name,
env: env.clone(),
pageserver: Arc::clone(pageserver),
timeline_id: conf.timeline_id,
lsn: conf.lsn,
tenant_id: conf.tenant_id,
pg_version: conf.pg_version,
timeline_id,
lsn: recovery_target_lsn,
tenant_id,
pg_version,
})
}
fn create_endpoint_dir(&self) -> Result<()> {
std::fs::create_dir_all(self.endpoint_path()).with_context(|| {
format!(
"could not create endpoint directory {}",
self.endpoint_path().display()
fn sync_safekeepers(&self, auth_token: &Option<String>, pg_version: u32) -> Result<Lsn> {
let pg_path = self.env.pg_bin_dir(pg_version)?.join("postgres");
let mut cmd = Command::new(pg_path);
cmd.arg("--sync-safekeepers")
.env_clear()
.env(
"LD_LIBRARY_PATH",
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
)
})
.env(
"DYLD_LIBRARY_PATH",
self.env.pg_lib_dir(pg_version)?.to_str().unwrap(),
)
.env("PGDATA", self.pgdata().to_str().unwrap())
.stdout(Stdio::piped())
// Comment this out to avoid capturing stderr (useful if the command hangs)
.stderr(Stdio::piped());
if let Some(token) = auth_token {
cmd.env("NEON_AUTH_TOKEN", token);
}
let sync_handle = cmd
.spawn()
.expect("postgres --sync-safekeepers failed to start");
let sync_output = sync_handle
.wait_with_output()
.expect("postgres --sync-safekeepers failed");
if !sync_output.status.success() {
anyhow::bail!(
"sync-safekeepers failed: '{}'",
String::from_utf8_lossy(&sync_output.stderr)
);
}
let lsn = Lsn::from_str(std::str::from_utf8(&sync_output.stdout)?.trim())?;
println!("Safekeepers synced on {}", lsn);
Ok(lsn)
}
// Generate postgresql.conf with default configuration
fn setup_pg_conf(&self) -> Result<PostgresConf> {
/// Get basebackup from the pageserver as a tar archive and extract it
/// to the `self.pgdata()` directory.
fn do_basebackup(&self, lsn: Option<Lsn>) -> Result<()> {
println!(
"Extracting base backup to create postgres instance: path={} port={}",
self.pgdata().display(),
self.address.port()
);
let sql = if let Some(lsn) = lsn {
format!("basebackup {} {} {}", self.tenant_id, self.timeline_id, lsn)
} else {
format!("basebackup {} {}", self.tenant_id, self.timeline_id)
};
let mut client = self
.pageserver
.page_server_psql_client()
.context("connecting to page server failed")?;
let copyreader = client
.copy_out(sql.as_str())
.context("page server 'basebackup' command failed")?;
// Read the archive directly from the `CopyOutReader`
//
// Set `ignore_zeros` so that unpack() reads all the Copy data and
// doesn't stop at the end-of-archive marker. Otherwise, if the server
// sends an Error after finishing the tarball, we will not notice it.
let mut ar = tar::Archive::new(copyreader);
ar.set_ignore_zeros(true);
ar.unpack(&self.pgdata())
.context("extracting base backup failed")?;
Ok(())
}
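For reference, the `basebackup` commands sent over the pageserver's libpq connection are plain strings of one of these two forms (IDs and LSN filled in from the endpoint):

```
basebackup <tenant_id> <timeline_id>
basebackup <tenant_id> <timeline_id> <lsn>
```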
fn create_pgdata(&self) -> Result<()> {
fs::create_dir_all(self.pgdata()).with_context(|| {
format!(
"could not create data directory {}",
self.pgdata().display()
)
})?;
fs::set_permissions(self.pgdata().as_path(), fs::Permissions::from_mode(0o700))
.with_context(|| {
format!(
"could not set permissions in data directory {}",
self.pgdata().display()
)
})
}
// Write postgresql.conf with default configuration
// and PG_VERSION file to the data directory of a new endpoint.
fn setup_pg_conf(&self) -> Result<()> {
let mut conf = PostgresConf::new();
conf.append("max_wal_senders", "10");
conf.append("wal_log_hints", "off");
@@ -245,16 +280,30 @@ impl Endpoint {
// wal_sender_timeout is the maximum time to wait for WAL replication.
// It also defines how often the walreceiver will send a feedback message to the wal sender.
conf.append("wal_sender_timeout", "5s");
conf.append("listen_addresses", &self.pg_address.ip().to_string());
conf.append("port", &self.pg_address.port().to_string());
conf.append("listen_addresses", &self.address.ip().to_string());
conf.append("port", &self.address.port().to_string());
conf.append("wal_keep_size", "0");
// walproposer panics when basebackup is invalid, it is pointless to restart in this case.
conf.append("restart_after_crash", "off");
// Load the 'neon' extension
// Configure the Neon Postgres extension to fetch pages from pageserver
let pageserver_connstr = {
let config = &self.pageserver.pg_connection_config;
let (host, port) = (config.host(), config.port());
// NOTE: avoid spaces in the connection string, because it is less error-prone if we forward it somewhere.
format!("postgresql://no_user@{host}:{port}")
};
conf.append("shared_preload_libraries", "neon");
conf.append_line("");
conf.append("neon.pageserver_connstring", &pageserver_connstr);
conf.append("neon.tenant_id", &self.tenant_id.to_string());
conf.append("neon.timeline_id", &self.timeline_id.to_string());
if let Some(lsn) = self.lsn {
conf.append("recovery_target_lsn", &lsn.to_string());
}
conf.append_line("");
// Configure backpressure
// - Replication write lag depends on how fast the walreceiver can process incoming WAL.
// This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
@@ -275,6 +324,15 @@ impl Endpoint {
if !self.env.safekeepers.is_empty() {
// Configure Postgres to connect to the safekeepers
conf.append("synchronous_standby_names", "walproposer");
let safekeepers = self
.env
.safekeepers
.iter()
.map(|sk| format!("localhost:{}", sk.pg_port))
.collect::<Vec<String>>()
.join(",");
conf.append("neon.safekeepers", &safekeepers);
} else {
// We only use setup without safekeepers for tests,
// and don't care about data durability on pageserver,
@@ -287,11 +345,40 @@ impl Endpoint {
conf.append("synchronous_standby_names", "pageserver");
}
Ok(conf)
let mut file = File::create(self.pgdata().join("postgresql.conf"))?;
file.write_all(conf.to_string().as_bytes())?;
let mut file = File::create(self.pgdata().join("PG_VERSION"))?;
file.write_all(self.pg_version.to_string().as_bytes())?;
Ok(())
}
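With the defaults above, the generated `pgdata/postgresql.conf` of a new endpoint ends up looking roughly like this (a sketch; values illustrative, IDs elided, exact quoting depends on `PostgresConf`):

```
max_wal_senders = 10
wal_sender_timeout = 5s
listen_addresses = '127.0.0.1'
port = 55432
restart_after_crash = off
shared_preload_libraries = 'neon'
neon.pageserver_connstring = 'postgresql://no_user@127.0.0.1:6400'
neon.tenant_id = '...'
neon.timeline_id = '...'
synchronous_standby_names = 'walproposer'
neon.safekeepers = 'localhost:6502'
```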
fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
let backup_lsn = if let Some(lsn) = self.lsn {
Some(lsn)
} else if !self.env.safekeepers.is_empty() {
// LSN 0 means that this is a bootstrap and we need to download just the
// latest data from the pageserver. That is a bit clumsy, but the whole
// bootstrap procedure is still evolving quite actively, so let's revisit
// this once things are more stable (TODO).
let lsn = self.sync_safekeepers(auth_token, self.pg_version)?;
if lsn == Lsn(0) {
None
} else {
Some(lsn)
}
} else {
None
};
self.do_basebackup(backup_lsn)?;
Ok(())
}
pub fn endpoint_path(&self) -> PathBuf {
self.env.endpoints_path().join(&self.endpoint_id)
self.env.endpoints_path().join(&self.name)
}
pub fn pgdata(&self) -> PathBuf {
@@ -301,7 +388,7 @@ impl Endpoint {
pub fn status(&self) -> &str {
let timeout = Duration::from_millis(300);
let has_pidfile = self.pgdata().join("postmaster.pid").exists();
let can_connect = TcpStream::connect_timeout(&self.pg_address, timeout).is_ok();
let can_connect = TcpStream::connect_timeout(&self.address, timeout).is_ok();
match (has_pidfile, can_connect) {
(true, true) => "running",
@@ -319,8 +406,8 @@ impl Endpoint {
&[
"-D",
self.pgdata().to_str().unwrap(),
"-l", // FIXME: does this make sense when we don't use pg_ctl start ?
self.endpoint_path().join("pg.log").to_str().unwrap(),
"-l",
self.pgdata().join("pg.log").to_str().unwrap(),
"-w", //wait till pg_ctl actually does what was asked
],
args,
@@ -356,183 +443,36 @@ impl Endpoint {
Ok(())
}
pub fn start(&self, auth_token: &Option<String>, safekeepers: Vec<NodeId>) -> Result<()> {
pub fn start(&self, auth_token: &Option<String>) -> Result<()> {
if self.status() == "running" {
anyhow::bail!("The endpoint is already running");
}
// Slurp the endpoints/<endpoint id>/postgresql.conf file into
// memory. We will include it in the spec file that we pass to
// `compute_ctl`, and `compute_ctl` will write it to the postgresql.conf
// in the data directory.
let postgresql_conf_path = self.endpoint_path().join("postgresql.conf");
let postgresql_conf = match std::fs::read(&postgresql_conf_path) {
Ok(content) => String::from_utf8(content)?,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => "".to_string(),
Err(e) => {
return Err(anyhow::Error::new(e).context(format!(
"failed to read config file in {}",
postgresql_conf_path.to_str().unwrap()
)))
}
};
// We always start the compute node from scratch, so if the Postgres
// data dir exists from a previous launch, remove it first.
if self.pgdata().exists() {
std::fs::remove_dir_all(self.pgdata())?;
}
let pageserver_connstring = {
let config = &self.pageserver.pg_connection_config;
let (host, port) = (config.host(), config.port());
// NOTE: avoid spaces in connection string, because it is less error prone if we forward it somewhere.
format!("postgresql://no_user@{host}:{port}")
};
let mut safekeeper_connstrings = Vec::new();
for sk_id in safekeepers {
let sk = self
.env
.safekeepers
.iter()
.find(|node| node.id == sk_id)
.ok_or_else(|| anyhow!("safekeeper {sk_id} does not exist"))?;
safekeeper_connstrings.push(format!("127.0.0.1:{}", sk.pg_port));
}
// Create spec file
let spec = ComputeSpecV2 {
format_version: 2,
project_id: None,
endpoint_id: Some(self.endpoint_id.clone()),
operation_uuid: None,
startup_tracing_context: None,
tenant_id: self.tenant_id,
timeline_id: self.timeline_id,
lsn: self.lsn,
pageserver_connstring,
safekeeper_connstrings,
storage_auth_token: auth_token.clone(),
postgresql_conf: Some(postgresql_conf),
settings: None,
roles: vec![],
databases: vec![],
extensions: vec![],
delta_operations: None,
};
let spec_path = self.endpoint_path().join("spec.json");
std::fs::write(spec_path, serde_json::to_string_pretty(&spec)?)?;
// Open log file. We'll redirect the stdout and stderr of `compute_ctl` to it.
let logfile = std::fs::OpenOptions::new()
.create(true)
.append(true)
.open(self.endpoint_path().join("compute.log"))?;
// Launch compute_ctl
println!("Starting postgres node at '{}'", self.connstr());
let mut cmd = Command::new(self.env.neon_distrib_dir.join("compute_ctl"));
cmd.args(["--http-port", &self.http_address.port().to_string()])
.args(["--pgdata", self.pgdata().to_str().unwrap()])
.args(["--connstr", &self.connstr()])
.args([
"--spec-path",
self.endpoint_path().join("spec.json").to_str().unwrap(),
])
.args([
"--pgbin",
self.env
.pg_bin_dir(self.pg_version)?
.join("postgres")
.to_str()
.unwrap(),
])
.stdin(std::process::Stdio::null())
.stderr(logfile.try_clone()?)
.stdout(logfile);
let _child = cmd.spawn()?;
// Wait for it to start
let mut attempt = 0;
const ATTEMPT_INTERVAL: Duration = Duration::from_millis(100);
const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s
loop {
attempt += 1;
match self.get_status() {
Ok(state) => {
match state.status {
ComputeStatus::Init => {
if attempt == MAX_ATTEMPTS {
bail!("compute startup timed out; still in Init state");
}
// keep retrying
}
ComputeStatus::Running => {
// All good!
break;
}
ComputeStatus::Failed => {
bail!(
"compute startup failed: {}",
state
.error
.as_deref()
.unwrap_or("<no error from compute_ctl>")
);
}
ComputeStatus::Empty | ComputeStatus::ConfigurationPending => {
bail!("unexpected compute status: {:?}", state.status)
}
}
}
Err(e) => {
if attempt == MAX_ATTEMPTS {
return Err(e).context(
"timed out waiting to connect to compute_ctl HTTP; last error: {e}",
);
}
}
}
std::thread::sleep(ATTEMPT_INTERVAL);
}
Ok(())
}
// Call the /status HTTP API
pub fn get_status(&self) -> Result<ComputeState> {
let client = reqwest::blocking::Client::new();
let response = client
.request(
reqwest::Method::GET,
format!(
"http://{}:{}/status",
self.http_address.ip(),
self.http_address.port()
),
// 1. We always start Postgres from scratch, so
// if old dir exists, preserve 'postgresql.conf' and drop the directory
let postgresql_conf_path = self.pgdata().join("postgresql.conf");
let postgresql_conf = fs::read(&postgresql_conf_path).with_context(|| {
format!(
"failed to read config file in {}",
postgresql_conf_path.to_str().unwrap()
)
.send()?;
})?;
fs::remove_dir_all(self.pgdata())?;
self.create_pgdata()?;
// Interpret the response
let status = response.status();
if !(status.is_client_error() || status.is_server_error()) {
Ok(response.json()?)
} else {
// reqwest does not export its error construction utility functions, so let's craft the message ourselves
let url = response.url().to_owned();
let msg = match response.text() {
Ok(err_body) => format!("Error: {}", err_body),
Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
};
Err(anyhow::anyhow!(msg))
// 2. Bring back config files
fs::write(&postgresql_conf_path, postgresql_conf)?;
// 3. Load basebackup
self.load_basebackup(auth_token)?;
if self.lsn.is_some() {
File::create(self.pgdata().join("standby.signal"))?;
}
// 4. Finally start postgres
println!("Starting postgres at '{}'", self.connstr());
self.pg_ctl(&["start"], auth_token)
}
pub fn stop(&self, destroy: bool) -> Result<()> {
@@ -549,7 +489,7 @@ impl Endpoint {
"Destroying postgres data directory '{}'",
self.pgdata().to_str().unwrap()
);
std::fs::remove_dir_all(self.endpoint_path())?;
fs::remove_dir_all(self.endpoint_path())?;
} else {
self.pg_ctl(&["stop"], &None)?;
}
@@ -558,10 +498,10 @@ impl Endpoint {
pub fn connstr(&self) -> String {
format!(
"postgresql://{}@{}:{}/{}",
"host={} port={} user={} dbname={}",
self.address.ip(),
self.address.port(),
"cloud_admin",
self.pg_address.ip(),
self.pg_address.port(),
"postgres"
)
}

View File

@@ -37,7 +37,7 @@ pub const DEFAULT_PG_VERSION: u32 = 14;
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
pub struct LocalEnv {
// Base directory for all the nodes (the pageserver, safekeepers and
// compute endpoints).
// compute nodes).
//
// This is not stored in the config file. Rather, this is the path where the
// config file itself is. It is read from the NEON_REPO_DIR env variable or

View File

@@ -1,9 +1,3 @@
//! Code to manage pageservers
//!
//! In the local test environment, the pageserver stores its data directly in
//!
//! .neon/
//!
use std::borrow::Cow;
use std::collections::HashMap;
use std::fs::File;
@@ -365,8 +359,8 @@ impl PageServerNode {
.transpose()
.context("Failed to parse 'trace_read_requests' as bool")?,
eviction_policy: settings
.get("eviction_policy")
.map(|x| serde_json::from_str(x))
.remove("eviction_policy")
.map(serde_json::from_str)
.transpose()
.context("Failed to parse 'eviction_policy' json")?,
min_resident_size_override: settings
@@ -374,6 +368,9 @@ impl PageServerNode {
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'min_resident_size_override' as integer")?,
evictions_low_residence_duration_metric_threshold: settings
.remove("evictions_low_residence_duration_metric_threshold")
.map(|x| x.to_string()),
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -451,6 +448,9 @@ impl PageServerNode {
.map(|x| x.parse::<u64>())
.transpose()
.context("Failed to parse 'min_resident_size_override' as an integer")?,
evictions_low_residence_duration_metric_threshold: settings
.get("evictions_low_residence_duration_metric_threshold")
.map(|x| x.to_string()),
})
.send()?
.error_from_body()?;

View File

@@ -1,9 +1,3 @@
//! Code to manage safekeepers
//!
//! In the local test environment, the data for each safekeeper is stored in
//!
//! .neon/safekeepers/<safekeeper id>
//!
use std::io::Write;
use std::path::PathBuf;
use std::process::Child;

View File

@@ -10,6 +10,5 @@ chrono.workspace = true
serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
utils.workspace = true
workspace_hack.workspace = true

View File

@@ -1,32 +0,0 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use crate::rfc3339_serialize;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
/// Response of the /status API
#[derive(Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ComputeState {
pub status: ComputeStatus,
/// Timestamp of the last Postgres activity
#[serde(serialize_with = "rfc3339_serialize")]
pub last_active: DateTime<Utc>,
pub error: Option<String>,
}
#[derive(Deserialize, Serialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ComputeStatus {
Init,
Running,
Failed,
}
/// Response of the /metrics.json API
#[derive(Clone, Default, Serialize)]
pub struct ComputeMetrics {
pub sync_safekeepers_ms: u64,
pub basebackup_ms: u64,
pub config_ms: u64,
pub total_startup_ms: u64,
}

View File

@@ -1,6 +1,6 @@
//! Structs representing the JSON formats used in the compute_ctl's HTTP API.
use crate::spec::ComputeSpecAnyVersion;
use crate::spec::ComputeSpec;
use serde::Deserialize;
/// Request of the /configure API
@@ -10,5 +10,5 @@ use serde::Deserialize;
/// `spec` into a struct initially to be more flexible in the future.
#[derive(Deserialize, Debug)]
pub struct ConfigurationRequest {
pub spec: ComputeSpecAnyVersion,
pub spec: ComputeSpec,
}
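An illustrative request body (cluster contents elided; the field set follows the `ComputeSpec` struct later in this diff):

```
{
  "spec": {
    "format_version": 1.0,
    "operation_uuid": null,
    "cluster": { ... }
  }
}
```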

View File

@@ -3,15 +3,18 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize, Serializer};
#[derive(Serialize, Debug, Deserialize)]
use crate::spec::ComputeSpec;
#[derive(Serialize, Debug)]
pub struct GenericAPIError {
pub error: String,
}
/// Response of the /status API
#[derive(Serialize, Debug, Deserialize)]
#[derive(Serialize, Debug)]
#[serde(rename_all = "snake_case")]
pub struct ComputeStatusResponse {
pub start_time: DateTime<Utc>,
pub tenant: Option<String>,
pub timeline: Option<String>,
pub status: ComputeStatus,
@@ -20,7 +23,7 @@ pub struct ComputeStatusResponse {
pub error: Option<String>,
}
#[derive(Deserialize, Serialize)]
#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
pub struct ComputeState {
pub status: ComputeStatus,
@@ -30,7 +33,7 @@ pub struct ComputeState {
pub error: Option<String>,
}
#[derive(Serialize, Clone, Copy, Debug, Deserialize, PartialEq, Eq)]
#[derive(Serialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ComputeStatus {
// Spec wasn't provided at start, waiting for it to be
@@ -43,6 +46,8 @@ pub enum ComputeStatus {
Init,
// Compute is configured and running.
Running,
// New spec is being applied.
Configuration,
// Either startup or configuration failed,
// compute will exit soon or is waiting for
// control-plane to terminate it.
@@ -59,8 +64,29 @@ where
/// Response of the /metrics.json API
#[derive(Clone, Debug, Default, Serialize)]
pub struct ComputeMetrics {
pub wait_for_spec_ms: u64,
pub sync_safekeepers_ms: u64,
pub basebackup_ms: u64,
pub config_ms: u64,
pub total_startup_ms: u64,
}
/// Response of the `/computes/{compute_id}/spec` control-plane API.
/// This is not actually a compute API response, so consider moving
/// to a different place.
#[derive(Deserialize, Debug)]
pub struct ControlPlaneSpecResponse {
pub spec: Option<ComputeSpec>,
pub status: ControlPlaneComputeStatus,
}
#[derive(Deserialize, Clone, Copy, Debug, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
pub enum ControlPlaneComputeStatus {
// Compute is known to control-plane, but it's not
// yet attached to any timeline / endpoint.
Empty,
// Compute is attached to some timeline / endpoint and
// should be able to start with provided spec.
Attached,
}
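Given the `snake_case` renaming, the two possible control-plane payloads look like (spec contents elided):

```
{"status": "empty", "spec": null}
{"status": "attached", "spec": { ... }}
```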

View File

@@ -3,13 +3,8 @@
//! The spec.json file is used to pass information to 'compute_ctl'. It contains
//! all the information needed to start up the right version of PostgreSQL,
//! and connect it to the storage nodes.
use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use serde::Deserialize;
use std::collections::HashMap;
use std::str::FromStr;
use utils::id::{TenantId, TimelineId};
use utils::lsn::Lsn;
/// String type alias representing Postgres identifier and
/// intended to be used for DB / role names.
@@ -17,191 +12,31 @@ pub type PgIdent = String;
/// Cluster spec or configuration represented as an optional number of
/// delta operations + final cluster state description.
#[serde_as]
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ComputeSpecV2 {
pub format_version: u64,
// For debugging purposes only
pub project_id: Option<String>,
pub endpoint_id: Option<String>,
pub operation_uuid: Option<String>,
/// W3C trace context of the launch operation, for OpenTelemetry tracing
pub startup_tracing_context: Option<HashMap<String, String>>,
// Information needed to connect to the storage layer.
//
// `tenant_id`, `timeline_id` and `pageserver_connstring` are always needed.
//
// If Lsn == None, this is a primary endpoint that continues writing WAL at
// the end of the timeline. If 'lsn' is set, this is a read-only node
// "anchored" at that LSN. 'safekeeper_connstrings' must be non-empty for a
// primary.
#[serde_as(as = "DisplayFromStr")]
pub tenant_id: TenantId,
#[serde_as(as = "DisplayFromStr")]
pub timeline_id: TimelineId,
#[serde_as(as = "Option<DisplayFromStr>")]
pub lsn: Option<Lsn>,
pub pageserver_connstring: String,
pub safekeeper_connstrings: Vec<String>,
/// If set, 'storage_auth_token' is used as the password to authenticate to
/// the pageserver and safekeepers.
pub storage_auth_token: Option<String>,
/// Contents of postgresql.conf file
pub postgresql_conf: Option<String>,
/// Extra settings to append to the postgresql.conf
pub settings: GenericOptions,
// Expected cluster state at the end of transition process.
pub roles: Vec<Role>,
pub databases: Vec<Database>,
pub extensions: Vec<PgIdent>,
pub delta_operations: Option<Vec<DeltaOp>>,
}
#[derive(Deserialize)]
struct FormatVersionOnly {
format_version: u64,
}
impl TryFrom<ComputeSpecAnyVersion> for ComputeSpecV2 {
type Error = anyhow::Error;
fn try_from(input: ComputeSpecAnyVersion) -> Result<ComputeSpecV2, anyhow::Error> {
// First check the 'format_version' field
match serde_json::from_value::<FormatVersionOnly>(input.0.clone())?.format_version {
1 => {
let v1: ComputeSpecV1 = serde_json::from_value(input.0)?;
ComputeSpecV2::upgrade_from_v1(v1)
}
2 => {
let v2: ComputeSpecV2 = serde_json::from_value(input.0)?;
Ok(v2)
}
other => Err(anyhow::anyhow!(
"unexpected format version {other} in spec file"
)),
}
}
}
impl ComputeSpecV2 {
pub fn parse_and_upgrade(input: &str) -> anyhow::Result<ComputeSpecV2> {
ComputeSpecV2::try_from(ComputeSpecAnyVersion(serde_json::from_str::<
serde_json::Value,
>(input)?))
}
pub fn upgrade_from_v1(spec_v1: ComputeSpecV1) -> anyhow::Result<ComputeSpecV2> {
let mut tenant_id = None;
let mut timeline_id = None;
let mut pageserver_connstring = None;
let mut safekeeper_connstrings: Vec<String> = Vec::new();
let mut extensions: Vec<String> = Vec::new();
let mut settings: Vec<GenericOption> = Vec::new();
for setting in &spec_v1.cluster.settings {
if let Some(value) = &setting.value {
match setting.name.as_str() {
"neon.tenant_id" => {
tenant_id = Some(TenantId::from_str(value)?);
}
"neon.timeline_id" => {
timeline_id = Some(TimelineId::from_str(value)?);
}
"neon.pageserver_connstring" => {
pageserver_connstring = Some(value.clone());
}
"neon.safekeepers" => {
// neon.safekeepers is a comma-separated list of postgres connection URLs
safekeeper_connstrings =
value.split(',').map(|s| s.trim().to_string()).collect();
}
"shared_preload_libraries" => {
if value.contains("pg_stat_statements") {
extensions.push("pg_stat_statements".to_string());
}
settings.push(setting.clone())
}
_ => settings.push(setting.clone()),
}
} else {
settings.push(setting.clone())
}
}
let tenant_id =
tenant_id.ok_or_else(|| anyhow!("neon.tenant_id missing from spec file"))?;
let timeline_id =
timeline_id.ok_or_else(|| anyhow!("neon.timeline_id missing from spec file"))?;
let pageserver_connstring = pageserver_connstring
.ok_or_else(|| anyhow!("neon.pageserver_connstring missing from spec file"))?;
Ok(ComputeSpecV2 {
format_version: 2,
project_id: Some(spec_v1.cluster.cluster_id),
endpoint_id: Some(spec_v1.cluster.name),
operation_uuid: spec_v1.operation_uuid,
startup_tracing_context: spec_v1.startup_tracing_context,
tenant_id,
timeline_id,
lsn: None, // Not supported in V1
pageserver_connstring,
safekeeper_connstrings,
storage_auth_token: spec_v1.storage_auth_token,
postgresql_conf: None,
settings: Some(settings),
roles: spec_v1.cluster.roles,
databases: spec_v1.cluster.databases,
extensions,
delta_operations: spec_v1.delta_operations,
})
}
}
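
A minimal, hypothetical call site for parse_and_upgrade; the load_spec wrapper and path handling below are assumptions for illustration, not part of this change:

// Hypothetical helper: load a spec of either format version, normalized to V2.
fn load_spec(path: &std::path::Path) -> anyhow::Result<ComputeSpecV2> {
    let raw = std::fs::read_to_string(path)?;
    // Dispatches on 'format_version': V1 specs are upgraded in memory,
    // V2 specs deserialize directly, anything else is an error.
    ComputeSpecV2::parse_and_upgrade(&raw)
}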
#[serde_as]
#[derive(Deserialize, Debug)]
pub struct ComputeSpecAnyVersion(pub serde_json::Value);
// Old format that didn't have explicit 'tenant_id', 'timeline_id', 'pageserver_connstring'
// and 'safekeeper_connstrings' fields. They were stored as GUCs in the
// 'cluster.settings' list.
#[serde_as]
#[derive(Clone, Deserialize, Serialize)]
pub struct ComputeSpecV1 {
pub format_version: u64,
#[derive(Clone, Debug, Default, Deserialize)]
pub struct ComputeSpec {
pub format_version: f32,
// The control plane also includes a 'timestamp' field in the JSON document,
// but we don't use it for anything. Serde ignores unknown fields when
// deserializing, so it is simply skipped.
pub operation_uuid: Option<String>,
pub cluster: ClusterV1,
/// Expected cluster state at the end of transition process.
pub cluster: Cluster,
pub delta_operations: Option<Vec<DeltaOp>>,
pub storage_auth_token: Option<String>,
pub startup_tracing_context: Option<HashMap<String, String>>,
}
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ClusterV1 {
#[derive(Clone, Debug, Default, Deserialize)]
pub struct Cluster {
pub cluster_id: String,
pub name: String,
pub state: Option<String>,
pub roles: Vec<Role>,
pub databases: Vec<Database>,
pub settings: Vec<GenericOption>,
pub settings: GenericOptions,
}
/// Single cluster state changing operation that could not be represented as
@@ -210,7 +45,7 @@ pub struct ClusterV1 {
/// - DROP ROLE
/// - ALTER ROLE name RENAME TO new_name
/// - ALTER DATABASE name RENAME TO new_name
#[derive(Clone, Debug, Deserialize, Serialize)]
#[derive(Clone, Debug, Deserialize)]
pub struct DeltaOp {
pub action: String,
pub name: PgIdent,
@@ -219,7 +54,7 @@ pub struct DeltaOp {
/// Rust representation of Postgres role info with only those fields
/// that matter for us.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[derive(Clone, Debug, Deserialize)]
pub struct Role {
pub name: PgIdent,
pub encrypted_password: Option<String>,
@@ -228,7 +63,7 @@ pub struct Role {
/// Rust representation of Postgres database info with only those fields
/// that matter for us.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[derive(Clone, Debug, Deserialize)]
pub struct Database {
pub name: PgIdent,
pub owner: PgIdent,
@@ -238,7 +73,7 @@ pub struct Database {
/// Common type representing both SQL statement params with or without value,
/// like `LOGIN` or `OWNER username` in the `CREATE/ALTER ROLE`, and config
/// options like `wal_level = logical`.
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq)]
#[derive(Clone, Debug, Deserialize)]
pub struct GenericOption {
pub name: String,
pub value: Option<String>,
@@ -252,70 +87,11 @@ pub type GenericOptions = Option<Vec<GenericOption>>;
#[cfg(test)]
mod tests {
use super::*;
use std::fs::File;
#[test]
fn test_upgrade_v1_to_v2() -> anyhow::Result<()> {
let spec_v1_str = std::fs::read_to_string("tests/spec-v1.json").unwrap();
let spec_v2 = ComputeSpecV2::parse_and_upgrade(&spec_v1_str)?;
// The original V1 file also contains neon.tenant_id, neon.timeline_id,
// neon.pageserver_connstring and neon.safekeepers. They are promoted to
// explicit top-level fields in V2.
assert_eq!(
spec_v2.tenant_id,
TenantId::from_str("3d1f7595b468230304e0b73cecbcb081")?
);
assert_eq!(
spec_v2.timeline_id,
TimelineId::from_str("7f2aff2a1042b93a2617f44851638422")?
);
assert_eq!(spec_v2.pageserver_connstring, "host=172.30.42.12 port=6400");
assert_eq!(
spec_v2.safekeeper_connstrings,
vec![
"172.30.42.23:6500",
"172.30.42.22:6500",
"172.30.42.21:6500"
]
);
fn opt(name: &str, value: &str, vartype: &str) -> GenericOption {
GenericOption {
name: name.to_string(),
value: Some(value.to_string()),
vartype: vartype.to_string(),
}
}
assert_eq!(spec_v2.postgresql_conf, None);
assert_eq!(
spec_v2.settings.as_ref().unwrap(),
&vec![
opt("max_replication_write_lag", "500", "integer"),
opt("restart_after_crash", "off", "bool"),
opt("password_encryption", "md5", "enum"),
opt(
"shared_preload_libraries",
"neon, pg_stat_statements",
"string"
),
opt("synchronous_standby_names", "walproposer", "string"),
opt("wal_level", "replica", "enum"),
opt("listen_addresses", "0.0.0.0", "string"),
opt("neon.max_cluster_size", "10240", "integer"),
opt("shared_buffers", "65536", "integer"),
opt(
"test.escaping",
r#"here's a backslash \ and a quote ' and a double-quote " hooray"#,
"string"
),
]
);
assert_eq!(spec_v2.extensions, vec!["pg_stat_statements"]);
eprintln!("SPEC: {}", serde_json::to_string_pretty(&spec_v2)?);
Ok(())
fn parse_spec_file() {
let file = File::open("tests/cluster_spec.json").unwrap();
let _spec: ComputeSpec = serde_json::from_reader(file).unwrap();
}
}


@@ -0,0 +1,209 @@
{
"format_version": 1.0,
"timestamp": "2021-05-23T18:25:43.511Z",
"operation_uuid": "0f657b36-4b0f-4a2d-9c2e-1dcd615e7d8b",
"cluster": {
"cluster_id": "test-cluster-42",
"name": "Zenith Test",
"state": "restarted",
"roles": [
{
"name": "postgres",
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "alexk",
"encrypted_password": null,
"options": null
},
{
"name": "zenith \"new\"",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "zen",
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "\"name\";\\n select 1;",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "MyRole",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
}
],
"databases": [
{
"name": "DB2",
"owner": "alexk",
"options": [
{
"name": "LC_COLLATE",
"value": "C",
"vartype": "string"
},
{
"name": "LC_CTYPE",
"value": "C",
"vartype": "string"
},
{
"name": "TEMPLATE",
"value": "template0",
"vartype": "enum"
}
]
},
{
"name": "zenith",
"owner": "MyRole"
},
{
"name": "zen",
"owner": "zen"
}
],
"settings": [
{
"name": "fsync",
"value": "off",
"vartype": "bool"
},
{
"name": "wal_level",
"value": "replica",
"vartype": "enum"
},
{
"name": "hot_standby",
"value": "on",
"vartype": "bool"
},
{
"name": "neon.safekeepers",
"value": "127.0.0.1:6502,127.0.0.1:6503,127.0.0.1:6501",
"vartype": "string"
},
{
"name": "wal_log_hints",
"value": "on",
"vartype": "bool"
},
{
"name": "log_connections",
"value": "on",
"vartype": "bool"
},
{
"name": "shared_buffers",
"value": "32768",
"vartype": "integer"
},
{
"name": "port",
"value": "55432",
"vartype": "integer"
},
{
"name": "max_connections",
"value": "100",
"vartype": "integer"
},
{
"name": "max_wal_senders",
"value": "10",
"vartype": "integer"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "wal_sender_timeout",
"value": "0",
"vartype": "integer"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "maintenance_work_mem",
"value": "65536",
"vartype": "integer"
},
{
"name": "max_parallel_workers",
"value": "8",
"vartype": "integer"
},
{
"name": "max_worker_processes",
"value": "8",
"vartype": "integer"
},
{
"name": "neon.tenant_id",
"value": "b0554b632bd4d547a63b86c3630317e8",
"vartype": "string"
},
{
"name": "max_replication_slots",
"value": "10",
"vartype": "integer"
},
{
"name": "neon.timeline_id",
"value": "2414a61ffc94e428f14b5758fe308e13",
"vartype": "string"
},
{
"name": "shared_preload_libraries",
"value": "neon",
"vartype": "string"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "neon.pageserver_connstring",
"value": "host=127.0.0.1 port=6400",
"vartype": "string"
},
{
"name": "test.escaping",
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
"vartype": "string"
}
]
},
"delta_operations": [
{
"action": "delete_db",
"name": "zenith_test"
},
{
"action": "rename_db",
"name": "DB",
"new_name": "DB2"
},
{
"action": "delete_role",
"name": "zenith2"
},
{
"action": "rename_role",
"name": "zenith new",
"new_name": "zenith \"new\""
}
]
}


@@ -1,175 +0,0 @@
{
"cluster": {
"cluster_id": "young-snowflake-871338",
"name": "young-snowflake-871338",
"settings": [
{
"name": "max_replication_write_lag",
"value": "500",
"vartype": "integer"
},
{
"name": "neon.pageserver_connstring",
"value": "host=172.30.42.12 port=6400",
"vartype": "string"
},
{
"name": "restart_after_crash",
"value": "off",
"vartype": "bool"
},
{
"name": "password_encryption",
"value": "md5",
"vartype": "enum"
},
{
"name": "shared_preload_libraries",
"value": "neon, pg_stat_statements",
"vartype": "string"
},
{
"name": "synchronous_standby_names",
"value": "walproposer",
"vartype": "string"
},
{
"name": "neon.tenant_id",
"value": "3d1f7595b468230304e0b73cecbcb081",
"vartype": "string"
},
{
"name": "neon.timeline_id",
"value": "7f2aff2a1042b93a2617f44851638422",
"vartype": "string"
},
{
"name": "wal_level",
"value": "replica",
"vartype": "enum"
},
{
"name": "listen_addresses",
"value": "0.0.0.0",
"vartype": "string"
},
{
"name": "neon.safekeepers",
"value": "172.30.42.23:6500,172.30.42.22:6500,172.30.42.21:6500",
"vartype": "string"
},
{
"name": "neon.max_cluster_size",
"value": "10240",
"vartype": "integer"
},
{
"name": "shared_buffers",
"value": "65536",
"vartype": "integer"
},
{
"name": "test.escaping",
"value": "here's a backslash \\ and a quote ' and a double-quote \" hooray",
"vartype": "string"
}
],
"roles": [
{
"name": "postgres",
"encrypted_password": "6b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "testuser",
"encrypted_password": "SCRAM-SHA-256$4096:R4V8wIc+aH8T7vy3weC5qg==$aXXM6IQKnEWsRgeyjbxydif6f29LZOGvAWe/oOnuXSM=:5IE7U/woZLZbYSYOJ3v4x3qlLOXS6xcsdJYnMdVkzQY=",
"options": null
},
{
"name": "alexk",
"encrypted_password": null,
"options": null
},
{
"name": "neon \"new\"",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972",
"options": null
},
{
"name": "bar",
"encrypted_password": "9b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "\"name\";\\n select 1;",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
},
{
"name": "MyRole",
"encrypted_password": "5b1d16b78004bbd51fa06af9eda75972"
}
],
"databases": [
{
"name": "DB2",
"owner": "alexk",
"options": [
{
"name": "LC_COLLATE",
"value": "C",
"vartype": "string"
},
{
"name": "LC_CTYPE",
"value": "C",
"vartype": "string"
},
{
"name": "TEMPLATE",
"value": "template0",
"vartype": "enum"
}
]
},
{
"name": "neondb",
"owner": "testuser",
"options": null
},
{
"name": "mydb",
"owner": "MyRole"
},
{
"name": "foo",
"owner": "bar"
}
]
},
"delta_operations": [
{
"action": "delete_db",
"name": "neon_test"
},
{
"action": "rename_db",
"name": "DB",
"new_name": "DB2"
},
{
"action": "delete_role",
"name": "neon2"
},
{
"action": "rename_role",
"name": "neon new",
"new_name": "neon \"new\""
}
],
"format_version": 1,
"operation_uuid": "73c843c3-46dd-496f-b819-e6c5a190f584",
"timestamp": "2023-03-25T21:36:16.729366596Z",
"storage_auth_token": "dummy",
"startup_tracing_context": {
"traceparent": "00-1b79dca0e798ee42961cd13990326551-5e0222e8d7314785-01"
}
}


@@ -4,13 +4,12 @@ version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.68"
chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
rand = "0.8.3"
serde = "1.0.152"
serde_with = "2.1.0"
utils = { version = "0.1.0", path = "../utils" }
workspace_hack = { version = "0.1.0", path = "../../workspace_hack" }
anyhow.workspace = true
chrono.workspace = true
rand.workspace = true
serde.workspace = true
serde_with.workspace = true
utils.workspace = true
workspace_hack.workspace = true


@@ -7,6 +7,7 @@ license.workspace = true
[dependencies]
serde.workspace = true
serde_with.workspace = true
serde_json.workspace = true
const_format.workspace = true
anyhow.workspace = true
bytes.workspace = true
@@ -14,6 +15,7 @@ byteorder.workspace = true
utils.workspace = true
postgres_ffi.workspace = true
enum-map.workspace = true
serde_json.workspace = true
strum.workspace = true
strum_macros.workspace = true
workspace_hack.workspace = true


@@ -7,6 +7,7 @@ use std::{
use byteorder::{BigEndian, ReadBytesExt};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, DisplayFromStr};
use strum_macros;
use utils::{
history_buffer::HistoryBufferWithDropCounter,
id::{NodeId, TenantId, TimelineId},
@@ -18,11 +19,23 @@ use anyhow::bail;
use bytes::{BufMut, Bytes, BytesMut};
/// A state of a tenant in pageserver's memory.
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[derive(
Clone,
PartialEq,
Eq,
serde::Serialize,
serde::Deserialize,
strum_macros::Display,
strum_macros::EnumString,
strum_macros::EnumVariantNames,
strum_macros::AsRefStr,
strum_macros::IntoStaticStr,
)]
#[serde(tag = "slug", content = "data")]
pub enum TenantState {
// This tenant is being loaded from local disk
/// This tenant is being loaded from local disk
Loading,
// This tenant is being downloaded from cloud storage.
/// This tenant is being downloaded from cloud storage.
Attaching,
/// Tenant is fully operational
Active,
@@ -31,15 +44,7 @@ pub enum TenantState {
Stopping,
/// A tenant is recognized by the pageserver, but can no longer be used for
/// any operations, because it failed to be activated.
Broken,
}
pub mod state {
pub const LOADING: &str = "loading";
pub const ATTACHING: &str = "attaching";
pub const ACTIVE: &str = "active";
pub const STOPPING: &str = "stopping";
pub const BROKEN: &str = "broken";
Broken { reason: String, backtrace: String },
}
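
A quick sketch of what the strum derives above provide, per the documented behavior of the derive macros; this is illustrative, not code from this diff:

// strum_macros::Display prints the variant name, also for struct variants:
let state = TenantState::Broken { reason: "x".into(), backtrace: String::new() };
assert_eq!(state.to_string(), "Broken");
// strum_macros::EnumVariantNames exposes all variant names, which replaces the
// hand-maintained `state::*` string constants that this change deletes:
use strum::VariantNames;
assert_eq!(TenantState::VARIANTS, &["Loading", "Attaching", "Active", "Stopping", "Broken"]);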
impl TenantState {
@@ -49,17 +54,26 @@ impl TenantState {
Self::Attaching => true,
Self::Active => false,
Self::Stopping => false,
Self::Broken => false,
Self::Broken { .. } => false,
}
}
pub fn as_str(&self) -> &'static str {
pub fn broken_from_reason(reason: String) -> Self {
let backtrace_str: String = format!("{}", std::backtrace::Backtrace::force_capture());
Self::Broken {
reason,
backtrace: backtrace_str,
}
}
}
impl std::fmt::Debug for TenantState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TenantState::Loading => state::LOADING,
TenantState::Attaching => state::ATTACHING,
TenantState::Active => state::ACTIVE,
TenantState::Stopping => state::STOPPING,
TenantState::Broken => state::BROKEN,
Self::Broken { reason, backtrace } if !reason.is_empty() => {
write!(f, "Broken due to: {reason}. Backtrace:\n{backtrace}")
}
_ => write!(f, "{self}"),
}
}
}
@@ -121,6 +135,7 @@ pub struct TenantCreateRequest {
// For now, this field is not even documented in the openapi_spec.yml.
pub eviction_policy: Option<serde_json::Value>,
pub min_resident_size_override: Option<u64>,
pub evictions_low_residence_duration_metric_threshold: Option<String>,
}
#[serde_as]
@@ -167,6 +182,7 @@ pub struct TenantConfigRequest {
// For now, this field is not even documented in the openapi_spec.yml.
pub eviction_policy: Option<serde_json::Value>,
pub min_resident_size_override: Option<u64>,
pub evictions_low_residence_duration_metric_threshold: Option<String>,
}
impl TenantConfigRequest {
@@ -188,6 +204,7 @@ impl TenantConfigRequest {
trace_read_requests: None,
eviction_policy: None,
min_resident_size_override: None,
evictions_low_residence_duration_metric_threshold: None,
}
}
}
@@ -615,6 +632,7 @@ impl PagestreamBeMessage {
#[cfg(test)]
mod tests {
use bytes::Buf;
use serde_json::json;
use super::*;
@@ -665,4 +683,57 @@ mod tests {
assert!(msg == reconstructed);
}
}
#[test]
fn test_tenantinfo_serde() {
// Test serialization/deserialization of TenantInfo
let original_active = TenantInfo {
id: TenantId::generate(),
state: TenantState::Active,
current_physical_size: Some(42),
has_in_progress_downloads: Some(false),
};
let expected_active = json!({
"id": original_active.id.to_string(),
"state": {
"slug": "Active",
},
"current_physical_size": 42,
"has_in_progress_downloads": false,
});
let original_broken = TenantInfo {
id: TenantId::generate(),
state: TenantState::Broken {
reason: "reason".into(),
backtrace: "backtrace info".into(),
},
current_physical_size: Some(42),
has_in_progress_downloads: Some(false),
};
let expected_broken = json!({
"id": original_broken.id.to_string(),
"state": {
"slug": "Broken",
"data": {
"backtrace": "backtrace info",
"reason": "reason",
}
},
"current_physical_size": 42,
"has_in_progress_downloads": false,
});
assert_eq!(
serde_json::to_value(&original_active).unwrap(),
expected_active
);
assert_eq!(
serde_json::to_value(&original_broken).unwrap(),
expected_broken
);
assert!(format!("{:?}", &original_broken.state).contains("reason"));
assert!(format!("{:?}", &original_broken.state).contains("backtrace info"));
}
}


@@ -5,7 +5,7 @@ use std::path::PathBuf;
use std::process::Command;
use anyhow::{anyhow, Context};
use bindgen::callbacks::ParseCallbacks;
use bindgen::callbacks::{DeriveInfo, ParseCallbacks};
#[derive(Debug)]
struct PostgresFfiCallbacks;
@@ -20,7 +20,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
// Add any custom #[derive] attributes to the data structures that bindgen
// creates.
fn add_derives(&self, name: &str) -> Vec<String> {
fn add_derives(&self, derive_info: &DeriveInfo) -> Vec<String> {
// This is the list of data structures that we want to serialize/deserialize.
let serde_list = [
"XLogRecord",
@@ -31,7 +31,7 @@ impl ParseCallbacks for PostgresFfiCallbacks {
"ControlFileData",
];
if serde_list.contains(&name) {
if serde_list.contains(&derive_info.name) {
vec![
"Default".into(), // Default allows us to easily fill the padding fields with 0.
"Serialize".into(),


@@ -99,7 +99,11 @@ struct S3WithTestBlobs {
#[async_trait::async_trait]
impl AsyncTestContext for MaybeEnabledS3 {
async fn setup() -> Self {
utils::logging::init(utils::logging::LogFormat::Test).expect("logging init failed");
utils::logging::init(
utils::logging::LogFormat::Test,
utils::logging::TracingErrorLayerEnablement::Disabled,
)
.expect("logging init failed");
if env::var(ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME).is_err() {
info!(
"`{}` env variable is not set, skipping the test",
@@ -204,12 +208,7 @@ async fn upload_s3_data(
let data = format!("remote blob data {i}").into_bytes();
let data_len = data.len();
task_client
.upload(
Box::new(std::io::Cursor::new(data)),
data_len,
&blob_path,
None,
)
.upload(std::io::Cursor::new(data), data_len, &blob_path, None)
.await?;
Ok::<_, anyhow::Error>((blob_prefix, blob_path))


@@ -14,4 +14,5 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tracing.workspace = true
tracing-opentelemetry.workspace = true
tracing-subscriber.workspace = true
workspace_hack = { version = "0.1", path = "../../workspace_hack" }
workspace_hack.workspace = true


@@ -27,13 +27,14 @@ signal-hook.workspace = true
thiserror.workspace = true
tokio.workspace = true
tracing.workspace = true
tracing-subscriber = { workspace = true, features = ["json"] }
tracing-error.workspace = true
tracing-subscriber = { workspace = true, features = ["json", "registry"] }
rand.workspace = true
serde_with.workspace = true
strum.workspace = true
strum_macros.workspace = true
url.workspace = true
uuid = { version = "1.2", features = ["v4", "serde"] }
uuid.workspace = true
metrics.workspace = true
workspace_hack.workspace = true


@@ -76,6 +76,7 @@ where
let log_quietly = method == Method::GET;
async move {
let cancellation_guard = RequestCancelled::warn_when_dropped_without_responding();
if log_quietly {
debug!("Handling request");
} else {
@@ -87,7 +88,11 @@ where
// Usage of the error handler also means that we expect only the `ApiError` errors to be raised in this call.
//
// Panics are not handled separately, there's a `tracing_panic_hook` from another module to do that globally.
match (self.0)(request).await {
let res = (self.0)(request).await;
cancellation_guard.disarm();
match res {
Ok(response) => {
let response_status = response.status();
if log_quietly && response_status.is_success() {
@@ -105,6 +110,38 @@ where
}
}
/// Drop guard to WARN in case the request was dropped before completion.
struct RequestCancelled {
warn: Option<tracing::Span>,
}
impl RequestCancelled {
/// Create the drop guard using the [`tracing::Span::current`] as the span.
fn warn_when_dropped_without_responding() -> Self {
RequestCancelled {
warn: Some(tracing::Span::current()),
}
}
/// Consume the drop guard without logging anything.
fn disarm(mut self) {
self.warn = None;
}
}
impl Drop for RequestCancelled {
fn drop(&mut self) {
if let Some(span) = self.warn.take() {
// the span has all of the info already, but the outer `.instrument(span)` has already
// been dropped, so we need to manually re-enter it for this message.
//
// this is what the instrument would do before polling so it is fine.
let _g = span.entered();
warn!("request was dropped before completing");
}
}
}
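
A condensed sketch of the guard's intended life cycle; the handler shape and do_work are assumptions, only the guard itself is from this diff:

// If hyper drops the future before `disarm`, the Drop impl above logs the
// warning inside the request's span.
async fn handle(request: Request<Body>) -> Result<Response<Body>, ApiError> {
    let guard = RequestCancelled::warn_when_dropped_without_responding();
    let response = do_work(request).await?; // may be cancelled at any await point
    guard.disarm(); // we are about to respond, so suppress the warning
    Ok(response)
}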
async fn prometheus_metrics_handler(_req: Request<Body>) -> Result<Response<Body>, ApiError> {
SERVE_METRICS_COUNT.inc();


@@ -54,6 +54,8 @@ pub mod measured_stream;
pub mod serde_percent;
pub mod serde_regex;
pub mod tracing_span_assert;
/// use with fail::cfg("$name", "return(2000)")
#[macro_export]
macro_rules! failpoint_sleep_millis_async {


@@ -1,6 +1,7 @@
use std::str::FromStr;
use anyhow::Context;
use once_cell::sync::Lazy;
use strum_macros::{EnumString, EnumVariantNames};
#[derive(EnumString, EnumVariantNames, Eq, PartialEq, Debug, Clone, Copy)]
@@ -23,24 +24,81 @@ impl LogFormat {
}
}
pub fn init(log_format: LogFormat) -> anyhow::Result<()> {
let default_filter_str = "info";
static TRACING_EVENT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
metrics::register_int_counter_vec!(
"libmetrics_tracing_event_count",
"Number of tracing events, by level",
&["level"]
)
.expect("failed to define metric")
});
struct TracingEventCountLayer(&'static metrics::IntCounterVec);
impl<S> tracing_subscriber::layer::Layer<S> for TracingEventCountLayer
where
S: tracing::Subscriber,
{
fn on_event(
&self,
event: &tracing::Event<'_>,
_ctx: tracing_subscriber::layer::Context<'_, S>,
) {
let level = event.metadata().level();
let level = match *level {
tracing::Level::ERROR => "error",
tracing::Level::WARN => "warn",
tracing::Level::INFO => "info",
tracing::Level::DEBUG => "debug",
tracing::Level::TRACE => "trace",
};
self.0.with_label_values(&[level]).inc();
}
}
/// Whether to add the `tracing_error` crate's `ErrorLayer`
/// to the global tracing subscriber.
///
pub enum TracingErrorLayerEnablement {
/// Do not add the `ErrorLayer`.
Disabled,
/// Add the `ErrorLayer` with the filter specified by RUST_LOG, defaulting to `info` if `RUST_LOG` is unset.
EnableWithRustLogFilter,
}
pub fn init(
log_format: LogFormat,
tracing_error_layer_enablement: TracingErrorLayerEnablement,
) -> anyhow::Result<()> {
// We fall back to printing all spans at info-level or above if
// the RUST_LOG environment variable is not set.
let env_filter = tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new(default_filter_str));
let rust_log_env_filter = || {
tracing_subscriber::EnvFilter::try_from_default_env()
.unwrap_or_else(|_| tracing_subscriber::EnvFilter::new("info"))
};
let base_logger = tracing_subscriber::fmt()
.with_env_filter(env_filter)
.with_target(false)
.with_ansi(atty::is(atty::Stream::Stdout))
.with_writer(std::io::stdout);
match log_format {
LogFormat::Json => base_logger.json().init(),
LogFormat::Plain => base_logger.init(),
LogFormat::Test => base_logger.with_test_writer().init(),
// NB: the order of the with() calls does not matter.
// See https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
use tracing_subscriber::prelude::*;
let r = tracing_subscriber::registry();
let r = r.with({
let log_layer = tracing_subscriber::fmt::layer()
.with_target(false)
.with_ansi(atty::is(atty::Stream::Stdout))
.with_writer(std::io::stdout);
let log_layer = match log_format {
LogFormat::Json => log_layer.json().boxed(),
LogFormat::Plain => log_layer.boxed(),
LogFormat::Test => log_layer.with_test_writer().boxed(),
};
log_layer.with_filter(rust_log_env_filter())
});
let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
match tracing_error_layer_enablement {
TracingErrorLayerEnablement::EnableWithRustLogFilter => r
.with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
.init(),
TracingErrorLayerEnablement::Disabled => r.init(),
}
Ok(())
@@ -157,3 +215,33 @@ impl std::fmt::Debug for PrettyLocation<'_, '_> {
<Self as std::fmt::Display>::fmt(self, f)
}
}
#[cfg(test)]
mod tests {
use metrics::{core::Opts, IntCounterVec};
use super::TracingEventCountLayer;
#[test]
fn tracing_event_count_metric() {
let counter_vec =
IntCounterVec::new(Opts::new("testmetric", "testhelp"), &["level"]).unwrap();
let counter_vec = Box::leak(Box::new(counter_vec)); // make it 'static
let layer = TracingEventCountLayer(counter_vec);
use tracing_subscriber::prelude::*;
tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || {
tracing::trace!("foo");
tracing::debug!("foo");
tracing::info!("foo");
tracing::warn!("foo");
tracing::error!("foo");
});
assert_eq!(counter_vec.with_label_values(&["trace"]).get(), 1);
assert_eq!(counter_vec.with_label_values(&["debug"]).get(), 1);
assert_eq!(counter_vec.with_label_values(&["info"]).get(), 1);
assert_eq!(counter_vec.with_label_values(&["warn"]).get(), 1);
assert_eq!(counter_vec.with_label_values(&["error"]).get(), 1);
}
}


@@ -0,0 +1,287 @@
//! Assert that the current [`tracing::Span`] has a given set of fields.
//!
//! # Usage
//!
//! ```
//! use tracing_subscriber::prelude::*;
//! let registry = tracing_subscriber::registry()
//! .with(tracing_error::ErrorLayer::default());
//!
//! // Register the registry as the global subscriber.
//! // In this example, we'll only use it as a thread-local subscriber.
//! let _guard = tracing::subscriber::set_default(registry);
//!
//! // Then, in the main code:
//!
//! let span = tracing::info_span!("TestSpan", test_id = 1);
//! let _guard = span.enter();
//!
//! // ... down the call stack
//!
//! use utils::tracing_span_assert::{check_fields_present, MultiNameExtractor};
//! let extractor = MultiNameExtractor::new("TestExtractor", ["test", "test_id"]);
//! match check_fields_present([&extractor]) {
//! Ok(()) => {},
//! Err(missing) => {
//! panic!("Missing fields: {:?}", missing.into_iter().map(|f| f.name() ).collect::<Vec<_>>());
//! }
//! }
//! ```
//!
//! Recommended reading: https://docs.rs/tracing-subscriber/0.3.16/tracing_subscriber/layer/index.html#per-layer-filtering
//!
use std::{
collections::HashSet,
fmt::{self},
hash::{Hash, Hasher},
};
pub enum ExtractionResult {
Present,
Absent,
}
pub trait Extractor: Send + Sync + std::fmt::Debug {
fn name(&self) -> &str;
fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult;
}
#[derive(Debug)]
pub struct MultiNameExtractor<const L: usize> {
name: &'static str,
field_names: [&'static str; L],
}
impl<const L: usize> MultiNameExtractor<L> {
pub fn new(name: &'static str, field_names: [&'static str; L]) -> MultiNameExtractor<L> {
MultiNameExtractor { name, field_names }
}
}
impl<const L: usize> Extractor for MultiNameExtractor<L> {
fn name(&self) -> &str {
self.name
}
fn extract(&self, fields: &tracing::field::FieldSet) -> ExtractionResult {
if fields.iter().any(|f| self.field_names.contains(&f.name())) {
ExtractionResult::Present
} else {
ExtractionResult::Absent
}
}
}
struct MemoryIdentity<'a>(&'a dyn Extractor);
impl<'a> MemoryIdentity<'a> {
fn as_ptr(&self) -> *const () {
self.0 as *const _ as *const ()
}
}
impl<'a> PartialEq for MemoryIdentity<'a> {
fn eq(&self, other: &Self) -> bool {
self.as_ptr() == other.as_ptr()
}
}
impl<'a> Eq for MemoryIdentity<'a> {}
impl<'a> Hash for MemoryIdentity<'a> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.as_ptr().hash(state);
}
}
impl<'a> fmt::Debug for MemoryIdentity<'a> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:p}: {}", self.as_ptr(), self.0.name())
}
}
/// Checks that each given extractor matches a field on some span in the
/// current span stack; returns the extractors that matched nothing.
pub fn check_fields_present<const L: usize>(
must_be_present: [&dyn Extractor; L],
) -> Result<(), Vec<&dyn Extractor>> {
let mut missing: HashSet<MemoryIdentity> =
HashSet::from_iter(must_be_present.into_iter().map(|r| MemoryIdentity(r)));
let trace = tracing_error::SpanTrace::capture();
trace.with_spans(|md, _formatted_fields| {
missing.retain(|extractor| match extractor.0.extract(md.fields()) {
ExtractionResult::Present => false,
ExtractionResult::Absent => true,
});
!missing.is_empty() // continue walking up until we've found all missing
});
if missing.is_empty() {
Ok(())
} else {
Err(missing.into_iter().map(|mi| mi.0).collect())
}
}
#[cfg(test)]
mod tests {
use tracing_subscriber::prelude::*;
use super::*;
struct Setup {
_current_thread_subscriber_guard: tracing::subscriber::DefaultGuard,
tenant_extractor: MultiNameExtractor<2>,
timeline_extractor: MultiNameExtractor<2>,
}
fn setup_current_thread() -> Setup {
let tenant_extractor = MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"]);
let timeline_extractor = MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"]);
let registry = tracing_subscriber::registry()
.with(tracing_subscriber::fmt::layer())
.with(tracing_error::ErrorLayer::default());
let guard = tracing::subscriber::set_default(registry);
Setup {
_current_thread_subscriber_guard: guard,
tenant_extractor,
timeline_extractor,
}
}
fn assert_missing(missing: Vec<&dyn Extractor>, expected: Vec<&dyn Extractor>) {
let missing: HashSet<MemoryIdentity> =
HashSet::from_iter(missing.into_iter().map(MemoryIdentity));
let expected: HashSet<MemoryIdentity> =
HashSet::from_iter(expected.into_iter().map(MemoryIdentity));
assert_eq!(missing, expected);
}
#[test]
fn positive_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap();
}
#[test]
fn negative_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", timeline_id = "timeline-1");
let _guard = span.enter();
let missing =
check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn positive_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", tenant_id = "tenant-1");
let _guard = span.enter();
let span = tracing::info_span!("grandchild", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor, &setup.timeline_extractor]).unwrap();
}
#[test]
fn negative_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", timeline_id = "timeline-1");
let _guard = span.enter();
let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn positive_subset_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", tenant_id = "tenant-1", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor]).unwrap();
}
#[test]
fn positive_subset_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", tenant_id = "tenant-1");
let _guard = span.enter();
let span = tracing::info_span!("grandchild", timeline_id = "timeline-1");
let _guard = span.enter();
check_fields_present([&setup.tenant_extractor]).unwrap();
}
#[test]
fn negative_subset_one_level() {
let setup = setup_current_thread();
let span = tracing::info_span!("root", timeline_id = "timeline-1");
let _guard = span.enter();
let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn negative_subset_multiple_levels() {
let setup = setup_current_thread();
let span = tracing::info_span!("root");
let _guard = span.enter();
let span = tracing::info_span!("child", timeline_id = "timeline-1");
let _guard = span.enter();
let missing = check_fields_present([&setup.tenant_extractor]).unwrap_err();
assert_missing(missing, vec![&setup.tenant_extractor]);
}
#[test]
fn tracing_error_subscriber_not_set_up() {
// no setup
let span = tracing::info_span!("foo", e = "some value");
let _guard = span.enter();
let extractor = MultiNameExtractor::new("E", ["e"]);
let missing = check_fields_present([&extractor]).unwrap_err();
assert_missing(missing, vec![&extractor]);
}
#[test]
#[should_panic]
fn panics_if_tracing_error_subscriber_has_wrong_filter() {
let r = tracing_subscriber::registry().with({
tracing_error::ErrorLayer::default().with_filter(
tracing_subscriber::filter::dynamic_filter_fn(|md, _| {
if md.is_span() && *md.level() == tracing::Level::INFO {
return false;
}
true
}),
)
});
let _guard = tracing::subscriber::set_default(r);
let span = tracing::info_span!("foo", e = "some value");
let _guard = span.enter();
let extractor = MultiNameExtractor::new("E", ["e"]);
let missing = check_fields_present([&extractor]).unwrap_err();
assert_missing(missing, vec![&extractor]);
}
}


@@ -13,7 +13,7 @@ use std::time::Instant;
use utils::lsn::Lsn;
use criterion::{criterion_group, criterion_main, Criterion};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
let mut layer_map = LayerMap::<LayerDescriptor>::default();
@@ -33,7 +33,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
min_lsn = min(min_lsn, lsn_range.start);
max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));
updates.insert_historic(Arc::new(layer));
updates.insert_historic(Arc::new(layer)).unwrap();
}
println!("min: {min_lsn}, max: {max_lsn}");
@@ -114,7 +114,7 @@ fn bench_from_captest_env(c: &mut Criterion) {
c.bench_function("captest_uniform_queries", |b| {
b.iter(|| {
for q in queries.clone().into_iter() {
layer_map.search(q.0, q.1);
black_box(layer_map.search(q.0, q.1));
}
});
});
@@ -122,11 +122,11 @@ fn bench_from_captest_env(c: &mut Criterion) {
// test with a key that corresponds to the RelDir entry. See pgdatadir_mapping.rs.
c.bench_function("captest_rel_dir_query", |b| {
b.iter(|| {
let result = layer_map.search(
let result = black_box(layer_map.search(
Key::from_hex("000000067F00008000000000000000000001").unwrap(),
// This LSN is higher than any of the LSNs in the tree
Lsn::from_str("D0/80208AE1").unwrap(),
);
));
result.unwrap();
});
});
@@ -183,7 +183,7 @@ fn bench_from_real_project(c: &mut Criterion) {
group.bench_function("uniform_queries", |b| {
b.iter(|| {
for q in queries.clone().into_iter() {
layer_map.search(q.0, q.1);
black_box(layer_map.search(q.0, q.1));
}
});
});
@@ -215,7 +215,7 @@ fn bench_sequential(c: &mut Criterion) {
is_incremental: false,
short_id: format!("Layer {}", i),
};
updates.insert_historic(Arc::new(layer));
updates.insert_historic(Arc::new(layer)).unwrap();
}
updates.flush();
println!("Finished layer map init in {:?}", now.elapsed());
@@ -232,7 +232,7 @@ fn bench_sequential(c: &mut Criterion) {
group.bench_function("uniform_queries", |b| {
b.iter(|| {
for q in queries.clone().into_iter() {
layer_map.search(q.0, q.1);
black_box(layer_map.search(q.0, q.1));
}
});
});


@@ -25,6 +25,7 @@ use pageserver::{
virtual_file,
};
use postgres_backend::AuthType;
use utils::logging::TracingErrorLayerEnablement;
use utils::signals::ShutdownSignals;
use utils::{
auth::JwtAuth, logging, project_git_version, sentry_init::init_sentry, signals::Signal,
@@ -86,8 +87,19 @@ fn main() -> anyhow::Result<()> {
}
};
// Initialize logging, which must be initialized before the custom panic hook is installed.
logging::init(conf.log_format)?;
// Initialize logging.
//
// It must be initialized before the custom panic hook is installed below.
//
// Regarding tracing_error enablement: at this time, we only use the
// tracing_error crate to debug_assert that log spans contain tenant and timeline ids.
// See `debug_assert_current_span_has_tenant_and_timeline_id` in the timeline module
let tracing_error_layer_enablement = if cfg!(debug_assertions) {
TracingErrorLayerEnablement::EnableWithRustLogFilter
} else {
TracingErrorLayerEnablement::Disabled
};
logging::init(conf.log_format, tracing_error_layer_enablement)?;
// mind the order required here: 1. logging, 2. panic_hook, 3. sentry.
// disarming this hook on pageserver, because we never tear down tracing.
@@ -226,6 +238,7 @@ fn start_pageserver(
);
set_build_info_metric(GIT_VERSION);
set_launch_timestamp_metric(launch_ts);
pageserver::preinitialize_metrics();
// If any failpoints were set from FAILPOINTS environment variable,
// print them to the log for debugging purposes


@@ -6,6 +6,7 @@
use anyhow::{anyhow, bail, ensure, Context, Result};
use remote_storage::{RemotePath, RemoteStorageConfig};
use serde::de::IntoDeserializer;
use std::env;
use storage_broker::Uri;
use utils::crashsafe::path_with_suffix_extension;
@@ -62,7 +63,6 @@ pub mod defaults {
pub const DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL: &str = "1 hour";
pub const DEFAULT_METRIC_COLLECTION_ENDPOINT: Option<reqwest::Url> = None;
pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
///
/// Default built-in configuration file.
@@ -91,7 +91,6 @@ pub mod defaults {
#cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}'
#synthetic_size_calculation_interval = '{DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL}'
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
#disk_usage_based_eviction = {{ max_usage_pct = .., min_avail_bytes = .., period = "10s"}}
@@ -108,6 +107,7 @@ pub mod defaults {
#pitr_interval = '{DEFAULT_PITR_INTERVAL}'
#min_resident_size_override = .. # in bytes
#evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
# [remote_storage]
@@ -182,9 +182,6 @@ pub struct PageServerConf {
pub metric_collection_endpoint: Option<Url>,
pub synthetic_size_calculation_interval: Duration,
// See the corresponding metric's help string.
pub evictions_low_residence_duration_metric_threshold: Duration,
pub disk_usage_based_eviction: Option<DiskUsageEvictionTaskConfig>,
pub test_remote_failures: u64,
@@ -257,8 +254,6 @@ struct PageServerConfigBuilder {
metric_collection_endpoint: BuilderValue<Option<Url>>,
synthetic_size_calculation_interval: BuilderValue<Duration>,
evictions_low_residence_duration_metric_threshold: BuilderValue<Duration>,
disk_usage_based_eviction: BuilderValue<Option<DiskUsageEvictionTaskConfig>>,
test_remote_failures: BuilderValue<u64>,
@@ -316,11 +311,6 @@ impl Default for PageServerConfigBuilder {
.expect("cannot parse default synthetic size calculation interval")),
metric_collection_endpoint: Set(DEFAULT_METRIC_COLLECTION_ENDPOINT),
evictions_low_residence_duration_metric_threshold: Set(humantime::parse_duration(
DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
)
.expect("cannot parse DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD")),
disk_usage_based_eviction: Set(None),
test_remote_failures: Set(0),
@@ -438,10 +428,6 @@ impl PageServerConfigBuilder {
self.test_remote_failures = BuilderValue::Set(fail_first);
}
pub fn evictions_low_residence_duration_metric_threshold(&mut self, value: Duration) {
self.evictions_low_residence_duration_metric_threshold = BuilderValue::Set(value);
}
pub fn disk_usage_based_eviction(&mut self, value: Option<DiskUsageEvictionTaskConfig>) {
self.disk_usage_based_eviction = BuilderValue::Set(value);
}
@@ -525,11 +511,6 @@ impl PageServerConfigBuilder {
synthetic_size_calculation_interval: self
.synthetic_size_calculation_interval
.ok_or(anyhow!("missing synthetic_size_calculation_interval"))?,
evictions_low_residence_duration_metric_threshold: self
.evictions_low_residence_duration_metric_threshold
.ok_or(anyhow!(
"missing evictions_low_residence_duration_metric_threshold"
))?,
disk_usage_based_eviction: self
.disk_usage_based_eviction
.ok_or(anyhow!("missing disk_usage_based_eviction"))?,
@@ -721,12 +702,12 @@ impl PageServerConf {
"synthetic_size_calculation_interval" =>
builder.synthetic_size_calculation_interval(parse_toml_duration(key, item)?),
"test_remote_failures" => builder.test_remote_failures(parse_toml_u64(key, item)?),
"evictions_low_residence_duration_metric_threshold" => builder.evictions_low_residence_duration_metric_threshold(parse_toml_duration(key, item)?),
"disk_usage_based_eviction" => {
tracing::info!("disk_usage_based_eviction: {:#?}", &item);
builder.disk_usage_based_eviction(
toml_edit::de::from_item(item.clone())
.context("parse disk_usage_based_eviction")?)
deserialize_from_item("disk_usage_based_eviction", item)
.context("parse disk_usage_based_eviction")?
)
},
"ondemand_download_behavior_treat_error_as_warn" => builder.ondemand_download_behavior_treat_error_as_warn(parse_toml_bool(key, item)?),
_ => bail!("unrecognized pageserver option '{key}'"),
@@ -827,18 +808,25 @@ impl PageServerConf {
if let Some(eviction_policy) = item.get("eviction_policy") {
t_conf.eviction_policy = Some(
toml_edit::de::from_item(eviction_policy.clone())
deserialize_from_item("eviction_policy", eviction_policy)
.context("parse eviction_policy")?,
);
}
if let Some(item) = item.get("min_resident_size_override") {
t_conf.min_resident_size_override = Some(
toml_edit::de::from_item(item.clone())
deserialize_from_item("min_resident_size_override", item)
.context("parse min_resident_size_override")?,
);
}
if let Some(item) = item.get("evictions_low_residence_duration_metric_threshold") {
t_conf.evictions_low_residence_duration_metric_threshold = Some(parse_toml_duration(
"evictions_low_residence_duration_metric_threshold",
item,
)?);
}
Ok(t_conf)
}
@@ -877,10 +865,6 @@ impl PageServerConf {
cached_metric_collection_interval: Duration::from_secs(60 * 60),
metric_collection_endpoint: defaults::DEFAULT_METRIC_COLLECTION_ENDPOINT,
synthetic_size_calculation_interval: Duration::from_secs(60),
evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
)
.unwrap(),
disk_usage_based_eviction: None,
test_remote_failures: 0,
ondemand_download_behavior_treat_error_as_warn: false,
@@ -938,6 +922,18 @@ where
})
}
fn deserialize_from_item<T>(name: &str, item: &Item) -> anyhow::Result<T>
where
T: serde::de::DeserializeOwned,
{
// ValueDeserializer::new is not public, so use the ValueDeserializer's documented way
let deserializer = match item.clone().into_value() {
Ok(value) => value.into_deserializer(),
Err(item) => anyhow::bail!("toml_edit::Item '{item}' is not a toml_edit::Value"),
};
T::deserialize(deserializer).with_context(|| format!("deserializing item for node {name}"))
}
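
A small usage sketch for deserialize_from_item, assuming an already-parsed toml_edit::Document; the key name here is illustrative:

// Hypothetical: pull a typed value out of a parsed pageserver config document.
fn read_override(doc: &toml_edit::Document) -> anyhow::Result<Option<u64>> {
    match doc.get("min_resident_size_override") {
        Some(item) => Ok(Some(deserialize_from_item("min_resident_size_override", item)?)),
        None => Ok(None),
    }
}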
/// Configurable semaphore permits setting.
///
/// Does not allow semaphore permits to be zero, because at runtime initially zero permits and empty
@@ -1004,9 +1000,10 @@ mod tests {
use remote_storage::{RemoteStorageKind, S3Config};
use tempfile::{tempdir, TempDir};
use utils::serde_percent::Percent;
use super::*;
use crate::DEFAULT_PG_VERSION;
use crate::{tenant::config::EvictionPolicy, DEFAULT_PG_VERSION};
const ALL_BASE_VALUES_TOML: &str = r#"
# Initial configuration file created by 'pageserver --init'
@@ -1029,8 +1026,6 @@ cached_metric_collection_interval = '22200 s'
metric_collection_endpoint = 'http://localhost:80/metrics'
synthetic_size_calculation_interval = '333 s'
evictions_low_residence_duration_metric_threshold = '444 s'
log_format = 'json'
"#;
@@ -1087,9 +1082,6 @@ log_format = 'json'
synthetic_size_calculation_interval: humantime::parse_duration(
defaults::DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL
)?,
evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
defaults::DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD
)?,
disk_usage_based_eviction: None,
test_remote_failures: 0,
ondemand_download_behavior_treat_error_as_warn: false,
@@ -1144,7 +1136,6 @@ log_format = 'json'
cached_metric_collection_interval: Duration::from_secs(22200),
metric_collection_endpoint: Some(Url::parse("http://localhost:80/metrics")?),
synthetic_size_calculation_interval: Duration::from_secs(333),
evictions_low_residence_duration_metric_threshold: Duration::from_secs(444),
disk_usage_based_eviction: None,
test_remote_failures: 0,
ondemand_download_behavior_treat_error_as_warn: false,
@@ -1310,6 +1301,71 @@ trace_read_requests = {trace_read_requests}"#,
Ok(())
}
#[test]
fn eviction_pageserver_config_parse() -> anyhow::Result<()> {
let tempdir = tempdir()?;
let (workdir, pg_distrib_dir) = prepare_fs(&tempdir)?;
let pageserver_conf_toml = format!(
r#"pg_distrib_dir = "{}"
metric_collection_endpoint = "http://sample.url"
metric_collection_interval = "10min"
id = 222
[disk_usage_based_eviction]
max_usage_pct = 80
min_avail_bytes = 0
period = "10s"
[tenant_config]
evictions_low_residence_duration_metric_threshold = "20m"
[tenant_config.eviction_policy]
kind = "LayerAccessThreshold"
period = "20m"
threshold = "20m"
"#,
pg_distrib_dir.display(),
);
let toml: Document = pageserver_conf_toml.parse()?;
let conf = PageServerConf::parse_and_validate(&toml, &workdir)?;
assert_eq!(conf.pg_distrib_dir, pg_distrib_dir);
assert_eq!(
conf.metric_collection_endpoint,
Some("http://sample.url".parse().unwrap())
);
assert_eq!(
conf.metric_collection_interval,
Duration::from_secs(10 * 60)
);
assert_eq!(
conf.default_tenant_conf
.evictions_low_residence_duration_metric_threshold,
Duration::from_secs(20 * 60)
);
assert_eq!(conf.id, NodeId(222));
assert_eq!(
conf.disk_usage_based_eviction,
Some(DiskUsageEvictionTaskConfig {
max_usage_pct: Percent::new(80).unwrap(),
min_avail_bytes: 0,
period: Duration::from_secs(10),
#[cfg(feature = "testing")]
mock_statvfs: None,
})
);
match &conf.default_tenant_conf.eviction_policy {
EvictionPolicy::NoEviction => panic!("Unexpected eviction policy in tenant settings"),
EvictionPolicy::LayerAccessThreshold(eviction_threshold) => {
assert_eq!(eviction_threshold.period, Duration::from_secs(20 * 60));
assert_eq!(eviction_threshold.threshold, Duration::from_secs(20 * 60));
}
}
Ok(())
}
fn prepare_fs(tempdir: &TempDir) -> anyhow::Result<(PathBuf, PathBuf)> {
let tempdir_path = tempdir.path();


@@ -520,6 +520,43 @@ paths:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/synthetic_size:
parameters:
- name: tenant_id
in: path
required: true
schema:
type: string
format: hex
get:
description: |
Calculate tenant's synthetic size
responses:
"200":
description: Tenant's synthetic size
content:
application/json:
schema:
$ref: "#/components/schemas/SyntheticSizeResponse"
"401":
description: Unauthorized Error
content:
application/json:
schema:
$ref: "#/components/schemas/UnauthorizedError"
"403":
description: Forbidden Error
content:
application/json:
schema:
$ref: "#/components/schemas/ForbiddenError"
"500":
description: Generic operation error
content:
application/json:
schema:
$ref: "#/components/schemas/Error"
/v1/tenant/{tenant_id}/size:
parameters:
- name: tenant_id
@@ -829,12 +866,9 @@ components:
type: object
required:
- id
- state
properties:
id:
type: string
state:
type: string
current_physical_size:
type: integer
has_in_progress_downloads:
@@ -951,6 +985,84 @@ components:
latest_gc_cutoff_lsn:
type: string
format: hex
SyntheticSizeResponse:
type: object
required:
- id
- size
- segment_sizes
- inputs
properties:
id:
type: string
format: hex
size:
type: integer
segment_sizes:
type: array
items:
$ref: "#/components/schemas/SegmentSize"
inputs:
type: object
properties:
segments:
type: array
items:
$ref: "#/components/schemas/SegmentData"
timeline_inputs:
type: array
items:
$ref: "#/components/schemas/TimelineInput"
SegmentSize:
type: object
required:
- method
- accum_size
properties:
method:
type: string
accum_size:
type: integer
SegmentData:
type: object
required:
- segment
properties:
segment:
type: object
required:
- lsn
properties:
parent:
type: integer
lsn:
type: integer
size:
type: integer
needed:
type: boolean
timeline_id:
type: string
format: hex
kind:
type: string
TimelineInput:
type: object
required:
- timeline_id
properties:
ancestor_id:
type: string
ancestor_lsn:
type: string
timeline_id:
type: string
format: hex
Error:
type: object
required:


@@ -465,7 +465,7 @@ async fn tenant_list_handler(request: Request<Body>) -> Result<Response<Body>, A
.iter()
.map(|(id, state)| TenantInfo {
id: *id,
state: *state,
state: state.clone(),
current_physical_size: None,
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
})
@@ -490,7 +490,7 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
let state = tenant.current_state();
Ok(TenantInfo {
id: tenant_id,
state,
state: state.clone(),
current_physical_size: Some(current_physical_size),
has_in_progress_downloads: Some(state.has_in_progress_downloads()),
})
@@ -781,6 +781,19 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
if let Some(evictions_low_residence_duration_metric_threshold) =
request_data.evictions_low_residence_duration_metric_threshold
{
tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
humantime::parse_duration(&evictions_low_residence_duration_metric_threshold)
.with_context(bad_duration(
"evictions_low_residence_duration_metric_threshold",
&evictions_low_residence_duration_metric_threshold,
))
.map_err(ApiError::BadRequest)?,
);
}
let target_tenant_id = request_data
.new_tenant_id
.map(TenantId::from)
@@ -914,6 +927,19 @@ async fn update_tenant_config_handler(
tenant_conf.min_resident_size_override = request_data.min_resident_size_override;
if let Some(evictions_low_residence_duration_metric_threshold) =
request_data.evictions_low_residence_duration_metric_threshold
{
tenant_conf.evictions_low_residence_duration_metric_threshold = Some(
humantime::parse_duration(&evictions_low_residence_duration_metric_threshold)
.with_context(bad_duration(
"evictions_low_residence_duration_metric_threshold",
&evictions_low_residence_duration_metric_threshold,
))
.map_err(ApiError::BadRequest)?,
);
}
let state = get_state(&request);
mgr::set_new_tenant_config(state.conf, tenant_conf, tenant_id)
.instrument(info_span!("tenant_config", tenant = ?tenant_id))
@@ -931,7 +957,7 @@ async fn handle_tenant_break(r: Request<Body>) -> Result<Response<Body>, ApiErro
.await
.map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
tenant.set_broken("broken from test");
tenant.set_broken("broken from test".to_owned());
json_response(StatusCode::OK, ())
}
@@ -1175,6 +1201,37 @@ async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
)
}
#[cfg(feature = "testing")]
async fn post_tracing_event_handler(mut r: Request<Body>) -> Result<Response<Body>, ApiError> {
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "lowercase")]
enum Level {
Error,
Warn,
Info,
Debug,
Trace,
}
#[derive(Debug, serde::Deserialize)]
struct Request {
level: Level,
message: String,
}
let body: Request = json_request(&mut r)
.await
.map_err(|_| ApiError::BadRequest(anyhow::anyhow!("invalid JSON body")))?;
match body.level {
Level::Error => tracing::error!(?body.message),
Level::Warn => tracing::warn!(?body.message),
Level::Info => tracing::info!(?body.message),
Level::Debug => tracing::debug!(?body.message),
Level::Trace => tracing::trace!(?body.message),
}
json_response(StatusCode::OK, ())
}
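
For reference, an example of a request body this testing-only endpoint accepts, matching the Request struct above; the values are illustrative:

// POST /v1/tracing/event with a JSON body:
let body = serde_json::json!({
    "level": "info",
    "message": "hello from a test",
});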
pub fn make_router(
conf: &'static PageServerConf,
launch_ts: &'static LaunchTimestamp,
@@ -1315,5 +1372,9 @@ pub fn make_router(
testing_api!("set tenant state to broken", handle_tenant_break),
)
.get("/v1/panic", |r| RequestSpan(always_panic_handler).handle(r))
.post(
"/v1/tracing/event",
testing_api!("emit a tracing event", post_tracing_event_handler),
)
.any(handler_404))
}


@@ -44,6 +44,8 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61;
static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);
pub use crate::metrics::preinitialize_metrics;
pub async fn shutdown_pageserver(exit_code: i32) {
// Shut down the libpq endpoint task. This prevents new connections from
// being accepted.


@@ -1,12 +1,13 @@
use metrics::core::{AtomicU64, GenericCounter};
use metrics::{
register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
UIntGauge, UIntGaugeVec,
register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec, Counter, CounterVec,
Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge,
UIntGaugeVec,
};
use once_cell::sync::Lazy;
use pageserver_api::models::state;
use pageserver_api::models::TenantState;
use strum::VariantNames;
use utils::id::{TenantId, TimelineId};
/// Prometheus histogram buckets (in seconds) for operations in the critical
@@ -147,15 +148,6 @@ static CURRENT_LOGICAL_SIZE: Lazy<UIntGaugeVec> = Lazy::new(|| {
.expect("failed to define current logical size metric")
});
// Metrics collected on tenant states.
const TENANT_STATE_OPTIONS: &[&str] = &[
state::LOADING,
state::ATTACHING,
state::ACTIVE,
state::STOPPING,
state::BROKEN,
];
pub static TENANT_STATE_METRIC: Lazy<UIntGaugeVec> = Lazy::new(|| {
register_uint_gauge_vec!(
"pageserver_tenant_states_count",
@@ -213,6 +205,15 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
.expect("failed to define a metric")
});
pub static UNEXPECTED_ONDEMAND_DOWNLOADS: Lazy<IntCounter> = Lazy::new(|| {
register_int_counter!(
"pageserver_unexpected_ondemand_downloads_count",
"Number of unexpected on-demand downloads. \
We log more context for each increment, so we forgo any labels on this metric.",
)
.expect("failed to define a metric")
});
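
Per the help string, each increment should be paired with contextual logging; a hypothetical call-site shape, where task_kind and the message are assumptions:

// Hypothetical call site in the download path:
tracing::warn!("unexpected on-demand download triggered by task {:?}", task_kind);
UNEXPECTED_ONDEMAND_DOWNLOADS.inc();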
/// Each [`Timeline`]'s [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
#[derive(Debug)]
pub struct EvictionsWithLowResidenceDuration {
@@ -265,6 +266,22 @@ impl EvictionsWithLowResidenceDuration {
}
}
pub fn change_threshold(
&mut self,
tenant_id: &str,
timeline_id: &str,
new_threshold: Duration,
) {
if new_threshold == self.threshold {
return;
}
let mut with_new =
EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
.build(tenant_id, timeline_id);
std::mem::swap(self, &mut with_new);
with_new.remove(tenant_id, timeline_id);
}
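
change_threshold swaps in a freshly built metric and unregisters the old one; a hedged sketch of a call site, given the RwLock wrapper this diff adds to TimelineMetrics further down (all call-site names assumed):

// Hypothetical reaction to a tenant config update:
timeline_metrics
    .evictions_with_low_residence_duration
    .write()
    .unwrap()
    .change_threshold(&tenant_id.to_string(), &timeline_id.to_string(), new_threshold);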
// This could be a `Drop` impl, but we need the `tenant_id` and `timeline_id`.
fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
let Some(_counter) = self.counter.take() else {
@@ -342,11 +359,6 @@ pub static LIVE_CONNECTIONS_COUNT: Lazy<IntGaugeVec> = Lazy::new(|| {
.expect("failed to define a metric")
});
pub static NUM_ONDISK_LAYERS: Lazy<IntGauge> = Lazy::new(|| {
register_int_gauge!("pageserver_ondisk_layers", "Number of layers on-disk")
.expect("failed to define a metric")
});
// remote storage metrics
/// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`].
@@ -377,6 +389,26 @@ static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new
.expect("failed to define a metric")
});
static REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_remote_timeline_client_bytes_started",
"Incremented by the number of bytes associated with a remote timeline client operation. \
The increment happens when the operation is scheduled.",
&["tenant_id", "timeline_id", "file_kind", "op_kind"],
)
.expect("failed to define a metric")
});
static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"pageserver_remote_timeline_client_bytes_finished",
"Incremented by the number of bytes associated with a remote timeline client operation. \
The increment happens when the operation finishes (regardless of success/failure/shutdown).",
&["tenant_id", "timeline_id", "file_kind", "op_kind"],
)
.expect("failed to define a metric")
});
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RemoteOpKind {
Upload,
@@ -597,7 +629,7 @@ pub struct TimelineMetrics {
pub num_persistent_files_created: IntCounter,
pub persistent_bytes_written: IntCounter,
pub evictions: IntCounter,
pub evictions_with_low_residence_duration: EvictionsWithLowResidenceDuration,
pub evictions_with_low_residence_duration: std::sync::RwLock<EvictionsWithLowResidenceDuration>,
}
impl TimelineMetrics {
@@ -664,7 +696,9 @@ impl TimelineMetrics {
num_persistent_files_created,
persistent_bytes_written,
evictions,
evictions_with_low_residence_duration,
evictions_with_low_residence_duration: std::sync::RwLock::new(
evictions_with_low_residence_duration,
),
}
}
}
@@ -683,6 +717,8 @@ impl Drop for TimelineMetrics {
let _ = PERSISTENT_BYTES_WRITTEN.remove_label_values(&[tenant_id, timeline_id]);
let _ = EVICTIONS.remove_label_values(&[tenant_id, timeline_id]);
self.evictions_with_low_residence_duration
.write()
.unwrap()
.remove(tenant_id, timeline_id);
for op in STORAGE_TIME_OPERATIONS {
let _ =
@@ -707,7 +743,7 @@ impl Drop for TimelineMetrics {
pub fn remove_tenant_metrics(tenant_id: &TenantId) {
let tid = tenant_id.to_string();
let _ = TENANT_SYNTHETIC_SIZE_METRIC.remove_label_values(&[&tid]);
for state in TENANT_STATE_OPTIONS {
for state in TenantState::VARIANTS {
let _ = TENANT_STATE_METRIC.remove_label_values(&[&tid, state]);
}
}
@@ -727,6 +763,8 @@ pub struct RemoteTimelineClientMetrics {
remote_operation_time: Mutex<HashMap<(&'static str, &'static str, &'static str), Histogram>>,
calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
calls_started_hist: Mutex<HashMap<(&'static str, &'static str), Histogram>>,
bytes_started_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
bytes_finished_counter: Mutex<HashMap<(&'static str, &'static str), IntCounter>>,
}
impl RemoteTimelineClientMetrics {
@@ -737,6 +775,8 @@ impl RemoteTimelineClientMetrics {
remote_operation_time: Mutex::new(HashMap::default()),
calls_unfinished_gauge: Mutex::new(HashMap::default()),
calls_started_hist: Mutex::new(HashMap::default()),
bytes_started_counter: Mutex::new(HashMap::default()),
bytes_finished_counter: Mutex::new(HashMap::default()),
remote_physical_size_gauge: Mutex::new(None),
}
}
@@ -775,6 +815,7 @@ impl RemoteTimelineClientMetrics {
});
metric.clone()
}
fn calls_unfinished_gauge(
&self,
file_kind: &RemoteOpFileKind,
@@ -816,32 +857,125 @@ impl RemoteTimelineClientMetrics {
});
metric.clone()
}
fn bytes_started_counter(
&self,
file_kind: &RemoteOpFileKind,
op_kind: &RemoteOpKind,
) -> IntCounter {
// XXX would be nice to have an upgradable RwLock
let mut guard = self.bytes_started_counter.lock().unwrap();
let key = (file_kind.as_str(), op_kind.as_str());
let metric = guard.entry(key).or_insert_with(move || {
REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER
.get_metric_with_label_values(&[
&self.tenant_id.to_string(),
&self.timeline_id.to_string(),
key.0,
key.1,
])
.unwrap()
});
metric.clone()
}
fn bytes_finished_counter(
&self,
file_kind: &RemoteOpFileKind,
op_kind: &RemoteOpKind,
) -> IntCounter {
// XXX would be nice to have an upgradable RwLock
let mut guard = self.bytes_finished_counter.lock().unwrap();
let key = (file_kind.as_str(), op_kind.as_str());
let metric = guard.entry(key).or_insert_with(move || {
REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER
.get_metric_with_label_values(&[
&self.tenant_id.to_string(),
&self.timeline_id.to_string(),
key.0,
key.1,
])
.unwrap()
});
metric.clone()
}
}
#[cfg(test)]
impl RemoteTimelineClientMetrics {
pub fn get_bytes_started_counter_value(
&self,
file_kind: &RemoteOpFileKind,
op_kind: &RemoteOpKind,
) -> Option<u64> {
let guard = self.bytes_started_counter.lock().unwrap();
let key = (file_kind.as_str(), op_kind.as_str());
guard.get(&key).map(|counter| counter.get())
}
pub fn get_bytes_finished_counter_value(
&self,
file_kind: &RemoteOpFileKind,
op_kind: &RemoteOpKind,
) -> Option<u64> {
let guard = self.bytes_finished_counter.lock().unwrap();
let key = (file_kind.as_str(), op_kind.as_str());
guard.get(&key).map(|counter| counter.get())
}
}
/// See [`RemoteTimelineClientMetrics::call_begin`].
#[must_use]
pub(crate) struct RemoteTimelineClientCallMetricGuard(Option<IntGauge>);
pub(crate) struct RemoteTimelineClientCallMetricGuard {
/// Decremented on drop.
calls_unfinished_metric: Option<IntGauge>,
/// If Some(), this references the bytes_finished metric, and we increment it by the given `u64` on drop.
bytes_finished: Option<(IntCounter, u64)>,
}
impl RemoteTimelineClientCallMetricGuard {
/// Consume this guard object without decrementing the metric.
/// The caller vouches to do this manually, so that the prior increment of the gauge will cancel out.
/// Consume this guard object without performing the metric updates it would do on `drop()`.
/// The caller vouches to do the metric updates manually.
pub fn will_decrement_manually(mut self) {
self.0 = None; // prevent drop() from decrementing
let RemoteTimelineClientCallMetricGuard {
calls_unfinished_metric,
bytes_finished,
} = &mut self;
calls_unfinished_metric.take();
bytes_finished.take();
}
}
impl Drop for RemoteTimelineClientCallMetricGuard {
fn drop(&mut self) {
if let RemoteTimelineClientCallMetricGuard(Some(guard)) = self {
let RemoteTimelineClientCallMetricGuard {
calls_unfinished_metric,
bytes_finished,
} = self;
if let Some(guard) = calls_unfinished_metric.take() {
guard.dec();
}
if let Some((bytes_finished_metric, value)) = bytes_finished {
bytes_finished_metric.inc_by(*value);
}
}
}
/// The enum variants communicate to the [`RemoteTimelineClientMetrics`] whether to
/// track the byte size of this call in applicable metric(s).
pub(crate) enum RemoteTimelineClientMetricsCallTrackSize {
/// Do not account for this call's byte size in any metrics.
/// The `reason` field is there to make the call sites self-documenting
/// about why they don't need the metric.
DontTrackSize { reason: &'static str },
/// Track the byte size of the call in applicable metric(s).
Bytes(u64),
}
impl RemoteTimelineClientMetrics {
/// Increment the metrics that track ongoing calls to the remote timeline client instance.
/// Update the metrics that change when a call to the remote timeline client instance starts.
///
/// Drop the returned guard object once the operation is finished to decrement the values.
/// Drop the returned guard object once the operation is finished to update the corresponding metrics that track completions.
/// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and [`call_end`] if that
/// is more suitable.
/// Never do both.
@@ -849,24 +983,51 @@ impl RemoteTimelineClientMetrics {
&self,
file_kind: &RemoteOpFileKind,
op_kind: &RemoteOpKind,
size: RemoteTimelineClientMetricsCallTrackSize,
) -> RemoteTimelineClientCallMetricGuard {
let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
self.calls_started_hist(file_kind, op_kind)
.observe(unfinished_metric.get() as f64);
unfinished_metric.inc();
RemoteTimelineClientCallMetricGuard(Some(unfinished_metric))
.observe(calls_unfinished_metric.get() as f64);
calls_unfinished_metric.inc(); // NB: inc after the histogram, see comment on underlying metric
let bytes_finished = match size {
RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {
// nothing to do
None
}
RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
self.bytes_started_counter(file_kind, op_kind).inc_by(size);
let finished_counter = self.bytes_finished_counter(file_kind, op_kind);
Some((finished_counter, size))
}
};
RemoteTimelineClientCallMetricGuard {
calls_unfinished_metric: Some(calls_unfinished_metric),
bytes_finished,
}
}
/// Manually decrement the metric instead of using the guard object.
/// Manually update the metrics that track completions, instead of using the guard object.
/// Using the guard object is generally preferable.
/// See [`call_begin`] for more context.
pub(crate) fn call_end(&self, file_kind: &RemoteOpFileKind, op_kind: &RemoteOpKind) {
let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
pub(crate) fn call_end(
&self,
file_kind: &RemoteOpFileKind,
op_kind: &RemoteOpKind,
size: RemoteTimelineClientMetricsCallTrackSize,
) {
let calls_unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
debug_assert!(
unfinished_metric.get() > 0,
calls_unfinished_metric.get() > 0,
"begin and end should cancel out"
);
unfinished_metric.dec();
calls_unfinished_metric.dec();
match size {
RemoteTimelineClientMetricsCallTrackSize::DontTrackSize { reason: _reason } => {}
RemoteTimelineClientMetricsCallTrackSize::Bytes(size) => {
self.bytes_finished_counter(file_kind, op_kind).inc_by(size);
}
}
}
}
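An illustrative sketch (not part of the diff) of the two intended ways to use the begin/end API above; `metrics` and `do_upload` are hypothetical stand-ins:

// Common path: let the guard do the completion bookkeeping on drop.
fn upload_with_guard(metrics: &RemoteTimelineClientMetrics) {
    let guard = metrics.call_begin(
        &RemoteOpFileKind::Layer,
        &RemoteOpKind::Upload,
        RemoteTimelineClientMetricsCallTrackSize::Bytes(1024),
    );
    do_upload();
    drop(guard); // decrements calls_unfinished, adds 1024 to bytes_finished
}

// Manual path: disarm the guard and call call_end() yourself. Never do both.
fn upload_with_manual_end(metrics: &RemoteTimelineClientMetrics) {
    let guard = metrics.call_begin(
        &RemoteOpFileKind::Layer,
        &RemoteOpKind::Upload,
        RemoteTimelineClientMetricsCallTrackSize::Bytes(1024),
    );
    guard.will_decrement_manually(); // prevent the Drop impl from updating metrics
    do_upload();
    metrics.call_end(
        &RemoteOpFileKind::Layer,
        &RemoteOpKind::Upload,
        RemoteTimelineClientMetricsCallTrackSize::Bytes(1024),
    );
}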
@@ -879,6 +1040,8 @@ impl Drop for RemoteTimelineClientMetrics {
remote_operation_time,
calls_unfinished_gauge,
calls_started_hist,
bytes_started_counter,
bytes_finished_counter,
} = self;
for ((a, b, c), _) in remote_operation_time.get_mut().unwrap().drain() {
let _ = REMOTE_OPERATION_TIME.remove_label_values(&[tenant_id, timeline_id, a, b, c]);
@@ -899,6 +1062,22 @@ impl Drop for RemoteTimelineClientMetrics {
b,
]);
}
for ((a, b), _) in bytes_started_counter.get_mut().unwrap().drain() {
let _ = REMOTE_TIMELINE_CLIENT_BYTES_STARTED_COUNTER.remove_label_values(&[
tenant_id,
timeline_id,
a,
b,
]);
}
for ((a, b), _) in bytes_finished_counter.get_mut().unwrap().drain() {
let _ = REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER.remove_label_values(&[
tenant_id,
timeline_id,
a,
b,
]);
}
{
let _ = remote_physical_size_gauge; // used to avoid an 'unused' warning in the destructuring above
let _ = REMOTE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, timeline_id]);
@@ -962,3 +1141,10 @@ impl<F: Future<Output = Result<O, E>>, O, E> Future for MeasuredRemoteOp<F> {
poll_result
}
}
pub fn preinitialize_metrics() {
// We want to alert on this metric increasing.
// Initialize it eagerly, so that our alert rule can distinguish absence of the metric from metric value 0.
assert_eq!(UNEXPECTED_ONDEMAND_DOWNLOADS.get(), 0);
UNEXPECTED_ONDEMAND_DOWNLOADS.reset();
}
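A sketch of the assumed wiring (the call site is outside this diff): this needs to run once during pageserver startup, before the metrics endpoint serves its first scrape, so the alert rule sees an explicit 0 instead of an absent series.

fn startup() {
    // Hypothetical call site: register alertable counters at 0 before
    // the HTTP endpoint starts serving /metrics.
    crate::metrics::preinitialize_metrics();
}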

View File

@@ -65,7 +65,7 @@ fn copyin_stream(pgb: &mut PostgresBackendTCP) -> impl Stream<Item = io::Result<
_ = task_mgr::shutdown_watcher() => {
// We were requested to shut down.
let msg = format!("pageserver is shutting down");
let msg = "pageserver is shutting down".to_string();
let _ = pgb.write_message_noflush(&BeMessage::ErrorResponse(&msg, None));
Err(QueryError::Other(anyhow::anyhow!(msg)))
}
@@ -700,6 +700,8 @@ impl PageServerHandler {
full_backup: bool,
ctx: RequestContext,
) -> anyhow::Result<()> {
let started = std::time::Instant::now();
// check that the timeline exists
let timeline = get_active_tenant_timeline(tenant_id, timeline_id, &ctx).await?;
let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
@@ -712,6 +714,8 @@ impl PageServerHandler {
.context("invalid basebackup lsn")?;
}
let lsn_awaited_after = started.elapsed();
// switch client to COPYOUT
pgb.write_message_noflush(&BeMessage::CopyOutResponse)?;
pgb.flush().await?;
@@ -732,7 +736,17 @@ impl PageServerHandler {
pgb.write_message_noflush(&BeMessage::CopyDone)?;
pgb.flush().await?;
info!("basebackup complete");
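// Split the total elapsed time into the LSN-wait portion and the
// backup-streaming portion; checked_sub is defensive against the
// subtraction going negative.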
let basebackup_after = started
.elapsed()
.checked_sub(lsn_awaited_after)
.unwrap_or(Duration::ZERO);
info!(
lsn_await_millis = lsn_awaited_after.as_millis(),
basebackup_millis = basebackup_after.as_millis(),
"basebackup complete"
);
Ok(())
}

View File

@@ -118,6 +118,10 @@ pub struct Tenant {
// Global pageserver config parameters
pub conf: &'static PageServerConf,
/// The value creation timestamp, used to measure activation delay, see:
/// <https://github.com/neondatabase/neon/issues/4025>
loading_started_at: Instant,
state: watch::Sender<TenantState>,
// Overridden tenant-specific config parameters.
@@ -177,9 +181,9 @@ impl UninitializedTimeline<'_> {
///
/// The new timeline is initialized in Active state, and its background jobs are
/// started
pub fn initialize(self, _ctx: &RequestContext) -> anyhow::Result<Arc<Timeline>> {
pub fn initialize(self, ctx: &RequestContext) -> anyhow::Result<Arc<Timeline>> {
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
self.initialize_with_lock(&mut timelines, true, true)
self.initialize_with_lock(ctx, &mut timelines, true, true)
}
/// Like `initialize`, but the caller is already holding lock on Tenant::timelines.
@@ -189,6 +193,7 @@ impl UninitializedTimeline<'_> {
/// been initialized.
fn initialize_with_lock(
mut self,
ctx: &RequestContext,
timelines: &mut HashMap<TimelineId, Arc<Timeline>>,
load_layer_map: bool,
activate: bool,
@@ -229,7 +234,9 @@ impl UninitializedTimeline<'_> {
new_timeline.maybe_spawn_flush_loop();
if activate {
new_timeline.activate();
new_timeline
.activate(ctx)
.context("initializing timeline activation")?;
}
}
}
@@ -264,7 +271,10 @@ impl UninitializedTimeline<'_> {
.await
.context("Failed to flush after basebackup import")?;
self.initialize(ctx)
// Initialize without loading the layer map. We started with an empty layer map, and already
// updated it for the layers that we created during the import.
let mut timelines = self.owning_tenant.timelines.lock().unwrap();
self.initialize_with_lock(ctx, &mut timelines, false, true)
}
fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
@@ -469,7 +479,7 @@ impl Tenant {
local_metadata: Option<TimelineMetadata>,
ancestor: Option<Arc<Timeline>>,
first_save: bool,
_ctx: &RequestContext,
ctx: &RequestContext,
) -> anyhow::Result<()> {
let tenant_id = self.tenant_id;
@@ -504,7 +514,7 @@ impl Tenant {
// Do not start the walreceiver here. We do need the loaded layer map for reconcile_with_remote,
// but we shouldn't start the walreceiver before we have all the data locally, because a running walreceiver
// will ingest data that may require looking at layers which are not yet available locally.
match timeline.initialize_with_lock(&mut timelines_accessor, true, false) {
match timeline.initialize_with_lock(ctx, &mut timelines_accessor, true, false) {
Ok(new_timeline) => new_timeline,
Err(e) => {
error!("Failed to initialize timeline {tenant_id}/{timeline_id}: {e:?}");
@@ -616,7 +626,7 @@ impl Tenant {
match tenant_clone.attach(ctx).await {
Ok(_) => {}
Err(e) => {
tenant_clone.set_broken(&e.to_string());
tenant_clone.set_broken(e.to_string());
error!("error attaching tenant: {:?}", e);
}
}
@@ -629,7 +639,7 @@ impl Tenant {
///
/// Background task that downloads all data for a tenant and brings it to Active state.
///
#[instrument(skip(self, ctx), fields(tenant_id=%self.tenant_id))]
#[instrument(skip_all, fields(tenant_id=%self.tenant_id))]
async fn attach(self: &Arc<Tenant>, ctx: RequestContext) -> anyhow::Result<()> {
// Create directory with marker file to indicate attaching state.
// The load_local_tenants() function in tenant::mgr relies on the marker file
@@ -750,7 +760,7 @@ impl Tenant {
// Start background operations and open the tenant for business.
// The loops will shut themselves down when they notice that the tenant is inactive.
self.activate()?;
self.activate(&ctx)?;
info!("Done");
@@ -824,7 +834,10 @@ impl Tenant {
pub fn create_broken_tenant(conf: &'static PageServerConf, tenant_id: TenantId) -> Arc<Tenant> {
let wal_redo_manager = Arc::new(PostgresRedoManager::new(conf, tenant_id));
Arc::new(Tenant::new(
TenantState::Broken,
TenantState::Broken {
reason: "create_broken_tenant".into(),
backtrace: String::new(),
},
conf,
TenantConfOpt::default(),
wal_redo_manager,
@@ -885,7 +898,7 @@ impl Tenant {
match tenant_clone.load(&ctx).await {
Ok(()) => {}
Err(err) => {
tenant_clone.set_broken(&err.to_string());
tenant_clone.set_broken(err.to_string());
error!("could not load tenant {tenant_id}: {err:?}");
}
}
@@ -1022,7 +1035,7 @@ impl Tenant {
// Start background operations and open the tenant for business.
// The loops will shut themselves down when they notice that the tenant is inactive.
self.activate()?;
self.activate(ctx)?;
info!("Done");
@@ -1358,12 +1371,7 @@ impl Tenant {
// Stop the walreceiver first.
debug!("waiting for wal receiver to shutdown");
task_mgr::shutdown_tasks(
Some(TaskKind::WalReceiverManager),
Some(self.tenant_id),
Some(timeline_id),
)
.await;
timeline.walreceiver.stop().await;
debug!("wal receiver shutdown confirmed");
info!("waiting for timeline tasks to shutdown");
@@ -1442,7 +1450,7 @@ impl Tenant {
}
pub fn current_state(&self) -> TenantState {
*self.state.borrow()
self.state.borrow().clone()
}
pub fn is_active(&self) -> bool {
@@ -1450,18 +1458,18 @@ impl Tenant {
}
/// Changes tenant status to active, unless shutdown was already requested.
fn activate(&self) -> anyhow::Result<()> {
fn activate(&self, ctx: &RequestContext) -> anyhow::Result<()> {
let mut result = Ok(());
self.state.send_modify(|current_state| {
match *current_state {
match &*current_state {
TenantState::Active => {
// activate() was called on an already Active tenant. Shouldn't happen.
result = Err(anyhow::anyhow!("Tenant is already active"));
}
TenantState::Broken => {
TenantState::Broken { reason, .. } => {
// This shouldn't happen either
result = Err(anyhow::anyhow!(
"Could not activate tenant because it is in broken state"
"Could not activate tenant because it is in broken state due to: {reason}",
));
}
TenantState::Stopping => {
@@ -1472,7 +1480,7 @@ impl Tenant {
TenantState::Loading | TenantState::Attaching => {
*current_state = TenantState::Active;
info!("Activating tenant {}", self.tenant_id);
debug!(tenant_id = %self.tenant_id, "Activating tenant");
let timelines_accessor = self.timelines.lock().unwrap();
let not_broken_timelines = timelines_accessor
@@ -1483,9 +1491,47 @@ impl Tenant {
// down when they notice that the tenant is inactive.
tasks::start_background_loops(self.tenant_id);
let mut activated_timelines = 0;
let mut timelines_broken_during_activation = 0;
for timeline in not_broken_timelines {
timeline.activate();
match timeline
.activate(ctx)
.context("timeline activation for activating tenant")
{
Ok(()) => {
activated_timelines += 1;
}
Err(e) => {
error!(
"Failed to activate timeline {}: {:#}",
timeline.timeline_id, e
);
timeline.set_state(TimelineState::Broken);
*current_state = TenantState::broken_from_reason(format!(
"failed to activate timeline {}: {}",
timeline.timeline_id, e
));
timelines_broken_during_activation += 1;
}
}
}
let elapsed = self.loading_started_at.elapsed();
let total_timelines = timelines_accessor.len();
// log a lot of detail here, because some tenants sometimes suffer from user-visible
// delays in activation. see https://github.com/neondatabase/neon/issues/4025
info!(
since_creation_millis = elapsed.as_millis(),
tenant_id = %self.tenant_id,
activated_timelines,
timelines_broken_during_activation,
total_timelines,
post_state = <&'static str>::from(&*current_state),
"activation attempt finished"
);
}
}
});
@@ -1495,7 +1541,7 @@ impl Tenant {
/// Change tenant status to Stopping, to mark that it is being shut down
pub fn set_stopping(&self) {
self.state.send_modify(|current_state| {
match *current_state {
match current_state {
TenantState::Active | TenantState::Loading | TenantState::Attaching => {
*current_state = TenantState::Stopping;
@@ -1511,8 +1557,8 @@ impl Tenant {
timeline.set_state(TimelineState::Stopping);
}
}
TenantState::Broken => {
info!("Cannot set tenant to Stopping state, it is already in Broken state");
TenantState::Broken { reason, .. } => {
info!("Cannot set tenant to Stopping state, it is in Broken state due to: {reason}");
}
TenantState::Stopping => {
// The tenant was detached, or system shutdown was requested, while we were
@@ -1523,7 +1569,7 @@ impl Tenant {
});
}
pub fn set_broken(&self, reason: &str) {
pub fn set_broken(&self, reason: String) {
self.state.send_modify(|current_state| {
match *current_state {
TenantState::Active => {
@@ -1531,24 +1577,24 @@ impl Tenant {
// while loading or attaching a tenant. A tenant that has already been
// activated should never be marked as broken. We cope with it the best
// we can, but it shouldn't happen.
*current_state = TenantState::Broken;
warn!("Changing Active tenant to Broken state, reason: {}", reason);
*current_state = TenantState::broken_from_reason(reason);
}
TenantState::Broken => {
TenantState::Broken { .. } => {
// This shouldn't happen either
warn!("Tenant is already in Broken state");
}
TenantState::Stopping => {
// This shouldn't happen either
*current_state = TenantState::Broken;
warn!(
"Marking Stopping tenant as Broken state, reason: {}",
reason
);
*current_state = TenantState::broken_from_reason(reason);
}
TenantState::Loading | TenantState::Attaching => {
info!("Setting tenant as Broken state, reason: {}", reason);
*current_state = TenantState::Broken;
*current_state = TenantState::broken_from_reason(reason);
}
}
});
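For reference, a sketch of the `broken_from_reason` constructor used above. Its actual definition lives with the `TenantState` type and is not part of this diff, so the backtrace capture shown here is an assumption:

impl TenantState {
    pub fn broken_from_reason(reason: String) -> Self {
        // Assumed: capture the construction site so "how did we get Broken?"
        // is answerable from the state itself.
        let backtrace = std::backtrace::Backtrace::force_capture().to_string();
        TenantState::Broken { reason, backtrace }
    }
}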
@@ -1561,7 +1607,7 @@ impl Tenant {
pub async fn wait_to_become_active(&self) -> anyhow::Result<()> {
let mut receiver = self.state.subscribe();
loop {
let current_state = *receiver.borrow_and_update();
let current_state = receiver.borrow_and_update().clone();
match current_state {
TenantState::Loading | TenantState::Attaching => {
// in these states, there's a chance that we can reach ::Active
@@ -1570,12 +1616,12 @@ impl Tenant {
TenantState::Active { .. } => {
return Ok(());
}
TenantState::Broken | TenantState::Stopping => {
TenantState::Broken { .. } | TenantState::Stopping => {
// There's no chance the tenant can transition back into ::Active
anyhow::bail!(
"Tenant {} will not become active. Current state: {:?}",
self.tenant_id,
current_state,
&current_state,
);
}
}
@@ -1715,6 +1761,13 @@ impl Tenant {
pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
*self.tenant_conf.write().unwrap() = new_tenant_conf;
// Don't hold self.timelines.lock() during the notifies.
// There's no risk of deadlock right now, but there could be if we consolidate
// mutexes in struct Timeline in the future.
let timelines = self.list_timelines();
for timeline in timelines {
timeline.tenant_conf_updated();
}
}
fn create_timeline_data(
@@ -1756,21 +1809,23 @@ impl Tenant {
let (state, mut rx) = watch::channel(state);
tokio::spawn(async move {
let current_state = *rx.borrow_and_update();
let mut current_state: &'static str = From::from(&*rx.borrow_and_update());
let tid = tenant_id.to_string();
TENANT_STATE_METRIC
.with_label_values(&[&tid, current_state.as_str()])
.with_label_values(&[&tid, current_state])
.inc();
loop {
match rx.changed().await {
Ok(()) => {
let new_state = *rx.borrow();
let new_state: &'static str = From::from(&*rx.borrow_and_update());
TENANT_STATE_METRIC
.with_label_values(&[&tid, current_state.as_str()])
.with_label_values(&[&tid, current_state])
.dec();
TENANT_STATE_METRIC
.with_label_values(&[&tid, new_state.as_str()])
.with_label_values(&[&tid, new_state])
.inc();
current_state = new_state;
}
Err(_sender_dropped_error) => {
info!("Tenant dropped the state updates sender, quitting waiting for tenant state change");
@@ -1783,6 +1838,9 @@ impl Tenant {
Tenant {
tenant_id,
conf,
// using `Instant::now` here is a good enough approximation to catch tenants with really long
// activation times.
loading_started_at: Instant::now(),
tenant_conf: Arc::new(RwLock::new(tenant_conf)),
timelines: Mutex::new(HashMap::new()),
gc_cs: tokio::sync::Mutex::new(()),
@@ -1865,7 +1923,7 @@ impl Tenant {
.to_string();
// Convert the config to a toml file.
conf_content += &toml_edit::easy::to_string(&tenant_conf)?;
conf_content += &toml_edit::ser::to_string(&tenant_conf)?;
let mut target_config_file = VirtualFile::open_with_options(
target_config_path,
@@ -2093,7 +2151,7 @@ impl Tenant {
src_timeline: &Arc<Timeline>,
dst_id: TimelineId,
start_lsn: Option<Lsn>,
_ctx: &RequestContext,
ctx: &RequestContext,
) -> anyhow::Result<Arc<Timeline>> {
let src_id = src_timeline.timeline_id;
@@ -2186,7 +2244,7 @@ impl Tenant {
false,
Some(Arc::clone(src_timeline)),
)?
.initialize_with_lock(&mut timelines, true, true)?;
.initialize_with_lock(ctx, &mut timelines, true, true)?;
drop(timelines);
// Root timeline gets its layers during creation and uploads them along with the metadata.
@@ -2297,9 +2355,11 @@ impl Tenant {
)
})?;
// Initialize the timeline without loading the layer map, because we already updated the layer
// map above, when we imported the datadir.
let timeline = {
let mut timelines = self.timelines.lock().unwrap();
raw_timeline.initialize_with_lock(&mut timelines, false, true)?
raw_timeline.initialize_with_lock(ctx, &mut timelines, false, true)?
};
info!(
@@ -2791,6 +2851,9 @@ pub mod harness {
trace_read_requests: Some(tenant_conf.trace_read_requests),
eviction_policy: Some(tenant_conf.eviction_policy),
min_resident_size_override: tenant_conf.min_resident_size_override,
evictions_low_residence_duration_metric_threshold: Some(
tenant_conf.evictions_low_residence_duration_metric_threshold,
),
}
}
}
@@ -2823,7 +2886,13 @@ pub mod harness {
};
LOG_HANDLE.get_or_init(|| {
logging::init(logging::LogFormat::Test).expect("Failed to init test logging")
logging::init(
logging::LogFormat::Test,
// enable it in case the tests exercise code paths that use
// debug_assert_current_span_has_tenant_and_timeline_id
logging::TracingErrorLayerEnablement::EnableWithRustLogFilter,
)
.expect("Failed to init test logging")
});
let repo_dir = PageServerConf::test_repo_dir(test_name);

View File

@@ -39,6 +39,7 @@ pub mod defaults {
pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
pub const DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD: &str = "24 hour";
}
/// Per-tenant configuration options
@@ -93,6 +94,9 @@ pub struct TenantConf {
pub trace_read_requests: bool,
pub eviction_policy: EvictionPolicy,
pub min_resident_size_override: Option<u64>,
// See the corresponding metric's help string.
#[serde(with = "humantime_serde")]
pub evictions_low_residence_duration_metric_threshold: Duration,
}
/// Same as TenantConf, but this struct preserves the information about
@@ -164,6 +168,11 @@ pub struct TenantConfOpt {
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub min_resident_size_override: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(with = "humantime_serde")]
#[serde(default)]
pub evictions_low_residence_duration_metric_threshold: Option<Duration>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -228,6 +237,9 @@ impl TenantConfOpt {
min_resident_size_override: self
.min_resident_size_override
.or(global_conf.min_resident_size_override),
evictions_low_residence_duration_metric_threshold: self
.evictions_low_residence_duration_metric_threshold
.unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
}
}
}
@@ -260,6 +272,10 @@ impl Default for TenantConf {
trace_read_requests: false,
eviction_policy: EvictionPolicy::NoEviction,
min_resident_size_override: None,
evictions_low_residence_duration_metric_threshold: humantime::parse_duration(
DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD,
)
.expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
}
}
}
@@ -275,9 +291,9 @@ mod tests {
..TenantConfOpt::default()
};
let toml_form = toml_edit::easy::to_string(&small_conf).unwrap();
let toml_form = toml_edit::ser::to_string(&small_conf).unwrap();
assert_eq!(toml_form, "gc_horizon = 42\n");
assert_eq!(small_conf, toml_edit::easy::from_str(&toml_form).unwrap());
assert_eq!(small_conf, toml_edit::de::from_str(&toml_form).unwrap());
let json_form = serde_json::to_string(&small_conf).unwrap();
assert_eq!(json_form, "{\"gc_horizon\":42}");

View File

@@ -48,11 +48,10 @@ mod layer_coverage;
use crate::context::RequestContext;
use crate::keyspace::KeyPartitioning;
use crate::metrics::NUM_ONDISK_LAYERS;
use crate::repository::Key;
use crate::tenant::storage_layer::InMemoryLayer;
use crate::tenant::storage_layer::Layer;
use anyhow::Result;
use anyhow::{bail, Result};
use std::collections::VecDeque;
use std::ops::Range;
use std::sync::Arc;
@@ -126,7 +125,7 @@ where
///
/// Insert an on-disk layer.
///
pub fn insert_historic(&mut self, layer: Arc<L>) {
pub fn insert_historic(&mut self, layer: Arc<L>) -> anyhow::Result<()> {
self.layer_map.insert_historic_noflush(layer)
}
@@ -274,17 +273,21 @@ where
///
/// Helper function for BatchedUpdates::insert_historic
///
pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
self.historic.insert(
historic_layer_coverage::LayerKey::from(&*layer),
Arc::clone(&layer),
);
pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) -> anyhow::Result<()> {
let key = historic_layer_coverage::LayerKey::from(&*layer);
if self.historic.contains(&key) {
bail!(
"Attempt to insert duplicate layer {} in layer map",
layer.short_id()
);
}
self.historic.insert(key, Arc::clone(&layer));
if Self::is_l0(&layer) {
self.l0_delta_layers.push(layer);
}
NUM_ONDISK_LAYERS.inc();
Ok(())
}
///
@@ -309,8 +312,6 @@ where
"failed to locate removed historic layer from l0_delta_layers"
);
}
NUM_ONDISK_LAYERS.dec();
}
pub(self) fn replace_historic_noflush(
@@ -838,7 +839,7 @@ mod tests {
let expected_in_counts = (1, usize::from(expected_l0));
map.batch_update().insert_historic(remote.clone());
map.batch_update().insert_historic(remote.clone()).unwrap();
assert_eq!(count_layer_in(&map, &remote), expected_in_counts);
let replaced = map

View File

@@ -417,6 +417,14 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
}
}
pub fn contains(&self, layer_key: &LayerKey) -> bool {
match self.buffer.get(layer_key) {
Some(None) => false, // layer remove was buffered
Some(_) => true, // layer insert was buffered
None => self.layers.contains_key(layer_key), // no buffered ops for this layer
}
}
pub fn insert(&mut self, layer_key: LayerKey, value: Value) {
self.buffer.insert(layer_key, Some(value));
}
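A small illustrative sequence of the buffered semantics (a sketch: the `remove` method and `LayerKey: Clone` are assumptions, since only `contains` and `insert` appear in this diff):

let key = LayerKey::from(&*layer);
coverage.insert(key.clone(), Arc::clone(&layer)); // buffers an insert
assert!(coverage.contains(&key));  // buffer holds Some(value) -> true
coverage.remove(key.clone());      // hypothetical: buffers a removal (None)
assert!(!coverage.contains(&key)); // buffer holds None -> false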

View File

@@ -537,7 +537,7 @@ where
Some(tenant) => match tenant.current_state() {
TenantState::Attaching
| TenantState::Loading
| TenantState::Broken
| TenantState::Broken { .. }
| TenantState::Active => tenant.set_stopping(),
TenantState::Stopping => return Err(TenantStateError::IsStopping(tenant_id)),
},
@@ -565,7 +565,7 @@ where
let tenants_accessor = TENANTS.read().await;
match tenants_accessor.get(&tenant_id) {
Some(tenant) => {
tenant.set_broken(&e.to_string());
tenant.set_broken(e.to_string());
}
None => {
warn!("Tenant {tenant_id} got removed from memory");

View File

@@ -219,7 +219,8 @@ use utils::lsn::Lsn;
use crate::metrics::{
MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES,
REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
};
use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
use crate::{
@@ -367,9 +368,13 @@ impl RemoteTimelineClient {
/// Download index file
pub async fn download_index_file(&self) -> Result<IndexPart, DownloadError> {
let _unfinished_gauge_guard = self
.metrics
.call_begin(&RemoteOpFileKind::Index, &RemoteOpKind::Download);
let _unfinished_gauge_guard = self.metrics.call_begin(
&RemoteOpFileKind::Index,
&RemoteOpKind::Download,
crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize {
reason: "no need for a downloads gauge",
},
);
download::download_index_part(
self.conf,
@@ -398,9 +403,13 @@ impl RemoteTimelineClient {
layer_metadata: &LayerFileMetadata,
) -> anyhow::Result<u64> {
let downloaded_size = {
let _unfinished_gauge_guard = self
.metrics
.call_begin(&RemoteOpFileKind::Layer, &RemoteOpKind::Download);
let _unfinished_gauge_guard = self.metrics.call_begin(
&RemoteOpFileKind::Layer,
&RemoteOpKind::Download,
crate::metrics::RemoteTimelineClientMetricsCallTrackSize::DontTrackSize {
reason: "no need for a downloads gauge",
},
);
download::download_layer_file(
self.conf,
&self.storage_impl,
@@ -886,11 +895,32 @@ impl RemoteTimelineClient {
fn calls_unfinished_metric_impl(
&self,
op: &UploadOp,
) -> Option<(RemoteOpFileKind, RemoteOpKind)> {
) -> Option<(
RemoteOpFileKind,
RemoteOpKind,
RemoteTimelineClientMetricsCallTrackSize,
)> {
use RemoteTimelineClientMetricsCallTrackSize::DontTrackSize;
let res = match op {
UploadOp::UploadLayer(_, _) => (RemoteOpFileKind::Layer, RemoteOpKind::Upload),
UploadOp::UploadMetadata(_, _) => (RemoteOpFileKind::Index, RemoteOpKind::Upload),
UploadOp::Delete(file_kind, _) => (*file_kind, RemoteOpKind::Delete),
UploadOp::UploadLayer(_, m) => (
RemoteOpFileKind::Layer,
RemoteOpKind::Upload,
RemoteTimelineClientMetricsCallTrackSize::Bytes(m.file_size()),
),
UploadOp::UploadMetadata(_, _) => (
RemoteOpFileKind::Index,
RemoteOpKind::Upload,
DontTrackSize {
reason: "metadata uploads are tiny",
},
),
UploadOp::Delete(file_kind, _) => (
*file_kind,
RemoteOpKind::Delete,
DontTrackSize {
reason: "should we track deletes? positive or negative sign?",
},
),
UploadOp::Barrier(_) => {
// we do not account for these
return None;
@@ -900,20 +930,20 @@ impl RemoteTimelineClient {
}
fn calls_unfinished_metric_begin(&self, op: &UploadOp) {
let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) {
let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) {
Some(x) => x,
None => return,
};
let guard = self.metrics.call_begin(&file_kind, &op_kind);
let guard = self.metrics.call_begin(&file_kind, &op_kind, track_bytes);
guard.will_decrement_manually(); // in unfinished_ops_metric_end()
}
fn calls_unfinished_metric_end(&self, op: &UploadOp) {
let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) {
let (file_kind, op_kind, track_bytes) = match self.calls_unfinished_metric_impl(op) {
Some(x) => x,
None => return,
};
self.metrics.call_end(&file_kind, &op_kind);
self.metrics.call_end(&file_kind, &op_kind, track_bytes);
}
fn stop(&self) {
@@ -981,11 +1011,19 @@ impl RemoteTimelineClient {
mod tests {
use super::*;
use crate::{
tenant::harness::{TenantHarness, TIMELINE_ID},
context::RequestContext,
tenant::{
harness::{TenantHarness, TIMELINE_ID},
Tenant,
},
DEFAULT_PG_VERSION,
};
use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
use std::{collections::HashSet, path::Path};
use std::{
collections::HashSet,
path::{Path, PathBuf},
};
use tokio::runtime::EnterGuard;
use utils::lsn::Lsn;
pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
@@ -1034,39 +1072,80 @@ mod tests {
assert_eq!(found, expected);
}
struct TestSetup {
runtime: &'static tokio::runtime::Runtime,
entered_runtime: EnterGuard<'static>,
harness: TenantHarness<'static>,
tenant: Arc<Tenant>,
tenant_ctx: RequestContext,
remote_fs_dir: PathBuf,
client: Arc<RemoteTimelineClient>,
}
impl TestSetup {
fn new(test_name: &str) -> anyhow::Result<Self> {
// Use a current-thread runtime in the test
let runtime = Box::leak(Box::new(
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?,
));
let entered_runtime = runtime.enter();
let test_name = Box::leak(Box::new(format!("remote_timeline_client__{test_name}")));
let harness = TenantHarness::create(test_name)?;
let (tenant, ctx) = runtime.block_on(harness.load());
// create an empty timeline directory
let timeline =
tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
let _ = timeline.initialize(&ctx).unwrap();
let remote_fs_dir = harness.conf.workdir.join("remote_fs");
std::fs::create_dir_all(remote_fs_dir)?;
let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?;
let storage_config = RemoteStorageConfig {
max_concurrent_syncs: std::num::NonZeroUsize::new(
remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
)
.unwrap(),
max_sync_errors: std::num::NonZeroU32::new(
remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
)
.unwrap(),
storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
};
let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();
let client = Arc::new(RemoteTimelineClient {
conf: harness.conf,
runtime,
tenant_id: harness.tenant_id,
timeline_id: TIMELINE_ID,
storage_impl: storage,
upload_queue: Mutex::new(UploadQueue::Uninitialized),
metrics: Arc::new(RemoteTimelineClientMetrics::new(
&harness.tenant_id,
&TIMELINE_ID,
)),
});
Ok(Self {
runtime,
entered_runtime,
harness,
tenant,
tenant_ctx: ctx,
remote_fs_dir,
client,
})
}
}
// Test scheduling
#[test]
fn upload_scheduling() -> anyhow::Result<()> {
// Use a current-thread runtime in the test
let runtime = Box::leak(Box::new(
tokio::runtime::Builder::new_current_thread()
.enable_all()
.build()?,
));
let _entered = runtime.enter();
let harness = TenantHarness::create("upload_scheduling")?;
let (tenant, ctx) = runtime.block_on(harness.load());
let _timeline =
tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
let timeline_path = harness.timeline_path(&TIMELINE_ID);
let remote_fs_dir = harness.conf.workdir.join("remote_fs");
std::fs::create_dir_all(remote_fs_dir)?;
let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?;
let storage_config = RemoteStorageConfig {
max_concurrent_syncs: std::num::NonZeroUsize::new(
remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
)
.unwrap(),
max_sync_errors: std::num::NonZeroU32::new(
remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
)
.unwrap(),
storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
};
// Test outline:
//
// Schedule upload of a bunch of layers. Check that they are started immediately, not queued
@@ -1081,21 +1160,19 @@ mod tests {
// Schedule another deletion. Check that it's launched immediately.
// Schedule index upload. Check that it's queued
println!("workdir: {}", harness.conf.workdir.display());
let storage_impl = GenericRemoteStorage::from_config(&storage_config)?;
let client = Arc::new(RemoteTimelineClient {
conf: harness.conf,
let TestSetup {
runtime,
tenant_id: harness.tenant_id,
timeline_id: TIMELINE_ID,
storage_impl,
upload_queue: Mutex::new(UploadQueue::Uninitialized),
metrics: Arc::new(RemoteTimelineClientMetrics::new(
&harness.tenant_id,
&TIMELINE_ID,
)),
});
entered_runtime: _entered_runtime,
harness,
tenant: _tenant,
tenant_ctx: _tenant_ctx,
remote_fs_dir,
client,
} = TestSetup::new("upload_scheduling").unwrap();
let timeline_path = harness.timeline_path(&TIMELINE_ID);
println!("workdir: {}", harness.conf.workdir.display());
let remote_timeline_dir =
remote_fs_dir.join(timeline_path.strip_prefix(&harness.conf.workdir)?);
@@ -1216,4 +1293,90 @@ mod tests {
Ok(())
}
#[test]
fn bytes_unfinished_gauge_for_layer_file_uploads() -> anyhow::Result<()> {
// Setup
let TestSetup {
runtime,
harness,
client,
..
} = TestSetup::new("metrics")?;
let metadata = dummy_metadata(Lsn(0x10));
client.init_upload_queue_for_empty_remote(&metadata)?;
let timeline_path = harness.timeline_path(&TIMELINE_ID);
let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
let content_1 = dummy_contents("foo");
std::fs::write(
timeline_path.join(layer_file_name_1.file_name()),
&content_1,
)?;
#[derive(Debug, PartialEq)]
struct BytesStartedFinished {
started: Option<usize>,
finished: Option<usize>,
}
let get_bytes_started_stopped = || {
let started = client
.metrics
.get_bytes_started_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload)
.map(|v| v.try_into().unwrap());
let stopped = client
.metrics
.get_bytes_finished_counter_value(&RemoteOpFileKind::Layer, &RemoteOpKind::Upload)
.map(|v| v.try_into().unwrap());
BytesStartedFinished {
started,
finished: stopped,
}
};
// Test
let init = get_bytes_started_stopped();
client.schedule_layer_file_upload(
&layer_file_name_1,
&LayerFileMetadata::new(content_1.len() as u64),
)?;
let pre = get_bytes_started_stopped();
runtime.block_on(client.wait_completion())?;
let post = get_bytes_started_stopped();
// Validate
assert_eq!(
init,
BytesStartedFinished {
started: None,
finished: None
}
);
assert_eq!(
pre,
BytesStartedFinished {
started: Some(content_1.len()),
// assert that the _finished metric is created eagerly so that subtractions work on first sample
finished: Some(0),
}
);
assert_eq!(
post,
BytesStartedFinished {
started: Some(content_1.len()),
finished: Some(content_1.len())
}
);
Ok(())
}
}

View File

@@ -16,6 +16,7 @@ use tracing::{info, warn};
use crate::config::PageServerConf;
use crate::tenant::storage_layer::LayerFileName;
use crate::tenant::timeline::debug_assert_current_span_has_tenant_and_timeline_id;
use crate::{exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS};
use remote_storage::{DownloadError, GenericRemoteStorage};
use utils::crashsafe::path_with_suffix_extension;
@@ -43,6 +44,8 @@ pub async fn download_layer_file<'a>(
layer_file_name: &'a LayerFileName,
layer_metadata: &'a LayerFileMetadata,
) -> Result<u64, DownloadError> {
debug_assert_current_span_has_tenant_and_timeline_id();
let timeline_path = conf.timeline_path(&timeline_id, &tenant_id);
let local_path = timeline_path.join(layer_file_name.file_name());
@@ -154,7 +157,7 @@ pub async fn download_layer_file<'a>(
.with_context(|| format!("Could not fsync layer file {}", local_path.display(),))
.map_err(DownloadError::Other)?;
tracing::info!("download complete: {}", local_path.display());
tracing::debug!("download complete: {}", local_path.display());
Ok(bytes_amount)
}

View File

@@ -74,7 +74,7 @@ pub(super) async fn upload_timeline_layer<'a>(
})?;
storage
.upload(Box::new(source_file), fs_size, &storage_path, None)
.upload(source_file, fs_size, &storage_path, None)
.await
.with_context(|| {
format!(

View File

@@ -209,7 +209,7 @@ async fn wait_for_active_tenant(
loop {
match tenant_state_updates.changed().await {
Ok(()) => {
let new_state = *tenant_state_updates.borrow();
let new_state = &*tenant_state_updates.borrow();
match new_state {
TenantState::Active => {
debug!("Tenant state changed to active, continuing the task loop");

View File

@@ -14,10 +14,12 @@ use pageserver_api::models::{
DownloadRemoteLayersTaskState, LayerMapInfo, LayerResidenceStatus, TimelineState,
};
use remote_storage::GenericRemoteStorage;
use storage_broker::BrokerClientChannel;
use tokio::sync::{oneshot, watch, Semaphore, TryAcquireError};
use tokio_util::sync::CancellationToken;
use tracing::*;
use utils::id::TenantTimelineId;
use utils::tracing_span_assert;
use std::cmp::{max, min, Ordering};
use std::collections::BinaryHeap;
@@ -30,7 +32,7 @@ use std::sync::atomic::{AtomicI64, Ordering as AtomicOrdering};
use std::sync::{Arc, Mutex, MutexGuard, RwLock, Weak};
use std::time::{Duration, Instant, SystemTime};
use crate::broker_client::is_broker_client_initialized;
use crate::broker_client::{get_broker_client, is_broker_client_initialized};
use crate::context::{DownloadBehavior, RequestContext};
use crate::tenant::remote_timeline_client::{self, index::LayerFileMetadata};
use crate::tenant::storage_layer::{
@@ -47,7 +49,7 @@ use crate::tenant::{
use crate::config::PageServerConf;
use crate::keyspace::{KeyPartitioning, KeySpace};
use crate::metrics::TimelineMetrics;
use crate::metrics::{TimelineMetrics, UNEXPECTED_ONDEMAND_DOWNLOADS};
use crate::pgdatadir_mapping::LsnForTimestamp;
use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
@@ -71,11 +73,12 @@ use crate::walredo::WalRedoManager;
use crate::METADATA_FILE_NAME;
use crate::ZERO_PAGE;
use crate::{is_temporary, task_mgr};
use walreceiver::spawn_connection_manager_task;
pub(super) use self::eviction_task::EvictionTaskTenantState;
use self::eviction_task::EvictionTaskTimelineState;
use self::walreceiver::{WalReceiver, WalReceiverConf};
use super::config::TenantConf;
use super::layer_map::BatchedUpdates;
use super::remote_timeline_client::index::IndexPart;
use super::remote_timeline_client::RemoteTimelineClient;
@@ -160,7 +163,7 @@ pub struct Timeline {
ancestor_timeline: Option<Arc<Timeline>>,
ancestor_lsn: Lsn,
metrics: TimelineMetrics,
pub(super) metrics: TimelineMetrics,
/// Ensures layers aren't frozen by checkpointer between
/// [`Timeline::get_layer_for_write`] and layer reads.
@@ -214,6 +217,7 @@ pub struct Timeline {
/// or None if WAL receiver has not received anything for this timeline
/// yet.
pub last_received_wal: Mutex<Option<WalReceiverInfo>>,
pub walreceiver: WalReceiver,
/// Relation size cache
pub rel_size_cache: RwLock<HashMap<RelTag, (Lsn, BlockNumber)>>,
@@ -866,10 +870,18 @@ impl Timeline {
Ok(())
}
pub fn activate(self: &Arc<Self>) {
pub fn activate(self: &Arc<Self>, ctx: &RequestContext) -> anyhow::Result<()> {
if is_broker_client_initialized() {
self.launch_wal_receiver(ctx, get_broker_client().clone())?;
} else if cfg!(test) {
info!("not launching WAL receiver because broker client hasn't been initialized");
} else {
anyhow::bail!("broker client not initialized");
}
self.set_state(TimelineState::Active);
self.launch_wal_receiver();
self.launch_eviction_task();
Ok(())
}
pub fn set_state(&self, new_state: TimelineState) {
@@ -925,6 +937,7 @@ impl Timeline {
}
}
#[instrument(skip_all, fields(tenant = %self.tenant_id, timeline = %self.timeline_id))]
pub async fn download_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
let Some(layer) = self.find_layer(layer_file_name) else { return Ok(None) };
let Some(remote_layer) = layer.downcast_remote_layer() else { return Ok(Some(false)) };
@@ -1126,6 +1139,8 @@ impl Timeline {
if let Some(delta) = local_layer_residence_duration {
self.metrics
.evictions_with_low_residence_duration
.read()
.unwrap()
.observe(delta);
info!(layer=%local_layer.short_id(), residence_millis=delta.as_millis(), "evicted layer after known residence period");
} else {
@@ -1199,6 +1214,35 @@ impl Timeline {
.unwrap_or(self.conf.default_tenant_conf.eviction_policy)
}
fn get_evictions_low_residence_duration_metric_threshold(
tenant_conf: &TenantConfOpt,
default_tenant_conf: &TenantConf,
) -> Duration {
tenant_conf
.evictions_low_residence_duration_metric_threshold
.unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold)
}
pub(super) fn tenant_conf_updated(&self) {
// NB: Most tenant conf options are read by background loops, so
// changes will automatically be picked up.
// The threshold, however, is embedded in the metric, so we need to update it here.
{
let new_threshold = Self::get_evictions_low_residence_duration_metric_threshold(
&self.tenant_conf.read().unwrap(),
&self.conf.default_tenant_conf,
);
let tenant_id_str = self.tenant_id.to_string();
let timeline_id_str = self.timeline_id.to_string();
self.metrics
.evictions_with_low_residence_duration
.write()
.unwrap()
.change_threshold(&tenant_id_str, &timeline_id_str, new_threshold);
}
}
/// Open a Timeline handle.
///
/// Loads the metadata for the timeline into memory, but not the layer map.
@@ -1220,7 +1264,36 @@ impl Timeline {
let (layer_flush_start_tx, _) = tokio::sync::watch::channel(0);
let (layer_flush_done_tx, _) = tokio::sync::watch::channel((0, Ok(())));
let tenant_conf_guard = tenant_conf.read().unwrap();
let wal_connect_timeout = tenant_conf_guard
.walreceiver_connect_timeout
.unwrap_or(conf.default_tenant_conf.walreceiver_connect_timeout);
let lagging_wal_timeout = tenant_conf_guard
.lagging_wal_timeout
.unwrap_or(conf.default_tenant_conf.lagging_wal_timeout);
let max_lsn_wal_lag = tenant_conf_guard
.max_lsn_wal_lag
.unwrap_or(conf.default_tenant_conf.max_lsn_wal_lag);
let evictions_low_residence_duration_metric_threshold =
Self::get_evictions_low_residence_duration_metric_threshold(
&tenant_conf_guard,
&conf.default_tenant_conf,
);
drop(tenant_conf_guard);
Arc::new_cyclic(|myself| {
let walreceiver = WalReceiver::new(
TenantTimelineId::new(tenant_id, timeline_id),
Weak::clone(myself),
WalReceiverConf {
wal_connect_timeout,
lagging_wal_timeout,
max_lsn_wal_lag,
auth_token: crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(),
availability_zone: conf.availability_zone.clone(),
},
);
let mut result = Timeline {
conf,
tenant_conf,
@@ -1231,6 +1304,7 @@ impl Timeline {
layers: RwLock::new(LayerMap::default()),
walredo_mgr,
walreceiver,
remote_client: remote_client.map(Arc::new),
@@ -1252,7 +1326,7 @@ impl Timeline {
&timeline_id,
crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
"mtime",
conf.evictions_low_residence_duration_metric_threshold,
evictions_low_residence_duration_metric_threshold,
),
),
@@ -1350,44 +1424,17 @@ impl Timeline {
*flush_loop_state = FlushLoopState::Running;
}
pub(super) fn launch_wal_receiver(self: &Arc<Self>) {
if !is_broker_client_initialized() {
if cfg!(test) {
info!("not launching WAL receiver because broker client hasn't been initialized");
return;
} else {
panic!("broker client not initialized");
}
}
pub(super) fn launch_wal_receiver(
&self,
ctx: &RequestContext,
broker_client: BrokerClientChannel,
) -> anyhow::Result<()> {
info!(
"launching WAL receiver for timeline {} of tenant {}",
self.timeline_id, self.tenant_id
);
let tenant_conf_guard = self.tenant_conf.read().unwrap();
let lagging_wal_timeout = tenant_conf_guard
.lagging_wal_timeout
.unwrap_or(self.conf.default_tenant_conf.lagging_wal_timeout);
let walreceiver_connect_timeout = tenant_conf_guard
.walreceiver_connect_timeout
.unwrap_or(self.conf.default_tenant_conf.walreceiver_connect_timeout);
let max_lsn_wal_lag = tenant_conf_guard
.max_lsn_wal_lag
.unwrap_or(self.conf.default_tenant_conf.max_lsn_wal_lag);
drop(tenant_conf_guard);
let self_clone = Arc::clone(self);
let background_ctx =
// XXX: this is a detached_child. Plumb through the ctx from call sites.
RequestContext::todo_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
spawn_connection_manager_task(
self_clone,
walreceiver_connect_timeout,
lagging_wal_timeout,
max_lsn_wal_lag,
crate::config::SAFEKEEPER_AUTH_TOKEN.get().cloned(),
self.conf.availability_zone.clone(),
background_ctx,
);
self.walreceiver.start(ctx, broker_client)?;
Ok(())
}
///
@@ -1438,7 +1485,7 @@ impl Timeline {
trace!("found layer {}", layer.path().display());
total_physical_size += file_size;
updates.insert_historic(Arc::new(layer));
updates.insert_historic(Arc::new(layer))?;
num_layers += 1;
} else if let Some(deltafilename) = DeltaFileName::parse_str(&fname) {
// Create a DeltaLayer struct for each delta file.
@@ -1470,7 +1517,7 @@ impl Timeline {
trace!("found layer {}", layer.path().display());
total_physical_size += file_size;
updates.insert_historic(Arc::new(layer));
updates.insert_historic(Arc::new(layer))?;
num_layers += 1;
} else if fname == METADATA_FILE_NAME || fname.ends_with(".old") {
// ignore these
@@ -1544,7 +1591,7 @@ impl Timeline {
// remote index file?
// If so, rename_to_backup those files & replace their local layer with
// a RemoteLayer in the layer map so that we re-download them on-demand.
if let Some(local_layer) = local_layer {
if let Some(local_layer) = &local_layer {
let local_layer_path = local_layer
.local_path()
.expect("caller must ensure that local_layers only contains local layers");
@@ -1569,7 +1616,6 @@ impl Timeline {
anyhow::bail!("could not rename file {local_layer_path:?}: {err:?}");
} else {
self.metrics.resident_physical_size_gauge.sub(local_size);
updates.remove_historic(local_layer);
// fall-through to adding the remote layer
}
} else {
@@ -1605,7 +1651,11 @@ impl Timeline {
);
let remote_layer = Arc::new(remote_layer);
updates.insert_historic(remote_layer);
if let Some(local_layer) = &local_layer {
updates.replace_historic(local_layer, remote_layer)?;
} else {
updates.insert_historic(remote_layer)?;
}
}
LayerFileName::Delta(deltafilename) => {
// Create a RemoteLayer for the delta file.
@@ -1629,7 +1679,11 @@ impl Timeline {
LayerAccessStats::for_loading_layer(LayerResidenceStatus::Evicted),
);
let remote_layer = Arc::new(remote_layer);
updates.insert_historic(remote_layer);
if let Some(local_layer) = &local_layer {
updates.replace_historic(local_layer, remote_layer)?;
} else {
updates.insert_historic(remote_layer)?;
}
}
}
}
@@ -2303,6 +2357,7 @@ impl Timeline {
id,
ctx.task_kind()
);
UNEXPECTED_ONDEMAND_DOWNLOADS.inc();
timeline.download_remote_layer(remote_layer).await?;
continue 'layer_map_search;
}
@@ -2676,7 +2731,7 @@ impl Timeline {
.write()
.unwrap()
.batch_update()
.insert_historic(Arc::new(new_delta));
.insert_historic(Arc::new(new_delta))?;
// update the timeline's physical size
let sz = new_delta_path.metadata()?.len();
@@ -2881,7 +2936,7 @@ impl Timeline {
self.metrics
.resident_physical_size_gauge
.add(metadata.len());
updates.insert_historic(Arc::new(l));
updates.insert_historic(Arc::new(l))?;
}
updates.flush();
drop(layers);
@@ -3314,7 +3369,7 @@ impl Timeline {
new_layer_paths.insert(new_delta_path, LayerFileMetadata::new(metadata.len()));
let x: Arc<dyn PersistentLayer + 'static> = Arc::new(l);
updates.insert_historic(x);
updates.insert_historic(x)?;
}
// Now that we have reshuffled the data to set of new delta layers, we can
@@ -3766,11 +3821,13 @@ impl Timeline {
/// If the caller has a deadline or needs a timeout, they can simply stop polling:
/// we're **cancellation-safe** because the download happens in a separate task_mgr task.
/// So, the current download attempt will run to completion even if we stop polling.
#[instrument(skip_all, fields(tenant_id=%self.tenant_id, timeline_id=%self.timeline_id, layer=%remote_layer.short_id()))]
#[instrument(skip_all, fields(layer=%remote_layer.short_id()))]
pub async fn download_remote_layer(
&self,
remote_layer: Arc<RemoteLayer>,
) -> anyhow::Result<()> {
debug_assert_current_span_has_tenant_and_timeline_id();
use std::sync::atomic::Ordering::Relaxed;
let permit = match Arc::clone(&remote_layer.ongoing_download)
@@ -3814,6 +3871,8 @@ impl Timeline {
.await;
if let Ok(size) = &result {
info!("layer file download finished");
// XXX the temp file is still around in Err() case
// and consumes space until we clean up upon pageserver restart.
self_clone.metrics.resident_physical_size_gauge.add(*size);
@@ -3885,6 +3944,8 @@ impl Timeline {
updates.flush();
drop(layers);
info!("on-demand download successful");
// Now that we've inserted the download into the layer map,
// close the semaphore. This will make other waiters for
// this download return Ok(()).
@@ -3892,7 +3953,7 @@ impl Timeline {
remote_layer.ongoing_download.close();
} else {
// Keep semaphore open. We'll drop the permit at the end of the function.
error!("on-demand download failed: {:?}", result.as_ref().unwrap_err());
error!("layer file download failed: {:?}", result.as_ref().unwrap_err());
}
// Don't treat it as an error if the task that triggered the download
@@ -4203,3 +4264,30 @@ fn rename_to_backup(path: &Path) -> anyhow::Result<()> {
bail!("couldn't find an unused backup number for {:?}", path)
}
#[inline]
pub(crate) fn debug_assert_current_span_has_tenant_and_timeline_id() {
pub static TENANT_ID_EXTRACTOR: once_cell::sync::Lazy<
tracing_span_assert::MultiNameExtractor<2>,
> = once_cell::sync::Lazy::new(|| {
tracing_span_assert::MultiNameExtractor::new("TenantId", ["tenant_id", "tenant"])
});
pub static TIMELINE_ID_EXTRACTOR: once_cell::sync::Lazy<
tracing_span_assert::MultiNameExtractor<2>,
> = once_cell::sync::Lazy::new(|| {
tracing_span_assert::MultiNameExtractor::new("TimelineId", ["timeline_id", "timeline"])
});
#[cfg(debug_assertions)]
match tracing_span_assert::check_fields_present([
&*TENANT_ID_EXTRACTOR,
&*TIMELINE_ID_EXTRACTOR,
]) {
Ok(()) => (),
Err(missing) => panic!(
"missing extractors: {:?}",
missing.into_iter().map(|e| e.name()).collect::<Vec<_>>()
),
}
}
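A usage sketch: the check passes when the current span or an ancestor carries both fields, which the #[instrument] attributes elsewhere in this diff provide (the function below is hypothetical):

#[instrument(skip_all, fields(tenant_id = %tenant_id, timeline_id = %timeline_id))]
async fn do_timeline_work(tenant_id: TenantId, timeline_id: TimelineId) {
    // Panics under debug_assertions if neither this span nor a parent
    // carries tenant/timeline fields; compiled out in release builds.
    debug_assert_current_span_has_tenant_and_timeline_id();
}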

View File

@@ -23,14 +23,133 @@
mod connection_manager;
mod walreceiver_connection;
use crate::task_mgr::WALRECEIVER_RUNTIME;
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::{self, TaskKind, WALRECEIVER_RUNTIME};
use crate::tenant::timeline::walreceiver::connection_manager::{
connection_manager_loop_step, ConnectionManagerState,
};
use anyhow::Context;
use std::future::Future;
use std::num::NonZeroU64;
use std::ops::ControlFlow;
use std::sync::atomic::{self, AtomicBool};
use std::sync::{Arc, Weak};
use std::time::Duration;
use storage_broker::BrokerClientChannel;
use tokio::select;
use tokio::sync::watch;
use tokio_util::sync::CancellationToken;
use tracing::*;
pub use connection_manager::spawn_connection_manager_task;
use utils::id::TenantTimelineId;
use super::Timeline;
#[derive(Clone)]
pub struct WalReceiverConf {
/// The timeout on the connection to safekeeper for WAL streaming.
pub wal_connect_timeout: Duration,
/// The timeout used to determine when the current connection is "stale" and we should reconnect to another safekeeper.
pub lagging_wal_timeout: Duration,
/// The LSN lag used to determine when the current connection is lagging too far behind and we should reconnect to another safekeeper.
pub max_lsn_wal_lag: NonZeroU64,
pub auth_token: Option<Arc<String>>,
pub availability_zone: Option<String>,
}
pub struct WalReceiver {
timeline: TenantTimelineId,
timeline_ref: Weak<Timeline>,
conf: WalReceiverConf,
started: AtomicBool,
}
impl WalReceiver {
pub fn new(
timeline: TenantTimelineId,
timeline_ref: Weak<Timeline>,
conf: WalReceiverConf,
) -> Self {
Self {
timeline,
timeline_ref,
conf,
started: AtomicBool::new(false),
}
}
pub fn start(
&self,
ctx: &RequestContext,
mut broker_client: BrokerClientChannel,
) -> anyhow::Result<()> {
if self.started.load(atomic::Ordering::Acquire) {
anyhow::bail!("Wal receiver is already started");
}
let timeline = self.timeline_ref.upgrade().with_context(|| {
format!("walreceiver start on a dropped timeline {}", self.timeline)
})?;
let tenant_id = timeline.tenant_id;
let timeline_id = timeline.timeline_id;
let walreceiver_ctx =
ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
let wal_receiver_conf = self.conf.clone();
task_mgr::spawn(
WALRECEIVER_RUNTIME.handle(),
TaskKind::WalReceiverManager,
Some(tenant_id),
Some(timeline_id),
&format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
false,
async move {
info!("WAL receiver manager started, connecting to broker");
let mut connection_manager_state = ConnectionManagerState::new(
timeline,
wal_receiver_conf,
);
loop {
select! {
_ = task_mgr::shutdown_watcher() => {
info!("WAL receiver shutdown requested, shutting down");
connection_manager_state.shutdown().await;
return Ok(());
},
loop_step_result = connection_manager_loop_step(
&mut broker_client,
&mut connection_manager_state,
&walreceiver_ctx,
) => match loop_step_result {
ControlFlow::Continue(()) => continue,
ControlFlow::Break(()) => {
info!("Connection manager loop ended, shutting down");
connection_manager_state.shutdown().await;
return Ok(());
}
},
}
}
}.instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id))
);
self.started.store(true, atomic::Ordering::Release);
Ok(())
}
pub async fn stop(&self) {
task_mgr::shutdown_tasks(
Some(TaskKind::WalReceiverManager),
Some(self.timeline.tenant_id),
Some(self.timeline.timeline_id),
)
.await;
self.started.store(false, atomic::Ordering::Release);
}
}
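
`start` guards against a double start with a separate `load` and a later `store` on the `started` flag, which is fine while callers are serialized. As a point of comparison, here is a sketch of the same guard written with `compare_exchange` — an alternative, not what the code above does — which also closes the gap between the check and the set if `start` were ever called concurrently:

```
use std::sync::atomic::{AtomicBool, Ordering};

struct StartOnce {
    started: AtomicBool,
}

impl StartOnce {
    fn start(&self) -> anyhow::Result<()> {
        // Atomically flip false -> true; a concurrent second caller gets Err
        // from compare_exchange and bails out, with no check-then-set window.
        self.started
            .compare_exchange(false, true, Ordering::AcqRel, Ordering::Acquire)
            .map_err(|_| anyhow::anyhow!("already started"))?;
        // ... spawn the background task here ...
        Ok(())
    }
}
```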
/// A handle to an asynchronous task.
/// The task has a channel that it can use to communicate its lifecycle events in a certain form; see [`TaskEvent`].
@@ -39,26 +158,26 @@ pub use connection_manager::spawn_connection_manager_task;
/// Note that the communication happens via a `watch` channel, which does not accumulate events: each submission replaces the previous value with the newer one.
/// That may lead to certain events not being observed by the listener; see the sketch at the end of this file's diff.
#[derive(Debug)]
pub struct TaskHandle<E> {
struct TaskHandle<E> {
join_handle: Option<tokio::task::JoinHandle<anyhow::Result<()>>>,
events_receiver: watch::Receiver<TaskStateUpdate<E>>,
cancellation: CancellationToken,
}
pub enum TaskEvent<E> {
enum TaskEvent<E> {
Update(TaskStateUpdate<E>),
End(anyhow::Result<()>),
}
#[derive(Debug, Clone)]
pub enum TaskStateUpdate<E> {
enum TaskStateUpdate<E> {
Started,
Progress(E),
}
impl<E: Clone> TaskHandle<E> {
/// Initializes the task, starting it immediately after the creation.
pub fn spawn<Fut>(
fn spawn<Fut>(
task: impl FnOnce(watch::Sender<TaskStateUpdate<E>>, CancellationToken) -> Fut + Send + 'static,
) -> Self
where
@@ -131,7 +250,7 @@ impl<E: Clone> TaskHandle<E> {
}
/// Aborts current task, waiting for it to finish.
pub async fn shutdown(self) {
async fn shutdown(self) {
if let Some(jh) = self.join_handle {
self.cancellation.cancel();
match jh.await {
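
To make the `watch`-channel caveat in the `TaskHandle` docs concrete, here is a small self-contained demonstration using plain tokio, independent of this crate: three updates are sent back-to-back, and the receiver only ever observes the last one.

```
use tokio::sync::watch;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = watch::channel(0u32);

    // Three quick updates before the receiver looks at the channel:
    for i in 1..=3 {
        tx.send(i).unwrap();
    }

    // A watch channel stores only the latest value; 1 and 2 are never seen.
    rx.changed().await.unwrap();
    assert_eq!(*rx.borrow(), 3);
    println!("observed only the latest update: {}", *rx.borrow());
}
```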

View File

@@ -11,11 +11,9 @@
use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, time::Duration};
use super::TaskStateUpdate;
use crate::broker_client::get_broker_client;
use super::{TaskStateUpdate, WalReceiverConf};
use crate::context::{DownloadBehavior, RequestContext};
use crate::task_mgr::WALRECEIVER_RUNTIME;
use crate::task_mgr::{self, TaskKind};
use crate::task_mgr::TaskKind;
use crate::tenant::Timeline;
use anyhow::Context;
use chrono::{NaiveDateTime, Utc};
@@ -38,75 +36,17 @@ use utils::{
use super::{walreceiver_connection::WalConnectionStatus, TaskEvent, TaskHandle};
/// Spawns the loop to take care of the timeline's WAL streaming connection.
pub fn spawn_connection_manager_task(
timeline: Arc<Timeline>,
wal_connect_timeout: Duration,
lagging_wal_timeout: Duration,
max_lsn_wal_lag: NonZeroU64,
auth_token: Option<Arc<String>>,
availability_zone: Option<String>,
ctx: RequestContext,
) {
let mut broker_client = get_broker_client().clone();
let tenant_id = timeline.tenant_id;
let timeline_id = timeline.timeline_id;
task_mgr::spawn(
WALRECEIVER_RUNTIME.handle(),
TaskKind::WalReceiverManager,
Some(tenant_id),
Some(timeline_id),
&format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
false,
async move {
info!("WAL receiver manager started, connecting to broker");
let mut walreceiver_state = WalreceiverState::new(
timeline,
wal_connect_timeout,
lagging_wal_timeout,
max_lsn_wal_lag,
auth_token,
availability_zone,
);
loop {
select! {
_ = task_mgr::shutdown_watcher() => {
info!("WAL receiver shutdown requested, shutting down");
walreceiver_state.shutdown().await;
return Ok(());
},
loop_step_result = connection_manager_loop_step(
&mut broker_client,
&mut walreceiver_state,
&ctx,
) => match loop_step_result {
ControlFlow::Continue(()) => continue,
ControlFlow::Break(()) => {
info!("Connection manager loop ended, shutting down");
walreceiver_state.shutdown().await;
return Ok(());
}
},
}
}
}
.instrument(
info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id),
),
);
}
/// Attempts to subscribe for timeline updates, pushed by safekeepers into the broker.
/// Based on the updates, decides whether to start, keep, or stop a WAL receiver task.
/// If storage broker subscription is cancelled, exits.
async fn connection_manager_loop_step(
pub(super) async fn connection_manager_loop_step(
broker_client: &mut BrokerClientChannel,
walreceiver_state: &mut WalreceiverState,
connection_manager_state: &mut ConnectionManagerState,
ctx: &RequestContext,
) -> ControlFlow<(), ()> {
let mut timeline_state_updates = walreceiver_state.timeline.subscribe_for_state_updates();
let mut timeline_state_updates = connection_manager_state
.timeline
.subscribe_for_state_updates();
match wait_for_active_timeline(&mut timeline_state_updates).await {
ControlFlow::Continue(()) => {}
@@ -117,8 +57,8 @@ async fn connection_manager_loop_step(
}
let id = TenantTimelineId {
tenant_id: walreceiver_state.timeline.tenant_id,
timeline_id: walreceiver_state.timeline.timeline_id,
tenant_id: connection_manager_state.timeline.tenant_id,
timeline_id: connection_manager_state.timeline.timeline_id,
};
// Subscribe to the broker updates. Stream shares underlying TCP connection
@@ -128,7 +68,7 @@ async fn connection_manager_loop_step(
info!("Subscribed for broker timeline updates");
loop {
let time_until_next_retry = walreceiver_state.time_until_next_retry();
let time_until_next_retry = connection_manager_state.time_until_next_retry();
// These things are happening concurrently:
//
@@ -141,12 +81,12 @@ async fn connection_manager_loop_step(
// - timeline state changes to something that does not allow walreceiver to run concurrently
select! {
Some(wal_connection_update) = async {
match walreceiver_state.wal_connection.as_mut() {
match connection_manager_state.wal_connection.as_mut() {
Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await),
None => None,
}
} => {
let wal_connection = walreceiver_state.wal_connection.as_mut()
let wal_connection = connection_manager_state.wal_connection.as_mut()
.expect("Should have a connection, as checked by the corresponding select! guard");
match wal_connection_update {
TaskEvent::Update(TaskStateUpdate::Started) => {},
@@ -156,7 +96,7 @@ async fn connection_manager_loop_step(
// from this safekeeper. This is good enough to clean unsuccessful
// retries history and allow reconnecting to this safekeeper without
// sleeping for a long time.
walreceiver_state.wal_connection_retries.remove(&wal_connection.sk_id);
connection_manager_state.wal_connection_retries.remove(&wal_connection.sk_id);
}
wal_connection.status = new_status;
}
@@ -165,7 +105,7 @@ async fn connection_manager_loop_step(
Ok(()) => debug!("WAL receiving task finished"),
Err(e) => error!("wal receiver task finished with an error: {e:?}"),
}
walreceiver_state.drop_old_connection(false).await;
connection_manager_state.drop_old_connection(false).await;
},
}
},
@@ -173,7 +113,7 @@ async fn connection_manager_loop_step(
// Got a new update from the broker
broker_update = broker_subscription.message() => {
match broker_update {
Ok(Some(broker_update)) => walreceiver_state.register_timeline_update(broker_update),
Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
Err(e) => {
error!("broker subscription failed: {e}");
return ControlFlow::Continue(());
@@ -187,12 +127,12 @@ async fn connection_manager_loop_step(
new_event = async {
loop {
if walreceiver_state.timeline.current_state() == TimelineState::Loading {
if connection_manager_state.timeline.current_state() == TimelineState::Loading {
warn!("wal connection manager should only be launched after timeline has become active");
}
match timeline_state_updates.changed().await {
Ok(()) => {
let new_state = walreceiver_state.timeline.current_state();
let new_state = connection_manager_state.timeline.current_state();
match new_state {
// we're already active as walreceiver, no need to reactivate
TimelineState::Active => continue,
@@ -234,9 +174,9 @@ async fn connection_manager_loop_step(
} => debug!("Waking up for the next retry after waiting for {time_until_next_retry:?}"),
}
if let Some(new_candidate) = walreceiver_state.next_connection_candidate() {
if let Some(new_candidate) = connection_manager_state.next_connection_candidate() {
info!("Switching to new connection candidate: {new_candidate:?}");
walreceiver_state
connection_manager_state
.change_connection(new_candidate, ctx)
.await
}
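
One detail in the loop above worth spelling out: the `Some(wal_connection_update) = async { ... }` arm relies on `select!`'s pattern-matching semantics. When the async block yields `None`, the `Some(...)` pattern fails to match and the arm is disabled for that `select!` invocation, so the branch only participates while a connection actually exists. A standalone sketch of the same trick, with made-up names:

```
use std::time::Duration;

#[tokio::main]
async fn main() {
    // Stands in for `wal_connection`: present at first, dropped later.
    let mut maybe_source = Some(tokio::time::interval(Duration::from_millis(10)));
    let mut ticks = 0;
    loop {
        tokio::select! {
            // Only polled meaningfully while `maybe_source` is Some; a None
            // result fails the `Some(_)` pattern and disables this arm.
            Some(_) = async {
                match maybe_source.as_mut() {
                    Some(interval) => Some(interval.tick().await),
                    None => None,
                }
            } => {
                ticks += 1;
                if ticks == 3 {
                    maybe_source = None; // from now on, only the timer arm runs
                }
            }
            _ = tokio::time::sleep(Duration::from_millis(100)) => {
                println!("saw {ticks} ticks, exiting");
                return;
            }
        }
    }
}
```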
@@ -314,25 +254,17 @@ const WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS: f64 = 15.0;
const WALCONNECTION_RETRY_BACKOFF_MULTIPLIER: f64 = 1.5;
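
With a multiplier of 1.5 capped at 15 seconds, the retry delays grow geometrically and flatten out after a handful of attempts. A worked sketch of that schedule; the 1-second base delay and the exact formula are assumptions for illustration, since the real computation lives in `time_until_next_retry`:

```
fn backoff_secs(attempt: u32) -> f64 {
    let base: f64 = 1.0;        // assumed base delay, for illustration
    let multiplier: f64 = 1.5;  // WALCONNECTION_RETRY_BACKOFF_MULTIPLIER
    let max: f64 = 15.0;        // WALCONNECTION_RETRY_MAX_BACKOFF_SECONDS
    (base * multiplier.powi(attempt as i32)).min(max)
}

fn main() {
    // Prints 1.00, 1.50, 2.25, 3.38, 5.06, 7.59, 11.39, then 15.00 (capped).
    for attempt in 0..8 {
        println!("attempt {attempt}: wait {:.2}s", backoff_secs(attempt));
    }
}
```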
/// All data that's needed to run endless broker loop and keep the WAL streaming connection alive, if possible.
struct WalreceiverState {
pub(super) struct ConnectionManagerState {
id: TenantTimelineId,
/// Use pageserver data about the timeline to filter out some of the safekeepers.
timeline: Arc<Timeline>,
/// The timeout on the connection to safekeeper for WAL streaming.
wal_connect_timeout: Duration,
/// The timeout to use to determine when the current connection is "stale" and reconnect to the other one.
lagging_wal_timeout: Duration,
/// The Lsn lag to use to determine when the current connection is lagging to much behind and reconnect to the other one.
max_lsn_wal_lag: NonZeroU64,
conf: WalReceiverConf,
/// Current connection to safekeeper for WAL streaming.
wal_connection: Option<WalConnection>,
/// Info about retries and unsuccessful attempts to connect to safekeepers.
wal_connection_retries: HashMap<NodeId, RetryInfo>,
/// Data about all timelines, available for connection, fetched from storage broker, grouped by their corresponding safekeeper node id.
wal_stream_candidates: HashMap<NodeId, BrokerSkTimeline>,
auth_token: Option<Arc<String>>,
availability_zone: Option<String>,
}
/// Current connection data.
@@ -375,15 +307,8 @@ struct BrokerSkTimeline {
latest_update: NaiveDateTime,
}
impl WalreceiverState {
fn new(
timeline: Arc<Timeline>,
wal_connect_timeout: Duration,
lagging_wal_timeout: Duration,
max_lsn_wal_lag: NonZeroU64,
auth_token: Option<Arc<String>>,
availability_zone: Option<String>,
) -> Self {
impl ConnectionManagerState {
pub(super) fn new(timeline: Arc<Timeline>, conf: WalReceiverConf) -> Self {
let id = TenantTimelineId {
tenant_id: timeline.tenant_id,
timeline_id: timeline.timeline_id,
@@ -391,14 +316,10 @@ impl WalreceiverState {
Self {
id,
timeline,
wal_connect_timeout,
lagging_wal_timeout,
max_lsn_wal_lag,
conf,
wal_connection: None,
wal_stream_candidates: HashMap::new(),
wal_connection_retries: HashMap::new(),
auth_token,
availability_zone,
}
}
@@ -407,7 +328,7 @@ impl WalreceiverState {
self.drop_old_connection(true).await;
let id = self.id;
let connect_timeout = self.wal_connect_timeout;
let connect_timeout = self.conf.wal_connect_timeout;
let timeline = Arc::clone(&self.timeline);
let ctx = ctx.detached_child(
TaskKind::WalReceiverConnectionHandler,
@@ -427,7 +348,7 @@ impl WalreceiverState {
.context("walreceiver connection handling failure")
}
.instrument(
info_span!("walreceiver_connection", id = %id, node_id = %new_sk.safekeeper_id),
info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, node_id = %new_sk.safekeeper_id),
)
});
@@ -563,7 +484,7 @@ impl WalreceiverState {
(now - existing_wal_connection.status.latest_connection_update).to_std()
{
// Drop connection if we haven't received keepalive message for a while.
if latest_interaciton > self.wal_connect_timeout {
if latest_interaciton > self.conf.wal_connect_timeout {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connconf: new_wal_source_connconf,
@@ -573,7 +494,7 @@ impl WalreceiverState {
existing_wal_connection.status.latest_connection_update,
),
check_time: now,
threshold: self.wal_connect_timeout,
threshold: self.conf.wal_connect_timeout,
},
});
}
@@ -589,7 +510,7 @@ impl WalreceiverState {
// Check if the new candidate has much more WAL than the current one.
match new_commit_lsn.0.checked_sub(current_commit_lsn.0) {
Some(new_sk_lsn_advantage) => {
if new_sk_lsn_advantage >= self.max_lsn_wal_lag.get() {
if new_sk_lsn_advantage >= self.conf.max_lsn_wal_lag.get() {
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
wal_source_connconf: new_wal_source_connconf,
@@ -597,16 +518,16 @@ impl WalreceiverState {
reason: ReconnectReason::LaggingWal {
current_commit_lsn,
new_commit_lsn,
threshold: self.max_lsn_wal_lag,
threshold: self.conf.max_lsn_wal_lag,
},
});
}
// If we have a candidate with the same commit_lsn as the current one, which is in the same AZ as pageserver,
// and the current one is not, switch to the new one.
if self.availability_zone.is_some()
if self.conf.availability_zone.is_some()
&& existing_wal_connection.availability_zone
!= self.availability_zone
&& self.availability_zone == new_availability_zone
!= self.conf.availability_zone
&& self.conf.availability_zone == new_availability_zone
{
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
@@ -677,7 +598,7 @@ impl WalreceiverState {
if let Some(waiting_for_new_lsn_since) = waiting_for_new_lsn_since {
if let Ok(waiting_for_new_wal) = (now - waiting_for_new_lsn_since).to_std() {
if candidate_commit_lsn > current_commit_lsn
&& waiting_for_new_wal > self.lagging_wal_timeout
&& waiting_for_new_wal > self.conf.lagging_wal_timeout
{
return Some(NewWalConnectionCandidate {
safekeeper_id: new_sk_id,
@@ -691,7 +612,7 @@ impl WalreceiverState {
existing_wal_connection.status.latest_wal_update,
),
check_time: now,
threshold: self.lagging_wal_timeout,
threshold: self.conf.lagging_wal_timeout,
},
});
}
@@ -757,11 +678,11 @@ impl WalreceiverState {
match wal_stream_connection_config(
self.id,
info.safekeeper_connstr.as_ref(),
match &self.auth_token {
match &self.conf.auth_token {
None => None,
Some(x) => Some(x),
},
self.availability_zone.as_deref(),
self.conf.availability_zone.as_deref(),
) {
Ok(connstr) => Some((*sk_id, info, connstr)),
Err(e) => {
@@ -775,7 +696,7 @@ impl WalreceiverState {
/// Remove candidates which haven't sent broker updates for a while.
fn cleanup_old_candidates(&mut self) {
let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());
let lagging_wal_timeout = self.lagging_wal_timeout;
let lagging_wal_timeout = self.conf.lagging_wal_timeout;
self.wal_stream_candidates.retain(|node_id, broker_info| {
if let Ok(time_since_latest_broker_update) =
@@ -799,7 +720,7 @@ impl WalreceiverState {
}
}
async fn shutdown(mut self) {
pub(super) async fn shutdown(mut self) {
if let Some(wal_connection) = self.wal_connection.take() {
wal_connection.connection_task.shutdown().await;
}
@@ -903,7 +824,7 @@ mod tests {
let mut state = dummy_state(&harness).await;
let now = Utc::now().naive_utc();
let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?;
let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?;
let delay_over_threshold = now - lagging_wal_timeout - lagging_wal_timeout;
state.wal_connection = None;
@@ -914,7 +835,7 @@ mod tests {
(
NodeId(3),
dummy_broker_sk_timeline(
1 + state.max_lsn_wal_lag.get(),
1 + state.conf.max_lsn_wal_lag.get(),
"delay_over_threshold",
delay_over_threshold,
),
@@ -948,7 +869,7 @@ mod tests {
streaming_lsn: Some(Lsn(current_lsn)),
};
state.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();
state.conf.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();
state.wal_connection = Some(WalConnection {
started_at: now,
sk_id: connected_sk_id,
@@ -966,7 +887,7 @@ mod tests {
(
connected_sk_id,
dummy_broker_sk_timeline(
current_lsn + state.max_lsn_wal_lag.get() * 2,
current_lsn + state.conf.max_lsn_wal_lag.get() * 2,
DUMMY_SAFEKEEPER_HOST,
now,
),
@@ -978,7 +899,7 @@ mod tests {
(
NodeId(2),
dummy_broker_sk_timeline(
current_lsn + state.max_lsn_wal_lag.get() / 2,
current_lsn + state.conf.max_lsn_wal_lag.get() / 2,
"not_enough_advanced_lsn",
now,
),
@@ -1003,7 +924,11 @@ mod tests {
state.wal_connection = None;
state.wal_stream_candidates = HashMap::from([(
NodeId(0),
dummy_broker_sk_timeline(1 + state.max_lsn_wal_lag.get(), DUMMY_SAFEKEEPER_HOST, now),
dummy_broker_sk_timeline(
1 + state.conf.max_lsn_wal_lag.get(),
DUMMY_SAFEKEEPER_HOST,
now,
),
)]);
let only_candidate = state
@@ -1101,7 +1026,7 @@ mod tests {
let now = Utc::now().naive_utc();
let connected_sk_id = NodeId(0);
let new_lsn = Lsn(current_lsn.0 + state.max_lsn_wal_lag.get() + 1);
let new_lsn = Lsn(current_lsn.0 + state.conf.max_lsn_wal_lag.get() + 1);
let connection_status = WalConnectionStatus {
is_connected: true,
@@ -1146,7 +1071,7 @@ mod tests {
ReconnectReason::LaggingWal {
current_commit_lsn: current_lsn,
new_commit_lsn: new_lsn,
threshold: state.max_lsn_wal_lag
threshold: state.conf.max_lsn_wal_lag
},
"Should select bigger WAL safekeeper if it starts to lag enough"
);
@@ -1165,7 +1090,7 @@ mod tests {
let current_lsn = Lsn(100_000).align();
let now = Utc::now().naive_utc();
let wal_connect_timeout = chrono::Duration::from_std(state.wal_connect_timeout)?;
let wal_connect_timeout = chrono::Duration::from_std(state.conf.wal_connect_timeout)?;
let time_over_threshold =
Utc::now().naive_utc() - wal_connect_timeout - wal_connect_timeout;
@@ -1208,7 +1133,7 @@ mod tests {
..
} => {
assert_eq!(last_keep_alive, Some(time_over_threshold));
assert_eq!(threshold, state.lagging_wal_timeout);
assert_eq!(threshold, state.conf.lagging_wal_timeout);
}
unexpected => panic!("Unexpected reason: {unexpected:?}"),
}
@@ -1228,7 +1153,7 @@ mod tests {
let new_lsn = Lsn(100_100).align();
let now = Utc::now().naive_utc();
let lagging_wal_timeout = chrono::Duration::from_std(state.lagging_wal_timeout)?;
let lagging_wal_timeout = chrono::Duration::from_std(state.conf.lagging_wal_timeout)?;
let time_over_threshold =
Utc::now().naive_utc() - lagging_wal_timeout - lagging_wal_timeout;
@@ -1275,7 +1200,7 @@ mod tests {
assert_eq!(current_commit_lsn, current_lsn);
assert_eq!(candidate_commit_lsn, new_lsn);
assert_eq!(last_wal_interaction, Some(time_over_threshold));
assert_eq!(threshold, state.lagging_wal_timeout);
assert_eq!(threshold, state.conf.lagging_wal_timeout);
}
unexpected => panic!("Unexpected reason: {unexpected:?}"),
}
@@ -1289,27 +1214,29 @@ mod tests {
const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";
async fn dummy_state(harness: &TenantHarness<'_>) -> WalreceiverState {
async fn dummy_state(harness: &TenantHarness<'_>) -> ConnectionManagerState {
let (tenant, ctx) = harness.load().await;
let timeline = tenant
.create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &ctx)
.expect("Failed to create an empty timeline for dummy wal connection manager");
let timeline = timeline.initialize(&ctx).unwrap();
WalreceiverState {
ConnectionManagerState {
id: TenantTimelineId {
tenant_id: harness.tenant_id,
timeline_id: TIMELINE_ID,
},
timeline,
wal_connect_timeout: Duration::from_secs(1),
lagging_wal_timeout: Duration::from_secs(1),
max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
conf: WalReceiverConf {
wal_connect_timeout: Duration::from_secs(1),
lagging_wal_timeout: Duration::from_secs(1),
max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
auth_token: None,
availability_zone: None,
},
wal_connection: None,
wal_stream_candidates: HashMap::new(),
wal_connection_retries: HashMap::new(),
auth_token: None,
availability_zone: None,
}
}
@@ -1321,7 +1248,7 @@ mod tests {
let harness = TenantHarness::create("switch_to_same_availability_zone")?;
let mut state = dummy_state(&harness).await;
state.availability_zone = test_az.clone();
state.conf.availability_zone = test_az.clone();
let current_lsn = Lsn(100_000).align();
let now = Utc::now().naive_utc();

View File

@@ -42,7 +42,7 @@ use utils::lsn::Lsn;
/// Status of the connection.
#[derive(Debug, Clone, Copy)]
pub struct WalConnectionStatus {
pub(super) struct WalConnectionStatus {
/// If we were able to initiate a postgres connection, this means that safekeeper process is at least running.
pub is_connected: bool,
/// Defines a healthy connection as one on which pageserver received WAL from safekeeper
@@ -60,7 +60,7 @@ pub struct WalConnectionStatus {
/// Open a connection to the given safekeeper and receive WAL, sending back progress
/// messages as we go.
pub async fn handle_walreceiver_connection(
pub(super) async fn handle_walreceiver_connection(
timeline: Arc<Timeline>,
wal_source_connconf: PgConnectionConfig,
events_sender: watch::Sender<TaskStateUpdate<WalConnectionStatus>>,

View File

@@ -64,6 +64,7 @@ webpki-roots.workspace = true
x509-parser.workspace = true
workspace_hack.workspace = true
tokio-util.workspace = true
[dev-dependencies]
rcgen.workspace = true

View File

@@ -87,6 +87,20 @@ pub(super) async fn authenticate(
.dbname(&db_info.dbname)
.user(&db_info.user);
// This is a hack to support the new way of accessing compute without using a
// NodePort. To access compute in a cross-k8s setup (console->compute
// and link-proxy->compute) we now need to connect to the pg_sni_router service
// using TLS, with the destination compute address encoded in the domain/SNI.
//
// However, for link-proxy it is hard to add support for outgoing TLS connections,
// as our trick of stealing the stream from tokio-postgres doesn't work with TLS.
// So set the sni_host option and use an unencrypted connection instead. Once we
// add encryption support for outgoing connections to the proxy, we can remove
// this hack.
if db_info.host.contains("cluster.local") {
config.options(format!("sni_host={}", db_info.host).as_str());
}
if let Some(password) = db_info.password {
config.password(password.as_ref());
}
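
To make the hack above concrete: with a hypothetical in-cluster address, the extra startup parameter ends up looking like `options=sni_host=<host>`, presumably read by pg_sni_router in place of real SNI on this unencrypted path. A sketch, with a made-up host value:

```
// Hypothetical host value, for illustration only.
let host = "ep-cool-name-123.default.svc.cluster.local";
let mut config = tokio_postgres::Config::new();
if host.contains("cluster.local") {
    // Ends up in the startup packet as `options=sni_host=<host>`.
    config.options(format!("sni_host={host}").as_str());
}
```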

View File

@@ -1,8 +1,8 @@
use crate::{cancellation::CancelClosure, error::UserFacingError};
use futures::TryFutureExt;
use futures::{FutureExt, TryFutureExt};
use itertools::Itertools;
use pq_proto::StartupMessageParams;
use std::{io, net::SocketAddr};
use std::{io, net::SocketAddr, time::Duration};
use thiserror::Error;
use tokio::net::TcpStream;
use tokio_postgres::NoTls;
@@ -130,9 +130,23 @@ impl ConnCfg {
async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
use tokio_postgres::config::Host;
// wrap TcpStream::connect with timeout
let connect_with_timeout = |host, port| {
let connection_timeout = Duration::from_millis(10000);
tokio::time::timeout(connection_timeout, TcpStream::connect((host, port))).map(
move |res| match res {
Ok(tcpstream_connect_res) => tcpstream_connect_res,
Err(_) => Err(io::Error::new(
io::ErrorKind::TimedOut,
format!("exceeded connection timeout {connection_timeout:?}"),
)),
},
)
};
let connect_once = |host, port| {
info!("trying to connect to compute node at {host}:{port}");
TcpStream::connect((host, port)).and_then(|socket| async {
connect_with_timeout(host, port).and_then(|socket| async {
let socket_addr = socket.peer_addr()?;
// This prevents load balancer from severing the connection.
socket2::SockRef::from(&socket).set_keepalive(true)?;
@@ -165,7 +179,6 @@ impl ConnCfg {
Host::Unix(_) => continue, // unix sockets are not welcome here
};
// TODO: maybe we should add a timeout.
match connect_once(host, *port).await {
Ok(socket) => return Ok(socket),
Err(err) => {
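
The timeout wrapper from this hunk, extracted into a free-standing helper to show the moving parts (a sketch, not the proxy's exact code): `tokio::time::timeout` yields `Err(Elapsed)` if the connect is still pending when the timer fires, and that is converted into an ordinary `io::Error` with `ErrorKind::TimedOut` so callers keep seeing a single error type.

```
use std::{io, time::Duration};
use tokio::net::TcpStream;

async fn connect_with_timeout(
    host: &str,
    port: u16,
    timeout: Duration,
) -> io::Result<TcpStream> {
    match tokio::time::timeout(timeout, TcpStream::connect((host, port))).await {
        // Connect finished in time: pass through its own Ok/Err unchanged.
        Ok(connect_result) => connect_result,
        // Timer fired first: surface the elapsed timeout as an io::Error.
        Err(_elapsed) => Err(io::Error::new(
            io::ErrorKind::TimedOut,
            format!("exceeded connection timeout {timeout:?}"),
        )),
    }
}
```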

View File

@@ -40,7 +40,7 @@ pub fn configure_tls(
let mut cert_resolver = CertResolver::new();
// add default certificate
cert_resolver.add_cert(key_path, cert_path)?;
cert_resolver.add_cert(key_path, cert_path, true)?;
// add extra certificates
if let Some(certs_dir) = certs_dir {
@@ -52,8 +52,11 @@ pub fn configure_tls(
let key_path = path.join("tls.key");
let cert_path = path.join("tls.crt");
if key_path.exists() && cert_path.exists() {
cert_resolver
.add_cert(&key_path.to_string_lossy(), &cert_path.to_string_lossy())?;
cert_resolver.add_cert(
&key_path.to_string_lossy(),
&cert_path.to_string_lossy(),
false,
)?;
}
}
}
@@ -78,16 +81,23 @@ pub fn configure_tls(
struct CertResolver {
certs: HashMap<String, Arc<rustls::sign::CertifiedKey>>,
default: Option<Arc<rustls::sign::CertifiedKey>>,
}
impl CertResolver {
fn new() -> Self {
Self {
certs: HashMap::new(),
default: None,
}
}
fn add_cert(&mut self, key_path: &str, cert_path: &str) -> anyhow::Result<()> {
fn add_cert(
&mut self,
key_path: &str,
cert_path: &str,
is_default: bool,
) -> anyhow::Result<()> {
let priv_key = {
let key_bytes = std::fs::read(key_path).context("TLS key file")?;
let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..])
@@ -136,10 +146,13 @@ impl CertResolver {
"Failed to parse common name from certificate at '{cert_path}'."
))?;
self.certs.insert(
common_name,
Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key)),
);
let cert = Arc::new(rustls::sign::CertifiedKey::new(cert_chain, key));
if is_default {
self.default = Some(cert.clone());
}
self.certs.insert(common_name, cert);
Ok(())
}
@@ -172,7 +185,17 @@ impl rustls::server::ResolvesServerCert for CertResolver {
}
}
} else {
None
// No SNI: use the default certificate, since otherwise we can't get to
// the options parameter, which can also be used to set the endpoint name.
// That means the non-SNI flow will not work for CNAME domains in
// verify-full mode.
//
// If that becomes a problem we can:
//
// a) Instead of the multi-cert approach, use a single cert with extra
//    domains listed in the Subject Alternative Name (SAN).
// b) Deploy separate proxy instances for the extra domains.
self.default.as_ref().cloned()
}
}
}
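
A simplified model of the resolution rule this file's diff ends on (not the actual `ResolvesServerCert` impl, which also handles wildcard common names): exact SNI lookup first, and the default certificate only when the client sent no SNI at all.

```
use std::collections::HashMap;

// `Cert` stands in for Arc<rustls::sign::CertifiedKey> in the real code.
fn resolve<'a, Cert>(
    sni: Option<&str>,
    certs: &'a HashMap<String, Cert>,
    default: Option<&'a Cert>,
) -> Option<&'a Cert> {
    match sni {
        // Client sent SNI: serve the matching per-domain certificate, if any.
        Some(name) => certs.get(name),
        // No SNI: fall back to the default cert so the handshake still
        // succeeds and the endpoint can be named via the `options` parameter.
        None => default,
    }
}
```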

Some files were not shown because too many files have changed in this diff.