Merge remote-tracking branch 'origin/main' into dkr/deleted-flag-in-remote-index

2026-07-06 21:50:37 +00:00 · 2023-05-03 18:07:36 +02:00
parent b58bf56670 b114ef26c2
commit de3f23344a
74 changed files with 2880 additions and 839 deletions
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -41,6 +41,14 @@ storage:
          ansible_host: i-051642d372c0a4f32
        pageserver-3.us-west-2.aws.neon.tech:
          ansible_host: i-00c3844beb9ad1c6b
+        pageserver-4.us-west-2.aws.neon.tech:
+          ansible_host: i-013263dd1c239adcc
+        pageserver-5.us-west-2.aws.neon.tech:
+          ansible_host: i-00ca6417c7bf96820
+        pageserver-6.us-west-2.aws.neon.tech:
+          ansible_host: i-01cdf7d2bc1433b6a
+        pageserver-7.us-west-2.aws.neon.tech:
+          ansible_host: i-02eec9b40617db5bc

    safekeepers:
      hosts:
@@ -50,4 +58,15 @@ storage:
          ansible_host: i-074682f9d3c712e7c
        safekeeper-2.us-west-2.aws.neon.tech:
          ansible_host: i-042b7efb1729d7966
-
+        safekeeper-3.us-west-2.aws.neon.tech:
+          ansible_host: i-089f6b9ef426dff76
+        safekeeper-4.us-west-2.aws.neon.tech:
+          ansible_host: i-0fe6bf912c4710c82
+        safekeeper-5.us-west-2.aws.neon.tech:
+          ansible_host: i-0a83c1c46d2b4e409
+        safekeeper-6.us-west-2.aws.neon.tech:
+          ansible_host: i-0fef5317b8fdc9f8d
+        safekeeper-7.us-west-2.aws.neon.tech:
+          ansible_host: i-0be739190d4289bf9
+        safekeeper-8.us-west-2.aws.neon.tech:
+          ansible_host: i-00e851803669e5cfe
--- a/.github/ansible/staging.eu-west-1.hosts.yaml
+++ b/.github/ansible/staging.eu-west-1.hosts.yaml
@@ -35,6 +35,8 @@ storage:
      hosts:
        pageserver-0.eu-west-1.aws.neon.build:
          ansible_host: i-01d496c5041c7f34c
+        pageserver-1.eu-west-1.aws.neon.build:
+          ansible_host: i-0e8013e239ce3928c

    safekeepers:
      hosts:
@@ -44,3 +46,15 @@ storage:
          ansible_host: i-06969ee1bf2958bfc
        safekeeper-2.eu-west-1.aws.neon.build:
          ansible_host: i-087892e9625984a0b
+        safekeeper-3.eu-west-1.aws.neon.build:
+          ansible_host: i-0a6f91660e99e8891
+        safekeeper-4.eu-west-1.aws.neon.build:
+          ansible_host: i-0012e309e28e7c249
+        safekeeper-5.eu-west-1.aws.neon.build:
+          ansible_host: i-085a2b1193287b32e
+        safekeeper-6.eu-west-1.aws.neon.build:
+          ansible_host: i-0c713248465ed0fbd
+        safekeeper-7.eu-west-1.aws.neon.build:
+          ansible_host: i-02ad231aed2a80b7a
+        safekeeper-8.eu-west-1.aws.neon.build:
+          ansible_host: i-0dbbd8ffef66efda8
--- a/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml
+++ b/.github/helm-values/dev-eu-central-1-alpha.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the dev-eu-central-1-alpha cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'staging'
+  domain: '*.snirouter.alpha.eu-central-1.internal.aws.neon.build'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.alpha.eu-central-1.internal.aws.neon.build'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the dev-eu-west-1-zeta cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'staging'
+  domain: '*.snirouter.zeta.eu-west-1.internal.aws.neon.build'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.zeta.eu-west-1.internal.aws.neon.build'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the dev-us-east-2-beta cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'staging'
+  domain: '*.snirouter.beta.us-east-2.internal.aws.neon.build'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.beta.us-east-2.internal.aws.neon.build'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the prod-ap-southeast-1-epsilon cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'production'
+  domain: '*.snirouter.epsilon.ap-southeast-1.internal.aws.neon.tech'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.epsilon.ap-southeast-1.internal.aws.neon.tech'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the prod-eu-central-1-gamma cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'production'
+  domain: '*.snirouter.gamma.eu-central-1.internal.aws.neon.tech'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.gamma.eu-central-1.internal.aws.neon.tech'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml
+++ b/.github/helm-values/prod-us-east-1-theta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the prod-us-east-1-theta cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'production'
+  domain: '*.snirouter.theta.us-east-1.internal.aws.neon.tech'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.theta.us-east-1.internal.aws.neon.tech'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the prod-us-east-2-delta cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'production'
+  domain: '*.snirouter.delta.us-east-2.internal.aws.neon.tech'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.delta.us-east-2.internal.aws.neon.tech'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.pg-sni-router.yaml
@@ -0,0 +1,19 @@
+# pg-sni-router Helm values for the prod-us-west-2-eta cluster.
+replicaCount: 3
+useCertManager: true
+
+imagePullSecrets:
+  - name: docker-hub-neon
+
+metrics:
+  enabled: false
+
+settings:
+  sentryEnvironment: 'production'
+  domain: '*.snirouter.eta.us-west-2.internal.aws.neon.tech'
+
+exposedService:
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: '*.snirouter.eta.us-west-2.internal.aws.neon.tech'
+  # exposedService.port -- port on which the proxy Service is exposed
+  port: 4432
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -418,10 +418,7 @@ jobs:
      - uses: actions/github-script@v6
        if: >
          !cancelled() &&
-          github.event_name == 'pull_request' && (
-            steps.create-allure-report-debug.outputs.report-url ||
-            steps.create-allure-report-release.outputs.report-url
-          )
+          github.event_name == 'pull_request'
        with:
          # Retry script for 5XX server errors: https://github.com/actions/github-script#retries
          retries: 5
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -27,6 +27,11 @@ on:
        required: true
        type: boolean
        default: true
+      deployPgSniRouter:
+        description: 'Deploy pg-sni-router'
+        required: true
+        type: boolean
+        default: true

 env:
  AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_DEV }}
@@ -227,3 +232,64 @@ jobs:
  
      - name: Cleanup helm folder
        run: rm -rf ~/.cache
+
+  # Installs/upgrades the neon-pg-sni-router Helm release on every dev cluster
+  # listed in the matrix; skipped unless the deployPgSniRouter input is true.
+  deploy-pg-sni-router:
+    runs-on: [ self-hosted, gen3, small ]
+    container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:pinned
+    if: inputs.deployPgSniRouter
+    defaults:
+      run:
+        shell: bash
+    strategy:
+      matrix:
+        include:
+          - target_region: us-east-2
+            target_cluster: dev-us-east-2-beta
+          - target_region: eu-west-1
+            target_cluster: dev-eu-west-1-zeta
+          - target_region: eu-central-1
+            target_cluster: dev-eu-central-1-alpha
+    environment:
+      name: dev-${{ matrix.target_region }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v1-node16
+        with:
+          role-to-assume: arn:aws:iam::369495373322:role/github-runner
+          aws-region: eu-central-1
+          role-skip-session-tagging: true
+          role-duration-seconds: 1800
+
+      - name: Configure environment
+        run: |
+          helm repo add neondatabase https://neondatabase.github.io/helm-charts
+          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
+
+      - name: Deploy pg-sni-router
+        # Pass the workflow input and the secret through the environment and quote them
+        # in the script, so their contents are never interpolated into the shell source.
+        env:
+          DOCKER_TAG: ${{ inputs.dockerTag }}
+          # NOTE(review): the secret name suggests it belongs to storage-broker; confirm
+          # pg-sni-router is meant to report to the same Sentry project.
+          SENTRY_URL: ${{ secrets.SENTRY_URL_BROKER }}
+        run: |
+          helm upgrade neon-pg-sni-router neondatabase/neon-pg-sni-router \
+            --namespace neon-pg-sni-router --create-namespace \
+            --install --debug --atomic \
+            -f ".github/helm-values/${{ matrix.target_cluster }}.pg-sni-router.yaml" \
+            --set image.tag="${DOCKER_TAG}" \
+            --set settings.sentryUrl="${SENTRY_URL}" \
+            --wait --timeout 15m0s
+
+      - name: Cleanup helm folder
+        run: rm -rf ~/.cache
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -27,6 +27,11 @@ on:
        required: true
        type: boolean
        default: true
+      deployPgSniRouter:
+        description: 'Deploy pg-sni-router'
+        required: true
+        type: boolean
+        default: true
      disclamerAcknowledged:
        description: 'I confirm that there is an emergency and I can not use regular release workflow'
        required: true
@@ -171,3 +176,57 @@ jobs:
      - name: Deploy storage-broker
        run:
          helm upgrade neon-storage-broker-lb neondatabase/neon-storage-broker --namespace neon-storage-broker-lb --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-storage-broker.yaml --set image.tag=${{ inputs.dockerTag }} --set settings.sentryUrl=${{ secrets.SENTRY_URL_BROKER }} --wait --timeout 5m0s
+
+  # Installs/upgrades the neon-pg-sni-router Helm release on every prod cluster listed in
+  # the matrix; runs only when both deployPgSniRouter and disclamerAcknowledged are true.
+  deploy-pg-sni-router:
+    runs-on: prod
+    container: 093970136003.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
+    if: inputs.deployPgSniRouter && inputs.disclamerAcknowledged
+    defaults:
+      run:
+        shell: bash
+    strategy:
+      matrix:
+        include:
+          - target_region: us-east-2
+            target_cluster: prod-us-east-2-delta
+          - target_region: us-west-2
+            target_cluster: prod-us-west-2-eta
+          - target_region: eu-central-1
+            target_cluster: prod-eu-central-1-gamma
+          - target_region: ap-southeast-1
+            target_cluster: prod-ap-southeast-1-epsilon
+          - target_region: us-east-1
+            target_cluster: prod-us-east-1-theta
+    environment:
+      name: prod-${{ matrix.target_region }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+          fetch-depth: 0
+          ref: ${{ inputs.branch }}
+
+      - name: Configure environment
+        run: |
+          helm repo add neondatabase https://neondatabase.github.io/helm-charts
+          aws --region ${{ matrix.target_region }} eks update-kubeconfig --name ${{ matrix.target_cluster }}
+
+      - name: Deploy pg-sni-router
+        # Pass the workflow input and the secret through the environment and quote them
+        # in the script, so their contents are never interpolated into the shell source.
+        env:
+          DOCKER_TAG: ${{ inputs.dockerTag }}
+          # NOTE(review): this is the same secret the storage-broker deploy step uses;
+          # confirm pg-sni-router is meant to report to the same Sentry project.
+          SENTRY_URL: ${{ secrets.SENTRY_URL_BROKER }}
+        run: |
+          helm upgrade neon-pg-sni-router neondatabase/neon-pg-sni-router \
+            --namespace neon-pg-sni-router --create-namespace \
+            --install --debug --atomic \
+            -f ".github/helm-values/${{ matrix.target_cluster }}.pg-sni-router.yaml" \
+            --set image.tag="${DOCKER_TAG}" \
+            --set settings.sentryUrl="${SENTRY_URL}" \
+            --wait --timeout 15m0s
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1574,6 +1574,21 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

+[[package]]
+name = "foreign-types"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
+dependencies = [
+ "foreign-types-shared",
+]
+
+[[package]]
+name = "foreign-types-shared"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
+
 [[package]]
 name = "form_urlencoded"
 version = "1.1.0"
@@ -2361,6 +2376,24 @@ version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"

+[[package]]
+name = "native-tls"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
+dependencies = [
+ "lazy_static",
+ "libc",
+ "log",
+ "openssl",
+ "openssl-probe",
+ "openssl-sys",
+ "schannel",
+ "security-framework",
+ "security-framework-sys",
+ "tempfile",
+]
+
 [[package]]
 name = "nix"
 version = "0.26.2"
@@ -2483,12 +2516,50 @@ version = "11.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

+[[package]]
+name = "openssl"
+version = "0.10.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01b8574602df80f7b85fdfc5392fa884a4e3b3f4f35402c070ab34c3d3f78d56"
+dependencies = [
+ "bitflags",
+ "cfg-if",
+ "foreign-types",
+ "libc",
+ "once_cell",
+ "openssl-macros",
+ "openssl-sys",
+]
+
+[[package]]
+name = "openssl-macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.15",
+]
+
 [[package]]
 name = "openssl-probe"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"

+[[package]]
+name = "openssl-sys"
+version = "0.9.87"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e17f59264b2809d77ae94f0e1ebabc434773f370d6ca667bd223ea10e06cc7e"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+ "vcpkg",
+]
+
 [[package]]
 name = "opentelemetry"
 version = "0.18.0"
@@ -2682,6 +2753,7 @@ dependencies = [
 "tenant_size_model",
 "thiserror",
 "tokio",
+ "tokio-io-timeout",
 "tokio-postgres",
 "tokio-tar",
 "tokio-util",
@@ -2816,6 +2888,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

+[[package]]
+name = "pkg-config"
+version = "0.3.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+
 [[package]]
 name = "plotters"
 version = "0.3.4"
@@ -2847,7 +2925,7 @@ dependencies = [
 [[package]]
 name = "postgres"
 version = "0.19.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -2857,10 +2935,21 @@ dependencies = [
 "tokio-postgres",
 ]

+[[package]]
+name = "postgres-native-tls"
+version = "0.5.0"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
+dependencies = [
+ "native-tls",
+ "tokio",
+ "tokio-native-tls",
+ "tokio-postgres",
+]
+
 [[package]]
 name = "postgres-protocol"
 version = "0.6.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
 dependencies = [
 "base64 0.20.0",
 "byteorder",
@@ -2878,7 +2967,7 @@ dependencies = [
 [[package]]
 name = "postgres-types"
 version = "0.2.4"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
 dependencies = [
 "bytes",
 "fallible-iterator",
@@ -2959,7 +3048,6 @@ dependencies = [
 "pin-project-lite",
 "postgres-protocol",
 "rand",
- "serde",
 "thiserror",
 "tokio",
 "tracing",
@@ -3110,10 +3198,12 @@ dependencies = [
 "itertools",
 "md5",
 "metrics",
+ "native-tls",
 "once_cell",
 "opentelemetry",
 "parking_lot",
 "pin-project-lite",
+ "postgres-native-tls",
 "postgres_backend",
 "pq_proto",
 "prometheus",
@@ -3568,6 +3658,7 @@ dependencies = [
 "const_format",
 "crc32c",
 "fs2",
+ "futures",
 "git-version",
 "hex",
 "humantime",
@@ -3582,7 +3673,9 @@ dependencies = [
 "pq_proto",
 "regex",
 "remote_storage",
+ "reqwest",
 "safekeeper_api",
+ "scopeguard",
 "serde",
 "serde_json",
 "serde_with",
@@ -3869,8 +3962,7 @@ dependencies = [
 [[package]]
 name = "sharded-slab"
 version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
+source = "git+https://github.com/neondatabase/sharded-slab.git?rev=98d16753ab01c61f0a028de44167307a00efea00#98d16753ab01c61f0a028de44167307a00efea00"
 dependencies = [
 "lazy_static",
 ]
@@ -4326,10 +4418,20 @@ dependencies = [
 "syn 2.0.15",
 ]

+[[package]]
+name = "tokio-native-tls"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2"
+dependencies = [
+ "native-tls",
+ "tokio",
+]
+
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
-source = "git+https://github.com/neondatabase/rust-postgres.git?rev=43e6db254a97fdecbce33d8bc0890accfd74495e#43e6db254a97fdecbce33d8bc0890accfd74495e"
+source = "git+https://github.com/neondatabase/rust-postgres.git?rev=0bc41d8503c092b040142214aac3cf7d11d0c19f#0bc41d8503c092b040142214aac3cf7d11d0c19f"
 dependencies = [
 "async-trait",
 "byteorder",
@@ -4871,6 +4973,7 @@ dependencies = [
 "bincode",
 "byteorder",
 "bytes",
+ "chrono",
 "criterion",
 "futures",
 "heapless",
@@ -4882,6 +4985,7 @@ dependencies = [
 "nix",
 "once_cell",
 "pin-project-lite",
+ "pq_proto",
 "rand",
 "regex",
 "routerify",
@@ -4919,6 +5023,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"

+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
 [[package]]
 name = "version_check"
 version = "0.9.4"
@@ -5297,13 +5407,11 @@ name = "workspace_hack"
 version = "0.1.0"
 dependencies = [
 "anyhow",
- "byteorder",
 "bytes",
 "chrono",
 "clap 4.2.2",
 "clap_builder",
 "crossbeam-utils",
- "digest",
 "either",
 "fail",
 "futures",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,6 +62,7 @@ jsonwebtoken = "8"
 libc = "0.2"
 md5 = "0.7.0"
 memoffset = "0.8"
+native-tls = "0.2"
 nix = "0.26"
 notify = "5.0.0"
 num_cpus = "1.15"
@@ -124,10 +125,11 @@ env_logger = "0.10"
 log = "0.4"

 ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed
-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
+postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres-native-tls = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
 tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }

 ## Other git libraries
@@ -159,10 +161,16 @@ rstest = "0.17"
 tempfile = "3.4"
 tonic-build = "0.9"

+[patch.crates-io]
+
 # This is only needed for proxy's tests.
 # TODO: we should probably fork `tokio-postgres-rustls` instead.
-[patch.crates-io]
-tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
+tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="0bc41d8503c092b040142214aac3cf7d11d0c19f" }
+
+# This fork raises sharded-slab's MAX_THREADS limit from 4096 to 32768.
+# It is a temporary workaround that lets the safekeeper code use tracing from many threads;
+# drop this patch once the async safekeepers patch has been merged into main.
+sharded-slab = { git = "https://github.com/neondatabase/sharded-slab.git", rev="98d16753ab01c61f0a028de44167307a00efea00" }

 ################# Binary contents sections

--- a/11
+++ b/11
@@ -44,7 +44,15 @@ COPY --chown=nonroot . .
 # Show build caching stats to check if it was used in the end.
 # Has to be the part of the same RUN since cachepot daemon is killed in the end of this RUN, losing the compilation stats.
 RUN set -e \
-&& mold -run cargo build --bin pageserver --bin pageserver_binutils --bin draw_timeline_dir --bin safekeeper --bin storage_broker --bin proxy --locked --release \
+    && mold -run cargo build \
+      --bin pg_sni_router \
+      --bin pageserver \
+      --bin pageserver_binutils \
+      --bin draw_timeline_dir \
+      --bin safekeeper \
+      --bin storage_broker \
+      --bin proxy \
+      --locked --release \
    && cachepot -s

 # Build final image
@@ -63,6 +71,7 @@ RUN set -e \
    && useradd -d /data neon \
    && chown -R neon:neon /data

+COPY --from=build --chown=neon:neon /home/nonroot/target/release/pg_sni_router       /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver          /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/pageserver_binutils /usr/local/bin
 COPY --from=build --chown=neon:neon /home/nonroot/target/release/draw_timeline_dir   /usr/local/bin
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -8,7 +8,7 @@
 use anyhow::{anyhow, bail, Context, Result};
 use clap::{value_parser, Arg, ArgAction, ArgMatches, Command};
 use control_plane::endpoint::ComputeControlPlane;
-use control_plane::endpoint::Replication;
+use control_plane::endpoint::ComputeMode;
 use control_plane::local_env::LocalEnv;
 use control_plane::pageserver::PageServerNode;
 use control_plane::safekeeper::SafekeeperNode;
@@ -481,7 +481,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                timeline_id,
                None,
                pg_version,
-                Replication::Primary,
+                ComputeMode::Primary,
            )?;
            println!("Done");
        }
@@ -568,8 +568,8 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .iter()
                .filter(|(_, endpoint)| endpoint.tenant_id == tenant_id)
            {
-                let lsn_str = match endpoint.replication {
-                    Replication::Static(lsn) => {
+                let lsn_str = match endpoint.mode {
+                    ComputeMode::Static(lsn) => {
                        // -> read-only endpoint
                        // Use the node's LSN.
                        lsn.to_string()
@@ -632,21 +632,14 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .copied()
                .unwrap_or(false);

-            let replication = match (lsn, hot_standby) {
-                (Some(lsn), false) => Replication::Static(lsn),
-                (None, true) => Replication::Replica,
-                (None, false) => Replication::Primary,
+            let mode = match (lsn, hot_standby) {
+                (Some(lsn), false) => ComputeMode::Static(lsn),
+                (None, true) => ComputeMode::Replica,
+                (None, false) => ComputeMode::Primary,
                (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
            };

-            cplane.new_endpoint(
-                tenant_id,
-                &endpoint_id,
-                timeline_id,
-                port,
-                pg_version,
-                replication,
-            )?;
+            cplane.new_endpoint(tenant_id, &endpoint_id, timeline_id, port, pg_version, mode)?;
        }
        "start" => {
            let port: Option<u16> = sub_args.get_one::<u16>("port").copied();
@@ -670,11 +663,11 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                .unwrap_or(false);

            if let Some(endpoint) = endpoint {
-                match (&endpoint.replication, hot_standby) {
-                    (Replication::Static(_), true) => {
+                match (&endpoint.mode, hot_standby) {
+                    (ComputeMode::Static(_), true) => {
                        bail!("Cannot start a node in hot standby mode when it is already configured as a static replica")
                    }
-                    (Replication::Primary, true) => {
+                    (ComputeMode::Primary, true) => {
                        bail!("Cannot start a node as a hot standby replica, it is already configured as primary node")
                    }
                    _ => {}
@@ -701,10 +694,10 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                    .copied()
                    .context("Failed to `pg-version` from the argument string")?;

-                let replication = match (lsn, hot_standby) {
-                    (Some(lsn), false) => Replication::Static(lsn),
-                    (None, true) => Replication::Replica,
-                    (None, false) => Replication::Primary,
+                let mode = match (lsn, hot_standby) {
+                    (Some(lsn), false) => ComputeMode::Static(lsn),
+                    (None, true) => ComputeMode::Replica,
+                    (None, false) => ComputeMode::Primary,
                    (Some(_), true) => anyhow::bail!("cannot specify both lsn and hot-standby"),
                };

@@ -721,7 +714,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                    timeline_id,
                    port,
                    pg_version,
-                    replication,
+                    mode,
                )?;
                ep.start(&auth_token)?;
            }
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -11,15 +11,31 @@ use std::sync::Arc;
 use std::time::Duration;

 use anyhow::{Context, Result};
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
 use utils::{
    id::{TenantId, TimelineId},
    lsn::Lsn,
 };

-use crate::local_env::{LocalEnv, DEFAULT_PG_VERSION};
+use crate::local_env::LocalEnv;
 use crate::pageserver::PageServerNode;
 use crate::postgresql_conf::PostgresConf;

+// Contents of an endpoint's `endpoint.json` file, written as pretty-printed JSON when the endpoint is created.
+#[serde_as]
+#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
+pub struct EndpointConf {
+    name: String,
+    #[serde_as(as = "DisplayFromStr")] // (de)serialized through its Display / FromStr string form
+    tenant_id: TenantId,
+    #[serde_as(as = "DisplayFromStr")] // (de)serialized through its Display / FromStr string form
+    timeline_id: TimelineId,
+    mode: ComputeMode, // Primary, Static(lsn) or Replica
+    port: u16,
+    pg_version: u32,
+}
+
 //
 // ComputeControlPlane
 //
@@ -70,7 +86,7 @@ impl ComputeControlPlane {
        timeline_id: TimelineId,
        port: Option<u16>,
        pg_version: u32,
-        replication: Replication,
+        mode: ComputeMode,
    ) -> Result<Arc<Endpoint>> {
        let port = port.unwrap_or_else(|| self.get_port());

@@ -80,12 +96,22 @@ impl ComputeControlPlane {
            env: self.env.clone(),
            pageserver: Arc::clone(&self.pageserver),
            timeline_id,
-            replication,
+            mode,
            tenant_id,
            pg_version,
        });
-
        ep.create_pgdata()?;
+        std::fs::write(
+            ep.endpoint_path().join("endpoint.json"),
+            serde_json::to_string_pretty(&EndpointConf {
+                name: name.to_string(),
+                tenant_id,
+                timeline_id,
+                mode,
+                port,
+                pg_version,
+            })?,
+        )?;
        ep.setup_pg_conf()?;

        self.endpoints.insert(ep.name.clone(), Arc::clone(&ep));
@@ -96,12 +122,13 @@ impl ComputeControlPlane {

 ///////////////////////////////////////////////////////////////////////////////

-#[derive(Debug, Clone, Eq, PartialEq)]
-pub enum Replication {
+#[serde_as]
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, Eq, PartialEq)]
+pub enum ComputeMode {
    // Regular read-write node
    Primary,
    // if recovery_target_lsn is provided, and we want to pin the node to a specific LSN
-    Static(Lsn),
+    Static(#[serde_as(as = "DisplayFromStr")] Lsn),
    // Hot standby; read-only replica.
    // Future versions may want to distinguish between replicas with hot standby
    // feedback and other kinds of replication configurations.
@@ -115,7 +142,7 @@ pub struct Endpoint {
    pub tenant_id: TenantId,
    pub timeline_id: TimelineId,
    // Some(lsn) if this is a read-only endpoint anchored at 'lsn'. None for the primary.
-    pub replication: Replication,
+    pub mode: ComputeMode,

    // port and address of the Postgres server
    pub address: SocketAddr,
@@ -144,50 +171,20 @@ impl Endpoint {
        let fname = entry.file_name();
        let name = fname.to_str().unwrap().to_string();

-        // Read config file into memory
-        let cfg_path = entry.path().join("pgdata").join("postgresql.conf");
-        let cfg_path_str = cfg_path.to_string_lossy();
-        let mut conf_file = File::open(&cfg_path)
-            .with_context(|| format!("failed to open config file in {}", cfg_path_str))?;
-        let conf = PostgresConf::read(&mut conf_file)
-            .with_context(|| format!("failed to read config file in {}", cfg_path_str))?;
-
-        // Read a few options from the config file
-        let context = format!("in config file {}", cfg_path_str);
-        let port: u16 = conf.parse_field("port", &context)?;
-        let timeline_id: TimelineId = conf.parse_field("neon.timeline_id", &context)?;
-        let tenant_id: TenantId = conf.parse_field("neon.tenant_id", &context)?;
-
-        // Read postgres version from PG_VERSION file to determine which postgres version binary to use.
-        // If it doesn't exist, assume broken data directory and use default pg version.
-        let pg_version_path = entry.path().join("PG_VERSION");
-
-        let pg_version_str =
-            fs::read_to_string(pg_version_path).unwrap_or_else(|_| DEFAULT_PG_VERSION.to_string());
-        let pg_version = u32::from_str(&pg_version_str)?;
-
-        // parse recovery_target_lsn and primary_conninfo into Recovery Target, if any
-        let replication = if let Some(lsn_str) = conf.get("recovery_target_lsn") {
-            Replication::Static(Lsn::from_str(lsn_str)?)
-        } else if let Some(slot_name) = conf.get("primary_slot_name") {
-            let slot_name = slot_name.to_string();
-            let prefix = format!("repl_{}_", timeline_id);
-            assert!(slot_name.starts_with(&prefix));
-            Replication::Replica
-        } else {
-            Replication::Primary
-        };
+        // Read the endpoint.json file
+        let conf: EndpointConf =
+            serde_json::from_slice(&std::fs::read(entry.path().join("endpoint.json"))?)?;

        // ok now
        Ok(Endpoint {
-            address: SocketAddr::new("127.0.0.1".parse().unwrap(), port),
+            address: SocketAddr::new("127.0.0.1".parse().unwrap(), conf.port),
            name,
            env: env.clone(),
            pageserver: Arc::clone(pageserver),
-            timeline_id,
-            replication,
-            tenant_id,
-            pg_version,
+            timeline_id: conf.timeline_id,
+            mode: conf.mode,
+            tenant_id: conf.tenant_id,
+            pg_version: conf.pg_version,
        })
    }

@@ -323,8 +320,8 @@ impl Endpoint {

        conf.append_line("");
        // Replication-related configurations, such as WAL sending
-        match &self.replication {
-            Replication::Primary => {
+        match &self.mode {
+            ComputeMode::Primary => {
                // Configure backpressure
                // - Replication write lag depends on how fast the walreceiver can process incoming WAL.
                //   This lag determines latency of get_page_at_lsn. Speed of applying WAL is about 10MB/sec,
@@ -366,10 +363,10 @@ impl Endpoint {
                    conf.append("synchronous_standby_names", "pageserver");
                }
            }
-            Replication::Static(lsn) => {
+            ComputeMode::Static(lsn) => {
                conf.append("recovery_target_lsn", &lsn.to_string());
            }
-            Replication::Replica => {
+            ComputeMode::Replica => {
                assert!(!self.env.safekeepers.is_empty());

                // TODO: use future host field from safekeeper spec
@@ -409,8 +406,8 @@ impl Endpoint {
    }

    fn load_basebackup(&self, auth_token: &Option<String>) -> Result<()> {
-        let backup_lsn = match &self.replication {
-            Replication::Primary => {
+        let backup_lsn = match &self.mode {
+            ComputeMode::Primary => {
                if !self.env.safekeepers.is_empty() {
                    // LSN 0 means that it is bootstrap and we need to download just
                    // latest data from the pageserver. That is a bit clumsy but whole bootstrap
@@ -426,8 +423,8 @@ impl Endpoint {
                    None
                }
            }
-            Replication::Static(lsn) => Some(*lsn),
-            Replication::Replica => {
+            ComputeMode::Static(lsn) => Some(*lsn),
+            ComputeMode::Replica => {
                None // Take the latest snapshot available to start with
            }
        };
@@ -526,7 +523,7 @@ impl Endpoint {
        // 3. Load basebackup
        self.load_basebackup(auth_token)?;

-        if self.replication != Replication::Primary {
+        if self.mode != ComputeMode::Primary {
            File::create(self.pgdata().join("standby.signal"))?;
        }

--- a/libs/postgres_backend/src/lib.rs
+++ b/libs/postgres_backend/src/lib.rs
@@ -50,11 +50,14 @@ impl QueryError {
    }
 }

+/// Returns true if the given error is a normal consequence of a network issue,
+/// or the client closing the connection. These errors can happen during normal
+/// operations, and don't indicate a bug in our code.
 pub fn is_expected_io_error(e: &io::Error) -> bool {
    use io::ErrorKind::*;
    matches!(
        e.kind(),
-        ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
+        BrokenPipe | ConnectionRefused | ConnectionAborted | ConnectionReset | TimedOut
    )
 }

--- a/libs/postgres_ffi/wal_craft/src/lib.rs
+++ b/libs/postgres_ffi/wal_craft/src/lib.rs
@@ -1,15 +1,13 @@
-use anyhow::*;
-use core::time::Duration;
+use anyhow::{bail, ensure};
 use log::*;
 use postgres::types::PgLsn;
 use postgres::Client;
 use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ};
 use postgres_ffi::{XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD};
 use std::cmp::Ordering;
-use std::fs;
 use std::path::{Path, PathBuf};
-use std::process::{Command, Stdio};
-use std::time::Instant;
+use std::process::Command;
+use std::time::{Duration, Instant};
 use tempfile::{tempdir, TempDir};

 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -56,7 +54,7 @@ impl Conf {
        self.datadir.join("pg_wal")
    }

-    fn new_pg_command(&self, command: impl AsRef<Path>) -> Result<Command> {
+    fn new_pg_command(&self, command: impl AsRef<Path>) -> anyhow::Result<Command> {
        let path = self.pg_bin_dir()?.join(command);
        ensure!(path.exists(), "Command {:?} does not exist", path);
        let mut cmd = Command::new(path);
@@ -66,7 +64,7 @@ impl Conf {
        Ok(cmd)
    }

-    pub fn initdb(&self) -> Result<()> {
+    pub fn initdb(&self) -> anyhow::Result<()> {
        if let Some(parent) = self.datadir.parent() {
            info!("Pre-creating parent directory {:?}", parent);
            // Tests may be run concurrently and there may be a race to create `test_output/`.
@@ -80,7 +78,7 @@ impl Conf {
        let output = self
            .new_pg_command("initdb")?
            .arg("-D")
-            .arg(self.datadir.as_os_str())
+            .arg(&self.datadir)
            .args(["-U", "postgres", "--no-instructions", "--no-sync"])
            .output()?;
        debug!("initdb output: {:?}", output);
@@ -93,26 +91,18 @@ impl Conf {
        Ok(())
    }

-    pub fn start_server(&self) -> Result<PostgresServer> {
+    pub fn start_server(&self) -> anyhow::Result<PostgresServer> {
        info!("Starting Postgres server in {:?}", self.datadir);
-        let log_file = fs::File::create(self.datadir.join("pg.log")).with_context(|| {
-            format!(
-                "Failed to create pg.log file in directory {}",
-                self.datadir.display()
-            )
-        })?;
        let unix_socket_dir = tempdir()?; // We need a directory with a short name for Unix socket (up to 108 symbols)
        let unix_socket_dir_path = unix_socket_dir.path().to_owned();
        let server_process = self
            .new_pg_command("postgres")?
            .args(["-c", "listen_addresses="])
            .arg("-k")
-            .arg(unix_socket_dir_path.as_os_str())
+            .arg(&unix_socket_dir_path)
            .arg("-D")
-            .arg(self.datadir.as_os_str())
-            .args(["-c", "logging_collector=on"]) // stderr will mess up with tests output
+            .arg(&self.datadir)
            .args(REQUIRED_POSTGRES_CONFIG.iter().flat_map(|cfg| ["-c", cfg]))
-            .stderr(Stdio::from(log_file))
            .spawn()?;
        let server = PostgresServer {
            process: server_process,
@@ -121,7 +111,7 @@ impl Conf {
                let mut c = postgres::Config::new();
                c.host_path(&unix_socket_dir_path);
                c.user("postgres");
-                c.connect_timeout(Duration::from_millis(1000));
+                c.connect_timeout(Duration::from_millis(10000));
                c
            },
        };
@@ -132,7 +122,7 @@ impl Conf {
        &self,
        first_segment_name: &str,
        last_segment_name: &str,
-    ) -> Result<std::process::Output> {
+    ) -> anyhow::Result<std::process::Output> {
        let first_segment_file = self.datadir.join(first_segment_name);
        let last_segment_file = self.datadir.join(last_segment_name);
        info!(
@@ -142,10 +132,7 @@ impl Conf {
        );
        let output = self
            .new_pg_command("pg_waldump")?
-            .args([
-                &first_segment_file.as_os_str(),
-                &last_segment_file.as_os_str(),
-            ])
+            .args([&first_segment_file, &last_segment_file])
            .output()?;
        debug!("waldump output: {:?}", output);
        Ok(output)
@@ -153,10 +140,9 @@ impl Conf {
 }

 impl PostgresServer {
-    pub fn connect_with_timeout(&self) -> Result<Client> {
+    pub fn connect_with_timeout(&self) -> anyhow::Result<Client> {
        let retry_until = Instant::now() + *self.client_config.get_connect_timeout().unwrap();
        while Instant::now() < retry_until {
-            use std::result::Result::Ok;
            if let Ok(client) = self.client_config.connect(postgres::NoTls) {
                return Ok(client);
            }
@@ -173,7 +159,6 @@ impl PostgresServer {

 impl Drop for PostgresServer {
    fn drop(&mut self) {
-        use std::result::Result::Ok;
        match self.process.try_wait() {
            Ok(Some(_)) => return,
            Ok(None) => {
@@ -188,12 +173,12 @@ impl Drop for PostgresServer {
 }

 pub trait PostgresClientExt: postgres::GenericClient {
-    fn pg_current_wal_insert_lsn(&mut self) -> Result<PgLsn> {
+    fn pg_current_wal_insert_lsn(&mut self) -> anyhow::Result<PgLsn> {
        Ok(self
            .query_one("SELECT pg_current_wal_insert_lsn()", &[])?
            .get(0))
    }
-    fn pg_current_wal_flush_lsn(&mut self) -> Result<PgLsn> {
+    fn pg_current_wal_flush_lsn(&mut self) -> anyhow::Result<PgLsn> {
        Ok(self
            .query_one("SELECT pg_current_wal_flush_lsn()", &[])?
            .get(0))
@@ -202,7 +187,7 @@ pub trait PostgresClientExt: postgres::GenericClient {

 impl<C: postgres::GenericClient> PostgresClientExt for C {}

-pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> Result<()> {
+pub fn ensure_server_config(client: &mut impl postgres::GenericClient) -> anyhow::Result<()> {
    client.execute("create extension if not exists neon_test_utils", &[])?;

    let wal_keep_size: String = client.query_one("SHOW wal_keep_size", &[])?.get(0);
@@ -236,13 +221,13 @@ pub trait Crafter {
    /// * A vector of some valid "interesting" intermediate LSNs which one may start reading from.
    ///   May include or exclude Lsn(0) and the end-of-wal.
    /// * The expected end-of-wal LSN.
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)>;
+    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)>;
 }

 fn craft_internal<C: postgres::GenericClient>(
    client: &mut C,
-    f: impl Fn(&mut C, PgLsn) -> Result<(Vec<PgLsn>, Option<PgLsn>)>,
-) -> Result<(Vec<PgLsn>, PgLsn)> {
+    f: impl Fn(&mut C, PgLsn) -> anyhow::Result<(Vec<PgLsn>, Option<PgLsn>)>,
+) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
    ensure_server_config(client)?;

    let initial_lsn = client.pg_current_wal_insert_lsn()?;
@@ -274,7 +259,7 @@ fn craft_internal<C: postgres::GenericClient>(
 pub struct Simple;
 impl Crafter for Simple {
    const NAME: &'static str = "simple";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
        craft_internal(client, |client, _| {
            client.execute("CREATE table t(x int)", &[])?;
            Ok((Vec::new(), None))
@@ -285,7 +270,7 @@ impl Crafter for Simple {
 pub struct LastWalRecordXlogSwitch;
 impl Crafter for LastWalRecordXlogSwitch {
    const NAME: &'static str = "last_wal_record_xlog_switch";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
        // Do not use generate_internal because here we end up with flush_lsn exactly on
        // the segment boundary and insert_lsn after the initial page header, which is unusual.
        ensure_server_config(client)?;
@@ -307,7 +292,7 @@ impl Crafter for LastWalRecordXlogSwitch {
 pub struct LastWalRecordXlogSwitchEndsOnPageBoundary;
 impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
    const NAME: &'static str = "last_wal_record_xlog_switch_ends_on_page_boundary";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
        // Do not use generate_internal because here we end up with flush_lsn exactly on
        // the segment boundary and insert_lsn after the initial page header, which is unusual.
        ensure_server_config(client)?;
@@ -374,7 +359,7 @@ impl Crafter for LastWalRecordXlogSwitchEndsOnPageBoundary {
 fn craft_single_logical_message(
    client: &mut impl postgres::GenericClient,
    transactional: bool,
-) -> Result<(Vec<PgLsn>, PgLsn)> {
+) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
    craft_internal(client, |client, initial_lsn| {
        ensure!(
            initial_lsn < PgLsn::from(0x0200_0000 - 1024 * 1024),
@@ -416,7 +401,7 @@ fn craft_single_logical_message(
 pub struct WalRecordCrossingSegmentFollowedBySmallOne;
 impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
    const NAME: &'static str = "wal_record_crossing_segment_followed_by_small_one";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
        craft_single_logical_message(client, true)
    }
 }
@@ -424,7 +409,7 @@ impl Crafter for WalRecordCrossingSegmentFollowedBySmallOne {
 pub struct LastWalRecordCrossingSegment;
 impl Crafter for LastWalRecordCrossingSegment {
    const NAME: &'static str = "last_wal_record_crossing_segment";
-    fn craft(client: &mut impl postgres::GenericClient) -> Result<(Vec<PgLsn>, PgLsn)> {
+    fn craft(client: &mut impl postgres::GenericClient) -> anyhow::Result<(Vec<PgLsn>, PgLsn)> {
        craft_single_logical_message(client, false)
    }
 }
--- a/libs/pq_proto/Cargo.toml
+++ b/libs/pq_proto/Cargo.toml
@@ -10,7 +10,6 @@ byteorder.workspace = true
 pin-project-lite.workspace = true
 postgres-protocol.workspace = true
 rand.workspace = true
-serde.workspace = true
 tokio.workspace = true
 tracing.workspace = true
 thiserror.workspace = true
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -6,15 +6,10 @@ pub mod framed;

 use byteorder::{BigEndian, ReadBytesExt};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
-use postgres_protocol::PG_EPOCH;
-use serde::{Deserialize, Serialize};
-use std::{
-    borrow::Cow,
-    collections::HashMap,
-    fmt, io, str,
-    time::{Duration, SystemTime},
-};
-use tracing::{trace, warn};
+use std::{borrow::Cow, collections::HashMap, fmt, io, str};
+
+// re-export for use in utils pageserver_feedback.rs
+pub use postgres_protocol::PG_EPOCH;

 pub type Oid = u32;
 pub type SystemId = u64;
@@ -664,7 +659,7 @@ fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> Result<(), ProtocolErr
 }

 /// Read cstring from buf, advancing it.
-fn read_cstr(buf: &mut Bytes) -> Result<Bytes, ProtocolError> {
+pub fn read_cstr(buf: &mut Bytes) -> Result<Bytes, ProtocolError> {
    let pos = buf
        .iter()
        .position(|x| *x == 0)
@@ -939,175 +934,10 @@ impl<'a> BeMessage<'a> {
    }
 }

-/// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
-/// Serialized in custom flexible key/value format. In replication protocol, it
-/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
-/// Standby status update / Hot standby feedback messages.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub struct PageserverFeedback {
-    /// Last known size of the timeline. Used to enforce timeline size limit.
-    pub current_timeline_size: u64,
-    /// LSN last received and ingested by the pageserver.
-    pub last_received_lsn: u64,
-    /// LSN up to which data is persisted by the pageserver to its local disc.
-    pub disk_consistent_lsn: u64,
-    /// LSN up to which data is persisted by the pageserver on s3; safekeepers
-    /// consider WAL before it can be removed.
-    pub remote_consistent_lsn: u64,
-    pub replytime: SystemTime,
-}
-
-// NOTE: Do not forget to increment this number when adding new fields to PageserverFeedback.
-// Do not remove previously available fields because this might be backwards incompatible.
-pub const PAGESERVER_FEEDBACK_FIELDS_NUMBER: u8 = 5;
-
-impl PageserverFeedback {
-    pub fn empty() -> PageserverFeedback {
-        PageserverFeedback {
-            current_timeline_size: 0,
-            last_received_lsn: 0,
-            remote_consistent_lsn: 0,
-            disk_consistent_lsn: 0,
-            replytime: SystemTime::now(),
-        }
-    }
-
-    // Serialize PageserverFeedback using custom format
-    // to support protocol extensibility.
-    //
-    // Following layout is used:
-    // char - number of key-value pairs that follow.
-    //
-    // key-value pairs:
-    // null-terminated string - key,
-    // uint32 - value length in bytes
-    // value itself
-    //
-    // TODO: change serialized fields names once all computes migrate to rename.
-    pub fn serialize(&self, buf: &mut BytesMut) {
-        buf.put_u8(PAGESERVER_FEEDBACK_FIELDS_NUMBER); // # of keys
-        buf.put_slice(b"current_timeline_size\0");
-        buf.put_i32(8);
-        buf.put_u64(self.current_timeline_size);
-
-        buf.put_slice(b"ps_writelsn\0");
-        buf.put_i32(8);
-        buf.put_u64(self.last_received_lsn);
-        buf.put_slice(b"ps_flushlsn\0");
-        buf.put_i32(8);
-        buf.put_u64(self.disk_consistent_lsn);
-        buf.put_slice(b"ps_applylsn\0");
-        buf.put_i32(8);
-        buf.put_u64(self.remote_consistent_lsn);
-
-        let timestamp = self
-            .replytime
-            .duration_since(*PG_EPOCH)
-            .expect("failed to serialize pg_replytime earlier than PG_EPOCH")
-            .as_micros() as i64;
-
-        buf.put_slice(b"ps_replytime\0");
-        buf.put_i32(8);
-        buf.put_i64(timestamp);
-    }
-
-    // Deserialize PageserverFeedback message
-    // TODO: change serialized fields names once all computes migrate to rename.
-    pub fn parse(mut buf: Bytes) -> PageserverFeedback {
-        let mut rf = PageserverFeedback::empty();
-        let nfields = buf.get_u8();
-        for _ in 0..nfields {
-            let key = read_cstr(&mut buf).unwrap();
-            match key.as_ref() {
-                b"current_timeline_size" => {
-                    let len = buf.get_i32();
-                    assert_eq!(len, 8);
-                    rf.current_timeline_size = buf.get_u64();
-                }
-                b"ps_writelsn" => {
-                    let len = buf.get_i32();
-                    assert_eq!(len, 8);
-                    rf.last_received_lsn = buf.get_u64();
-                }
-                b"ps_flushlsn" => {
-                    let len = buf.get_i32();
-                    assert_eq!(len, 8);
-                    rf.disk_consistent_lsn = buf.get_u64();
-                }
-                b"ps_applylsn" => {
-                    let len = buf.get_i32();
-                    assert_eq!(len, 8);
-                    rf.remote_consistent_lsn = buf.get_u64();
-                }
-                b"ps_replytime" => {
-                    let len = buf.get_i32();
-                    assert_eq!(len, 8);
-                    let raw_time = buf.get_i64();
-                    if raw_time > 0 {
-                        rf.replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64);
-                    } else {
-                        rf.replytime = *PG_EPOCH - Duration::from_micros(-raw_time as u64);
-                    }
-                }
-                _ => {
-                    let len = buf.get_i32();
-                    warn!(
-                        "PageserverFeedback parse. unknown key {} of len {len}. Skip it.",
-                        String::from_utf8_lossy(key.as_ref())
-                    );
-                    buf.advance(len as usize);
-                }
-            }
-        }
-        trace!("PageserverFeedback parsed is {:?}", rf);
-        rf
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;

-    #[test]
-    fn test_replication_feedback_serialization() {
-        let mut rf = PageserverFeedback::empty();
-        // Fill rf with some values
-        rf.current_timeline_size = 12345678;
-        // Set rounded time to be able to compare it with deserialized value,
-        // because it is rounded up to microseconds during serialization.
-        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
-        let mut data = BytesMut::new();
-        rf.serialize(&mut data);
-
-        let rf_parsed = PageserverFeedback::parse(data.freeze());
-        assert_eq!(rf, rf_parsed);
-    }
-
-    #[test]
-    fn test_replication_feedback_unknown_key() {
-        let mut rf = PageserverFeedback::empty();
-        // Fill rf with some values
-        rf.current_timeline_size = 12345678;
-        // Set rounded time to be able to compare it with deserialized value,
-        // because it is rounded up to microseconds during serialization.
-        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
-        let mut data = BytesMut::new();
-        rf.serialize(&mut data);
-
-        // Add an extra field to the buffer and adjust number of keys
-        if let Some(first) = data.first_mut() {
-            *first = PAGESERVER_FEEDBACK_FIELDS_NUMBER + 1;
-        }
-
-        data.put_slice(b"new_field_one\0");
-        data.put_i32(8);
-        data.put_u64(42);
-
-        // Parse serialized data and check that new field is not parsed
-        let rf_parsed = PageserverFeedback::parse(data.freeze());
-        assert_eq!(rf, rf_parsed);
-    }
-
    #[test]
    fn test_startup_message_params_options_escaped() {
        fn split_options(params: &StartupMessageParams) -> Vec<Cow<'_, str>> {
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -11,6 +11,7 @@ async-trait.workspace = true
 anyhow.workspace = true
 bincode.workspace = true
 bytes.workspace = true
+chrono.workspace = true
 heapless.workspace = true
 hex = { workspace = true, features = ["serde"] }
 hyper = { workspace = true, features = ["full"] }
@@ -36,6 +37,7 @@ strum_macros.workspace = true
 url.workspace = true
 uuid.workspace = true

+pq_proto.workspace = true
 metrics.workspace = true
 workspace_hack.workspace = true

--- a/libs/utils/src/http/endpoint.rs
+++ b/libs/utils/src/http/endpoint.rs
@@ -131,7 +131,9 @@ impl RequestCancelled {

 impl Drop for RequestCancelled {
    fn drop(&mut self) {
-        if let Some(span) = self.warn.take() {
+        if std::thread::panicking() {
+            // we are unwinding due to panicking, assume we are not dropped for cancellation
+        } else if let Some(span) = self.warn.take() {
            // the span has all of the info already, but the outer `.instrument(span)` has already
            // been dropped, so we need to manually re-enter it for this message.
            //
--- a/libs/utils/src/http/json.rs
+++ b/libs/utils/src/http/json.rs
@@ -1,9 +1,7 @@
-use std::fmt::Display;
-
 use anyhow::Context;
 use bytes::Buf;
 use hyper::{header, Body, Request, Response, StatusCode};
-use serde::{Deserialize, Serialize, Serializer};
+use serde::{Deserialize, Serialize};

 use super::error::ApiError;

@@ -33,12 +31,3 @@ pub fn json_response<T: Serialize>(
        .map_err(|e| ApiError::InternalServerError(e.into()))?;
    Ok(response)
 }
-
-/// Serialize through Display trait.
-pub fn display_serialize<S, F>(z: &F, s: S) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-    F: Display,
-{
-    s.serialize_str(&format!("{}", z))
-}
--- a/libs/utils/src/id.rs
+++ b/libs/utils/src/id.rs
@@ -265,6 +265,26 @@ impl fmt::Display for TenantTimelineId {
    }
 }

+impl FromStr for TenantTimelineId {
+    type Err = anyhow::Error;
+    /// Parse `<tenant_id>/<timeline_id>`: exactly two `/`-separated parts, each parsed in turn.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let mut pieces = s.split('/');
+        let Some(tenant_part) = pieces.next() else {
+            anyhow::bail!("TenantTimelineId must contain tenant_id");
+        };
+        let tenant_id = tenant_part.parse()?;
+        let Some(timeline_part) = pieces.next() else {
+            anyhow::bail!("TenantTimelineId must contain timeline_id");
+        };
+        let timeline_id = timeline_part.parse()?;
+        if pieces.next().is_some() {
+            anyhow::bail!("TenantTimelineId must contain only tenant_id and timeline_id");
+        }
+        Ok(TenantTimelineId::new(tenant_id, timeline_id))
+    }
+}
+
 // Unique ID of a storage node (safekeeper or pageserver). Supposed to be issued
 // by the console.
 #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash, Debug, Serialize, Deserialize)]
--- a/libs/utils/src/lib.rs
+++ b/libs/utils/src/lib.rs
@@ -54,6 +54,8 @@ pub mod measured_stream;
 pub mod serde_percent;
 pub mod serde_regex;

+pub mod pageserver_feedback;
+
 pub mod tracing_span_assert;

 /// use with fail::cfg("$name", "return(2000)")
--- a/libs/utils/src/pageserver_feedback.rs
+++ b/libs/utils/src/pageserver_feedback.rs
@@ -0,0 +1,214 @@
+use std::time::{Duration, SystemTime};
+
+use bytes::{Buf, BufMut, Bytes, BytesMut};
+use pq_proto::{read_cstr, PG_EPOCH};
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
+use tracing::{trace, warn};
+
+use crate::lsn::Lsn;
+
+/// Feedback pageserver sends to safekeeper and safekeeper resends to compute.
+/// Serialized in custom flexible key/value format. In replication protocol, it
+/// is marked with NEON_STATUS_UPDATE_TAG_BYTE to differentiate from postgres
+/// Standby status update / Hot standby feedback messages.
+///
+/// serde Serialize is used only for human readable dump to json (e.g. in
+/// safekeepers debug_dump).
+#[serde_as]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+pub struct PageserverFeedback {
+    /// Last known size of the timeline. Used to enforce timeline size limit.
+    pub current_timeline_size: u64,
+    /// LSN last received and ingested by the pageserver. Controls backpressure.
+    #[serde_as(as = "DisplayFromStr")]
+    pub last_received_lsn: Lsn,
+    /// LSN up to which data is persisted by the pageserver to its local disk.
+    /// Controls backpressure.
+    #[serde_as(as = "DisplayFromStr")]
+    pub disk_consistent_lsn: Lsn,
+    /// LSN up to which data is persisted by the pageserver on s3; safekeepers
+    /// consider WAL before it can be removed.
+    #[serde_as(as = "DisplayFromStr")]
+    pub remote_consistent_lsn: Lsn,
+    /// serde dumps this as RFC3339; the wire format (`serialize`) uses microseconds since PG_EPOCH.
+    #[serde(with = "serde_systemtime")]
+    pub replytime: SystemTime,
+}
+
+// NOTE: Do not forget to increment this number when adding new fields to PageserverFeedback.
+// Do not remove previously available fields because this might be backwards incompatible.
+pub const PAGESERVER_FEEDBACK_FIELDS_NUMBER: u8 = 5;
+
+impl PageserverFeedback {
+    pub fn empty() -> PageserverFeedback {
+        PageserverFeedback {
+            current_timeline_size: 0,
+            last_received_lsn: Lsn::INVALID,
+            remote_consistent_lsn: Lsn::INVALID,
+            disk_consistent_lsn: Lsn::INVALID,
+            replytime: *PG_EPOCH,
+        }
+    }
+
+    // Serialize PageserverFeedback using custom format
+    // to support protocol extensibility.
+    //
+    // Following layout is used:
+    // u8 - number of key-value pairs that follow.
+    //
+    // key-value pairs:
+    // null-terminated string - key,
+    // int32 - value length in bytes (always 8 for the fields written here),
+    // value itself
+    //
+    // TODO: change serialized fields names once all computes migrate to rename.
+    pub fn serialize(&self, buf: &mut BytesMut) {
+        buf.put_u8(PAGESERVER_FEEDBACK_FIELDS_NUMBER); // # of keys
+        buf.put_slice(b"current_timeline_size\0");
+        buf.put_i32(8);
+        buf.put_u64(self.current_timeline_size);
+
+        buf.put_slice(b"ps_writelsn\0");
+        buf.put_i32(8);
+        buf.put_u64(self.last_received_lsn.0);
+        buf.put_slice(b"ps_flushlsn\0");
+        buf.put_i32(8);
+        buf.put_u64(self.disk_consistent_lsn.0);
+        buf.put_slice(b"ps_applylsn\0");
+        buf.put_i32(8);
+        buf.put_u64(self.remote_consistent_lsn.0);
+        // replytime is written as microseconds since PG_EPOCH; an earlier time panics in expect().
+        let timestamp = self
+            .replytime
+            .duration_since(*PG_EPOCH)
+            .expect("failed to serialize pg_replytime earlier than PG_EPOCH")
+            .as_micros() as i64;
+
+        buf.put_slice(b"ps_replytime\0");
+        buf.put_i32(8);
+        buf.put_i64(timestamp);
+    }
+
+    // Deserialize PageserverFeedback message written by `serialize`.
+    //
+    // Unknown keys are skipped with a warning. Malformed input panics: a known key
+    // whose value length is not 8 trips `assert_eq!`, and a key that `read_cstr`
+    // rejects trips `unwrap`.
+    // TODO: change serialized fields names once all computes migrate to rename.
+    pub fn parse(mut buf: Bytes) -> PageserverFeedback {
+        let mut rf = PageserverFeedback::empty();
+        let nfields = buf.get_u8();
+        for _ in 0..nfields {
+            let key = read_cstr(&mut buf).unwrap();
+            match key.as_ref() {
+                b"current_timeline_size" => {
+                    assert_eq!(buf.get_i32(), 8);
+                    rf.current_timeline_size = buf.get_u64();
+                }
+                b"ps_writelsn" => {
+                    assert_eq!(buf.get_i32(), 8);
+                    rf.last_received_lsn = Lsn(buf.get_u64());
+                }
+                b"ps_flushlsn" => {
+                    assert_eq!(buf.get_i32(), 8);
+                    rf.disk_consistent_lsn = Lsn(buf.get_u64());
+                }
+                b"ps_applylsn" => {
+                    assert_eq!(buf.get_i32(), 8);
+                    rf.remote_consistent_lsn = Lsn(buf.get_u64());
+                }
+                b"ps_replytime" => {
+                    assert_eq!(buf.get_i32(), 8);
+                    let raw_time = buf.get_i64();
+                    // Use unsigned_abs(): `-raw_time` overflows when raw_time is i64::MIN.
+                    if raw_time > 0 {
+                        rf.replytime = *PG_EPOCH + Duration::from_micros(raw_time as u64);
+                    } else {
+                        rf.replytime = *PG_EPOCH - Duration::from_micros(raw_time.unsigned_abs());
+                    }
+                }
+                _ => {
+                    let len = buf.get_i32();
+                    warn!(
+                        "PageserverFeedback parse. unknown key {} of len {len}. Skip it.",
+                        String::from_utf8_lossy(key.as_ref())
+                    );
+                    buf.advance(len as usize);
+                }
+            }
+        }
+        trace!("PageserverFeedback parsed is {:?}", rf);
+        rf
+    }
+}
+
+mod serde_systemtime {
+    //! serde `with` helpers that store a `SystemTime` as an RFC3339 string.
+
+    use std::time::SystemTime;
+
+    use chrono::{DateTime, Utc};
+    use serde::{Deserialize, Deserializer, Serializer};
+
+    /// Write `ts` as an RFC3339 string in UTC.
+    pub fn serialize<S: Serializer>(ts: &SystemTime, serializer: S) -> Result<S::Ok, S::Error> {
+        let rfc3339 = DateTime::<Utc>::from(*ts).to_rfc3339();
+        serializer.serialize_str(&rfc3339)
+    }
+
+    /// Read an RFC3339 string back into a `SystemTime`; parse failures become serde errors.
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<SystemTime, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let text = String::deserialize(deserializer)?;
+        let parsed = DateTime::parse_from_rfc3339(&text).map_err(serde::de::Error::custom)?;
+        Ok(parsed.into())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_replication_feedback_serialization() {
+        let mut rf = PageserverFeedback::empty();
+        // Fill rf with some values
+        rf.current_timeline_size = 12345678;
+        // Set rounded time to be able to compare it with deserialized value,
+        // because it is rounded up to microseconds during serialization.
+        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
+        let mut data = BytesMut::new();
+        rf.serialize(&mut data);
+
+        let rf_parsed = PageserverFeedback::parse(data.freeze());
+        assert_eq!(rf, rf_parsed);
+    }
+
+    #[test]
+    fn test_replication_feedback_unknown_key() {
+        let mut rf = PageserverFeedback::empty();
+        // Fill rf with some values
+        rf.current_timeline_size = 12345678;
+        // Set rounded time to be able to compare it with deserialized value,
+        // because it is rounded up to microseconds during serialization.
+        rf.replytime = *PG_EPOCH + Duration::from_secs(100_000_000);
+        let mut data = BytesMut::new();
+        rf.serialize(&mut data);
+
+        // Add an extra field to the buffer and adjust number of keys
+        if let Some(first) = data.first_mut() {
+            *first = PAGESERVER_FEEDBACK_FIELDS_NUMBER + 1;
+        }
+
+        data.put_slice(b"new_field_one\0");
+        data.put_i32(8);
+        data.put_u64(42);
+
+        // Parse serialized data and check that new field is not parsed
+        let rf_parsed = PageserverFeedback::parse(data.freeze());
+        assert_eq!(rf, rf_parsed);
+    }
+}
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -52,6 +52,7 @@ sync_wrapper.workspace = true
 tokio-tar.workspace = true
 thiserror.workspace = true
 tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time"] }
+tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
 tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
--- a/pageserver/benches/bench_layer_map.rs
+++ b/pageserver/benches/bench_layer_map.rs
@@ -33,7 +33,7 @@ fn build_layer_map(filename_dump: PathBuf) -> LayerMap<LayerDescriptor> {
        min_lsn = min(min_lsn, lsn_range.start);
        max_lsn = max(max_lsn, Lsn(lsn_range.end.0 - 1));

-        updates.insert_historic(Arc::new(layer)).unwrap();
+        updates.insert_historic(Arc::new(layer));
    }

    println!("min: {min_lsn}, max: {max_lsn}");
@@ -215,7 +215,7 @@ fn bench_sequential(c: &mut Criterion) {
            is_incremental: false,
            short_id: format!("Layer {}", i),
        };
-        updates.insert_historic(Arc::new(layer)).unwrap();
+        updates.insert_historic(Arc::new(layer));
    }
    updates.flush();
    println!("Finished layer map init in {:?}", now.elapsed());
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -1,9 +1,9 @@
 use metrics::core::{AtomicU64, GenericCounter};
 use metrics::{
    register_counter_vec, register_histogram, register_histogram_vec, register_int_counter,
-    register_int_counter_vec, register_int_gauge_vec, register_uint_gauge_vec, Counter, CounterVec,
-    Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge,
-    UIntGaugeVec,
+    register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge_vec,
+    Counter, CounterVec, Histogram, HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec,
+    UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::models::TenantState;
@@ -287,14 +287,33 @@ impl EvictionsWithLowResidenceDuration {
        let Some(_counter) = self.counter.take() else {
            return;
        };
-        EVICTIONS_WITH_LOW_RESIDENCE_DURATION
-            .remove_label_values(&[
-                tenant_id,
-                timeline_id,
-                self.data_source,
-                &Self::threshold_label_value(self.threshold),
-            ])
-            .expect("we own the metric, no-one else should remove it");
+
+        let threshold = Self::threshold_label_value(self.threshold);
+
+        let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
+            tenant_id,
+            timeline_id,
+            self.data_source,
+            &threshold,
+        ]);
+
+        match removed {
+            Err(e) => {
+                // this has been hit in staging as
+                // <https://neondatabase.sentry.io/issues/4142396994/>, but we don't know how.
+                // because we can be in the drop path already, don't risk:
+                // - "double-panic => illegal instruction" or
+                // - future "drop panic => abort"
+                //
+                // so just nag: (the error has the labels)
+                tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}");
+            }
+            Ok(()) => {
+                // to help identify cases where we double-remove the same values, let's log all
+                // deletions?
+                tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source);
+            }
+        }
    }
 }

@@ -459,6 +478,56 @@ pub static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
    .expect("Failed to register tenant_task_events metric")
 });

+// walreceiver metrics
+
+/// Counter of started walreceiver connections; incremented at the start of
+/// every `handle_walreceiver_connection` call.
+pub static WALRECEIVER_STARTED_CONNECTIONS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_walreceiver_started_connections_total",
+        "Number of started walreceiver connections"
+    )
+    .expect("failed to define a metric")
+});
+
+/// Gauge of active walreceiver managers; incremented inside
+/// `connection_manager_loop_step` and decremented by a scope guard when that
+/// call exits.
+pub static WALRECEIVER_ACTIVE_MANAGERS: Lazy<IntGauge> = Lazy::new(|| {
+    register_int_gauge!(
+        "pageserver_walreceiver_active_managers",
+        "Number of active walreceiver managers"
+    )
+    .expect("failed to define a metric")
+});
+
+/// Counter of `change_connection` calls, labelled by `reason` with the value
+/// of `ReconnectReason::name()`.
+pub static WALRECEIVER_SWITCHES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_walreceiver_switches_total",
+        "Number of walreceiver manager change_connection calls",
+        &["reason"]
+    )
+    .expect("failed to define a metric")
+});
+
+/// Counter of broker updates; incremented for every update passed to
+/// `register_timeline_update`.
+pub static WALRECEIVER_BROKER_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "pageserver_walreceiver_broker_updates_total",
+        "Number of received broker updates in walreceiver"
+    )
+    .expect("failed to define a metric")
+});
+
+/// Counter of walreceiver candidate events, labelled by `event`. The label
+/// values used in this file are `"add"` and `"remove"`, exposed through the
+/// two pre-resolved counters below.
+pub static WALRECEIVER_CANDIDATES_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "pageserver_walreceiver_candidates_events_total",
+        "Number of walreceiver candidate events",
+        &["event"]
+    )
+    .expect("failed to define a metric")
+});
+
+/// The `event="add"` counter of `WALRECEIVER_CANDIDATES_EVENTS`; incremented
+/// when `register_timeline_update` sees a safekeeper that had no previous entry.
+pub static WALRECEIVER_CANDIDATES_ADDED: Lazy<IntCounter> =
+    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["add"]));
+
+/// The `event="remove"` counter of `WALRECEIVER_CANDIDATES_EVENTS`; incremented
+/// for each safekeeper node dropped for not sending events within the lagging
+/// WAL timeout.
+pub static WALRECEIVER_CANDIDATES_REMOVED: Lazy<IntCounter> =
+    Lazy::new(|| WALRECEIVER_CANDIDATES_EVENTS.with_label_values(&["remove"]));
+
 // Metrics collected on WAL redo operations
 //
 // We collect the time spent in actual WAL redo ('redo'), and time waiting
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -250,6 +250,15 @@ async fn page_service_conn_main(

    let peer_addr = socket.peer_addr().context("get peer address")?;

+    // setup read timeout of 10 minutes. the timeout is rather arbitrary for requirements:
+    // - long enough for most valid compute connections
+    // - less than infinite to stop us from "leaking" connections to long-gone computes
+    //
+    // no write timeout is used, because the kernel is assumed to error writes after some time.
+    let mut socket = tokio_io_timeout::TimeoutReader::new(socket);
+    socket.set_timeout(Some(std::time::Duration::from_secs(60 * 10)));
+    let socket = std::pin::pin!(socket);
+
    // XXX: pgbackend.run() should take the connection_ctx,
    // and create a child per-query context when it invokes process_query.
    // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler
@@ -343,7 +352,7 @@ impl PageServerHandler {
        tenant_id: TenantId,
        timeline_id: TimelineId,
        ctx: RequestContext,
-    ) -> anyhow::Result<()>
+    ) -> Result<(), QueryError>
    where
        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
    {
@@ -389,7 +398,9 @@ impl PageServerHandler {
                Some(FeMessage::CopyData(bytes)) => bytes,
                Some(FeMessage::Terminate) => break,
                Some(m) => {
-                    anyhow::bail!("unexpected message: {m:?} during COPY");
+                    return Err(QueryError::Other(anyhow::anyhow!(
+                        "unexpected message: {m:?} during COPY"
+                    )));
                }
                None => break, // client disconnected
            };
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -272,10 +272,7 @@ impl UninitializedTimeline<'_> {
            .await
            .context("Failed to flush after basebackup import")?;

-        // Initialize without loading the layer map. We started with an empty layer map, and already
-        // updated it for the layers that we created during the import.
-        let mut timelines = self.owning_tenant.timelines.lock().unwrap();
-        self.initialize_with_lock(ctx, &mut timelines, false, true)
+        self.initialize(ctx)
    }

    fn raw_timeline(&self) -> anyhow::Result<&Arc<Timeline>> {
@@ -2404,8 +2401,6 @@ impl Tenant {
                )
            })?;

-        // Initialize the timeline without loading the layer map, because we already updated the layer
-        // map above, when we imported the datadir.
        let timeline = {
            let mut timelines = self.timelines.lock().unwrap();
            raw_timeline.initialize_with_lock(ctx, &mut timelines, false, true)?
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -51,7 +51,7 @@ use crate::keyspace::KeyPartitioning;
 use crate::repository::Key;
 use crate::tenant::storage_layer::InMemoryLayer;
 use crate::tenant::storage_layer::Layer;
-use anyhow::{bail, Result};
+use anyhow::Result;
 use std::collections::VecDeque;
 use std::ops::Range;
 use std::sync::Arc;
@@ -125,7 +125,7 @@ where
    ///
    /// Insert an on-disk layer.
    ///
-    pub fn insert_historic(&mut self, layer: Arc<L>) -> anyhow::Result<()> {
+    pub fn insert_historic(&mut self, layer: Arc<L>) {
        self.layer_map.insert_historic_noflush(layer)
    }

@@ -273,21 +273,16 @@ where
    ///
    /// Helper function for BatchedUpdates::insert_historic
    ///
-    pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) -> anyhow::Result<()> {
-        let key = historic_layer_coverage::LayerKey::from(&*layer);
-        if self.historic.contains(&key) {
-            bail!(
-                "Attempt to insert duplicate layer {} in layer map",
-                layer.short_id()
-            );
-        }
-        self.historic.insert(key, Arc::clone(&layer));
+    pub(self) fn insert_historic_noflush(&mut self, layer: Arc<L>) {
+        // TODO: See #3869, resulting #4088, attempted fix and repro #4094
+        self.historic.insert(
+            historic_layer_coverage::LayerKey::from(&*layer),
+            Arc::clone(&layer),
+        );

        if Self::is_l0(&layer) {
            self.l0_delta_layers.push(layer);
        }
-
-        Ok(())
    }

    ///
@@ -839,7 +834,7 @@ mod tests {

            let expected_in_counts = (1, usize::from(expected_l0));

-            map.batch_update().insert_historic(remote.clone()).unwrap();
+            map.batch_update().insert_historic(remote.clone());
            assert_eq!(count_layer_in(&map, &remote), expected_in_counts);

            let replaced = map
--- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
+++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs
@@ -417,14 +417,6 @@ impl<Value: Clone> BufferedHistoricLayerCoverage<Value> {
        }
    }

-    pub fn contains(&self, layer_key: &LayerKey) -> bool {
-        match self.buffer.get(layer_key) {
-            Some(None) => false,                         // layer remove was buffered
-            Some(_) => true,                             // layer insert was buffered
-            None => self.layers.contains_key(layer_key), // no buffered ops for this layer
-        }
-    }
-
    pub fn insert(&mut self, layer_key: LayerKey, value: Value) {
        self.buffer.insert(layer_key, Some(value));
    }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -588,15 +588,25 @@ impl Timeline {

        let _timer = self.metrics.wait_lsn_time_histo.start_timer();

-        self.last_record_lsn.wait_for_timeout(lsn, self.conf.wait_lsn_timeout).await
-            .with_context(||
-                format!(
-                    "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}",
-                    lsn, self.get_last_record_lsn(), self.get_disk_consistent_lsn()
-                )
-            )?;
-
-        Ok(())
+        match self
+            .last_record_lsn
+            .wait_for_timeout(lsn, self.conf.wait_lsn_timeout)
+            .await
+        {
+            Ok(()) => Ok(()),
+            seqwait_error => {
+                drop(_timer);
+                let walreceiver_status = self.walreceiver.status().await;
+                seqwait_error.with_context(|| format!(
+                    "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, {}",
+                    lsn,
+                    self.get_last_record_lsn(),
+                    self.get_disk_consistent_lsn(),
+                    walreceiver_status.map(|status| status.to_human_readable_string())
+                            .unwrap_or_else(|| "WalReceiver status: Not active".to_string()),
+                ))
+            }
+        }
    }

    /// Check that it is valid to request operations with that lsn.
@@ -1484,7 +1494,7 @@ impl Timeline {

                trace!("found layer {}", layer.path().display());
                total_physical_size += file_size;
-                updates.insert_historic(Arc::new(layer))?;
+                updates.insert_historic(Arc::new(layer));
                num_layers += 1;
            } else if let Some(deltafilename) = DeltaFileName::parse_str(&fname) {
                // Create a DeltaLayer struct for each delta file.
@@ -1516,7 +1526,7 @@ impl Timeline {

                trace!("found layer {}", layer.path().display());
                total_physical_size += file_size;
-                updates.insert_historic(Arc::new(layer))?;
+                updates.insert_historic(Arc::new(layer));
                num_layers += 1;
            } else if fname == METADATA_FILE_NAME || fname.ends_with(".old") {
                // ignore these
@@ -1590,7 +1600,7 @@ impl Timeline {
            // remote index file?
            // If so, rename_to_backup those files & replace their local layer with
            // a RemoteLayer in the layer map so that we re-download them on-demand.
-            if let Some(local_layer) = &local_layer {
+            if let Some(local_layer) = local_layer {
                let local_layer_path = local_layer
                    .local_path()
                    .expect("caller must ensure that local_layers only contains local layers");
@@ -1615,6 +1625,7 @@ impl Timeline {
                        anyhow::bail!("could not rename file {local_layer_path:?}: {err:?}");
                    } else {
                        self.metrics.resident_physical_size_gauge.sub(local_size);
+                        updates.remove_historic(local_layer);
                        // fall-through to adding the remote layer
                    }
                } else {
@@ -1650,11 +1661,7 @@ impl Timeline {
                    );
                    let remote_layer = Arc::new(remote_layer);

-                    if let Some(local_layer) = &local_layer {
-                        updates.replace_historic(local_layer, remote_layer)?;
-                    } else {
-                        updates.insert_historic(remote_layer)?;
-                    }
+                    updates.insert_historic(remote_layer);
                }
                LayerFileName::Delta(deltafilename) => {
                    // Create a RemoteLayer for the delta file.
@@ -1678,11 +1685,7 @@ impl Timeline {
                        LayerAccessStats::for_loading_layer(LayerResidenceStatus::Evicted),
                    );
                    let remote_layer = Arc::new(remote_layer);
-                    if let Some(local_layer) = &local_layer {
-                        updates.replace_historic(local_layer, remote_layer)?;
-                    } else {
-                        updates.insert_historic(remote_layer)?;
-                    }
+                    updates.insert_historic(remote_layer);
                }
            }
        }
@@ -2730,7 +2733,7 @@ impl Timeline {
            .write()
            .unwrap()
            .batch_update()
-            .insert_historic(Arc::new(new_delta))?;
+            .insert_historic(Arc::new(new_delta));

        // update the timeline's physical size
        let sz = new_delta_path.metadata()?.len();
@@ -2935,7 +2938,7 @@ impl Timeline {
            self.metrics
                .resident_physical_size_gauge
                .add(metadata.len());
-            updates.insert_historic(Arc::new(l))?;
+            updates.insert_historic(Arc::new(l));
        }
        updates.flush();
        drop(layers);
@@ -3368,7 +3371,7 @@ impl Timeline {

            new_layer_paths.insert(new_delta_path, LayerFileMetadata::new(metadata.len()));
            let x: Arc<dyn PersistentLayer + 'static> = Arc::new(l);
-            updates.insert_historic(x)?;
+            updates.insert_historic(x);
        }

        // Now that we have reshuffled the data to set of new delta layers, we can
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -38,12 +38,14 @@ use std::sync::{Arc, Weak};
 use std::time::Duration;
 use storage_broker::BrokerClientChannel;
 use tokio::select;
-use tokio::sync::watch;
+use tokio::sync::{watch, RwLock};
 use tokio_util::sync::CancellationToken;
 use tracing::*;

 use utils::id::TenantTimelineId;

+use self::connection_manager::ConnectionManagerStatus;
+
 use super::Timeline;

 #[derive(Clone)]
@@ -63,6 +65,7 @@ pub struct WalReceiver {
    timeline_ref: Weak<Timeline>,
    conf: WalReceiverConf,
    started: AtomicBool,
+    manager_status: Arc<RwLock<Option<ConnectionManagerStatus>>>,
 }

 impl WalReceiver {
@@ -76,6 +79,7 @@ impl WalReceiver {
            timeline_ref,
            conf,
            started: AtomicBool::new(false),
+            manager_status: Arc::new(RwLock::new(None)),
        }
    }

@@ -96,8 +100,8 @@ impl WalReceiver {
        let timeline_id = timeline.timeline_id;
        let walreceiver_ctx =
            ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
-
        let wal_receiver_conf = self.conf.clone();
+        let loop_status = Arc::clone(&self.manager_status);
        task_mgr::spawn(
            WALRECEIVER_RUNTIME.handle(),
            TaskKind::WalReceiverManager,
@@ -115,24 +119,28 @@ impl WalReceiver {
                    select! {
                        _ = task_mgr::shutdown_watcher() => {
                            info!("WAL receiver shutdown requested, shutting down");
-                            connection_manager_state.shutdown().await;
-                            return Ok(());
+                            break;
                        },
                        loop_step_result = connection_manager_loop_step(
                            &mut broker_client,
                            &mut connection_manager_state,
                            &walreceiver_ctx,
+                            &loop_status,
                        ) => match loop_step_result {
                            ControlFlow::Continue(()) => continue,
                            ControlFlow::Break(()) => {
                                info!("Connection manager loop ended, shutting down");
-                                connection_manager_state.shutdown().await;
-                                return Ok(());
+                                break;
                            }
                        },
                    }
                }
-            }.instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id))
+
+                connection_manager_state.shutdown().await;
+                *loop_status.write().await = None;
+                Ok(())
+            }
+            .instrument(info_span!(parent: None, "wal_connection_manager", tenant = %tenant_id, timeline = %timeline_id))
        );

        self.started.store(true, atomic::Ordering::Release);
@@ -149,6 +157,10 @@ impl WalReceiver {
        .await;
        self.started.store(false, atomic::Ordering::Release);
    }
+
+    /// Returns a copy of the status last published by the connection manager
+    /// task. The value is `None` before the first status is published and
+    /// again after the manager loop has shut down.
+    pub(super) async fn status(&self) -> Option<ConnectionManagerStatus> {
+        let published = self.manager_status.read().await;
+        published.clone()
+    }
 }

 /// A handle of an asynchronous task.
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -13,6 +13,10 @@ use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, ti

 use super::{TaskStateUpdate, WalReceiverConf};
 use crate::context::{DownloadBehavior, RequestContext};
+use crate::metrics::{
+    WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED,
+    WALRECEIVER_CANDIDATES_REMOVED, WALRECEIVER_SWITCHES,
+};
 use crate::task_mgr::TaskKind;
 use crate::tenant::Timeline;
 use anyhow::Context;
@@ -24,6 +28,7 @@ use storage_broker::proto::SubscribeSafekeeperInfoRequest;
 use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
 use storage_broker::BrokerClientChannel;
 use storage_broker::Streaming;
+use tokio::sync::RwLock;
 use tokio::{select, sync::watch};
 use tracing::*;

@@ -43,6 +48,7 @@ pub(super) async fn connection_manager_loop_step(
    broker_client: &mut BrokerClientChannel,
    connection_manager_state: &mut ConnectionManagerState,
    ctx: &RequestContext,
+    manager_status: &RwLock<Option<ConnectionManagerStatus>>,
 ) -> ControlFlow<(), ()> {
    let mut timeline_state_updates = connection_manager_state
        .timeline
@@ -56,6 +62,11 @@ pub(super) async fn connection_manager_loop_step(
        }
    }

+    WALRECEIVER_ACTIVE_MANAGERS.inc();
+    scopeguard::defer! {
+        WALRECEIVER_ACTIVE_MANAGERS.dec();
+    }
+
    let id = TenantTimelineId {
        tenant_id: connection_manager_state.timeline.tenant_id,
        timeline_id: connection_manager_state.timeline.timeline_id,
@@ -180,6 +191,7 @@ pub(super) async fn connection_manager_loop_step(
                .change_connection(new_candidate, ctx)
                .await
        }
+        *manager_status.write().await = Some(connection_manager_state.manager_status());
    }
 }

@@ -267,6 +279,78 @@ pub(super) struct ConnectionManagerState {
    wal_stream_candidates: HashMap<NodeId, BrokerSkTimeline>,
 }

+/// Information about the connection manager's current connection and its connection candidates.
+#[derive(Debug, Clone)]
+pub struct ConnectionManagerStatus {
+    /// Status of the current WAL connection; `None` when there is no connection.
+    existing_connection: Option<WalConnectionStatus>,
+    /// Copy of the manager's per-safekeeper broker data at the time the status was taken.
+    wal_stream_candidates: HashMap<NodeId, BrokerSkTimeline>,
+}
+
+impl ConnectionManagerStatus {
+    /// Renders this status as a single line suitable for log and error messages.
+    ///
+    /// The line starts with `WalReceiver status`, continues with a description of
+    /// the current connection (or `: disconnected` when there is none) and ends
+    /// with the list of safekeeper candidates as `(id|update_time|commit_lsn)`.
+    pub fn to_human_readable_string(&self) -> String {
+        const DATE_TIME_FORMAT: &str = "%Y-%m-%d %H:%M:%S";
+
+        let mut description = String::from("WalReceiver status");
+
+        match &self.existing_connection {
+            None => description.push_str(": disconnected"),
+            // WAL has been processed over this connection: report streaming progress.
+            Some(status) if status.has_processed_wal => {
+                description += &format!(
+                    " (update {}): streaming WAL from node {}, ",
+                    status.latest_wal_update.format(DATE_TIME_FORMAT),
+                    status.node,
+                );
+
+                let lsn_summary = match (status.streaming_lsn, status.commit_lsn) {
+                    (Some(streaming_lsn), Some(commit_lsn)) => {
+                        format!("commit|streaming Lsn: {commit_lsn}|{streaming_lsn}")
+                    }
+                    (Some(streaming_lsn), None) => format!("streaming Lsn: {streaming_lsn}"),
+                    (None, Some(commit_lsn)) => format!("commit Lsn: {commit_lsn}"),
+                    (None, None) => "no streaming data".to_string(),
+                };
+                description += &lsn_summary;
+            }
+            // No WAL processed yet: report the latest connection update instead.
+            Some(status) => {
+                let updated_at = status.latest_connection_update.format(DATE_TIME_FORMAT);
+                let phase = if status.is_connected {
+                    format!(
+                        " (update {}): connecting to node {}",
+                        updated_at, status.node,
+                    )
+                } else {
+                    format!(
+                        " (update {}): initializing node {} connection",
+                        updated_at, status.node,
+                    )
+                };
+                description += &phase;
+            }
+        }
+
+        // One "(id|time|lsn)" entry per candidate, in the map's iteration order.
+        let candidates = self
+            .wal_stream_candidates
+            .iter()
+            .map(|(node_id, candidate_info)| {
+                format!(
+                    "({}|{}|{})",
+                    node_id,
+                    candidate_info.latest_update.format("%H:%M:%S"),
+                    Lsn(candidate_info.timeline.commit_lsn)
+                )
+            })
+            .collect::<Vec<_>>()
+            .join(", ");
+
+        description.push_str(", safekeeper candidates (id|update_time|commit_lsn): [");
+        description += &candidates;
+        description.push(']');
+
+        description
+    }
+}
+
 /// Current connection data.
 #[derive(Debug)]
 struct WalConnection {
@@ -293,14 +377,14 @@ struct NewCommittedWAL {
    discovered_at: NaiveDateTime,
 }

-#[derive(Debug)]
+#[derive(Debug, Clone, Copy)]
 struct RetryInfo {
    next_retry_at: Option<NaiveDateTime>,
    retry_duration_seconds: f64,
 }

 /// Data about the timeline to connect to, received from the broker.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 struct BrokerSkTimeline {
    timeline: SafekeeperTimelineInfo,
    /// Time at which the data was fetched from the broker last time, to track the stale data.
@@ -325,9 +409,14 @@ impl ConnectionManagerState {

    /// Shuts down the current connection (if any) and immediately starts another one with the given connection string.
    async fn change_connection(&mut self, new_sk: NewWalConnectionCandidate, ctx: &RequestContext) {
+        WALRECEIVER_SWITCHES
+            .with_label_values(&[new_sk.reason.name()])
+            .inc();
+
        self.drop_old_connection(true).await;

        let id = self.id;
+        let node_id = new_sk.safekeeper_id;
        let connect_timeout = self.conf.wal_connect_timeout;
        let timeline = Arc::clone(&self.timeline);
        let ctx = ctx.detached_child(
@@ -343,12 +432,13 @@ impl ConnectionManagerState {
                    cancellation,
                    connect_timeout,
                    ctx,
+                    node_id,
                )
                .await
                .context("walreceiver connection handling failure")
            }
            .instrument(
-                info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, node_id = %new_sk.safekeeper_id),
+                info_span!("walreceiver_connection", tenant_id = %id.tenant_id, timeline_id = %id.timeline_id, %node_id),
            )
        });

@@ -364,6 +454,7 @@ impl ConnectionManagerState {
                latest_wal_update: now,
                streaming_lsn: None,
                commit_lsn: None,
+                node: node_id,
            },
            connection_task: connection_handle,
            discovered_new_wal: None,
@@ -437,6 +528,8 @@ impl ConnectionManagerState {

    /// Adds another broker timeline into the state, if its more recent than the one already added there for the same key.
    fn register_timeline_update(&mut self, timeline_update: SafekeeperTimelineInfo) {
+        WALRECEIVER_BROKER_UPDATES.inc();
+
        let new_safekeeper_id = NodeId(timeline_update.safekeeper_id);
        let old_entry = self.wal_stream_candidates.insert(
            new_safekeeper_id,
@@ -448,6 +541,7 @@ impl ConnectionManagerState {

        if old_entry.is_none() {
            info!("New SK node was added: {new_safekeeper_id}");
+            WALRECEIVER_CANDIDATES_ADDED.inc();
        }
    }

@@ -716,6 +810,7 @@ impl ConnectionManagerState {
            for node_id in node_ids_to_remove {
                info!("Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections");
                self.wal_connection_retries.remove(&node_id);
+                WALRECEIVER_CANDIDATES_REMOVED.inc();
            }
        }
    }
@@ -725,6 +820,13 @@ impl ConnectionManagerState {
            wal_connection.connection_task.shutdown().await;
        }
    }
+
+    /// Takes a snapshot of the manager's state for reporting: the status of the
+    /// current WAL connection (if any) and a copy of all known candidates.
+    fn manager_status(&self) -> ConnectionManagerStatus {
+        let existing_connection = self
+            .wal_connection
+            .as_ref()
+            .map(|wal_connection| wal_connection.status);
+        let wal_stream_candidates = self.wal_stream_candidates.clone();
+
+        ConnectionManagerStatus {
+            existing_connection,
+            wal_stream_candidates,
+        }
+    }
 }

 #[derive(Debug)]
@@ -732,8 +834,6 @@ struct NewWalConnectionCandidate {
    safekeeper_id: NodeId,
    wal_source_connconf: PgConnectionConfig,
    availability_zone: Option<String>,
-    // This field is used in `derive(Debug)` only.
-    #[allow(dead_code)]
    reason: ReconnectReason,
 }

@@ -762,6 +862,18 @@ enum ReconnectReason {
    },
 }

+impl ReconnectReason {
+    /// Returns the variant's name without any of its payload, for use as a
+    /// metric label value.
+    ///
+    /// Every arm yields a string literal, so the result is `&'static str` and
+    /// is not tied to the lifetime of `self`.
+    fn name(&self) -> &'static str {
+        match self {
+            ReconnectReason::NoExistingConnection => "NoExistingConnection",
+            ReconnectReason::LaggingWal { .. } => "LaggingWal",
+            ReconnectReason::SwitchAvailabilityZone => "SwitchAvailabilityZone",
+            ReconnectReason::NoWalTimeout { .. } => "NoWalTimeout",
+            ReconnectReason::NoKeepAlives { .. } => "NoKeepAlives",
+        }
+    }
+}
+
 fn wal_stream_connection_config(
    TenantTimelineId {
        tenant_id,
@@ -867,6 +979,7 @@ mod tests {
            latest_wal_update: now,
            commit_lsn: Some(Lsn(current_lsn)),
            streaming_lsn: Some(Lsn(current_lsn)),
+            node: NodeId(1),
        };

        state.conf.max_lsn_wal_lag = NonZeroU64::new(100).unwrap();
@@ -1035,6 +1148,7 @@ mod tests {
            latest_wal_update: now,
            commit_lsn: Some(current_lsn),
            streaming_lsn: Some(current_lsn),
+            node: connected_sk_id,
        };

        state.wal_connection = Some(WalConnection {
@@ -1101,6 +1215,7 @@ mod tests {
            latest_wal_update: time_over_threshold,
            commit_lsn: Some(current_lsn),
            streaming_lsn: Some(current_lsn),
+            node: NodeId(1),
        };

        state.wal_connection = Some(WalConnection {
@@ -1164,6 +1279,7 @@ mod tests {
            latest_wal_update: time_over_threshold,
            commit_lsn: Some(current_lsn),
            streaming_lsn: Some(current_lsn),
+            node: NodeId(1),
        };

        state.wal_connection = Some(WalConnection {
@@ -1261,6 +1377,7 @@ mod tests {
            latest_wal_update: now,
            commit_lsn: Some(current_lsn),
            streaming_lsn: Some(current_lsn),
+            node: connected_sk_id,
        };

        state.wal_connection = Some(WalConnection {
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -24,8 +24,8 @@ use tokio_util::sync::CancellationToken;
 use tracing::{debug, error, info, trace, warn};

 use super::TaskStateUpdate;
-use crate::context::RequestContext;
 use crate::metrics::LIVE_CONNECTIONS_COUNT;
+use crate::{context::RequestContext, metrics::WALRECEIVER_STARTED_CONNECTIONS};
 use crate::{
    task_mgr,
    task_mgr::TaskKind,
@@ -37,8 +37,8 @@ use crate::{
 use postgres_backend::is_expected_io_error;
 use postgres_connection::PgConnectionConfig;
 use postgres_ffi::waldecoder::WalStreamDecoder;
-use pq_proto::PageserverFeedback;
-use utils::lsn::Lsn;
+use utils::pageserver_feedback::PageserverFeedback;
+use utils::{id::NodeId, lsn::Lsn};

 /// Status of the connection.
 #[derive(Debug, Clone, Copy)]
@@ -56,6 +56,8 @@ pub(super) struct WalConnectionStatus {
    pub streaming_lsn: Option<Lsn>,
    /// Latest commit_lsn received from the safekeeper. Can be zero if no message has been received yet.
    pub commit_lsn: Option<Lsn>,
+    /// The node it is connected to
+    pub node: NodeId,
 }

 /// Open a connection to the given safekeeper and receive WAL, sending back progress
@@ -67,7 +69,10 @@ pub(super) async fn handle_walreceiver_connection(
    cancellation: CancellationToken,
    connect_timeout: Duration,
    ctx: RequestContext,
+    node: NodeId,
 ) -> anyhow::Result<()> {
+    WALRECEIVER_STARTED_CONNECTIONS.inc();
+
    // Connect to the database in replication mode.
    info!("connecting to {wal_source_connconf:?}");

@@ -100,6 +105,7 @@ pub(super) async fn handle_walreceiver_connection(
        latest_wal_update: Utc::now().naive_utc(),
        streaming_lsn: None,
        commit_lsn: None,
+        node,
    };
    if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) {
        warn!("Wal connection event listener dropped right after connection init, aborting the connection: {e}");
@@ -122,7 +128,7 @@ pub(super) async fn handle_walreceiver_connection(
        false,
        async move {
            select! {
-                connection_result = connection => match connection_result{
+                connection_result = connection => match connection_result {
                    Ok(()) => info!("Walreceiver db connection closed"),
                    Err(connection_error) => {
                        if let Err(e) = ignore_expected_errors(connection_error) {
@@ -319,12 +325,12 @@ pub(super) async fn handle_walreceiver_connection(
                timeline.get_remote_consistent_lsn().unwrap_or(Lsn(0));

            // The last LSN we processed. It is not guaranteed to survive pageserver crash.
-            let last_received_lsn = u64::from(last_lsn);
+            let last_received_lsn = last_lsn;
            // `disk_consistent_lsn` is the LSN at which page server guarantees local persistence of all received data
-            let disk_consistent_lsn = u64::from(timeline.get_disk_consistent_lsn());
+            let disk_consistent_lsn = timeline.get_disk_consistent_lsn();
            // The last LSN that is synced to remote storage and is guaranteed to survive pageserver crash
            // Used by safekeepers to remove WAL preceding `remote_consistent_lsn`.
-            let remote_consistent_lsn = u64::from(timeline_remote_consistent_lsn);
+            let remote_consistent_lsn = timeline_remote_consistent_lsn;
            let ts = SystemTime::now();

            // Update the status about what we just received. This is shown in the mgmt API.
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -96,6 +96,8 @@ static shmem_request_hook_type prev_shmem_request_hook;
 #endif
 static int   lfc_shrinking_factor; /* power of two by which local cache size will be shrinked when lfc_free_space_watermark is reached */

+void FileCacheMonitorMain(Datum main_arg);
+
 static void
 lfc_shmem_startup(void)
 {
@@ -378,7 +380,6 @@ lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
 {
 	BufferTag tag;
 	FileCacheEntry* entry;
-	ssize_t rc;
 	bool found;
 	int chunk_offs = blkno & (BLOCKS_PER_CHUNK-1);
 	uint32 hash;
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -968,14 +968,14 @@ testing = ["pre-commit"]

 [[package]]
 name = "flask"
-version = "2.1.3"
+version = "2.2.5"
 description = "A simple framework for building complex web applications."
 category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "Flask-2.1.3-py3-none-any.whl", hash = "sha256:9013281a7402ad527f8fd56375164f3aa021ecfaff89bfe3825346c24f87e04c"},
-    {file = "Flask-2.1.3.tar.gz", hash = "sha256:15972e5017df0575c3d6c090ba168b6db90259e620ac8d7ea813a396bad5b6cb"},
+    {file = "Flask-2.2.5-py3-none-any.whl", hash = "sha256:58107ed83443e86067e41eff4631b058178191a355886f8e479e347fa1285fdf"},
+    {file = "Flask-2.2.5.tar.gz", hash = "sha256:edee9b0a7ff26621bd5a8c10ff484ae28737a2410d99b0bb9a6850c7fb977aa0"},
 ]

 [package.dependencies]
@@ -983,7 +983,7 @@ click = ">=8.0"
 importlib-metadata = {version = ">=3.6.0", markers = "python_version < \"3.10\""}
 itsdangerous = ">=2.0"
 Jinja2 = ">=3.0"
-Werkzeug = ">=2.0"
+Werkzeug = ">=2.2.2"

 [package.extras]
 async = ["asgiref (>=3.2)"]
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -62,6 +62,8 @@ utils.workspace = true
 uuid.workspace = true
 webpki-roots.workspace = true
 x509-parser.workspace = true
+native-tls.workspace = true
+postgres-native-tls.workspace = true

 workspace_hack.workspace = true
 tokio-util.workspace = true
--- a/proxy/src/auth/backend/link.rs
+++ b/proxy/src/auth/backend/link.rs
@@ -9,6 +9,7 @@ use crate::{
 use pq_proto::BeMessage as Be;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_postgres::config::SslMode;
 use tracing::{info, info_span};

 #[derive(Debug, Error)]
@@ -87,6 +88,16 @@ pub(super) async fn authenticate(
        .dbname(&db_info.dbname)
        .user(&db_info.user);

+    // Backwards compatibility. pg_sni_proxy uses "--" in domain names
+    // while direct connections do not. Once we migrate to pg_sni_proxy
+    // everywhere, we can remove this.
+    if db_info.host.contains("--") {
+        // we need TLS connection with SNI info to properly route it
+        config.ssl_mode(SslMode::Require);
+    } else {
+        config.ssl_mode(SslMode::Disable);
+    }
+
    if let Some(password) = db_info.password {
        config.password(password.as_ref());
    }
@@ -96,6 +107,7 @@ pub(super) async fn authenticate(
        value: NodeInfo {
            config,
            aux: db_info.aux.into(),
+            allow_self_signed_compute: false, // caller may override
        },
    })
 }
--- a/proxy/src/bin/pg_sni_router.rs
+++ b/proxy/src/bin/pg_sni_router.rs
@@ -0,0 +1,250 @@
+//! A stand-alone program that routes connections, e.g. from
+//! `aaa--bbb--1234.external.domain` to `aaa.bbb.internal.domain:1234`.
+//!
+//! This allows connecting to pods/services running in the same Kubernetes cluster from
+//! the outside. Similar to an ingress controller for HTTPS.
+use std::{net::SocketAddr, sync::Arc};
+
+use tokio::net::TcpListener;
+
+use anyhow::{anyhow, bail, ensure, Context};
+use clap::{self, Arg};
+use futures::TryFutureExt;
+use proxy::console::messages::MetricsAuxInfo;
+use proxy::stream::{PqStream, Stream};
+
+use tokio::io::{AsyncRead, AsyncWrite};
+use tokio_util::sync::CancellationToken;
+use utils::{project_git_version, sentry_init::init_sentry};
+
+use tracing::{error, info, warn};
+
+project_git_version!(GIT_VERSION);
+
+/// Builds the command-line interface of the SNI router.
+///
+/// `--listen` has a loopback default; `--tls-key`, `--tls-cert` and `--destination`
+/// are mandatory.
+fn cli() -> clap::Command {
+    let listen = Arg::new("listen")
+        .short('l')
+        .long("listen")
+        .help("listen for incoming client connections on ip:port")
+        .default_value("127.0.0.1:4432");
+    // Key and certificate presented to connecting postgres clients.
+    let tls_key = Arg::new("tls-key")
+        .short('k')
+        .long("tls-key")
+        .help("path to TLS key for client postgres connections")
+        .required(true);
+    let tls_cert = Arg::new("tls-cert")
+        .short('c')
+        .long("tls-cert")
+        .help("path to TLS cert for client postgres connections")
+        .required(true);
+    // Stored under the id "dest", but spelled `--destination` on the command line.
+    let dest = Arg::new("dest")
+        .short('d')
+        .long("destination")
+        .help("append this domain zone to the SNI hostname to get the destination address")
+        .required(true);
+
+    clap::Command::new("Neon proxy/router")
+        .version(GIT_VERSION)
+        .args([listen, tls_key, tls_cert, dest])
+}
+
+/// Entry point: parses the CLI, loads the TLS key and certificate chain, binds the
+/// listener, then runs the accept loop and the signal handler until either finishes.
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let _logging_guard = proxy::logging::init().await?;
+    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
+    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);
+
+    let args = cli().get_matches();
+    let destination: String = args.get_one::<String>("dest").unwrap().clone();
+
+    // Configure TLS: exactly one PKCS#8 private key plus a certificate chain
+    let tls_config: Arc<rustls::ServerConfig> = match (
+        args.get_one::<String>("tls-key"),
+        args.get_one::<String>("tls-cert"),
+    ) {
+        (Some(key_path), Some(cert_path)) => {
+            let key = {
+                let key_bytes = std::fs::read(key_path).context("TLS key file")?;
+                let mut keys = rustls_pemfile::pkcs8_private_keys(&mut &key_bytes[..])
+                    .with_context(|| format!("Failed to read TLS keys at '{key_path}'"))?;
+
+                ensure!(keys.len() == 1, "keys.len() = {} (should be 1)", keys.len());
+                keys.pop().map(rustls::PrivateKey).unwrap()
+            };
+
+            let cert_chain_bytes = std::fs::read(cert_path)
+                .with_context(|| format!("Failed to read TLS cert file at '{cert_path}'"))?;
+
+            let cert_chain = rustls_pemfile::certs(&mut &cert_chain_bytes[..])
+                .with_context(|| {
+                    format!("Failed to read TLS certificate chain from bytes from file at '{cert_path}'.")
+                })?
+                .into_iter()
+                .map(rustls::Certificate)
+                .collect();
+
+            rustls::ServerConfig::builder()
+                .with_safe_default_cipher_suites()
+                .with_safe_default_kx_groups()
+                .with_protocol_versions(&[&rustls::version::TLS13, &rustls::version::TLS12])?
+                .with_no_client_auth()
+                .with_single_cert(cert_chain, key)?
+                .into()
+        }
+        _ => bail!("tls-key and tls-cert must be specified"),
+    };
+
+    // Start listening for incoming client connections
+    let proxy_address: SocketAddr = args.get_one::<String>("listen").unwrap().parse()?;
+    info!("Starting sni router on {proxy_address}");
+    let proxy_listener = TcpListener::bind(proxy_address).await?;
+
+    let cancellation_token = CancellationToken::new();
+
+    let main = proxy::flatten_err(tokio::spawn(task_main(
+        Arc::new(destination),
+        tls_config,
+        proxy_listener,
+        cancellation_token.clone(),
+    )));
+    let signals_task = proxy::flatten_err(tokio::spawn(proxy::handle_signals(cancellation_token)));
+
+    tokio::select! {
+        res = main => { res?; },
+        res = signals_task => { res?; },
+    }
+
+    Ok(())
+}
+
+/// Accepts clients, one task each, until `cancellation_token` is cancelled, then waits for
+/// all client tasks to finish. An `accept` error is returned immediately, without draining.
+async fn task_main(
+    dest_suffix: Arc<String>,
+    tls_config: Arc<rustls::ServerConfig>,
+    listener: tokio::net::TcpListener,
+    cancellation_token: CancellationToken,
+) -> anyhow::Result<()> {
+    // Keepalive is enabled on the listening socket so that every
+    // accepted client socket inherits the setting.
+    socket2::SockRef::from(&listener).set_keepalive(true)?;
+
+    let mut client_tasks = tokio::task::JoinSet::new();
+
+    loop {
+        tokio::select! {
+            accepted = listener.accept() => {
+                let (client_socket, peer_addr) = accepted?;
+                info!("accepted postgres client connection from {peer_addr}");
+
+                let session_id = uuid::Uuid::new_v4();
+                let tls_config = tls_config.clone();
+                let dest_suffix = dest_suffix.clone();
+                let serve_client = async move {
+                    info!("spawned a task for {peer_addr}");
+
+                    let nodelay = client_socket.set_nodelay(true);
+                    nodelay.context("failed to set socket option")?;
+
+                    handle_client(dest_suffix, tls_config, session_id, client_socket).await
+                };
+
+                // Per-client errors are logged here and turned into `()`.
+                client_tasks.spawn(serve_client.unwrap_or_else(|e| {
+                    error!("per-client task finished with an error: {e:#}");
+                }));
+            }
+            _ = cancellation_token.cancelled() => {
+                drop(listener);
+                break;
+            }
+        }
+    }
+
+    // Drain connections
+    info!("waiting for all client connections to finish");
+    while let Some(joined) = client_tasks.join_next().await {
+        match joined {
+            Err(e) if !e.is_panic() && !e.is_cancelled() => {
+                warn!("unexpected error from joined connection task: {e:?}");
+            }
+            _ => {}
+        }
+    }
+    info!("all client connections have finished");
+    Ok(())
+}
+
+const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
+
+async fn ssl_handshake<S: AsyncRead + AsyncWrite + Unpin>(
+    raw_stream: S,
+    tls_config: Arc<rustls::ServerConfig>,
+) -> anyhow::Result<Stream<S>> {
+    let mut stream = PqStream::new(Stream::from_raw(raw_stream));
+
+    let msg = stream.read_startup_packet().await?;
+    info!("received {msg:?}");
+    use pq_proto::FeStartupPacket::*;
+
+    match msg {
+        SslRequest => {
+            stream
+                .write_message(&pq_proto::BeMessage::EncryptionResponse(true))
+                .await?;
+            // Upgrade the raw stream into a secure TLS-backed stream.
+            // `into_inner` also hands back any bytes already buffered from the client.
+
+            let (raw, read_buf) = stream.into_inner();
+            // TODO: Normally, the client doesn't send any data before the
+            // server accepts the SSL request, so read_buf is empty.
+            // However, you could imagine pipelining of the postgres
+            // SSLRequest + TLS ClientHello in one chunk, similar to
+            // pipelining in our node js driver. We should probably
+            // support that by chaining read_buf with the stream.
+            if !read_buf.is_empty() {
+                bail!("data is sent before server replied with EncryptionResponse");
+            }
+            Ok(raw.upgrade(tls_config).await?)
+        }
+        _ => stream.throw_error_str(ERR_INSECURE_CONNECTION).await?,
+    }
+}
+
+/// Performs the TLS handshake with one client and relays it to the destination derived from its SNI.
+#[tracing::instrument(fields(session_id = ?session_id), skip_all)]
+async fn handle_client(
+    dest_suffix: Arc<String>,
+    tls_config: Arc<rustls::ServerConfig>,
+    session_id: uuid::Uuid,
+    stream: impl AsyncRead + AsyncWrite + Unpin,
+) -> anyhow::Result<()> {
+    let tls_stream = ssl_handshake(stream, tls_config).await?;
+
+    // Only the first label of the SNI hostname is used, and it must look like
+    //   `{k8s_service_name}--{k8s_namespace}--{port}`
+    // Fewer than three parts is an error (indexing the split result would panic).
+    let sni = tls_stream.sni_hostname().ok_or(anyhow!("SNI missing"))?;
+    let label = sni.split_once('.').context("invalid SNI")?.0;
+    let mut parts = label.splitn(3, "--");
+    let service = parts.next().context("invalid SNI")?;
+    let namespace = parts.next().context("invalid SNI")?;
+    let port = parts.next().context("invalid SNI")?;
+    let port = port.parse::<u16>().context("invalid port")?;
+    let destination = format!("{}.{}.{}:{}", service, namespace, dest_suffix, port);
+
+    info!("destination: {}", destination);
+
+    let client = tokio::net::TcpStream::connect(destination).await?;
+
+    let metrics_aux: MetricsAuxInfo = Default::default();
+    proxy::proxy::proxy_pass(tls_stream, client, &metrics_aux).await
+}
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -1,49 +1,23 @@
-//! Postgres protocol proxy/router.
-//!
-//! This service listens psql port and can check auth via external service
-//! (control plane API in our case) and can create new databases and accounts
-//! in somewhat transparent manner (again via communication with control plane API).
+use proxy::auth;
+use proxy::console;
+use proxy::http;
+use proxy::metrics;

-mod auth;
-mod cache;
-mod cancellation;
-mod compute;
-mod config;
-mod console;
-mod error;
-mod http;
-mod logging;
-mod metrics;
-mod parse;
-mod proxy;
-mod sasl;
-mod scram;
-mod stream;
-mod url;
-mod waiters;
-
-use anyhow::{bail, Context};
+use anyhow::bail;
 use clap::{self, Arg};
-use config::ProxyConfig;
-use futures::FutureExt;
-use std::{borrow::Cow, future::Future, net::SocketAddr};
-use tokio::{net::TcpListener, task::JoinError};
+use proxy::config::{self, ProxyConfig};
+use std::{borrow::Cow, net::SocketAddr};
+use tokio::net::TcpListener;
 use tokio_util::sync::CancellationToken;
-use tracing::{info, warn};
+use tracing::info;
+use tracing::warn;
 use utils::{project_git_version, sentry_init::init_sentry};

 project_git_version!(GIT_VERSION);

-/// Flattens `Result<Result<T>>` into `Result<T>`.
-async fn flatten_err(
-    f: impl Future<Output = Result<anyhow::Result<()>, JoinError>>,
-) -> anyhow::Result<()> {
-    f.map(|r| r.context("join error").and_then(|x| x)).await
-}
-
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
-    let _logging_guard = logging::init().await?;
+    let _logging_guard = proxy::logging::init().await?;
    let _panic_hook_guard = utils::logging::replace_panic_hook_with_tracing_panic_hook();
    let _sentry_guard = init_sentry(Some(GIT_VERSION.into()), &[]);

@@ -69,7 +43,7 @@ async fn main() -> anyhow::Result<()> {
    let proxy_listener = TcpListener::bind(proxy_address).await?;
    let cancellation_token = CancellationToken::new();

-    let mut client_tasks = vec![tokio::spawn(proxy::task_main(
+    let mut client_tasks = vec![tokio::spawn(proxy::proxy::task_main(
        config,
        proxy_listener,
        cancellation_token.clone(),
@@ -88,7 +62,7 @@ async fn main() -> anyhow::Result<()> {
    }

    let mut tasks = vec![
-        tokio::spawn(handle_signals(cancellation_token)),
+        tokio::spawn(proxy::handle_signals(cancellation_token)),
        tokio::spawn(http::server::task_main(http_listener)),
        tokio::spawn(console::mgmt::task_main(mgmt_listener)),
    ];
@@ -97,8 +71,9 @@ async fn main() -> anyhow::Result<()> {
        tasks.push(tokio::spawn(metrics::task_main(metrics_config)));
    }

-    let tasks = futures::future::try_join_all(tasks.into_iter().map(flatten_err));
-    let client_tasks = futures::future::try_join_all(client_tasks.into_iter().map(flatten_err));
+    let tasks = futures::future::try_join_all(tasks.into_iter().map(proxy::flatten_err));
+    let client_tasks =
+        futures::future::try_join_all(client_tasks.into_iter().map(proxy::flatten_err));
    tokio::select! {
        // We are only expecting an error from these forever tasks
        res = tasks => { res?; },
@@ -107,33 +82,6 @@ async fn main() -> anyhow::Result<()> {
    Ok(())
 }

-/// Handle unix signals appropriately.
-async fn handle_signals(token: CancellationToken) -> anyhow::Result<()> {
-    use tokio::signal::unix::{signal, SignalKind};
-
-    let mut hangup = signal(SignalKind::hangup())?;
-    let mut interrupt = signal(SignalKind::interrupt())?;
-    let mut terminate = signal(SignalKind::terminate())?;
-
-    loop {
-        tokio::select! {
-            // Hangup is commonly used for config reload.
-            _ = hangup.recv() => {
-                warn!("received SIGHUP; config reload is not supported");
-            }
-            // Shut down the whole application.
-            _ = interrupt.recv() => {
-                warn!("received SIGINT, exiting immediately");
-                bail!("interrupted");
-            }
-            _ = terminate.recv() => {
-                warn!("received SIGTERM, shutting down once all existing connections have closed");
-                token.cancel();
-            }
-        }
-    }
-}
-
 /// ProxyConfig is created at proxy startup, and lives forever.
 fn build_config(args: &clap::ArgMatches) -> anyhow::Result<&'static ProxyConfig> {
    let tls_config = match (
@@ -149,6 +97,14 @@ fn build_config(args: &clap::ArgMatches) -> anyhow::Result<&'static ProxyConfig>
        _ => bail!("either both or neither tls-key and tls-cert must be specified"),
    };

+    let allow_self_signed_compute: bool = args
+        .get_one::<String>("allow-self-signed-compute")
+        .unwrap()
+        .parse()?;
+    if allow_self_signed_compute {
+        warn!("allowing self-signed compute certificates");
+    }
+
    let metric_collection = match (
        args.get_one::<String>("metric-collection-endpoint"),
        args.get_one::<String>("metric-collection-interval"),
@@ -198,6 +154,7 @@ fn build_config(args: &clap::ArgMatches) -> anyhow::Result<&'static ProxyConfig>
        tls_config,
        auth_backend,
        metric_collection,
+        allow_self_signed_compute,
    }));

    Ok(config)
@@ -288,6 +245,12 @@ fn cli() -> clap::Command {
                .help("cache for `wake_compute` api method (use `size=0` to disable)")
                .default_value(config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO),
        )
+        .arg(
+            Arg::new("allow-self-signed-compute")
+                .long("allow-self-signed-compute")
+                .help("Allow self-signed certificates for compute nodes (for testing)")
+                .default_value("false"),
+        )
 }

 #[cfg(test)]
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -5,7 +5,7 @@ use pq_proto::StartupMessageParams;
 use std::{io, net::SocketAddr, time::Duration};
 use thiserror::Error;
 use tokio::net::TcpStream;
-use tokio_postgres::NoTls;
+use tokio_postgres::tls::MakeTlsConnect;
 use tracing::{error, info, warn};

 const COULD_NOT_CONNECT: &str = "Couldn't connect to compute node";
@@ -19,6 +19,9 @@ pub enum ConnectionError {

    #[error("{COULD_NOT_CONNECT}: {0}")]
    CouldNotConnect(#[from] io::Error),
+
+    #[error("{COULD_NOT_CONNECT}: {0}")]
+    TlsError(#[from] native_tls::Error),
 }

 impl UserFacingError for ConnectionError {
@@ -125,9 +128,15 @@ impl std::ops::DerefMut for ConnCfg {
    }
 }

+impl Default for ConnCfg {
+    fn default() -> ConnCfg {
+        ConnCfg::new()
+    }
+}
+
 impl ConnCfg {
    /// Establish a raw TCP connection to the compute node.
-    async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream)> {
+    async fn connect_raw(&self) -> io::Result<(SocketAddr, TcpStream, &str)> {
        use tokio_postgres::config::Host;

        // wrap TcpStream::connect with timeout
@@ -180,7 +189,7 @@ impl ConnCfg {
            };

            match connect_once(host, *port).await {
-                Ok(socket) => return Ok(socket),
+                Ok((sockaddr, stream)) => return Ok((sockaddr, stream, host)),
                Err(err) => {
                    // We can't throw an error here, as there might be more hosts to try.
                    warn!("couldn't connect to compute node at {host}:{port}: {err}");
@@ -200,7 +209,10 @@ impl ConnCfg {

 pub struct PostgresConnection {
    /// Socket connected to a compute node.
-    pub stream: TcpStream,
+    pub stream: tokio_postgres::maybe_tls_stream::MaybeTlsStream<
+        tokio::net::TcpStream,
+        postgres_native_tls::TlsStream<tokio::net::TcpStream>,
+    >,
    /// PostgreSQL connection parameters.
    pub params: std::collections::HashMap<String, String>,
    /// Query cancellation token.
@@ -208,11 +220,27 @@ pub struct PostgresConnection {
 }

 impl ConnCfg {
-    async fn do_connect(&self) -> Result<PostgresConnection, ConnectionError> {
-        // TODO: establish a secure connection to the DB.
-        let (socket_addr, mut stream) = self.connect_raw().await?;
-        let (client, connection) = self.0.connect_raw(&mut stream, NoTls).await?;
-        info!("connected to compute node at {socket_addr}");
+    async fn do_connect(
+        &self,
+        allow_self_signed_compute: bool,
+    ) -> Result<PostgresConnection, ConnectionError> {
+        let (socket_addr, stream, host) = self.connect_raw().await?;
+
+        let tls_connector = native_tls::TlsConnector::builder()
+            .danger_accept_invalid_certs(allow_self_signed_compute)
+            .build()
+            .unwrap();
+        let mut mk_tls = postgres_native_tls::MakeTlsConnector::new(tls_connector);
+        let tls = MakeTlsConnect::<tokio::net::TcpStream>::make_tls_connect(&mut mk_tls, host)?;
+
+        // connect_raw() will not use TLS if sslmode is "disable"
+        let (client, connection) = self.0.connect_raw(stream, tls).await?;
+        let stream = connection.stream.into_inner();
+
+        info!(
+            "connected to compute node at {host} ({socket_addr}) sslmode={:?}",
+            self.0.get_ssl_mode()
+        );

        // This is very ugly but as of now there's no better way to
        // extract the connection parameters from tokio-postgres' connection.
@@ -233,8 +261,11 @@ impl ConnCfg {
    }

    /// Connect to a corresponding compute node.
-    pub async fn connect(&self) -> Result<PostgresConnection, ConnectionError> {
-        self.do_connect()
+    pub async fn connect(
+        &self,
+        allow_self_signed_compute: bool,
+    ) -> Result<PostgresConnection, ConnectionError> {
+        self.do_connect(allow_self_signed_compute)
            .inspect_err(|err| {
                // Immediately log the error we have at our disposal.
                error!("couldn't connect to compute node: {err}");
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -12,6 +12,7 @@ pub struct ProxyConfig {
    pub tls_config: Option<TlsConfig>,
    pub auth_backend: auth::BackendType<'static, ()>,
    pub metric_collection: Option<MetricCollectionConfig>,
+    pub allow_self_signed_compute: bool,
 }

 #[derive(Debug)]
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -170,6 +170,9 @@ pub struct NodeInfo {

    /// Labels for proxy's metrics.
    pub aux: Arc<MetricsAuxInfo>,
+
+    /// Whether we should accept self-signed certificates (for testing)
+    pub allow_self_signed_compute: bool,
 }

 pub type NodeInfoCache = TimedLru<Arc<str>, NodeInfo>;
--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -8,6 +8,7 @@ use crate::{auth::ClientCredentials, compute, error::io_error, scram, url::ApiUr
 use async_trait::async_trait;
 use futures::TryFutureExt;
 use thiserror::Error;
+use tokio_postgres::config::SslMode;
 use tracing::{error, info, info_span, warn, Instrument};

 #[derive(Debug, Error)]
@@ -86,11 +87,13 @@ impl Api {
        let mut config = compute::ConnCfg::new();
        config
            .host(self.endpoint.host_str().unwrap_or("localhost"))
-            .port(self.endpoint.port().unwrap_or(5432));
+            .port(self.endpoint.port().unwrap_or(5432))
+            .ssl_mode(SslMode::Disable);

        let node = NodeInfo {
            config,
            aux: Default::default(),
+            allow_self_signed_compute: false,
        };

        Ok(node)
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -8,6 +8,7 @@ use super::{
 use crate::{auth::ClientCredentials, compute, http, scram};
 use async_trait::async_trait;
 use futures::TryFutureExt;
+use tokio_postgres::config::SslMode;
 use tracing::{error, info, info_span, warn, Instrument};

 #[derive(Clone)]
@@ -100,11 +101,12 @@ impl Api {
            // We'll set username and such later using the startup message.
            // TODO: add more type safety (in progress).
            let mut config = compute::ConnCfg::new();
-            config.host(host).port(port);
+            config.host(host).port(port).ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.

            let node = NodeInfo {
                config,
                aux: body.aux.into(),
+                allow_self_signed_compute: false,
            };

            Ok(node)
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -0,0 +1,57 @@
+use anyhow::{bail, Context};
+use futures::{Future, FutureExt};
+use tokio::task::JoinError;
+use tokio_util::sync::CancellationToken;
+use tracing::warn;
+
+pub mod auth;
+pub mod cache;
+pub mod cancellation;
+pub mod compute;
+pub mod config;
+pub mod console;
+pub mod error;
+pub mod http;
+pub mod logging;
+pub mod metrics;
+pub mod parse;
+pub mod proxy;
+pub mod sasl;
+pub mod scram;
+pub mod stream;
+pub mod url;
+pub mod waiters;
+
+/// Handles SIGHUP (logged), SIGINT (returns an error) and SIGTERM (cancels `token`) in a loop.
+pub async fn handle_signals(token: CancellationToken) -> anyhow::Result<()> {
+    use tokio::signal::unix::{self, SignalKind};
+
+    let mut sighup = unix::signal(SignalKind::hangup())?;
+    let mut sigint = unix::signal(SignalKind::interrupt())?;
+    let mut sigterm = unix::signal(SignalKind::terminate())?;
+
+    loop {
+        tokio::select! {
+            // SIGHUP conventionally asks for a config reload; here it is only logged.
+            _ = sighup.recv() => {
+                warn!("received SIGHUP; config reload is not supported");
+            }
+            // SIGINT fails this task right away, whereas SIGTERM only cancels `token`.
+            _ = sigint.recv() => {
+                warn!("received SIGINT, exiting immediately");
+                bail!("interrupted");
+            }
+            _ = sigterm.recv() => {
+                warn!("received SIGTERM, shutting down once all existing connections have closed");
+                token.cancel();
+            }
+        }
+    }
+}
+
+/// Merges a task's join result and the task's own result into one `anyhow::Result`.
+pub async fn flatten_err(
+    f: impl Future<Output = Result<anyhow::Result<()>, JoinError>>,
+) -> anyhow::Result<()> {
+    f.map(|joined| joined.context("join error").and_then(std::convert::identity)).await
+}
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -155,7 +155,7 @@ pub async fn handle_ws_client(
        async { result }.or_else(|e| stream.throw_error(e)).await?
    };

-    let client = Client::new(stream, creds, &params, session_id);
+    let client = Client::new(stream, creds, &params, session_id, false);
    cancel_map
        .with_session(|session| client.connect_to_db(session, true))
        .await
@@ -194,7 +194,15 @@ async fn handle_client(
        async { result }.or_else(|e| stream.throw_error(e)).await?
    };

-    let client = Client::new(stream, creds, &params, session_id);
+    let allow_self_signed_compute = config.allow_self_signed_compute;
+
+    let client = Client::new(
+        stream,
+        creds,
+        &params,
+        session_id,
+        allow_self_signed_compute,
+    );
    cancel_map
        .with_session(|session| client.connect_to_db(session, false))
        .await
@@ -297,9 +305,11 @@ async fn connect_to_compute_once(
        NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
    };

+    let allow_self_signed_compute = node_info.allow_self_signed_compute;
+
    node_info
        .config
-        .connect()
+        .connect(allow_self_signed_compute)
        .inspect_err(invalidate_cache)
        .await
 }
@@ -378,7 +388,7 @@ async fn prepare_client_connection(

 /// Forward bytes in both directions (client <-> compute).
 #[tracing::instrument(skip_all)]
-async fn proxy_pass(
+pub async fn proxy_pass(
    client: impl AsyncRead + AsyncWrite + Unpin,
    compute: impl AsyncRead + AsyncWrite + Unpin,
    aux: &MetricsAuxInfo,
@@ -420,6 +430,8 @@ struct Client<'a, S> {
    params: &'a StartupMessageParams,
    /// Unique connection ID.
    session_id: uuid::Uuid,
+    /// Allow self-signed certificates (for testing).
+    allow_self_signed_compute: bool,
 }

 impl<'a, S> Client<'a, S> {
@@ -429,12 +441,14 @@ impl<'a, S> Client<'a, S> {
        creds: auth::BackendType<'a, auth::ClientCredentials<'a>>,
        params: &'a StartupMessageParams,
        session_id: uuid::Uuid,
+        allow_self_signed_compute: bool,
    ) -> Self {
        Self {
            stream,
            creds,
            params,
            session_id,
+            allow_self_signed_compute,
        }
    }
 }
@@ -451,6 +465,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
            mut creds,
            params,
            session_id,
+            allow_self_signed_compute,
        } = self;

        let extra = console::ConsoleReqExtra {
@@ -473,6 +488,8 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
            value: mut node_info,
        } = auth_result;

+        node_info.allow_self_signed_compute = allow_self_signed_compute;
+
        let mut node = connect_to_compute(&mut node_info, params, &extra, &creds)
            .or_else(|e| stream.throw_error(e))
            .await?;
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -19,11 +19,14 @@ git-version.workspace = true
 hex.workspace = true
 humantime.workspace = true
 hyper.workspace = true
+futures.workspace = true
 once_cell.workspace = true
 parking_lot.workspace = true
 postgres.workspace = true
 postgres-protocol.workspace = true
 regex.workspace = true
+scopeguard.workspace = true
+reqwest = { workspace = true, features = ["json"] }
 serde.workspace = true
 serde_json.workspace = true
 serde_with.workspace = true
@@ -33,6 +36,7 @@ tokio = { workspace = true, features = ["fs"] }
 tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
 toml_edit.workspace = true
+tempfile.workspace = true
 tracing.workspace = true
 url.workspace = true
 metrics.workspace = true
@@ -45,6 +49,3 @@ storage_broker.workspace = true
 utils.workspace = true

 workspace_hack.workspace = true
-
-[dev-dependencies]
-tempfile.workspace = true
--- a/safekeeper/src/broker.rs
+++ b/safekeeper/src/broker.rs
@@ -14,10 +14,13 @@ use storage_broker::proto::SubscribeSafekeeperInfoRequest;
 use storage_broker::Request;

 use std::time::Duration;
+use std::time::Instant;
 use tokio::task::JoinHandle;
 use tokio::{runtime, time::sleep};
 use tracing::*;

+use crate::metrics::BROKER_PULLED_UPDATES;
+use crate::metrics::BROKER_PUSHED_UPDATES;
 use crate::GlobalTimelines;
 use crate::SafeKeeperConf;

@@ -49,12 +52,17 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
            // is under plain mutex. That's ok, all this code is not performance
            // sensitive and there is no risk of deadlock as we don't await while
            // lock is held.
+            let now = Instant::now();
            let mut active_tlis = GlobalTimelines::get_all();
            active_tlis.retain(|tli| tli.is_active());
            for tli in &active_tlis {
                let sk_info = tli.get_safekeeper_info(&conf);
                yield sk_info;
+                BROKER_PUSHED_UPDATES.inc();
            }
+            let elapsed = now.elapsed();
+            // Log duration every second. Should be about 10MB of logs per day.
+            info!("pushed {} timeline updates to broker in {:?}", active_tlis.len(), elapsed);
            sleep(push_interval).await;
        }
    };
@@ -79,6 +87,10 @@ async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
        .context("subscribe_safekeper_info request failed")?
        .into_inner();

+    let ok_counter = BROKER_PULLED_UPDATES.with_label_values(&["ok"]);
+    let not_found = BROKER_PULLED_UPDATES.with_label_values(&["not_found"]);
+    let err_counter = BROKER_PULLED_UPDATES.with_label_values(&["error"]);
+
    while let Some(msg) = stream.message().await? {
        let proto_ttid = msg
            .tenant_timeline_id
@@ -91,7 +103,15 @@ async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
            // connection to the broker.

            // note: there are blocking operations below, but it's considered fine for now
-            tli.record_safekeeper_info(&msg).await?
+            let res = tli.record_safekeeper_info(msg).await;
+            if res.is_ok() {
+                ok_counter.inc();
+            } else {
+                err_counter.inc();
+            }
+            res?;
+        } else {
+            not_found.inc();
        }
    }
    bail!("end of stream");
--- a/safekeeper/src/debug_dump.rs
+++ b/safekeeper/src/debug_dump.rs
@@ -9,9 +9,10 @@ use std::path::PathBuf;
 use anyhow::Result;
 use chrono::{DateTime, Utc};
 use postgres_ffi::XLogSegNo;
+use serde::Deserialize;
 use serde::Serialize;

-use utils::http::json::display_serialize;
+use serde_with::{serde_as, DisplayFromStr};
 use utils::id::NodeId;
 use utils::id::TenantTimelineId;
 use utils::id::{TenantId, TimelineId};
@@ -22,11 +23,11 @@ use crate::safekeeper::SafekeeperMemState;
 use crate::safekeeper::TermHistory;
 use crate::SafeKeeperConf;

-use crate::timeline::ReplicaState;
+use crate::send_wal::WalSenderState;
 use crate::GlobalTimelines;

 /// Various filters that influence the resulting JSON output.
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct Args {
    /// Dump all available safekeeper state. False by default.
    pub dump_all: bool,
@@ -51,7 +52,7 @@ pub struct Args {
 }

 /// Response for debug dump request.
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct Response {
    pub start_time: DateTime<Utc>,
    pub finish_time: DateTime<Utc>,
@@ -61,7 +62,7 @@ pub struct Response {
 }

 /// Safekeeper configuration.
-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct Config {
    pub id: NodeId,
    pub workdir: PathBuf,
@@ -72,22 +73,23 @@ pub struct Config {
    pub wal_backup_enabled: bool,
 }

-#[derive(Debug, Serialize)]
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct Timeline {
-    #[serde(serialize_with = "display_serialize")]
+    #[serde_as(as = "DisplayFromStr")]
    pub tenant_id: TenantId,
-    #[serde(serialize_with = "display_serialize")]
+    #[serde_as(as = "DisplayFromStr")]
    pub timeline_id: TimelineId,
    pub control_file: Option<SafeKeeperState>,
    pub memory: Option<Memory>,
    pub disk_content: Option<DiskContent>,
 }

-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct Memory {
    pub is_cancelled: bool,
    pub peers_info_len: usize,
-    pub replicas: Vec<Option<ReplicaState>>,
+    pub walsenders: Vec<WalSenderState>,
    pub wal_backup_active: bool,
    pub active: bool,
    pub num_computes: u32,
@@ -102,12 +104,12 @@ pub struct Memory {
    pub file_open: bool,
 }

-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct DiskContent {
    pub files: Vec<FileInfo>,
 }

-#[derive(Debug, Serialize)]
+#[derive(Debug, Serialize, Deserialize)]
 pub struct FileInfo {
    pub name: String,
    pub size: u64,
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -10,7 +10,7 @@ use tracing::{info, info_span, Instrument};
 use crate::auth::check_permission;
 use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};

-use crate::metrics::TrafficMetrics;
+use crate::metrics::{TrafficMetrics, PG_QUERIES_FINISHED, PG_QUERIES_RECEIVED};
 use crate::wal_service::ConnectionId;
 use crate::{GlobalTimelines, SafeKeeperConf};
 use postgres_backend::QueryError;
@@ -72,6 +72,15 @@ fn parse_cmd(cmd: &str) -> anyhow::Result<SafekeeperPostgresCommand> {
    }
 }

+/// Returns the fixed, upper-case label for a parsed safekeeper command.
+///
+/// The label is used as the `query` label value of the `PG_QUERIES_RECEIVED`
+/// and `PG_QUERIES_FINISHED` counters. Every arm yields a string literal, so
+/// the result is `'static` and does not keep `cmd` borrowed.
+fn cmd_to_string(cmd: &SafekeeperPostgresCommand) -> &'static str {
+    match cmd {
+        SafekeeperPostgresCommand::StartWalPush => "START_WAL_PUSH",
+        SafekeeperPostgresCommand::StartReplication { .. } => "START_REPLICATION",
+        SafekeeperPostgresCommand::IdentifySystem => "IDENTIFY_SYSTEM",
+        SafekeeperPostgresCommand::JSONCtrl { .. } => "JSON_CTRL",
+    }
+}
+
 #[async_trait::async_trait]
 impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
    for SafekeeperPostgresHandler
@@ -168,6 +177,12 @@ impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
        }

        let cmd = parse_cmd(query_string)?;
+        let cmd_str = cmd_to_string(&cmd);
+
+        PG_QUERIES_RECEIVED.with_label_values(&[cmd_str]).inc();
+        scopeguard::defer! {
+            PG_QUERIES_FINISHED.with_label_values(&[cmd_str]).inc();
+        }

        info!(
            "got query {:?} in timeline {:?}",
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -3,19 +3,21 @@ use hyper::{Body, Request, Response, StatusCode, Uri};
 use once_cell::sync::Lazy;
 use postgres_ffi::WAL_SEGMENT_SIZE;
 use safekeeper_api::models::SkTimelineInfo;
-use serde::Serialize;
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
 use std::collections::{HashMap, HashSet};
 use std::fmt;
 use std::str::FromStr;
 use std::sync::Arc;
 use storage_broker::proto::SafekeeperTimelineInfo;
 use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
+use tokio::fs::File;
+use tokio::io::AsyncReadExt;
 use tokio::task::JoinError;
-use utils::http::json::display_serialize;

-use crate::debug_dump;
 use crate::safekeeper::ServerInfo;
 use crate::safekeeper::Term;
+use crate::{debug_dump, pull_timeline};

 use crate::timelines_global_map::TimelineDeleteForceResult;
 use crate::GlobalTimelines;
@@ -57,44 +59,46 @@ fn get_conf(request: &Request<Body>) -> &SafeKeeperConf {

 /// Same as TermSwitchEntry, but serializes LSN using display serializer
 /// in Postgres format, i.e. 0/FFFFFFFF. Used only for the API response.
-#[derive(Debug, Serialize)]
-struct TermSwitchApiEntry {
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize)]
+pub struct TermSwitchApiEntry {
    pub term: Term,
-    #[serde(serialize_with = "display_serialize")]
+    #[serde_as(as = "DisplayFromStr")]
    pub lsn: Lsn,
 }

 /// Augment AcceptorState with epoch for convenience
-#[derive(Debug, Serialize)]
-struct AcceptorStateStatus {
-    term: Term,
-    epoch: Term,
-    term_history: Vec<TermSwitchApiEntry>,
+#[derive(Debug, Serialize, Deserialize)]
+pub struct AcceptorStateStatus {
+    pub term: Term,
+    pub epoch: Term,
+    pub term_history: Vec<TermSwitchApiEntry>,
 }

 /// Info about timeline on safekeeper ready for reporting.
-#[derive(Debug, Serialize)]
-struct TimelineStatus {
-    #[serde(serialize_with = "display_serialize")]
-    tenant_id: TenantId,
-    #[serde(serialize_with = "display_serialize")]
-    timeline_id: TimelineId,
-    acceptor_state: AcceptorStateStatus,
-    pg_info: ServerInfo,
-    #[serde(serialize_with = "display_serialize")]
-    flush_lsn: Lsn,
-    #[serde(serialize_with = "display_serialize")]
-    timeline_start_lsn: Lsn,
-    #[serde(serialize_with = "display_serialize")]
-    local_start_lsn: Lsn,
-    #[serde(serialize_with = "display_serialize")]
-    commit_lsn: Lsn,
-    #[serde(serialize_with = "display_serialize")]
-    backup_lsn: Lsn,
-    #[serde(serialize_with = "display_serialize")]
-    peer_horizon_lsn: Lsn,
-    #[serde(serialize_with = "display_serialize")]
-    remote_consistent_lsn: Lsn,
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize)]
+pub struct TimelineStatus {
+    #[serde_as(as = "DisplayFromStr")]
+    pub tenant_id: TenantId,
+    #[serde_as(as = "DisplayFromStr")]
+    pub timeline_id: TimelineId,
+    pub acceptor_state: AcceptorStateStatus,
+    pub pg_info: ServerInfo,
+    #[serde_as(as = "DisplayFromStr")]
+    pub flush_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub timeline_start_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub local_start_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub commit_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub backup_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub peer_horizon_lsn: Lsn,
+    #[serde_as(as = "DisplayFromStr")]
+    pub remote_consistent_lsn: Lsn,
 }

 fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Result<(), ApiError> {
@@ -144,7 +148,7 @@ async fn timeline_status_handler(request: Request<Body>) -> Result<Response<Body
        commit_lsn: inmem.commit_lsn,
        backup_lsn: inmem.backup_lsn,
        peer_horizon_lsn: inmem.peer_horizon_lsn,
-        remote_consistent_lsn: inmem.remote_consistent_lsn,
+        remote_consistent_lsn: tli.get_walsenders().get_remote_consistent_lsn(),
    };
    json_response(StatusCode::OK, status)
 }
@@ -175,6 +179,49 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
    json_response(StatusCode::OK, ())
 }

+/// HTTP handler for `POST /v1/pull_timeline`.
+///
+/// Decodes the JSON body into a `pull_timeline::Request`, hands it to
+/// `pull_timeline::handle_request` and returns its result as JSON. Any failure
+/// of the pull itself is reported as an internal server error.
+async fn timeline_pull_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    // The permission check is done without a specific tenant.
+    check_permission(&request, None)?;
+
+    let pull_request: pull_timeline::Request = json_request(&mut request).await?;
+
+    match pull_timeline::handle_request(pull_request).await {
+        Ok(pulled) => json_response(StatusCode::OK, pulled),
+        Err(err) => Err(ApiError::InternalServerError(err)),
+    }
+}
+
+/// Download a file from the timeline directory.
+///
+/// Serves `GET /v1/tenant/:tenant_id/timeline/:timeline_id/file/:filename`.
+/// The caller must pass the permission check for the tenant. `filename` must be
+/// a single plain file name; anything containing a directory component, `..`
+/// or an absolute path is rejected with a bad-request error so that a request
+/// cannot read files outside of the timeline directory.
+///
+/// The whole file is read into memory and returned as
+/// `application/octet-stream`. Failures to open or read the file are reported
+/// as internal server errors.
+// TODO: figure out a better way to copy files between safekeepers
+async fn timeline_files_handler(request: Request<Body>) -> Result<Response<Body>, ApiError> {
+    let ttid = TenantTimelineId::new(
+        parse_request_param(&request, "tenant_id")?,
+        parse_request_param(&request, "timeline_id")?,
+    );
+    check_permission(&request, Some(ttid.tenant_id))?;
+
+    let filename: String = parse_request_param(&request, "filename")?;
+
+    // `Path::join` happily follows `..` and replaces the base entirely when
+    // given an absolute path, so accept only a single normal path component.
+    let mut components = std::path::Path::new(&filename).components();
+    let is_plain_name = matches!(
+        (components.next(), components.next()),
+        (Some(std::path::Component::Normal(_)), None)
+    );
+    if !is_plain_name {
+        return Err(ApiError::BadRequest(anyhow::anyhow!(
+            "invalid file name {:?}",
+            filename
+        )));
+    }
+
+    let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
+
+    let filepath = tli.timeline_dir.join(filename);
+    let mut file = File::open(&filepath)
+        .await
+        .map_err(|e| ApiError::InternalServerError(e.into()))?;
+
+    let mut content = Vec::new();
+    // TODO: don't store files in memory
+    file.read_to_end(&mut content)
+        .await
+        .map_err(|e| ApiError::InternalServerError(e.into()))?;
+
+    Response::builder()
+        .status(StatusCode::OK)
+        .header("Content-Type", "application/octet-stream")
+        .body(Body::from(content))
+        .map_err(|e| ApiError::InternalServerError(e.into()))
+}
+
 /// Deactivates the timeline and removes its data directory.
 async fn timeline_delete_force_handler(
    mut request: Request<Body>,
@@ -246,7 +293,7 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
    };

    let tli = GlobalTimelines::get(ttid).map_err(ApiError::from)?;
-    tli.record_safekeeper_info(&proto_sk_info)
+    tli.record_safekeeper_info(proto_sk_info)
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -351,6 +398,11 @@ pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError>
            timeline_delete_force_handler,
        )
        .delete("/v1/tenant/:tenant_id", tenant_delete_force_handler)
+        .post("/v1/pull_timeline", timeline_pull_handler)
+        .get(
+            "/v1/tenant/:tenant_id/timeline/:timeline_id/file/:filename",
+            timeline_files_handler,
+        )
        // for tests
        .post(
            "/v1/record_safekeeper_info/:tenant_id/:timeline_id",
--- a/safekeeper/src/json_ctrl.rs
+++ b/safekeeper/src/json_ctrl.rs
@@ -50,7 +50,7 @@ pub struct AppendLogicalMessage {
    pub pg_version: u32,
 }

-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize)]
 struct AppendResult {
    // safekeeper state after append
    state: SafeKeeperState,
@@ -133,7 +133,7 @@ fn send_proposer_elected(tli: &Arc<Timeline>, term: Term, lsn: Lsn) -> anyhow::R
    Ok(())
 }

-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize)]
 pub struct InsertedWAL {
    begin_lsn: Lsn,
    pub end_lsn: Lsn,
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -15,6 +15,7 @@ pub mod handler;
 pub mod http;
 pub mod json_ctrl;
 pub mod metrics;
+pub mod pull_timeline;
 pub mod receive_wal;
 pub mod remove_wal;
 pub mod safekeeper;
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -10,16 +10,16 @@ use anyhow::Result;
 use metrics::{
    core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts},
    proto::MetricFamily,
-    register_int_counter_vec, Gauge, IntCounterVec, IntGaugeVec,
+    register_int_counter, register_int_counter_vec, Gauge, IntCounter, IntCounterVec, IntGaugeVec,
 };
 use once_cell::sync::Lazy;

 use postgres_ffi::XLogSegNo;
+use utils::pageserver_feedback::PageserverFeedback;
 use utils::{id::TenantTimelineId, lsn::Lsn};

 use crate::{
    safekeeper::{SafeKeeperState, SafekeeperMemState},
-    timeline::ReplicaState,
    GlobalTimelines,
 };

@@ -73,6 +73,58 @@ pub static PG_IO_BYTES: Lazy<IntCounterVec> = Lazy::new(|| {
    )
    .expect("Failed to register safekeeper_pg_io_bytes gauge")
 });
+/// Counter of timeline updates pushed to the broker; the broker push loop
+/// increments it once per yielded timeline info.
+pub static BROKER_PUSHED_UPDATES: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "safekeeper_broker_pushed_updates_total",
+        "Number of timeline updates pushed to the broker"
+    )
+    .expect("Failed to register safekeeper_broker_pushed_updates_total counter")
+});
+/// Counter of timeline updates received from the broker, split by the
+/// `result` label. The broker pull loop uses the label values `ok`,
+/// `not_found` and `error`.
+pub static BROKER_PULLED_UPDATES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "safekeeper_broker_pulled_updates_total",
+        "Number of timeline updates pulled and processed from the broker",
+        &["result"]
+    )
+    .expect("Failed to register safekeeper_broker_pulled_updates_total counter")
+});
+/// Counter of queries received over the pg protocol, labelled by command name
+/// in the `query` label.
+pub static PG_QUERIES_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "safekeeper_pg_queries_received_total",
+        "Number of queries received through pg protocol",
+        &["query"]
+    )
+    .expect("Failed to register safekeeper_pg_queries_received_total counter")
+});
+/// Counter of pg protocol queries whose handling has finished, with the same
+/// `query` label as `PG_QUERIES_RECEIVED`.
+pub static PG_QUERIES_FINISHED: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "safekeeper_pg_queries_finished_total",
+        "Number of queries finished through pg protocol",
+        &["query"]
+    )
+    .expect("Failed to register safekeeper_pg_queries_finished_total counter")
+});
+/// Counter of WAL segments removed from disk.
+pub static REMOVED_WAL_SEGMENTS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "safekeeper_removed_wal_segments_total",
+        "Number of WAL segments removed from the disk"
+    )
+    .expect("Failed to register safekeeper_removed_wal_segments_total counter")
+});
+/// Counter of WAL segments that have been backed up.
+// NOTE(review): the help text says segments are backed up "to the broker";
+// presumably the WAL backup destination (remote storage) is meant -- confirm.
+pub static BACKED_UP_SEGMENTS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "safekeeper_backed_up_segments_total",
+        "Number of WAL segments backed up to the broker"
+    )
+    .expect("Failed to register safekeeper_backed_up_segments_total counter")
+});
+/// Counter of errors encountered during backup.
+pub static BACKUP_ERRORS: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "safekeeper_backup_errors_total",
+        "Number of errors during backup"
+    )
+    .expect("Failed to register safekeeper_backup_errors_total counter")
+});

 pub const LABEL_UNKNOWN: &str = "unknown";

@@ -231,7 +283,7 @@ pub fn time_io_closure(closure: impl FnOnce() -> Result<()>) -> Result<f64> {
 /// Metrics for a single timeline.
 pub struct FullTimelineInfo {
    pub ttid: TenantTimelineId,
-    pub replicas: Vec<ReplicaState>,
+    pub ps_feedback: PageserverFeedback,
    pub wal_backup_active: bool,
    pub timeline_is_active: bool,
    pub num_computes: u32,
@@ -242,6 +294,7 @@ pub struct FullTimelineInfo {
    pub persisted_state: SafeKeeperState,

    pub flush_lsn: Lsn,
+    pub remote_consistent_lsn: Lsn,

    pub wal_storage: WalStorageMetrics,
 }
@@ -514,19 +567,6 @@ impl Collector for TimelineCollector {
            let timeline_id = tli.ttid.timeline_id.to_string();
            let labels = &[tenant_id.as_str(), timeline_id.as_str()];

-            let mut most_advanced: Option<pq_proto::PageserverFeedback> = None;
-            for replica in tli.replicas.iter() {
-                if let Some(replica_feedback) = replica.pageserver_feedback {
-                    if let Some(current) = most_advanced {
-                        if current.last_received_lsn < replica_feedback.last_received_lsn {
-                            most_advanced = Some(replica_feedback);
-                        }
-                    } else {
-                        most_advanced = Some(replica_feedback);
-                    }
-                }
-            }
-
            self.commit_lsn
                .with_label_values(labels)
                .set(tli.mem_state.commit_lsn.into());
@@ -544,7 +584,7 @@ impl Collector for TimelineCollector {
                .set(tli.mem_state.peer_horizon_lsn.into());
            self.remote_consistent_lsn
                .with_label_values(labels)
-                .set(tli.mem_state.remote_consistent_lsn.into());
+                .set(tli.remote_consistent_lsn.into());
            self.timeline_active
                .with_label_values(labels)
                .set(tli.timeline_is_active as u64);
@@ -567,15 +607,17 @@ impl Collector for TimelineCollector {
                .with_label_values(labels)
                .set(tli.wal_storage.flush_wal_seconds);

-            if let Some(feedback) = most_advanced {
-                self.ps_last_received_lsn
+            self.ps_last_received_lsn
+                .with_label_values(labels)
+                .set(tli.ps_feedback.last_received_lsn.0);
+            if let Ok(unix_time) = tli
+                .ps_feedback
+                .replytime
+                .duration_since(SystemTime::UNIX_EPOCH)
+            {
+                self.feedback_last_time_seconds
                    .with_label_values(labels)
-                    .set(feedback.last_received_lsn);
-                if let Ok(unix_time) = feedback.replytime.duration_since(SystemTime::UNIX_EPOCH) {
-                    self.feedback_last_time_seconds
-                        .with_label_values(labels)
-                        .set(unix_time.as_secs());
-                }
+                    .set(unix_time.as_secs());
            }

            if tli.last_removed_segno != 0 {
--- a/safekeeper/src/pull_timeline.rs
+++ b/safekeeper/src/pull_timeline.rs
@@ -0,0 +1,240 @@
+use serde::{Deserialize, Serialize};
+
+use anyhow::{bail, Context, Result};
+use tokio::io::AsyncWriteExt;
+use tracing::info;
+use utils::id::{TenantId, TenantTimelineId, TimelineId};
+
+use serde_with::{serde_as, DisplayFromStr};
+
+use crate::{
+    control_file, debug_dump,
+    http::routes::TimelineStatus,
+    wal_storage::{self, Storage},
+    GlobalTimelines,
+};
+
+/// Request to pull a timeline onto this safekeeper from one of its peers.
+#[serde_as]
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Request {
+    /// Tenant owning the timeline; (de)serialized through its string form.
+    #[serde_as(as = "DisplayFromStr")]
+    pub tenant_id: TenantId,
+    /// Timeline to pull; (de)serialized through its string form.
+    #[serde_as(as = "DisplayFromStr")]
+    pub timeline_id: TimelineId,
+    /// Base HTTP URLs of the candidate donor safekeepers. API paths such as
+    /// `/v1/tenant/...` are appended directly to each entry.
+    pub http_hosts: Vec<String>,
+}
+
+/// Result of a successful timeline pull.
+#[derive(Debug, Serialize)]
+pub struct Response {
+    /// Donor safekeeper host: the entry of the request's `http_hosts` the
+    /// timeline was pulled from.
+    pub safekeeper_host: String,
+    // TODO: add more fields?
+}
+
+/// Find the most advanced safekeeper and pull timeline from it.
+///
+/// Queries the timeline status endpoint of every host in `request.http_hosts`
+/// concurrently, picks the donor with the greatest
+/// `(epoch, flush_lsn, commit_lsn)` and copies the timeline from it.
+///
+/// Returns an error if the timeline already exists locally, if no hosts were
+/// given, if any host fails to answer with a successful, parseable status, if
+/// the chosen status describes a different timeline than requested, or if the
+/// copy itself fails.
+pub async fn handle_request(request: Request) -> Result<Response> {
+    let existing_tli = GlobalTimelines::get(TenantTimelineId::new(
+        request.tenant_id,
+        request.timeline_id,
+    ));
+    if existing_tli.is_ok() {
+        bail!("Timeline {} already exists", request.timeline_id);
+    }
+
+    // Without any candidate there is nothing to choose from below; report it
+    // as an error instead of panicking on the empty set.
+    if request.http_hosts.is_empty() {
+        bail!(
+            "No safekeeper hosts provided to pull timeline {} from",
+            request.timeline_id
+        );
+    }
+
+    let client = reqwest::Client::new();
+    let http_hosts = request.http_hosts.clone();
+
+    // Send request to /v1/tenant/:tenant_id/timeline/:timeline_id
+    let responses = futures::future::join_all(http_hosts.iter().map(|url| {
+        let url = format!(
+            "{}/v1/tenant/{}/timeline/{}",
+            url, request.tenant_id, request.timeline_id
+        );
+        client.get(url).send()
+    }))
+    .await;
+
+    let mut statuses = Vec::new();
+    for (i, response) in responses.into_iter().enumerate() {
+        // A non-2xx answer carries an error body, not a TimelineStatus, so
+        // turn it into an error before trying to decode it.
+        let status: TimelineStatus = response
+            .and_then(|response| response.error_for_status())
+            .with_context(|| format!("Failed to get status from {}", http_hosts[i]))?
+            .json()
+            .await
+            .with_context(|| format!("Failed to parse status from {}", http_hosts[i]))?;
+        statuses.push((status, i));
+    }
+
+    // Find the most advanced safekeeper
+    // TODO: current logic may be wrong, fix it later
+    let (status, i) = statuses
+        .into_iter()
+        .max_by_key(|(status, _)| {
+            (
+                status.acceptor_state.epoch,
+                status.flush_lsn,
+                status.commit_lsn,
+            )
+        })
+        .context("No timeline statuses received")?;
+    let safekeeper_host = http_hosts[i].clone();
+
+    // The donor is a remote peer: a mismatching answer is a failed request,
+    // not a reason to panic.
+    if status.tenant_id != request.tenant_id || status.timeline_id != request.timeline_id {
+        bail!(
+            "Safekeeper {} returned status for timeline {}/{}, expected {}/{}",
+            safekeeper_host,
+            status.tenant_id,
+            status.timeline_id,
+            request.tenant_id,
+            request.timeline_id
+        );
+    }
+
+    pull_timeline(status, safekeeper_host).await
+}
+
+/// Copy the timeline described by `status` from the safekeeper at `host`.
+///
+/// Steps:
+/// 1. fetch the list of timeline files from the donor's debug dump;
+/// 2. download every file (control file first) into a fresh temporary
+///    directory under `<workdir parent>/tmp`;
+/// 3. load the downloaded control file and WAL storage as a sanity check;
+/// 4. rename the temporary directory into the timeline's final location and
+///    load the timeline into `GlobalTimelines`.
+///
+/// Any failure is returned as an error; the caller is expected to retry. On
+/// success the returned `Response` names the donor host.
+async fn pull_timeline(status: TimelineStatus, host: String) -> Result<Response> {
+    let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id);
+    info!(
+        "Pulling timeline {} from safekeeper {}, commit_lsn={}, flush_lsn={}, term={}, epoch={}",
+        ttid,
+        host,
+        status.commit_lsn,
+        status.flush_lsn,
+        status.acceptor_state.term,
+        status.acceptor_state.epoch
+    );
+
+    let conf = &GlobalTimelines::get_global_config();
+
+    let client = reqwest::Client::new();
+    // TODO: don't use debug dump, it should be used only in tests.
+    //      This is a proof of concept, we should figure out a way
+    //      to use scp without implementing it manually.
+
+    // Implementing our own scp over HTTP.
+    // At first, we need to fetch list of files from safekeeper.
+    let dump: debug_dump::Response = client
+        .get(format!(
+            "{}/v1/debug_dump?dump_all=true&tenant_id={}&timeline_id={}",
+            host, status.tenant_id, status.timeline_id
+        ))
+        .send()
+        .await?
+        // Fail on HTTP errors instead of trying to decode an error body.
+        .error_for_status()?
+        .json()
+        .await?;
+
+    if dump.timelines.len() != 1 {
+        bail!(
+            "Expected to fetch single timeline, got {} timelines",
+            dump.timelines.len()
+        );
+    }
+
+    let timeline = dump.timelines.into_iter().next().unwrap();
+    let disk_content = timeline
+        .disk_content
+        .with_context(|| format!("Timeline {} doesn't have disk content", ttid))?;
+
+    let mut filenames = disk_content
+        .files
+        .iter()
+        .map(|file| file.name.clone())
+        .collect::<Vec<_>>();
+
+    // The names come from a remote peer and are joined into a local path
+    // below. Accept only single plain path components so that `..` or an
+    // absolute path cannot make us write outside of the temp directory.
+    for name in &filenames {
+        let mut components = std::path::Path::new(name).components();
+        let is_plain_name = matches!(
+            (components.next(), components.next()),
+            (Some(std::path::Component::Normal(_)), None)
+        );
+        if !is_plain_name {
+            bail!("Refusing to download file with invalid name {:?}", name);
+        }
+    }
+
+    // Sort filenames to make sure we pull files in correct order
+    // After sorting, we should have:
+    // - 000000010000000000000001
+    // - ...
+    // - 000000010000000000000002.partial
+    // - safekeeper.control
+    filenames.sort();
+
+    // safekeeper.control should be the first file, so we need to move it to the beginning
+    let control_file_index = filenames
+        .iter()
+        .position(|name| name == "safekeeper.control")
+        .context("safekeeper.control not found")?;
+    filenames.remove(control_file_index);
+    filenames.insert(0, "safekeeper.control".to_string());
+
+    info!(
+        "Downloading {} files from safekeeper {}",
+        filenames.len(),
+        host
+    );
+
+    // Creating temp directory for a new timeline. It needs to be
+    // located on the same filesystem as the rest of the timelines.
+
+    // conf.workdir is usually /storage/safekeeper/data
+    // will try to transform it into /storage/safekeeper/tmp
+    let temp_base = conf
+        .workdir
+        .parent()
+        .context("workdir has no parent")?
+        .join("tmp");
+
+    tokio::fs::create_dir_all(&temp_base).await?;
+
+    // `tli_dir` removes the directory when dropped, which cleans up after any
+    // early return below; it must stay alive until the rename.
+    let tli_dir = tempfile::Builder::new()
+        .suffix("_temptli")
+        .prefix(&format!("{}_{}_", ttid.tenant_id, ttid.timeline_id))
+        .tempdir_in(temp_base)?;
+    let tli_dir_path = tli_dir.path().to_owned();
+
+    // Note: some time happens between fetching list of files and fetching files themselves.
+    //       It's possible that some files will be removed from safekeeper and we will fail to fetch them.
+    //       This function will fail in this case, should be retried by the caller.
+    for filename in filenames {
+        let file_path = tli_dir_path.join(&filename);
+        // /v1/tenant/:tenant_id/timeline/:timeline_id/file/:filename
+        let http_url = format!(
+            "{}/v1/tenant/{}/timeline/{}/file/{}",
+            host, status.tenant_id, status.timeline_id, filename
+        );
+
+        let mut file = tokio::fs::File::create(&file_path).await?;
+        // Without the status check an error response body would be stored as
+        // the file's content.
+        let mut response = client.get(&http_url).send().await?.error_for_status()?;
+        while let Some(chunk) = response.chunk().await? {
+            file.write_all(&chunk).await?;
+        }
+        // tokio's File may still have the last write in flight when it is
+        // dropped; flush so the data is really there before we read it back.
+        file.flush().await?;
+    }
+
+    // TODO: fsync?
+
+    // Let's create timeline from temp directory and verify that it's correct
+
+    let control_path = tli_dir_path.join("safekeeper.control");
+
+    let control_store = control_file::FileStorage::load_control_file(control_path)?;
+    if control_store.server.wal_seg_size == 0 {
+        bail!("wal_seg_size is not set");
+    }
+
+    let wal_store =
+        wal_storage::PhysicalStorage::new(&ttid, tli_dir_path.clone(), conf, &control_store)?;
+
+    let commit_lsn = status.commit_lsn;
+    let flush_lsn = wal_store.flush_lsn();
+
+    info!(
+        "Finished downloading timeline {}, commit_lsn={}, flush_lsn={}",
+        ttid, commit_lsn, flush_lsn
+    );
+    // Sanity check of the donor-reported positions; an inconsistent donor
+    // fails the request rather than panicking this safekeeper.
+    if status.commit_lsn > status.flush_lsn {
+        bail!(
+            "Safekeeper {} reported commit_lsn {} ahead of flush_lsn {}",
+            host,
+            status.commit_lsn,
+            status.flush_lsn
+        );
+    }
+
+    // Move timeline dir to the correct location
+    let timeline_path = conf.timeline_dir(&ttid);
+
+    info!(
+        "Moving timeline {} from {} to {}",
+        ttid,
+        tli_dir_path.display(),
+        timeline_path.display()
+    );
+    tokio::fs::create_dir_all(conf.tenant_dir(&ttid.tenant_id)).await?;
+    tokio::fs::rename(tli_dir_path, &timeline_path).await?;
+
+    let tli = GlobalTimelines::load_timeline(ttid).context("Failed to load timeline after copy")?;
+
+    info!(
+        "Loaded timeline {}, flush_lsn={}",
+        ttid,
+        tli.get_flush_lsn()
+    );
+
+    Ok(Response {
+        safekeeper_host: host,
+    })
+}
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -18,7 +18,8 @@ use crate::control_file;
 use crate::send_wal::HotStandbyFeedback;

 use crate::wal_storage;
-use pq_proto::{PageserverFeedback, SystemId};
+use pq_proto::SystemId;
+use utils::pageserver_feedback::PageserverFeedback;
 use utils::{
    bin_ser::LeSer,
    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
@@ -205,14 +206,13 @@ pub struct SafeKeeperState {
    pub peers: PersistedPeers,
 }

-#[derive(Debug, Clone, Serialize)]
+#[derive(Debug, Clone, Serialize, Deserialize)]
 // In memory safekeeper state. Fields mirror ones in `SafeKeeperState`; values
 // are not flushed yet.
 pub struct SafekeeperMemState {
    pub commit_lsn: Lsn,
    pub backup_lsn: Lsn,
    pub peer_horizon_lsn: Lsn,
-    pub remote_consistent_lsn: Lsn,
    #[serde(with = "hex")]
    pub proposer_uuid: PgUuid,
 }
@@ -347,7 +347,7 @@ pub struct AppendRequestHeader {
 }

 /// Report safekeeper state to proposer
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Debug, Serialize)]
 pub struct AppendResponse {
    // Current term of the safekeeper; if it is higher than proposer's, the
    // compute is out of date.
@@ -540,7 +540,6 @@ where
                commit_lsn: state.commit_lsn,
                backup_lsn: state.backup_lsn,
                peer_horizon_lsn: state.peer_horizon_lsn,
-                remote_consistent_lsn: state.remote_consistent_lsn,
                proposer_uuid: state.proposer_uuid,
            },
            state,
@@ -781,10 +780,6 @@ where

            // Initializing backup_lsn is useful to avoid making backup think it should upload 0 segment.
            self.inmem.backup_lsn = max(self.inmem.backup_lsn, state.timeline_start_lsn);
-            // Initializing remote_consistent_lsn sets that we have nothing to
-            // stream to pageserver(s) immediately after creation.
-            self.inmem.remote_consistent_lsn =
-                max(self.inmem.remote_consistent_lsn, state.timeline_start_lsn);

            state.acceptor_state.term_history = msg.term_history.clone();
            self.persist_control_file(state)?;
@@ -837,7 +832,6 @@ where
        state.commit_lsn = self.inmem.commit_lsn;
        state.backup_lsn = self.inmem.backup_lsn;
        state.peer_horizon_lsn = self.inmem.peer_horizon_lsn;
-        state.remote_consistent_lsn = self.inmem.remote_consistent_lsn;
        state.proposer_uuid = self.inmem.proposer_uuid;
        self.state.persist(&state)
    }
@@ -940,14 +934,12 @@ where
            self.state.backup_lsn + (self.state.server.wal_seg_size as u64) < new_backup_lsn;
        self.inmem.backup_lsn = new_backup_lsn;

-        let new_remote_consistent_lsn = max(
-            Lsn(sk_info.remote_consistent_lsn),
-            self.inmem.remote_consistent_lsn,
-        );
+        // The value in sk_info is expected to be already maximized over our local in-memory value.
+        let new_remote_consistent_lsn = Lsn(sk_info.remote_consistent_lsn);
+        assert!(self.state.remote_consistent_lsn <= new_remote_consistent_lsn);
        sync_control_file |= self.state.remote_consistent_lsn
            + (self.state.server.wal_seg_size as u64)
            < new_remote_consistent_lsn;
-        self.inmem.remote_consistent_lsn = new_remote_consistent_lsn;

        let new_peer_horizon_lsn = max(Lsn(sk_info.peer_horizon_lsn), self.inmem.peer_horizon_lsn);
        sync_control_file |= self.state.peer_horizon_lsn + (self.state.server.wal_seg_size as u64)
@@ -955,7 +947,12 @@ where
        self.inmem.peer_horizon_lsn = new_peer_horizon_lsn;

        if sync_control_file {
-            self.persist_control_file(self.state.clone())?;
+            let mut state = self.state.clone();
+            // Note: we do not persist remote_consistent_lsn in other paths of
+            // persisting cf -- that is not much needed currently. We could do
+            // that by storing Arc to walsenders in Safekeeper.
+            state.remote_consistent_lsn = new_remote_consistent_lsn;
+            self.persist_control_file(state)?;
        }
        Ok(())
    }
--- a/safekeeper/src/send_wal.rs
+++ b/safekeeper/src/send_wal.rs
@@ -1,21 +1,28 @@
 //! This module implements the streaming side of replication protocol, starting
-//! with the "START_REPLICATION" message.
+//! with the "START_REPLICATION" message, and registry of walsenders.

 use crate::handler::SafekeeperPostgresHandler;
-use crate::timeline::{ReplicaState, Timeline};
+use crate::timeline::Timeline;
+use crate::wal_service::ConnectionId;
 use crate::wal_storage::WalReader;
 use crate::GlobalTimelines;
 use anyhow::Context as AnyhowContext;
 use bytes::Bytes;
+use parking_lot::Mutex;
 use postgres_backend::PostgresBackend;
 use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError};
 use postgres_ffi::get_current_timestamp;
 use postgres_ffi::{TimestampTz, MAX_SEND_SIZE};
-use pq_proto::{BeMessage, PageserverFeedback, WalSndKeepAlive, XLogDataBody};
+use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody};
 use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
 use tokio::io::{AsyncRead, AsyncWrite};
+use utils::id::TenantTimelineId;
+use utils::lsn::AtomicLsn;
+use utils::pageserver_feedback::PageserverFeedback;

-use std::cmp::min;
+use std::cmp::{max, min};
+use std::net::SocketAddr;
 use std::str;
 use std::sync::Arc;
 use std::time::Duration;
@@ -40,6 +47,8 @@ pub struct HotStandbyFeedback {
    pub catalog_xmin: FullTransactionId,
 }

+const INVALID_FULL_TRANSACTION_ID: FullTransactionId = 0;
+
 impl HotStandbyFeedback {
    pub fn empty() -> HotStandbyFeedback {
        HotStandbyFeedback {
@@ -51,24 +60,294 @@ impl HotStandbyFeedback {
 }

 /// Standby status update
-#[derive(Debug, Clone, Deserialize)]
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
 pub struct StandbyReply {
-    pub write_lsn: Lsn, // last lsn received by pageserver
-    pub flush_lsn: Lsn, // pageserver's disk consistent lSN
-    pub apply_lsn: Lsn, // pageserver's remote consistent lSN
-    pub reply_ts: TimestampTz,
+    pub write_lsn: Lsn, // The location of the last WAL byte + 1 received and written to disk in the standby.
+    pub flush_lsn: Lsn, // The location of the last WAL byte + 1 flushed to disk in the standby.
+    pub apply_lsn: Lsn, // The location of the last WAL byte + 1 applied in the standby.
+    pub reply_ts: TimestampTz, // The client's system clock at the time of transmission, as microseconds since midnight on 2000-01-01.
    pub reply_requested: bool,
 }

-/// Scope guard to unregister replication connection from timeline
-struct ReplicationConnGuard {
-    replica: usize, // replica internal ID assigned by timeline
-    timeline: Arc<Timeline>,
+impl StandbyReply {
+    fn empty() -> Self {
+        StandbyReply {
+            write_lsn: Lsn::INVALID,
+            flush_lsn: Lsn::INVALID,
+            apply_lsn: Lsn::INVALID,
+            reply_ts: 0,
+            reply_requested: false,
+        }
+    }
 }

-impl Drop for ReplicationConnGuard {
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+pub struct StandbyFeedback {
+    reply: StandbyReply,
+    hs_feedback: HotStandbyFeedback,
+}
+
+/// WalSenders registry. Timeline holds it (wrapped in Arc).
+pub struct WalSenders {
+    /// Lsn maximized over all walsenders *and* peer data, so might be higher
+    /// than what we receive from replicas.
+    remote_consistent_lsn: AtomicLsn,
+    mutex: Mutex<WalSendersShared>,
+}
+
+impl WalSenders {
+    pub fn new(remote_consistent_lsn: Lsn) -> Arc<WalSenders> {
+        Arc::new(WalSenders {
+            remote_consistent_lsn: AtomicLsn::from(remote_consistent_lsn),
+            mutex: Mutex::new(WalSendersShared::new()),
+        })
+    }
+
+    /// Register new walsender. Returned guard provides access to the slot and
+    /// automatically deregisters in Drop.
+    fn register(
+        self: &Arc<WalSenders>,
+        ttid: TenantTimelineId,
+        addr: SocketAddr,
+        conn_id: ConnectionId,
+        appname: Option<String>,
+    ) -> WalSenderGuard {
+        let slots = &mut self.mutex.lock().slots;
+        let walsender_state = WalSenderState {
+            ttid,
+            addr,
+            conn_id,
+            appname,
+            feedback: ReplicationFeedback::Pageserver(PageserverFeedback::empty()),
+        };
+        // find empty slot or create new one
+        let pos = if let Some(pos) = slots.iter().position(|s| s.is_none()) {
+            slots[pos] = Some(walsender_state);
+            pos
+        } else {
+            let pos = slots.len();
+            slots.push(Some(walsender_state));
+            pos
+        };
+        WalSenderGuard {
+            id: pos,
+            walsenders: self.clone(),
+        }
+    }
+
+    /// Get state of all walsenders.
+    pub fn get_all(self: &Arc<WalSenders>) -> Vec<WalSenderState> {
+        self.mutex.lock().slots.iter().flatten().cloned().collect()
+    }
+
+    /// Get aggregated pageserver feedback.
+    pub fn get_ps_feedback(self: &Arc<WalSenders>) -> PageserverFeedback {
+        self.mutex.lock().agg_ps_feedback
+    }
+
+    /// Get aggregated pageserver and hot standby feedback (we send them to compute).
+    pub fn get_feedbacks(self: &Arc<WalSenders>) -> (PageserverFeedback, HotStandbyFeedback) {
+        let shared = self.mutex.lock();
+        (shared.agg_ps_feedback, shared.agg_hs_feedback)
+    }
+
+    /// Record new pageserver feedback, update aggregated values.
+    fn record_ps_feedback(self: &Arc<WalSenders>, id: WalSenderId, feedback: &PageserverFeedback) {
+        let mut shared = self.mutex.lock();
+        shared.get_slot_mut(id).feedback = ReplicationFeedback::Pageserver(*feedback);
+        shared.update_ps_feedback();
+        self.update_remote_consistent_lsn(shared.agg_ps_feedback.remote_consistent_lsn);
+    }
+
+    /// Record standby reply.
+    fn record_standby_reply(self: &Arc<WalSenders>, id: WalSenderId, reply: &StandbyReply) {
+        let mut shared = self.mutex.lock();
+        let slot = shared.get_slot_mut(id);
+        match &mut slot.feedback {
+            ReplicationFeedback::Standby(sf) => sf.reply = *reply,
+            ReplicationFeedback::Pageserver(_) => {
+                slot.feedback = ReplicationFeedback::Standby(StandbyFeedback {
+                    reply: *reply,
+                    hs_feedback: HotStandbyFeedback::empty(),
+                })
+            }
+        }
+    }
+
+    /// Record hot standby feedback, update aggregated value.
+    fn record_hs_feedback(self: &Arc<WalSenders>, id: WalSenderId, feedback: &HotStandbyFeedback) {
+        let mut shared = self.mutex.lock();
+        let slot = shared.get_slot_mut(id);
+        match &mut slot.feedback {
+            ReplicationFeedback::Standby(sf) => sf.hs_feedback = *feedback,
+            ReplicationFeedback::Pageserver(_) => {
+                slot.feedback = ReplicationFeedback::Standby(StandbyFeedback {
+                    reply: StandbyReply::empty(),
+                    hs_feedback: *feedback,
+                })
+            }
+        }
+        shared.update_hs_feedback();
+    }
+
+    /// Get remote_consistent_lsn reported by the pageserver. Returns None if
+    /// client is not pageserver.
+    fn get_ws_remote_consistent_lsn(self: &Arc<WalSenders>, id: WalSenderId) -> Option<Lsn> {
+        let shared = self.mutex.lock();
+        let slot = shared.get_slot(id);
+        match slot.feedback {
+            ReplicationFeedback::Pageserver(feedback) => Some(feedback.remote_consistent_lsn),
+            _ => None,
+        }
+    }
+
+    /// Get remote_consistent_lsn maximized across all walsenders and peers.
+    pub fn get_remote_consistent_lsn(self: &Arc<WalSenders>) -> Lsn {
+        self.remote_consistent_lsn.load()
+    }
+
+    /// Update maximized remote_consistent_lsn, return the (potentially new) value.
+    pub fn update_remote_consistent_lsn(self: &Arc<WalSenders>, candidate: Lsn) -> Lsn {
+        self.remote_consistent_lsn
+            .fetch_max(candidate)
+            .max(candidate)
+    }
+
+    /// Unregister walsender.
+    fn unregister(self: &Arc<WalSenders>, id: WalSenderId) {
+        let mut shared = self.mutex.lock();
+        shared.slots[id] = None;
+        shared.update_hs_feedback();
+    }
+}
+
+struct WalSendersShared {
+    // aggregated over all walsenders value
+    agg_hs_feedback: HotStandbyFeedback,
+    // aggregated over all walsenders value
+    agg_ps_feedback: PageserverFeedback,
+    slots: Vec<Option<WalSenderState>>,
+}
+
+impl WalSendersShared {
+    fn new() -> Self {
+        WalSendersShared {
+            agg_hs_feedback: HotStandbyFeedback::empty(),
+            agg_ps_feedback: PageserverFeedback::empty(),
+            slots: Vec::new(),
+        }
+    }
+
+    /// Get content of provided id slot, it must exist.
+    fn get_slot(&self, id: WalSenderId) -> &WalSenderState {
+        self.slots[id].as_ref().expect("walsender doesn't exist")
+    }
+
+    /// Get mut content of provided id slot, it must exist.
+    fn get_slot_mut(&mut self, id: WalSenderId) -> &mut WalSenderState {
+        self.slots[id].as_mut().expect("walsender doesn't exist")
+    }
+
+    /// Update aggregated hot standby feedback: min of valid xmins/catalog_xmins,
+    /// max (most recent) ts; min of ts could never rise above empty()'s ts.
+    fn update_hs_feedback(&mut self) {
+        let mut agg = HotStandbyFeedback::empty();
+        for ws_state in self.slots.iter().flatten() {
+            if let ReplicationFeedback::Standby(standby_feedback) = ws_state.feedback {
+                let hs_feedback = standby_feedback.hs_feedback;
+                // doing Option math like op1.iter().chain(op2.iter()).min()
+                // would be nicer, but we serialize/deserialize this struct
+                // directly, so leave as is for now
+                if hs_feedback.xmin != INVALID_FULL_TRANSACTION_ID {
+                    if agg.xmin != INVALID_FULL_TRANSACTION_ID {
+                        agg.xmin = min(agg.xmin, hs_feedback.xmin);
+                    } else {
+                        agg.xmin = hs_feedback.xmin;
+                    }
+                    agg.ts = max(agg.ts, hs_feedback.ts);
+                }
+                if hs_feedback.catalog_xmin != INVALID_FULL_TRANSACTION_ID {
+                    if agg.catalog_xmin != INVALID_FULL_TRANSACTION_ID {
+                        agg.catalog_xmin = min(agg.catalog_xmin, hs_feedback.catalog_xmin);
+                    } else {
+                        agg.catalog_xmin = hs_feedback.catalog_xmin;
+                    }
+                    agg.ts = max(agg.ts, hs_feedback.ts);
+                }
+            }
+        }
+        self.agg_hs_feedback = agg;
+    }
+
+    /// Update aggregated pageserver feedback. LSNs (last_received,
+    /// disk_consistent, remote_consistent) and reply timestamp are just
+    /// maximized; timeline_size is taken from the feedback with the highest
+    /// last_received lsn. This is generally reasonable, but we might want to
+    /// implement other policies once multiple pageservers start to be actively
+    /// used.
+    fn update_ps_feedback(&mut self) {
+        let init = PageserverFeedback::empty();
+        let acc =
+            self.slots
+                .iter()
+                .flatten()
+                .fold(init, |mut acc, ws_state| match ws_state.feedback {
+                    ReplicationFeedback::Pageserver(feedback) => {
+                        if feedback.last_received_lsn > acc.last_received_lsn {
+                            acc.current_timeline_size = feedback.current_timeline_size;
+                        }
+                        acc.last_received_lsn =
+                            max(feedback.last_received_lsn, acc.last_received_lsn);
+                        acc.disk_consistent_lsn =
+                            max(feedback.disk_consistent_lsn, acc.disk_consistent_lsn);
+                        acc.remote_consistent_lsn =
+                            max(feedback.remote_consistent_lsn, acc.remote_consistent_lsn);
+                        acc.replytime = max(feedback.replytime, acc.replytime);
+                        acc
+                    }
+                    ReplicationFeedback::Standby(_) => acc,
+                });
+        self.agg_ps_feedback = acc;
+    }
+}
+
+// Serialization is used only for pretty printing in json.
+#[serde_as]
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct WalSenderState {
+    #[serde_as(as = "DisplayFromStr")]
+    ttid: TenantTimelineId,
+    addr: SocketAddr,
+    conn_id: ConnectionId,
+    // postgres application_name
+    appname: Option<String>,
+    feedback: ReplicationFeedback,
+}
+
+// Receiver is either pageserver or regular standby, which have different
+// feedbacks.
+#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
+enum ReplicationFeedback {
+    Pageserver(PageserverFeedback),
+    Standby(StandbyFeedback),
+}
+
+// id of the occupied slot in WalSenders to access it (and save in the
+// WalSenderGuard). We could give Arc directly to the slot, but there is not
+// much sense in that as values aggregation which is performed on each feedback
+// receival iterates over all walsenders.
+pub type WalSenderId = usize;
+
+/// Scope guard to access slot in WalSenders registry and unregister from it in
+/// Drop.
+pub struct WalSenderGuard {
+    id: WalSenderId,
+    walsenders: Arc<WalSenders>,
+}
+
+impl Drop for WalSenderGuard {
    fn drop(&mut self) {
-        self.timeline.remove_replica(self.replica);
+        self.walsenders.unregister(self.id);
    }
 }

@@ -97,16 +376,13 @@ impl SafekeeperPostgresHandler {
        let tli =
            GlobalTimelines::get(self.ttid).map_err(|e| CopyStreamHandlerEnd::Other(e.into()))?;

-        let state = ReplicaState::new();
-        // This replica_id is used below to check if it's time to stop replication.
-        let replica_id = tli.add_replica(state);
-
-        // Use a guard object to remove our entry from the timeline, when the background
-        // thread and us have both finished using it.
-        let _guard = Arc::new(ReplicationConnGuard {
-            replica: replica_id,
-            timeline: tli.clone(),
-        });
+        // Use a guard object to remove our entry from the timeline when we are done.
+        let ws_guard = Arc::new(tli.get_walsenders().register(
+            self.ttid,
+            *pgb.get_peer_addr(),
+            self.conn_id,
+            self.appname.clone(),
+        ));

        // Walproposer gets special handling: safekeeper must give proposer all
        // local WAL till the end, whether committed or not (walproposer will
@@ -154,16 +430,11 @@ impl SafekeeperPostgresHandler {
            end_pos,
            stop_pos,
            commit_lsn_watch_rx: tli.get_commit_lsn_watch_rx(),
-            replica_id,
+            ws_guard: ws_guard.clone(),
            wal_reader,
            send_buf: [0; MAX_SEND_SIZE],
        };
-        let mut reply_reader = ReplyReader {
-            reader,
-            tli,
-            replica_id,
-            feedback: ReplicaState::new(),
-        };
+        let mut reply_reader = ReplyReader { reader, ws_guard };

        let res = tokio::select! {
            // todo: add read|write .context to these errors
@@ -190,7 +461,7 @@ struct WalSender<'a, IO> {
    // in recovery.
    stop_pos: Option<Lsn>,
    commit_lsn_watch_rx: Receiver<Lsn>,
-    replica_id: usize,
+    ws_guard: Arc<WalSenderGuard>,
    wal_reader: WalReader,
    // buffer for readling WAL into to send it
    send_buf: [u8; MAX_SEND_SIZE],
@@ -264,14 +535,20 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> WalSender<'_, IO> {
                return Ok(());
            }
            // Timed out waiting for WAL, check for termination and send KA
-            if self.tli.should_walsender_stop(self.replica_id) {
-                // Terminate if there is nothing more to send.
-                // TODO close the stream properly
-                return Err(CopyStreamHandlerEnd::ServerInitiated(format!(
-                    "ending streaming to {:?} at {}, receiver is caughtup and there is no computes",
-                    self.appname, self.start_pos,
-                )));
+            if let Some(remote_consistent_lsn) = self
+                .ws_guard
+                .walsenders
+                .get_ws_remote_consistent_lsn(self.ws_guard.id)
+            {
+                if self.tli.should_walsender_stop(remote_consistent_lsn) {
+                    // Terminate if there is nothing more to send.
+                    return Err(CopyStreamHandlerEnd::ServerInitiated(format!(
+                        "ending streaming to {:?} at {}, receiver is caughtup and there is no computes",
+                        self.appname, self.start_pos,
+                    )));
+                }
            }
+
            self.pgb
                .write_message(&BeMessage::KeepAlive(WalSndKeepAlive {
                    sent_ptr: self.end_pos.0,
@@ -286,9 +563,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> WalSender<'_, IO> {
 /// A half driving receiving replies.
 struct ReplyReader<IO> {
    reader: PostgresBackendReader<IO>,
-    tli: Arc<Timeline>,
-    replica_id: usize,
-    feedback: ReplicaState,
+    ws_guard: Arc<WalSenderGuard>,
 }

 impl<IO: AsyncRead + AsyncWrite + Unpin> ReplyReader<IO> {
@@ -303,29 +578,32 @@ impl<IO: AsyncRead + AsyncWrite + Unpin> ReplyReader<IO> {
        match msg.first().cloned() {
            Some(HOT_STANDBY_FEEDBACK_TAG_BYTE) => {
                // Note: deserializing is on m[1..] because we skip the tag byte.
-                self.feedback.hs_feedback = HotStandbyFeedback::des(&msg[1..])
+                let hs_feedback = HotStandbyFeedback::des(&msg[1..])
                    .context("failed to deserialize HotStandbyFeedback")?;
-                self.tli
-                    .update_replica_state(self.replica_id, self.feedback);
+                self.ws_guard
+                    .walsenders
+                    .record_hs_feedback(self.ws_guard.id, &hs_feedback);
            }
            Some(STANDBY_STATUS_UPDATE_TAG_BYTE) => {
-                let _reply =
+                let reply =
                    StandbyReply::des(&msg[1..]).context("failed to deserialize StandbyReply")?;
-                // This must be a regular postgres replica,
-                // because pageserver doesn't send this type of messages to safekeeper.
-                // Currently we just ignore this, tracking progress for them is not supported.
+                self.ws_guard
+                    .walsenders
+                    .record_standby_reply(self.ws_guard.id, &reply);
            }
            Some(NEON_STATUS_UPDATE_TAG_BYTE) => {
                // pageserver sends this.
                // Note: deserializing is on m[9..] because we skip the tag byte and len bytes.
                let buf = Bytes::copy_from_slice(&msg[9..]);
-                let reply = PageserverFeedback::parse(buf);
+                let ps_feedback = PageserverFeedback::parse(buf);

-                trace!("PageserverFeedback is {:?}", reply);
-                self.feedback.pageserver_feedback = Some(reply);
-
-                self.tli
-                    .update_replica_state(self.replica_id, self.feedback);
+                trace!("PageserverFeedback is {:?}", ps_feedback);
+                self.ws_guard
+                    .walsenders
+                    .record_ps_feedback(self.ws_guard.id, &ps_feedback);
+                // In principle a new remote_consistent_lsn could allow us to
+                // deactivate the timeline, but we check that regularly through
+                // broker updates, so there is no need to do it here.
            }
            _ => warn!("unexpected message {:?}", msg),
        }
@@ -368,3 +646,89 @@ async fn wait_for_lsn(rx: &mut Receiver<Lsn>, lsn: Lsn) -> anyhow::Result<Option
        Err(_) => Ok(None),
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use postgres_protocol::PG_EPOCH;
+    use utils::id::{TenantId, TimelineId};
+
+    use super::*;
+
+    fn mock_ttid() -> TenantTimelineId {
+        TenantTimelineId {
+            tenant_id: TenantId::from_slice(&[0x00; 16]).unwrap(),
+            timeline_id: TimelineId::from_slice(&[0x00; 16]).unwrap(),
+        }
+    }
+
+    fn mock_addr() -> SocketAddr {
+        "127.0.0.1:8080".parse().unwrap()
+    }
+
+    // add to wss specified feedback setting other fields to dummy values
+    fn push_feedback(wss: &mut WalSendersShared, feedback: ReplicationFeedback) {
+        let walsender_state = WalSenderState {
+            ttid: mock_ttid(),
+            addr: mock_addr(),
+            conn_id: 1,
+            appname: None,
+            feedback,
+        };
+        wss.slots.push(Some(walsender_state))
+    }
+
+    // form standby feedback with given hot standby feedback ts/xmin and the
+    // rest set to dummy values.
+    fn hs_feedback(ts: TimestampTz, xmin: FullTransactionId) -> ReplicationFeedback {
+        ReplicationFeedback::Standby(StandbyFeedback {
+            reply: StandbyReply::empty(),
+            hs_feedback: HotStandbyFeedback {
+                ts,
+                xmin,
+                catalog_xmin: 0,
+            },
+        })
+    }
+
+    // test that hs aggregation works as expected
+    #[test]
+    fn test_hs_feedback_no_valid() {
+        let mut wss = WalSendersShared::new();
+        push_feedback(&mut wss, hs_feedback(1, INVALID_FULL_TRANSACTION_ID));
+        wss.update_hs_feedback();
+        assert_eq!(wss.agg_hs_feedback.xmin, INVALID_FULL_TRANSACTION_ID);
+    }
+
+    #[test]
+    fn test_hs_feedback() {
+        let mut wss = WalSendersShared::new();
+        push_feedback(&mut wss, hs_feedback(1, INVALID_FULL_TRANSACTION_ID));
+        push_feedback(&mut wss, hs_feedback(1, 42));
+        push_feedback(&mut wss, hs_feedback(1, 64));
+        wss.update_hs_feedback();
+        assert_eq!(wss.agg_hs_feedback.xmin, 42);
+    }
+
+    // form pageserver feedback with given last_received_lsn / timeline size
+    // and the rest set to dummy values.
+    fn ps_feedback(current_timeline_size: u64, last_received_lsn: Lsn) -> ReplicationFeedback {
+        ReplicationFeedback::Pageserver(PageserverFeedback {
+            current_timeline_size,
+            last_received_lsn,
+            disk_consistent_lsn: Lsn::INVALID,
+            remote_consistent_lsn: Lsn::INVALID,
+            replytime: *PG_EPOCH,
+        })
+    }
+
+    // test that ps aggregation works as expected
+    #[test]
+    fn test_ps_feedback() {
+        let mut wss = WalSendersShared::new();
+        push_feedback(&mut wss, ps_feedback(8, Lsn(42)));
+        push_feedback(&mut wss, ps_feedback(4, Lsn(84)));
+        wss.update_ps_feedback();
+        assert_eq!(wss.agg_ps_feedback.current_timeline_size, 4);
+        assert_eq!(wss.agg_ps_feedback.last_received_lsn, Lsn(84));
+    }
+}
--- a/safekeeper/src/timeline.rs
+++ b/safekeeper/src/timeline.rs
@@ -4,10 +4,10 @@
 use anyhow::{anyhow, bail, Result};
 use parking_lot::{Mutex, MutexGuard};
 use postgres_ffi::XLogSegNo;
-use pq_proto::PageserverFeedback;
-use serde::Serialize;
-use std::cmp::{max, min};
+
+use std::cmp::max;
 use std::path::PathBuf;
+use std::sync::Arc;
 use tokio::{
    sync::{mpsc::Sender, watch},
    time::Instant,
@@ -26,7 +26,7 @@ use crate::safekeeper::{
    AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, SafeKeeperState,
    SafekeeperMemState, ServerInfo, Term,
 };
-use crate::send_wal::HotStandbyFeedback;
+use crate::send_wal::WalSenders;
 use crate::{control_file, safekeeper::UNKNOWN_SERVER_VERSION};

 use crate::metrics::FullTimelineInfo;
@@ -81,48 +81,12 @@ impl PeersInfo {
    }
 }

-/// Replica status update + hot standby feedback
-#[derive(Debug, Clone, Copy, Serialize)]
-pub struct ReplicaState {
-    /// last known lsn received by replica
-    pub last_received_lsn: Lsn, // None means we don't know
-    /// combined remote consistent lsn of pageservers
-    pub remote_consistent_lsn: Lsn,
-    /// combined hot standby feedback from all replicas
-    pub hs_feedback: HotStandbyFeedback,
-    /// Replication specific feedback received from pageserver, if any
-    pub pageserver_feedback: Option<PageserverFeedback>,
-}
-
-impl Default for ReplicaState {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl ReplicaState {
-    pub fn new() -> ReplicaState {
-        ReplicaState {
-            last_received_lsn: Lsn::MAX,
-            remote_consistent_lsn: Lsn(0),
-            hs_feedback: HotStandbyFeedback {
-                ts: 0,
-                xmin: u64::MAX,
-                catalog_xmin: u64::MAX,
-            },
-            pageserver_feedback: None,
-        }
-    }
-}
-
 /// Shared state associated with database instance
 pub struct SharedState {
    /// Safekeeper object
    sk: SafeKeeper<control_file::FileStorage, wal_storage::PhysicalStorage>,
    /// In memory list containing state of peers sent in latest messages from them.
    peers_info: PeersInfo,
-    /// State of replicas
-    replicas: Vec<Option<ReplicaState>>,
    /// True when WAL backup launcher oversees the timeline, making sure WAL is
    /// offloaded, allows to bother launcher less.
    wal_backup_active: bool,
@@ -165,13 +129,13 @@ impl SharedState {
        // We don't want to write anything to disk, because we may have existing timeline there.
        // These functions should not change anything on disk.
        let control_store = control_file::FileStorage::create_new(ttid, conf, state)?;
-        let wal_store = wal_storage::PhysicalStorage::new(ttid, conf, &control_store)?;
+        let wal_store =
+            wal_storage::PhysicalStorage::new(ttid, conf.timeline_dir(ttid), conf, &control_store)?;
        let sk = SafeKeeper::new(control_store, wal_store, conf.my_id)?;

        Ok(Self {
            sk,
            peers_info: PeersInfo(vec![]),
-            replicas: vec![],
            wal_backup_active: false,
            active: false,
            num_computes: 0,
@@ -186,12 +150,12 @@ impl SharedState {
            bail!(TimelineError::UninitializedWalSegSize(*ttid));
        }

-        let wal_store = wal_storage::PhysicalStorage::new(ttid, conf, &control_store)?;
+        let wal_store =
+            wal_storage::PhysicalStorage::new(ttid, conf.timeline_dir(ttid), conf, &control_store)?;

        Ok(Self {
            sk: SafeKeeper::new(control_store, wal_store, conf.my_id)?,
            peers_info: PeersInfo(vec![]),
-            replicas: Vec::new(),
            wal_backup_active: false,
            active: false,
            num_computes: 0,
@@ -199,17 +163,17 @@ impl SharedState {
        })
    }

-    fn is_active(&self) -> bool {
+    fn is_active(&self, remote_consistent_lsn: Lsn) -> bool {
        self.is_wal_backup_required()
            // FIXME: add tracking of relevant pageservers and check them here individually,
            // otherwise migration won't work (we suspend too early).
-            || self.sk.inmem.remote_consistent_lsn < self.sk.inmem.commit_lsn
+            || remote_consistent_lsn < self.sk.inmem.commit_lsn
    }

    /// Mark timeline active/inactive and return whether s3 offloading requires
    /// start/stop action.
-    fn update_status(&mut self, ttid: TenantTimelineId) -> bool {
-        let is_active = self.is_active();
+    fn update_status(&mut self, remote_consistent_lsn: Lsn, ttid: TenantTimelineId) -> bool {
+        let is_active = self.is_active(remote_consistent_lsn);
        if self.active != is_active {
            info!("timeline {} active={} now", ttid, is_active);
        }
@@ -254,68 +218,11 @@ impl SharedState {
        self.sk.state.server.wal_seg_size as usize
    }

-    /// Get combined state of all alive replicas
-    pub fn get_replicas_state(&self) -> ReplicaState {
-        let mut acc = ReplicaState::new();
-        for state in self.replicas.iter().flatten() {
-            acc.hs_feedback.ts = max(acc.hs_feedback.ts, state.hs_feedback.ts);
-            acc.hs_feedback.xmin = min(acc.hs_feedback.xmin, state.hs_feedback.xmin);
-            acc.hs_feedback.catalog_xmin =
-                min(acc.hs_feedback.catalog_xmin, state.hs_feedback.catalog_xmin);
-
-            // FIXME
-            // If multiple pageservers are streaming WAL and send feedback for the same timeline simultaneously,
-            // this code is not correct.
-            // Now the most advanced feedback is used.
-            // If one pageserver lags when another doesn't, the backpressure won't be activated on compute and lagging
-            // pageserver is prone to timeout errors.
-            //
-            // To choose what feedback to use and resend to compute node,
-            // we need to know which pageserver compute node considers to be main.
-            // See https://github.com/neondatabase/neon/issues/1171
-            //
-            if let Some(pageserver_feedback) = state.pageserver_feedback {
-                if let Some(acc_feedback) = acc.pageserver_feedback {
-                    if acc_feedback.last_received_lsn < pageserver_feedback.last_received_lsn {
-                        warn!("More than one pageserver is streaming WAL for the timeline. Feedback resolving is not fully supported yet.");
-                        acc.pageserver_feedback = Some(pageserver_feedback);
-                    }
-                } else {
-                    acc.pageserver_feedback = Some(pageserver_feedback);
-                }
-
-                // last lsn received by pageserver
-                // FIXME if multiple pageservers are streaming WAL, last_received_lsn must be tracked per pageserver.
-                // See https://github.com/neondatabase/neon/issues/1171
-                acc.last_received_lsn = Lsn::from(pageserver_feedback.last_received_lsn);
-
-                // When at least one pageserver has preserved data up to remote_consistent_lsn,
-                // safekeeper is free to delete it, so choose max of all pageservers.
-                acc.remote_consistent_lsn = max(
-                    Lsn::from(pageserver_feedback.remote_consistent_lsn),
-                    acc.remote_consistent_lsn,
-                );
-            }
-        }
-        acc
-    }
-
-    /// Assign new replica ID. We choose first empty cell in the replicas vector
-    /// or extend the vector if there are no free slots.
-    pub fn add_replica(&mut self, state: ReplicaState) -> usize {
-        if let Some(pos) = self.replicas.iter().position(|r| r.is_none()) {
-            self.replicas[pos] = Some(state);
-            return pos;
-        }
-        let pos = self.replicas.len();
-        self.replicas.push(Some(state));
-        pos
-    }
-
    fn get_safekeeper_info(
        &self,
        ttid: &TenantTimelineId,
        conf: &SafeKeeperConf,
+        remote_consistent_lsn: Lsn,
    ) -> SafekeeperTimelineInfo {
        SafekeeperTimelineInfo {
            safekeeper_id: conf.my_id.0,
@@ -328,11 +235,7 @@ impl SharedState {
            // note: this value is not flushed to control file yet and can be lost
            commit_lsn: self.sk.inmem.commit_lsn.0,
            // TODO: rework feedbacks to avoid max here
-            remote_consistent_lsn: max(
-                self.get_replicas_state().remote_consistent_lsn,
-                self.sk.inmem.remote_consistent_lsn,
-            )
-            .0,
+            remote_consistent_lsn: remote_consistent_lsn.0,
            peer_horizon_lsn: self.sk.inmem.peer_horizon_lsn.0,
            safekeeper_connstr: conf.listen_pg_addr.clone(),
            backup_lsn: self.sk.inmem.backup_lsn.0,
@@ -387,6 +290,7 @@ pub struct Timeline {
    /// Safekeeper and other state, that should remain consistent and synchronized
    /// with the disk.
    mutex: Mutex<SharedState>,
+    walsenders: Arc<WalSenders>,

    /// Cancellation channel. Delete/cancel will send `true` here as a cancellation signal.
    cancellation_tx: watch::Sender<bool>,
@@ -409,6 +313,7 @@ impl Timeline {
        let _enter = info_span!("load_timeline", timeline = %ttid.timeline_id).entered();

        let shared_state = SharedState::restore(&conf, &ttid)?;
+        let rcl = shared_state.sk.state.remote_consistent_lsn;
        let (commit_lsn_watch_tx, commit_lsn_watch_rx) =
            watch::channel(shared_state.sk.state.commit_lsn);
        let (cancellation_tx, cancellation_rx) = watch::channel(false);
@@ -419,6 +324,7 @@ impl Timeline {
            commit_lsn_watch_tx,
            commit_lsn_watch_rx,
            mutex: Mutex::new(shared_state),
+            walsenders: WalSenders::new(rcl),
            cancellation_rx,
            cancellation_tx,
            timeline_dir: conf.timeline_dir(&ttid),
@@ -444,6 +350,7 @@ impl Timeline {
            commit_lsn_watch_tx,
            commit_lsn_watch_rx,
            mutex: Mutex::new(SharedState::create_new(&conf, &ttid, state)?),
+            walsenders: WalSenders::new(Lsn(0)),
            cancellation_rx,
            cancellation_tx,
            timeline_dir: conf.timeline_dir(&ttid),
@@ -475,7 +382,7 @@ impl Timeline {
        match || -> Result<()> {
            shared_state.sk.persist()?;
            // TODO: add more initialization steps here
-            shared_state.update_status(self.ttid);
+            self.update_status(shared_state);
            Ok(())
        }() {
            Ok(_) => Ok(()),
@@ -531,6 +438,10 @@ impl Timeline {
        self.mutex.lock()
    }

+    /// Refresh the status of `shared_state`, passing along the
+    /// remote_consistent_lsn currently reported by this timeline's walsenders
+    /// together with the timeline id. Returns whatever
+    /// `SharedState::update_status` returns; callers in this file store it as
+    /// `is_wal_backup_action_pending`.
+    fn update_status(&self, shared_state: &mut SharedState) -> bool {
+        shared_state.update_status(self.get_walsenders().get_remote_consistent_lsn(), self.ttid)
+    }
+
    /// Register compute connection, starting timeline-related activity if it is
    /// not running yet.
    pub async fn on_compute_connect(&self) -> Result<()> {
@@ -542,7 +453,7 @@ impl Timeline {
        {
            let mut shared_state = self.write_shared_state();
            shared_state.num_computes += 1;
-            is_wal_backup_action_pending = shared_state.update_status(self.ttid);
+            is_wal_backup_action_pending = self.update_status(&mut shared_state);
        }
        // Wake up wal backup launcher, if offloading not started yet.
        if is_wal_backup_action_pending {
@@ -559,7 +470,7 @@ impl Timeline {
        {
            let mut shared_state = self.write_shared_state();
            shared_state.num_computes -= 1;
-            is_wal_backup_action_pending = shared_state.update_status(self.ttid);
+            is_wal_backup_action_pending = self.update_status(&mut shared_state);
        }
        // Wake up wal backup launcher, if it is time to stop the offloading.
        if is_wal_backup_action_pending {
@@ -574,26 +485,19 @@ impl Timeline {
        Ok(())
    }

-    /// Returns true if walsender should stop sending WAL to pageserver.
-    /// TODO: check this pageserver is actually interested in this timeline.
-    pub fn should_walsender_stop(&self, replica_id: usize) -> bool {
+    /// Returns true if walsender should stop sending WAL to pageserver. We
+    /// terminate it if remote_consistent_lsn reached commit_lsn and there is no
+    /// computes. While there might be nothing to stream already, we learn about
+    /// remote_consistent_lsn update through replication feedback, and we want
+    /// to stop pushing to the broker if pageserver is fully caughtup.
+    pub fn should_walsender_stop(&self, reported_remote_consistent_lsn: Lsn) -> bool {
        if self.is_cancelled() {
            return true;
        }
-        let mut shared_state = self.write_shared_state();
+        let shared_state = self.write_shared_state();
        if shared_state.num_computes == 0 {
-            let replica_state = shared_state.replicas[replica_id].unwrap();
-            let reported_remote_consistent_lsn = replica_state
-                .pageserver_feedback
-                .map(|f| Lsn(f.remote_consistent_lsn))
-                .unwrap_or(Lsn::INVALID);
-            let stop = shared_state.sk.inmem.commit_lsn == Lsn(0) || // no data at all yet
-            (reported_remote_consistent_lsn!= Lsn::MAX && // Lsn::MAX means that we don't know the latest LSN yet.
-            reported_remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn);
-            if stop {
-                shared_state.update_status(self.ttid);
-                return true;
-            }
+            return shared_state.sk.inmem.commit_lsn == Lsn(0) || // no data at all yet
+            reported_remote_consistent_lsn >= shared_state.sk.inmem.commit_lsn;
        }
        false
    }
@@ -628,13 +532,12 @@ impl Timeline {
            let mut shared_state = self.write_shared_state();
            rmsg = shared_state.sk.process_msg(msg)?;

-            // if this is AppendResponse, fill in proper hot standby feedback and disk consistent lsn
+            // if this is AppendResponse, fill in proper pageserver and hot
+            // standby feedback.
            if let Some(AcceptorProposerMessage::AppendResponse(ref mut resp)) = rmsg {
-                let state = shared_state.get_replicas_state();
-                resp.hs_feedback = state.hs_feedback;
-                if let Some(pageserver_feedback) = state.pageserver_feedback {
-                    resp.pageserver_feedback = pageserver_feedback;
-                }
+                let (ps_feedback, hs_feedback) = self.walsenders.get_feedbacks();
+                resp.hs_feedback = hs_feedback;
+                resp.pageserver_feedback = ps_feedback;
            }

            commit_lsn = shared_state.sk.inmem.commit_lsn;
@@ -684,19 +587,29 @@ impl Timeline {
    /// Get safekeeper info for broadcasting to broker and other peers.
    pub fn get_safekeeper_info(&self, conf: &SafeKeeperConf) -> SafekeeperTimelineInfo {
        let shared_state = self.write_shared_state();
-        shared_state.get_safekeeper_info(&self.ttid, conf)
+        shared_state.get_safekeeper_info(
+            &self.ttid,
+            conf,
+            self.walsenders.get_remote_consistent_lsn(),
+        )
    }

    /// Update timeline state with peer safekeeper data.
-    pub async fn record_safekeeper_info(&self, sk_info: &SafekeeperTimelineInfo) -> Result<()> {
+    pub async fn record_safekeeper_info(&self, mut sk_info: SafekeeperTimelineInfo) -> Result<()> {
+        // Update local remote_consistent_lsn in memory (in .walsenders) and in
+        // sk_info to pass it down to control file.
+        sk_info.remote_consistent_lsn = self
+            .walsenders
+            .update_remote_consistent_lsn(Lsn(sk_info.remote_consistent_lsn))
+            .0;
        let is_wal_backup_action_pending: bool;
        let commit_lsn: Lsn;
        {
            let mut shared_state = self.write_shared_state();
-            shared_state.sk.record_safekeeper_info(sk_info)?;
-            let peer_info = PeerInfo::from_sk_info(sk_info, Instant::now());
+            shared_state.sk.record_safekeeper_info(&sk_info)?;
+            let peer_info = PeerInfo::from_sk_info(&sk_info, Instant::now());
            shared_state.peers_info.upsert(&peer_info);
-            is_wal_backup_action_pending = shared_state.update_status(self.ttid);
+            is_wal_backup_action_pending = self.update_status(&mut shared_state);
            commit_lsn = shared_state.sk.inmem.commit_lsn;
        }
        self.commit_lsn_watch_tx.send(commit_lsn)?;
@@ -723,22 +636,8 @@ impl Timeline {
            .collect()
    }

-    /// Add send_wal replica to the in-memory vector of replicas.
-    pub fn add_replica(&self, state: ReplicaState) -> usize {
-        self.write_shared_state().add_replica(state)
-    }
-
-    /// Update replication replica state.
-    pub fn update_replica_state(&self, id: usize, state: ReplicaState) {
-        let mut shared_state = self.write_shared_state();
-        shared_state.replicas[id] = Some(state);
-    }
-
-    /// Remove send_wal replica from the in-memory vector of replicas.
-    pub fn remove_replica(&self, id: usize) {
-        let mut shared_state = self.write_shared_state();
-        assert!(shared_state.replicas[id].is_some());
-        shared_state.replicas[id] = None;
+    pub fn get_walsenders(&self) -> &Arc<WalSenders> {
+        &self.walsenders
    }

    /// Returns flush_lsn.
@@ -781,16 +680,12 @@ impl Timeline {
            return None;
        }

+        let ps_feedback = self.walsenders.get_ps_feedback();
        let state = self.write_shared_state();
        if state.active {
            Some(FullTimelineInfo {
                ttid: self.ttid,
-                replicas: state
-                    .replicas
-                    .iter()
-                    .filter_map(|r| r.as_ref())
-                    .copied()
-                    .collect(),
+                ps_feedback,
                wal_backup_active: state.wal_backup_active,
                timeline_is_active: state.active,
                num_computes: state.num_computes,
@@ -799,6 +694,7 @@ impl Timeline {
                mem_state: state.sk.inmem.clone(),
                persisted_state: state.sk.state.clone(),
                flush_lsn: state.sk.wal_store.flush_lsn(),
+                remote_consistent_lsn: self.get_walsenders().get_remote_consistent_lsn(),
                wal_storage: state.sk.wal_store.get_metrics(),
            })
        } else {
@@ -816,7 +712,7 @@ impl Timeline {
        debug_dump::Memory {
            is_cancelled: self.is_cancelled(),
            peers_info_len: state.peers_info.0.len(),
-            replicas: state.replicas.clone(),
+            walsenders: self.walsenders.get_all(),
            wal_backup_active: state.wal_backup_active,
            active: state.active,
            num_computes: state.num_computes,
--- a/safekeeper/src/timelines_global_map.rs
+++ b/safekeeper/src/timelines_global_map.rs
@@ -159,6 +159,26 @@ impl GlobalTimelines {
        Ok(())
    }

+    /// Read a timeline from disk, wrap it in an `Arc`, register it in the
+    /// global timelines map under `ttid` and return the `Arc`.
+    ///
+    /// Returns an error naming the timeline if it cannot be loaded.
+    pub fn load_timeline(ttid: TenantTimelineId) -> Result<Arc<Timeline>> {
+        let (conf, wal_backup_launcher_tx) = TIMELINES_STATE.lock().unwrap().get_dependencies();
+
+        let loaded = match Timeline::load_timeline(conf, ttid, wal_backup_launcher_tx) {
+            Ok(timeline) => Arc::new(timeline),
+            // If we can't load a timeline, it's bad. Caller will figure it out.
+            Err(e) => bail!("failed to load timeline {}, reason: {:?}", ttid, e),
+        };
+
+        // TODO: prevent concurrent timeline creation/loading
+        let mut global_state = TIMELINES_STATE.lock().unwrap();
+        global_state.timelines.insert(ttid, Arc::clone(&loaded));
+        drop(global_state);
+
+        Ok(loaded)
+    }
+
    /// Get the number of timelines in the map.
    pub fn timelines_count() -> usize {
        TIMELINES_STATE.lock().unwrap().timelines.len()
--- a/safekeeper/src/wal_backup.rs
+++ b/safekeeper/src/wal_backup.rs
@@ -25,6 +25,7 @@ use tracing::*;

 use utils::{id::TenantTimelineId, lsn::Lsn};

+use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS};
 use crate::timeline::{PeerInfo, Timeline};
 use crate::{GlobalTimelines, SafeKeeperConf};

@@ -394,7 +395,13 @@ async fn backup_single_segment(
            )
        })?;

-    backup_object(&segment_file_path, &remote_segment_path, seg.size()).await?;
+    let res = backup_object(&segment_file_path, &remote_segment_path, seg.size()).await;
+    if res.is_ok() {
+        BACKED_UP_SEGMENTS.inc();
+    } else {
+        BACKUP_ERRORS.inc();
+    }
+    res?;
    debug!("Backup of {} done", segment_file_path.display());

    Ok(())
--- a/safekeeper/src/wal_storage.rs
+++ b/safekeeper/src/wal_storage.rs
@@ -27,7 +27,7 @@ use tracing::*;

 use utils::{id::TenantTimelineId, lsn::Lsn};

-use crate::metrics::{time_io_closure, WalStorageMetrics};
+use crate::metrics::{time_io_closure, WalStorageMetrics, REMOVED_WAL_SEGMENTS};
 use crate::safekeeper::SafeKeeperState;

 use crate::wal_backup::read_object;
@@ -112,10 +112,10 @@ impl PhysicalStorage {
    /// the disk. Otherwise, all LSNs are set to zero.
    pub fn new(
        ttid: &TenantTimelineId,
+        timeline_dir: PathBuf,
        conf: &SafeKeeperConf,
        state: &SafeKeeperState,
    ) -> Result<PhysicalStorage> {
-        let timeline_dir = conf.timeline_dir(ttid);
        let wal_seg_size = state.server.wal_seg_size as usize;

        // Find out where stored WAL ends, starting at commit_lsn which is a
@@ -455,6 +455,7 @@ fn remove_segments_from_disk(
                n_removed += 1;
                min_removed = min(min_removed, segno);
                max_removed = max(max_removed, segno);
+                REMOVED_WAL_SEGMENTS.inc();
            }
        }
    }
--- a/scripts/pr-comment-test-report.js
+++ b/scripts/pr-comment-test-report.js
@@ -37,7 +37,7 @@ module.exports = async ({ github, context, fetch, reports }) => {
        const {buildType, reportUrl, jsonUrl} = report

        if (!reportUrl || !jsonUrl) {
-            console.warn(`"reportUrl" or "jsonUrl" aren't set for ${buildType} build`)
+            commentBody += `#### ${buildType} build: no tests were run or test report is not available\n`
            continue
        }

@@ -78,7 +78,7 @@ module.exports = async ({ github, context, fetch, reports }) => {
        }

        const totalTestsCount = failedTests.length + passedTests.length + skippedTests.length
-        commentBody += `#### ${buildType} build: ${totalTestsCount} tests run: ${passedTests.length} passed, ${failedTests.length} failed, ${skippedTests.length} ([full report](${reportUrl}))\n`
+        commentBody += `#### ${buildType} build: ${totalTestsCount} tests run: ${passedTests.length} passed, ${failedTests.length} failed, ${skippedTests.length} skipped ([full report](${reportUrl}))\n`
        if (failedTests.length > 0) {
            commentBody += `Failed tests:\n`
            for (const test of failedTests) {
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -1820,6 +1820,36 @@ class VanillaPostgres(PgProtocol):
            self.pg_bin.run_capture(["initdb", "-D", str(pgdatadir)])
        self.configure([f"port = {port}\n"])

+    def enable_tls(self):
+        """
+        Generate a self-signed certificate in the data directory and enable TLS.
+
+        Must be called while the server is not running. Raises
+        subprocess.CalledProcessError if the openssl invocation fails.
+        """
+        assert not self.running
+        # generate self-signed certificate; check=True so that an openssl failure
+        # is reported here instead of being silently ignored
+        subprocess.run(
+            [
+                "openssl",
+                "req",
+                "-new",
+                "-x509",
+                "-days",
+                "365",
+                "-nodes",
+                "-text",
+                "-out",
+                self.pgdatadir / "server.crt",
+                "-keyout",
+                self.pgdatadir / "server.key",
+                "-subj",
+                "/CN=localhost",
+            ],
+            check=True,
+        )
+        # configure postgresql.conf
+        self.configure(
+            [
+                "ssl = on",
+                "ssl_cert_file = 'server.crt'",
+                "ssl_key_file = 'server.key'",
+            ]
+        )
+
    def configure(self, options: List[str]):
        """Append lines into postgresql.conf file."""
        assert not self.running
@@ -1992,6 +2022,7 @@ class NeonProxy(PgProtocol):
                # Link auth backend params
                *["--auth-backend", "link"],
                *["--uri", NeonProxy.link_auth_uri],
+                *["--allow-self-signed-compute", "true"],
            ]

    @dataclass(frozen=True)
@@ -2012,6 +2043,7 @@ class NeonProxy(PgProtocol):
    def __init__(
        self,
        neon_binpath: Path,
+        test_output_dir: Path,
        proxy_port: int,
        http_port: int,
        mgmt_port: int,
@@ -2025,6 +2057,7 @@ class NeonProxy(PgProtocol):
        self.host = host
        self.http_port = http_port
        self.neon_binpath = neon_binpath
+        self.test_output_dir = test_output_dir
        self.proxy_port = proxy_port
        self.mgmt_port = mgmt_port
        self.auth_backend = auth_backend
@@ -2051,7 +2084,8 @@ class NeonProxy(PgProtocol):
                *["--metric-collection-interval", self.metric_collection_interval],
            ]

-        self._popen = subprocess.Popen(args)
+        logfile = open(self.test_output_dir / "proxy.log", "w")
+        self._popen = subprocess.Popen(args, stdout=logfile, stderr=logfile)
        self._wait_until_ready()
        return self

@@ -2108,7 +2142,7 @@ class NeonProxy(PgProtocol):
            try:
                self._popen.wait(timeout=5)
            except subprocess.TimeoutExpired:
-                log.warn("failed to gracefully terminate proxy; killing")
+                log.warning("failed to gracefully terminate proxy; killing")
                self._popen.kill()

    @staticmethod
@@ -2119,6 +2153,7 @@ class NeonProxy(PgProtocol):

        if create_user:
            log.info("creating a new user for link auth test")
+            local_vanilla_pg.enable_tls()
            local_vanilla_pg.start()
            local_vanilla_pg.safe_psql(f"create user {pg_user} with login superuser")

@@ -2152,7 +2187,9 @@ class NeonProxy(PgProtocol):


@pytest.fixture(scope="function")
-def link_proxy(port_distributor: PortDistributor, neon_binpath: Path) -> Iterator[NeonProxy]:
+def link_proxy(
+    port_distributor: PortDistributor, neon_binpath: Path, test_output_dir: Path
+) -> Iterator[NeonProxy]:
    """Neon proxy that routes through link auth."""

    http_port = port_distributor.get_port()
@@ -2161,6 +2198,7 @@ def link_proxy(port_distributor: PortDistributor, neon_binpath: Path) -> Iterato

    with NeonProxy(
        neon_binpath=neon_binpath,
+        test_output_dir=test_output_dir,
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
@@ -2172,7 +2210,10 @@ def link_proxy(port_distributor: PortDistributor, neon_binpath: Path) -> Iterato

@pytest.fixture(scope="function")
 def static_proxy(
-    vanilla_pg: VanillaPostgres, port_distributor: PortDistributor, neon_binpath: Path
+    vanilla_pg: VanillaPostgres,
+    port_distributor: PortDistributor,
+    neon_binpath: Path,
+    test_output_dir: Path,
 ) -> Iterator[NeonProxy]:
    """Neon proxy that routes directly to vanilla postgres."""

@@ -2191,6 +2232,7 @@ def static_proxy(

    with NeonProxy(
        neon_binpath=neon_binpath,
+        test_output_dir=test_output_dir,
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
@@ -2586,6 +2628,7 @@ class SafekeeperTimelineStatus:
    commit_lsn: Lsn
    timeline_start_lsn: Lsn
    backup_lsn: Lsn
+    peer_horizon_lsn: Lsn
    remote_consistent_lsn: Lsn


@@ -2618,6 +2661,13 @@ class SafekeeperHttpClient(requests.Session):
        assert isinstance(res_json, dict)
        return res_json

+    def pull_timeline(self, body: Dict[str, Any]) -> Dict[str, Any]:
+        """POST `body` as JSON to /v1/pull_timeline and return the decoded JSON object."""
+        url = f"http://localhost:{self.port}/v1/pull_timeline"
+        response = self.post(url, json=body)
+        # Turn HTTP error statuses into exceptions before decoding
+        response.raise_for_status()
+        payload = response.json()
+        assert isinstance(payload, dict)
+        return payload
+
    def timeline_create(
        self, tenant_id: TenantId, timeline_id: TimelineId, pg_version: int, commit_lsn: Lsn
    ):
@@ -2643,6 +2693,7 @@ class SafekeeperHttpClient(requests.Session):
            commit_lsn=Lsn(resj["commit_lsn"]),
            timeline_start_lsn=Lsn(resj["timeline_start_lsn"]),
            backup_lsn=Lsn(resj["backup_lsn"]),
+            peer_horizon_lsn=Lsn(resj["peer_horizon_lsn"]),
            remote_consistent_lsn=Lsn(resj["remote_consistent_lsn"]),
        )

--- a/test_runner/regress/test_metric_collection.py
+++ b/test_runner/regress/test_metric_collection.py
@@ -199,9 +199,12 @@ def proxy_metrics_handler(request: Request) -> Response:
    return Response(status=200)


-@pytest.fixture(scope="session")
+@pytest.fixture(scope="function")
 def proxy_with_metric_collector(
-    port_distributor: PortDistributor, neon_binpath: Path, httpserver_listen_address
+    port_distributor: PortDistributor,
+    neon_binpath: Path,
+    httpserver_listen_address,
+    test_output_dir: Path,
 ) -> Iterator[NeonProxy]:
    """Neon proxy that routes through link auth and has metric collection enabled."""

@@ -215,6 +218,7 @@ def proxy_with_metric_collector(

    with NeonProxy(
        neon_binpath=neon_binpath,
+        test_output_dir=test_output_dir,
        proxy_port=proxy_port,
        http_port=http_port,
        mgmt_port=mgmt_port,
--- a/test_runner/regress/test_sni_router.py
+++ b/test_runner/regress/test_sni_router.py
@@ -0,0 +1,134 @@
+import socket
+import subprocess
+from pathlib import Path
+from types import TracebackType
+from typing import Optional, Type
+
+import backoff  # type: ignore
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import PgProtocol, PortDistributor, VanillaPostgres
+
+
+def generate_tls_cert(cn, certout, keyout):
+    """
+    Create a self-signed certificate for common name `cn` using openssl.
+
+    The certificate is written to `certout` and the unencrypted private key to
+    `keyout`. Raises subprocess.CalledProcessError if openssl fails.
+    """
+    # check=True: fail right away rather than continuing without the files
+    subprocess.run(
+        [
+            "openssl",
+            "req",
+            "-new",
+            "-x509",
+            "-days",
+            "365",
+            "-nodes",
+            "-out",
+            certout,
+            "-keyout",
+            keyout,
+            "-subj",
+            f"/CN={cn}",
+        ],
+        check=True,
+    )
+
+
+class PgSniRouter(PgProtocol):
+    """
+    Test helper that runs the `pg_sni_router` binary as a child process.
+
+    Intended to be used as a context manager: call `start()` inside the `with`
+    block, and the child process is terminated (or killed) on exit.
+    """
+
+    def __init__(
+        self,
+        neon_binpath: Path,
+        port: int,
+        destination: str,
+        tls_cert: Path,
+        tls_key: Path,
+    ):
+        # Must use a hostname rather than IP here, for SNI to work
+        host = "localhost"
+        super().__init__(host=host, port=port)
+
+        self.host = host
+        self.neon_binpath = neon_binpath
+        self.port = port
+        self.destination = destination
+        self.tls_cert = tls_cert
+        self.tls_key = tls_key
+        self._popen: Optional[subprocess.Popen[bytes]] = None
+
+    def start(self) -> "PgSniRouter":
+        """Launch pg_sni_router and block until its listen port accepts connections."""
+        assert self._popen is None
+        args = [
+            str(self.neon_binpath / "pg_sni_router"),
+            *["--listen", f"127.0.0.1:{self.port}"],
+            *["--tls-cert", str(self.tls_cert)],
+            *["--tls-key", str(self.tls_key)],
+            *["--destination", self.destination],
+        ]
+
+        self._popen = subprocess.Popen(args)
+        self._wait_until_ready()
+        return self
+
+    @backoff.on_exception(backoff.expo, OSError, max_time=10)
+    def _wait_until_ready(self):
+        # Probe the listen port; close the probe connection right away so it
+        # does not stay open for the lifetime of the test.
+        with socket.create_connection((self.host, self.port)):
+            pass
+
+    # Sends SIGTERM to the proxy if it has been started
+    def terminate(self):
+        if self._popen:
+            self._popen.terminate()
+
+    # Waits for proxy to exit if it has been opened with a default timeout of
+    # two seconds. Raises subprocess.TimeoutExpired if the proxy does not exit in time.
+    def wait_for_exit(self, timeout=2):
+        if self._popen:
+            # Honor the caller-supplied timeout instead of a hard-coded value
+            self._popen.wait(timeout=timeout)
+
+    def __enter__(self) -> "PgSniRouter":
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[Type[BaseException]],
+        exc: Optional[BaseException],
+        tb: Optional[TracebackType],
+    ):
+        # Ask the child to stop; fall back to kill if it does not exit in time
+        if self._popen is not None:
+            self._popen.terminate()
+            try:
+                self._popen.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                log.warning("failed to gracefully terminate pg_sni_router; killing")
+                self._popen.kill()
+
+
+def test_pg_sni_router(
+    vanilla_pg: VanillaPostgres,
+    port_distributor: PortDistributor,
+    neon_binpath: Path,
+    test_output_dir: Path,
+):
+    """Run a trivial query against a vanilla Postgres through pg_sni_router."""
+    cert_path = test_output_dir / "router.crt"
+    key_path = test_output_dir / "router.key"
+    generate_tls_cert("endpoint.namespace.localtest.me", cert_path, key_path)
+
+    # Stand-alone Postgres instance the query should end up at
+    vanilla_pg.start()
+    pg_port = vanilla_pg.default_options["port"]
+
+    router_port = port_distributor.get_port()
+    # Host name given to the client; it carries the Postgres port
+    sni_host = f"endpoint--namespace--{pg_port}.localtest.me"
+
+    with PgSniRouter(
+        neon_binpath=neon_binpath,
+        port=router_port,
+        destination="localtest.me",
+        tls_cert=cert_path,
+        tls_key=key_path,
+    ) as router:
+        router.start()
+
+        rows = router.safe_psql(
+            "select 1",
+            dbname="postgres",
+            sslmode="require",
+            host=sni_host,
+            hostaddr="127.0.0.1",
+        )
+        assert rows[0][0] == 1
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -299,7 +299,7 @@ def test_broker(neon_env_builder: NeonEnvBuilder):
            raise RuntimeError(
                f"timed out waiting {elapsed:.0f}s for remote_consistent_lsn propagation: status before {stat_before}, status current {stat_after}"
            )
-        time.sleep(0.5)
+        time.sleep(1)


 # Test that old WAL consumed by peers and pageserver is removed from safekeepers.
@@ -383,12 +383,15 @@ def test_wal_removal(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
    wait(
        lambda first_segments=first_segments: all(not os.path.exists(p) for p in first_segments),
        "first segment get removed",
+        wait_f=lambda http_cli=http_cli, tenant_id=tenant_id, timeline_id=timeline_id: log.info(
+            f"waiting for segments removal, sk info: {http_cli.timeline_status(tenant_id=tenant_id, timeline_id=timeline_id)}"
+        ),
    )


 # Wait for something, defined as f() returning True, raising error if this
-# doesn't happen without timeout seconds.
-def wait(f, desc, timeout=30):
+# doesn't happen without timeout seconds, and calling wait_f while waiting.
+def wait(f, desc, timeout=30, wait_f=None):
    started_at = time.time()
    while True:
        if f():
@@ -397,6 +400,8 @@ def wait(f, desc, timeout=30):
        if elapsed > timeout:
            raise RuntimeError(f"timed out waiting {elapsed:.0f}s for {desc}")
        time.sleep(0.5)
+        if wait_f is not None:
+            wait_f()


 def is_segment_offloaded(
@@ -1249,3 +1254,98 @@ def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool):
    with closing(endpoint_other.connect()) as conn:
        with conn.cursor() as cur:
            cur.execute("INSERT INTO t (key) VALUES (123)")
+
+
+def test_pull_timeline(neon_env_builder: NeonEnvBuilder):
+    """
+    Check that a safekeeper can join a timeline via the pull_timeline HTTP call.
+
+    Four safekeepers are created and the fourth is stopped up front. Data is
+    written through the first three, one of them is killed, then the fourth is
+    started and asked to pull the timeline from the two that are still up.
+    Afterwards the compute is re-created against different safekeeper sets and
+    the payload is run again after each change.
+    """
+    # Build the comma-separated "localhost:<pg port>" list for the safekeepers
+    # whose id is in sk_names.
+    def safekeepers_guc(env: NeonEnv, sk_names: List[int]) -> str:
+        return ",".join([f"localhost:{sk.port.pg}" for sk in env.safekeepers if sk.id in sk_names])
+
+    # Insert rows through the endpoint and verify the sum of keys grew as expected.
+    def execute_payload(endpoint: Endpoint):
+        with closing(endpoint.connect()) as conn:
+            with conn.cursor() as cur:
+                # we rely upon autocommit after each statement
+                # as waiting for acceptors happens there
+                cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)")
+                cur.execute("INSERT INTO t VALUES (0, 'something')")
+                sum_before = query_scalar(cur, "SELECT SUM(key) FROM t")
+
+                cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
+                sum_after = query_scalar(cur, "SELECT SUM(key) FROM t")
+                # 5000050000 == 1 + 2 + ... + 100000
+                assert sum_after == sum_before + 5000050000
+
+    # Log the timeline status of every safekeeper; errors are logged, not raised.
+    def show_statuses(safekeepers: List[Safekeeper], tenant_id: TenantId, timeline_id: TimelineId):
+        for sk in safekeepers:
+            http_cli = sk.http_client()
+            try:
+                status = http_cli.timeline_status(tenant_id, timeline_id)
+                log.info(f"Safekeeper {sk.id} status: {status}")
+            except Exception as e:
+                log.info(f"Safekeeper {sk.id} status error: {e}")
+
+    neon_env_builder.num_safekeepers = 4
+    env = neon_env_builder.init_start()
+    env.neon_cli.create_branch("test_pull_timeline")
+
+    # NOTE(review): env.safekeepers is indexed from 0 while the log messages
+    # and active_safekeepers look like 1-based safekeeper ids — confirm.
+    log.info("Use only first 3 safekeepers")
+    env.safekeepers[3].stop()
+    active_safekeepers = [1, 2, 3]
+    endpoint = env.endpoints.create("test_pull_timeline")
+    endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
+    endpoint.start()
+
+    # learn neon timeline from compute
+    tenant_id = TenantId(endpoint.safe_psql("show neon.tenant_id")[0][0])
+    timeline_id = TimelineId(endpoint.safe_psql("show neon.timeline_id")[0][0])
+
+    execute_payload(endpoint)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Kill safekeeper 2, continue with payload")
+    env.safekeepers[1].stop(immediate=True)
+    execute_payload(endpoint)
+
+    log.info("Initialize new safekeeper 4, pull data from 1 & 3")
+    env.safekeepers[3].start()
+
+    # Ask the freshly started safekeeper to pull the timeline from the HTTP
+    # endpoints of the two safekeepers that are still running.
+    res = (
+        env.safekeepers[3]
+        .http_client()
+        .pull_timeline(
+            {
+                "tenant_id": str(tenant_id),
+                "timeline_id": str(timeline_id),
+                "http_hosts": [
+                    f"http://localhost:{env.safekeepers[0].port.http}",
+                    f"http://localhost:{env.safekeepers[2].port.http}",
+                ],
+            }
+        )
+    )
+    log.info("Finished pulling timeline")
+    log.info(res)
+
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Restarting compute with new config to verify that it works")
+    active_safekeepers = [1, 3, 4]
+
+    # NOTE(review): the return value of create() is discarded and the existing
+    # `endpoint` object is reused below — presumably stop_and_destroy() returns
+    # the same endpoint and create() re-initializes it in place; confirm.
+    endpoint.stop_and_destroy().create("test_pull_timeline")
+    endpoint.adjust_for_safekeepers(safekeepers_guc(env, active_safekeepers))
+    endpoint.start()
+
+    execute_payload(endpoint)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Stop sk1 (simulate failure) and use only quorum of sk3 and sk4")
+    env.safekeepers[0].stop(immediate=True)
+    execute_payload(endpoint)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
+
+    log.info("Restart sk4 and and use quorum of sk1 and sk4")
+    env.safekeepers[3].stop()
+    env.safekeepers[2].stop()
+    env.safekeepers[0].start()
+    env.safekeepers[3].start()
+
+    execute_payload(endpoint)
+    show_statuses(env.safekeepers, tenant_id, timeline_id)
--- a/test_runner/regress/test_wal_receiver.py
+++ b/test_runner/regress/test_wal_receiver.py
@@ -0,0 +1,115 @@
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder
+from fixtures.types import Lsn, TenantId
+
+
+# Checks that pageserver's walreceiver state is included in the error raised on WAL wait timeout.
+# Ensures that walreceiver does not run without any data inserted and only starts after the insertion.
+def test_pageserver_lsn_wait_error_start(neon_env_builder: NeonEnvBuilder):
+    # Trigger WAL wait timeout faster
+    neon_env_builder.pageserver_config_override = "wait_lsn_timeout = '1s'"
+    env = neon_env_builder.init_start()
+    # NOTE(review): the returned client is discarded; kept as in the original, confirm whether
+    # this call is needed at all.
+    env.pageserver.http_client()
+
+    # Only the tenant id is used below; the timeline id is intentionally ignored.
+    tenant_id, _timeline_id = env.neon_cli.create_tenant()
+    expected_timeout_error = f"Timed out while waiting for WAL record at LSN {future_lsn} to arrive"
+    # The timeout is provoked on purpose, so the pageserver must not treat it as a test failure.
+    env.pageserver.allowed_errors.append(f".*{expected_timeout_error}.*")
+
+    # Phase 1: nothing has been written yet.
+    try:
+        trigger_wait_lsn_timeout(env, tenant_id)
+    except Exception as e:
+        exception_string = str(e)
+        assert expected_timeout_error in exception_string, "Should time out during waiting for WAL"
+        assert (
+            "WalReceiver status: Not active" in exception_string
+        ), "Walreceiver should not be active before any data writes"
+    else:
+        # Without this branch the test would pass without checking anything.
+        raise AssertionError("Waiting for a future LSN should have failed before any data writes")
+
+    # Phase 2: after some INSERTs the reported walreceiver status must change.
+    insert_test_elements(env, tenant_id, start=0, count=1_000)
+    try:
+        trigger_wait_lsn_timeout(env, tenant_id)
+    except Exception as e:
+        exception_string = str(e)
+        assert expected_timeout_error in exception_string, "Should time out during waiting for WAL"
+        assert (
+            "WalReceiver status: Not active" not in exception_string
+        ), "Should not be inactive anymore after INSERTs are made"
+        assert "WalReceiver status" in exception_string, "But still should have some other status"
+    else:
+        # Without this branch the test would pass without checking anything.
+        raise AssertionError("Waiting for a future LSN should have failed after INSERTs are made")
+
+
+# Checks that all active safekeepers are shown in pageserver's walreceiver state reported on WAL wait timeout.
+# Stops one of the safekeepers and ensures that only the active ones are present in the state.
+def test_pageserver_lsn_wait_error_safekeeper_stop(neon_env_builder: NeonEnvBuilder):
+    # Trigger WAL wait timeout faster
+    neon_env_builder.pageserver_config_override = "wait_lsn_timeout = '1s'"
+    # Have notable SK ids to ensure we check logs for their presence, not some other random numbers
+    neon_env_builder.safekeepers_id_start = 12345
+    neon_env_builder.num_safekeepers = 3
+    env = neon_env_builder.init_start()
+    # NOTE(review): the returned client is discarded; kept as in the original, confirm whether
+    # this call is needed at all.
+    env.pageserver.http_client()
+
+    # Only the tenant id is used below; the timeline id is intentionally ignored.
+    tenant_id, _timeline_id = env.neon_cli.create_tenant()
+
+    elements_to_insert = 1_000_000
+    expected_timeout_error = f"Timed out while waiting for WAL record at LSN {future_lsn} to arrive"
+    # The timeout is provoked on purpose, so the pageserver must not treat it as a test failure.
+    env.pageserver.allowed_errors.append(f".*{expected_timeout_error}.*")
+
+    insert_test_elements(env, tenant_id, start=0, count=elements_to_insert)
+
+    # Phase 1: all safekeepers are running, so every id must show up in the error text.
+    try:
+        trigger_wait_lsn_timeout(env, tenant_id)
+    except Exception as e:
+        exception_string = str(e)
+        assert expected_timeout_error in exception_string, "Should time out during waiting for WAL"
+
+        for safekeeper in env.safekeepers:
+            assert (
+                str(safekeeper.id) in exception_string
+            ), f"Should have safekeeper {safekeeper.id} printed in walreceiver state after WAL wait timeout"
+    else:
+        # Without this branch the test would pass without checking anything.
+        raise AssertionError("Waiting for a future LSN should have failed with all safekeepers running")
+
+    stopped_safekeeper = env.safekeepers[-1]
+    stopped_safekeeper_id = stopped_safekeeper.id
+    log.info(f"Stopping safekeeper {stopped_safekeeper.id}")
+    stopped_safekeeper.stop()
+
+    # Spend some more time inserting, to ensure SKs report updated statuses and walreceiver in PS have time to update its connection stats.
+    insert_test_elements(env, tenant_id, start=elements_to_insert + 1, count=elements_to_insert)
+
+    # Phase 2: the stopped safekeeper must be gone from the state, the others must remain.
+    try:
+        trigger_wait_lsn_timeout(env, tenant_id)
+    except Exception as e:
+        exception_string = str(e)
+        assert expected_timeout_error in exception_string, "Should time out during waiting for WAL"
+
+        for safekeeper in env.safekeepers:
+            if safekeeper.id == stopped_safekeeper_id:
+                assert (
+                    str(safekeeper.id) not in exception_string
+                ), f"Should not have stopped safekeeper {safekeeper.id} printed in walreceiver state after 2nd WAL wait timeout"
+            else:
+                assert (
+                    str(safekeeper.id) in exception_string
+                ), f"Should have safekeeper {safekeeper.id} printed in walreceiver state after 2nd WAL wait timeout"
+    else:
+        # Without this branch the test would pass without checking anything.
+        raise AssertionError("Waiting for a future LSN should have failed after a safekeeper stop")
+
+
+def insert_test_elements(env: NeonEnv, tenant_id: TenantId, start: int, count: int):
+    """
+    Insert `count` rows with consecutive keys `start .. start + count - 1` into table `t`.
+
+    The table is created if it does not exist yet. All statements run through an endpoint
+    obtained from `env.endpoints.create_start("main", tenant_id=tenant_id)`, which is used
+    as a context manager for the duration of the insert.
+    """
+    first_element_id = start
+    # generate_series() includes both bounds, so the upper bound is the last key to insert,
+    # not one past it; otherwise count + 1 rows would be written.
+    last_element_id = first_element_id + count - 1
+    with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint:
+        with endpoint.cursor() as cur:
+            cur.execute("CREATE TABLE IF NOT EXISTS t(key serial primary key, value text)")
+            cur.execute(
+                f"INSERT INTO t SELECT i, CONCAT('payload_', i) FROM generate_series({first_element_id},{last_element_id}) as i"
+            )
+
+
+# LSN requested by trigger_wait_lsn_timeout(); the tests expect waiting for it to time out.
+future_lsn = Lsn("0/FFFFFFFF")
+
+
+def trigger_wait_lsn_timeout(env: NeonEnv, tenant_id: TenantId):
+    """
+    Start an endpoint for `tenant_id` at `future_lsn` and run a trivial query on it.
+
+    The tests in this file call this expecting it to raise, and then inspect the text of
+    the raised exception.
+    """
+    endpoint_at_future_lsn = env.endpoints.create_start(
+        "main",
+        tenant_id=tenant_id,
+        lsn=future_lsn,
+    )
+    with endpoint_at_future_lsn as endpoint:
+        with endpoint.cursor() as cursor:
+            cursor.execute("SELECT 1")
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -14,13 +14,11 @@ publish = false
 ### BEGIN HAKARI SECTION
 [dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
-byteorder = { version = "1" }
 bytes = { version = "1", features = ["serde"] }
 chrono = { version = "0.4", default-features = false, features = ["clock", "serde"] }
 clap = { version = "4", features = ["derive", "string"] }
 clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] }
 crossbeam-utils = { version = "0.8" }
-digest = { version = "0.10", features = ["mac", "std"] }
 either = { version = "1" }
 fail = { version = "0.5", default-features = false, features = ["failpoints"] }
 futures = { version = "0.3" }