Silence test failure caused by expected error in log.

Fix test_ondemand_download_large_rel if uploads are slow.
If the uploads after compaction happen slowly, they might not have finished before the pageserver is shut down. The L0 files have already been uploaded, so no data is lost, but the query later in the test then needs to download all the L0 files, which causes the test to fail because it specifically checks that downloads happen on-demand, not all at once.
2026-05-13 03:00:37 +00:00 · 2023-01-13 10:26:15 +02:00 · 2023-01-12 23:40:46 +02:00 · 2023-01-12 22:49:00 +02:00 · 2023-01-12 20:39:04 +02:00 · 2023-01-12 19:24:30 +02:00
127 changed files with 6018 additions and 4340 deletions
--- a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
+++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md
@@ -0,0 +1,10 @@
+## Describe your changes
+
+## Issue ticket number and link
+
+## Checklist before requesting a review
+- [ ] I have performed a self-review of my code.
+- [ ] If it is a core feature, I have added thorough tests.
+- [ ] Do we need to implement analytics? if so did you add the relevant metrics to the dashboard?
+- [ ] If this PR requires public announcement, mark it with /release-notes label and add several sentences in this section.
+
--- a/.github/ansible/prod.us-west-2.hosts.yaml
+++ b/.github/ansible/prod.us-west-2.hosts.yaml
@@ -25,6 +25,8 @@ storage:
          ansible_host: i-0d9f6dfae0e1c780d 
        pageserver-1.us-west-2.aws.neon.tech:
          ansible_host: i-0c834be1dddba8b3f
+        pageserver-2.us-west-2.aws.neon.tech:
+          ansible_host: i-051642d372c0a4f32

    safekeepers:
      hosts:
--- a/.github/ansible/staging.us-east-2.hosts.yaml
+++ b/.github/ansible/staging.us-east-2.hosts.yaml
@@ -27,6 +27,8 @@ storage:
          ansible_host: i-0c3e70929edb5d691
        pageserver-1.us-east-2.aws.neon.build:
          ansible_host: i-0565a8b4008aa3f40
+        pageserver-2.us-east-2.aws.neon.build:
+          ansible_host: i-01e31cdf7e970586a

    safekeepers:
      hosts:
--- a/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-eu-west-1-zeta.neon-proxy-scram.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.eu-west-1.aws.neon.build"
  sentryEnvironment: "development"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: eu-west-1.aws.neon.build
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram-legacy.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.cloud.stage.neon.tech"
  sentryEnvironment: "development"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: neon-proxy-scram-legacy.beta.us-east-2.aws.neon.build
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
+++ b/.github/helm-values/dev-us-east-2-beta.neon-proxy-scram.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-staging.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.build"
  sentryEnvironment: "development"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.build
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-ap-southeast-1-epsilon.neon-proxy-scram.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.ap-southeast-1.aws.neon.tech"
  sentryEnvironment: "production"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: ap-southeast-1.aws.neon.tech
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-eu-central-1-gamma.neon-proxy-scram.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.eu-central-1.aws.neon.tech"
  sentryEnvironment: "production"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: eu-central-1.aws.neon.tech
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-east-2-delta.neon-proxy-scram.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-east-2.aws.neon.tech"
  sentryEnvironment: "production"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: us-east-2.aws.neon.tech
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
+++ b/.github/helm-values/prod-us-west-2-eta.neon-proxy-scram.yaml
@@ -9,6 +9,7 @@ settings:
  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.us-west-2.aws.neon.tech"
  sentryEnvironment: "production"
+  wssPort: 8443

 # -- Additional labels for neon-proxy pods
 podLabels:
@@ -23,6 +24,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: us-west-2.aws.neon.tech
+  httpsPort: 443

 #metrics:
 #  enabled: true
--- a/.github/helm-values/production.proxy-scram.yaml
+++ b/.github/helm-values/production.proxy-scram.yaml
@@ -3,6 +3,7 @@ settings:
  authEndpoint: "http://console-release.local/management/api/v2"
  domain: "*.cloud.neon.tech"
  sentryEnvironment: "production"
+  wssPort: 8443

 podLabels:
  zenith_service: proxy-scram
@@ -16,6 +17,7 @@ exposedService:
    service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
    service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing
    external-dns.alpha.kubernetes.io/hostname: '*.cloud.neon.tech'
+  httpsPort: 443

 metrics:
  enabled: true
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -111,6 +111,7 @@ jobs:
      # Some of our rust modules use FFI and need those to be checked
      - name: Get postgres headers
        run: make postgres-headers -j$(nproc)
+
      - name: Run cargo clippy
        run: ./run_clippy.sh

@@ -126,6 +127,11 @@ jobs:
          cargo hakari generate --diff  # workspace-hack Cargo.toml is up-to-date
          cargo hakari manage-deps --dry-run  # all workspace crates depend on workspace-hack

+      # https://github.com/EmbarkStudios/cargo-deny
+      - name: Check rust licenses/bans/advisories/sources
+        if: ${{ !cancelled() }}
+        run: cargo deny check
+
  build-neon:
    runs-on: [ self-hosted, dev, x64 ]
    container:
@@ -177,13 +183,12 @@ jobs:
      # corresponding Cargo.toml files for their descriptions.
      - name: Set env variables
        run: |
+          CARGO_FEATURES="--features testing"
          if [[ $BUILD_TYPE == "debug" ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
-            CARGO_FEATURES="--features testing"
            CARGO_FLAGS="--locked $CARGO_FEATURES"
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=""
-            CARGO_FEATURES="--features testing,profiling"
            CARGO_FLAGS="--locked --release $CARGO_FEATURES"
          fi
          echo "cov_prefix=${cov_prefix}" >> $GITHUB_ENV
@@ -789,6 +794,8 @@ jobs:
    strategy:
      matrix:
        include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
+    environment:
+      name: prod-old
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -834,7 +841,9 @@ jobs:
        shell: bash
    strategy:
      matrix:
-        target_region: [ us-east-2 ]
+        target_region: [ eu-west-1, us-east-2 ]
+    environment:
+      name: dev-${{ matrix.target_region }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -906,6 +915,8 @@ jobs:
    strategy:
      matrix:
        target_region: [ us-east-2, us-west-2, eu-central-1, ap-southeast-1 ]
+    environment:
+      name: prod-${{ matrix.target_region }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -945,6 +956,8 @@ jobs:
    strategy:
      matrix:
        include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
+    environment:
+      name: prod-old
    env:
      KUBECONFIG: .kubeconfig
    steps:
@@ -970,8 +983,8 @@ jobs:
      - name: Re-deploy proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}.yaml       --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
-          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+          helm upgrade ${{ matrix.proxy_job }}       neondatabase/neon-proxy --namespace neon-proxy --install --atomic -f .github/helm-values/${{ matrix.proxy_config }}.yaml       --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+          helm upgrade ${{ matrix.proxy_job }}-scram neondatabase/neon-proxy --namespace neon-proxy --install --atomic -f .github/helm-values/${{ matrix.proxy_config }}-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s

  deploy-storage-broker:
    name: deploy storage broker on old staging and old prod
@@ -988,6 +1001,8 @@ jobs:
    strategy:
      matrix:
        include: ${{fromJSON(needs.calculate-deploy-targets.outputs.matrix-include)}}
+    environment:
+      name: prod-old
    env:
      KUBECONFIG: .kubeconfig
    steps:
@@ -1036,6 +1051,8 @@ jobs:
            target_cluster: dev-eu-west-1-zeta
            deploy_link_proxy: false
            deploy_legacy_scram_proxy: false
+    environment:
+      name: dev-${{ matrix.target_region }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -1051,19 +1068,19 @@ jobs:
      - name: Re-deploy scram proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s

      - name: Re-deploy link proxy
        if: matrix.deploy_link_proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+          helm upgrade neon-proxy-link neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-link.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s

      - name: Re-deploy legacy scram proxy
        if: matrix.deploy_legacy_scram_proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+          helm upgrade neon-proxy-scram-legacy neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram-legacy.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s

  deploy-storage-broker-dev-new:
    runs-on: [ self-hosted, dev, x64 ]
@@ -1083,6 +1100,8 @@ jobs:
            target_cluster: dev-us-east-2-beta
          - target_region:  eu-west-1
            target_cluster: dev-eu-west-1-zeta
+    environment:
+      name: dev-${{ matrix.target_region }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -1121,6 +1140,8 @@ jobs:
            target_cluster: prod-eu-central-1-gamma
          - target_region: ap-southeast-1
            target_cluster: prod-ap-southeast-1-epsilon
+    environment:
+      name: prod-${{ matrix.target_region }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
@@ -1136,7 +1157,7 @@ jobs:
      - name: Re-deploy proxy
        run: |
          DOCKER_TAG=${{needs.tag.outputs.build-tag}}
-          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s
+          helm upgrade neon-proxy-scram neondatabase/neon-proxy --namespace neon-proxy --create-namespace --install --atomic -f .github/helm-values/${{ matrix.target_cluster }}.neon-proxy-scram.yaml --set image.tag=${DOCKER_TAG} --set settings.sentryUrl=${{ secrets.SENTRY_URL_PROXY }} --wait --timeout 15m0s

  deploy-storage-broker-prod-new:
    runs-on: prod
@@ -1160,6 +1181,8 @@ jobs:
            target_cluster: prod-eu-central-1-gamma
          - target_region: ap-southeast-1
            target_cluster: prod-ap-southeast-1-epsilon
+    environment:
+      name: prod-${{ matrix.target_region }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -66,12 +66,6 @@ dependencies = [
 "backtrace",
 ]

-[[package]]
-name = "arrayvec"
-version = "0.7.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
-
 [[package]]
 name = "asn1-rs"
 version = "0.5.1"
@@ -633,12 +627,6 @@ version = "3.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"

-[[package]]
-name = "bytemuck"
-version = "1.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aaa3a8d9a1ca92e282c96a32d6511b695d7d994d1d102ba85d279f9b2756947f"
-
 [[package]]
 name = "byteorder"
 version = "1.4.3"
@@ -899,7 +887,7 @@ dependencies = [
 "clap 4.0.29",
 "comfy-table",
 "git-version",
- "nix 0.25.1",
+ "nix",
 "once_cell",
 "pageserver_api",
 "postgres",
@@ -934,15 +922,6 @@ version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc"

-[[package]]
-name = "cpp_demangle"
-version = "0.3.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eeaa953eaad386a53111e47172c2fedba671e5684c8dd601a5f474f4f118710f"
-dependencies = [
- "cfg-if",
-]
-
 [[package]]
 name = "cpufeatures"
 version = "0.2.5"
@@ -1066,7 +1045,7 @@ dependencies = [
 "crossterm_winapi",
 "libc",
 "mio",
- "parking_lot 0.12.1",
+ "parking_lot",
 "signal-hook",
 "signal-hook-mio",
 "winapi",
@@ -1176,15 +1155,6 @@ version = "2.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "23d8666cb01533c39dde32bcbab8e227b4ed6679b2c925eba05feabea39508fb"

-[[package]]
-name = "debugid"
-version = "0.7.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6ee87af31d84ef885378aebca32be3d682b0e0dc119d5b4860a2c5bb5046730"
-dependencies = [
- "uuid 0.8.2",
-]
-
 [[package]]
 name = "debugid"
 version = "0.8.0"
@@ -1192,7 +1162,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
 dependencies = [
 "serde",
- "uuid 1.2.2",
+ "uuid",
 ]

 [[package]]
@@ -1318,18 +1288,6 @@ dependencies = [
 "windows-sys 0.42.0",
 ]

-[[package]]
-name = "findshlibs"
-version = "0.10.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
-dependencies = [
- "cc",
- "lazy_static",
- "libc",
- "winapi",
-]
-
 [[package]]
 name = "fixedbitset"
 version = "0.4.2"
@@ -1342,21 +1300,6 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

-[[package]]
-name = "foreign-types"
-version = "0.3.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
-dependencies = [
- "foreign-types-shared",
-]
-
-[[package]]
-name = "foreign-types-shared"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
-
 [[package]]
 name = "form_urlencoded"
 version = "1.1.0"
@@ -1758,16 +1701,16 @@ dependencies = [
 ]

 [[package]]
-name = "hyper-tls"
-version = "0.5.0"
+name = "hyper-tungstenite"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905"
+checksum = "d62004bcd4f6f85d9e2aa4206f1466ee67031f5ededcb6c6e62d48f9306ad879"
 dependencies = [
- "bytes",
 "hyper",
- "native-tls",
+ "pin-project",
 "tokio",
- "tokio-native-tls",
+ "tokio-tungstenite",
+ "tungstenite",
 ]

 [[package]]
@@ -1821,24 +1764,6 @@ dependencies = [
 "serde",
 ]

-[[package]]
-name = "inferno"
-version = "0.10.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de3886428c6400486522cf44b8626e7b94ad794c14390290f2a274dcf728a58f"
-dependencies = [
- "ahash",
- "atty",
- "indexmap",
- "itoa",
- "lazy_static",
- "log",
- "num-format",
- "quick-xml",
- "rgb",
- "str_stack",
-]
-
 [[package]]
 name = "inotify"
 version = "0.9.6"
@@ -2065,15 +1990,6 @@ version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"

-[[package]]
-name = "memmap2"
-version = "0.5.8"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "memoffset"
 version = "0.6.5"
@@ -2141,37 +2057,6 @@ version = "0.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"

-[[package]]
-name = "native-tls"
-version = "0.2.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e"
-dependencies = [
- "lazy_static",
- "libc",
- "log",
- "openssl",
- "openssl-probe",
- "openssl-sys",
- "schannel",
- "security-framework",
- "security-framework-sys",
- "tempfile",
-]
-
-[[package]]
-name = "nix"
-version = "0.23.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f3790c00a0150112de0f4cd161e3d7fc4b2d8a5542ffc35f099a2562aecb35c"
-dependencies = [
- "bitflags",
- "cc",
- "cfg-if",
- "libc",
- "memoffset 0.6.5",
-]
-
 [[package]]
 name = "nix"
 version = "0.25.1"
@@ -2235,16 +2120,6 @@ dependencies = [
 "num-traits",
 ]

-[[package]]
-name = "num-format"
-version = "0.4.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
-dependencies = [
- "arrayvec",
- "itoa",
-]
-
 [[package]]
 name = "num-integer"
 version = "0.1.45"
@@ -2305,51 +2180,12 @@ version = "11.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575"

-[[package]]
-name = "openssl"
-version = "0.10.44"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "29d971fd5722fec23977260f6e81aa67d2f22cadbdc2aa049f1022d9a3be1566"
-dependencies = [
- "bitflags",
- "cfg-if",
- "foreign-types",
- "libc",
- "once_cell",
- "openssl-macros",
- "openssl-sys",
-]
-
-[[package]]
-name = "openssl-macros"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn",
-]
-
 [[package]]
 name = "openssl-probe"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"

-[[package]]
-name = "openssl-sys"
-version = "0.9.79"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5454462c0eced1e97f2ec09036abc8da362e66802f66fd20f86854d9d8cbcbc4"
-dependencies = [
- "autocfg",
- "cc",
- "libc",
- "pkg-config",
- "vcpkg",
-]
-
 [[package]]
 name = "os_info"
 version = "3.5.1"
@@ -2400,7 +2236,7 @@ dependencies = [
 "hyper",
 "itertools",
 "metrics",
- "nix 0.25.1",
+ "nix",
 "num-traits",
 "once_cell",
 "pageserver_api",
@@ -2410,7 +2246,6 @@ dependencies = [
 "postgres-types",
 "postgres_connection",
 "postgres_ffi",
- "pprof",
 "pq_proto",
 "rand",
 "regex",
@@ -2424,12 +2259,12 @@ dependencies = [
 "signal-hook",
 "storage_broker",
 "svg_fmt",
- "tar",
 "tempfile",
 "tenant_size_model",
 "thiserror",
 "tokio",
 "tokio-postgres",
+ "tokio-tar",
 "tokio-util",
 "toml_edit",
 "tracing",
@@ -2454,17 +2289,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "parking_lot"
-version = "0.11.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
-dependencies = [
- "instant",
- "lock_api",
- "parking_lot_core 0.8.5",
-]
-
 [[package]]
 name = "parking_lot"
 version = "0.12.1"
@@ -2472,21 +2296,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
 dependencies = [
 "lock_api",
- "parking_lot_core 0.9.5",
-]
-
-[[package]]
-name = "parking_lot_core"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
-dependencies = [
- "cfg-if",
- "instant",
- "libc",
- "redox_syscall",
- "smallvec",
- "winapi",
+ "parking_lot_core",
 ]

 [[package]]
@@ -2583,12 +2393,6 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"

-[[package]]
-name = "pkg-config"
-version = "0.3.26"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
-
 [[package]]
 name = "plotters"
 version = "0.3.4"
@@ -2695,25 +2499,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "pprof"
-version = "0.6.1"
-source = "git+https://github.com/neondatabase/pprof-rs.git?branch=wallclock-profiling#4e011a87d22fb4d21d15cc38bce81ff1c75e4bc9"
-dependencies = [
- "backtrace",
- "cfg-if",
- "findshlibs",
- "inferno",
- "lazy_static",
- "libc",
- "log",
- "nix 0.23.2",
- "parking_lot 0.11.2",
- "symbolic-demangle",
- "tempfile",
- "thiserror",
-]
-
 [[package]]
 name = "ppv-lite86"
 version = "0.2.17"
@@ -2730,6 +2515,7 @@ dependencies = [
 "postgres-protocol",
 "rand",
 "serde",
+ "thiserror",
 "tokio",
 "tracing",
 "workspace_hack",
@@ -2808,7 +2594,7 @@ dependencies = [
 "lazy_static",
 "libc",
 "memchr",
- "parking_lot 0.12.1",
+ "parking_lot",
 "procfs",
 "thiserror",
 ]
@@ -2885,15 +2671,17 @@ dependencies = [
 "hex",
 "hmac",
 "hyper",
+ "hyper-tungstenite",
 "itertools",
 "md5",
 "metrics",
 "once_cell",
- "parking_lot 0.12.1",
+ "parking_lot",
 "pin-project-lite",
 "pq_proto",
 "rand",
 "rcgen",
+ "regex",
 "reqwest",
 "routerify",
 "rstest",
@@ -2905,6 +2693,7 @@ dependencies = [
 "sha2",
 "socket2",
 "thiserror",
+ "tls-listener",
 "tokio",
 "tokio-postgres",
 "tokio-postgres-rustls",
@@ -2913,20 +2702,12 @@ dependencies = [
 "tracing-subscriber",
 "url",
 "utils",
- "uuid 1.2.2",
+ "uuid",
+ "webpki-roots",
 "workspace_hack",
 "x509-parser",
 ]

-[[package]]
-name = "quick-xml"
-version = "0.22.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8533f14c8382aaad0d592c812ac3b826162128b65662331e1127b45c3d18536b"
-dependencies = [
- "memchr",
-]
-
 [[package]]
 name = "quote"
 version = "1.0.21"
@@ -3095,12 +2876,10 @@ dependencies = [
 "http-body",
 "hyper",
 "hyper-rustls",
- "hyper-tls",
 "ipnet",
 "js-sys",
 "log",
 "mime",
- "native-tls",
 "once_cell",
 "percent-encoding",
 "pin-project-lite",
@@ -3110,7 +2889,6 @@ dependencies = [
 "serde_json",
 "serde_urlencoded",
 "tokio",
- "tokio-native-tls",
 "tokio-rustls",
 "tower-service",
 "url",
@@ -3121,15 +2899,6 @@ dependencies = [
 "winreg",
 ]

-[[package]]
-name = "rgb"
-version = "0.8.34"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3603b7d71ca82644f79b5a06d1220e9a58ede60bd32255f698cb1af8838b8db3"
-dependencies = [
- "bytemuck",
-]
-
 [[package]]
 name = "ring"
 version = "0.16.20"
@@ -3310,9 +3079,9 @@ dependencies = [
 "humantime",
 "hyper",
 "metrics",
- "nix 0.25.1",
+ "nix",
 "once_cell",
- "parking_lot 0.12.1",
+ "parking_lot",
 "postgres",
 "postgres-protocol",
 "postgres_ffi",
@@ -3424,14 +3193,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "17ad137b9df78294b98cab1a650bef237cc6c950e82e5ce164655e674d07c5cc"
 dependencies = [
 "httpdate",
- "native-tls",
 "reqwest",
+ "rustls",
 "sentry-backtrace",
 "sentry-contexts",
 "sentry-core",
 "sentry-panic",
 "tokio",
 "ureq",
+ "webpki-roots",
 ]

 [[package]]
@@ -3489,7 +3259,7 @@ version = "0.29.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ccc95faa4078768a6bf8df45e2b894bbf372b3dbbfb364e9429c1c58ab7545c6"
 dependencies = [
- "debugid 0.8.0",
+ "debugid",
 "getrandom",
 "hex",
 "serde",
@@ -3497,7 +3267,7 @@ dependencies = [
 "thiserror",
 "time",
 "url",
- "uuid 1.2.2",
+ "uuid",
 ]

 [[package]]
@@ -3571,6 +3341,17 @@ dependencies = [
 "syn",
 ]

+[[package]]
+name = "sha-1"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5058ada175748e33390e40e872bd0fe59a19f265d0158daa551c5a88a76009c"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
 [[package]]
 name = "sha1"
 version = "0.10.5"
@@ -3719,7 +3500,7 @@ dependencies = [
 "hyper",
 "metrics",
 "once_cell",
- "parking_lot 0.12.1",
+ "parking_lot",
 "prost",
 "tokio",
 "tokio-stream",
@@ -3730,12 +3511,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "str_stack"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
-
 [[package]]
 name = "stringprep"
 version = "0.1.2"
@@ -3783,29 +3558,6 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8fb1df15f412ee2e9dfc1c504260fa695c1c3f10fe9f4a6ee2d2184d7d6450e2"

-[[package]]
-name = "symbolic-common"
-version = "8.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f551f902d5642e58039aee6a9021a61037926af96e071816361644983966f540"
-dependencies = [
- "debugid 0.7.3",
- "memmap2",
- "stable_deref_trait",
- "uuid 0.8.2",
-]
-
-[[package]]
-name = "symbolic-demangle"
-version = "8.8.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4564ca7b4e6eb14105aa8bbbce26e080f6b5d9c4373e67167ab31f7b86443750"
-dependencies = [
- "cpp_demangle",
- "rustc-demangle",
- "symbolic-common",
-]
-
 [[package]]
 name = "syn"
 version = "1.0.105"
@@ -3964,10 +3716,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"

 [[package]]
-name = "tokio"
-version = "1.21.1"
+name = "tls-listener"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0020c875007ad96677dcc890298f4b942882c5d4eb7cc8f439fc3bf813dc9c95"
+checksum = "c9d4ff21187d434ac7709bfc7441ca88f63681247e5ad99f0f08c8c91ddc103d"
+dependencies = [
+ "futures-util",
+ "hyper",
+ "pin-project-lite",
+ "thiserror",
+ "tokio",
+ "tokio-rustls",
+]
+
+[[package]]
+name = "tokio"
+version = "1.24.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d9f76183f91ecfb55e1d7d5602bd1d979e38a3a522fe900241cf195624d67ae"
 dependencies = [
 "autocfg",
 "bytes",
@@ -3975,12 +3741,11 @@ dependencies = [
 "memchr",
 "mio",
 "num_cpus",
- "once_cell",
 "pin-project-lite",
 "signal-hook-registry",
 "socket2",
 "tokio-macros",
- "winapi",
+ "windows-sys 0.42.0",
 ]

 [[package]]
@@ -4004,16 +3769,6 @@ dependencies = [
 "syn",
 ]

-[[package]]
-name = "tokio-native-tls"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b"
-dependencies = [
- "native-tls",
- "tokio",
-]
-
 [[package]]
 name = "tokio-postgres"
 version = "0.7.7"
@@ -4026,7 +3781,7 @@ dependencies = [
 "futures-channel",
 "futures-util",
 "log",
- "parking_lot 0.12.1",
+ "parking_lot",
 "percent-encoding",
 "phf",
 "pin-project-lite",
@@ -4073,6 +3828,32 @@ dependencies = [
 "tokio",
 ]

+[[package]]
+name = "tokio-tar"
+version = "0.3.0"
+source = "git+https://github.com/neondatabase/tokio-tar.git?rev=404df61437de0feef49ba2ccdbdd94eb8ad6e142#404df61437de0feef49ba2ccdbdd94eb8ad6e142"
+dependencies = [
+ "filetime",
+ "futures-core",
+ "libc",
+ "redox_syscall",
+ "tokio",
+ "tokio-stream",
+ "xattr",
+]
+
+[[package]]
+name = "tokio-tungstenite"
+version = "0.17.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f714dd15bead90401d77e04243611caec13726c2408afd5b31901dfcdcb3b181"
+dependencies = [
+ "futures-util",
+ "log",
+ "tokio",
+ "tungstenite",
+]
+
 [[package]]
 name = "tokio-util"
 version = "0.7.4"
@@ -4299,6 +4080,25 @@ version = "0.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642"

+[[package]]
+name = "tungstenite"
+version = "0.17.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e27992fd6a8c29ee7eef28fc78349aa244134e10ad447ce3b9f0ac0ed0fa4ce0"
+dependencies = [
+ "base64 0.13.1",
+ "byteorder",
+ "bytes",
+ "http",
+ "httparse",
+ "log",
+ "rand",
+ "sha-1",
+ "thiserror",
+ "url",
+ "utf-8",
+]
+
 [[package]]
 name = "typenum"
 version = "1.16.0"
@@ -4362,9 +4162,11 @@ dependencies = [
 "base64 0.13.1",
 "chunked_transfer",
 "log",
- "native-tls",
 "once_cell",
+ "rustls",
 "url",
+ "webpki",
+ "webpki-roots",
 ]

 [[package]]
@@ -4385,6 +4187,12 @@ version = "2.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e8db7427f936968176eaa7cdf81b7f98b980b18495ec28f1b5791ac3bfe3eea9"

+[[package]]
+name = "utf-8"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
+
 [[package]]
 name = "utils"
 version = "0.1.0"
@@ -4401,7 +4209,7 @@ dependencies = [
 "hyper",
 "jsonwebtoken",
 "metrics",
- "nix 0.25.1",
+ "nix",
 "once_cell",
 "pq_proto",
 "rand",
@@ -4425,12 +4233,6 @@ dependencies = [
 "workspace_hack",
 ]

-[[package]]
-name = "uuid"
-version = "0.8.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
-
 [[package]]
 name = "uuid"
 version = "1.2.2"
@@ -4447,12 +4249,6 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"

-[[package]]
-name = "vcpkg"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
-
 [[package]]
 name = "version_check"
 version = "0.9.4"
@@ -4751,7 +4547,6 @@ dependencies = [
 name = "workspace_hack"
 version = "0.1.0"
 dependencies = [
- "ahash",
 "anyhow",
 "bytes",
 "chrono",
@@ -4775,12 +4570,10 @@ dependencies = [
 "rand",
 "regex",
 "regex-syntax",
- "reqwest",
 "scopeguard",
 "serde",
 "serde_json",
 "socket2",
- "stable_deref_trait",
 "syn",
 "tokio",
 "tokio-util",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,14 +1,3 @@
-# 'named-profiles' feature was stabilized in cargo 1.57. This line makes the
-# build work with older cargo versions.
-#
-# We have this because as of this writing, the latest cargo Debian package
-# that's available is 1.56. (Confusingly, the Debian package version number
-# is 0.57, whereas 'cargo --version' says 1.56.)
-#
-# See https://tracker.debian.org/pkg/cargo for the current status of the
-# package. When that gets updated, we can remove this.
-cargo-features = ["named-profiles"]
-
 [workspace]
 members = [
    "compute_tools",
--- a/Dockerfile.compute-node-v14
+++ b/Dockerfile.compute-node-v14
@@ -29,7 +29,12 @@ RUN cd postgres && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
    # Install headers
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
+    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
+    # Enable some of the contrib extensions
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control

 #########################################################################################
 #
@@ -55,7 +60,10 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control

 #########################################################################################
 #
--- a/Dockerfile.compute-node-v15
+++ b/Dockerfile.compute-node-v15
@@ -29,7 +29,12 @@ RUN cd postgres && \
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C contrib/ install && \
    # Install headers
    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/include install && \
-    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install
+    make MAKELEVEL=0 -j $(getconf _NPROCESSORS_ONLN) -s -C src/interfaces/libpq install && \
+    # Enable some of the contrib extensions
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/bloom.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgrowlocks.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/intagg.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/pgstattuple.control

 #########################################################################################
 #
@@ -55,7 +60,10 @@ RUN wget https://download.osgeo.org/postgis/source/postgis-3.3.1.tar.gz && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_raster.control && \
    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_tiger_geocoder.control && \
-    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_topology.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/address_standardizer_data_us.control && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/postgis_sfcgal.control

 #########################################################################################
 #
--- a/README.md
+++ b/README.md
@@ -31,7 +31,8 @@ libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler
 * On Fedora, these packages are needed:
 ```bash
 dnf install flex bison readline-devel zlib-devel openssl-devel \
-  libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler
+  libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
+  protobuf-devel
 ```

 2. [Install Rust](https://www.rust-lang.org/tools/install)
@@ -117,11 +118,8 @@ Python (3.9 or higher), and install python3 packages using `./scripts/pysync` (r
 # Later that would be responsibility of a package install script
 > ./target/debug/neon_local init
 Starting pageserver at '127.0.0.1:64000' in '.neon'.
-pageserver started, pid: 2545906
-Successfully initialized timeline de200bd42b49cc1814412c7e592dd6e9
-Stopped pageserver 1 process with pid 2545906

-# start pageserver and safekeeper
+# start pageserver, safekeeper, and broker for their intercommunication
 > ./target/debug/neon_local start
 Starting neon broker at 127.0.0.1:50051
 storage_broker started, pid: 2918372
@@ -130,6 +128,12 @@ pageserver started, pid: 2918386
 Starting safekeeper at '127.0.0.1:5454' in '.neon/safekeepers/sk1'.
 safekeeper 1 started, pid: 2918437

+# create initial tenant and use it as a default for every future neon_local invocation
+> ./target/debug/neon_local tenant create --set-default
+tenant 9ef87a5bf0d92544f6fafeeb3239695c successfully created on the pageserver
+Created an initial timeline 'de200bd42b49cc1814412c7e592dd6e9' at Lsn 0/16B5A50 for tenant: 9ef87a5bf0d92544f6fafeeb3239695c
+Setting tenant 9ef87a5bf0d92544f6fafeeb3239695c as a default one
+
 # start postgres compute node
 > ./target/debug/neon_local pg start main
 Starting new postgres (v14) main on timeline de200bd42b49cc1814412c7e592dd6e9 ...
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -2,6 +2,7 @@
 name = "compute_tools"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 anyhow = "1.0"
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -9,29 +9,11 @@ use hyper::{Body, Method, Request, Response, Server, StatusCode};
 use log::{error, info};
 use serde_json;

-use crate::compute::{ComputeNode, ComputeStatus};
+use crate::compute::ComputeNode;

 // Service function to handle all available routes.
 async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body> {
    match (req.method(), req.uri().path()) {
-        // Timestamp of the last Postgres activity in the plain text.
-        // DEPRECATED in favour of /status
-        (&Method::GET, "/last_activity") => {
-            info!("serving /last_active GET request");
-            let state = compute.state.read().unwrap();
-
-            // Use RFC3339 format for consistency.
-            Response::new(Body::from(state.last_active.to_rfc3339()))
-        }
-
-        // Has compute setup process finished? -> true/false.
-        // DEPRECATED in favour of /status
-        (&Method::GET, "/ready") => {
-            info!("serving /ready GET request");
-            let status = compute.get_status();
-            Response::new(Body::from(format!("{}", status == ComputeStatus::Running)))
-        }
-
        // Serialized compute state.
        (&Method::GET, "/status") => {
            info!("serving /status GET request");
@@ -46,16 +28,6 @@ async fn routes(req: Request<Body>, compute: Arc<ComputeNode>) -> Response<Body>
            Response::new(Body::from(serde_json::to_string(&compute.metrics).unwrap()))
        }

-        // DEPRECATED, use POST instead
-        (&Method::GET, "/check_writability") => {
-            info!("serving /check_writability GET request");
-            let res = crate::checker::check_writability(&compute).await;
-            match res {
-                Ok(_) => Response::new(Body::from("true")),
-                Err(e) => Response::new(Body::from(e.to_string())),
-            }
-        }
-
        (&Method::POST, "/check_writability") => {
            info!("serving /check_writability POST request");
            let res = crate::checker::check_writability(&compute).await;
--- a/compute_tools/src/http/openapi_spec.yaml
+++ b/compute_tools/src/http/openapi_spec.yaml
@@ -37,58 +37,7 @@ paths:
              schema:
                $ref: "#/components/schemas/ComputeMetrics"

-  /ready:
-    get:
-      deprecated: true
-      tags:
-      - "info"
-      summary: Check whether compute startup process finished successfully
-      description: ""
-      operationId: computeIsReady
-      responses:
-        "200":
-          description: Compute is ready ('true') or not ('false')
-          content:
-            text/plain:
-              schema:
-                type: string
-                example: "true"
-
-  /last_activity:
-    get:
-      deprecated: true
-      tags:
-      - "info"
-      summary: Get timestamp of the last compute activity
-      description: ""
-      operationId: getLastComputeActivityTS
-      responses:
-        "200":
-          description: Timestamp of the last compute activity
-          content:
-            text/plain:
-              schema:
-                type: string
-                example: "2022-10-12T07:20:50.52Z"
-
  /check_writability:
-    get:
-      deprecated: true
-      tags:
-      - "check"
-      summary: Check that we can write new data on this compute
-      description: ""
-      operationId: checkComputeWritabilityDeprecated
-      responses:
-        "200":
-          description: Check result
-          content:
-            text/plain:
-              schema:
-                type: string
-                description: Error text or 'true' if check passed
-                example: "true"
-
    post:
      tags:
      - "check"
--- a/compute_tools/src/monitor.rs
+++ b/compute_tools/src/monitor.rs
@@ -52,10 +52,16 @@ fn watch_compute_activity(compute: &ComputeNode) {
                    let mut idle_backs: Vec<DateTime<Utc>> = vec![];

                    for b in backs.into_iter() {
-                        let state: String = b.get("state");
-                        let change: String = b.get("state_change");
+                        let state: String = match b.try_get("state") {
+                            Ok(state) => state,
+                            Err(_) => continue,
+                        };

                        if state == "idle" {
+                            let change: String = match b.try_get("state_change") {
+                                Ok(state_change) => state_change,
+                                Err(_) => continue,
+                            };
                            let change = DateTime::parse_from_rfc3339(&change);
                            match change {
                                Ok(t) => idle_backs.push(t.with_timezone(&Utc)),
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -1,5 +1,6 @@
 use std::path::Path;
 use std::str::FromStr;
+use std::time::Instant;

 use anyhow::Result;
 use log::{info, log_enabled, warn, Level};
@@ -197,22 +198,18 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {

 /// Reassign all dependent objects and delete requested roles.
 pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<()> {
-    let spec = &node.spec;
-
-    // First, reassign all dependent objects to db owners.
-    if let Some(ops) = &spec.delta_operations {
+    if let Some(ops) = &node.spec.delta_operations {
+        // First, reassign all dependent objects to db owners.
        info!("reassigning dependent objects of to-be-deleted roles");
        for op in ops {
            if op.action == "delete_role" {
                reassign_owned_objects(node, &op.name)?;
            }
        }
-    }

-    // Second, proceed with role deletions.
-    let mut xact = client.transaction()?;
-    if let Some(ops) = &spec.delta_operations {
+        // Second, proceed with role deletions.
        info!("processing role deletions");
+        let mut xact = client.transaction()?;
        for op in ops {
            // We do not check either role exists or not,
            // Postgres will take care of it for us
@@ -223,6 +220,7 @@ pub fn handle_role_deletions(node: &ComputeNode, client: &mut Client) -> Result<
                xact.execute(query.as_str(), &[])?;
            }
        }
+        xact.commit()?;
    }

    Ok(())
@@ -317,6 +315,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
        // XXX: with a limited number of databases it is fine, but consider making it a HashMap
        let pg_db = existing_dbs.iter().find(|r| r.name == *name);

+        let start_time = Instant::now();
        if let Some(r) = pg_db {
            // XXX: db owner name is returned as quoted string from Postgres,
            // when quoting is needed.
@@ -335,6 +334,8 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                info_print!(" -> update");

                client.execute(query.as_str(), &[])?;
+                let elapsed = start_time.elapsed().as_millis();
+                info_print!(" ({} ms)", elapsed);
            }
        } else {
            let mut query: String = format!("CREATE DATABASE {} ", name.pg_quote());
@@ -342,6 +343,9 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {

            query.push_str(&db.to_pg_options());
            client.execute(query.as_str(), &[])?;
+
+            let elapsed = start_time.elapsed().as_millis();
+            info_print!(" ({} ms)", elapsed);
        }

        info_print!("\n");
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -2,6 +2,7 @@
 name = "control_plane"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 anyhow = "1.0"
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -136,22 +136,6 @@ where
    anyhow::bail!("{process_name} did not start in {RETRY_UNTIL_SECS} seconds");
 }

-/// Send SIGTERM to child process
-pub fn send_stop_child_process(child: &std::process::Child) -> anyhow::Result<()> {
-    let pid = child.id();
-    match kill(
-        nix::unistd::Pid::from_raw(pid.try_into().unwrap()),
-        Signal::SIGTERM,
-    ) {
-        Ok(()) => Ok(()),
-        Err(Errno::ESRCH) => {
-            println!("child process with pid {pid} does not exist");
-            Ok(())
-        }
-        Err(e) => anyhow::bail!("Failed to send signal to child process with pid {pid}: {e}"),
-    }
-}
-
 /// Stops the process, using the pid file given. Returns Ok also if the process is already not running.
 pub fn stop_process(immediate: bool, process_name: &str, pid_file: &Path) -> anyhow::Result<()> {
    let pid = match pid_file::read(pid_file)
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -263,7 +263,7 @@ fn get_tenant_id(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::R
    } else if let Some(default_id) = env.default_tenant_id {
        Ok(default_id)
    } else {
-        bail!("No tenant id. Use --tenant-id, or set 'default_tenant_id' in the config file");
+        anyhow::bail!("No tenant id. Use --tenant-id, or set a default tenant");
    }
 }

@@ -284,8 +284,6 @@ fn parse_timeline_id(sub_match: &ArgMatches) -> anyhow::Result<Option<TimelineId
 }

 fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
-    let initial_timeline_id_arg = parse_timeline_id(init_match)?;
-
    // Create config file
    let toml_file: String = if let Some(config_path) = init_match.get_one::<PathBuf>("config") {
        // load and parse the file
@@ -309,30 +307,16 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
        LocalEnv::parse_config(&toml_file).context("Failed to create neon configuration")?;
    env.init(pg_version)
        .context("Failed to initialize neon repository")?;
-    let initial_tenant_id = env
-        .default_tenant_id
-        .expect("default_tenant_id should be generated by the `env.init()` call above");

    // Initialize pageserver, create initial tenant and timeline.
    let pageserver = PageServerNode::from_env(&env);
-    let initial_timeline_id = pageserver
-        .initialize(
-            Some(initial_tenant_id),
-            initial_timeline_id_arg,
-            &pageserver_config_overrides(init_match),
-            pg_version,
-        )
+    pageserver
+        .initialize(&pageserver_config_overrides(init_match))
        .unwrap_or_else(|e| {
            eprintln!("pageserver init failed: {e:?}");
            exit(1);
        });

-    env.register_branch_mapping(
-        DEFAULT_BRANCH_NAME.to_owned(),
-        initial_tenant_id,
-        initial_timeline_id,
-    )?;
-
    Ok(env)
 }

@@ -388,6 +372,17 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
            println!(
                "Created an initial timeline '{new_timeline_id}' at Lsn {last_record_lsn} for tenant: {new_tenant_id}",
            );
+
+            if create_match.get_flag("set-default") {
+                println!("Setting tenant {new_tenant_id} as a default one");
+                env.default_tenant_id = Some(new_tenant_id);
+            }
+        }
+        Some(("set-default", set_default_match)) => {
+            let tenant_id =
+                parse_tenant_id(set_default_match)?.context("No tenant id specified")?;
+            println!("Setting tenant {tenant_id} as a default one");
+            env.default_tenant_id = Some(tenant_id);
        }
        Some(("config", create_match)) => {
            let tenant_id = get_tenant_id(create_match, env)?;
@@ -928,9 +923,8 @@ fn cli() -> Command {
        .version(GIT_VERSION)
        .subcommand(
            Command::new("init")
-                .about("Initialize a new Neon repository")
+                .about("Initialize a new Neon repository, preparing configs for services to start with")
                .arg(pageserver_config_args.clone())
-                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
                .arg(
                    Arg::new("config")
                        .long("config")
@@ -992,11 +986,14 @@ fn cli() -> Command {
                .arg(timeline_id_arg.clone().help("Use a specific timeline id when creating a tenant and its initial timeline"))
                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
                .arg(pg_version_arg.clone())
+                .arg(Arg::new("set-default").long("set-default").action(ArgAction::SetTrue).required(false)
+                    .help("Use this tenant in future CLI commands where tenant_id is needed, but not specified"))
                )
+            .subcommand(Command::new("set-default").arg(tenant_id_arg.clone().required(true))
+                .about("Set a particular tenant as default in future CLI commands where tenant_id is needed, but not specified"))
            .subcommand(Command::new("config")
                .arg(tenant_id_arg.clone())
-                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false))
-                )
+                .arg(Arg::new("config").short('c').num_args(1).action(ArgAction::Append).required(false)))
        )
        .subcommand(
            Command::new("pageserver")
--- a/control_plane/src/compute.rs
+++ b/control_plane/src/compute.rs
@@ -201,7 +201,7 @@ impl PostgresNode {
            .stderr(Stdio::piped());

        if let Some(token) = auth_token {
-            cmd.env("ZENITH_AUTH_TOKEN", token);
+            cmd.env("NEON_AUTH_TOKEN", token);
        }

        let sync_handle = cmd
@@ -304,17 +304,17 @@ impl PostgresNode {

            // Set up authentication
            //
-            // $ZENITH_AUTH_TOKEN will be replaced with value from environment
+            // $NEON_AUTH_TOKEN will be replaced with value from environment
            // variable during compute pg startup. It is done this way because
            // otherwise user will be able to retrieve the value using SHOW
            // command or pg_settings
            let password = if let AuthType::NeonJWT = auth_type {
-                "$ZENITH_AUTH_TOKEN"
+                "$NEON_AUTH_TOKEN"
            } else {
                ""
            };
            // NOTE avoiding spaces in connection string, because it is less error prone if we forward it somewhere.
-            // Also note that not all parameters are supported here. Because in compute we substitute $ZENITH_AUTH_TOKEN
+            // Also note that not all parameters are supported here. Because in compute we substitute $NEON_AUTH_TOKEN
            // We parse this string and build it back with token from env var, and for simplicity rebuild
            // uses only needed variables namely host, port, user, password.
            format!("postgresql://no_user:{password}@{host}:{port}")
@@ -323,7 +323,7 @@ impl PostgresNode {
        conf.append_line("");
        conf.append("neon.pageserver_connstring", &pageserver_connstr);
        if let AuthType::NeonJWT = auth_type {
-            conf.append("neon.safekeeper_token_env", "$ZENITH_AUTH_TOKEN");
+            conf.append("neon.safekeeper_token_env", "$NEON_AUTH_TOKEN");
        }
        conf.append("neon.tenant_id", &self.tenant_id.to_string());
        conf.append("neon.timeline_id", &self.timeline_id.to_string());
@@ -448,7 +448,7 @@ impl PostgresNode {
            self.env.pg_lib_dir(self.pg_version)?.to_str().unwrap(),
        );
        if let Some(token) = auth_token {
-            cmd.env("ZENITH_AUTH_TOKEN", token);
+            cmd.env("NEON_AUTH_TOKEN", token);
        }

        let pg_ctl = cmd.output().context("pg_ctl failed")?;
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -296,11 +296,6 @@ impl LocalEnv {
            env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
        }

-        // If no initial tenant ID was given, generate it.
-        if env.default_tenant_id.is_none() {
-            env.default_tenant_id = Some(TenantId::generate());
-        }
-
        env.base_data_dir = base_path();

        Ok(env)
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -7,7 +7,7 @@ use std::path::PathBuf;
 use std::process::{Child, Command};
 use std::{io, result};

-use anyhow::{bail, ensure, Context};
+use anyhow::{bail, Context};
 use pageserver_api::models::{
    TenantConfigRequest, TenantCreateRequest, TenantInfo, TimelineCreateRequest, TimelineInfo,
 };
@@ -130,83 +130,15 @@ impl PageServerNode {
        overrides
    }

-    /// Initializes a pageserver node by creating its config with the overrides provided,
-    /// and creating an initial tenant and timeline afterwards.
-    pub fn initialize(
-        &self,
-        create_tenant: Option<TenantId>,
-        initial_timeline_id: Option<TimelineId>,
-        config_overrides: &[&str],
-        pg_version: u32,
-    ) -> anyhow::Result<TimelineId> {
+    /// Initializes a pageserver node by creating its config with the overrides provided.
+    pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
        // First, run `pageserver --init` and wait for it to write a config into FS and exit.
        self.pageserver_init(config_overrides).with_context(|| {
            format!(
                "Failed to run init for pageserver node {}",
                self.env.pageserver.id,
            )
-        })?;
-
-        // Then, briefly start it fully to run HTTP commands on it,
-        // to create initial tenant and timeline.
-        // We disable the remote storage, since we stop pageserver right after the timeline creation,
-        // hence most of the uploads will either aborted or not started: no point to start them at all.
-        let disabled_remote_storage_override = "remote_storage={}";
-        let mut pageserver_process = self
-            .start_node(
-                &[disabled_remote_storage_override],
-                // Previous overrides will be taken from the config created before, don't overwrite them.
-                false,
-            )
-            .with_context(|| {
-                format!(
-                    "Failed to start a process for pageserver node {}",
-                    self.env.pageserver.id,
-                )
-            })?;
-
-        let init_result = self
-            .try_init_timeline(create_tenant, initial_timeline_id, pg_version)
-            .context("Failed to create initial tenant and timeline for pageserver");
-        match &init_result {
-            Ok(initial_timeline_id) => {
-                println!("Successfully initialized timeline {initial_timeline_id}")
-            }
-            Err(e) => eprintln!("{e:#}"),
-        }
-        background_process::send_stop_child_process(&pageserver_process)?;
-
-        let exit_code = pageserver_process.wait()?;
-        ensure!(
-            exit_code.success(),
-            format!(
-                "pageserver init failed with exit code {:?}",
-                exit_code.code()
-            )
-        );
-        println!(
-            "Stopped pageserver {} process with pid {}",
-            self.env.pageserver.id,
-            pageserver_process.id(),
-        );
-        init_result
-    }
-
-    fn try_init_timeline(
-        &self,
-        new_tenant_id: Option<TenantId>,
-        new_timeline_id: Option<TimelineId>,
-        pg_version: u32,
-    ) -> anyhow::Result<TimelineId> {
-        let initial_tenant_id = self.tenant_create(new_tenant_id, HashMap::new())?;
-        let initial_timeline_info = self.timeline_create(
-            initial_tenant_id,
-            new_timeline_id,
-            None,
-            None,
-            Some(pg_version),
-        )?;
-        Ok(initial_timeline_info.timeline_id)
+        })
    }

    pub fn repo_path(&self) -> PathBuf {
@@ -320,7 +252,7 @@ impl PageServerNode {
            let token = self
                .env
                .generate_auth_token(&Claims::new(None, Scope::SafekeeperData))?;
-            vec![("ZENITH_AUTH_TOKEN".to_owned(), token)]
+            vec![("NEON_AUTH_TOKEN".to_owned(), token)]
        } else {
            Vec::new()
        })
--- a/deny.toml
+++ b/deny.toml
@@ -0,0 +1,90 @@
+# This file was auto-generated using `cargo deny init`.
+# cargo-deny is a cargo plugin that lets you lint your project's
+# dependency graph to ensure all your dependencies conform
+# to your expectations and requirements.
+
+# Root options
+targets = []
+all-features = false
+no-default-features = false
+feature-depth = 1
+
+# This section is considered when running `cargo deny check advisories`
+# More documentation for the advisories section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html
+[advisories]
+db-urls = ["https://github.com/rustsec/advisory-db"]
+vulnerability = "deny"
+unmaintained = "warn"
+yanked = "warn"
+notice = "warn"
+ignore = []
+
+# This section is considered when running `cargo deny check licenses`
+# More documentation for the licenses section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html
+[licenses]
+unlicensed = "deny"
+allow = [
+    "Apache-2.0",
+    "Artistic-2.0",
+    "BSD-2-Clause",
+    "BSD-3-Clause",
+    "ISC",
+    "MIT",
+    "MPL-2.0",
+    "OpenSSL",
+    "Unicode-DFS-2016",
+]
+deny = []
+copyleft = "warn"
+allow-osi-fsf-free = "neither"
+default = "deny"
+confidence-threshold = 0.8
+exceptions = [
+    # The Zlib license has some restrictions if we decide to change something
+    { allow = ["Zlib"], name = "const_format_proc_macros", version = "*" },
+    { allow = ["Zlib"], name = "const_format", version = "*" },
+]
+
+[[licenses.clarify]]
+name = "ring"
+version = "*"
+expression = "MIT AND ISC AND OpenSSL"
+license-files = [
+    { path = "LICENSE", hash = 0xbd0eed23 },
+]
+
+[licenses.private]
+ignore = true
+registries = []
+
+# This section is considered when running `cargo deny check bans`.
+# More documentation about the 'bans' section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html
+[bans]
+multiple-versions = "warn"
+wildcards = "allow"
+highlight = "all"
+workspace-default-features = "allow"
+external-default-features = "allow"
+allow = []
+deny = []
+skip = []
+skip-tree = []
+
+# This section is considered when running `cargo deny check sources`.
+# More documentation about the 'sources' section can be found here:
+# https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html
+[sources]
+unknown-registry = "warn"
+unknown-git = "warn"
+allow-registry = ["https://github.com/rust-lang/crates.io-index"]
+allow-git = []
+
+[sources.allow-org]
+github = [
+    "neondatabase",
+]
+gitlab = []
+bitbucket = []
--- a/docs/authentication.md
+++ b/docs/authentication.md
@@ -65,7 +65,7 @@ There is no administrative API except those provided by PostgreSQL.

 #### Outgoing connections
 Compute connects to Pageserver for getting pages.
-The connection string is configured by the `neon.pageserver_connstring` PostgreSQL GUC, e.g. `postgresql://no_user:$ZENITH_AUTH_TOKEN@localhost:15028`.
+The connection string is configured by the `neon.pageserver_connstring` PostgreSQL GUC, e.g. `postgresql://no_user:$NEON_AUTH_TOKEN@localhost:15028`.
 The environment variable inside the connection string is substituted with
 the JWT token.

@@ -77,7 +77,7 @@ If the GUC is unset, no token is passed.

 Note that both tokens can be (and typically are) the same;
 the scope is the tenant and the token is usually passed through the
-`$ZENITH_AUTH_TOKEN` environment variable.
+`$NEON_AUTH_TOKEN` environment variable.

 ### Pageserver
 #### Overview
@@ -114,7 +114,7 @@ either of three values:
 Pageserver makes a connection to a Safekeeper for each active timeline.
 As Pageserver may want to access any timeline it has on the disk,
 it is given a blanket JWT token to access any data on any Safekeeper.
-This token is passed through an environment variable called `ZENITH_AUTH_TOKEN`
+This token is passed through an environment variable called `NEON_AUTH_TOKEN`
 (non-configurable as of writing this text).

 A better way _may be_ to store JWT token for each timeline next to it,
--- a/libs/metrics/Cargo.toml
+++ b/libs/metrics/Cargo.toml
@@ -2,6 +2,7 @@
 name = "metrics"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 prometheus = {version = "0.13", default_features=false, features = ["process"]} # removes protobuf dependency
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -2,6 +2,7 @@
 name = "pageserver_api"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 serde = { version = "1.0", features = ["derive"] }
--- a/libs/postgres_connection/Cargo.toml
+++ b/libs/postgres_connection/Cargo.toml
@@ -2,6 +2,7 @@
 name = "postgres_connection"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

--- a/libs/postgres_ffi/Cargo.toml
+++ b/libs/postgres_ffi/Cargo.toml
@@ -2,6 +2,7 @@
 name = "postgres_ffi"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 rand = "0.8.3"
--- a/libs/postgres_ffi/wal_craft/Cargo.toml
+++ b/libs/postgres_ffi/wal_craft/Cargo.toml
@@ -2,7 +2,7 @@
 name = "wal_craft"
 version = "0.1.0"
 edition = "2021"
-
+license = "Apache-2.0"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

 [dependencies]
--- a/libs/pq_proto/Cargo.toml
+++ b/libs/pq_proto/Cargo.toml
@@ -2,6 +2,7 @@
 name = "pq_proto"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 anyhow = "1.0"
@@ -12,5 +13,6 @@ rand = "0.8.3"
 serde = { version = "1.0", features = ["derive"] }
 tokio = { version = "1.17", features = ["macros"] }
 tracing = "0.1"
+thiserror = "1.0"

 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/pq_proto/src/lib.rs
+++ b/libs/pq_proto/src/lib.rs
@@ -5,7 +5,7 @@
 // Tools for calling certain async methods in sync contexts.
 pub mod sync;

-use anyhow::{bail, ensure, Context, Result};
+use anyhow::{ensure, Context, Result};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use postgres_protocol::PG_EPOCH;
 use serde::{Deserialize, Serialize};
@@ -194,6 +194,35 @@ macro_rules! retry_read {
    };
 }

+/// An error that occurred while the connection was open.
+#[derive(thiserror::Error, Debug)]
+pub enum ConnectionError {
+    /// IO error during writing to or reading from the connection socket.
+    #[error("Socket IO error: {0}")]
+    Socket(std::io::Error),
+    /// Invalid packet was received from client
+    #[error("Protocol error: {0}")]
+    Protocol(String),
+    /// Failed to parse a protocol message
+    #[error("Message parse error: {0}")]
+    MessageParse(anyhow::Error),
+}
+
+impl From<anyhow::Error> for ConnectionError {
+    fn from(e: anyhow::Error) -> Self {
+        Self::MessageParse(e)
+    }
+}
+
+impl ConnectionError {
+    pub fn into_io_error(self) -> io::Error {
+        match self {
+            ConnectionError::Socket(io) => io,
+            other => io::Error::new(io::ErrorKind::Other, other.to_string()),
+        }
+    }
+}
+
 impl FeMessage {
    /// Read one message from the stream.
    /// This function returns `Ok(None)` in case of EOF.
@@ -216,7 +245,9 @@ impl FeMessage {
    /// }
    /// ```
    #[inline(never)]
-    pub fn read(stream: &mut (impl io::Read + Unpin)) -> anyhow::Result<Option<FeMessage>> {
+    pub fn read(
+        stream: &mut (impl io::Read + Unpin),
+    ) -> Result<Option<FeMessage>, ConnectionError> {
        Self::read_fut(&mut AsyncishRead(stream)).wait()
    }

@@ -224,7 +255,7 @@ impl FeMessage {
    /// See documentation for `Self::read`.
    pub fn read_fut<Reader>(
        stream: &mut Reader,
-    ) -> SyncFuture<Reader, impl Future<Output = anyhow::Result<Option<FeMessage>>> + '_>
+    ) -> SyncFuture<Reader, impl Future<Output = Result<Option<FeMessage>, ConnectionError>> + '_>
    where
        Reader: tokio::io::AsyncRead + Unpin,
    {
@@ -238,17 +269,21 @@ impl FeMessage {
            let tag = match retry_read!(stream.read_u8().await) {
                Ok(b) => b,
                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
-                Err(e) => return Err(e.into()),
+                Err(e) => return Err(ConnectionError::Socket(e)),
            };

            // The message length includes itself, so it better be at least 4.
-            let len = retry_read!(stream.read_u32().await)?
+            let len = retry_read!(stream.read_u32().await)
+                .map_err(ConnectionError::Socket)?
                .checked_sub(4)
-                .context("invalid message length")?;
+                .ok_or_else(|| ConnectionError::Protocol("invalid message length".to_string()))?;

            let body = {
                let mut buffer = vec![0u8; len as usize];
-                stream.read_exact(&mut buffer).await?;
+                stream
+                    .read_exact(&mut buffer)
+                    .await
+                    .map_err(ConnectionError::Socket)?;
                Bytes::from(buffer)
            };

@@ -265,7 +300,11 @@ impl FeMessage {
                b'c' => Ok(Some(FeMessage::CopyDone)),
                b'f' => Ok(Some(FeMessage::CopyFail)),
                b'p' => Ok(Some(FeMessage::PasswordMessage(body))),
-                tag => bail!("unknown message tag: {},'{:?}'", tag, body),
+                tag => {
+                    return Err(ConnectionError::Protocol(format!(
+                        "unknown message tag: {tag},'{body:?}'"
+                    )))
+                }
            }
        })
    }
@@ -275,7 +314,9 @@ impl FeStartupPacket {
    /// Read startup message from the stream.
    // XXX: It's tempting yet undesirable to accept `stream` by value,
    // since such a change will cause user-supplied &mut references to be consumed
-    pub fn read(stream: &mut (impl io::Read + Unpin)) -> anyhow::Result<Option<FeMessage>> {
+    pub fn read(
+        stream: &mut (impl io::Read + Unpin),
+    ) -> Result<Option<FeMessage>, ConnectionError> {
        Self::read_fut(&mut AsyncishRead(stream)).wait()
    }

@@ -284,7 +325,7 @@ impl FeStartupPacket {
    // since such a change will cause user-supplied &mut references to be consumed
    pub fn read_fut<Reader>(
        stream: &mut Reader,
-    ) -> SyncFuture<Reader, impl Future<Output = anyhow::Result<Option<FeMessage>>> + '_>
+    ) -> SyncFuture<Reader, impl Future<Output = Result<Option<FeMessage>, ConnectionError>> + '_>
    where
        Reader: tokio::io::AsyncRead + Unpin,
    {
@@ -302,31 +343,41 @@ impl FeStartupPacket {
            let len = match retry_read!(stream.read_u32().await) {
                Ok(len) => len as usize,
                Err(e) if e.kind() == io::ErrorKind::UnexpectedEof => return Ok(None),
-                Err(e) => return Err(e.into()),
+                Err(e) => return Err(ConnectionError::Socket(e)),
            };

            #[allow(clippy::manual_range_contains)]
            if len < 4 || len > MAX_STARTUP_PACKET_LENGTH {
-                bail!("invalid message length");
+                return Err(ConnectionError::Protocol(format!(
+                    "invalid message length {len}"
+                )));
            }

-            let request_code = retry_read!(stream.read_u32().await)?;
+            let request_code =
+                retry_read!(stream.read_u32().await).map_err(ConnectionError::Socket)?;

            // the rest of startup packet are params
            let params_len = len - 8;
            let mut params_bytes = vec![0u8; params_len];
-            stream.read_exact(params_bytes.as_mut()).await?;
+            stream
+                .read_exact(params_bytes.as_mut())
+                .await
+                .map_err(ConnectionError::Socket)?;

            // Parse params depending on request code
            let req_hi = request_code >> 16;
            let req_lo = request_code & ((1 << 16) - 1);
            let message = match (req_hi, req_lo) {
                (RESERVED_INVALID_MAJOR_VERSION, CANCEL_REQUEST_CODE) => {
-                    ensure!(params_len == 8, "expected 8 bytes for CancelRequest params");
+                    if params_len != 8 {
+                        return Err(ConnectionError::Protocol(
+                            "expected 8 bytes for CancelRequest params".to_string(),
+                        ));
+                    }
                    let mut cursor = Cursor::new(params_bytes);
                    FeStartupPacket::CancelRequest(CancelKeyData {
-                        backend_pid: cursor.read_i32().await?,
-                        cancel_key: cursor.read_i32().await?,
+                        backend_pid: cursor.read_i32().await.map_err(ConnectionError::Socket)?,
+                        cancel_key: cursor.read_i32().await.map_err(ConnectionError::Socket)?,
                    })
                }
                (RESERVED_INVALID_MAJOR_VERSION, NEGOTIATE_SSL_CODE) => {
@@ -338,7 +389,9 @@ impl FeStartupPacket {
                    FeStartupPacket::GssEncRequest
                }
                (RESERVED_INVALID_MAJOR_VERSION, unrecognized_code) => {
-                    bail!("Unrecognized request code {}", unrecognized_code)
+                    return Err(ConnectionError::Protocol(format!(
+                        "Unrecognized request code {unrecognized_code}"
+                    )));
                }
                // TODO bail if protocol major_version is not 3?
                (major_version, minor_version) => {
@@ -346,15 +399,21 @@ impl FeStartupPacket {
                    // See `postgres: ProcessStartupPacket, build_startup_packet`.
                    let mut tokens = str::from_utf8(&params_bytes)
                        .context("StartupMessage params: invalid utf-8")?
-                        .strip_suffix('\0') // drop packet's own null terminator
-                        .context("StartupMessage params: missing null terminator")?
+                        .strip_suffix('\0') // drop packet's own null
+                        .ok_or_else(|| {
+                            ConnectionError::Protocol(
+                                "StartupMessage params: missing null terminator".to_string(),
+                            )
+                        })?
                        .split_terminator('\0');

                    let mut params = HashMap::new();
                    while let Some(name) = tokens.next() {
-                        let value = tokens
-                            .next()
-                            .context("StartupMessage params: key without value")?;
+                        let value = tokens.next().ok_or_else(|| {
+                            ConnectionError::Protocol(
+                                "StartupMessage params: key without value".to_string(),
+                            )
+                        })?;

                        params.insert(name.to_owned(), value.to_owned());
                    }
@@ -458,7 +517,7 @@ pub enum BeMessage<'a> {
    CloseComplete,
    // None means column is NULL
    DataRow(&'a [Option<&'a [u8]>]),
-    ErrorResponse(&'a str),
+    ErrorResponse(&'a str, Option<&'a [u8; 5]>),
    /// Single byte - used in response to SSLRequest/GSSENCRequest.
    EncryptionResponse(bool),
    NoData,
@@ -606,7 +665,7 @@ fn write_body<R>(buf: &mut BytesMut, f: impl FnOnce(&mut BytesMut) -> R) -> R {
 }

 /// Safe write of s into buf as cstring (String in the protocol).
-fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> Result<(), io::Error> {
+fn write_cstr(s: impl AsRef<[u8]>, buf: &mut BytesMut) -> io::Result<()> {
    let bytes = s.as_ref();
    if bytes.contains(&0) {
        return Err(io::Error::new(
@@ -626,6 +685,8 @@ fn read_cstr(buf: &mut Bytes) -> anyhow::Result<Bytes> {
    Ok(result)
 }

+pub const SQLSTATE_INTERNAL_ERROR: &[u8; 5] = b"XX000";
+
 impl<'a> BeMessage<'a> {
    /// Write message to the given buf.
    // Unlike the reading side, we use BytesMut
@@ -765,10 +826,7 @@ impl<'a> BeMessage<'a> {
            // First byte of each field represents type of this field. Set just enough fields
            // to satisfy rust-postgres client: 'S' -- severity, 'C' -- error, 'M' -- error
            // message text.
-            BeMessage::ErrorResponse(error_msg) => {
-                // For all the errors set Severity to Error and error code to
-                // 'internal error'.
-
+            BeMessage::ErrorResponse(error_msg, pg_error_code) => {
                // 'E' signalizes ErrorResponse messages
                buf.put_u8(b'E');
                write_body(buf, |buf| {
@@ -776,7 +834,9 @@ impl<'a> BeMessage<'a> {
                    buf.put_slice(b"ERROR\0");

                    buf.put_u8(b'C'); // SQLSTATE error code
-                    buf.put_slice(b"CXX000\0");
+                    buf.put_slice(&terminate_code(
+                        pg_error_code.unwrap_or(SQLSTATE_INTERNAL_ERROR),
+                    ));

                    buf.put_u8(b'M'); // the message
                    write_cstr(error_msg, buf)?;
@@ -799,7 +859,7 @@ impl<'a> BeMessage<'a> {
                    buf.put_slice(b"NOTICE\0");

                    buf.put_u8(b'C'); // SQLSTATE error code
-                    buf.put_slice(b"CXX000\0");
+                    buf.put_slice(&terminate_code(SQLSTATE_INTERNAL_ERROR));

                    buf.put_u8(b'M'); // the message
                    write_cstr(error_msg.as_bytes(), buf)?;
@@ -1087,3 +1147,12 @@ mod tests {
        let _ = FeStartupPacket::read_fut(stream).await;
    }
 }
+
+fn terminate_code(code: &[u8; 5]) -> [u8; 6] {
+    let mut terminated = [0; 6];
+    for (i, &elem) in code.iter().enumerate() {
+        terminated[i] = elem;
+    }
+
+    terminated
+}
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -2,6 +2,7 @@
 name = "remote_storage"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 anyhow = { version = "1.0", features = ["backtrace"] }
--- a/libs/safekeeper_api/Cargo.toml
+++ b/libs/safekeeper_api/Cargo.toml
@@ -2,6 +2,7 @@
 name = "safekeeper_api"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 serde = { version = "1.0", features = ["derive"] }
--- a/libs/tenant_size_model/Cargo.toml
+++ b/libs/tenant_size_model/Cargo.toml
@@ -3,6 +3,7 @@ name = "tenant_size_model"
 version = "0.1.0"
 edition = "2021"
 publish = false
+license = "Apache-2.0"

 [dependencies]
 workspace_hack = { version = "0.1", path = "../../workspace_hack" }
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -2,9 +2,10 @@
 name = "utils"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
-sentry = "0.29.0"
+sentry = { version = "0.29.0", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 async-trait = "0.1"
 anyhow = "1.0"
 bincode = "1.3"
--- a/libs/utils/src/postgres_backend.rs
+++ b/libs/utils/src/postgres_backend.rs
@@ -3,11 +3,11 @@
 //! implementation determining how to process the queries. Currently its API
 //! is rather narrow, but we can extend it once required.

+use crate::postgres_backend_async::{log_query_error, short_error, QueryError};
 use crate::sock_split::{BidiStream, ReadStream, WriteStream};
-use anyhow::{bail, ensure, Context, Result};
+use anyhow::Context;
 use bytes::{Bytes, BytesMut};
 use pq_proto::{BeMessage, FeMessage, FeStartupPacket};
-use rand::Rng;
 use serde::{Deserialize, Serialize};
 use std::fmt;
 use std::io::{self, Write};
@@ -22,25 +22,32 @@ pub trait Handler {
    /// postgres_backend will issue ReadyForQuery after calling this (this
    /// might be not what we want after CopyData streaming, but currently we don't
    /// care).
-    fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()>;
+    fn process_query(
+        &mut self,
+        pgb: &mut PostgresBackend,
+        query_string: &str,
+    ) -> Result<(), QueryError>;

    /// Called on startup packet receival, allows to process params.
    ///
    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new users
    /// creation is the proxy code. That is quite hacky and ad-hoc solution, may be we could allow
    /// to override whole init logic in implementations.
-    fn startup(&mut self, _pgb: &mut PostgresBackend, _sm: &FeStartupPacket) -> Result<()> {
+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _sm: &FeStartupPacket,
+    ) -> Result<(), QueryError> {
        Ok(())
    }

-    /// Check auth md5
-    fn check_auth_md5(&mut self, _pgb: &mut PostgresBackend, _md5_response: &[u8]) -> Result<()> {
-        bail!("MD5 auth failed")
-    }
-
    /// Check auth jwt
-    fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
-        bail!("JWT auth failed")
+    fn check_auth_jwt(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _jwt_response: &[u8],
+    ) -> Result<(), QueryError> {
+        Err(QueryError::Other(anyhow::anyhow!("JWT auth failed")))
    }

    fn is_shutdown_requested(&self) -> bool {
@@ -61,7 +68,6 @@ pub enum ProtoState {
 #[derive(Debug, PartialEq, Eq, Clone, Copy, Serialize, Deserialize)]
 pub enum AuthType {
    Trust,
-    MD5,
    // This mimics postgres's AuthenticationCleartextPassword but instead of password expects JWT
    NeonJWT,
 }
@@ -72,9 +78,8 @@ impl FromStr for AuthType {
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "Trust" => Ok(Self::Trust),
-            "MD5" => Ok(Self::MD5),
            "NeonJWT" => Ok(Self::NeonJWT),
-            _ => bail!("invalid value \"{s}\" for auth type"),
+            _ => anyhow::bail!("invalid value \"{s}\" for auth type"),
        }
    }
 }
@@ -83,7 +88,6 @@ impl fmt::Display for AuthType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            AuthType::Trust => "Trust",
-            AuthType::MD5 => "MD5",
            AuthType::NeonJWT => "NeonJWT",
        })
    }
@@ -134,7 +138,6 @@ pub struct PostgresBackend {

    pub state: ProtoState,

-    md5_salt: [u8; 4],
    auth_type: AuthType,

    peer_addr: SocketAddr,
@@ -164,7 +167,7 @@ pub fn is_socket_read_timed_out(error: &anyhow::Error) -> bool {
 }

 // Cast a byte slice to a string slice, dropping null terminator if there's one.
-fn cstr_to_str(bytes: &[u8]) -> Result<&str> {
+fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
    std::str::from_utf8(without_null).map_err(|e| e.into())
 }
@@ -187,7 +190,6 @@ impl PostgresBackend {
            stream: Some(Stream::Bidirectional(BidiStream::from_tcp(socket))),
            buf_out: BytesMut::with_capacity(10 * 1024),
            state: ProtoState::Initialization,
-            md5_salt: [0u8; 4],
            auth_type,
            tls_config,
            peer_addr,
@@ -199,10 +201,10 @@ impl PostgresBackend {
    }

    /// Get direct reference (into the Option) to the read stream.
-    fn get_stream_in(&mut self) -> Result<&mut BidiStream> {
+    fn get_stream_in(&mut self) -> anyhow::Result<&mut BidiStream> {
        match &mut self.stream {
            Some(Stream::Bidirectional(stream)) => Ok(stream),
-            _ => bail!("reader taken"),
+            _ => anyhow::bail!("reader taken"),
        }
    }

@@ -226,7 +228,7 @@ impl PostgresBackend {
    }

    /// Read full message or return None if connection is closed.
-    pub fn read_message(&mut self) -> Result<Option<FeMessage>> {
+    pub fn read_message(&mut self) -> Result<Option<FeMessage>, QueryError> {
        let (state, stream) = (self.state, self.get_stream_in()?);

        use ProtoState::*;
@@ -234,6 +236,7 @@ impl PostgresBackend {
            Initialization | Encrypted => FeStartupPacket::read(stream),
            Authentication | Established => FeMessage::read(stream),
        }
+        .map_err(QueryError::from)
    }

    /// Write message into internal output buffer.
@@ -257,7 +260,7 @@ impl PostgresBackend {
    }

    // Wrapper for run_message_loop() that shuts down socket when we are done
-    pub fn run(mut self, handler: &mut impl Handler) -> Result<()> {
+    pub fn run(mut self, handler: &mut impl Handler) -> Result<(), QueryError> {
        let ret = self.run_message_loop(handler);
        if let Some(stream) = self.stream.as_mut() {
            let _ = stream.shutdown(Shutdown::Both);
@@ -265,7 +268,7 @@ impl PostgresBackend {
        ret
    }

-    fn run_message_loop(&mut self, handler: &mut impl Handler) -> Result<()> {
+    fn run_message_loop(&mut self, handler: &mut impl Handler) -> Result<(), QueryError> {
        trace!("postgres backend to {:?} started", self.peer_addr);

        let mut unnamed_query_string = Bytes::new();
@@ -274,7 +277,7 @@ impl PostgresBackend {
            match self.read_message() {
                Ok(message) => {
                    if let Some(msg) = message {
-                        trace!("got message {:?}", msg);
+                        trace!("got message {msg:?}");

                        match self.process_message(handler, msg, &mut unnamed_query_string)? {
                            ProcessMsgResult::Continue => continue,
@@ -285,10 +288,12 @@ impl PostgresBackend {
                    }
                }
                Err(e) => {
-                    // If it is a timeout error, continue the loop
-                    if !is_socket_read_timed_out(&e) {
-                        return Err(e);
+                    if let QueryError::Other(e) = &e {
+                        if is_socket_read_timed_out(e) {
+                            continue;
+                        }
                    }
+                    return Err(e);
                }
            }
        }
@@ -306,7 +311,7 @@ impl PostgresBackend {
            }
            stream => {
                self.stream = stream;
-                bail!("can't start TLs without bidi stream");
+                anyhow::bail!("can't start TLs without bidi stream");
            }
        }
    }
@@ -316,17 +321,16 @@ impl PostgresBackend {
        handler: &mut impl Handler,
        msg: FeMessage,
        unnamed_query_string: &mut Bytes,
-    ) -> Result<ProcessMsgResult> {
+    ) -> Result<ProcessMsgResult, QueryError> {
        // Allow only startup and password messages during auth. Otherwise client would be able to bypass auth
        // TODO: change that to proper top-level match of protocol state with separate message handling for each state
-        if self.state < ProtoState::Established {
-            ensure!(
-                matches!(
-                    msg,
-                    FeMessage::PasswordMessage(_) | FeMessage::StartupPacket(_)
-                ),
-                "protocol violation"
-            );
+        if self.state < ProtoState::Established
+            && !matches!(
+                msg,
+                FeMessage::PasswordMessage(_) | FeMessage::StartupPacket(_)
+            )
+        {
+            return Err(QueryError::Other(anyhow::anyhow!("protocol violation")));
        }

        let have_tls = self.tls_config.is_some();
@@ -350,8 +354,13 @@ impl PostgresBackend {
                    }
                    FeStartupPacket::StartupMessage { .. } => {
                        if have_tls && !matches!(self.state, ProtoState::Encrypted) {
-                            self.write_message(&BeMessage::ErrorResponse("must connect with TLS"))?;
-                            bail!("client did not connect with TLS");
+                            self.write_message(&BeMessage::ErrorResponse(
+                                "must connect with TLS",
+                                None,
+                            ))?;
+                            return Err(QueryError::Other(anyhow::anyhow!(
+                                "client did not connect with TLS"
+                            )));
                        }

                        // NB: startup() may change self.auth_type -- we are using that in proxy code
@@ -367,13 +376,6 @@ impl PostgresBackend {
                                    .write_message(&BeMessage::ReadyForQuery)?;
                                self.state = ProtoState::Established;
                            }
-                            AuthType::MD5 => {
-                                rand::thread_rng().fill(&mut self.md5_salt);
-                                self.write_message(&BeMessage::AuthenticationMD5Password(
-                                    self.md5_salt,
-                                ))?;
-                                self.state = ProtoState::Authentication;
-                            }
                            AuthType::NeonJWT => {
                                self.write_message(&BeMessage::AuthenticationCleartextPassword)?;
                                self.state = ProtoState::Authentication;
@@ -393,20 +395,15 @@ impl PostgresBackend {

                match self.auth_type {
                    AuthType::Trust => unreachable!(),
-                    AuthType::MD5 => {
-                        let (_, md5_response) = m.split_last().context("protocol violation")?;
-
-                        if let Err(e) = handler.check_auth_md5(self, md5_response) {
-                            self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                            bail!("auth failed: {}", e);
-                        }
-                    }
                    AuthType::NeonJWT => {
                        let (_, jwt_response) = m.split_last().context("protocol violation")?;

                        if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
-                            self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                            bail!("auth failed: {}", e);
+                            self.write_message(&BeMessage::ErrorResponse(
+                                &e.to_string(),
+                                Some(e.pg_error_code()),
+                            ))?;
+                            return Err(e);
                        }
                    }
                }
@@ -420,33 +417,14 @@ impl PostgresBackend {
                // remove null terminator
                let query_string = cstr_to_str(&body)?;

-                trace!("got query {:?}", query_string);
-                // xxx distinguish fatal and recoverable errors?
+                trace!("got query {query_string:?}");
                if let Err(e) = handler.process_query(self, query_string) {
-                    // ":?" uses the alternate formatting style, which makes anyhow display the
-                    // full cause of the error, not just the top-level context + its trace.
-                    // We don't want to send that in the ErrorResponse though,
-                    // because it's not relevant to the compute node logs.
-                    //
-                    // We also don't want to log full stacktrace when the error is primitive,
-                    // such as usual connection closed.
-                    let short_error = format!("{:#}", e);
-                    let root_cause = e.root_cause().to_string();
-                    if root_cause.contains("connection closed unexpectedly")
-                        || root_cause.contains("Broken pipe (os error 32)")
-                    {
-                        error!(
-                            "query handler for '{}' failed: {}",
-                            query_string, short_error
-                        );
-                    } else {
-                        error!("query handler for '{}' failed: {:?}", query_string, e);
-                    }
-                    self.write_message_noflush(&BeMessage::ErrorResponse(&short_error))?;
-                    // TODO: untangle convoluted control flow
-                    if e.to_string().contains("failed to run") {
-                        return Ok(ProcessMsgResult::Break);
-                    }
+                    log_query_error(query_string, &e);
+                    let short_error = short_error(&e);
+                    self.write_message_noflush(&BeMessage::ErrorResponse(
+                        &short_error,
+                        Some(e.pg_error_code()),
+                    ))?;
                }
                self.write_message(&BeMessage::ReadyForQuery)?;
            }
@@ -471,11 +449,13 @@ impl PostgresBackend {

            FeMessage::Execute(_) => {
                let query_string = cstr_to_str(unnamed_query_string)?;
-                trace!("got execute {:?}", query_string);
-                // xxx distinguish fatal and recoverable errors?
+                trace!("got execute {query_string:?}");
                if let Err(e) = handler.process_query(self, query_string) {
-                    error!("query handler for '{}' failed: {:?}", query_string, e);
-                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
+                    log_query_error(query_string, &e);
+                    self.write_message(&BeMessage::ErrorResponse(
+                        &e.to_string(),
+                        Some(e.pg_error_code()),
+                    ))?;
                }
                // NOTE there is no ReadyForQuery message. This handler is used
                // for basebackup and it uses CopyOut which doesn't require
@@ -494,7 +474,9 @@ impl PostgresBackend {
            // We prefer explicit pattern matching to wildcards, because
            // this helps us spot the places where new variants are missing
            FeMessage::CopyData(_) | FeMessage::CopyDone | FeMessage::CopyFail => {
-                bail!("unexpected message type: {:?}", msg);
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "unexpected message type: {msg:?}"
+                )));
            }
        }

--- a/libs/utils/src/postgres_backend_async.rs
+++ b/libs/utils/src/postgres_backend_async.rs
@@ -4,45 +4,87 @@
 //! is rather narrow, but we can extend it once required.

 use crate::postgres_backend::AuthType;
-use anyhow::{bail, Context, Result};
-use bytes::{Bytes, BytesMut};
-use pq_proto::{BeMessage, FeMessage, FeStartupPacket};
-use rand::Rng;
+use anyhow::Context;
+use bytes::{Buf, Bytes, BytesMut};
+use pq_proto::{BeMessage, ConnectionError, FeMessage, FeStartupPacket, SQLSTATE_INTERNAL_ERROR};
 use std::future::Future;
+use std::io;
 use std::net::SocketAddr;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::Poll;
-use tracing::{debug, error, trace};
+use tracing::{debug, error, info, trace};

 use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
 use tokio_rustls::TlsAcceptor;

+pub fn is_expected_io_error(e: &io::Error) -> bool {
+    use io::ErrorKind::*;
+    matches!(
+        e.kind(),
+        ConnectionRefused | ConnectionAborted | ConnectionReset
+    )
+}
+
+/// An error that occurred during query processing:
+/// either during the connection ([`ConnectionError`]) or before/after it.
+#[derive(thiserror::Error, Debug)]
+pub enum QueryError {
+    /// The connection was lost while processing the query.
+    #[error(transparent)]
+    Disconnected(#[from] ConnectionError),
+    /// Some other error
+    #[error(transparent)]
+    Other(#[from] anyhow::Error),
+}
+
+impl From<io::Error> for QueryError {
+    fn from(e: io::Error) -> Self {
+        Self::Disconnected(ConnectionError::Socket(e))
+    }
+}
+
+impl QueryError {
+    pub fn pg_error_code(&self) -> &'static [u8; 5] {
+        match self {
+            Self::Disconnected(_) => b"08006",         // connection failure
+            Self::Other(_) => SQLSTATE_INTERNAL_ERROR, // internal error
+        }
+    }
+}
+
 #[async_trait::async_trait]
 pub trait Handler {
    /// Handle single query.
    /// postgres_backend will issue ReadyForQuery after calling this (this
    /// might be not what we want after CopyData streaming, but currently we don't
    /// care).
-    async fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()>;
+    async fn process_query(
+        &mut self,
+        pgb: &mut PostgresBackend,
+        query_string: &str,
+    ) -> Result<(), QueryError>;

    /// Called on startup packet receival, allows to process params.
    ///
    /// If Ok(false) is returned postgres_backend will skip auth -- that is needed for new users
    /// creation is the proxy code. That is quite hacky and ad-hoc solution, may be we could allow
    /// to override whole init logic in implementations.
-    fn startup(&mut self, _pgb: &mut PostgresBackend, _sm: &FeStartupPacket) -> Result<()> {
+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _sm: &FeStartupPacket,
+    ) -> Result<(), QueryError> {
        Ok(())
    }

-    /// Check auth md5
-    fn check_auth_md5(&mut self, _pgb: &mut PostgresBackend, _md5_response: &[u8]) -> Result<()> {
-        bail!("MD5 auth failed")
-    }
-
    /// Check auth jwt
-    fn check_auth_jwt(&mut self, _pgb: &mut PostgresBackend, _jwt_response: &[u8]) -> Result<()> {
-        bail!("JWT auth failed")
+    fn check_auth_jwt(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _jwt_response: &[u8],
+    ) -> Result<(), QueryError> {
+        Err(QueryError::Other(anyhow::anyhow!("JWT auth failed")))
    }
 }

@@ -76,17 +118,14 @@ impl AsyncWrite for Stream {
        self: Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
        buf: &[u8],
-    ) -> Poll<Result<usize, std::io::Error>> {
+    ) -> Poll<io::Result<usize>> {
        match self.get_mut() {
            Self::Unencrypted(stream) => Pin::new(stream).poll_write(cx, buf),
            Self::Tls(stream) => Pin::new(stream).poll_write(cx, buf),
            Self::Broken => unreachable!(),
        }
    }
-    fn poll_flush(
-        self: Pin<&mut Self>,
-        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
+    fn poll_flush(self: Pin<&mut Self>, cx: &mut std::task::Context<'_>) -> Poll<io::Result<()>> {
        match self.get_mut() {
            Self::Unencrypted(stream) => Pin::new(stream).poll_flush(cx),
            Self::Tls(stream) => Pin::new(stream).poll_flush(cx),
@@ -96,7 +135,7 @@ impl AsyncWrite for Stream {
    fn poll_shutdown(
        self: Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
+    ) -> Poll<io::Result<()>> {
        match self.get_mut() {
            Self::Unencrypted(stream) => Pin::new(stream).poll_shutdown(cx),
            Self::Tls(stream) => Pin::new(stream).poll_shutdown(cx),
@@ -109,7 +148,7 @@ impl AsyncRead for Stream {
        self: Pin<&mut Self>,
        cx: &mut std::task::Context<'_>,
        buf: &mut tokio::io::ReadBuf<'_>,
-    ) -> Poll<Result<(), std::io::Error>> {
+    ) -> Poll<io::Result<()>> {
        match self.get_mut() {
            Self::Unencrypted(stream) => Pin::new(stream).poll_read(cx, buf),
            Self::Tls(stream) => Pin::new(stream).poll_read(cx, buf),
@@ -120,12 +159,14 @@ impl AsyncRead for Stream {

 pub struct PostgresBackend {
    stream: Stream,
+
    // Output buffer. c.f. BeMessage::write why we are using BytesMut here.
+    // The data between 0 and "current position" as tracked by the bytes::Buf
+    // implementation of BytesMut, have already been written.
    buf_out: BytesMut,

    pub state: ProtoState,

-    md5_salt: [u8; 4],
    auth_type: AuthType,

    peer_addr: SocketAddr,
@@ -143,7 +184,7 @@ pub fn query_from_cstring(query_string: Bytes) -> Vec<u8> {
 }

 // Cast a byte slice to a string slice, dropping null terminator if there's one.
-fn cstr_to_str(bytes: &[u8]) -> Result<&str> {
+fn cstr_to_str(bytes: &[u8]) -> anyhow::Result<&str> {
    let without_null = bytes.strip_suffix(&[0]).unwrap_or(bytes);
    std::str::from_utf8(without_null).map_err(|e| e.into())
 }
@@ -153,14 +194,13 @@ impl PostgresBackend {
        socket: tokio::net::TcpStream,
        auth_type: AuthType,
        tls_config: Option<Arc<rustls::ServerConfig>>,
-    ) -> std::io::Result<Self> {
+    ) -> io::Result<Self> {
        let peer_addr = socket.peer_addr()?;

        Ok(Self {
            stream: Stream::Unencrypted(BufReader::new(socket)),
            buf_out: BytesMut::with_capacity(10 * 1024),
            state: ProtoState::Initialization,
-            md5_salt: [0u8; 4],
            auth_type,
            tls_config,
            peer_addr,
@@ -172,30 +212,68 @@ impl PostgresBackend {
    }

    /// Read full message or return None if connection is closed.
-    pub async fn read_message(&mut self) -> Result<Option<FeMessage>> {
+    pub async fn read_message(&mut self) -> Result<Option<FeMessage>, QueryError> {
        use ProtoState::*;
        match self.state {
            Initialization | Encrypted => FeStartupPacket::read_fut(&mut self.stream).await,
            Authentication | Established => FeMessage::read_fut(&mut self.stream).await,
            Closed => Ok(None),
        }
+        .map_err(QueryError::from)
    }

    /// Flush output buffer into the socket.
-    pub async fn flush(&mut self) -> std::io::Result<&mut Self> {
-        self.stream.write_all(&self.buf_out).await?;
+    pub async fn flush(&mut self) -> io::Result<()> {
+        while self.buf_out.has_remaining() {
+            let bytes_written = self.stream.write(self.buf_out.chunk()).await?;
+            self.buf_out.advance(bytes_written);
+        }
        self.buf_out.clear();
-        Ok(self)
+        Ok(())
    }

    /// Write message into internal output buffer.
-    pub fn write_message(&mut self, message: &BeMessage<'_>) -> Result<&mut Self, std::io::Error> {
+    pub fn write_message(&mut self, message: &BeMessage<'_>) -> io::Result<&mut Self> {
        BeMessage::write(&mut self.buf_out, message)?;
        Ok(self)
    }

+    /// Returns an AsyncWrite implementation that wraps all the data written
+    /// to it in CopyData messages, and writes them to the connection
+    ///
+    /// The caller is responsible for sending CopyOutResponse and CopyDone messages.
+    pub fn copyout_writer(&mut self) -> CopyDataWriter {
+        CopyDataWriter { pgb: self }
+    }
+
+    /// A polling function that tries to write all the data from 'buf_out' to the
+    /// underlying stream.
+    fn poll_write_buf(
+        &mut self,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Result<(), std::io::Error>> {
+        while self.buf_out.has_remaining() {
+            match Pin::new(&mut self.stream).poll_write(cx, self.buf_out.chunk()) {
+                Poll::Ready(Ok(bytes_written)) => {
+                    self.buf_out.advance(bytes_written);
+                }
+                Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
+                Poll::Pending => return Poll::Pending,
+            }
+        }
+        Poll::Ready(Ok(()))
+    }
+
+    fn poll_flush(&mut self, cx: &mut std::task::Context<'_>) -> Poll<Result<(), std::io::Error>> {
+        Pin::new(&mut self.stream).poll_flush(cx)
+    }
+
    // Wrapper for run_message_loop() that shuts down socket when we are done
-    pub async fn run<F, S>(mut self, handler: &mut impl Handler, shutdown_watcher: F) -> Result<()>
+    pub async fn run<F, S>(
+        mut self,
+        handler: &mut impl Handler,
+        shutdown_watcher: F,
+    ) -> Result<(), QueryError>
    where
        F: Fn() -> S,
        S: Future,
@@ -209,7 +287,7 @@ impl PostgresBackend {
        &mut self,
        handler: &mut impl Handler,
        shutdown_watcher: F,
-    ) -> Result<()>
+    ) -> Result<(), QueryError>
    where
        F: Fn() -> S,
        S: Future,
@@ -245,7 +323,7 @@ impl PostgresBackend {
                        return Ok(());
                    }
                }
-                Ok::<(), anyhow::Error>(())
+                Ok::<(), QueryError>(())
            } => {
                // Handshake complete.
                result?;
@@ -290,14 +368,14 @@ impl PostgresBackend {
            self.stream = Stream::Tls(Box::new(tls_stream));
            return Ok(());
        };
-        bail!("TLS already started");
+        anyhow::bail!("TLS already started");
    }

    async fn process_handshake_message(
        &mut self,
        handler: &mut impl Handler,
        msg: FeMessage,
-    ) -> Result<ProcessMsgResult> {
+    ) -> Result<ProcessMsgResult, QueryError> {
        assert!(self.state < ProtoState::Established);
        let have_tls = self.tls_config.is_some();
        match msg {
@@ -320,8 +398,13 @@ impl PostgresBackend {
                    }
                    FeStartupPacket::StartupMessage { .. } => {
                        if have_tls && !matches!(self.state, ProtoState::Encrypted) {
-                            self.write_message(&BeMessage::ErrorResponse("must connect with TLS"))?;
-                            bail!("client did not connect with TLS");
+                            self.write_message(&BeMessage::ErrorResponse(
+                                "must connect with TLS",
+                                None,
+                            ))?;
+                            return Err(QueryError::Other(anyhow::anyhow!(
+                                "client did not connect with TLS"
+                            )));
                        }

                        // NB: startup() may change self.auth_type -- we are using that in proxy code
@@ -337,13 +420,6 @@ impl PostgresBackend {
                                    .write_message(&BeMessage::ReadyForQuery)?;
                                self.state = ProtoState::Established;
                            }
-                            AuthType::MD5 => {
-                                rand::thread_rng().fill(&mut self.md5_salt);
-                                self.write_message(&BeMessage::AuthenticationMD5Password(
-                                    self.md5_salt,
-                                ))?;
-                                self.state = ProtoState::Authentication;
-                            }
                            AuthType::NeonJWT => {
                                self.write_message(&BeMessage::AuthenticationCleartextPassword)?;
                                self.state = ProtoState::Authentication;
@@ -364,20 +440,15 @@ impl PostgresBackend {

                match self.auth_type {
                    AuthType::Trust => unreachable!(),
-                    AuthType::MD5 => {
-                        let (_, md5_response) = m.split_last().context("protocol violation")?;
-
-                        if let Err(e) = handler.check_auth_md5(self, md5_response) {
-                            self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                            bail!("auth failed: {}", e);
-                        }
-                    }
                    AuthType::NeonJWT => {
                        let (_, jwt_response) = m.split_last().context("protocol violation")?;

                        if let Err(e) = handler.check_auth_jwt(self, jwt_response) {
-                            self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                            bail!("auth failed: {}", e);
+                            self.write_message(&BeMessage::ErrorResponse(
+                                &e.to_string(),
+                                Some(e.pg_error_code()),
+                            ))?;
+                            return Err(e);
                        }
                    }
                }
@@ -400,33 +471,28 @@ impl PostgresBackend {
        handler: &mut impl Handler,
        msg: FeMessage,
        unnamed_query_string: &mut Bytes,
-    ) -> Result<ProcessMsgResult> {
+    ) -> Result<ProcessMsgResult, QueryError> {
        // Allow only startup and password messages during auth. Otherwise client would be able to bypass auth
        // TODO: change that to proper top-level match of protocol state with separate message handling for each state
        assert!(self.state == ProtoState::Established);

        match msg {
            FeMessage::StartupPacket(_) | FeMessage::PasswordMessage(_) => {
-                bail!("protocol violation");
+                return Err(QueryError::Other(anyhow::anyhow!("protocol violation")));
            }

            FeMessage::Query(body) => {
                // remove null terminator
                let query_string = cstr_to_str(&body)?;

-                trace!("got query {:?}", query_string);
-                // xxx distinguish fatal and recoverable errors?
+                trace!("got query {query_string:?}");
                if let Err(e) = handler.process_query(self, query_string).await {
-                    // ":?" uses the alternate formatting style, which makes anyhow display the
-                    // full cause of the error, not just the top-level context + its trace.
-                    // We don't want to send that in the ErrorResponse though,
-                    // because it's not relevant to the compute node logs.
-                    error!("query handler for '{}' failed: {:?}", query_string, e);
-                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
-                    // TODO: untangle convoluted control flow
-                    if e.to_string().contains("failed to run") {
-                        return Ok(ProcessMsgResult::Break);
-                    }
+                    log_query_error(query_string, &e);
+                    let short_error = short_error(&e);
+                    self.write_message(&BeMessage::ErrorResponse(
+                        &short_error,
+                        Some(e.pg_error_code()),
+                    ))?;
                }
                self.write_message(&BeMessage::ReadyForQuery)?;
            }
@@ -451,11 +517,13 @@ impl PostgresBackend {

            FeMessage::Execute(_) => {
                let query_string = cstr_to_str(unnamed_query_string)?;
-                trace!("got execute {:?}", query_string);
-                // xxx distinguish fatal and recoverable errors?
+                trace!("got execute {query_string:?}");
                if let Err(e) = handler.process_query(self, query_string).await {
-                    error!("query handler for '{}' failed: {:?}", query_string, e);
-                    self.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
+                    log_query_error(query_string, &e);
+                    self.write_message(&BeMessage::ErrorResponse(
+                        &e.to_string(),
+                        Some(e.pg_error_code()),
+                    ))?;
                }
                // NOTE there is no ReadyForQuery message. This handler is used
                // for basebackup and it uses CopyOut which doesn't require
@@ -474,10 +542,99 @@ impl PostgresBackend {
            // We prefer explicit pattern matching to wildcards, because
            // this helps us spot the places where new variants are missing
            FeMessage::CopyData(_) | FeMessage::CopyDone | FeMessage::CopyFail => {
-                bail!("unexpected message type: {:?}", msg);
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "unexpected message type: {:?}",
+                    msg
+                )));
            }
        }

        Ok(ProcessMsgResult::Continue)
    }
 }
+
+///
+/// A tokio::io::AsyncWrite implementation that wraps all data written to it in CopyData
+/// messages.
+///
+
+pub struct CopyDataWriter<'a> {
+    pgb: &'a mut PostgresBackend,
+}
+
+impl<'a> AsyncWrite for CopyDataWriter<'a> {
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+        buf: &[u8],
+    ) -> Poll<Result<usize, std::io::Error>> {
+        let this = self.get_mut();
+
+        // It's not strictly required to flush between each message, but makes it easier
+        // to view in wireshark, and usually the messages that the callers write are
+        // decently-sized anyway.
+        match this.pgb.poll_write_buf(cx) {
+            Poll::Ready(Ok(())) => {}
+            Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
+            Poll::Pending => return Poll::Pending,
+        }
+
+        // CopyData
+        // XXX: if the input is large, we should split it into multiple messages.
+        // Not sure what the threshold should be, but the ultimate hard limit is that
+        // the length cannot exceed u32.
+        this.pgb.write_message(&BeMessage::CopyData(buf))?;
+
+        Poll::Ready(Ok(buf.len()))
+    }
+
+    fn poll_flush(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Result<(), std::io::Error>> {
+        let this = self.get_mut();
+        match this.pgb.poll_write_buf(cx) {
+            Poll::Ready(Ok(())) => {}
+            Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
+            Poll::Pending => return Poll::Pending,
+        }
+        this.pgb.poll_flush(cx)
+    }
+    fn poll_shutdown(
+        self: Pin<&mut Self>,
+        cx: &mut std::task::Context<'_>,
+    ) -> Poll<Result<(), std::io::Error>> {
+        let this = self.get_mut();
+        match this.pgb.poll_write_buf(cx) {
+            Poll::Ready(Ok(())) => {}
+            Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
+            Poll::Pending => return Poll::Pending,
+        }
+        this.pgb.poll_flush(cx)
+    }
+}
+
+pub fn short_error(e: &QueryError) -> String {
+    match e {
+        QueryError::Disconnected(connection_error) => connection_error.to_string(),
+        QueryError::Other(e) => format!("{e:#}"),
+    }
+}
+
+pub(super) fn log_query_error(query: &str, e: &QueryError) {
+    match e {
+        QueryError::Disconnected(ConnectionError::Socket(io_error)) => {
+            if is_expected_io_error(io_error) {
+                info!("query handler for '{query}' failed with expected io error: {io_error}");
+            } else {
+                error!("query handler for '{query}' failed with io error: {io_error}");
+            }
+        }
+        QueryError::Disconnected(other_connection_error) => {
+            error!("query handler for '{query}' failed with connection error: {other_connection_error:?}")
+        }
+        QueryError::Other(e) => {
+            error!("query handler for '{query}' failed: {e:?}");
+        }
+    }
+}
--- a/libs/utils/tests/ssl_test.rs
+++ b/libs/utils/tests/ssl_test.rs
@@ -9,7 +9,10 @@ use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
 use bytes::{Buf, BufMut, Bytes, BytesMut};
 use once_cell::sync::Lazy;

-use utils::postgres_backend::{AuthType, Handler, PostgresBackend};
+use utils::{
+    postgres_backend::{AuthType, Handler, PostgresBackend},
+    postgres_backend_async::QueryError,
+};

 fn make_tcp_pair() -> (TcpStream, TcpStream) {
    let listener = TcpListener::bind("127.0.0.1:0").unwrap();
@@ -105,7 +108,7 @@ fn ssl() {
            &mut self,
            _pgb: &mut PostgresBackend,
            query_string: &str,
-        ) -> anyhow::Result<()> {
+        ) -> Result<(), QueryError> {
            self.got_query = query_string == QUERY;
            Ok(())
        }
@@ -152,7 +155,7 @@ fn no_ssl() {
            &mut self,
            _pgb: &mut PostgresBackend,
            _query_string: &str,
-        ) -> anyhow::Result<()> {
+        ) -> Result<(), QueryError> {
            panic!()
        }
    }
@@ -212,7 +215,7 @@ fn server_forces_ssl() {
            &mut self,
            _pgb: &mut PostgresBackend,
            _query_string: &str,
-        ) -> anyhow::Result<()> {
+        ) -> Result<(), QueryError> {
            panic!()
        }
    }
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -2,6 +2,7 @@
 name = "pageserver"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [features]
 default = []
@@ -9,8 +10,6 @@ default = []
 # which adds some runtime cost to run tests on outage conditions
 testing = ["fail/failpoints"]

-profiling = ["pprof"]
-
 [dependencies]
 amplify_num = { git = "https://github.com/hlinnaka/rust-amplify.git", branch = "unsigned-int-perf" }
 anyhow = { version = "1.0", features = ["backtrace"] }
@@ -39,7 +38,6 @@ pin-project-lite = "0.2.7"
 postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
 postgres-protocol = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
 postgres-types = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
-pprof = { git = "https://github.com/neondatabase/pprof-rs.git", branch = "wallclock-profiling", features = ["flamegraph"], optional = true }
 rand = "0.8.3"
 regex = "1.4.5"
 rstar = "0.9.3"
@@ -49,7 +47,7 @@ serde_json = { version = "1.0", features = ["raw_value"] }
 serde_with = "2.0"
 signal-hook = "0.3.10"
 svg_fmt = "0.4.1"
-tar = "0.4.33"
+tokio-tar = { git = "https://github.com/neondatabase/tokio-tar.git", rev="404df61437de0feef49ba2ccdbdd94eb8ad6e142" }
 thiserror = "1.0"
 tokio = { version = "1.17", features = ["process", "sync", "macros", "fs", "rt", "io-util", "time"] }
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
@@ -69,7 +67,7 @@ storage_broker = { version = "0.1", path = "../storage_broker" }
 tenant_size_model = { path = "../libs/tenant_size_model" }
 utils = { path = "../libs/utils" }
 workspace_hack = { version = "0.1", path = "../workspace_hack" }
-reqwest = "0.11.13"
+reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }

 [dev-dependencies]
 criterion = "0.4"
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -10,20 +10,25 @@
 //! This module is responsible for creation of such tarball
 //! from data stored in object storage.
 //!
-use anyhow::{anyhow, bail, ensure, Context, Result};
+use anyhow::{anyhow, ensure, Context, Result};
 use bytes::{BufMut, BytesMut};
 use fail::fail_point;
-use itertools::Itertools;
 use std::fmt::Write as FmtWrite;
-use std::io;
-use std::io::Write;
-use std::sync::Arc;
 use std::time::SystemTime;
-use tar::{Builder, EntryType, Header};
+use tokio::io;
+use tokio::io::AsyncWrite;
 use tracing::*;

-use crate::task_mgr;
-use crate::tenant::{with_ondemand_download, PageReconstructResult, Timeline};
+/// NB: This relies on a modified version of tokio_tar that does *not* write the
+/// end-of-archive marker (1024 zero bytes), when the Builder struct is dropped
+/// without explicitly calling 'finish' or 'into_inner'!
+///
+/// See https://github.com/neondatabase/tokio-tar/pull/1
+///
+use tokio_tar::{Builder, EntryType, Header};
+
+use crate::tenant::TimelineRequestContext;
+use crate::tenant::{PageReconstructError, Timeline};
 use pageserver_api::reltag::{RelTag, SlruKind};

 use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID};
@@ -34,116 +39,135 @@ use postgres_ffi::PG_TLI;
 use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
 use utils::lsn::Lsn;

+/// Create basebackup with non-rel data in it.
+/// Only include relational data if 'full_backup' is true.
+///
+/// Currently we use empty 'req_lsn' in two cases:
+///  * During the basebackup right after timeline creation
+///  * When working without safekeepers. In this situation it is important to match the lsn
+///    we are taking basebackup on with the lsn that is used in pageserver's walreceiver
+///    to start the replication.
+pub async fn send_basebackup_tarball<'a, W>(
+    write: &'a mut W,
+    timeline: &'a Timeline,
+    req_lsn: Option<Lsn>,
+    prev_lsn: Option<Lsn>,
+    full_backup: bool,
+    ctx: &'a TimelineRequestContext,
+) -> Result<(), PageReconstructError>
+where
+    W: AsyncWrite + Send + Sync + Unpin,
+{
+    // Compute postgres doesn't have any previous WAL files, but the first
+    // record that it's going to write needs to include the LSN of the
+    // previous record (xl_prev). We include prev_record_lsn in the
+    // "zenith.signal" file, so that postgres can read it during startup.
+    //
+    // We don't keep full history of record boundaries in the page server,
+    // however, only the predecessor of the latest record on each
+    // timeline. So we can only provide prev_record_lsn when you take a
+    // base backup at the end of the timeline, i.e. at last_record_lsn.
+    // Even at the end of the timeline, we sometimes don't have a valid
+    // prev_lsn value; that happens if the timeline was just branched from
+    // an old LSN and it doesn't have any WAL of its own yet. We will set
+    // prev_lsn to Lsn(0) if we cannot provide the correct value.
+    let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
+        // Backup was requested at a particular LSN. The caller should've
+        // already checked that it's a valid LSN.
+
+        // If the requested point is the end of the timeline, we can
+        // provide prev_lsn. (get_last_record_rlsn() might return it as
+        // zero, though, if no WAL has been generated on this timeline
+        // yet.)
+        let end_of_timeline = timeline.get_last_record_rlsn();
+        if req_lsn == end_of_timeline.last {
+            (end_of_timeline.prev, req_lsn)
+        } else {
+            (Lsn(0), req_lsn)
+        }
+    } else {
+        // Backup was requested at end of the timeline.
+        let end_of_timeline = timeline.get_last_record_rlsn();
+        (end_of_timeline.prev, end_of_timeline.last)
+    };
+
+    // Consolidate the derived and the provided prev_lsn values
+    let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
+        if backup_prev != Lsn(0) && backup_prev != provided_prev_lsn {
+            return Err(PageReconstructError::Other(anyhow!(
+                "prev LSN doesn't match"
+            )));
+        }
+        provided_prev_lsn
+    } else {
+        backup_prev
+    };
+
+    info!(
+        "taking basebackup lsn={}, prev_lsn={} (full_backup={})",
+        backup_lsn, prev_lsn, full_backup
+    );
+
+    let basebackup = Basebackup {
+        ar: Builder::new_non_terminated(write),
+        timeline,
+        lsn: backup_lsn,
+        prev_record_lsn: prev_lsn,
+        full_backup,
+        ctx,
+    };
+    basebackup
+        .send_tarball()
+        .instrument(info_span!("send_tarball", backup_lsn=%backup_lsn))
+        .await
+}
+
 /// This is short-living object only for the time of tarball creation,
 /// created mostly to avoid passing a lot of parameters between various functions
 /// used for constructing tarball.
-pub struct Basebackup<'a, W>
+struct Basebackup<'a, W>
 where
-    W: Write,
+    W: AsyncWrite + Send + Sync + Unpin,
 {
-    ar: Builder<AbortableWrite<W>>,
-    timeline: &'a Arc<Timeline>,
-    pub lsn: Lsn,
+    ar: Builder<&'a mut W>,
+    timeline: &'a Timeline,
+    lsn: Lsn,
    prev_record_lsn: Lsn,
    full_backup: bool,
-    finished: bool,
+    ctx: &'a TimelineRequestContext,
 }

-// Create basebackup with non-rel data in it.
-// Only include relational data if 'full_backup' is true.
-//
-// Currently we use empty lsn in two cases:
-//  * During the basebackup right after timeline creation
-//  * When working without safekeepers. In this situation it is important to match the lsn
-//    we are taking basebackup on with the lsn that is used in pageserver's walreceiver
-//    to start the replication.
 impl<'a, W> Basebackup<'a, W>
 where
-    W: Write,
+    W: AsyncWrite + Send + Sync + Unpin,
 {
-    pub fn new(
-        write: W,
-        timeline: &'a Arc<Timeline>,
-        req_lsn: Option<Lsn>,
-        prev_lsn: Option<Lsn>,
-        full_backup: bool,
-    ) -> Result<Basebackup<'a, W>> {
-        // Compute postgres doesn't have any previous WAL files, but the first
-        // record that it's going to write needs to include the LSN of the
-        // previous record (xl_prev). We include prev_record_lsn in the
-        // "zenith.signal" file, so that postgres can read it during startup.
-        //
-        // We don't keep full history of record boundaries in the page server,
-        // however, only the predecessor of the latest record on each
-        // timeline. So we can only provide prev_record_lsn when you take a
-        // base backup at the end of the timeline, i.e. at last_record_lsn.
-        // Even at the end of the timeline, we sometimes don't have a valid
-        // prev_lsn value; that happens if the timeline was just branched from
-        // an old LSN and it doesn't have any WAL of its own yet. We will set
-        // prev_lsn to Lsn(0) if we cannot provide the correct value.
-        let (backup_prev, backup_lsn) = if let Some(req_lsn) = req_lsn {
-            // Backup was requested at a particular LSN. The caller should've
-            // already checked that it's a valid LSN.
-
-            // If the requested point is the end of the timeline, we can
-            // provide prev_lsn. (get_last_record_rlsn() might return it as
-            // zero, though, if no WAL has been generated on this timeline
-            // yet.)
-            let end_of_timeline = timeline.get_last_record_rlsn();
-            if req_lsn == end_of_timeline.last {
-                (end_of_timeline.prev, req_lsn)
-            } else {
-                (Lsn(0), req_lsn)
-            }
-        } else {
-            // Backup was requested at end of the timeline.
-            let end_of_timeline = timeline.get_last_record_rlsn();
-            (end_of_timeline.prev, end_of_timeline.last)
-        };
-
-        // Consolidate the derived and the provided prev_lsn values
-        let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
-            if backup_prev != Lsn(0) {
-                ensure!(backup_prev == provided_prev_lsn)
-            }
-            provided_prev_lsn
-        } else {
-            backup_prev
-        };
-
-        info!(
-            "taking basebackup lsn={}, prev_lsn={} (full_backup={})",
-            backup_lsn, prev_lsn, full_backup
-        );
-
-        Ok(Basebackup {
-            ar: Builder::new(AbortableWrite::new(write)),
-            timeline,
-            lsn: backup_lsn,
-            prev_record_lsn: prev_lsn,
-            full_backup,
-            finished: false,
-        })
-    }
-
-    pub fn send_tarball(mut self) -> anyhow::Result<()> {
+    async fn send_tarball(mut self) -> Result<(), PageReconstructError> {
        // TODO include checksum

        // Create pgdata subdirs structure
        for dir in PGDATA_SUBDIRS.iter() {
            let header = new_tar_header_dir(dir)?;
-            self.ar.append(&header, &mut io::empty())?;
+            self.ar
+                .append(&header, &mut io::empty())
+                .await
+                .context("could not add directory to basebackup tarball")?;
        }

-        // Send empty config files.
+        // Send config files.
        for filepath in PGDATA_SPECIAL_FILES.iter() {
            if *filepath == "pg_hba.conf" {
                let data = PG_HBA.as_bytes();
                let header = new_tar_header(filepath, data.len() as u64)?;
-                self.ar.append(&header, data)?;
+                self.ar
+                    .append(&header, data)
+                    .await
+                    .context("could not add config file to basebackup tarball")?;
            } else {
                let header = new_tar_header(filepath, 0)?;
-                self.ar.append(&header, &mut io::empty())?;
+                self.ar
+                    .append(&header, &mut io::empty())
+                    .await
+                    .context("could not add config file to basebackup tarball")?;
            }
        }

@@ -153,75 +177,93 @@ where
            SlruKind::MultiXactOffsets,
            SlruKind::MultiXactMembers,
        ] {
-            for segno in
-                with_ondemand_download_sync(|| self.timeline.list_slru_segments(kind, self.lsn))?
+            for segno in self
+                .timeline
+                .list_slru_segments(kind, self.lsn, self.ctx)
+                .await?
            {
-                self.add_slru_segment(kind, segno)?;
+                self.add_slru_segment(kind, segno).await?;
            }
        }

        // Create tablespace directories
        for ((spcnode, dbnode), has_relmap_file) in
-            with_ondemand_download_sync(|| self.timeline.list_dbdirs(self.lsn))?
+            self.timeline.list_dbdirs(self.lsn, self.ctx).await?
        {
-            self.add_dbdir(spcnode, dbnode, has_relmap_file)?;
+            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;

            // Gather and send relational files in each database if full backup is requested.
            if self.full_backup {
-                for rel in with_ondemand_download_sync(|| {
-                    self.timeline.list_rels(spcnode, dbnode, self.lsn)
-                })? {
-                    self.add_rel(rel)?;
+                for rel in self
+                    .timeline
+                    .list_rels(spcnode, dbnode, self.lsn, self.ctx)
+                    .await?
+                {
+                    self.add_rel(rel).await?;
                }
            }
        }
-        for xid in with_ondemand_download_sync(|| self.timeline.list_twophase_files(self.lsn))? {
-            self.add_twophase_file(xid)?;
+        for xid in self
+            .timeline
+            .list_twophase_files(self.lsn, self.ctx)
+            .await?
+        {
+            self.add_twophase_file(xid).await?;
        }

        fail_point!("basebackup-before-control-file", |_| {
-            bail!("failpoint basebackup-before-control-file")
+            Err(PageReconstructError::from(anyhow!(
+                "failpoint basebackup-before-control-file"
+            )))
        });

        // Generate pg_control and bootstrap WAL segment.
-        self.add_pgcontrol_file()?;
-        self.ar.finish()?;
-        self.finished = true;
+        self.add_pgcontrol_file().await?;
+        self.ar.finish().await.context("could not finish tarball")?;
        debug!("all tarred up!");
        Ok(())
    }

-    fn add_rel(&mut self, tag: RelTag) -> anyhow::Result<()> {
-        let nblocks =
-            with_ondemand_download_sync(|| self.timeline.get_rel_size(tag, self.lsn, false))?;
-
-        // Function that adds relation segment data to archive
-        let mut add_file = |segment_index, data: &Vec<u8>| -> anyhow::Result<()> {
-            let file_name = tag.to_segfile_name(segment_index as u32);
-            let header = new_tar_header(&file_name, data.len() as u64)?;
-            self.ar.append(&header, data.as_slice())?;
-            Ok(())
-        };
+    async fn add_rel(&mut self, tag: RelTag) -> Result<(), PageReconstructError> {
+        let nblocks = self
+            .timeline
+            .get_rel_size(tag, self.lsn, false, self.ctx)
+            .await?;

        // If the relation is empty, create an empty file
        if nblocks == 0 {
-            add_file(0, &vec![])?;
+            let file_name = tag.to_segfile_name(0);
+            let header = new_tar_header(&file_name, 0)?;
+            self.ar
+                .append(&header, &mut io::empty())
+                .await
+                .context("could not write empty relfile to tar stream")?;
            return Ok(());
        }

        // Add a file for each chunk of blocks (aka segment)
-        let chunks = (0..nblocks).chunks(RELSEG_SIZE as usize);
-        for (seg, blocks) in chunks.into_iter().enumerate() {
+        let mut startblk = 0;
+        let mut seg = 0;
+        while startblk < nblocks {
+            let endblk = std::cmp::min(startblk + RELSEG_SIZE, nblocks);
            let mut segment_data: Vec<u8> = vec![];
-            for blknum in blocks {
+            for blknum in startblk..endblk {
                let img = self
                    .timeline
-                    .get_rel_page_at_lsn(tag, blknum, self.lsn, false)
-                    .no_ondemand_download()?;
+                    .get_rel_page_at_lsn(tag, blknum, self.lsn, false, self.ctx)
+                    .await?;
                segment_data.extend_from_slice(&img[..]);
            }

-            add_file(seg, &segment_data)?;
+            let file_name = tag.to_segfile_name(seg as u32);
+            let header = new_tar_header(&file_name, segment_data.len() as u64)?;
+            self.ar
+                .append(&header, segment_data.as_slice())
+                .await
+                .context("could not write relfile segment to tar stream")?;
+
+            seg += 1;
+            startblk = endblk;
        }

        Ok(())
@@ -230,17 +272,18 @@ where
    //
    // Generate SLRU segment files from repository.
    //
-    fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
-        let nblocks = with_ondemand_download_sync(|| {
-            self.timeline.get_slru_segment_size(slru, segno, self.lsn)
-        })?;
+    async fn add_slru_segment(&mut self, slru: SlruKind, segno: u32) -> anyhow::Result<()> {
+        let nblocks = self
+            .timeline
+            .get_slru_segment_size(slru, segno, self.lsn, self.ctx)
+            .await?;

        let mut slru_buf: Vec<u8> = Vec::with_capacity(nblocks as usize * BLCKSZ as usize);
        for blknum in 0..nblocks {
-            let img = with_ondemand_download_sync(|| {
-                self.timeline
-                    .get_slru_page_at_lsn(slru, segno, blknum, self.lsn)
-            })?;
+            let img = self
+                .timeline
+                .get_slru_page_at_lsn(slru, segno, blknum, self.lsn, self.ctx)
+                .await?;

            if slru == SlruKind::Clog {
                ensure!(img.len() == BLCKSZ as usize || img.len() == BLCKSZ as usize + 8);
@@ -253,7 +296,7 @@ where

        let segname = format!("{}/{:>04X}", slru.to_str(), segno);
        let header = new_tar_header(&segname, slru_buf.len() as u64)?;
-        self.ar.append(&header, slru_buf.as_slice())?;
+        self.ar.append(&header, slru_buf.as_slice()).await?;

        trace!("Added to basebackup slru {} relsize {}", segname, nblocks);
        Ok(())
@@ -265,16 +308,17 @@ where
    // Each directory contains a PG_VERSION file, and the default database
    // directories also contain pg_filenode.map files.
    //
-    fn add_dbdir(
+    async fn add_dbdir(
        &mut self,
        spcnode: u32,
        dbnode: u32,
        has_relmap_file: bool,
    ) -> anyhow::Result<()> {
        let relmap_img = if has_relmap_file {
-            let img = with_ondemand_download_sync(|| {
-                self.timeline.get_relmap_file(spcnode, dbnode, self.lsn)
-            })?;
+            let img = self
+                .timeline
+                .get_relmap_file(spcnode, dbnode, self.lsn, self.ctx)
+                .await?;
            ensure!(img.len() == 512);
            Some(img)
        } else {
@@ -284,14 +328,14 @@ where
        if spcnode == GLOBALTABLESPACE_OID {
            let pg_version_str = self.timeline.pg_version.to_string();
            let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?;
-            self.ar.append(&header, pg_version_str.as_bytes())?;
+            self.ar.append(&header, pg_version_str.as_bytes()).await?;

            info!("timeline.pg_version {}", self.timeline.pg_version);

            if let Some(img) = relmap_img {
                // filenode map for global tablespace
                let header = new_tar_header("global/pg_filenode.map", img.len() as u64)?;
-                self.ar.append(&header, &img[..])?;
+                self.ar.append(&header, &img[..]).await?;
            } else {
                warn!("global/pg_filenode.map is missing");
            }
@@ -309,8 +353,8 @@ where
            if !has_relmap_file
                && self
                    .timeline
-                    .list_rels(spcnode, dbnode, self.lsn)
-                    .no_ondemand_download()?
+                    .list_rels(spcnode, dbnode, self.lsn, self.ctx)
+                    .await?
                    .is_empty()
            {
                return Ok(());
@@ -321,18 +365,18 @@ where
            // Append dir path for each database
            let path = format!("base/{}", dbnode);
            let header = new_tar_header_dir(&path)?;
-            self.ar.append(&header, &mut io::empty())?;
+            self.ar.append(&header, &mut io::empty()).await?;

            if let Some(img) = relmap_img {
                let dst_path = format!("base/{}/PG_VERSION", dbnode);

                let pg_version_str = self.timeline.pg_version.to_string();
                let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;
-                self.ar.append(&header, pg_version_str.as_bytes())?;
+                self.ar.append(&header, pg_version_str.as_bytes()).await?;

                let relmap_path = format!("base/{}/pg_filenode.map", dbnode);
                let header = new_tar_header(&relmap_path, img.len() as u64)?;
-                self.ar.append(&header, &img[..])?;
+                self.ar.append(&header, &img[..]).await?;
            }
        };
        Ok(())
@@ -341,8 +385,11 @@ where
    //
    // Extract twophase state files
    //
-    fn add_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> {
-        let img = with_ondemand_download_sync(|| self.timeline.get_twophase_file(xid, self.lsn))?;
+    async fn add_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> {
+        let img = self
+            .timeline
+            .get_twophase_file(xid, self.lsn, self.ctx)
+            .await?;

        let mut buf = BytesMut::new();
        buf.extend_from_slice(&img[..]);
@@ -350,7 +397,7 @@ where
        buf.put_u32_le(crc);
        let path = format!("pg_twophase/{:>08X}", xid);
        let header = new_tar_header(&path, buf.len() as u64)?;
-        self.ar.append(&header, &buf[..])?;
+        self.ar.append(&header, &buf[..]).await?;

        Ok(())
    }
@@ -359,7 +406,7 @@ where
    // Add generated pg_control file and bootstrap WAL segment.
    // Also send zenith.signal file with extra bootstrap data.
    //
-    fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
+    async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
        // add zenith.signal file
        let mut zenith_signal = String::new();
        if self.prev_record_lsn == Lsn(0) {
@@ -371,17 +418,23 @@ where
        } else {
            write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
        }
-        self.ar.append(
-            &new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
-            zenith_signal.as_bytes(),
-        )?;
+        self.ar
+            .append(
+                &new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
+                zenith_signal.as_bytes(),
+            )
+            .await?;

-        let checkpoint_bytes =
-            with_ondemand_download_sync(|| self.timeline.get_checkpoint(self.lsn))
-                .context("failed to get checkpoint bytes")?;
-        let pg_control_bytes =
-            with_ondemand_download_sync(|| self.timeline.get_control_file(self.lsn))
-                .context("failed get control bytes")?;
+        let checkpoint_bytes = self
+            .timeline
+            .get_checkpoint(self.lsn, self.ctx)
+            .await
+            .context("failed to get checkpoint bytes")?;
+        let pg_control_bytes = self
+            .timeline
+            .get_control_file(self.lsn, self.ctx)
+            .await
+            .context("failed get control bytes")?;

        let (pg_control_bytes, system_identifier) = postgres_ffi::generate_pg_control(
            &pg_control_bytes,
@@ -392,7 +445,7 @@ where

        //send pg_control
        let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
-        self.ar.append(&header, &pg_control_bytes[..])?;
+        self.ar.append(&header, &pg_control_bytes[..]).await?;

        //send wal segment
        let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE);
@@ -404,24 +457,11 @@ where
            postgres_ffi::generate_wal_segment(segno, system_identifier, self.timeline.pg_version)
                .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?;
        ensure!(wal_seg.len() == WAL_SEGMENT_SIZE);
-        self.ar.append(&header, &wal_seg[..])?;
+        self.ar.append(&header, &wal_seg[..]).await?;
        Ok(())
    }
 }

-impl<'a, W> Drop for Basebackup<'a, W>
-where
-    W: Write,
-{
-    /// If the basebackup was not finished, prevent the Archive::drop() from
-    /// writing the end-of-archive marker.
-    fn drop(&mut self) {
-        if !self.finished {
-            self.ar.get_mut().abort();
-        }
-    }
-}
-
 //
 // Create new tarball entry header
 //
@@ -457,57 +497,3 @@ fn new_tar_header_dir(path: &str) -> anyhow::Result<Header> {
    header.set_cksum();
    Ok(header)
 }
-
-/// A wrapper that passes through all data to the underlying Write,
-/// until abort() is called.
-///
-/// tar::Builder has an annoying habit of finishing the archive with
-/// a valid tar end-of-archive marker (two 512-byte sectors of zeros),
-/// even if an error occurs and we don't finish building the archive.
-/// We'd rather abort writing the tarball immediately than construct
-/// a seemingly valid but incomplete archive. This wrapper allows us
-/// to swallow the end-of-archive marker that Builder::drop() emits,
-/// without writing it to the underlying sink.
-///
-struct AbortableWrite<W> {
-    w: W,
-    aborted: bool,
-}
-
-impl<W> AbortableWrite<W> {
-    pub fn new(w: W) -> Self {
-        AbortableWrite { w, aborted: false }
-    }
-
-    pub fn abort(&mut self) {
-        self.aborted = true;
-    }
-}
-
-impl<W> Write for AbortableWrite<W>
-where
-    W: Write,
-{
-    fn write(&mut self, data: &[u8]) -> io::Result<usize> {
-        if self.aborted {
-            Ok(data.len())
-        } else {
-            self.w.write(data)
-        }
-    }
-    fn flush(&mut self) -> io::Result<()> {
-        if self.aborted {
-            Ok(())
-        } else {
-            self.w.flush()
-        }
-    }
-}
-
-fn with_ondemand_download_sync<F, T>(f: F) -> anyhow::Result<T>
-where
-    F: Send + Fn() -> PageReconstructResult<T>,
-    T: Send,
-{
-    task_mgr::COMPUTE_REQUEST_RUNTIME.block_on(with_ondemand_download(f))
-}
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -13,8 +13,8 @@ use tracing::*;
 use metrics::set_build_info_metric;
 use pageserver::{
    config::{defaults::*, PageServerConf},
-    http, page_cache, page_service, profiling, task_mgr,
-    task_mgr::TaskKind,
+    context::{DownloadBehavior, RequestContext, TaskKind},
+    http, page_cache, page_service, task_mgr,
    task_mgr::{
        BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME,
    },
@@ -40,8 +40,6 @@ const FEATURES: &[&str] = &[
    "testing",
    #[cfg(feature = "fail/failpoints")]
    "fail/failpoints",
-    #[cfg(feature = "profiling")]
-    "profiling",
 ];

 fn version() -> String {
@@ -247,15 +245,12 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
    // Install signal handlers
    let signals = signals::install_shutdown_handlers()?;

-    // Start profiler (if enabled)
-    let profiler_guard = profiling::init_profiler(conf);
-
    // Launch broker client
    WALRECEIVER_RUNTIME.block_on(pageserver::walreceiver::init_broker_client(conf))?;

    // Initialize authentication for incoming connections
    let auth = match &conf.auth_type {
-        AuthType::Trust | AuthType::MD5 => None,
+        AuthType::Trust => None,
        AuthType::NeonJWT => {
            // unwrap is ok because check is performed when creating config, so path is set and file exists
            let key_path = conf.auth_validation_public_key_path.as_ref().unwrap();
@@ -264,19 +259,35 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
    };
    info!("Using auth: {:#?}", conf.auth_type);

-    match var("ZENITH_AUTH_TOKEN") {
-        Ok(v) => {
+    // TODO: remove ZENITH_AUTH_TOKEN once it's not used anywhere in development/staging/prod configuration.
+    match (var("ZENITH_AUTH_TOKEN"), var("NEON_AUTH_TOKEN")) {
+        (old, Ok(v)) => {
            info!("Loaded JWT token for authentication with Safekeeper");
+            if let Ok(v_old) = old {
+                warn!(
+                    "JWT token for Safekeeper is specified twice, ZENITH_AUTH_TOKEN is deprecated"
+                );
+                if v_old != v {
+                    warn!("JWT token for Safekeeper has two different values, choosing NEON_AUTH_TOKEN");
+                }
+            }
            pageserver::config::SAFEKEEPER_AUTH_TOKEN
                .set(Arc::new(v))
                .map_err(|_| anyhow!("Could not initialize SAFEKEEPER_AUTH_TOKEN"))?;
        }
-        Err(VarError::NotPresent) => {
+        (Ok(v), _) => {
+            info!("Loaded JWT token for authentication with Safekeeper");
+            warn!("Please update pageserver configuration: the JWT token should be NEON_AUTH_TOKEN, not ZENITH_AUTH_TOKEN");
+            pageserver::config::SAFEKEEPER_AUTH_TOKEN
+                .set(Arc::new(v))
+                .map_err(|_| anyhow!("Could not initialize SAFEKEEPER_AUTH_TOKEN"))?;
+        }
+        (_, Err(VarError::NotPresent)) => {
            info!("No JWT token for authentication with Safekeeper detected");
        }
-        Err(e) => {
+        (_, Err(e)) => {
            return Err(e).with_context(|| {
-                "Failed to either load to detect non-present ZENITH_AUTH_TOKEN environment variable"
+                "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable"
            })
        }
    };
@@ -292,61 +303,79 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
    {
        let _rt_guard = MGMT_REQUEST_RUNTIME.enter();

+        let mgmt_ctx = RequestContext::new(TaskKind::HttpEndpointListener, DownloadBehavior::Error);
+        let cancellation_token = Box::leak(Box::new(mgmt_ctx.cancellation_token().clone()));
        let router = http::make_router(conf, auth.clone(), remote_storage)?
            .build()
            .map_err(|err| anyhow!(err))?;
        let service = utils::http::RouterService::new(router).unwrap();
        let server = hyper::Server::from_tcp(http_listener)?
            .serve(service)
-            .with_graceful_shutdown(task_mgr::shutdown_watcher());
+            .with_graceful_shutdown(cancellation_token.cancelled());

        task_mgr::spawn(
            MGMT_REQUEST_RUNTIME.handle(),
-            TaskKind::HttpEndpointListener,
-            None,
-            None,
            "http endpoint listener",
            true,
            async {
-                server.await?;
-                Ok(())
+                match server.await {
+                    Ok(()) => info!("HTTP endpoint listener shut down"),
+                    Err(err) => error!("HTTP endpoint listener shut down with error: {err:?}"),
+                }
            },
        );
+    }

-        if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
-            task_mgr::spawn(
-                MGMT_REQUEST_RUNTIME.handle(),
-                TaskKind::MetricsCollection,
-                None,
-                None,
-                "consumption metrics collection",
-                true,
-                async move {
-                    pageserver::billing_metrics::collect_metrics(
-                        metric_collection_endpoint,
-                        conf.metric_collection_interval,
-                    )
-                    .instrument(info_span!("metrics_collection"))
-                    .await?;
-                    Ok(())
-                },
-            );
-        }
+    if let Some(metric_collection_endpoint) = &conf.metric_collection_endpoint {
+        let metrics_ctx = RequestContext::new(
+            TaskKind::MetricsCollection,
+            DownloadBehavior::Error, // metrics collector shouldn't be downloading anything
+        );
+        task_mgr::spawn(
+            MGMT_REQUEST_RUNTIME.handle(),
+            "consumption metrics collection",
+            true,
+            pageserver::consumption_metrics::collect_metrics(
+                metric_collection_endpoint,
+                conf.metric_collection_interval,
+                conf.id,
+                metrics_ctx,
+            )
+            .instrument(info_span!("metrics_collection")),
+        );
    }

    // Spawn a task to listen for libpq connections. It will spawn further tasks
    // for each connection. We created the listener earlier already.
-    task_mgr::spawn(
-        COMPUTE_REQUEST_RUNTIME.handle(),
-        TaskKind::LibpqEndpointListener,
-        None,
-        None,
-        "libpq endpoint listener",
-        true,
-        async move {
-            page_service::libpq_listener_main(conf, auth, pageserver_listener, conf.auth_type).await
-        },
-    );
+    {
+        let libpq_ctx = RequestContext::new(
+            TaskKind::LibpqEndpointListener,
+            // listener task shouldn't need to download anything. (We will
+            // create a separate sub-context for each connection, each with its
+            // own download behavior. This context is used only to listen and
+            // accept connections.)
+            DownloadBehavior::Error,
+        );
+        task_mgr::spawn(
+            COMPUTE_REQUEST_RUNTIME.handle(),
+            "libpq endpoint listener",
+            true,
+            async move {
+                match page_service::libpq_listener_main(
+                    conf,
+                    auth,
+                    pageserver_listener,
+                    conf.auth_type,
+                    libpq_ctx,
+                )
+                .await
+                {
+                    Ok(()) => info!("libpq endpoint listener shut down"),
+                    Err(err) => error!("libpq endpoint listener shut down with error: {err:?}"),
+                }
+            },
+        );
+    }

    // All started up! Now just sit and wait for shutdown signal.
    signals.handle(|signal| match signal {
@@ -355,7 +384,6 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
                "Got {}. Terminating in immediate shutdown mode",
                signal.name()
            );
-            profiling::exit_profiler(conf, &profiler_guard);
            std::process::exit(111);
        }

@@ -364,8 +392,7 @@ fn start_pageserver(conf: &'static PageServerConf) -> anyhow::Result<()> {
                "Got {}. Terminating gracefully in fast shutdown mode",
                signal.name()
            );
-            profiling::exit_profiler(conf, &profiler_guard);
-            BACKGROUND_RUNTIME.block_on(pageserver::shutdown_pageserver(0));
+            BACKGROUND_RUNTIME.block_on(task_mgr::shutdown_pageserver(0));
            unreachable!()
        }
    })
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -138,7 +138,6 @@ pub struct PageServerConf {
    pub auth_validation_public_key_path: Option<PathBuf>,
    pub remote_storage_config: Option<RemoteStorageConfig>,

-    pub profiling: ProfilingConfig,
    pub default_tenant_conf: TenantConf,

    /// Storage broker endpoints to connect to.
@@ -165,25 +164,6 @@ pub struct PageServerConf {
 /// startup code to the connection code through a dozen layers.
 pub static SAFEKEEPER_AUTH_TOKEN: OnceCell<Arc<String>> = OnceCell::new();

-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum ProfilingConfig {
-    Disabled,
-    PageRequests,
-}
-
-impl FromStr for ProfilingConfig {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<ProfilingConfig, Self::Err> {
-        let result = match s {
-            "disabled"  => ProfilingConfig::Disabled,
-            "page_requests"  => ProfilingConfig::PageRequests,
-            _ => bail!("invalid value \"{s}\" for profiling option, valid values are \"disabled\" and \"page_requests\""),
-        };
-        Ok(result)
-    }
-}
-
 // use dedicated enum for builder to better indicate the intention
 // and avoid possible confusion with nested options
 pub enum BuilderValue<T> {
@@ -226,7 +206,6 @@ struct PageServerConfigBuilder {

    id: BuilderValue<NodeId>,

-    profiling: BuilderValue<ProfilingConfig>,
    broker_endpoint: BuilderValue<Uri>,
    broker_keepalive_interval: BuilderValue<Duration>,

@@ -262,7 +241,6 @@ impl Default for PageServerConfigBuilder {
            auth_validation_public_key_path: Set(None),
            remote_storage_config: Set(None),
            id: NotSet,
-            profiling: Set(ProfilingConfig::Disabled),
            broker_endpoint: Set(storage_broker::DEFAULT_ENDPOINT
                .parse()
                .expect("failed to parse default broker endpoint")),
@@ -348,10 +326,6 @@ impl PageServerConfigBuilder {
        self.id = BuilderValue::Set(node_id)
    }

-    pub fn profiling(&mut self, profiling: ProfilingConfig) {
-        self.profiling = BuilderValue::Set(profiling)
-    }
-
    pub fn log_format(&mut self, log_format: LogFormat) {
        self.log_format = BuilderValue::Set(log_format)
    }
@@ -405,7 +379,6 @@ impl PageServerConfigBuilder {
                .remote_storage_config
                .ok_or(anyhow!("missing remote_storage_config"))?,
            id: self.id.ok_or(anyhow!("missing id"))?,
-            profiling: self.profiling.ok_or(anyhow!("missing profiling"))?,
            // TenantConf is handled separately
            default_tenant_conf: TenantConf::default(),
            broker_endpoint: self
@@ -588,7 +561,6 @@ impl PageServerConf {
                    t_conf = Self::parse_toml_tenant_conf(item)?;
                }
                "id" => builder.id(NodeId(parse_toml_u64(key, item)?)),
-                "profiling" => builder.profiling(parse_toml_from_str(key, item)?),
                "broker_endpoint" => builder.broker_endpoint(parse_toml_string(key, item)?.parse().context("failed to parse broker endpoint")?),
                "broker_keepalive_interval" => builder.broker_keepalive_interval(parse_toml_duration(key, item)?),
                "log_format" => builder.log_format(
@@ -722,7 +694,6 @@ impl PageServerConf {
            auth_type: AuthType::Trust,
            auth_validation_public_key_path: None,
            remote_storage_config: None,
-            profiling: ProfilingConfig::Disabled,
            default_tenant_conf: TenantConf::default(),
            broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
            broker_keepalive_interval: Duration::from_secs(5000),
@@ -898,7 +869,6 @@ log_format = 'json'
                auth_type: AuthType::Trust,
                auth_validation_public_key_path: None,
                remote_storage_config: None,
-                profiling: ProfilingConfig::Disabled,
                default_tenant_conf: TenantConf::default(),
                broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
                broker_keepalive_interval: humantime::parse_duration(
@@ -949,7 +919,6 @@ log_format = 'json'
                auth_type: AuthType::Trust,
                auth_validation_public_key_path: None,
                remote_storage_config: None,
-                profiling: ProfilingConfig::Disabled,
                default_tenant_conf: TenantConf::default(),
                broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
                broker_keepalive_interval: Duration::from_secs(5),
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -6,23 +6,25 @@

 use anyhow;
 use tracing::*;
+use utils::id::NodeId;
 use utils::id::TimelineId;

-use crate::task_mgr;
+use crate::context::RequestContext;
 use crate::tenant::mgr;
-use pageserver_api::models::TenantState;
 use utils::id::TenantId;

 use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr};
 use std::collections::HashMap;
 use std::fmt;
 use std::str::FromStr;
 use std::time::Duration;

 use chrono::{DateTime, Utc};
+use rand::Rng;
 use reqwest::Url;

-/// BillingMetric struct that defines the format for one metric entry
+/// ConsumptionMetric struct that defines the format for one metric entry
 /// i.e.
 ///
 /// ```json
@@ -30,27 +32,36 @@ use reqwest::Url;
 /// "metric": "remote_storage_size",
 /// "type": "absolute",
 /// "tenant_id": "5d07d9ce9237c4cd845ea7918c0afa7d",
-/// "timeline_id": "00000000000000000000000000000000",
-/// "time": ...,
+/// "timeline_id": "a03ebb4f5922a1c56ff7485cc8854143",
+/// "time": "2022-12-28T11:07:19.317310284Z",
+/// "idempotency_key": "2022-12-28 11:07:19.317310324 UTC-1-4019",
 /// "value": 12345454,
 /// }
 /// ```
+#[serde_as]
 #[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
-pub struct BillingMetric {
-    pub metric: BillingMetricKind,
+pub struct ConsumptionMetric {
+    pub metric: ConsumptionMetricKind,
+    #[serde(rename = "type")]
    pub metric_type: &'static str,
+    #[serde_as(as = "DisplayFromStr")]
    pub tenant_id: TenantId,
+    #[serde_as(as = "Option<DisplayFromStr>")]
+    #[serde(skip_serializing_if = "Option::is_none")]
    pub timeline_id: Option<TimelineId>,
    pub time: DateTime<Utc>,
+    pub idempotency_key: String,
    pub value: u64,
 }

-impl BillingMetric {
-    pub fn new_absolute(
-        metric: BillingMetricKind,
+impl ConsumptionMetric {
+    pub fn new_absolute<R: Rng + ?Sized>(
+        metric: ConsumptionMetricKind,
        tenant_id: TenantId,
        timeline_id: Option<TimelineId>,
        value: u64,
+        node_id: NodeId,
+        rng: &mut R,
    ) -> Self {
        Self {
            metric,
@@ -58,6 +69,8 @@ impl BillingMetric {
            tenant_id,
            timeline_id,
            time: Utc::now(),
+            // key that allows metric collector to distinguish unique events
+            idempotency_key: format!("{}-{}-{:04}", Utc::now(), node_id, rng.gen_range(0..=9999)),
            value,
        }
    }
@@ -65,7 +78,7 @@ impl BillingMetric {

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
-pub enum BillingMetricKind {
+pub enum ConsumptionMetricKind {
    /// Amount of WAL produced , by a timeline, i.e. last_record_lsn
    /// This is an absolute, per-timeline metric.
    WrittenSize,
@@ -80,9 +93,12 @@ pub enum BillingMetricKind {
    /// Size of the remote storage (S3) directory.
    /// This is an absolute, per-tenant metric.
    RemoteStorageSize,
+    /// Logical size of the data in the timeline.
+    /// This is an absolute, per-timeline metric.
+    TimelineLogicalSize,
 }

-impl FromStr for BillingMetricKind {
+impl FromStr for ConsumptionMetricKind {
    type Err = anyhow::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
@@ -91,55 +107,62 @@ impl FromStr for BillingMetricKind {
            "synthetic_storage_size" => Ok(Self::SyntheticStorageSize),
            "resident_size" => Ok(Self::ResidentSize),
            "remote_storage_size" => Ok(Self::RemoteStorageSize),
+            "timeline_logical_size" => Ok(Self::TimelineLogicalSize),
            _ => anyhow::bail!("invalid value \"{s}\" for metric type"),
        }
    }
 }

-impl fmt::Display for BillingMetricKind {
+impl fmt::Display for ConsumptionMetricKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
-            BillingMetricKind::WrittenSize => "written_size",
-            BillingMetricKind::SyntheticStorageSize => "synthetic_storage_size",
-            BillingMetricKind::ResidentSize => "resident_size",
-            BillingMetricKind::RemoteStorageSize => "remote_storage_size",
+            ConsumptionMetricKind::WrittenSize => "written_size",
+            ConsumptionMetricKind::SyntheticStorageSize => "synthetic_storage_size",
+            ConsumptionMetricKind::ResidentSize => "resident_size",
+            ConsumptionMetricKind::RemoteStorageSize => "remote_storage_size",
+            ConsumptionMetricKind::TimelineLogicalSize => "timeline_logical_size",
        })
    }
 }

 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct BillingMetricsKey {
+pub struct ConsumptionMetricsKey {
    tenant_id: TenantId,
    timeline_id: Option<TimelineId>,
-    metric: BillingMetricKind,
+    metric: ConsumptionMetricKind,
 }

 #[derive(serde::Serialize)]
 struct EventChunk<'a> {
-    events: &'a [BillingMetric],
+    events: &'a [ConsumptionMetric],
 }

-/// Main thread that serves metrics collection
+/// Main task that serves metrics collection
 pub async fn collect_metrics(
    metric_collection_endpoint: &Url,
    metric_collection_interval: Duration,
-) -> anyhow::Result<()> {
+    node_id: NodeId,
+    metrics_ctx: RequestContext,
+) {
    let mut ticker = tokio::time::interval(metric_collection_interval);

    info!("starting collect_metrics");

    // define client here to reuse it for all requests
    let client = reqwest::Client::new();
-    let mut cached_metrics: HashMap<BillingMetricsKey, u64> = HashMap::new();
+    let mut cached_metrics: HashMap<ConsumptionMetricsKey, u64> = HashMap::new();

    loop {
        tokio::select! {
-            _ = task_mgr::shutdown_watcher() => {
+            _ = metrics_ctx.cancelled() => {
                info!("collect_metrics received cancellation request");
-                return Ok(());
+                return;
            },
            _ = ticker.tick() => {
-                collect_metrics_task(&client, &mut cached_metrics, metric_collection_endpoint).await?;
+                if let Err(err) = collect_metrics_task(&client, &mut cached_metrics, metric_collection_endpoint, node_id, &metrics_ctx).await {
+                    // Log the error and continue
+                    error!("metrics collection failed: {err:?}");
+                }
            }
        }
    }
@@ -151,10 +174,12 @@ pub async fn collect_metrics(
 /// Cache metrics to avoid sending the same metrics multiple times.
 pub async fn collect_metrics_task(
    client: &reqwest::Client,
-    cached_metrics: &mut HashMap<BillingMetricsKey, u64>,
+    cached_metrics: &mut HashMap<ConsumptionMetricsKey, u64>,
    metric_collection_endpoint: &reqwest::Url,
+    node_id: NodeId,
+    ctx: &RequestContext,
 ) -> anyhow::Result<()> {
-    let mut current_metrics: Vec<(BillingMetricsKey, u64)> = Vec::new();
+    let mut current_metrics: Vec<(ConsumptionMetricsKey, u64)> = Vec::new();
    trace!(
        "starting collect_metrics_task. metric_collection_endpoint: {}",
        metric_collection_endpoint
@@ -165,33 +190,55 @@ pub async fn collect_metrics_task(

    // iterate through list of Active tenants and collect metrics
    for (tenant_id, tenant_state) in tenants {
-        if tenant_state != TenantState::Active {
+        if ctx.is_cancelled() {
            continue;
        }
-
-        let tenant = mgr::get_tenant(tenant_id, true).await?;
+        let tenant = mgr::get_tenant(tenant_id).await?;
+        // If the tenant was shut down while we were looking elsewhere, skip it.
+        let tenant_ctx = match tenant.get_context(ctx) {
+            Ok(ctx) => ctx,
+            Err(_state) => {
+                debug!(
+                    "skipping metrics collection for tenant {tenant_id} because it is not active"
+                );
+                continue;
+            }
+        };

        let mut tenant_resident_size = 0;

        // iterate through list of timelines in tenant
        for timeline in tenant.list_timelines().iter() {
-            let timeline_written_size = u64::from(timeline.get_last_record_lsn());
+            // collect per-timeline metrics only for active timelines
+            if let Ok(timeline_ctx) = timeline.get_context(&tenant_ctx) {
+                let timeline_written_size = u64::from(timeline.get_last_record_lsn());

-            current_metrics.push((
-                BillingMetricsKey {
-                    tenant_id,
-                    timeline_id: Some(timeline.timeline_id),
-                    metric: BillingMetricKind::WrittenSize,
-                },
-                timeline_written_size,
-            ));
+                current_metrics.push((
+                    ConsumptionMetricsKey {
+                        tenant_id,
+                        timeline_id: Some(timeline.timeline_id),
+                        metric: ConsumptionMetricKind::WrittenSize,
+                    },
+                    timeline_written_size,
+                ));
+
+                let (timeline_logical_size, is_exact) =
+                    timeline.get_current_logical_size(&timeline_ctx)?;
+                // Only send timeline logical size when it is fully calculated.
+                if is_exact {
+                    current_metrics.push((
+                        ConsumptionMetricsKey {
+                            tenant_id,
+                            timeline_id: Some(timeline.timeline_id),
+                            metric: ConsumptionMetricKind::TimelineLogicalSize,
+                        },
+                        timeline_logical_size,
+                    ));
+                }
+            }

            let timeline_resident_size = timeline.get_resident_physical_size();
            tenant_resident_size += timeline_resident_size;
-
-            debug!(
-                "per-timeline current metrics for tenant: {}: timeline {} resident_size={} last_record_lsn {} (as bytes)",
-                tenant_id, timeline.timeline_id, timeline_resident_size, timeline_written_size)
        }

        let tenant_remote_size = tenant.get_remote_size().await?;
@@ -201,19 +248,19 @@ pub async fn collect_metrics_task(
        );

        current_metrics.push((
-            BillingMetricsKey {
+            ConsumptionMetricsKey {
                tenant_id,
                timeline_id: None,
-                metric: BillingMetricKind::ResidentSize,
+                metric: ConsumptionMetricKind::ResidentSize,
            },
            tenant_resident_size,
        ));

        current_metrics.push((
-            BillingMetricsKey {
+            ConsumptionMetricsKey {
                tenant_id,
                timeline_id: None,
-                metric: BillingMetricKind::RemoteStorageSize,
+                metric: ConsumptionMetricKind::RemoteStorageSize,
            },
            tenant_remote_size,
        ));
@@ -237,24 +284,32 @@ pub async fn collect_metrics_task(
    const CHUNK_SIZE: usize = 1000;
    let chunks = current_metrics.chunks(CHUNK_SIZE);

-    let mut chunk_to_send: Vec<BillingMetric> = Vec::with_capacity(1000);
+    let mut chunk_to_send: Vec<ConsumptionMetric> = Vec::with_capacity(1000);

    for chunk in chunks {
        chunk_to_send.clear();
-        // enrich metrics with timestamp and metric_kind before sending
-        chunk_to_send.extend(chunk.iter().map(|(curr_key, curr_val)| {
-            BillingMetric::new_absolute(
-                curr_key.metric,
-                curr_key.tenant_id,
-                curr_key.timeline_id,
-                *curr_val,
-            )
-        }));
+
+        // this code block is needed to convince the compiler
+        // that rng is not reused around the await point
+        {
+            // enrich metrics with timestamp and metric_kind before sending
+            let mut rng = rand::thread_rng();
+            chunk_to_send.extend(chunk.iter().map(|(curr_key, curr_val)| {
+                ConsumptionMetric::new_absolute(
+                    curr_key.metric,
+                    curr_key.tenant_id,
+                    curr_key.timeline_id,
+                    *curr_val,
+                    node_id,
+                    &mut rng,
+                )
+            }));
+        }

        let chunk_json = serde_json::value::to_raw_value(&EventChunk {
            events: &chunk_to_send,
        })
-        .expect("BillingMetric should not fail serialization");
+        .expect("ConsumptionMetric should not fail serialization");

        let res = client
            .post(metric_collection_endpoint.clone())
--- a/pageserver/src/context.rs
+++ b/pageserver/src/context.rs
@@ -0,0 +1,348 @@
+//!
+//! Most async functions throughout the pageserver take a `ctx: &RequestContext`
+//! argument. It is used to control desired behaviour of the operation, and to
+//! allow cancelling the operation gracefully.
+//!
+//! # Context hierarchy
+//!
+//! RequestContext's form a hierarchy. For example:
+//!
+//!  listener context (LibpqEndpointListener)
+//!    connection context (PageRequestHandler)
+//!      per-request context (PageRequestHandler)
+//!
+//! The top "listener context" is created at pageserver startup. The tokio
+//! task that listens on the libpq protocol TCP port holds that context. When
+//! it accepts a connection, it spawns a new task to handle that connection
+//! and creates a new per-connection context for it. The mgmt API listener,
+//! background jobs, and other things form separate but similar hierarchies.
+//!
+//! Usually, each tokio task has its own context, but it's not a strict
+//! requirement and some tasks can hold multiple contexts, and conversely,
+//! some contexts are shared by multiple tasks that work together to perform
+//! some operation.
+//!
+//! The hierarchy is not explicitly tracked in the RequestContext struct
+//! itself, but only by their cancellation tokens. It's entirely possible for
+//! the parent context to be dropped before its children.
+//!
+//! # Tenant and Timeline registration
+//!
+//! Most operations are performed on a particular Tenant or Timeline. When
+//! operating on a Tenant or Timeline, it's important that the Tenant/Timeline
+//! isn't detached or deleted while there are tasks working on it. To ensure
+//! that, a RequestContext can be registered with a Tenant or Timeline. See
+//! `Tenant::register_context` and `Timeline::register_context`. When
+//! shutting down a Tenant or Timeline, the shutdown routine cancels all the
+//! registered contexts, and waits for them to be dropped before completing
+//! the shutdown.
+//!
+//! To enforce that you hold a registered context when operating on a Tenant
+//! or Timeline, most functions take a TimelineRequestContext or
+//! TenantRequestContext reference as argument.
+//!
+//! NOTE: The Tenant / Timeline registration is separate from the context
+//! hierarchy. You can create a new RequestContext with TimelineRequestContext
+//! as the parent, and register it with a different timeline, for example.
+//!
+//! # Notes
+//!
+//! All RequestContexts in the system have a unique ID, and are also tracked
+//! in a global hash table, CONTEXTS.
+//!
+//! - Futures are normally not assumed to be async cancellation-safe. Pass a
+//!   RequestContext as argument and use cancel() on it instead.
+//!
+//! - If you perform an operation that depends on some external actor or the
+//!   network, use the cancellation token to check for cancellation.
+//!
+//! - By convention, the appropriate context for current operation is carried in
+//!   a variable called 'ctx'. If a function handles multiple contexts, it's
+//!   best to *not* have a variable called 'ctx', to force you to think which
+//!   one to use in each call.
+//!
+//! # TODO
+//! - include a unique request ID for tracing
+//!
+
+use once_cell::sync::Lazy;
+use tokio_util::sync::CancellationToken;
+use tracing::{info, warn};
+
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::Mutex;
+
+/// Each RequestContext has a unique context ID. It's just an increasing
+/// number that we assign.
+static NEXT_CONTEXT_ID: AtomicU64 = AtomicU64::new(1);
+
+/// Global registry of contexts
+static CONTEXTS: Lazy<Mutex<HashMap<RequestContextId, (TaskKind, CancellationToken)>>> =
+    Lazy::new(|| Mutex::new(HashMap::new()));
+
+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+pub struct RequestContextId(u64);
+
+/// Context of one operation: unique ID, task kind, download behavior and cancellation token.
+pub struct RequestContext {
+    context_id: RequestContextId,
+    task_kind: TaskKind,
+
+    download_behavior: DownloadBehavior,
+    cancellation_token: CancellationToken,
+}
+
+/// DownloadBehavior option specifies the behavior if completing the operation
+/// would require downloading a layer file from remote storage.
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum DownloadBehavior {
+    /// Download the layer file. It can take a while.
+    Download,
+
+    /// Download the layer file, but print a warning to the log. This should be used
+    /// in code where the layer file is expected to already exist locally.
+    Warn,
+
+    /// Return a PageReconstructError::NeedsDownload error
+    Error,
+}
+
+///
+/// There are many kinds of tasks in the system. Some are associated with a particular
+/// tenant or timeline, while others are global.
+///
+/// The task kind affects the shutdown sequence on pageserver shutdown and on detach
+/// of an individual tenant. For example, when shutting down the pageserver, we shut
+/// down the LibpqEndpointListeners first, so that we don't accept any more client
+/// connections while we perform the rest of the shutdown duties. See
+/// [`Timeline::graceful_shutdown`] and [`tenant_mgr::shutdown_pageserver`]
+/// for details.
+///
+/// Note that we don't try to limit how many tasks of a certain kind can be running
+/// at the same time.
+///
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub enum TaskKind {
+    // libpq listener task. It just accepts connection and spawns a
+    // PageRequestHandler task for each connection.
+    LibpqEndpointListener,
+
+    // HTTP endpoint listener.
+    HttpEndpointListener,
+
+    // Task that handles a single connection. A PageRequestHandler task
+    // starts detached from any particular tenant or timeline, but it can be
+    // associated with one later, after receiving a command from the client.
+    PageRequestHandler,
+
+    // Context for one management API request
+    MgmtRequest,
+
+    // Manages the WAL receiver connection for one timeline. It subscribes to
+    // events from storage_broker, decides which safekeeper to connect to. It spawns a
+    // separate WalReceiverConnection task to handle each connection.
+    WalReceiverManager,
+
+    // Handles a connection to a safekeeper, to stream WAL to a timeline.
+    WalReceiverConnection,
+
+    // Garbage collection worker. One per tenant
+    GarbageCollector,
+
+    // Compaction. One per tenant.
+    Compaction,
+
+    // Initial logical size calculation
+    InitialLogicalSizeCalculation,
+
+    // Task that flushes frozen in-memory layers to disk
+    LayerFlush,
+
+    // Task that uploads a file to remote storage
+    RemoteUploadTask,
+
+    // Task that downloads a file from remote storage
+    RemoteDownloadTask,
+
+    // task that handles the initial downloading of all tenants
+    InitialLoad,
+
+    // task that handles attaching a tenant
+    Attach,
+
+    // task that handles metrics collection
+    MetricsCollection,
+
+    // task that drives downloading layers
+    DownloadAllRemoteLayers,
+
+    // Only used in unit tests
+    UnitTest,
+}
+
+impl Drop for RequestContext {
+    fn drop(&mut self) {
+        CONTEXTS
+            .lock()
+            .unwrap()
+            .remove(&self.context_id)
+            .expect("context is not in global registry");
+    }
+}
+
+impl RequestContext {
+    /// Create a new RequestContext
+    pub fn new(task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self {
+        let cancellation_token = CancellationToken::new();
+        let context_id = RequestContextId(NEXT_CONTEXT_ID.fetch_add(1, Ordering::Relaxed));
+        CONTEXTS
+            .lock()
+            .unwrap()
+            .insert(context_id, (task_kind, cancellation_token.clone()));
+
+        RequestContext {
+            task_kind,
+            context_id,
+            download_behavior,
+            cancellation_token,
+        }
+    }
+
+    /// Create a new RequestContext, as a child of 'parent'.
+    pub fn with_parent(
+        task_kind: TaskKind,
+        download_behavior: DownloadBehavior,
+        parent: &RequestContext,
+    ) -> Self {
+        let cancellation_token = parent.cancellation_token.child_token();
+        let context_id = RequestContextId(NEXT_CONTEXT_ID.fetch_add(1, Ordering::Relaxed));
+        CONTEXTS
+            .lock()
+            .unwrap()
+            .insert(context_id, (task_kind, cancellation_token.clone()));
+
+        RequestContext {
+            task_kind,
+            context_id,
+            download_behavior,
+            cancellation_token,
+        }
+    }
+
+    pub fn context_id(&self) -> RequestContextId {
+        self.context_id
+    }
+
+    pub fn task_kind(&self) -> TaskKind {
+        self.task_kind
+    }
+
+    pub fn download_behavior(&self) -> DownloadBehavior {
+        self.download_behavior
+    }
+
+    pub fn cancellation_token(&self) -> &CancellationToken {
+        &self.cancellation_token
+    }
+
+    pub fn is_cancelled(&self) -> bool {
+        self.cancellation_token.is_cancelled()
+    }
+
+    pub async fn cancelled(&self) {
+        self.cancellation_token.cancelled().await
+    }
+}
+
+///
+/// Cancel all the contexts in 'context_ids' and wait for them to finish.
+///
+/// Whenever we notice that one of the contexts has finished, it is removed
+/// from 'context_ids'. On return, it is empty.
+///
+pub async fn cancel_and_wait(context_ids: &mut Vec<RequestContextId>) {
+    {
+        let contexts = CONTEXTS.lock().unwrap();
+        context_ids.retain(|context_id| {
+            if let Some((task_kind, cancellation_token)) = contexts.get(context_id) {
+                info!("cancelling task {task_kind:?} with ID {context_id:?}");
+                cancellation_token.cancel();
+                true
+            } else {
+                // Already gone
+                false
+            }
+        });
+    }
+    wait_contexts_to_finish(context_ids).await
+}
+
+async fn wait_contexts_to_finish(context_ids: &mut Vec<RequestContextId>) {
+    let mut n = 0;
+    while !context_ids.is_empty() {
+        {
+            let contexts = CONTEXTS.lock().unwrap();
+            while let Some(context_id) = context_ids.last() {
+                if let Some((task_kind, _cancellation_token)) = contexts.get(context_id) {
+                    info!("waiting for task {task_kind:?} with ID {context_id:?} to finish");
+                    break;
+                } else {
+                    context_ids.pop();
+                }
+            }
+        }
+        if !context_ids.is_empty() {
+            crate::exponential_backoff(
+                n,
+                crate::DEFAULT_BASE_BACKOFF_SECONDS,
+                crate::DEFAULT_MAX_BACKOFF_SECONDS,
+            )
+            .await;
+            n += 1;
+        }
+    }
+}
+
+/// Cancel and wait for all tasks of given 'kind' to finish
+pub async fn shutdown_tasks(kind: TaskKind) {
+    let mut context_ids = Vec::new();
+    {
+        let contexts = CONTEXTS.lock().unwrap();
+        for (&context_id, (task_kind, cancellation_token)) in contexts.iter() {
+            if *task_kind == kind {
+                cancellation_token.cancel();
+                context_ids.push(context_id);
+            }
+        }
+    }
+    wait_contexts_to_finish(&mut context_ids).await
+}
+
+/// Cancel all remaining contexts.
+///
+/// This is used as part of pageserver shutdown. We have already shut down all
+/// tasks / contexts, this is just a backstop or sanity check to make sure we
+/// didn't miss anything. Hence, also print a warning for any remaining tasks.
+pub async fn shutdown_all_tasks() {
+    loop {
+        let mut context_ids = Vec::new();
+        {
+            let contexts = CONTEXTS.lock().unwrap();
+
+            if contexts.is_empty() {
+                return;
+            }
+
+            for (&context_id, (task_kind, cancellation_token)) in contexts.iter() {
+                cancellation_token.cancel();
+                context_ids.push(context_id);
+                warn!(
+                    "unexpected task of kind {:?} with ID {:?} still running",
+                    *task_kind, context_id
+                );
+            }
+        }
+        wait_contexts_to_finish(&mut context_ids).await
+    }
+}
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -4,16 +4,16 @@ use anyhow::{anyhow, Context, Result};
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use remote_storage::GenericRemoteStorage;
-use tokio_util::sync::CancellationToken;
 use tracing::*;

 use super::models::{
    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
    TimelineCreateRequest, TimelineInfo,
 };
+use crate::context::{DownloadBehavior, RequestContext, TaskKind};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::tenant::config::TenantConfOpt;
-use crate::tenant::{with_ondemand_download, Timeline};
+use crate::tenant::{PageReconstructError, Timeline, TimelineRequestContext};
 use crate::{config::PageServerConf, tenant::mgr};
 use utils::{
    auth::JwtAuth,
@@ -77,29 +77,50 @@ fn check_permission(request: &Request<Body>, tenant_id: Option<TenantId>) -> Res
    })
 }

+fn apierror_from_prerror(err: PageReconstructError) -> ApiError {
+    match err {
+        PageReconstructError::Other(err) => ApiError::InternalServerError(err),
+        PageReconstructError::NeedsDownload(_, _) => {
+            // This shouldn't happen, because we use a RequestContext that requests to
+            // download any missing layer files on-demand.
+            ApiError::InternalServerError(anyhow::anyhow!(
+                "would need to download remote layer file"
+            ))
+        }
+        PageReconstructError::Cancelled => {
+            ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
+        }
+        PageReconstructError::WalRedo(err) => {
+            ApiError::InternalServerError(anyhow::Error::new(err))
+        }
+    }
+}
+
 // Helper function to construct a TimelineInfo struct for a timeline
 async fn build_timeline_info(
    timeline: &Arc<Timeline>,
    include_non_incremental_logical_size: bool,
+    ctx: Option<&TimelineRequestContext>,
 ) -> anyhow::Result<TimelineInfo> {
-    let mut info = build_timeline_info_common(timeline)?;
+    let mut info = build_timeline_info_common(timeline, ctx)?;
    if include_non_incremental_logical_size {
-        // XXX we should be using spawn_ondemand_logical_size_calculation here.
-        // Otherwise, if someone deletes the timeline / detaches the tenant while
-        // we're executing this function, we will outlive the timeline on-disk state.
-        info.current_logical_size_non_incremental = Some(
-            timeline
-                .get_current_logical_size_non_incremental(
-                    info.last_record_lsn,
-                    CancellationToken::new(),
-                )
-                .await?,
-        );
+        if let Some(ctx) = ctx {
+            info.current_logical_size_non_incremental = Some(
+                timeline
+                    .get_current_logical_size_non_incremental(info.last_record_lsn, ctx)
+                    .await?,
+            );
+        } else {
+            info!("could not calculate non-incremental size for timeline because it is not active");
+        }
    }
    Ok(info)
 }

-fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<TimelineInfo> {
+fn build_timeline_info_common(
+    timeline: &Arc<Timeline>,
+    ctx: Option<&TimelineRequestContext>,
+) -> anyhow::Result<TimelineInfo> {
    let last_record_lsn = timeline.get_last_record_lsn();
    let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
        let guard = timeline.last_received_wal.lock().unwrap();
@@ -119,12 +140,16 @@ fn build_timeline_info_common(timeline: &Arc<Timeline>) -> anyhow::Result<Timeli
        Lsn(0) => None,
        lsn @ Lsn(_) => Some(lsn),
    };
-    let current_logical_size = match timeline.get_current_logical_size() {
-        Ok(size) => Some(size),
-        Err(err) => {
-            error!("Timeline info creation failed to get current logical size: {err:?}");
-            None
+    let current_logical_size = if let Some(ctx) = ctx {
+        match timeline.get_current_logical_size(ctx) {
+            Ok((size, _)) => Some(size),
+            Err(err) => {
+                error!("Timeline info creation failed to get current logical size: {err:?}");
+                None
+            }
        }
+    } else {
+        None
    };
    let current_physical_size = Some(timeline.layer_size_sum().approximate_is_ok());
    let state = timeline.current_state();
@@ -170,20 +195,23 @@ async fn timeline_create_handler(mut request: Request<Body>) -> Result<Response<
        .new_timeline_id
        .unwrap_or_else(TimelineId::generate);

-    let tenant = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, tenant_ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
        .map_err(ApiError::NotFound)?;
    match tenant.create_timeline(
        new_timeline_id,
        request_data.ancestor_timeline_id.map(TimelineId::from),
        request_data.ancestor_start_lsn,
-        request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION)
+        request_data.pg_version.unwrap_or(crate::DEFAULT_PG_VERSION),
+        &tenant_ctx,
    )
    .instrument(info_span!("timeline_create", tenant = %tenant_id, new_timeline = ?request_data.new_timeline_id, timeline_id = %new_timeline_id, lsn=?request_data.ancestor_start_lsn, pg_version=?request_data.pg_version))
    .await {
-        Ok(Some(new_timeline)) => {
+        Ok(Some((new_timeline, timeline_ctx))) => {
            // Created. Construct a TimelineInfo for it.
-            let timeline_info = build_timeline_info_common(&new_timeline)
+            let timeline_info = build_timeline_info_common(&new_timeline, Some(&timeline_ctx))
                .map_err(ApiError::InternalServerError)?;
            json_response(StatusCode::CREATED, timeline_info)
        }
@@ -198,21 +226,25 @@ async fn timeline_list_handler(request: Request<Body>) -> Result<Response<Body>,
        query_param_present(&request, "include-non-incremental-logical-size");
    check_permission(&request, Some(tenant_id))?;

+    let top_ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
    let response_data = async {
-        let tenant = mgr::get_tenant(tenant_id, true)
+        let (tenant, tenant_ctx) = mgr::get_active_tenant(tenant_id, &top_ctx)
            .await
            .map_err(ApiError::NotFound)?;
        let timelines = tenant.list_timelines();

        let mut response_data = Vec::with_capacity(timelines.len());
        for timeline in timelines {
-            let timeline_info =
-                build_timeline_info(&timeline, include_non_incremental_logical_size)
-                    .await
-                    .context(
-                        "Failed to convert tenant timeline {timeline_id} into the local one: {e:?}",
-                    )
-                    .map_err(ApiError::InternalServerError)?;
+            let timeline_ctx = timeline.get_context(&tenant_ctx).ok();
+            let timeline_info = build_timeline_info(
+                &timeline,
+                include_non_incremental_logical_size,
+                timeline_ctx.as_ref(),
+            )
+            .await
+            .context("Failed to convert tenant timeline {timeline_id} into the local one: {e:?}")
+            .map_err(ApiError::InternalServerError)?;

            response_data.push(timeline_info);
        }
@@ -261,19 +293,26 @@ async fn timeline_detail_handler(request: Request<Body>) -> Result<Response<Body
        query_param_present(&request, "include-non-incremental-logical-size");
    check_permission(&request, Some(tenant_id))?;

+    let top_ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
    let timeline_info = async {
-        let tenant = mgr::get_tenant(tenant_id, true)
+        let (tenant, tenant_ctx) = mgr::get_active_tenant(tenant_id, &top_ctx)
            .await
            .map_err(ApiError::NotFound)?;

        let timeline = tenant
-            .get_timeline(timeline_id, false)
+            .get_timeline(timeline_id)
            .map_err(ApiError::NotFound)?;
+        let timeline_ctx = timeline.get_context(&tenant_ctx).ok();

-        let timeline_info = build_timeline_info(&timeline, include_non_incremental_logical_size)
-            .await
-            .context("Failed to get local timeline info: {e:#}")
-            .map_err(ApiError::InternalServerError)?;
+        let timeline_info = build_timeline_info(
+            &timeline,
+            include_non_incremental_logical_size,
+            timeline_ctx.as_ref(),
+        )
+        .await
+        .context("Failed to get local timeline info: {e:#}")
+        .map_err(ApiError::InternalServerError)?;

        Ok::<_, ApiError>(timeline_info)
    }
@@ -294,13 +333,19 @@ async fn get_lsn_by_timestamp_handler(request: Request<Body>) -> Result<Response
        .map_err(ApiError::BadRequest)?;
    let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);

-    let timeline = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
-        .and_then(|tenant| tenant.get_timeline(timeline_id, true))
        .map_err(ApiError::NotFound)?;
-    let result = with_ondemand_download(|| timeline.find_lsn_for_timestamp(timestamp_pg))
+
+    let (timeline, ctx) = tenant
+        .get_active_timeline(timeline_id, &ctx)
+        .map_err(ApiError::NotFound)?;
+    let result = timeline
+        .find_lsn_for_timestamp(timestamp_pg, &ctx)
        .await
-        .map_err(ApiError::InternalServerError)?;
+        .map_err(apierror_from_prerror)?;

    let result = match result {
        LsnForTimestamp::Present(lsn) => format!("{lsn}"),
@@ -340,7 +385,10 @@ async fn timeline_delete_handler(request: Request<Body>) -> Result<Response<Body
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    mgr::delete_timeline(tenant_id, timeline_id)
+    // deleting shouldn't require downloading anything
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
+
+    mgr::delete_timeline(tenant_id, timeline_id, &ctx)
        .instrument(info_span!("timeline_delete", tenant = %tenant_id, timeline = %timeline_id))
        .await
        // FIXME: Errors from `delete_timeline` can occur for a number of reasons, incuding both
@@ -418,8 +466,10 @@ async fn tenant_status(request: Request<Body>) -> Result<Response<Body>, ApiErro
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;

+    let mut _req_ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
    let tenant_info = async {
-        let tenant = mgr::get_tenant(tenant_id, false).await?;
+        let tenant = mgr::get_tenant(tenant_id).await?;

        // Calculate total physical size of all timelines
        let mut current_physical_size = 0;
@@ -446,13 +496,15 @@ async fn tenant_size_handler(request: Request<Body>) -> Result<Response<Body>, A
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let tenant = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
        .map_err(ApiError::InternalServerError)?;

    // this can be long operation, it currently is not backed by any request coalescing or similar
    let inputs = tenant
-        .gather_size_inputs()
+        .gather_size_inputs(&ctx)
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -495,6 +547,8 @@ fn bad_duration<'a>(field_name: &'static str, value: &'a str) -> impl 'a + Fn()
 async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Body>, ApiError> {
    check_permission(&request, None)?;

+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
    let request_data: TenantCreateRequest = json_request(&mut request).await?;

    let mut tenant_conf = TenantConfOpt::default();
@@ -583,9 +637,9 @@ async fn tenant_create_handler(mut request: Request<Body>) -> Result<Response<Bo
        Some(tenant) => {
            // We created the tenant. Existing API semantics are that the tenant
            // is Active when this function returns.
-            if let res @ Err(_) = tenant.wait_to_become_active().await {
+            if let res @ Err(_) = tenant.wait_to_become_active(ctx).await {
                // This shouldn't happen because we just created the tenant directory
-                // in tenant_mgr::create_tenant, and there aren't any remote timelines
+                // in tenant::mgr::create_tenant, and there aren't any remote timelines
                // to load, so, nothing can really fail during load.
                // Don't do cleanup because we don't know how we got here.
                // The tenant will likely be in `Broken` state and subsequent
@@ -607,6 +661,8 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
    let tenant_id = request_data.tenant_id;
    check_permission(&request, Some(tenant_id))?;

+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
    let mut tenant_conf: TenantConfOpt = Default::default();
    if let Some(gc_period) = request_data.gc_period {
        tenant_conf.gc_period = Some(
@@ -669,7 +725,7 @@ async fn tenant_config_handler(mut request: Request<Body>) -> Result<Response<Bo
    }

    let state = get_state(&request);
-    mgr::update_tenant_config(state.conf, tenant_conf, tenant_id)
+    mgr::update_tenant_config(state.conf, tenant_conf, tenant_id, &ctx)
        .instrument(info_span!("tenant_config", tenant = ?tenant_id))
        .await
        // FIXME: `update_tenant_config` can fail because of both user and internal errors.
@@ -721,11 +777,21 @@ async fn timeline_gc_handler(mut request: Request<Body>) -> Result<Response<Body

    let gc_req: TimelineGcRequest = json_request(&mut request).await?;

-    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req).await?;
-    let gc_result = wait_task_done
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
+        .await
+        .map_err(ApiError::NotFound)?;
+
+    let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
+    // Use tenant's pitr setting
+    let pitr = tenant.get_pitr_interval();
+
+    fail::fail_point!("immediate_gc_task_pre");
+    let gc_result = tenant
+        .gc_iteration(Some(timeline_id), gc_horizon, pitr, &ctx)
+        .instrument(info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id))
        .await
-        .context("wait for gc task")
-        .map_err(ApiError::InternalServerError)?
        .map_err(ApiError::InternalServerError)?;

    json_response(StatusCode::OK, gc_result)
@@ -738,14 +804,17 @@ async fn timeline_compact_handler(request: Request<Body>) -> Result<Response<Bod
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let tenant = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
        .map_err(ApiError::NotFound)?;
-    let timeline = tenant
-        .get_timeline(timeline_id, true)
+    let (timeline, ctx) = tenant
+        .get_active_timeline(timeline_id, &ctx)
        .map_err(ApiError::NotFound)?;
    timeline
-        .compact()
+        .compact(&ctx)
+        .instrument(info_span!("manual_compact", tenant = %tenant_id, timeline = %timeline_id))
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -759,18 +828,21 @@ async fn timeline_checkpoint_handler(request: Request<Body>) -> Result<Response<
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let tenant = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
        .map_err(ApiError::NotFound)?;
-    let timeline = tenant
-        .get_timeline(timeline_id, true)
+    let (timeline, ctx) = tenant
+        .get_active_timeline(timeline_id, &ctx)
        .map_err(ApiError::NotFound)?;
    timeline
        .freeze_and_flush()
        .await
        .map_err(ApiError::InternalServerError)?;
    timeline
-        .compact()
+        .compact(&ctx)
+        .instrument(info_span!("manual_compact", tenant = %tenant_id, timeline = %timeline_id))
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -784,13 +856,15 @@ async fn timeline_download_remote_layers_handler_post(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let tenant = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
        .map_err(ApiError::NotFound)?;
-    let timeline = tenant
-        .get_timeline(timeline_id, true)
+    let (timeline, ctx) = tenant
+        .get_active_timeline(timeline_id, &ctx)
        .map_err(ApiError::NotFound)?;
-    match timeline.spawn_download_all_remote_layers().await {
+    match timeline.spawn_download_all_remote_layers(&ctx).await {
        Ok(st) => json_response(StatusCode::ACCEPTED, st),
        Err(st) => json_response(StatusCode::CONFLICT, st),
    }
@@ -803,11 +877,13 @@ async fn timeline_download_remote_layers_handler_get(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    check_permission(&request, Some(tenant_id))?;

-    let tenant = mgr::get_tenant(tenant_id, true)
+    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
+
+    let (tenant, ctx) = mgr::get_active_tenant(tenant_id, &ctx)
        .await
        .map_err(ApiError::NotFound)?;
-    let timeline = tenant
-        .get_timeline(timeline_id, true)
+    let (timeline, _ctx) = tenant
+        .get_active_timeline(timeline_id, &ctx)
        .map_err(ApiError::NotFound)?;
    let info = timeline
        .get_download_all_remote_layers_task_info()
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -2,17 +2,18 @@
 //! Import data and WAL from a PostgreSQL data directory and WAL segments into
 //! a neon Timeline.
 //!
-use std::fs::File;
-use std::io::{Read, Seek, SeekFrom};
 use std::path::{Path, PathBuf};

 use anyhow::{bail, ensure, Context, Result};
 use bytes::Bytes;
+use futures::StreamExt;
+use tokio::io::{AsyncRead, AsyncReadExt};
+use tokio_tar::Archive;
 use tracing::*;
 use walkdir::WalkDir;

 use crate::pgdatadir_mapping::*;
-use crate::tenant::Timeline;
+use crate::tenant::{Timeline, TimelineRequestContext};
 use crate::walingest::WalIngest;
 use crate::walrecord::DecodedWALRecord;
 use pageserver_api::reltag::{RelTag, SlruKind};
@@ -42,10 +43,11 @@ pub fn get_lsn_from_controlfile(path: &Path) -> Result<Lsn> {
 /// This is currently only used to import a cluster freshly created by initdb.
 /// The code that deals with the checkpoint would not work right if the
 /// cluster was not shut down cleanly.
-pub fn import_timeline_from_postgres_datadir(
+pub async fn import_timeline_from_postgres_datadir(
    tline: &Timeline,
    pgdata_path: &Path,
    pgdata_lsn: Lsn,
+    ctx: &TimelineRequestContext,
 ) -> Result<()> {
    let mut pg_control: Option<ControlFileData> = None;

@@ -65,9 +67,11 @@ pub fn import_timeline_from_postgres_datadir(
            let absolute_path = entry.path();
            let relative_path = absolute_path.strip_prefix(pgdata_path)?;

-            let file = File::open(absolute_path)?;
+            let mut file = tokio::fs::File::open(absolute_path).await?;
            let len = metadata.len() as usize;
-            if let Some(control_file) = import_file(&mut modification, relative_path, file, len)? {
+            if let Some(control_file) =
+                import_file(&mut modification, relative_path, &mut file, len, ctx).await?
+            {
                pg_control = Some(control_file);
            }
            modification.flush()?;
@@ -96,19 +100,22 @@ pub fn import_timeline_from_postgres_datadir(
        tline,
        Lsn(pg_control.checkPointCopy.redo),
        pgdata_lsn,
-    )?;
+        ctx,
+    )
+    .await?;

    Ok(())
 }

 // subroutine of import_timeline_from_postgres_datadir(), to load one relation file.
-fn import_rel<Reader: Read>(
-    modification: &mut DatadirModification,
+async fn import_rel(
+    modification: &mut DatadirModification<'_>,
    path: &Path,
    spcoid: Oid,
    dboid: Oid,
-    mut reader: Reader,
+    reader: &mut (impl AsyncRead + Send + Sync + Unpin),
    len: usize,
+    ctx: &TimelineRequestContext,
 ) -> anyhow::Result<()> {
    // Does it look like a relation file?
    trace!("importing rel file {}", path.display());
@@ -139,7 +146,14 @@ fn import_rel<Reader: Read>(
    // Call put_rel_creation for every segment of the relation,
    // because there is no guarantee about the order in which we are processing segments.
    // ignore "relation already exists" error
-    if let Err(e) = modification.put_rel_creation(rel, nblocks as u32) {
+    //
+    // FIXME: use proper error type for this, instead of parsing the error message.
+    // Or better yet, keep track of which relations we've already created
+    // https://github.com/neondatabase/neon/issues/3309
+    if let Err(e) = modification
+        .put_rel_creation(rel, nblocks as u32, ctx)
+        .await
+    {
        if e.to_string().contains("already exists") {
            debug!("relation {} already exists. we must be extending it", rel);
        } else {
@@ -148,7 +162,7 @@ fn import_rel<Reader: Read>(
    }

    loop {
-        let r = reader.read_exact(&mut buf);
+        let r = reader.read_exact(&mut buf).await;
        match r {
            Ok(_) => {
                modification.put_rel_page_image(rel, blknum, Bytes::copy_from_slice(&buf))?;
@@ -174,19 +188,20 @@ fn import_rel<Reader: Read>(
    //
    // If we process rel segments out of order,
    // put_rel_extend will skip the update.
-    modification.put_rel_extend(rel, blknum)?;
+    modification.put_rel_extend(rel, blknum, ctx).await?;

    Ok(())
 }

 /// Import an SLRU segment file
 ///
-fn import_slru<Reader: Read>(
-    modification: &mut DatadirModification,
+async fn import_slru(
+    modification: &mut DatadirModification<'_>,
    slru: SlruKind,
    path: &Path,
-    mut reader: Reader,
+    reader: &mut (impl AsyncRead + Send + Sync + Unpin),
    len: usize,
+    ctx: &TimelineRequestContext,
 ) -> anyhow::Result<()> {
    info!("importing slru file {path:?}");

@@ -202,11 +217,13 @@ fn import_slru<Reader: Read>(

    ensure!(nblocks <= pg_constants::SLRU_PAGES_PER_SEGMENT as usize);

-    modification.put_slru_segment_creation(slru, segno, nblocks as u32)?;
+    modification
+        .put_slru_segment_creation(slru, segno, nblocks as u32, ctx)
+        .await?;

    let mut rpageno = 0;
    loop {
-        let r = reader.read_exact(&mut buf);
+        let r = reader.read_exact(&mut buf).await;
        match r {
            Ok(_) => {
                modification.put_slru_page_image(
@@ -237,11 +254,12 @@ fn import_slru<Reader: Read>(

 /// Scan PostgreSQL WAL files in given directory and load all records between
 /// 'startpoint' and 'endpoint' into the repository.
-fn import_wal(
+async fn import_wal(
    walpath: &Path,
    tline: &Timeline,
    startpoint: Lsn,
    endpoint: Lsn,
+    ctx: &TimelineRequestContext,
 ) -> anyhow::Result<()> {
    let mut waldecoder = WalStreamDecoder::new(startpoint, tline.pg_version);

@@ -249,7 +267,7 @@ fn import_wal(
    let mut offset = startpoint.segment_offset(WAL_SEGMENT_SIZE);
    let mut last_lsn = startpoint;

-    let mut walingest = WalIngest::new(tline, startpoint).no_ondemand_download()?;
+    let mut walingest = WalIngest::new(tline, startpoint, ctx).await?;

    while last_lsn <= endpoint {
        // FIXME: assume postgresql tli 1 for now
@@ -265,12 +283,14 @@ fn import_wal(
        }

        // Slurp the WAL file
-        let mut file = File::open(&path)?;
+        let mut file = std::fs::File::open(&path)?;

        if offset > 0 {
-            file.seek(SeekFrom::Start(offset as u64))?;
+            use std::io::Seek;
+            file.seek(std::io::SeekFrom::Start(offset as u64))?;
        }

+        use std::io::Read;
        let nread = file.read_to_end(&mut buf)?;
        if nread != WAL_SEGMENT_SIZE - offset {
            // Maybe allow this for .partial files?
@@ -285,8 +305,8 @@ fn import_wal(
        while last_lsn <= endpoint {
            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
                walingest
-                    .ingest_record(recdata, lsn, &mut modification, &mut decoded)
-                    .no_ondemand_download()?;
+                    .ingest_record(recdata, lsn, &mut modification, &mut decoded, ctx)
+                    .await?;
                last_lsn = lsn;

                nrecords += 1;
@@ -310,10 +330,11 @@ fn import_wal(
    Ok(())
 }

-pub fn import_basebackup_from_tar<Reader: Read>(
+pub async fn import_basebackup_from_tar(
    tline: &Timeline,
-    reader: Reader,
+    reader: &mut (impl AsyncRead + Send + Sync + Unpin),
    base_lsn: Lsn,
+    ctx: &TimelineRequestContext,
 ) -> Result<()> {
    info!("importing base at {base_lsn}");
    let mut modification = tline.begin_modification(base_lsn);
@@ -322,21 +343,24 @@ pub fn import_basebackup_from_tar<Reader: Read>(
    let mut pg_control: Option<ControlFileData> = None;

    // Import base
-    for base_tar_entry in tar::Archive::new(reader).entries()? {
-        let entry = base_tar_entry?;
+    let mut entries = Archive::new(reader).entries()?;
+    while let Some(base_tar_entry) = entries.next().await {
+        let mut entry = base_tar_entry?;
        let header = entry.header();
        let len = header.entry_size()? as usize;
        let file_path = header.path()?.into_owned();

        match header.entry_type() {
-            tar::EntryType::Regular => {
-                if let Some(res) = import_file(&mut modification, file_path.as_ref(), entry, len)? {
+            tokio_tar::EntryType::Regular => {
+                if let Some(res) =
+                    import_file(&mut modification, file_path.as_ref(), &mut entry, len, ctx).await?
+                {
                    // We found the pg_control file.
                    pg_control = Some(res);
                }
                modification.flush()?;
            }
-            tar::EntryType::Directory => {
+            tokio_tar::EntryType::Directory => {
                debug!("directory {:?}", file_path);
            }
            _ => {
@@ -356,31 +380,35 @@ pub fn import_basebackup_from_tar<Reader: Read>(
    Ok(())
 }

-pub fn import_wal_from_tar<Reader: Read>(
+pub async fn import_wal_from_tar(
    tline: &Timeline,
-    reader: Reader,
+    reader: &mut (impl AsyncRead + Send + Sync + Unpin),
    start_lsn: Lsn,
    end_lsn: Lsn,
+    ctx: &TimelineRequestContext,
 ) -> Result<()> {
    // Set up walingest mutable state
    let mut waldecoder = WalStreamDecoder::new(start_lsn, tline.pg_version);
    let mut segno = start_lsn.segment_number(WAL_SEGMENT_SIZE);
    let mut offset = start_lsn.segment_offset(WAL_SEGMENT_SIZE);
    let mut last_lsn = start_lsn;
-    let mut walingest = WalIngest::new(tline, start_lsn).no_ondemand_download()?;
+    let mut walingest = WalIngest::new(tline, start_lsn, ctx).await?;

    // Ingest wal until end_lsn
    info!("importing wal until {}", end_lsn);
-    let mut pg_wal_tar = tar::Archive::new(reader);
-    let mut pg_wal_entries_iter = pg_wal_tar.entries()?;
+    let mut pg_wal_tar = Archive::new(reader);
+    let mut pg_wal_entries = pg_wal_tar.entries()?;
    while last_lsn <= end_lsn {
        let bytes = {
-            let entry = pg_wal_entries_iter.next().expect("expected more wal")?;
+            let mut entry = pg_wal_entries
+                .next()
+                .await
+                .ok_or_else(|| anyhow::anyhow!("expected more wal"))??;
            let header = entry.header();
            let file_path = header.path()?.into_owned();

            match header.entry_type() {
-                tar::EntryType::Regular => {
+                tokio_tar::EntryType::Regular => {
                    // FIXME: assume postgresql tli 1 for now
                    let expected_filename = XLogFileName(1, segno, WAL_SEGMENT_SIZE);
                    let file_name = file_path
@@ -390,9 +418,9 @@ pub fn import_wal_from_tar<Reader: Read>(
                    ensure!(expected_filename == file_name);

                    debug!("processing wal file {:?}", file_path);
-                    read_all_bytes(entry)?
+                    read_all_bytes(&mut entry).await?
                }
-                tar::EntryType::Directory => {
+                tokio_tar::EntryType::Directory => {
                    debug!("directory {:?}", file_path);
                    continue;
                }
@@ -413,8 +441,8 @@ pub fn import_wal_from_tar<Reader: Read>(
        while last_lsn <= end_lsn {
            if let Some((lsn, recdata)) = waldecoder.poll_decode()? {
                walingest
-                    .ingest_record(recdata, lsn, &mut modification, &mut decoded)
-                    .no_ondemand_download()?;
+                    .ingest_record(recdata, lsn, &mut modification, &mut decoded, ctx)
+                    .await?;
                last_lsn = lsn;

                debug!("imported record at {} (end {})", lsn, end_lsn);
@@ -433,7 +461,7 @@ pub fn import_wal_from_tar<Reader: Read>(
    }

    // Log any extra unused files
-    for e in &mut pg_wal_entries_iter {
+    while let Some(e) = pg_wal_entries.next().await {
        let entry = e?;
        let header = entry.header();
        let file_path = header.path()?.into_owned();
@@ -443,11 +471,12 @@ pub fn import_wal_from_tar<Reader: Read>(
    Ok(())
 }

-fn import_file<Reader: Read>(
-    modification: &mut DatadirModification,
+async fn import_file(
+    modification: &mut DatadirModification<'_>,
    file_path: &Path,
-    reader: Reader,
+    reader: &mut (impl AsyncRead + Send + Sync + Unpin),
    len: usize,
+    ctx: &TimelineRequestContext,
 ) -> Result<Option<ControlFileData>> {
    let file_name = match file_path.file_name() {
        Some(name) => name.to_string_lossy(),
@@ -466,7 +495,7 @@ fn import_file<Reader: Read>(

        match file_name.as_ref() {
            "pg_control" => {
-                let bytes = read_all_bytes(reader)?;
+                let bytes = read_all_bytes(reader).await?;

                // Extract the checkpoint record and import it separately.
                let pg_control = ControlFileData::decode(&bytes[..])?;
@@ -479,15 +508,17 @@ fn import_file<Reader: Read>(
                return Ok(Some(pg_control));
            }
            "pg_filenode.map" => {
-                let bytes = read_all_bytes(reader)?;
-                modification.put_relmap_file(spcnode, dbnode, bytes)?;
+                let bytes = read_all_bytes(reader).await?;
+                modification
+                    .put_relmap_file(spcnode, dbnode, bytes, ctx)
+                    .await?;
                debug!("imported relmap file")
            }
            "PG_VERSION" => {
                debug!("ignored PG_VERSION file");
            }
            _ => {
-                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
+                import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;
                debug!("imported rel creation");
            }
        }
@@ -502,44 +533,48 @@ fn import_file<Reader: Read>(

        match file_name.as_ref() {
            "pg_filenode.map" => {
-                let bytes = read_all_bytes(reader)?;
-                modification.put_relmap_file(spcnode, dbnode, bytes)?;
+                let bytes = read_all_bytes(reader).await?;
+                modification
+                    .put_relmap_file(spcnode, dbnode, bytes, ctx)
+                    .await?;
                debug!("imported relmap file")
            }
            "PG_VERSION" => {
                debug!("ignored PG_VERSION file");
            }
            _ => {
-                import_rel(modification, file_path, spcnode, dbnode, reader, len)?;
+                import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;
                debug!("imported rel creation");
            }
        }
    } else if file_path.starts_with("pg_xact") {
        let slru = SlruKind::Clog;

-        import_slru(modification, slru, file_path, reader, len)?;
+        import_slru(modification, slru, file_path, reader, len, ctx).await?;
        debug!("imported clog slru");
    } else if file_path.starts_with("pg_multixact/offsets") {
        let slru = SlruKind::MultiXactOffsets;

-        import_slru(modification, slru, file_path, reader, len)?;
+        import_slru(modification, slru, file_path, reader, len, ctx).await?;
        debug!("imported multixact offsets slru");
    } else if file_path.starts_with("pg_multixact/members") {
        let slru = SlruKind::MultiXactMembers;

-        import_slru(modification, slru, file_path, reader, len)?;
+        import_slru(modification, slru, file_path, reader, len, ctx).await?;
        debug!("imported multixact members slru");
    } else if file_path.starts_with("pg_twophase") {
        let xid = u32::from_str_radix(file_name.as_ref(), 16)?;

-        let bytes = read_all_bytes(reader)?;
-        modification.put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]))?;
+        let bytes = read_all_bytes(reader).await?;
+        modification
+            .put_twophase_file(xid, Bytes::copy_from_slice(&bytes[..]), ctx)
+            .await?;
        debug!("imported twophase file");
    } else if file_path.starts_with("pg_wal") {
        debug!("found wal file in base section. ignore it");
    } else if file_path.starts_with("zenith.signal") {
        // Parse zenith signal file to set correct previous LSN
-        let bytes = read_all_bytes(reader)?;
+        let bytes = read_all_bytes(reader).await?;
        // zenith.signal format is "PREV LSN: prev_lsn"
        // TODO write serialization and deserialization in the same place.
        let zenith_signal = std::str::from_utf8(&bytes)?.trim();
@@ -576,8 +611,11 @@ fn import_file<Reader: Read>(
    Ok(None)
 }

-fn read_all_bytes<Reader: Read>(mut reader: Reader) -> Result<Bytes> {
+/// Drain `reader` to EOF and return everything it yielded as a single `Bytes` buffer.
+/// The entire stream is buffered in memory before returning.
+async fn read_all_bytes(reader: &mut (impl AsyncRead + Send + Sync + Unpin)) -> Result<Bytes> {
    let mut buf: Vec<u8> = vec![];
-    reader.read_to_end(&mut buf)?;
-    Ok(Bytes::copy_from_slice(&buf[..]))
+    reader.read_to_end(&mut buf).await?;
+    // `Bytes::from(Vec<u8>)` takes over the allocation instead of copying it a second time.
+    Ok(Bytes::from(buf))
 }
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -1,7 +1,8 @@
 mod auth;
 pub mod basebackup;
-pub mod billing_metrics;
 pub mod config;
+pub mod consumption_metrics;
+pub mod context;
 pub mod http;
 pub mod import_datadir;
 pub mod keyspace;
@@ -9,7 +10,6 @@ pub(crate) mod metrics;
 pub mod page_cache;
 pub mod page_service;
 pub mod pgdatadir_mapping;
-pub mod profiling;
 pub mod repository;
 pub mod task_mgr;
 pub mod tenant;
@@ -22,7 +22,6 @@ pub mod walredo;

 use std::path::Path;

-use crate::task_mgr::TaskKind;
 use tracing::info;

 /// Current storage format version
@@ -42,35 +41,6 @@ pub const DELTA_FILE_MAGIC: u16 = 0x5A61;

 static ZERO_PAGE: bytes::Bytes = bytes::Bytes::from_static(&[0u8; 8192]);

-pub async fn shutdown_pageserver(exit_code: i32) {
-    // Shut down the libpq endpoint task. This prevents new connections from
-    // being accepted.
-    task_mgr::shutdown_tasks(Some(TaskKind::LibpqEndpointListener), None, None).await;
-
-    // Shut down any page service tasks.
-    task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None).await;
-
-    // Shut down all the tenants. This flushes everything to disk and kills
-    // the checkpoint and GC tasks.
-    tenant::mgr::shutdown_all_tenants().await;
-
-    // Stop syncing with remote storage.
-    //
-    // FIXME: Does this wait for the sync tasks to finish syncing what's queued up?
-    // Should it?
-    task_mgr::shutdown_tasks(Some(TaskKind::RemoteUploadTask), None, None).await;
-
-    // Shut down the HTTP endpoint last, so that you can still check the server's
-    // status while it's shutting down.
-    // FIXME: We should probably stop accepting commands like attach/detach earlier.
-    task_mgr::shutdown_tasks(Some(TaskKind::HttpEndpointListener), None, None).await;
-
-    // There should be nothing left, but let's be sure
-    task_mgr::shutdown_tasks(None, None, None).await;
-    info!("Shut down successfully completed");
-    std::process::exit(exit_code);
-}
-
 const DEFAULT_BASE_BACKOFF_SECONDS: f64 = 0.1;
 const DEFAULT_MAX_BACKOFF_SECONDS: f64 = 3.0;

--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -209,15 +209,34 @@ pub static NUM_ONDISK_LAYERS: Lazy<IntGauge> = Lazy::new(|| {

 // remote storage metrics

-static REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS: Lazy<IntGaugeVec> = Lazy::new(|| {
+/// NB: increment _after_ recording the current value into [`REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST`].
+static REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE: Lazy<IntGaugeVec> = Lazy::new(|| {
    register_int_gauge_vec!(
-        "pageserver_remote_upload_queue_unfinished_tasks",
-        "Number of tasks in the upload queue that are not finished yet.",
+        "pageserver_remote_timeline_client_calls_unfinished",
+        "Number of ongoing calls to remote timeline client. \
+         Used to populate pageserver_remote_timeline_client_calls_started. \
+         This metric is not useful for sampling from Prometheus, but useful in tests.",
        &["tenant_id", "timeline_id", "file_kind", "op_kind"],
    )
    .expect("failed to define a metric")
 });

+static REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "pageserver_remote_timeline_client_calls_started",
+        "When calling a remote timeline client method, we record the current value \
+         of the calls_unfinished gauge in this histogram. Plot the histogram \
+         over time in a heatmap to visualize how many operations were ongoing \
+         at a given instant. It gives you a better idea of the queue depth \
+         than plotting the gauge directly, since operations may complete faster \
+         than the sampling interval.",
+        &["tenant_id", "timeline_id", "file_kind", "op_kind"],
+        // The calls_unfinished gauge is an integer gauge, hence we have integer buckets.
+        vec![0.0, 1.0, 2.0, 4.0, 6.0, 8.0, 10.0, 15.0, 20.0, 40.0, 60.0, 80.0, 100.0, 500.0],
+    )
+    .expect("failed to define a metric")
+});
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RemoteOpKind {
    Upload,
@@ -248,15 +267,12 @@ impl RemoteOpFileKind {
    }
 }

-pub static REMOTE_OPERATION_KINDS: &[&str] = &["upload", "download", "delete"];
-pub static REMOTE_OPERATION_FILE_KINDS: &[&str] = &["layer", "index"];
-pub static REMOTE_OPERATION_STATUSES: &[&str] = &["success", "failure"];
-
 pub static REMOTE_OPERATION_TIME: Lazy<HistogramVec> = Lazy::new(|| {
    register_histogram_vec!(
        "pageserver_remote_operation_seconds",
        "Time spent on remote storage operations. \
-        Grouped by tenant, timeline, operation_kind and status",
+        Grouped by tenant, timeline, operation_kind and status. \
+        Does not account for time spent waiting in remote timeline client's queues.",
        &["tenant_id", "timeline_id", "file_kind", "op_kind", "status"]
    )
    .expect("failed to define a metric")
@@ -475,21 +491,6 @@ impl Drop for TimelineMetrics {
        for op in SMGR_QUERY_TIME_OPERATIONS {
            let _ = SMGR_QUERY_TIME.remove_label_values(&[op, tenant_id, timeline_id]);
        }
-
-        let _ = REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS.remove_label_values(&[tenant_id, timeline_id]);
-        for file_kind in REMOTE_OPERATION_FILE_KINDS {
-            for op in REMOTE_OPERATION_KINDS {
-                for status in REMOTE_OPERATION_STATUSES {
-                    let _ = REMOTE_OPERATION_TIME.remove_label_values(&[
-                        tenant_id,
-                        timeline_id,
-                        file_kind,
-                        op,
-                        status,
-                    ]);
-                }
-            }
-        }
    }
 }

@@ -510,7 +511,8 @@ pub struct RemoteTimelineClientMetrics {
    timeline_id: String,
    remote_physical_size_gauge: Mutex<Option<UIntGauge>>,
    remote_operation_time: Mutex<HashMap<(&'static str, &'static str, &'static str), Histogram>>,
-    unfinished_tasks: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
+    calls_unfinished_gauge: Mutex<HashMap<(&'static str, &'static str), IntGauge>>,
+    calls_started_hist: Mutex<HashMap<(&'static str, &'static str), Histogram>>,
 }

 impl RemoteTimelineClientMetrics {
@@ -519,7 +521,8 @@ impl RemoteTimelineClientMetrics {
            tenant_id: tenant_id.to_string(),
            timeline_id: timeline_id.to_string(),
            remote_operation_time: Mutex::new(HashMap::default()),
-            unfinished_tasks: Mutex::new(HashMap::default()),
+            calls_unfinished_gauge: Mutex::new(HashMap::default()),
+            calls_started_hist: Mutex::new(HashMap::default()),
            remote_physical_size_gauge: Mutex::new(None),
        }
    }
@@ -558,16 +561,37 @@ impl RemoteTimelineClientMetrics {
        });
        metric.clone()
    }
-    pub fn unfinished_tasks(
+    fn calls_unfinished_gauge(
        &self,
        file_kind: &RemoteOpFileKind,
        op_kind: &RemoteOpKind,
    ) -> IntGauge {
        // XXX would be nice to have an upgradable RwLock
-        let mut guard = self.unfinished_tasks.lock().unwrap();
+        let mut guard = self.calls_unfinished_gauge.lock().unwrap();
        let key = (file_kind.as_str(), op_kind.as_str());
        let metric = guard.entry(key).or_insert_with(move || {
-            REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS
+            REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE
+                .get_metric_with_label_values(&[
+                    &self.tenant_id.to_string(),
+                    &self.timeline_id.to_string(),
+                    key.0,
+                    key.1,
+                ])
+                .unwrap()
+        });
+        metric.clone()
+    }
+
+    fn calls_started_hist(
+        &self,
+        file_kind: &RemoteOpFileKind,
+        op_kind: &RemoteOpKind,
+    ) -> Histogram {
+        // XXX would be nice to have an upgradable RwLock
+        let mut guard = self.calls_started_hist.lock().unwrap();
+        let key = (file_kind.as_str(), op_kind.as_str());
+        let metric = guard.entry(key).or_insert_with(move || {
+            REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST
                .get_metric_with_label_values(&[
                    &self.tenant_id.to_string(),
                    &self.timeline_id.to_string(),
@@ -580,6 +604,58 @@ impl RemoteTimelineClientMetrics {
    }
 }

+/// Guard returned by [`RemoteTimelineClientMetrics::call_begin`]; unless disarmed, dropping it decrements the gauge it holds.
+#[must_use]
+pub(crate) struct RemoteTimelineClientCallMetricGuard(Option<IntGauge>);
+
+impl RemoteTimelineClientCallMetricGuard {
+    /// Consume this guard object without decrementing the metric.
+    /// The caller vouches to decrement manually (see [`RemoteTimelineClientMetrics::call_end`]), so that the prior increment cancels out.
+    pub fn will_decrement_manually(mut self) {
+        self.0 = None; // disarm: with `None` stored, drop() finds no gauge to decrement
+    }
+}
+
+impl Drop for RemoteTimelineClientCallMetricGuard {
+    fn drop(&mut self) {
+        if let Some(unfinished_gauge) = self.0.as_ref() {
+            unfinished_gauge.dec();
+        }
+    }
+}
+
+impl RemoteTimelineClientMetrics {
+    /// Record the start of a call: observe the current unfinished count in the histogram, then increment the gauge.
+    ///
+    /// Drop the returned guard object once the operation is finished to decrement the gauge.
+    /// Or, use [`RemoteTimelineClientCallMetricGuard::will_decrement_manually`] and
+    /// [`Self::call_end`] if that is more suitable.
+    /// Never do both.
+    pub(crate) fn call_begin(
+        &self,
+        file_kind: &RemoteOpFileKind,
+        op_kind: &RemoteOpKind,
+    ) -> RemoteTimelineClientCallMetricGuard {
+        let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
+        self.calls_started_hist(file_kind, op_kind)
+            .observe(unfinished_metric.get() as f64);
+        unfinished_metric.inc();
+        RemoteTimelineClientCallMetricGuard(Some(unfinished_metric))
+    }
+
+    /// Manually decrement the unfinished-calls gauge instead of using the guard object.
+    /// Using the guard object is generally preferable.
+    /// See [`Self::call_begin`] for more context.
+    pub(crate) fn call_end(&self, file_kind: &RemoteOpFileKind, op_kind: &RemoteOpKind) {
+        let unfinished_metric = self.calls_unfinished_gauge(file_kind, op_kind);
+        debug_assert!(
+            unfinished_metric.get() > 0,
+            "begin and end should cancel out"
+        );
+        unfinished_metric.dec();
+    }
+}
+
 impl Drop for RemoteTimelineClientMetrics {
    fn drop(&mut self) {
        let RemoteTimelineClientMetrics {
@@ -587,13 +663,22 @@ impl Drop for RemoteTimelineClientMetrics {
            timeline_id,
            remote_physical_size_gauge,
            remote_operation_time,
-            unfinished_tasks,
+            calls_unfinished_gauge,
+            calls_started_hist,
        } = self;
        for ((a, b, c), _) in remote_operation_time.get_mut().unwrap().drain() {
            let _ = REMOTE_OPERATION_TIME.remove_label_values(&[tenant_id, timeline_id, a, b, c]);
        }
-        for ((a, b), _) in unfinished_tasks.get_mut().unwrap().drain() {
-            let _ = REMOTE_UPLOAD_QUEUE_UNFINISHED_TASKS.remove_label_values(&[
+        for ((a, b), _) in calls_unfinished_gauge.get_mut().unwrap().drain() {
+            let _ = REMOTE_TIMELINE_CLIENT_CALLS_UNFINISHED_GAUGE.remove_label_values(&[
+                tenant_id,
+                timeline_id,
+                a,
+                b,
+            ]);
+        }
+        for ((a, b), _) in calls_started_hist.get_mut().unwrap().drain() {
+            let _ = REMOTE_TIMELINE_CLIENT_CALLS_STARTED_HIST.remove_label_values(&[
                tenant_id,
                timeline_id,
                a,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -9,7 +9,7 @@
 //  custom protocol.
 //

-use anyhow::{bail, ensure, Context, Result};
+use anyhow::Context;
 use bytes::Buf;
 use bytes::Bytes;
 use futures::{Stream, StreamExt};
@@ -19,6 +19,8 @@ use pageserver_api::models::{
    PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse,
    PagestreamNblocksRequest, PagestreamNblocksResponse,
 };
+use pq_proto::ConnectionError;
+use pq_proto::FeStartupPacket;
 use pq_proto::{BeMessage, FeMessage, RowDescriptor};
 use std::io;
 use std::net::TcpListener;
@@ -26,11 +28,9 @@ use std::str;
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::Duration;
-use tokio::pin;
-use tokio_util::io::StreamReader;
-use tokio_util::io::SyncIoBridge;
 use tracing::*;
 use utils::id::ConnectionId;
+use utils::postgres_backend_async::QueryError;
 use utils::{
    auth::{Claims, JwtAuth, Scope},
    id::{TenantId, TimelineId},
@@ -42,30 +42,32 @@ use utils::{

 use crate::auth::check_permission;
 use crate::basebackup;
-use crate::config::{PageServerConf, ProfilingConfig};
+use crate::config::PageServerConf;
+use crate::context::{DownloadBehavior, RequestContext, TaskKind};
 use crate::import_datadir::import_wal_from_tar;
 use crate::metrics::{LIVE_CONNECTIONS_COUNT, SMGR_QUERY_TIME};
-use crate::profiling::profpoint_start;
 use crate::task_mgr;
-use crate::task_mgr::TaskKind;
 use crate::tenant::mgr;
-use crate::tenant::{Tenant, Timeline};
+use crate::tenant::{Tenant, TenantRequestContext, Timeline, TimelineRequestContext};
 use crate::trace::Tracer;

 use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID;
 use postgres_ffi::BLCKSZ;

-fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Bytes>> + '_ {
+fn copyin_stream<'a>(
+    pgb: &'a mut PostgresBackend,
+    ctx: &'a RequestContext,
+) -> impl Stream<Item = io::Result<Bytes>> + 'a {
    async_stream::try_stream! {
        loop {
            let msg = tokio::select! {
                biased;

-                _ = task_mgr::shutdown_watcher() => {
+                _ = ctx.cancelled() => {
                    // We were requested to shut down.
                    let msg = format!("pageserver is shutting down");
-                    let _ = pgb.write_message(&BeMessage::ErrorResponse(&msg));
-                    Err(anyhow::anyhow!(msg))
+                    let _ = pgb.write_message(&BeMessage::ErrorResponse(&msg, None));
+                    Err(QueryError::Other(anyhow::anyhow!(msg)))
                }

                msg = pgb.read_message() => { msg }
@@ -78,14 +80,15 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                        FeMessage::CopyDone => { break },
                        FeMessage::Sync => continue,
                        FeMessage::Terminate => {
-                            let msg = format!("client terminated connection with Terminate message during COPY");
-                            pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
+                            let msg = "client terminated connection with Terminate message during COPY";
+                            let query_error_error = QueryError::Disconnected(ConnectionError::Socket(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
+                            pgb.write_message(&BeMessage::ErrorResponse(msg, Some(query_error_error.pg_error_code())))?;
                            Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
                            break;
                        }
                        m => {
-                            let msg = format!("unexpected message {:?}", m);
-                            pgb.write_message(&BeMessage::ErrorResponse(&msg))?;
+                            let msg = format!("unexpected message {m:?}");
+                            pgb.write_message(&BeMessage::ErrorResponse(&msg, None))?;
                            Err(io::Error::new(io::ErrorKind::Other, msg))?;
                            break;
                        }
@@ -95,12 +98,16 @@ fn copyin_stream(pgb: &mut PostgresBackend) -> impl Stream<Item = io::Result<Byt
                }
                Ok(None) => {
                    let msg = "client closed connection during COPY";
-                    pgb.write_message(&BeMessage::ErrorResponse(msg))?;
+                    let query_error_error = QueryError::Disconnected(ConnectionError::Socket(io::Error::new(io::ErrorKind::ConnectionReset, msg)));
+                    pgb.write_message(&BeMessage::ErrorResponse(msg, Some(query_error_error.pg_error_code())))?;
                    pgb.flush().await?;
                    Err(io::Error::new(io::ErrorKind::ConnectionReset, msg))?;
                }
-                Err(e) => {
-                    Err(io::Error::new(io::ErrorKind::Other, e))?;
+                Err(QueryError::Disconnected(ConnectionError::Socket(io_error))) => {
+                    Err(io_error)?;
+                }
+                Err(other) => {
+                    Err(io::Error::new(io::ErrorKind::Other, other))?;
                }
            };
        }
@@ -119,6 +126,7 @@ pub async fn libpq_listener_main(
    auth: Option<Arc<JwtAuth>>,
    listener: TcpListener,
    auth_type: AuthType,
+    listener_ctx: RequestContext,
 ) -> anyhow::Result<()> {
    listener.set_nonblocking(true)?;
    let tokio_listener = tokio::net::TcpListener::from_std(listener)?;
@@ -127,8 +135,9 @@ pub async fn libpq_listener_main(
    while let Some(res) = tokio::select! {
        biased;

-        _ = task_mgr::shutdown_watcher() => {
+        _ = listener_ctx.cancelled() => {
            // We were requested to shut down.
+            info!("libpq listener shutting down");
            None
        }

@@ -142,18 +151,33 @@ pub async fn libpq_listener_main(
                debug!("accepted connection from {}", peer_addr);
                let local_auth = auth.clone();

+                let connection_ctx = RequestContext::with_parent(
+                    TaskKind::PageRequestHandler,
+                    DownloadBehavior::Download,
+                    &listener_ctx,
+                );
+
                // PageRequestHandler tasks are not associated with any particular
                // timeline in the task manager. In practice most connections will
                // only deal with a particular timeline, but we don't know which one
                // yet.
                task_mgr::spawn(
                    &tokio::runtime::Handle::current(),
-                    TaskKind::PageRequestHandler,
-                    None,
-                    None,
                    "serving compute connection task",
                    false,
-                    page_service_conn_main(conf, local_auth, socket, auth_type),
+                    async move {
+                        if let Err(err) = page_service_conn_main(
+                            conf,
+                            local_auth,
+                            socket,
+                            auth_type,
+                            connection_ctx,
+                        )
+                        .await
+                        {
+                            error!("connection handler exited with error: {err:?}");
+                        }
+                    },
                );
            }
            Err(err) => {
@@ -173,6 +197,7 @@ async fn page_service_conn_main(
    auth: Option<Arc<JwtAuth>>,
    socket: tokio::net::TcpStream,
    auth_type: AuthType,
+    connection_ctx: RequestContext,
 ) -> anyhow::Result<()> {
    // Immediately increment the gauge, then create a job to decrement it on task exit.
    // One of the pros of `defer!` is that this will *most probably*
@@ -187,34 +212,32 @@ async fn page_service_conn_main(
        .set_nodelay(true)
        .context("could not set TCP_NODELAY")?;

-    let mut conn_handler = PageServerHandler::new(conf, auth);
+    let cancellation_token = connection_ctx.cancellation_token().clone();
+
+    let mut conn_handler = PageServerHandler::new(conf, auth, connection_ctx);
    let pgbackend = PostgresBackend::new(socket, auth_type, None)?;

    let result = pgbackend
-        .run(&mut conn_handler, task_mgr::shutdown_watcher)
+        .run(&mut conn_handler, || cancellation_token.cancelled())
        .await;
    match result {
        Ok(()) => {
            // we've been requested to shut down
            Ok(())
        }
-        Err(err) => {
-            let root_cause_io_err_kind = err
-                .root_cause()
-                .downcast_ref::<io::Error>()
-                .map(|e| e.kind());
-
+        Err(QueryError::Disconnected(ConnectionError::Socket(io_error))) => {
            // `ConnectionReset` error happens when the Postgres client closes the connection.
            // As this disconnection happens quite often and is expected,
            // we decided to downgrade the logging level to `INFO`.
            // See: https://github.com/neondatabase/neon/issues/1683.
-            if root_cause_io_err_kind == Some(io::ErrorKind::ConnectionReset) {
+            if io_error.kind() == io::ErrorKind::ConnectionReset {
                info!("Postgres client disconnected");
                Ok(())
            } else {
-                Err(err)
+                Err(io_error).context("Postgres connection error")
            }
        }
+        other => other.context("Postgres query error"),
    }
 }

@@ -251,35 +274,38 @@ impl PageRequestMetrics {
    }
 }

-#[derive(Debug)]
 struct PageServerHandler {
-    conf: &'static PageServerConf,
+    _conf: &'static PageServerConf,
    auth: Option<Arc<JwtAuth>>,
    claims: Option<Claims>,
+
+    connection_ctx: RequestContext,
 }

 impl PageServerHandler {
-    pub fn new(conf: &'static PageServerConf, auth: Option<Arc<JwtAuth>>) -> Self {
+    pub fn new(
+        conf: &'static PageServerConf,
+        auth: Option<Arc<JwtAuth>>,
+        connection_ctx: RequestContext,
+    ) -> Self {
        PageServerHandler {
-            conf,
+            _conf: conf,
            auth,
            claims: None,
+            connection_ctx,
        }
    }

    #[instrument(skip(self, pgb))]
    async fn handle_pagerequests(
-        &self,
+        &mut self,
        pgb: &mut PostgresBackend,
        tenant_id: TenantId,
        timeline_id: TimelineId,
    ) -> anyhow::Result<()> {
-        // NOTE: pagerequests handler exits when connection is closed,
-        //       so there is no need to reset the association
-        task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
+        let (tenant, ctx) = get_active_tenant_with_timeout(tenant_id, &self.connection_ctx).await?;

        // Make request tracer if needed
-        let tenant = get_active_tenant_with_timeout(tenant_id).await?;
        let mut tracer = if tenant.get_trace_read_requests() {
            let connection_id = ConnectionId::generate();
            let path = tenant
@@ -291,7 +317,7 @@ impl PageServerHandler {
        };

        // Check that the timeline exists
-        let timeline = tenant.get_timeline(timeline_id, true)?;
+        let (timeline, ctx) = tenant.get_active_timeline(timeline_id, &ctx)?;

        // switch client to COPYBOTH
        pgb.write_message(&BeMessage::CopyBothResponse)?;
@@ -303,7 +329,7 @@ impl PageServerHandler {
            let msg = tokio::select! {
                biased;

-                _ = task_mgr::shutdown_watcher() => {
+                _ = ctx.cancelled() => {
                    // We were requested to shut down.
                    info!("shutdown request received in page handler");
                    break;
@@ -316,7 +342,7 @@ impl PageServerHandler {
                Some(FeMessage::CopyData(bytes)) => bytes,
                Some(FeMessage::Terminate) => break,
                Some(m) => {
-                    bail!("unexpected message: {m:?} during COPY");
+                    anyhow::bail!("unexpected message: {m:?} during COPY");
                }
                None => break, // client disconnected
            };
@@ -330,22 +356,27 @@ impl PageServerHandler {

            let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader())?;

+            // TODO: We could create a new per-request context here, with unique ID.
+            // Currently we use the same per-timeline context for all requests
+
            let response = match neon_fe_msg {
                PagestreamFeMessage::Exists(req) => {
                    let _timer = metrics.get_rel_exists.start_timer();
-                    self.handle_get_rel_exists_request(&timeline, &req).await
+                    self.handle_get_rel_exists_request(&timeline, &req, &ctx)
+                        .await
                }
                PagestreamFeMessage::Nblocks(req) => {
                    let _timer = metrics.get_rel_size.start_timer();
-                    self.handle_get_nblocks_request(&timeline, &req).await
+                    self.handle_get_nblocks_request(&timeline, &req, &ctx).await
                }
                PagestreamFeMessage::GetPage(req) => {
                    let _timer = metrics.get_page_at_lsn.start_timer();
-                    self.handle_get_page_at_lsn_request(&timeline, &req).await
+                    self.handle_get_page_at_lsn_request(&timeline, &req, &ctx)
+                        .await
                }
                PagestreamFeMessage::DbSize(req) => {
                    let _timer = metrics.get_db_size.start_timer();
-                    self.handle_db_size_request(&timeline, &req).await
+                    self.handle_db_size_request(&timeline, &req, &ctx).await
                }
            };

@@ -366,19 +397,20 @@ impl PageServerHandler {

    #[instrument(skip(self, pgb))]
    async fn handle_import_basebackup(
-        &self,
+        &mut self,
        pgb: &mut PostgresBackend,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        base_lsn: Lsn,
        _end_lsn: Lsn,
        pg_version: u32,
-    ) -> anyhow::Result<()> {
-        task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
+    ) -> Result<(), QueryError> {
        // Create empty timeline
        info!("creating new timeline");
-        let tenant = get_active_tenant_with_timeout(tenant_id).await?;
-        let timeline = tenant.create_empty_timeline(timeline_id, base_lsn, pg_version)?;
+        let (tenant, tenant_ctx) =
+            get_active_tenant_with_timeout(tenant_id, &self.connection_ctx).await?;
+        let (timeline, ctx) =
+            tenant.create_empty_timeline(timeline_id, base_lsn, pg_version, &tenant_ctx)?;

        // TODO mark timeline as not ready until it reaches end_lsn.
        // We might have some wal to import as well, and we should prevent compute
@@ -395,11 +427,9 @@ impl PageServerHandler {
        pgb.write_message(&BeMessage::CopyInResponse)?;
        pgb.flush().await?;

-        let copyin_stream = copyin_stream(pgb);
-        pin!(copyin_stream);
-
+        let mut copyin_stream = Box::pin(copyin_stream(pgb, &ctx));
        timeline
-            .import_basebackup_from_tar(&mut copyin_stream, base_lsn)
+            .import_basebackup_from_tar(&mut copyin_stream, base_lsn, &ctx)
            .await?;

        // Drain the rest of the Copy data
@@ -423,17 +453,21 @@ impl PageServerHandler {

    #[instrument(skip(self, pgb))]
    async fn handle_import_wal(
-        &self,
+        &mut self,
        pgb: &mut PostgresBackend,
        tenant_id: TenantId,
        timeline_id: TimelineId,
        start_lsn: Lsn,
        end_lsn: Lsn,
-    ) -> anyhow::Result<()> {
-        task_mgr::associate_with(Some(tenant_id), Some(timeline_id));
-
-        let timeline = get_active_timeline_with_timeout(tenant_id, timeline_id).await?;
-        ensure!(timeline.get_last_record_lsn() == start_lsn);
+    ) -> Result<(), QueryError> {
+        let (timeline, ctx) =
+            get_active_timeline_with_timeout(tenant_id, timeline_id, &self.connection_ctx).await?;
+        let last_record_lsn = timeline.get_last_record_lsn();
+        if last_record_lsn != start_lsn {
+            return Err(QueryError::Other(
+                anyhow::anyhow!("Cannot import WAL from Lsn {start_lsn} because timeline does not start from the same lsn: {last_record_lsn}"))
+            );
+        }

        // TODO leave clean state on error. For now you can use detach to clean
        // up broken state from a failed import.
@@ -442,9 +476,9 @@ impl PageServerHandler {
        info!("importing wal");
        pgb.write_message(&BeMessage::CopyInResponse)?;
        pgb.flush().await?;
-        let mut copyin_stream = Box::pin(copyin_stream(pgb));
-        let reader = SyncIoBridge::new(StreamReader::new(&mut copyin_stream));
-        tokio::task::block_in_place(|| import_wal_from_tar(&timeline, reader, start_lsn, end_lsn))?;
+        let mut copyin_stream = Box::pin(copyin_stream(pgb, &ctx));
+        let mut reader = tokio_util::io::StreamReader::new(&mut copyin_stream);
+        import_wal_from_tar(&timeline, &mut reader, start_lsn, end_lsn, &ctx).await?;
        info!("wal import complete");

        // Drain the rest of the Copy data
@@ -457,7 +491,11 @@ impl PageServerHandler {
        }

        // TODO Does it make sense to overshoot?
-        ensure!(timeline.get_last_record_lsn() >= end_lsn);
+        let reached_lsn = timeline.get_last_record_lsn(); // read once, after the import, so check and message agree
+        if reached_lsn < end_lsn {
+            let err = anyhow::anyhow!("WAL import ended at Lsn {reached_lsn}, short of the requested end Lsn {end_lsn}");
+            return Err(QueryError::Other(err));
+        }

        // Flush data to disk, then upload to s3. No need for a forced checkpoint.
        // We only want to persist the data, and it doesn't matter if it's in the
@@ -486,7 +524,8 @@ impl PageServerHandler {
        mut lsn: Lsn,
        latest: bool,
        latest_gc_cutoff_lsn: &RcuReadGuard<Lsn>,
-    ) -> Result<Lsn> {
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<Lsn> {
        if latest {
            // Latest page version was requested. If LSN is given, it is a hint
            // to the page server that there have been no modifications to the
@@ -509,7 +548,7 @@ impl PageServerHandler {
            if lsn <= last_record_lsn {
                lsn = last_record_lsn;
            } else {
-                timeline.wait_lsn(lsn).await?;
+                timeline.wait_lsn(lsn, ctx).await?;
                // Since we waited for 'lsn' to arrive, that is now the last
                // record LSN. (Or close enough for our purposes; the
                // last-record LSN can advance immediately after we return
@@ -517,11 +556,11 @@ impl PageServerHandler {
            }
        } else {
            if lsn == Lsn(0) {
-                bail!("invalid LSN(0) in request");
+                anyhow::bail!("invalid LSN(0) in request");
            }
-            timeline.wait_lsn(lsn).await?;
+            timeline.wait_lsn(lsn, ctx).await?;
        }
-        ensure!(
+        anyhow::ensure!(
            lsn >= **latest_gc_cutoff_lsn,
            "tried to request a page version that was garbage collected. requested at {} gc cutoff {}",
            lsn, **latest_gc_cutoff_lsn
@@ -529,60 +568,61 @@ impl PageServerHandler {
        Ok(lsn)
    }

-    #[instrument(skip(self, timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req, ctx), fields(rel = %req.rel, req_lsn = %req.lsn))]
    async fn handle_get_rel_exists_request(
        &self,
        timeline: &Timeline,
        req: &PagestreamExistsRequest,
-    ) -> Result<PagestreamBeMessage> {
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<PagestreamBeMessage> {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)
-            .await?;
+        let lsn =
+            Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
+                .await?;

-        let exists = crate::tenant::with_ondemand_download(|| {
-            timeline.get_rel_exists(req.rel, lsn, req.latest)
-        })
-        .await?;
+        let exists = timeline
+            .get_rel_exists(req.rel, lsn, req.latest, ctx)
+            .await?;

        Ok(PagestreamBeMessage::Exists(PagestreamExistsResponse {
            exists,
        }))
    }

-    #[instrument(skip(self, timeline, req), fields(rel = %req.rel, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req, ctx), fields(rel = %req.rel, req_lsn = %req.lsn))]
    async fn handle_get_nblocks_request(
        &self,
        timeline: &Timeline,
        req: &PagestreamNblocksRequest,
-    ) -> Result<PagestreamBeMessage> {
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<PagestreamBeMessage> {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)
-            .await?;
+        let lsn =
+            Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
+                .await?;

-        let n_blocks = crate::tenant::with_ondemand_download(|| {
-            timeline.get_rel_size(req.rel, lsn, req.latest)
-        })
-        .await?;
+        let n_blocks = timeline.get_rel_size(req.rel, lsn, req.latest, ctx).await?;

        Ok(PagestreamBeMessage::Nblocks(PagestreamNblocksResponse {
            n_blocks,
        }))
    }

-    #[instrument(skip(self, timeline, req), fields(dbnode = %req.dbnode, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req, ctx), fields(dbnode = %req.dbnode, req_lsn = %req.lsn))]
    async fn handle_db_size_request(
        &self,
        timeline: &Timeline,
        req: &PagestreamDbSizeRequest,
-    ) -> Result<PagestreamBeMessage> {
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<PagestreamBeMessage> {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)
-            .await?;
+        let lsn =
+            Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
+                .await?;

-        let total_blocks = crate::tenant::with_ondemand_download(|| {
-            timeline.get_db_size(DEFAULTTABLESPACE_OID, req.dbnode, lsn, req.latest)
-        })
-        .await?;
+        let total_blocks = timeline
+            .get_db_size(DEFAULTTABLESPACE_OID, req.dbnode, lsn, req.latest, ctx)
+            .await?;
        let db_size = total_blocks as i64 * BLCKSZ as i64;

        Ok(PagestreamBeMessage::DbSize(PagestreamDbSizeResponse {
@@ -590,15 +630,17 @@ impl PageServerHandler {
        }))
    }

-    #[instrument(skip(self, timeline, req), fields(rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn))]
+    #[instrument(skip(self, timeline, req, ctx), fields(rel = %req.rel, blkno = %req.blkno, req_lsn = %req.lsn))]
    async fn handle_get_page_at_lsn_request(
        &self,
        timeline: &Timeline,
        req: &PagestreamGetPageRequest,
-    ) -> Result<PagestreamBeMessage> {
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<PagestreamBeMessage> {
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
-        let lsn = Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn)
-            .await?;
+        let lsn =
+            Self::wait_or_get_last_lsn(timeline, req.lsn, req.latest, &latest_gc_cutoff_lsn, ctx)
+                .await?;
        /*
        // Add a 1s delay to some requests. The delay helps the requests to
        // hit the race condition from github issue #1047 more easily.
@@ -608,14 +650,9 @@ impl PageServerHandler {
        }
        */

-        let page = crate::tenant::with_ondemand_download(|| {
-            // FIXME: this profiling now happens at different place than it used to. The
-            // current profiling is based on a thread-local variable, so it doesn't work
-            // across awaits
-            let _profiling_guard = profpoint_start(self.conf, ProfilingConfig::PageRequests);
-            timeline.get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest)
-        })
-        .await?;
+        let page = timeline
+            .get_rel_page_at_lsn(req.rel, req.blkno, lsn, req.latest, ctx)
+            .await?;

        Ok(PagestreamBeMessage::GetPage(PagestreamGetPageResponse {
            page,
@@ -624,7 +661,7 @@ impl PageServerHandler {

    #[instrument(skip(self, pgb))]
    async fn handle_basebackup_request(
-        &self,
+        &mut self,
        pgb: &mut PostgresBackend,
        tenant_id: TenantId,
        timeline_id: TimelineId,
@@ -633,12 +670,14 @@ impl PageServerHandler {
        full_backup: bool,
    ) -> anyhow::Result<()> {
        // check that the timeline exists
-        let timeline = get_active_timeline_with_timeout(tenant_id, timeline_id).await?;
+        let (timeline, ctx) =
+            get_active_timeline_with_timeout(tenant_id, timeline_id, &self.connection_ctx).await?;
+
        let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn();
        if let Some(lsn) = lsn {
            // Backup was requested at a particular LSN. Wait for it to arrive.
            info!("waiting for {}", lsn);
-            timeline.wait_lsn(lsn).await?;
+            timeline.wait_lsn(lsn, &ctx).await?;
            timeline
                .check_lsn_is_in_scope(lsn, &latest_gc_cutoff_lsn)
                .context("invalid basebackup lsn")?;
@@ -648,17 +687,20 @@ impl PageServerHandler {
        pgb.write_message(&BeMessage::CopyOutResponse)?;
        pgb.flush().await?;

-        /* Send a tarball of the latest layer on the timeline */
-        let mut writer = CopyDataSink {
-            pgb,
-            rt: tokio::runtime::Handle::current(),
-        };
-        tokio::task::block_in_place(|| {
-            let basebackup =
-                basebackup::Basebackup::new(&mut writer, &timeline, lsn, prev_lsn, full_backup)?;
-            tracing::Span::current().record("lsn", basebackup.lsn.to_string().as_str());
-            basebackup.send_tarball()
-        })?;
+        // Send a tarball of the latest layer on the timeline
+        {
+            let mut writer = pgb.copyout_writer();
+            basebackup::send_basebackup_tarball(
+                &mut writer,
+                &timeline,
+                lsn,
+                prev_lsn,
+                full_backup,
+                &ctx,
+            )
+            .await?;
+        }
+
        pgb.write_message(&BeMessage::CopyDone)?;
        pgb.flush().await?;
        info!("basebackup complete");
@@ -668,7 +710,7 @@ impl PageServerHandler {

    // When accessing the management API, supply None as the argument.
    // When authorizing access to a tenant, pass the corresponding tenant id.
-    fn check_permission(&self, tenant_id: Option<TenantId>) -> Result<()> {
+    fn check_permission(&self, tenant_id: Option<TenantId>) -> anyhow::Result<()> {
        if self.auth.is_none() {
            // auth is set to Trust, nothing to check so just return ok
            return Ok(());
@@ -690,20 +732,19 @@ impl postgres_backend_async::Handler for PageServerHandler {
        &mut self,
        _pgb: &mut PostgresBackend,
        jwt_response: &[u8],
-    ) -> anyhow::Result<()> {
+    ) -> Result<(), QueryError> {
        // This unwrap is never triggered, because check_auth_jwt is only called when
        // auth_type is NeonJWT, which requires auth to be present.
        let data = self
            .auth
            .as_ref()
            .unwrap()
-            .decode(str::from_utf8(jwt_response)?)?;
+            .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)?;

-        if matches!(data.claims.scope, Scope::Tenant) {
-            ensure!(
-                data.claims.tenant_id.is_some(),
+        if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() {
+            return Err(QueryError::Other(anyhow::anyhow!(
                "jwt token scope is Tenant, but tenant id is missing"
-            )
+            )));
        }

        info!(
@@ -715,22 +756,33 @@ impl postgres_backend_async::Handler for PageServerHandler {
        Ok(())
    }

+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        _sm: &FeStartupPacket,
+    ) -> Result<(), QueryError> {
+        Ok(())
+    }
+
    async fn process_query(
        &mut self,
        pgb: &mut PostgresBackend,
        query_string: &str,
-    ) -> anyhow::Result<()> {
-        debug!("process query {:?}", query_string);
+    ) -> Result<(), QueryError> {
+        debug!("process query {query_string:?}");

        if query_string.starts_with("pagestream ") {
            let (_, params_raw) = query_string.split_at("pagestream ".len());
            let params = params_raw.split(' ').collect::<Vec<_>>();
-            ensure!(
-                params.len() == 2,
-                "invalid param number for pagestream command"
-            );
-            let tenant_id = TenantId::from_str(params[0])?;
-            let timeline_id = TimelineId::from_str(params[1])?;
+            if params.len() != 2 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for pagestream command"
+                )));
+            }
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;

            self.check_permission(Some(tenant_id))?;

@@ -740,18 +792,24 @@ impl postgres_backend_async::Handler for PageServerHandler {
            let (_, params_raw) = query_string.split_at("basebackup ".len());
            let params = params_raw.split_whitespace().collect::<Vec<_>>();

-            ensure!(
-                params.len() >= 2,
-                "invalid param number for basebackup command"
-            );
+            if params.len() < 2 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for basebackup command"
+                )));
+            }

-            let tenant_id = TenantId::from_str(params[0])?;
-            let timeline_id = TimelineId::from_str(params[1])?;
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;

            self.check_permission(Some(tenant_id))?;

            let lsn = if params.len() == 3 {
-                Some(Lsn::from_str(params[2])?)
+                Some(
+                    Lsn::from_str(params[2])
+                        .with_context(|| format!("Failed to parse Lsn from {}", params[2]))?,
+                )
            } else {
                None
            };
@@ -766,16 +824,21 @@ impl postgres_backend_async::Handler for PageServerHandler {
            let (_, params_raw) = query_string.split_at("get_last_record_rlsn ".len());
            let params = params_raw.split_whitespace().collect::<Vec<_>>();

-            ensure!(
-                params.len() == 2,
-                "invalid param number for get_last_record_rlsn command"
-            );
+            if params.len() != 2 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for get_last_record_rlsn command"
+                )));
+            }

-            let tenant_id = TenantId::from_str(params[0])?;
-            let timeline_id = TimelineId::from_str(params[1])?;
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;

            self.check_permission(Some(tenant_id))?;
-            let timeline = get_active_timeline_with_timeout(tenant_id, timeline_id).await?;
+            let (timeline, _ctx) =
+                get_active_timeline_with_timeout(tenant_id, timeline_id, &self.connection_ctx)
+                    .await?;

            let end_of_timeline = timeline.get_last_record_rlsn();

@@ -794,22 +857,31 @@ impl postgres_backend_async::Handler for PageServerHandler {
            let (_, params_raw) = query_string.split_at("fullbackup ".len());
            let params = params_raw.split_whitespace().collect::<Vec<_>>();

-            ensure!(
-                params.len() >= 2,
-                "invalid param number for fullbackup command"
-            );
+            if params.len() < 2 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for fullbackup command"
+                )));
+            }

-            let tenant_id = TenantId::from_str(params[0])?;
-            let timeline_id = TimelineId::from_str(params[1])?;
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;

            // The caller is responsible for providing correct lsn and prev_lsn.
            let lsn = if params.len() > 2 {
-                Some(Lsn::from_str(params[2])?)
+                Some(
+                    Lsn::from_str(params[2])
+                        .with_context(|| format!("Failed to parse Lsn from {}", params[2]))?,
+                )
            } else {
                None
            };
            let prev_lsn = if params.len() > 3 {
-                Some(Lsn::from_str(params[3])?)
+                Some(
+                    Lsn::from_str(params[3])
+                        .with_context(|| format!("Failed to parse Lsn from {}", params[3]))?,
+                )
            } else {
                None
            };
@@ -834,12 +906,21 @@ impl postgres_backend_async::Handler for PageServerHandler {
            //     -c "import basebackup $TENANT $TIMELINE $START_LSN $END_LSN $PG_VERSION"
            let (_, params_raw) = query_string.split_at("import basebackup ".len());
            let params = params_raw.split_whitespace().collect::<Vec<_>>();
-            ensure!(params.len() == 5);
-            let tenant_id = TenantId::from_str(params[0])?;
-            let timeline_id = TimelineId::from_str(params[1])?;
-            let base_lsn = Lsn::from_str(params[2])?;
-            let end_lsn = Lsn::from_str(params[3])?;
-            let pg_version = u32::from_str(params[4])?;
+            if params.len() != 5 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for import basebackup command"
+                )));
+            }
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;
+            let base_lsn = Lsn::from_str(params[2])
+                .with_context(|| format!("Failed to parse Lsn from {}", params[2]))?;
+            let end_lsn = Lsn::from_str(params[3])
+                .with_context(|| format!("Failed to parse Lsn from {}", params[3]))?;
+            let pg_version = u32::from_str(params[4])
+                .with_context(|| format!("Failed to parse pg_version from {}", params[4]))?;

            self.check_permission(Some(tenant_id))?;

@@ -857,7 +938,10 @@ impl postgres_backend_async::Handler for PageServerHandler {
                Ok(()) => pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?,
                Err(e) => {
                    error!("error importing base backup between {base_lsn} and {end_lsn}: {e:?}");
-                    pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?
+                    pgb.write_message(&BeMessage::ErrorResponse(
+                        &e.to_string(),
+                        Some(e.pg_error_code()),
+                    ))?
                }
            };
        } else if query_string.starts_with("import wal ") {
@@ -867,11 +951,19 @@ impl postgres_backend_async::Handler for PageServerHandler {
            // caller should poll the http api to check when that is done.
            let (_, params_raw) = query_string.split_at("import wal ".len());
            let params = params_raw.split_whitespace().collect::<Vec<_>>();
-            ensure!(params.len() == 4);
-            let tenant_id = TenantId::from_str(params[0])?;
-            let timeline_id = TimelineId::from_str(params[1])?;
-            let start_lsn = Lsn::from_str(params[2])?;
-            let end_lsn = Lsn::from_str(params[3])?;
+            if params.len() != 4 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for import wal command"
+                )));
+            }
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;
+            let timeline_id = TimelineId::from_str(params[1])
+                .with_context(|| format!("Failed to parse timeline id from {}", params[1]))?;
+            let start_lsn = Lsn::from_str(params[2])
+                .with_context(|| format!("Failed to parse Lsn from {}", params[2]))?;
+            let end_lsn = Lsn::from_str(params[3])
+                .with_context(|| format!("Failed to parse Lsn from {}", params[3]))?;

            self.check_permission(Some(tenant_id))?;

@@ -882,7 +974,10 @@ impl postgres_backend_async::Handler for PageServerHandler {
                Ok(()) => pgb.write_message(&BeMessage::CommandComplete(b"SELECT 1"))?,
                Err(e) => {
                    error!("error importing WAL between {start_lsn} and {end_lsn}: {e:?}");
-                    pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?
+                    pgb.write_message(&BeMessage::ErrorResponse(
+                        &e.to_string(),
+                        Some(e.pg_error_code()),
+                    ))?
                }
            };
        } else if query_string.to_ascii_lowercase().starts_with("set ") {
@@ -893,12 +988,18 @@ impl postgres_backend_async::Handler for PageServerHandler {
            // show <tenant_id>
            let (_, params_raw) = query_string.split_at("show ".len());
            let params = params_raw.split(' ').collect::<Vec<_>>();
-            ensure!(params.len() == 1, "invalid param number for config command");
-            let tenant_id = TenantId::from_str(params[0])?;
+            if params.len() != 1 {
+                return Err(QueryError::Other(anyhow::anyhow!(
+                    "invalid param number for config command"
+                )));
+            }
+            let tenant_id = TenantId::from_str(params[0])
+                .with_context(|| format!("Failed to parse tenant id from {}", params[0]))?;

            self.check_permission(Some(tenant_id))?;

-            let tenant = get_active_tenant_with_timeout(tenant_id).await?;
+            let (tenant, _ctx) =
+                get_active_tenant_with_timeout(tenant_id, &self.connection_ctx).await?;
            pgb.write_message(&BeMessage::RowDescription(&[
                RowDescriptor::int8_col(b"checkpoint_distance"),
                RowDescriptor::int8_col(b"checkpoint_timeout"),
@@ -935,7 +1036,9 @@ impl postgres_backend_async::Handler for PageServerHandler {
            ]))?
            .write_message(&BeMessage::CommandComplete(b"SELECT 1"))?;
        } else {
-            bail!("unknown command");
+            return Err(QueryError::Other(anyhow::anyhow!(
+                "unknown command {query_string}"
+            )));
        }

        Ok(())
@@ -947,12 +1050,25 @@ impl postgres_backend_async::Handler for PageServerHandler {
 /// If the tenant is Loading, waits for it to become Active, for up to 30 s. That
 /// ensures that queries don't fail immediately after pageserver startup, because
 /// all tenants are still loading.
-async fn get_active_tenant_with_timeout(tenant_id: TenantId) -> Result<Arc<Tenant>> {
-    let tenant = mgr::get_tenant(tenant_id, false).await?;
-    match tokio::time::timeout(Duration::from_secs(30), tenant.wait_to_become_active()).await {
-        Ok(wait_result) => wait_result
-            // no .context(), the error message is good enough and some tests depend on it
-            .map(move |()| tenant),
+async fn get_active_tenant_with_timeout(
+    tenant_id: TenantId,
+    parent_ctx: &RequestContext,
+) -> anyhow::Result<(Arc<Tenant>, TenantRequestContext)> {
+    let child_ctx = RequestContext::with_parent(
+        parent_ctx.task_kind(),
+        parent_ctx.download_behavior(),
+        parent_ctx,
+    );
+
+    let tenant = mgr::get_tenant(tenant_id).await?;
+    match tokio::time::timeout(
+        Duration::from_secs(30),
+        tenant.wait_to_become_active(child_ctx),
+    )
+    .await
+    {
+        Ok(Ok(ctx)) => Ok((tenant, ctx)),
+        Ok(Err(err)) => Err(err),
        Err(_) => anyhow::bail!("Timeout waiting for tenant {tenant_id} to become Active"),
    }
 }
@@ -961,37 +1077,9 @@ async fn get_active_tenant_with_timeout(tenant_id: TenantId) -> Result<Arc<Tenan
 async fn get_active_timeline_with_timeout(
    tenant_id: TenantId,
    timeline_id: TimelineId,
-) -> Result<Arc<Timeline>> {
-    get_active_tenant_with_timeout(tenant_id)
+    ctx: &RequestContext,
+) -> anyhow::Result<(Arc<Timeline>, TimelineRequestContext)> {
+    get_active_tenant_with_timeout(tenant_id, ctx)
        .await
-        .and_then(|tenant| tenant.get_timeline(timeline_id, true))
-}
-
-///
-/// A std::io::Write implementation that wraps all data written to it in CopyData
-/// messages.
-///
-struct CopyDataSink<'a> {
-    pgb: &'a mut PostgresBackend,
-    rt: tokio::runtime::Handle,
-}
-
-impl<'a> io::Write for CopyDataSink<'a> {
-    fn write(&mut self, data: &[u8]) -> io::Result<usize> {
-        // CopyData
-        // FIXME: if the input is large, we should split it into multiple messages.
-        // Not sure what the threshold should be, but the ultimate hard limit is that
-        // the length cannot exceed u32.
-        // FIXME: flush isn't really required, but makes it easier
-        // to view in wireshark
-        self.pgb.write_message(&BeMessage::CopyData(data))?;
-        self.rt.block_on(self.pgb.flush())?;
-        trace!("CopyData sent for {} bytes!", data.len());
-
-        Ok(data.len())
-    }
-    fn flush(&mut self) -> io::Result<()> {
-        // no-op
-        Ok(())
-    }
+        .and_then(|(tenant, ctx)| tenant.get_active_timeline(timeline_id, &ctx))
 }
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -6,11 +6,10 @@
 //! walingest.rs handles a few things like implicit relation creation and extension.
 //! Clarify that)
 //!
-use super::tenant::PageReconstructResult;
 use crate::keyspace::{KeySpace, KeySpaceAccum};
-use crate::tenant::{with_ondemand_download, Timeline};
+use crate::repository::*;
+use crate::tenant::{PageReconstructError, Timeline, TimelineRequestContext};
 use crate::walrecord::NeonWalRecord;
-use crate::{repository::*, try_no_ondemand_download};
 use anyhow::Context;
 use bytes::{Buf, Bytes};
 use pageserver_api::reltag::{RelTag, SlruKind};
@@ -20,7 +19,6 @@ use postgres_ffi::{Oid, TimestampTz, TransactionId};
 use serde::{Deserialize, Serialize};
 use std::collections::{hash_map, HashMap, HashSet};
 use std::ops::Range;
-use tokio_util::sync::CancellationToken;
 use tracing::{debug, trace, warn};
 use utils::{bin_ser::BeSer, lsn::Lsn};

@@ -35,14 +33,6 @@ pub enum LsnForTimestamp {
    NoData(Lsn),
 }

-#[derive(Debug, thiserror::Error)]
-pub enum CalculateLogicalSizeError {
-    #[error("cancelled")]
-    Cancelled,
-    #[error(transparent)]
-    Other(#[from] anyhow::Error),
-}
-
 ///
 /// This impl provides all the functionality to store PostgreSQL relations, SLRUs,
 /// and other special kinds of files, in a versioned key-value store. The
@@ -92,76 +82,83 @@ impl Timeline {
    //------------------------------------------------------------------------------

    /// Look up given page version.
-    pub fn get_rel_page_at_lsn(
+    pub async fn get_rel_page_at_lsn(
        &self,
        tag: RelTag,
        blknum: BlockNumber,
        lsn: Lsn,
        latest: bool,
-    ) -> PageReconstructResult<Bytes> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
        if tag.relnode == 0 {
-            return PageReconstructResult::from(anyhow::anyhow!("invalid relnode"));
+            return Err(PageReconstructError::Other(anyhow::anyhow!(
+                "invalid relnode"
+            )));
        }

-        let nblocks = try_no_ondemand_download!(self.get_rel_size(tag, lsn, latest));
+        let nblocks = self.get_rel_size(tag, lsn, latest, ctx).await?;
        if blknum >= nblocks {
            debug!(
                "read beyond EOF at {} blk {} at {}, size is {}: returning all-zeros page",
                tag, blknum, lsn, nblocks
            );
-            return PageReconstructResult::Success(ZERO_PAGE.clone());
+            return Ok(ZERO_PAGE.clone());
        }

        let key = rel_block_to_key(tag, blknum);
-        self.get(key, lsn)
+        self.get(key, lsn, ctx).await
    }

    // Get size of a database in blocks
-    pub fn get_db_size(
+    pub async fn get_db_size(
        &self,
        spcnode: Oid,
        dbnode: Oid,
        lsn: Lsn,
        latest: bool,
-    ) -> PageReconstructResult<usize> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<usize, PageReconstructError> {
        let mut total_blocks = 0;

-        let rels = try_no_ondemand_download!(self.list_rels(spcnode, dbnode, lsn));
+        let rels = self.list_rels(spcnode, dbnode, lsn, ctx).await?;

        for rel in rels {
-            let n_blocks = try_no_ondemand_download!(self.get_rel_size(rel, lsn, latest));
+            let n_blocks = self.get_rel_size(rel, lsn, latest, ctx).await?;
            total_blocks += n_blocks as usize;
        }
-        PageReconstructResult::Success(total_blocks)
+        Ok(total_blocks)
    }

    /// Get size of a relation file
-    pub fn get_rel_size(
+    pub async fn get_rel_size(
        &self,
        tag: RelTag,
        lsn: Lsn,
        latest: bool,
-    ) -> PageReconstructResult<BlockNumber> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<BlockNumber, PageReconstructError> {
        if tag.relnode == 0 {
-            return PageReconstructResult::from(anyhow::anyhow!("invalid relnode"));
+            return Err(PageReconstructError::Other(anyhow::anyhow!(
+                "invalid relnode"
+            )));
        }

        if let Some(nblocks) = self.get_cached_rel_size(&tag, lsn) {
-            return PageReconstructResult::Success(nblocks);
+            return Ok(nblocks);
        }

        if (tag.forknum == FSM_FORKNUM || tag.forknum == VISIBILITYMAP_FORKNUM)
-            && !try_no_ondemand_download!(self.get_rel_exists(tag, lsn, latest))
+            && !self.get_rel_exists(tag, lsn, latest, ctx).await?
        {
            // FIXME: Postgres sometimes calls smgrcreate() to create
            // FSM, and smgrnblocks() on it immediately afterwards,
            // without extending it.  Tolerate that by claiming that
            // any non-existent FSM fork has size 0.
-            return PageReconstructResult::Success(0);
+            return Ok(0);
        }

        let key = rel_size_to_key(tag);
-        let mut buf = try_no_ondemand_download!(self.get(key, lsn));
+        let mut buf = self.get(key, lsn, ctx).await?;
        let nblocks = buf.get_u32_le();

        if latest {
@@ -174,47 +171,51 @@ impl Timeline {
            // associated with most recent value of LSN.
            self.update_cached_rel_size(tag, lsn, nblocks);
        }
-        PageReconstructResult::Success(nblocks)
+        Ok(nblocks)
    }

    /// Does relation exist?
-    pub fn get_rel_exists(
+    pub async fn get_rel_exists(
        &self,
        tag: RelTag,
        lsn: Lsn,
        _latest: bool,
-    ) -> PageReconstructResult<bool> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<bool, PageReconstructError> {
        if tag.relnode == 0 {
-            return PageReconstructResult::from(anyhow::anyhow!("invalid relnode"));
+            return Err(PageReconstructError::Other(anyhow::anyhow!(
+                "invalid relnode"
+            )));
        }

        // first try to lookup relation in cache
        if let Some(_nblocks) = self.get_cached_rel_size(&tag, lsn) {
-            return PageReconstructResult::Success(true);
+            return Ok(true);
        }
        // fetch directory listing
        let key = rel_dir_to_key(tag.spcnode, tag.dbnode);
-        let buf = try_no_ondemand_download!(self.get(key, lsn));
+        let buf = self.get(key, lsn, ctx).await?;

        match RelDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
                let exists = dir.rels.get(&(tag.relnode, tag.forknum)).is_some();
-                PageReconstructResult::Success(exists)
+                Ok(exists)
            }
-            Err(e) => PageReconstructResult::from(e),
+            Err(e) => Err(PageReconstructError::from(e)),
        }
    }

    /// Get a list of all existing relations in given tablespace and database.
-    pub fn list_rels(
+    pub async fn list_rels(
        &self,
        spcnode: Oid,
        dbnode: Oid,
        lsn: Lsn,
-    ) -> PageReconstructResult<HashSet<RelTag>> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<HashSet<RelTag>, PageReconstructError> {
        // fetch directory listing
        let key = rel_dir_to_key(spcnode, dbnode);
-        let buf = try_no_ondemand_download!(self.get(key, lsn));
+        let buf = self.get(key, lsn, ctx).await?;

        match RelDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
@@ -226,53 +227,56 @@ impl Timeline {
                        forknum: *forknum,
                    }));

-                PageReconstructResult::Success(rels)
+                Ok(rels)
            }
-            Err(e) => PageReconstructResult::from(e),
+            Err(e) => Err(PageReconstructError::from(e)),
        }
    }

    /// Look up given SLRU page version.
-    pub fn get_slru_page_at_lsn(
+    pub async fn get_slru_page_at_lsn(
        &self,
        kind: SlruKind,
        segno: u32,
        blknum: BlockNumber,
        lsn: Lsn,
-    ) -> PageReconstructResult<Bytes> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
        let key = slru_block_to_key(kind, segno, blknum);
-        self.get(key, lsn)
+        self.get(key, lsn, ctx).await
    }

    /// Get size of an SLRU segment
-    pub fn get_slru_segment_size(
+    pub async fn get_slru_segment_size(
        &self,
        kind: SlruKind,
        segno: u32,
        lsn: Lsn,
-    ) -> PageReconstructResult<BlockNumber> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<BlockNumber, PageReconstructError> {
        let key = slru_segment_size_to_key(kind, segno);
-        let mut buf = try_no_ondemand_download!(self.get(key, lsn));
-        PageReconstructResult::Success(buf.get_u32_le())
+        let mut buf = self.get(key, lsn, ctx).await?;
+        Ok(buf.get_u32_le())
    }

    /// Does the SLRU segment exist?
-    pub fn get_slru_segment_exists(
+    pub async fn get_slru_segment_exists(
        &self,
        kind: SlruKind,
        segno: u32,
        lsn: Lsn,
-    ) -> PageReconstructResult<bool> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<bool, PageReconstructError> {
        // fetch directory listing
        let key = slru_dir_to_key(kind);
-        let buf = try_no_ondemand_download!(self.get(key, lsn));
+        let buf = self.get(key, lsn, ctx).await?;

        match SlruSegmentDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
                let exists = dir.segments.get(&segno).is_some();
-                PageReconstructResult::Success(exists)
+                Ok(exists)
            }
-            Err(e) => PageReconstructResult::from(e),
+            Err(e) => Err(PageReconstructError::from(e)),
        }
    }

@@ -283,10 +287,11 @@ impl Timeline {
    /// so it's not well defined which LSN you get if there were multiple commits
    /// "in flight" at that point in time.
    ///
-    pub fn find_lsn_for_timestamp(
+    pub async fn find_lsn_for_timestamp(
        &self,
        search_timestamp: TimestampTz,
-    ) -> PageReconstructResult<LsnForTimestamp> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<LsnForTimestamp, PageReconstructError> {
        let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
        let min_lsn = *gc_cutoff_lsn_guard;
        let max_lsn = self.get_last_record_lsn();
@@ -302,12 +307,15 @@ impl Timeline {
            // cannot overflow, high and low are both smaller than u64::MAX / 2
            let mid = (high + low) / 2;

-            let cmp = try_no_ondemand_download!(self.is_latest_commit_timestamp_ge_than(
-                search_timestamp,
-                Lsn(mid * 8),
-                &mut found_smaller,
-                &mut found_larger,
-            ));
+            let cmp = self
+                .is_latest_commit_timestamp_ge_than(
+                    search_timestamp,
+                    Lsn(mid * 8),
+                    &mut found_smaller,
+                    &mut found_larger,
+                    ctx,
+                )
+                .await?;

            if cmp {
                high = mid;
@@ -319,15 +327,15 @@ impl Timeline {
            (false, false) => {
                // This can happen if no commit records have been processed yet, e.g.
                // just after importing a cluster.
-                PageReconstructResult::Success(LsnForTimestamp::NoData(max_lsn))
+                Ok(LsnForTimestamp::NoData(max_lsn))
            }
            (true, false) => {
                // Didn't find any commit timestamps larger than the request
-                PageReconstructResult::Success(LsnForTimestamp::Future(max_lsn))
+                Ok(LsnForTimestamp::Future(max_lsn))
            }
            (false, true) => {
                // Didn't find any commit timestamps smaller than the request
-                PageReconstructResult::Success(LsnForTimestamp::Past(max_lsn))
+                Ok(LsnForTimestamp::Past(max_lsn))
            }
            (true, true) => {
                // low is the LSN of the first commit record *after* the search_timestamp,
@@ -337,7 +345,7 @@ impl Timeline {
                // Otherwise, if you restore to the returned LSN, the database will
                // include physical changes from later commits that will be marked
                // as aborted, and will need to be vacuumed away.
-                PageReconstructResult::Success(LsnForTimestamp::Present(Lsn((low - 1) * 8)))
+                Ok(LsnForTimestamp::Present(Lsn((low - 1) * 8)))
            }
        }
    }
@@ -349,26 +357,25 @@ impl Timeline {
    /// Additionally, sets 'found_smaller'/'found_larger' if it encounters any commits
    /// with a smaller/larger timestamp.
    ///
-    pub fn is_latest_commit_timestamp_ge_than(
+    pub async fn is_latest_commit_timestamp_ge_than(
        &self,
        search_timestamp: TimestampTz,
        probe_lsn: Lsn,
        found_smaller: &mut bool,
        found_larger: &mut bool,
-    ) -> PageReconstructResult<bool> {
-        for segno in try_no_ondemand_download!(self.list_slru_segments(SlruKind::Clog, probe_lsn)) {
-            let nblocks = try_no_ondemand_download!(self.get_slru_segment_size(
-                SlruKind::Clog,
-                segno,
-                probe_lsn
-            ));
+        ctx: &TimelineRequestContext,
+    ) -> Result<bool, PageReconstructError> {
+        for segno in self
+            .list_slru_segments(SlruKind::Clog, probe_lsn, ctx)
+            .await?
+        {
+            let nblocks = self
+                .get_slru_segment_size(SlruKind::Clog, segno, probe_lsn, ctx)
+                .await?;
            for blknum in (0..nblocks).rev() {
-                let clog_page = try_no_ondemand_download!(self.get_slru_page_at_lsn(
-                    SlruKind::Clog,
-                    segno,
-                    blknum,
-                    probe_lsn
-                ));
+                let clog_page = self
+                    .get_slru_page_at_lsn(SlruKind::Clog, segno, blknum, probe_lsn, ctx)
+                    .await?;

                if clog_page.len() == BLCKSZ as usize + 8 {
                    let mut timestamp_bytes = [0u8; 8];
@@ -377,76 +384,99 @@ impl Timeline {

                    if timestamp >= search_timestamp {
                        *found_larger = true;
-                        return PageReconstructResult::Success(true);
+                        return Ok(true);
                    } else {
                        *found_smaller = true;
                    }
                }
            }
        }
-        PageReconstructResult::Success(false)
+        Ok(false)
    }

    /// Get a list of SLRU segments
-    pub fn list_slru_segments(
+    pub async fn list_slru_segments(
        &self,
        kind: SlruKind,
        lsn: Lsn,
-    ) -> PageReconstructResult<HashSet<u32>> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<HashSet<u32>, PageReconstructError> {
        // fetch directory entry
        let key = slru_dir_to_key(kind);

-        let buf = try_no_ondemand_download!(self.get(key, lsn));
+        let buf = self.get(key, lsn, ctx).await?;
        match SlruSegmentDirectory::des(&buf).context("deserialization failure") {
-            Ok(dir) => PageReconstructResult::Success(dir.segments),
-            Err(e) => PageReconstructResult::from(e),
+            Ok(dir) => Ok(dir.segments),
+            Err(e) => Err(PageReconstructError::from(e)),
        }
    }

-    pub fn get_relmap_file(
+    pub async fn get_relmap_file(
        &self,
        spcnode: Oid,
        dbnode: Oid,
        lsn: Lsn,
-    ) -> PageReconstructResult<Bytes> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
        let key = relmap_file_key(spcnode, dbnode);

-        let buf = try_no_ondemand_download!(self.get(key, lsn));
-        PageReconstructResult::Success(buf)
+        let buf = self.get(key, lsn, ctx).await?;
+        Ok(buf)
    }

-    pub fn list_dbdirs(&self, lsn: Lsn) -> PageReconstructResult<HashMap<(Oid, Oid), bool>> {
+    pub async fn list_dbdirs(
+        &self,
+        lsn: Lsn,
+        ctx: &TimelineRequestContext,
+    ) -> Result<HashMap<(Oid, Oid), bool>, PageReconstructError> {
        // fetch directory entry
-        let buf = try_no_ondemand_download!(self.get(DBDIR_KEY, lsn));
+        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;

        match DbDirectory::des(&buf).context("deserialization failure") {
-            Ok(dir) => PageReconstructResult::Success(dir.dbdirs),
-            Err(e) => PageReconstructResult::from(e),
+            Ok(dir) => Ok(dir.dbdirs),
+            Err(e) => Err(PageReconstructError::from(e)),
        }
    }

-    pub fn get_twophase_file(&self, xid: TransactionId, lsn: Lsn) -> PageReconstructResult<Bytes> {
+    pub async fn get_twophase_file(
+        &self,
+        xid: TransactionId,
+        lsn: Lsn,
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
        let key = twophase_file_key(xid);
-        let buf = try_no_ondemand_download!(self.get(key, lsn));
-        PageReconstructResult::Success(buf)
+        let buf = self.get(key, lsn, ctx).await?;
+        Ok(buf)
    }

-    pub fn list_twophase_files(&self, lsn: Lsn) -> PageReconstructResult<HashSet<TransactionId>> {
+    pub async fn list_twophase_files(
+        &self,
+        lsn: Lsn,
+        ctx: &TimelineRequestContext,
+    ) -> Result<HashSet<TransactionId>, PageReconstructError> {
        // fetch directory entry
-        let buf = try_no_ondemand_download!(self.get(TWOPHASEDIR_KEY, lsn));
+        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;

        match TwoPhaseDirectory::des(&buf).context("deserialization failure") {
-            Ok(dir) => PageReconstructResult::Success(dir.xids),
-            Err(e) => PageReconstructResult::from(e),
+            Ok(dir) => Ok(dir.xids),
+            Err(e) => Err(PageReconstructError::from(e)),
        }
    }

-    pub fn get_control_file(&self, lsn: Lsn) -> PageReconstructResult<Bytes> {
-        self.get(CONTROLFILE_KEY, lsn)
+    pub async fn get_control_file(
+        &self,
+        lsn: Lsn,
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
+        self.get(CONTROLFILE_KEY, lsn, ctx).await
    }

-    pub fn get_checkpoint(&self, lsn: Lsn) -> PageReconstructResult<Bytes> {
-        self.get(CHECKPOINT_KEY, lsn)
+    pub async fn get_checkpoint(
+        &self,
+        lsn: Lsn,
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
+        self.get(CHECKPOINT_KEY, lsn, ctx).await
    }

    /// Does the same as get_current_logical_size but counted on demand.
@@ -457,23 +487,20 @@ impl Timeline {
    pub async fn get_current_logical_size_non_incremental(
        &self,
        lsn: Lsn,
-        cancel: CancellationToken,
-    ) -> Result<u64, CalculateLogicalSizeError> {
+        ctx: &TimelineRequestContext,
+    ) -> Result<u64, PageReconstructError> {
        // Fetch list of database dirs and iterate them
-        let buf = self.get_download(DBDIR_KEY, lsn).await?;
+        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;
        let dbdir = DbDirectory::des(&buf).context("deserialize db directory")?;

        let mut total_size: u64 = 0;
        for (spcnode, dbnode) in dbdir.dbdirs.keys() {
-            for rel in
-                crate::tenant::with_ondemand_download(|| self.list_rels(*spcnode, *dbnode, lsn))
-                    .await?
-            {
-                if cancel.is_cancelled() {
-                    return Err(CalculateLogicalSizeError::Cancelled);
+            for rel in self.list_rels(*spcnode, *dbnode, lsn, ctx).await? {
+                if ctx.is_cancelled() {
+                    return Err(PageReconstructError::Cancelled);
                }
                let relsize_key = rel_size_to_key(rel);
-                let mut buf = self.get_download(relsize_key, lsn).await?;
+                let mut buf = self.get(relsize_key, lsn, ctx).await?;
                let relsize = buf.get_u32_le();

                total_size += relsize as u64;
@@ -486,7 +513,11 @@ impl Timeline {
    /// Get a KeySpace that covers all the Keys that are in use at the given LSN.
    /// Anything that's not listed maybe removed from the underlying storage (from
    /// that LSN forwards).
-    pub async fn collect_keyspace(&self, lsn: Lsn) -> anyhow::Result<KeySpace> {
+    pub async fn collect_keyspace(
+        &self,
+        lsn: Lsn,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<KeySpace> {
        // Iterate through key ranges, greedily packing them into partitions
        let mut result = KeySpaceAccum::new();

@@ -494,7 +525,7 @@ impl Timeline {
        result.add_key(DBDIR_KEY);

        // Fetch list of database dirs and iterate them
-        let buf = self.get_download(DBDIR_KEY, lsn).await?;
+        let buf = self.get(DBDIR_KEY, lsn, ctx).await?;
        let dbdir = DbDirectory::des(&buf).context("deserialization failure")?;

        let mut dbs: Vec<(Oid, Oid)> = dbdir.dbdirs.keys().cloned().collect();
@@ -503,15 +534,16 @@ impl Timeline {
            result.add_key(relmap_file_key(spcnode, dbnode));
            result.add_key(rel_dir_to_key(spcnode, dbnode));

-            let mut rels: Vec<RelTag> =
-                with_ondemand_download(|| self.list_rels(spcnode, dbnode, lsn))
-                    .await?
-                    .into_iter()
-                    .collect();
+            let mut rels: Vec<RelTag> = self
+                .list_rels(spcnode, dbnode, lsn, ctx)
+                .await?
+                .iter()
+                .cloned()
+                .collect();
            rels.sort_unstable();
            for rel in rels {
                let relsize_key = rel_size_to_key(rel);
-                let mut buf = self.get_download(relsize_key, lsn).await?;
+                let mut buf = self.get(relsize_key, lsn, ctx).await?;
                let relsize = buf.get_u32_le();

                result.add_range(rel_block_to_key(rel, 0)..rel_block_to_key(rel, relsize));
@@ -527,13 +559,13 @@ impl Timeline {
        ] {
            let slrudir_key = slru_dir_to_key(kind);
            result.add_key(slrudir_key);
-            let buf = self.get_download(slrudir_key, lsn).await?;
+            let buf = self.get(slrudir_key, lsn, ctx).await?;
            let dir = SlruSegmentDirectory::des(&buf).context("deserialization failure")?;
            let mut segments: Vec<u32> = dir.segments.iter().cloned().collect();
            segments.sort_unstable();
            for segno in segments {
                let segsize_key = slru_segment_size_to_key(kind, segno);
-                let mut buf = self.get_download(segsize_key, lsn).await?;
+                let mut buf = self.get(segsize_key, lsn, ctx).await?;
                let segsize = buf.get_u32_le();

                result.add_range(
@@ -545,7 +577,7 @@ impl Timeline {

        // Then pg_twophase
        result.add_key(TWOPHASEDIR_KEY);
-        let buf = self.get_download(TWOPHASEDIR_KEY, lsn).await?;
+        let buf = self.get(TWOPHASEDIR_KEY, lsn, ctx).await?;
        let twophase_dir = TwoPhaseDirectory::des(&buf).context("deserialization failure")?;
        let mut xids: Vec<TransactionId> = twophase_dir.xids.iter().cloned().collect();
        xids.sort_unstable();
@@ -703,9 +735,15 @@ impl<'a> DatadirModification<'a> {
    }

    /// Store a relmapper file (pg_filenode.map) in the repository
-    pub fn put_relmap_file(&mut self, spcnode: Oid, dbnode: Oid, img: Bytes) -> anyhow::Result<()> {
+    pub async fn put_relmap_file(
+        &mut self,
+        spcnode: Oid,
+        dbnode: Oid,
+        img: Bytes,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        // Add it to the directory (if it doesn't exist already)
-        let buf = self.get(DBDIR_KEY).no_ondemand_download()?;
+        let buf = self.get(DBDIR_KEY, ctx).await?;
        let mut dbdir = DbDirectory::des(&buf)?;

        let r = dbdir.dbdirs.insert((spcnode, dbnode), true);
@@ -731,9 +769,14 @@ impl<'a> DatadirModification<'a> {
        Ok(())
    }

-    pub fn put_twophase_file(&mut self, xid: TransactionId, img: Bytes) -> anyhow::Result<()> {
+    pub async fn put_twophase_file(
+        &mut self,
+        xid: TransactionId,
+        img: Bytes,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        // Add it to the directory entry
-        let buf = self.get(TWOPHASEDIR_KEY).no_ondemand_download()?;
+        let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
        let mut dir = TwoPhaseDirectory::des(&buf)?;
        if !dir.xids.insert(xid) {
            anyhow::bail!("twophase file for xid {} already exists", xid);
@@ -757,16 +800,21 @@ impl<'a> DatadirModification<'a> {
        Ok(())
    }

-    pub fn drop_dbdir(&mut self, spcnode: Oid, dbnode: Oid) -> anyhow::Result<()> {
+    pub async fn drop_dbdir(
+        &mut self,
+        spcnode: Oid,
+        dbnode: Oid,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        let req_lsn = self.tline.get_last_record_lsn();

        let total_blocks = self
            .tline
-            .get_db_size(spcnode, dbnode, req_lsn, true)
-            .no_ondemand_download()?;
+            .get_db_size(spcnode, dbnode, req_lsn, true, ctx)
+            .await?;

        // Remove entry from dbdir
-        let buf = self.get(DBDIR_KEY).no_ondemand_download()?;
+        let buf = self.get(DBDIR_KEY, ctx).await?;
        let mut dir = DbDirectory::des(&buf)?;
        if dir.dbdirs.remove(&(spcnode, dbnode)).is_some() {
            let buf = DbDirectory::ser(&dir)?;
@@ -789,11 +837,16 @@ impl<'a> DatadirModification<'a> {
    /// Create a relation fork.
    ///
    /// 'nblocks' is the initial size.
-    pub fn put_rel_creation(&mut self, rel: RelTag, nblocks: BlockNumber) -> anyhow::Result<()> {
+    pub async fn put_rel_creation(
+        &mut self,
+        rel: RelTag,
+        nblocks: BlockNumber,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        anyhow::ensure!(rel.relnode != 0, "invalid relnode");
        // It's possible that this is the first rel for this db in this
        // tablespace.  Create the reldir entry for it if so.
-        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY).no_ondemand_download()?)?;
+        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await?)?;
        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
        let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
            // Didn't exist. Update dbdir
@@ -805,7 +858,7 @@ impl<'a> DatadirModification<'a> {
            RelDirectory::default()
        } else {
            // reldir already exists, fetch it
-            RelDirectory::des(&self.get(rel_dir_key).no_ondemand_download()?)?
+            RelDirectory::des(&self.get(rel_dir_key, ctx).await?)?
        };

        // Add the new relation to the rel directory entry, and write it back
@@ -833,17 +886,18 @@ impl<'a> DatadirModification<'a> {
    }

    /// Truncate relation
-    pub fn put_rel_truncation(&mut self, rel: RelTag, nblocks: BlockNumber) -> anyhow::Result<()> {
+    pub async fn put_rel_truncation(
+        &mut self,
+        rel: RelTag,
+        nblocks: BlockNumber,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        anyhow::ensure!(rel.relnode != 0, "invalid relnode");
        let last_lsn = self.tline.get_last_record_lsn();
-        if self
-            .tline
-            .get_rel_exists(rel, last_lsn, true)
-            .no_ondemand_download()?
-        {
+        if self.tline.get_rel_exists(rel, last_lsn, true, ctx).await? {
            let size_key = rel_size_to_key(rel);
            // Fetch the old size first
-            let old_size = self.get(size_key).no_ondemand_download()?.get_u32_le();
+            let old_size = self.get(size_key, ctx).await?.get_u32_le();

            // Update the entry with the new size.
            let buf = nblocks.to_le_bytes();
@@ -863,12 +917,17 @@ impl<'a> DatadirModification<'a> {

    /// Extend relation
    /// If new size is smaller, do nothing.
-    pub fn put_rel_extend(&mut self, rel: RelTag, nblocks: BlockNumber) -> anyhow::Result<()> {
+    pub async fn put_rel_extend(
+        &mut self,
+        rel: RelTag,
+        nblocks: BlockNumber,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        anyhow::ensure!(rel.relnode != 0, "invalid relnode");

        // Put size
        let size_key = rel_size_to_key(rel);
-        let old_size = self.get(size_key).no_ondemand_download()?.get_u32_le();
+        let old_size = self.get(size_key, ctx).await?.get_u32_le();

        // only extend relation here. never decrease the size
        if nblocks > old_size {
@@ -884,12 +943,16 @@ impl<'a> DatadirModification<'a> {
    }

    /// Drop a relation.
-    pub fn put_rel_drop(&mut self, rel: RelTag) -> anyhow::Result<()> {
+    pub async fn put_rel_drop(
+        &mut self,
+        rel: RelTag,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        anyhow::ensure!(rel.relnode != 0, "invalid relnode");

        // Remove it from the directory entry
        let dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
-        let buf = self.get(dir_key).no_ondemand_download()?;
+        let buf = self.get(dir_key, ctx).await?;
        let mut dir = RelDirectory::des(&buf)?;

        if dir.rels.remove(&(rel.relnode, rel.forknum)) {
@@ -900,7 +963,7 @@ impl<'a> DatadirModification<'a> {

        // update logical size
        let size_key = rel_size_to_key(rel);
-        let old_size = self.get(size_key).no_ondemand_download()?.get_u32_le();
+        let old_size = self.get(size_key, ctx).await?.get_u32_le();
        self.pending_nblocks -= old_size as i64;

        // Remove enty from relation size cache
@@ -912,15 +975,16 @@ impl<'a> DatadirModification<'a> {
        Ok(())
    }

-    pub fn put_slru_segment_creation(
+    pub async fn put_slru_segment_creation(
        &mut self,
        kind: SlruKind,
        segno: u32,
        nblocks: BlockNumber,
+        ctx: &TimelineRequestContext,
    ) -> anyhow::Result<()> {
        // Add it to the directory entry
        let dir_key = slru_dir_to_key(kind);
-        let buf = self.get(dir_key).no_ondemand_download()?;
+        let buf = self.get(dir_key, ctx).await?;
        let mut dir = SlruSegmentDirectory::des(&buf)?;

        if !dir.segments.insert(segno) {
@@ -956,10 +1020,15 @@ impl<'a> DatadirModification<'a> {
    }

    /// This method is used for marking truncated SLRU files
-    pub fn drop_slru_segment(&mut self, kind: SlruKind, segno: u32) -> anyhow::Result<()> {
+    pub async fn drop_slru_segment(
+        &mut self,
+        kind: SlruKind,
+        segno: u32,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        // Remove it from the directory entry
        let dir_key = slru_dir_to_key(kind);
-        let buf = self.get(dir_key).no_ondemand_download()?;
+        let buf = self.get(dir_key, ctx).await?;
        let mut dir = SlruSegmentDirectory::des(&buf)?;

        if !dir.segments.remove(&segno) {
@@ -983,9 +1052,13 @@ impl<'a> DatadirModification<'a> {
    }

    /// This method is used for marking truncated SLRU files
-    pub fn drop_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> {
+    pub async fn drop_twophase_file(
+        &mut self,
+        xid: TransactionId,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        // Remove it from the directory entry
-        let buf = self.get(TWOPHASEDIR_KEY).no_ondemand_download()?;
+        let buf = self.get(TWOPHASEDIR_KEY, ctx).await?;
        let mut dir = TwoPhaseDirectory::des(&buf)?;

        if !dir.xids.remove(&xid) {
@@ -1079,7 +1152,11 @@ impl<'a> DatadirModification<'a> {

    // Internal helper functions to batch the modifications

-    fn get(&self, key: Key) -> PageReconstructResult<Bytes> {
+    async fn get(
+        &self,
+        key: Key,
+        ctx: &TimelineRequestContext,
+    ) -> Result<Bytes, PageReconstructError> {
        // Have we already updated the same key? Read the pending updated
        // version in that case.
        //
@@ -1087,18 +1164,20 @@ impl<'a> DatadirModification<'a> {
        // value that has been removed, deletion only avoids leaking storage.
        if let Some(value) = self.pending_updates.get(&key) {
            if let Value::Image(img) = value {
-                PageReconstructResult::Success(img.clone())
+                Ok(img.clone())
            } else {
                // Currently, we never need to read back a WAL record that we
                // inserted in the same "transaction". All the metadata updates
                // work directly with Images, and we never need to read actual
                // data pages. We could handle this if we had to, by calling
                // the walredo manager, but let's keep it simple for now.
-                PageReconstructResult::from(anyhow::anyhow!("unexpected pending WAL record"))
+                Err(PageReconstructError::from(anyhow::anyhow!(
+                    "unexpected pending WAL record"
+                )))
            }
        } else {
            let lsn = Lsn::max(self.tline.get_last_record_lsn(), self.lsn);
-            self.tline.get(key, lsn)
+            self.tline.get(key, lsn, ctx).await
        }
    }

@@ -1505,17 +1584,18 @@ fn is_slru_block_key(key: Key) -> bool {

 #[cfg(test)]
 pub fn create_test_timeline(
-    tenant: &crate::tenant::Tenant,
+    tenant: &std::sync::Arc<crate::tenant::Tenant>,
    timeline_id: utils::id::TimelineId,
    pg_version: u32,
-) -> anyhow::Result<std::sync::Arc<Timeline>> {
-    let tline = tenant
-        .create_empty_timeline(timeline_id, Lsn(8), pg_version)?
-        .initialize()?;
+    tenant_ctx: &crate::tenant::TenantRequestContext,
+) -> anyhow::Result<(std::sync::Arc<Timeline>, TimelineRequestContext)> {
+    let (tline, timeline_ctx) =
+        tenant.create_empty_timeline(timeline_id, Lsn(8), pg_version, tenant_ctx)?;
+    let tline = tline.initialize(&timeline_ctx)?;
    let mut m = tline.begin_modification(Lsn(8));
    m.init_empty()?;
    m.commit()?;
-    Ok(tline)
+    Ok((tline, timeline_ctx))
 }

 #[allow(clippy::bool_assert_comparison)]
--- a/pageserver/src/profiling.rs
+++ b/pageserver/src/profiling.rs
@@ -1,107 +0,0 @@
-//!
-//! Support for profiling
-//!
-//! This relies on a modified version of the 'pprof-rs' crate. That's not very
-//! nice, so to avoid a hard dependency on that, this is an optional feature.
-//!
-use crate::config::{PageServerConf, ProfilingConfig};
-
-/// The actual implementation is in the `profiling_impl` submodule. If the profiling
-/// feature is not enabled, it's just a dummy implementation that panics if you
-/// try to enabled profiling in the configuration.
-pub use profiling_impl::*;
-
-#[cfg(feature = "profiling")]
-mod profiling_impl {
-    use super::*;
-    use pprof;
-    use std::marker::PhantomData;
-
-    /// Start profiling the current thread. Returns a guard object;
-    /// the profiling continues until the guard is dropped.
-    ///
-    /// Note: profiling is not re-entrant. If you call 'profpoint_start' while
-    /// profiling is already started, nothing happens, and the profiling will be
-    /// stopped when either guard object is dropped.
-    #[inline]
-    pub fn profpoint_start(
-        conf: &crate::config::PageServerConf,
-        point: ProfilingConfig,
-    ) -> Option<ProfilingGuard> {
-        if conf.profiling == point {
-            pprof::start_profiling();
-            Some(ProfilingGuard(PhantomData))
-        } else {
-            None
-        }
-    }
-
-    /// A hack to remove Send and Sync from the ProfilingGuard. Because the
-    /// profiling is attached to current thread.
-    ////
-    /// See comments in https://github.com/rust-lang/rust/issues/68318
-    type PhantomUnsend = std::marker::PhantomData<*mut u8>;
-
-    pub struct ProfilingGuard(PhantomUnsend);
-
-    impl Drop for ProfilingGuard {
-        fn drop(&mut self) {
-            pprof::stop_profiling();
-        }
-    }
-
-    /// Initialize the profiler. This must be called before any 'profpoint_start' calls.
-    pub fn init_profiler(conf: &PageServerConf) -> Option<pprof::ProfilerGuard> {
-        if conf.profiling != ProfilingConfig::Disabled {
-            Some(pprof::ProfilerGuardBuilder::default().build().unwrap())
-        } else {
-            None
-        }
-    }
-
-    /// Exit the profiler. Writes the flamegraph to current workdir.
-    pub fn exit_profiler(_conf: &PageServerConf, profiler_guard: &Option<pprof::ProfilerGuard>) {
-        // Write out the flamegraph
-        if let Some(profiler_guard) = profiler_guard {
-            if let Ok(report) = profiler_guard.report().build() {
-                // this gets written under the workdir
-                let file = std::fs::File::create("flamegraph.svg").unwrap();
-                let mut options = pprof::flamegraph::Options::default();
-                options.image_width = Some(2500);
-                report.flamegraph_with_options(file, &mut options).unwrap();
-            }
-        }
-    }
-}
-
-/// Dummy implementation when compiling without profiling feature or for non-linux OSes.
-#[cfg(not(feature = "profiling"))]
-mod profiling_impl {
-    use super::*;
-
-    pub struct DummyProfilerGuard;
-
-    impl Drop for DummyProfilerGuard {
-        fn drop(&mut self) {
-            // do nothing, this exists to calm Clippy down
-        }
-    }
-
-    pub fn profpoint_start(
-        _conf: &PageServerConf,
-        _point: ProfilingConfig,
-    ) -> Option<DummyProfilerGuard> {
-        None
-    }
-
-    pub fn init_profiler(conf: &PageServerConf) -> Option<DummyProfilerGuard> {
-        if conf.profiling != ProfilingConfig::Disabled {
-            // shouldn't happen, we don't allow profiling in the config if the support
-            // for it is disabled.
-            panic!("profiling enabled but the binary was compiled without profiling support");
-        }
-        None
-    }
-
-    pub fn exit_profiler(_conf: &PageServerConf, _guard: &Option<DummyProfilerGuard>) {}
-}
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -1,59 +1,21 @@
 //!
-//! This module provides centralized handling of tokio tasks in the Page Server.
+//! This module provides some helpers for spawning tokio tasks in the pageserver.
 //!
-//! We provide a few basic facilities:
-//! - A global registry of tasks that lists what kind of tasks they are, and
-//!   which tenant or timeline they are working on
-//!
-//! - The ability to request a task to shut down.
-//!
-//!
-//! # How it works?
-//!
-//! There is a global hashmap of all the tasks (`TASKS`). Whenever a new
-//! task is spawned, a PageServerTask entry is added there, and when a
-//! task dies, it removes itself from the hashmap. If you want to kill a
-//! task, you can scan the hashmap to find it.
-//!
-//! # Task shutdown
-//!
-//! To kill a task, we rely on co-operation from the victim. Each task is
-//! expected to periodically call the `is_shutdown_requested()` function, and
-//! if it returns true, exit gracefully. In addition to that, when waiting for
-//! the network or other long-running operation, you can use
-//! `shutdown_watcher()` function to get a Future that will become ready if
-//! the current task has been requested to shut down. You can use that with
-//! Tokio select!().
-//!
-//! TODO: This would be a good place to also handle panics in a somewhat sane way.
-//! Depending on what task panics, we might want to kill the whole server, or
-//! only a single tenant or timeline.
+//! Mostly just a wrapper around tokio::spawn, with some code to handle panics.
 //!

-// Clippy 1.60 incorrectly complains about the tokio::task_local!() macro.
-// Silence it. See https://github.com/rust-lang/rust-clippy/issues/9224.
-#![allow(clippy::declare_interior_mutable_const)]
-
-use std::collections::HashMap;
-use std::fmt;
 use std::future::Future;
-use std::panic::AssertUnwindSafe;
-use std::sync::atomic::{AtomicU64, Ordering};
-use std::sync::{Arc, Mutex};
+use std::panic::{resume_unwind, AssertUnwindSafe};

 use futures::FutureExt;
 use tokio::runtime::Runtime;
 use tokio::task::JoinHandle;
-use tokio::task_local;
-use tokio_util::sync::CancellationToken;

-use tracing::{debug, error, info, warn};
+use tracing::{debug, error, info};

 use once_cell::sync::Lazy;

-use utils::id::{TenantId, TimelineId};
-
-use crate::shutdown_pageserver;
+use crate::context::{self, TaskKind};

 //
 // There are four runtimes:
@@ -92,10 +54,6 @@ use crate::shutdown_pageserver;
 // runtime. If a GetPage request comes in before the load of a tenant has finished, the
 // GetPage request will wait for the tenant load to finish.
 //
-// The core Timeline code is synchronous, and uses a bunch of std Mutexes and RWLocks to
-// protect data structures. Let's keep it that way. Synchronous code is easier to debug
-// and analyze, and there's a lot of hairy, low-level, performance critical code there.
-//
 // It's nice to have different runtimes, so that you can quickly eyeball how much CPU
 // time each class of operations is taking, with 'top -H' or similar.
 //
@@ -135,355 +93,81 @@ pub static BACKGROUND_RUNTIME: Lazy<Runtime> = Lazy::new(|| {
        .expect("Failed to create background op runtime")
 });

-#[derive(Debug, Clone, Copy)]
-pub struct PageserverTaskId(u64);
-
-impl fmt::Display for PageserverTaskId {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        self.0.fmt(f)
-    }
-}
-
-/// Each task that we track is associated with a "task ID". It's just an
-/// increasing number that we assign. Note that it is different from tokio::task::Id.
-static NEXT_TASK_ID: AtomicU64 = AtomicU64::new(1);
-
-/// Global registry of tasks
-static TASKS: Lazy<Mutex<HashMap<u64, Arc<PageServerTask>>>> =
-    Lazy::new(|| Mutex::new(HashMap::new()));
-
-task_local! {
-    // This is a cancellation token which will be cancelled when a task needs to shut down. The
-    // root token is kept in the global registry, so that anyone can send the signal to request
-    // task shutdown.
-    static SHUTDOWN_TOKEN: CancellationToken;
-
-    // Each task holds reference to its own PageServerTask here.
-    static CURRENT_TASK: Arc<PageServerTask>;
-}
-
-///
-/// There are many kinds of tasks in the system. Some are associated with a particular
-/// tenant or timeline, while others are global.
-///
-/// Note that we don't try to limit how many task of a certain kind can be running
-/// at the same time.
-///
-#[derive(Debug, PartialEq, Eq, Clone, Copy)]
-pub enum TaskKind {
-    // libpq listener task. It just accepts connection and spawns a
-    // PageRequestHandler task for each connection.
-    LibpqEndpointListener,
-
-    // HTTP endpoint listener.
-    HttpEndpointListener,
-
-    // Task that handles a single connection. A PageRequestHandler task
-    // starts detached from any particular tenant or timeline, but it can be
-    // associated with one later, after receiving a command from the client.
-    PageRequestHandler,
-
-    // Manages the WAL receiver connection for one timeline. It subscribes to
-    // events from storage_broker, decides which safekeeper to connect to. It spawns a
-    // separate WalReceiverConnection task to handle each connection.
-    WalReceiverManager,
-
-    // Handles a connection to a safekeeper, to stream WAL to a timeline.
-    WalReceiverConnection,
-
-    // Garbage collection worker. One per tenant
-    GarbageCollector,
-
-    // Compaction. One per tenant.
-    Compaction,
-
-    // Initial logical size calculation
-    InitialLogicalSizeCalculation,
-
-    // Task that flushes frozen in-memory layers to disk
-    LayerFlushTask,
-
-    // Task that uploads a file to remote storage
-    RemoteUploadTask,
-
-    // Task that downloads a file from remote storage
-    RemoteDownloadTask,
-
-    // task that handles the initial downloading of all tenants
-    InitialLoad,
-
-    // task that handles attaching a tenant
-    Attach,
-
-    // task that handhes metrics collection
-    MetricsCollection,
-
-    // task that drives downloading layers
-    DownloadAllRemoteLayers,
-}
-
-#[derive(Default)]
-struct MutableTaskState {
-    /// Tenant and timeline that this task is associated with.
-    tenant_id: Option<TenantId>,
-    timeline_id: Option<TimelineId>,
-
-    /// Handle for waiting for the task to exit. It can be None, if the
-    /// the task has already exited.
-    join_handle: Option<JoinHandle<()>>,
-}
-
-struct PageServerTask {
-    #[allow(dead_code)] // unused currently
-    task_id: PageserverTaskId,
-
-    kind: TaskKind,
-
-    name: String,
-
-    // To request task shutdown, just cancel this token.
-    cancel: CancellationToken,
-
-    mutable: Mutex<MutableTaskState>,
-}
-
 /// Launch a new task
-/// Note: if shutdown_process_on_error is set to true failure
-///   of the task will lead to shutdown of entire process
+///
+/// This is a wrapper around tokio::spawn. One difference is that the Future
+/// is marked to return nothing to avoid silently swallowing errors. This
+/// forces the future to handle errors by itself. If you need the return
+/// value, you could create another function that passes it through, but we
+/// don't have a need for that currently.
+///
+/// If shutdown_process_on_panic is set to true, a panic of the task will lead
+/// to shutdown of the entire process. Otherwise we log the panic and re-raise it.
 pub fn spawn<F>(
    runtime: &tokio::runtime::Handle,
-    kind: TaskKind,
-    tenant_id: Option<TenantId>,
-    timeline_id: Option<TimelineId>,
    name: &str,
-    shutdown_process_on_error: bool,
+    shutdown_process_on_panic: bool,
    future: F,
-) -> PageserverTaskId
+) -> JoinHandle<F::Output>
 where
-    F: Future<Output = anyhow::Result<()>> + Send + 'static,
+    F: Future<Output = ()> + Send + 'static,
 {
-    let cancel = CancellationToken::new();
-    let task_id = NEXT_TASK_ID.fetch_add(1, Ordering::Relaxed);
-    let task = Arc::new(PageServerTask {
-        task_id: PageserverTaskId(task_id),
-        kind,
-        name: name.to_string(),
-        cancel: cancel.clone(),
-        mutable: Mutex::new(MutableTaskState {
-            tenant_id,
-            timeline_id,
-            join_handle: None,
-        }),
-    });
-
-    TASKS.lock().unwrap().insert(task_id, Arc::clone(&task));
-
-    let mut task_mut = task.mutable.lock().unwrap();
-
    let task_name = name.to_string();
-    let task_cloned = Arc::clone(&task);
-    let join_handle = runtime.spawn(task_wrapper(
-        task_name,
-        task_id,
-        task_cloned,
-        cancel,
-        shutdown_process_on_error,
-        future,
-    ));
-    task_mut.join_handle = Some(join_handle);
-    drop(task_mut);
-
-    // The task is now running. Nothing more to do here
-    PageserverTaskId(task_id)
+    runtime.spawn(task_wrapper(task_name, shutdown_process_on_panic, future))
 }

-/// This wrapper function runs in a newly-spawned task. It initializes the
-/// task-local variables and calls the payload function.
-async fn task_wrapper<F>(
-    task_name: String,
-    task_id: u64,
-    task: Arc<PageServerTask>,
-    shutdown_token: CancellationToken,
-    shutdown_process_on_error: bool,
-    future: F,
-) where
-    F: Future<Output = anyhow::Result<()>> + Send + 'static,
+/// This wrapper function runs in a newly-spawned task and handles panics of the payload future.
+async fn task_wrapper<F, R>(task_name: String, shutdown_process_on_panic: bool, future: F) -> R
+where
+    F: Future<Output = R> + Send + 'static,
 {
    debug!("Starting task '{}'", task_name);

-    let result = SHUTDOWN_TOKEN
-        .scope(
-            shutdown_token,
-            CURRENT_TASK.scope(task, {
-                // We use AssertUnwindSafe here so that the payload function
-                // doesn't need to be UnwindSafe. We don't do anything after the
-                // unwinding that would expose us to unwind-unsafe behavior.
-                AssertUnwindSafe(future).catch_unwind()
-            }),
-        )
-        .await;
-    task_finish(result, task_name, task_id, shutdown_process_on_error).await;
-}
+    // We use AssertUnwindSafe here so that the payload function
+    // doesn't need to be UnwindSafe. We don't do anything after the
+    // unwinding that would expose us to unwind-unsafe behavior.
+    let result = AssertUnwindSafe(future).catch_unwind().await;

-async fn task_finish(
-    result: std::result::Result<
-        anyhow::Result<()>,
-        std::boxed::Box<dyn std::any::Any + std::marker::Send>,
-    >,
-    task_name: String,
-    task_id: u64,
-    shutdown_process_on_error: bool,
-) {
-    // Remove our entry from the global hashmap.
-    let task = TASKS
-        .lock()
-        .unwrap()
-        .remove(&task_id)
-        .expect("no task in registry");
-
-    let mut shutdown_process = false;
-    {
-        let task_mut = task.mutable.lock().unwrap();
-
-        match result {
-            Ok(Ok(())) => {
-                debug!("Task '{}' exited normally", task_name);
-            }
-            Ok(Err(err)) => {
-                if shutdown_process_on_error {
-                    error!(
-                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task_mut.tenant_id, task_mut.timeline_id, err
-                    );
-                    shutdown_process = true;
-                } else {
-                    error!(
-                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task_mut.tenant_id, task_mut.timeline_id, err
-                    );
-                }
-            }
-            Err(err) => {
-                if shutdown_process_on_error {
-                    error!(
-                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task_mut.tenant_id, task_mut.timeline_id, err
-                    );
-                    shutdown_process = true;
-                } else {
-                    error!(
-                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task_mut.tenant_id, task_mut.timeline_id, err
-                    );
-                }
+    // Handle panics
+    match result {
+        Ok(result) => {
+            debug!("Task '{}' exited normally", task_name);
+            result
+        }
+        Err(err) => {
+            if shutdown_process_on_panic {
+                error!("Shutting down: task '{}' panicked: {:?}", task_name, err);
+                shutdown_pageserver(1).await;
+                unreachable!();
+            } else {
+                error!("Task '{}'  panicked: {:?}", task_name, err);
+                resume_unwind(err);
            }
        }
    }
-
-    if shutdown_process {
-        shutdown_pageserver(1).await;
-    }
 }

-// expected to be called from the task of the given id.
-pub fn associate_with(tenant_id: Option<TenantId>, timeline_id: Option<TimelineId>) {
-    CURRENT_TASK.with(|ct| {
-        let mut task_mut = ct.mutable.lock().unwrap();
-        task_mut.tenant_id = tenant_id;
-        task_mut.timeline_id = timeline_id;
-    });
-}
-
-/// Is there a task running that matches the criteria
-
-/// Signal and wait for tasks to shut down.
 ///
+/// Perform pageserver shutdown. This is called on receiving a signal,
+/// or if one of the tasks marked as 'shutdown_process_on_panic' panics.
 ///
-/// The arguments are used to select the tasks to kill. Any None arguments are
-/// ignored. For example, to shut down all WalReceiver tasks:
-///
-///   shutdown_tasks(Some(TaskKind::WalReceiver), None, None)
-///
-/// Or to shut down all tasks for given timeline:
-///
-///   shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
-///
-pub async fn shutdown_tasks(
-    kind: Option<TaskKind>,
-    tenant_id: Option<TenantId>,
-    timeline_id: Option<TimelineId>,
-) {
-    let mut victim_tasks = Vec::new();
+/// This never returns.
+pub async fn shutdown_pageserver(exit_code: i32) {
+    // Shut down the libpq endpoint task. This prevents new connections from
+    // being accepted.
+    context::shutdown_tasks(TaskKind::LibpqEndpointListener).await;

-    {
-        let tasks = TASKS.lock().unwrap();
-        for task in tasks.values() {
-            let task_mut = task.mutable.lock().unwrap();
-            if (kind.is_none() || Some(task.kind) == kind)
-                && (tenant_id.is_none() || task_mut.tenant_id == tenant_id)
-                && (timeline_id.is_none() || task_mut.timeline_id == timeline_id)
-            {
-                task.cancel.cancel();
-                victim_tasks.push(Arc::clone(task));
-            }
-        }
-    }
+    // Shut down all tenants gracefully
+    crate::tenant::mgr::shutdown_all_tenants().await;

-    for task in victim_tasks {
-        let join_handle = {
-            let mut task_mut = task.mutable.lock().unwrap();
-            info!("waiting for {} to shut down", task.name);
-            let join_handle = task_mut.join_handle.take();
-            drop(task_mut);
-            join_handle
-        };
-        if let Some(join_handle) = join_handle {
-            let _ = join_handle.await;
-        } else {
-            // Possibly one of:
-            //  * The task had not even fully started yet.
-            //  * It was shut down concurrently and already exited
-        }
-    }
-}
-
-pub fn current_task_kind() -> Option<TaskKind> {
-    CURRENT_TASK.try_with(|ct| ct.kind).ok()
-}
-
-pub fn current_task_id() -> Option<PageserverTaskId> {
-    CURRENT_TASK.try_with(|ct| ct.task_id).ok()
-}
-
-/// A Future that can be used to check if the current task has been requested to
-/// shut down.
-pub async fn shutdown_watcher() {
-    let token = SHUTDOWN_TOKEN
-        .try_with(|t| t.clone())
-        .expect("shutdown_requested() called in an unexpected task or thread");
-
-    token.cancelled().await;
-}
-
-/// Clone the current task's cancellation token, which can be moved across tasks.
-///
-/// When the task which is currently executing is shutdown, the cancellation token will be
-/// cancelled. It can however be moved to other tasks, such as `tokio::task::spawn_blocking` or
-/// `tokio::task::JoinSet::spawn`.
-pub fn shutdown_token() -> CancellationToken {
-    SHUTDOWN_TOKEN
-        .try_with(|t| t.clone())
-        .expect("shutdown_token() called in an unexpected task or thread")
-}
-
-/// Has the current task been requested to shut down?
-pub fn is_shutdown_requested() -> bool {
-    if let Ok(cancel) = SHUTDOWN_TOKEN.try_with(|t| t.clone()) {
-        cancel.is_cancelled()
-    } else {
-        if !cfg!(test) {
-            warn!("is_shutdown_requested() called in an unexpected task or thread");
-        }
-        false
-    }
+    // Shut down the HTTP endpoint last, so that you can still check the server's
+    // status while it's shutting down.
+    // FIXME: We should probably stop accepting commands like attach/detach earlier.
+    context::shutdown_tasks(TaskKind::HttpEndpointListener).await;
+
+    // There should be nothing left, but let's be sure
+    context::shutdown_all_tasks().await;
+
+    info!("Shut down successfully completed");
+    std::process::exit(exit_code);
 }
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -30,7 +30,7 @@ pub mod defaults {
    pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
    pub const DEFAULT_GC_PERIOD: &str = "100 s";
    pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3;
-    pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
+    pub const DEFAULT_PITR_INTERVAL: &str = "7 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "3 seconds";
    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -260,8 +260,10 @@ where
    /// contain the version, even if it's missing from the returned
    /// layer.
    ///
+    /// NOTE: This only searches the 'historic' layers, *not* the
+    /// 'open' and 'frozen' layers!
+    ///
    pub fn search(&self, key: Key, end_lsn: Lsn) -> Option<SearchResult<L>> {
-        // linear search
        // Find the latest image layer that covers the given key
        let mut latest_img: Option<Arc<L>> = None;
        let mut latest_img_lsn: Option<Lsn> = None;
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -8,6 +8,8 @@ use std::sync::Arc;
 use tokio::fs;

 use anyhow::Context;
+use futures::stream::FuturesUnordered;
+use futures::StreamExt;
 use once_cell::sync::Lazy;
 use tokio::sync::RwLock;
 use tracing::*;
@@ -16,9 +18,9 @@ use remote_storage::GenericRemoteStorage;
 use utils::crashsafe;

 use crate::config::PageServerConf;
-use crate::task_mgr::{self, TaskKind};
+use crate::context::RequestContext;
 use crate::tenant::config::TenantConfOpt;
-use crate::tenant::{Tenant, TenantState};
+use crate::tenant::{Tenant, TenantRequestContext, TenantState};
 use crate::IGNORED_TENANT_FILE_NAME;

 use utils::fs_ext::PathExt;
@@ -181,25 +183,11 @@ pub async fn shutdown_all_tenants() {
        tenants_to_shut_down
    };

-    // Shut down all existing walreceiver connections and stop accepting the new ones.
-    task_mgr::shutdown_tasks(Some(TaskKind::WalReceiverManager), None, None).await;
-
-    // Ok, no background tasks running anymore. Flush any remaining data in
-    // memory to disk.
-    //
-    // We assume that any incoming connections that might request pages from
-    // the tenant have already been terminated by the caller, so there
-    // should be no more activity in any of the repositories.
-    //
-    // On error, log it but continue with the shutdown for other tenants.
-    for tenant in tenants_to_shut_down {
-        let tenant_id = tenant.tenant_id();
-        debug!("shutdown tenant {tenant_id}");
-
-        if let Err(err) = tenant.freeze_and_flush().await {
-            error!("Could not checkpoint tenant {tenant_id} during shutdown: {err:?}");
-        }
+    let mut shutdown_futures: FuturesUnordered<_> = FuturesUnordered::new();
+    for tenant in tenants_to_shut_down.iter() {
+        shutdown_futures.push(tenant.graceful_shutdown(true));
    }
+    while let Some(_result) = shutdown_futures.next().await {}
 }

 pub async fn create_tenant(
@@ -234,36 +222,47 @@ pub async fn update_tenant_config(
    conf: &'static PageServerConf,
    tenant_conf: TenantConfOpt,
    tenant_id: TenantId,
+    ctx: &RequestContext,
 ) -> anyhow::Result<()> {
    info!("configuring tenant {tenant_id}");
-    get_tenant(tenant_id, true)
-        .await?
-        .update_tenant_config(tenant_conf);
+    let (tenant, _ctx) = get_active_tenant(tenant_id, ctx).await?;
+
+    tenant.update_tenant_config(tenant_conf);
    Tenant::persist_tenant_config(&conf.tenant_config_path(tenant_id), tenant_conf, false)?;
    Ok(())
 }

 /// Gets the tenant from the in-memory data, erroring if it's absent or is not fitting to the query.
 /// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
-pub async fn get_tenant(tenant_id: TenantId, active_only: bool) -> anyhow::Result<Arc<Tenant>> {
+pub async fn get_active_tenant(
+    tenant_id: TenantId,
+    parent_ctx: &RequestContext,
+) -> anyhow::Result<(Arc<Tenant>, TenantRequestContext)> {
+    let tenant = get_tenant(tenant_id).await?;
+    let tenant_ctx = match tenant.get_context(parent_ctx) {
+        Ok(ctx) => ctx,
+        Err(state) => anyhow::bail!("Tenant {} is not active, state: {:?}", tenant_id, state,),
+    };
+    Ok((tenant, tenant_ctx))
+}
+
+pub async fn get_tenant(tenant_id: TenantId) -> anyhow::Result<Arc<Tenant>> {
    let m = TENANTS.read().await;
    let tenant = m
        .get(&tenant_id)
        .with_context(|| format!("Tenant {tenant_id} not found in the local state"))?;
-    if active_only && !tenant.is_active() {
-        anyhow::bail!(
-            "Tenant {tenant_id} is not active. Current state: {:?}",
-            tenant.current_state()
-        )
-    } else {
-        Ok(Arc::clone(tenant))
-    }
+
+    Ok(Arc::clone(tenant))
 }

-pub async fn delete_timeline(tenant_id: TenantId, timeline_id: TimelineId) -> anyhow::Result<()> {
-    match get_tenant(tenant_id, true).await {
-        Ok(tenant) => {
-            tenant.delete_timeline(timeline_id).await?;
+pub async fn delete_timeline(
+    tenant_id: TenantId,
+    timeline_id: TimelineId,
+    ctx: &RequestContext,
+) -> anyhow::Result<()> {
+    match get_active_tenant(tenant_id, ctx).await {
+        Ok((tenant, ctx)) => {
+            tenant.delete_timeline(timeline_id, &ctx).await?;
        }
        Err(e) => anyhow::bail!("Cannot access tenant {tenant_id} in local tenant state: {e:?}"),
    }
@@ -395,27 +394,31 @@ where
    // The exclusive lock here ensures we don't miss the tenant state updates before trying another removal.
    // tenant-wde cleanup operations may take some time (removing the entire tenant directory), we want to
    // avoid holding the lock for the entire process.
-    {
+    let tenant = {
        let tenants_accessor = TENANTS.write().await;
        match tenants_accessor.get(&tenant_id) {
            Some(tenant) => match tenant.current_state() {
                TenantState::Attaching
                | TenantState::Loading
                | TenantState::Broken
-                | TenantState::Active => tenant.set_stopping(),
+                | TenantState::Active => {
+                    tenant.set_stopping();
+                    Arc::clone(tenant)
+                }
                TenantState::Stopping => {
                    anyhow::bail!("Tenant {tenant_id} is stopping already")
                }
            },
            None => anyhow::bail!("Tenant not found for id {tenant_id}"),
        }
-    }
+    };

-    // shutdown all tenant and timeline tasks: gc, compaction, page service)
-    // No new tasks will be started for this tenant because it's in `Stopping` state.
-    // Hence, once we're done here, the `tenant_cleanup` callback can mutate tenant on-disk state freely.
-    task_mgr::shutdown_tasks(None, Some(tenant_id), None).await;
+    // Shut down all tenant and timeline tasks.
+    tenant.graceful_shutdown(true).await;

+    // All tasks that operated on the tenant or any of its timelines have now finished,
+    // and they are in Stopped state so that new ones cannot appear anymore. Proceed
+    // with the cleanup.
    match tenant_cleanup
        .await
        .with_context(|| format!("Failed to run cleanup for tenant {tenant_id}"))
@@ -430,65 +433,10 @@ where
        Err(e) => {
            let tenants_accessor = TENANTS.read().await;
            match tenants_accessor.get(&tenant_id) {
-                Some(tenant) => tenant.set_broken(),
+                Some(tenant) => tenant.set_broken(&e.to_string()),
                None => warn!("Tenant {tenant_id} got removed from memory"),
            }
            Err(e)
        }
    }
 }
-
-#[cfg(feature = "testing")]
-use {
-    crate::repository::GcResult, pageserver_api::models::TimelineGcRequest,
-    utils::http::error::ApiError,
-};
-
-#[cfg(feature = "testing")]
-pub async fn immediate_gc(
-    tenant_id: TenantId,
-    timeline_id: TimelineId,
-    gc_req: TimelineGcRequest,
-) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
-    let guard = TENANTS.read().await;
-
-    let tenant = guard
-        .get(&tenant_id)
-        .map(Arc::clone)
-        .with_context(|| format!("Tenant {tenant_id} not found"))
-        .map_err(ApiError::NotFound)?;
-
-    let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
-    // Use tenant's pitr setting
-    let pitr = tenant.get_pitr_interval();
-
-    // Run in task_mgr to avoid race with detach operation
-    let (task_done, wait_task_done) = tokio::sync::oneshot::channel();
-    task_mgr::spawn(
-        &tokio::runtime::Handle::current(),
-        TaskKind::GarbageCollector,
-        Some(tenant_id),
-        Some(timeline_id),
-        &format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
-        false,
-        async move {
-            fail::fail_point!("immediate_gc_task_pre");
-            let result = tenant
-                .gc_iteration(Some(timeline_id), gc_horizon, pitr)
-                .instrument(info_span!("manual_gc", tenant = %tenant_id, timeline = %timeline_id))
-                .await;
-                // FIXME: `gc_iteration` can return an error for multiple reasons; we should handle it
-                // better once the types support it.
-            match task_done.send(result) {
-                Ok(_) => (),
-                Err(result) => error!("failed to send gc result: {result:?}"),
-            }
-            Ok(())
-        }
-    );
-
-    // drop the guard until after we've spawned the task so that timeline shutdown will wait for the task
-    drop(guard);
-
-    Ok(wait_task_done)
-}
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -16,7 +16,7 @@
 //! unless the pageserver is configured without remote storage.
 //!
 //! We allocate the client instance in [Timeline][`crate::tenant::Timeline`], i.e.,
-//! either in [`crate::tenant_mgr`] during startup or when creating a new
+//! either in [`crate::tenant::mgr`] during startup or when creating a new
 //! timeline.
 //! However, the client does not become ready for use until we've initialized its upload queue:
 //!
@@ -214,7 +214,8 @@ use anyhow::ensure;
 use remote_storage::{DownloadError, GenericRemoteStorage};
 use std::ops::DerefMut;
 use tokio::runtime::Runtime;
-use tracing::{info, warn};
+use tokio_util::sync::CancellationToken;
+use tracing::{debug, info, warn};
 use tracing::{info_span, Instrument};
 use utils::lsn::Lsn;

@@ -225,12 +226,12 @@ use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
 use crate::{
    config::PageServerConf,
    task_mgr,
-    task_mgr::TaskKind,
    task_mgr::BACKGROUND_RUNTIME,
    tenant::metadata::TimelineMetadata,
    tenant::upload_queue::{
        UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, UploadTask,
    },
+    tenant::TimelineRequestContext,
    {exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS},
 };

@@ -298,8 +299,8 @@ impl RemoteTimelineClient {
        conf: &'static PageServerConf,
        tenant_id: TenantId,
        timeline_id: TimelineId,
-    ) -> anyhow::Result<RemoteTimelineClient> {
-        Ok(RemoteTimelineClient {
+    ) -> RemoteTimelineClient {
+        RemoteTimelineClient {
            conf,
            runtime: &BACKGROUND_RUNTIME,
            tenant_id,
@@ -307,31 +308,56 @@ impl RemoteTimelineClient {
            storage_impl: remote_storage,
            upload_queue: Mutex::new(UploadQueue::Uninitialized),
            metrics: Arc::new(RemoteTimelineClientMetrics::new(&tenant_id, &timeline_id)),
-        })
+        }
    }

    /// Initialize the upload queue for a remote storage that already received
    /// an index file upload, i.e., it's not empty.
    /// The given `index_part` must be the one on the remote.
-    pub fn init_upload_queue(&self, index_part: &IndexPart) -> anyhow::Result<()> {
+    pub fn init_upload_queue(
+        self: &Arc<Self>,
+        index_part: &IndexPart,
+        upload_ctx: TimelineRequestContext,
+    ) -> anyhow::Result<()> {
+        let cancellation_token = upload_ctx.cancellation_token().clone();
        let mut upload_queue = self.upload_queue.lock().unwrap();
-        upload_queue.initialize_with_current_remote_index_part(index_part)?;
+        upload_queue.initialize_with_current_remote_index_part(index_part, upload_ctx)?;
        self.update_remote_physical_size_gauge(Some(index_part));
+        self.spawn_cancellation_watch(cancellation_token);
        Ok(())
    }

    /// Initialize the upload queue for the case where the remote storage is empty,
    /// i.e., it doesn't have an `IndexPart`.
    pub fn init_upload_queue_for_empty_remote(
-        &self,
+        self: &Arc<Self>,
        local_metadata: &TimelineMetadata,
+        upload_ctx: TimelineRequestContext,
    ) -> anyhow::Result<()> {
+        let cancellation_token = upload_ctx.cancellation_token().clone();
        let mut upload_queue = self.upload_queue.lock().unwrap();
-        upload_queue.initialize_empty_remote(local_metadata)?;
+        upload_queue.initialize_empty_remote(local_metadata, upload_ctx)?;
        self.update_remote_physical_size_gauge(None);
+        self.spawn_cancellation_watch(cancellation_token);
        Ok(())
    }

+    /// Spawn a task that calls `stop` on cancellation. It's important that we
+    /// stop the upload queue promptly, because it holds onto the RequestContext,
+    /// which in turn prevents the Timeline from shutting down.
+    fn spawn_cancellation_watch(self: &Arc<Self>, cancellation_token: CancellationToken) {
+        let self_rc = Arc::clone(self);
+        task_mgr::spawn(
+            self.runtime.handle(),
+            "remote upload queue cancellation watch",
+            false,
+            async move {
+                cancellation_token.cancelled().await;
+                self_rc.stop();
+            },
+        );
+    }
+
    pub fn last_uploaded_consistent_lsn(&self) -> Option<Lsn> {
        match &*self.upload_queue.lock().unwrap() {
            UploadQueue::Uninitialized => None,
@@ -367,6 +393,10 @@ impl RemoteTimelineClient {

    /// Download index file
    pub async fn download_index_file(&self) -> Result<IndexPart, DownloadError> {
+        let _unfinished_gauge_guard = self
+            .metrics
+            .call_begin(&RemoteOpFileKind::Index, &RemoteOpKind::Download);
+
        download::download_index_part(
            self.conf,
            &self.storage_impl,
@@ -393,22 +423,27 @@ impl RemoteTimelineClient {
        layer_file_name: &LayerFileName,
        layer_metadata: &LayerFileMetadata,
    ) -> anyhow::Result<u64> {
-        let downloaded_size = download::download_layer_file(
-            self.conf,
-            &self.storage_impl,
-            self.tenant_id,
-            self.timeline_id,
-            layer_file_name,
-            layer_metadata,
-        )
-        .measure_remote_op(
-            self.tenant_id,
-            self.timeline_id,
-            RemoteOpFileKind::Layer,
-            RemoteOpKind::Download,
-            Arc::clone(&self.metrics),
-        )
-        .await?;
+        let downloaded_size = {
+            let _unfinished_gauge_guard = self
+                .metrics
+                .call_begin(&RemoteOpFileKind::Layer, &RemoteOpKind::Download);
+            download::download_layer_file(
+                self.conf,
+                &self.storage_impl,
+                self.tenant_id,
+                self.timeline_id,
+                layer_file_name,
+                layer_metadata,
+            )
+            .measure_remote_op(
+                self.tenant_id,
+                self.timeline_id,
+                RemoteOpFileKind::Layer,
+                RemoteOpKind::Download,
+                Arc::clone(&self.metrics),
+            )
+            .await?
+        };

        // Update the metadata for given layer file. The remote index file
        // might be missing some information for the file; this allows us
@@ -517,7 +552,7 @@ impl RemoteTimelineClient {
            metadata_bytes,
        );
        let op = UploadOp::UploadMetadata(index_part, disk_consistent_lsn);
-        self.update_upload_queue_unfinished_metric(1, &op);
+        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
        upload_queue.latest_files_changes_since_metadata_upload_scheduled = 0;

@@ -549,7 +584,7 @@ impl RemoteTimelineClient {
        upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;

        let op = UploadOp::UploadLayer(layer_file_name.clone(), layer_metadata.clone());
-        self.update_upload_queue_unfinished_metric(1, &op);
+        self.calls_unfinished_metric_begin(&op);
        upload_queue.queued_operations.push_back(op);

        info!(
@@ -601,7 +636,7 @@ impl RemoteTimelineClient {
            // schedule the actual deletions
            for name in names {
                let op = UploadOp::Delete(RemoteOpFileKind::Layer, name.clone());
-                self.update_upload_queue_unfinished_metric(1, &op);
+                self.calls_unfinished_metric_begin(&op);
                upload_queue.queued_operations.push_back(op);
                info!("scheduled layer file deletion {}", name.file_name());
            }
@@ -616,7 +651,10 @@ impl RemoteTimelineClient {
    ///
    /// Wait for all previously scheduled uploads/deletions to complete
    ///
-    pub async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
+    pub async fn wait_completion(
+        self: &Arc<Self>,
+        ctx: &TimelineRequestContext,
+    ) -> anyhow::Result<()> {
        let (sender, mut receiver) = tokio::sync::watch::channel(());
        let barrier_op = UploadOp::Barrier(sender);

@@ -630,9 +668,16 @@ impl RemoteTimelineClient {
            self.launch_queued_tasks(upload_queue);
        }

-        if receiver.changed().await.is_err() {
-            anyhow::bail!("wait_completion aborted because upload queue was stopped");
-        }
+        tokio::select! {
+            result = receiver.changed() => {
+                if result.is_err() {
+                    anyhow::bail!("wait_completion aborted because upload queue was stopped");
+                }
+            },
+            _ = ctx.cancelled() => {
+                anyhow::bail!("request cancelled while waiting on uploads to finish");
+            },
+        };
        Ok(())
    }

@@ -675,7 +720,7 @@ impl RemoteTimelineClient {
            // We can launch this task. Remove it from the queue first.
            let next_op = upload_queue.queued_operations.pop_front().unwrap();

-            info!("starting op: {}", next_op);
+            debug!("starting op: {}", next_op);

            // Update the counters
            match next_op {
@@ -710,16 +755,15 @@ impl RemoteTimelineClient {

            // Spawn task to perform the task
            let self_rc = Arc::clone(self);
+
+            let cancellation_token = upload_queue.upload_ctx.cancellation_token().clone();
+
            task_mgr::spawn(
                self.runtime.handle(),
-                TaskKind::RemoteUploadTask,
-                Some(self.tenant_id),
-                Some(self.timeline_id),
                "remote upload",
                false,
                async move {
-                    self_rc.perform_upload_task(task).await;
-                    Ok(())
+                    self_rc.perform_upload_task(task, cancellation_token).await;
                }
                .instrument(info_span!(parent: None, "remote_upload", tenant = %self.tenant_id, timeline = %self.timeline_id, upload_task_id = %task_id)),
            );
@@ -739,7 +783,11 @@ impl RemoteTimelineClient {
    /// The task can be shut down, however. That leads to stopping the whole
    /// queue.
    ///
-    async fn perform_upload_task(self: &Arc<Self>, task: Arc<UploadTask>) {
+    async fn perform_upload_task(
+        self: &Arc<Self>,
+        task: Arc<UploadTask>,
+        cancellation_token: CancellationToken,
+    ) {
        // Loop to retry until it completes.
        loop {
            // If we're requested to shut down, close up shop and exit.
@@ -747,13 +795,13 @@ impl RemoteTimelineClient {
            // Note: We only check for the shutdown requests between retries, so
            // if a shutdown request arrives while we're busy uploading, in the
            // upload::upload:*() call below, we will wait not exit until it has
-            // finisheed. We probably could cancel the upload by simply dropping
+            // finished. We probably could cancel the upload by simply dropping
            // the Future, but we're not 100% sure if the remote storage library
            // is cancellation safe, so we don't dare to do that. Hopefully, the
            // upload finishes or times out soon enough.
-            if task_mgr::is_shutdown_requested() {
+            if cancellation_token.is_cancelled() {
                info!("upload task cancelled by shutdown request");
-                self.update_upload_queue_unfinished_metric(-1, &task.op);
+                self.calls_unfinished_metric_end(&task.op);
                self.stop();
                return;
            }
@@ -849,7 +897,7 @@ impl RemoteTimelineClient {

                    // sleep until it's time to retry, or we're cancelled
                    tokio::select! {
-                        _ = task_mgr::shutdown_watcher() => { },
+                        _ = cancellation_token.cancelled() => { },
                        _ = exponential_backoff(
                            retries,
                            DEFAULT_BASE_BACKOFF_SECONDS,
@@ -867,7 +915,7 @@ impl RemoteTimelineClient {
                task.op, retries
            );
        } else {
-            info!("remote task {} completed successfully", task.op);
+            debug!("remote task {} completed successfully", task.op);
        }

        // The task has completed succesfully. Remove it from the in-progress list.
@@ -901,22 +949,40 @@ impl RemoteTimelineClient {
            // Launch any queued tasks that were unblocked by this one.
            self.launch_queued_tasks(upload_queue);
        }
-        self.update_upload_queue_unfinished_metric(-1, &task.op);
+        self.calls_unfinished_metric_end(&task.op);
    }

-    fn update_upload_queue_unfinished_metric(&self, delta: i64, op: &UploadOp) {
-        let (file_kind, op_kind) = match op {
+    fn calls_unfinished_metric_impl(
+        &self,
+        op: &UploadOp,
+    ) -> Option<(RemoteOpFileKind, RemoteOpKind)> {
+        let res = match op {
            UploadOp::UploadLayer(_, _) => (RemoteOpFileKind::Layer, RemoteOpKind::Upload),
            UploadOp::UploadMetadata(_, _) => (RemoteOpFileKind::Index, RemoteOpKind::Upload),
            UploadOp::Delete(file_kind, _) => (*file_kind, RemoteOpKind::Delete),
            UploadOp::Barrier(_) => {
                // we do not account these
-                return;
+                return None;
            }
        };
-        self.metrics
-            .unfinished_tasks(&file_kind, &op_kind)
-            .add(delta)
+        Some(res)
+    }
+
+    fn calls_unfinished_metric_begin(&self, op: &UploadOp) {
+        let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) {
+            Some(x) => x,
+            None => return,
+        };
+        let guard = self.metrics.call_begin(&file_kind, &op_kind);
+        guard.will_decrement_manually(); // in calls_unfinished_metric_end()
+    }
+
+    fn calls_unfinished_metric_end(&self, op: &UploadOp) {
+        let (file_kind, op_kind) = match self.calls_unfinished_metric_impl(op) {
+            Some(x) => x,
+            None => return,
+        };
+        self.metrics.call_end(&file_kind, &op_kind);
    }

    fn stop(&self) {
@@ -967,7 +1033,7 @@ impl RemoteTimelineClient {

                // Tear down queued ops
                for op in qi.queued_operations.into_iter() {
-                    self.update_upload_queue_unfinished_metric(-1, &op);
+                    self.calls_unfinished_metric_end(&op);
                    // Dropping UploadOp::Barrier() here will make wait_completion() return with an Err()
                    // which is exactly what we want to happen.
                    drop(op);
@@ -983,7 +1049,9 @@ impl RemoteTimelineClient {
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::context::{DownloadBehavior, RequestContext, TaskKind};
    use crate::tenant::harness::{TenantHarness, TIMELINE_ID};
+    use crate::DEFAULT_PG_VERSION;
    use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
    use std::{collections::HashSet, path::Path};
    use utils::lsn::Lsn;
@@ -1002,7 +1070,7 @@ mod tests {
            Lsn(0),
            // Any version will do
            // but it should be consistent with the one in the tests
-            crate::DEFAULT_PG_VERSION,
+            DEFAULT_PG_VERSION,
        );

        // go through serialize + deserialize to fix the header, including checksum
@@ -1037,9 +1105,19 @@ mod tests {
    // Test scheduling
    #[test]
    fn upload_scheduling() -> anyhow::Result<()> {
+        // Use a current-thread runtime in the test
+        let runtime = Box::leak(Box::new(
+            tokio::runtime::Builder::new_current_thread()
+                .enable_all()
+                .build()?,
+        ));
+        let _entered = runtime.enter();
+
        let harness = TenantHarness::create("upload_scheduling")?;
+        let (tenant, tenant_ctx) = runtime.block_on(harness.load());
+        let (_timeline, timeline_ctx) =
+            tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &tenant_ctx)?;
        let timeline_path = harness.timeline_path(&TIMELINE_ID);
-        std::fs::create_dir_all(&timeline_path)?;

        let remote_fs_dir = harness.conf.workdir.join("remote_fs");
        std::fs::create_dir_all(remote_fs_dir)?;
@@ -1057,14 +1135,6 @@ mod tests {
            storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
        };

-        // Use a current-thread runtime in the test
-        let runtime = Box::leak(Box::new(
-            tokio::runtime::Builder::new_current_thread()
-                .enable_all()
-                .build()?,
-        ));
-        let _entered = runtime.enter();
-
        // Test outline:
        //
        // Schedule upload of a bunch of layers. Check that they are started immediately, not queued
@@ -1100,7 +1170,11 @@ mod tests {
        println!("remote_timeline_dir: {}", remote_timeline_dir.display());

        let metadata = dummy_metadata(Lsn(0x10));
-        client.init_upload_queue_for_empty_remote(&metadata)?;
+        let upload_ctx = timeline_ctx.register_another(RequestContext::new(
+            TaskKind::RemoteUploadTask,
+            DownloadBehavior::Error,
+        ));
+        client.init_upload_queue_for_empty_remote(&metadata, upload_ctx)?;

        // Create a couple of dummy files,  schedule upload for them
        let content_foo = dummy_contents("foo");
@@ -1140,7 +1214,7 @@ mod tests {
        }

        // Wait for the uploads to finish
-        runtime.block_on(client.wait_completion())?;
+        runtime.block_on(client.wait_completion(&timeline_ctx))?;
        {
            let mut guard = client.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut().unwrap();
@@ -1177,7 +1251,7 @@ mod tests {
        assert_remote_files(&["foo", "bar", "index_part.json"], &remote_timeline_dir);

        // Finish them
-        runtime.block_on(client.wait_completion())?;
+        runtime.block_on(client.wait_completion(&timeline_ctx))?;

        assert_remote_files(&["bar", "baz", "index_part.json"], &remote_timeline_dir);

--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -8,10 +8,9 @@ use std::future::Future;
 use std::path::Path;

 use anyhow::{anyhow, Context};
-use futures::stream::{FuturesUnordered, StreamExt};
 use tokio::fs;
 use tokio::io::AsyncWriteExt;
-use tracing::{debug, error, info, info_span, warn, Instrument};
+use tracing::{error, info, warn};

 use crate::config::PageServerConf;
 use crate::tenant::storage_layer::LayerFileName;
@@ -175,7 +174,7 @@ pub async fn list_remote_timelines<'a>(
    storage: &'a GenericRemoteStorage,
    conf: &'static PageServerConf,
    tenant_id: TenantId,
-) -> anyhow::Result<Vec<(TimelineId, IndexPart)>> {
+) -> anyhow::Result<HashSet<TimelineId>> {
    let tenant_path = conf.timelines_path(&tenant_id);
    let tenant_storage_path = conf.remote_path(&tenant_path)?;

@@ -194,7 +193,6 @@ pub async fn list_remote_timelines<'a>(
    }

    let mut timeline_ids = HashSet::new();
-    let mut part_downloads = FuturesUnordered::new();

    for timeline_remote_storage_key in timelines {
        let object_name = timeline_remote_storage_key.object_name().ok_or_else(|| {
@@ -205,35 +203,22 @@ pub async fn list_remote_timelines<'a>(
            format!("failed to parse object name into timeline id '{object_name}'")
        })?;

-        // list_prefixes returns all files with the prefix. If we haven't seen this timeline ID
-        // yet, launch a download task for it.
-        if !timeline_ids.contains(&timeline_id) {
-            timeline_ids.insert(timeline_id);
-            let storage_clone = storage.clone();
-            part_downloads.push(async move {
-                (
-                    timeline_id,
-                    download_index_part(conf, &storage_clone, tenant_id, timeline_id)
-                        .instrument(info_span!("download_index_part", timeline=%timeline_id))
-                        .await,
-                )
-            });
-        }
+        // list_prefixes is assumed to return unique names. Ensure this here.
+        // NB: it's safer to bail out than warn-log this because the pageserver
+        //     needs to absolutely know about _all_ timelines that exist, so that
+        //     GC knows all the branchpoints. If we skipped over a timeline instead,
+        //     GC could delete a layer that's still needed by that timeline.
+        anyhow::ensure!(
+            !timeline_ids.contains(&timeline_id),
+            "list_prefixes contains duplicate timeline id {timeline_id}"
+        );
+        timeline_ids.insert(timeline_id);
    }

-    // Wait for all the download tasks to complete.
-    let mut timeline_parts = Vec::new();
-    while let Some((timeline_id, part_upload_result)) = part_downloads.next().await {
-        let index_part = part_upload_result
-            .with_context(|| format!("Failed to fetch index part for timeline {timeline_id}"))?;
-
-        debug!("Successfully fetched index part for timeline {timeline_id}");
-        timeline_parts.push((timeline_id, index_part));
-    }
-    Ok(timeline_parts)
+    Ok(timeline_ids)
 }

-pub async fn download_index_part(
+pub(super) async fn download_index_part(
    conf: &'static PageServerConf,
    storage: &GenericRemoteStorage,
    tenant_id: TenantId,
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -83,11 +83,6 @@ where
    /// Additional metadata can might exist in `layer_metadata`.
    pub timeline_layers: HashSet<L>,

-    /// FIXME: unused field. This should be removed, but that changes the on-disk format,
-    /// so we need to make sure we're backwards-` (and maybe forwards-) compatible
-    /// First pass is to move it to Optional and the next would be its removal
-    missing_layers: Option<HashSet<L>>,
-
    /// Per layer file name metadata, which can be present for a present or missing layer file.
    ///
    /// Older versions of `IndexPart` will not have this property or have only a part of metadata
@@ -167,8 +162,6 @@ impl IndexPartUnclean {
        let IndexPartUnclean {
            version,
            timeline_layers,
-            // this is an unused field, ignore it on cleaning
-            missing_layers: _,
            layer_metadata,
            disk_consistent_lsn,
            metadata_bytes,
@@ -189,7 +182,6 @@ impl IndexPartUnclean {
                    }
                })
                .collect(),
-            missing_layers: None,
            layer_metadata: layer_metadata
                .into_iter()
                .filter_map(|(l, m)| l.into_clean().map(|l| (l, m)))
@@ -225,7 +217,6 @@ impl IndexPart {
        Self {
            version: Self::LATEST_VERSION,
            timeline_layers,
-            missing_layers: Some(HashSet::new()),
            layer_metadata,
            disk_consistent_lsn,
            metadata_bytes,
@@ -259,7 +250,6 @@ mod tests {
    fn v0_indexpart_is_parsed() {
        let example = r#"{
            "timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
-            "missing_layers":["LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage"],
            "disk_consistent_lsn":"0/16960E8",
            "metadata_bytes":[113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
        }"#;
@@ -267,7 +257,6 @@ mod tests {
        let expected = IndexPart {
            version: 0,
            timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap()]),
-            missing_layers: None, // disabled fields should not carry unused values further
            layer_metadata: HashMap::default(),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata_bytes: [113,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
@@ -283,7 +272,6 @@ mod tests {
        let example = r#"{
            "version":1,
            "timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
-            "missing_layers":["LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage"],
            "layer_metadata":{
                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
                "LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
@@ -296,7 +284,6 @@ mod tests {
            // note this is not verified, could be anything, but exists for humans debugging.. could be the git version instead?
            version: 1,
            timeline_layers: HashSet::from(["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap()]),
-            missing_layers: None,
            layer_metadata: HashMap::from([
                ("000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9".parse().unwrap(), IndexLayerMetadata {
                    file_size: Some(25600000),
@@ -322,6 +309,7 @@ mod tests {
        let example = r#"{
            "version":1,
            "timeline_layers":["000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9"],
+            "missing_layers":["This shouldn't fail deserialization"],
            "layer_metadata":{
                "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__0000000001696070-00000000016960E9": { "file_size": 25600000 },
                "LAYER_FILE_NAME::test/not_a_real_layer_but_adding_coverage": { "file_size": 9007199254741001 }
@@ -346,7 +334,6 @@ mod tests {
            ]),
            disk_consistent_lsn: "0/16960E8".parse::<Lsn>().unwrap(),
            metadata_bytes: [112,11,159,210,0,54,0,4,0,0,0,0,1,105,96,232,1,0,0,0,0,1,105,96,112,0,0,0,0,0,0,0,0,0,0,0,0,0,1,105,96,112,0,0,0,0,1,105,96,112,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0].to_vec(),
-            missing_layers: None,
        };

        let part = serde_json::from_str::<IndexPartUnclean>(example).unwrap();
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -3,10 +3,9 @@ use std::collections::{HashMap, HashSet};
 use std::sync::Arc;

 use anyhow::Context;
-use tokio::sync::oneshot::error::RecvError;
 use tokio::sync::Semaphore;

-use crate::pgdatadir_mapping::CalculateLogicalSizeError;
+use crate::tenant::{PageReconstructError, TenantRequestContext, TimelineRequestContext};

 use super::Tenant;
 use utils::id::TimelineId;
@@ -63,13 +62,14 @@ pub(super) async fn gather_inputs(
    tenant: &Tenant,
    limit: &Arc<Semaphore>,
    logical_size_cache: &mut HashMap<(TimelineId, Lsn), u64>,
+    tenant_ctx: &TenantRequestContext,
 ) -> anyhow::Result<ModelInputs> {
    // with joinset, on drop, all of the tasks will just be de-scheduled, which we can use to
    // our advantage with `?` error handling.
    let mut joinset = tokio::task::JoinSet::new();

    let timelines = tenant
-        .refresh_gc_info()
+        .refresh_gc_info(tenant_ctx)
        .await
        .context("Failed to refresh gc_info before gathering inputs")?;

@@ -97,9 +97,21 @@ pub(super) async fn gather_inputs(
    // used to determine the `retention_period` for the size model
    let mut max_cutoff_distance = None;

+    let mut ctx_dropguards: Vec<tokio_util::sync::DropGuard> = Vec::new();
+
    for timeline in timelines {
        let last_record_lsn = timeline.get_last_record_lsn();

+        let ctx = match timeline.get_context(tenant_ctx) {
+            Ok(ctx) => ctx,
+            Err(state) => {
+                info!("skipping tenant size calculation for timeline because it is in {state:?} state");
+                continue;
+            }
+        };
+        ctx_dropguards.push(ctx.cancellation_token().clone().drop_guard());
+        let ctx = Arc::new(ctx);
+
        let (interesting_lsns, horizon_cutoff, pitr_cutoff, next_gc_cutoff) = {
            // there's a race between the update (holding tenant.gc_lock) and this read but it
            // might not be an issue, because it's not for Timeline::gc
@@ -169,19 +181,23 @@ pub(super) async fn gather_inputs(
            timeline_id: timeline.timeline_id,
        });

-        for (lsn, _kind) in &interesting_lsns {
-            if let Some(size) = logical_size_cache.get(&(timeline.timeline_id, *lsn)) {
+        for (lsn, _kind) in interesting_lsns.iter() {
+            let lsn = *lsn;
+            if let Some(size) = logical_size_cache.get(&(timeline.timeline_id, lsn)) {
                updates.push(Update {
-                    lsn: *lsn,
+                    lsn,
                    timeline_id: timeline.timeline_id,
                    command: Command::Update(*size),
                });

-                needed_cache.insert((timeline.timeline_id, *lsn));
+                needed_cache.insert((timeline.timeline_id, lsn));
            } else {
                let timeline = Arc::clone(&timeline);
                let parallel_size_calcs = Arc::clone(limit);
-                joinset.spawn(calculate_logical_size(parallel_size_calcs, timeline, *lsn));
+                let ctx_clone = Arc::clone(&ctx);
+                joinset.spawn(async move {
+                    calculate_logical_size(parallel_size_calcs, timeline, lsn, &ctx_clone).await
+                });
            }
        }

@@ -357,7 +373,7 @@ enum LsnKind {
 struct TimelineAtLsnSizeResult(
    Arc<crate::tenant::Timeline>,
    utils::lsn::Lsn,
-    Result<u64, CalculateLogicalSizeError>,
+    Result<u64, PageReconstructError>,
 );

 #[instrument(skip_all, fields(timeline_id=%timeline.timeline_id, lsn=%lsn))]
@@ -365,14 +381,13 @@ async fn calculate_logical_size(
    limit: Arc<tokio::sync::Semaphore>,
    timeline: Arc<crate::tenant::Timeline>,
    lsn: utils::lsn::Lsn,
-) -> Result<TimelineAtLsnSizeResult, RecvError> {
+    ctx: &TimelineRequestContext,
+) -> Result<TimelineAtLsnSizeResult, PageReconstructError> {
    let _permit = tokio::sync::Semaphore::acquire_owned(limit)
        .await
-        .expect("global semaphore should not had been closed");
+        .expect("global semaphore should not have been closed");

-    let size_res = timeline
-        .spawn_ondemand_logical_size_calculation(lsn)
-        .await?;
+    let size_res = timeline.calculate_logical_size(lsn, ctx).await;
    Ok(TimelineAtLsnSizeResult(timeline, lsn, size_res))
 }

--- a/pageserver/src/tenant/storage_layer.rs
+++ b/pageserver/src/tenant/storage_layer.rs
@@ -109,7 +109,7 @@ pub trait Layer: Send + Sync {
    /// See PageReconstructResult for possible return values. The collected data
    /// is appended to reconstruct_data; the caller should pass an empty struct
    /// on first call, or a struct with a cached older image of the page if one
-    /// is available. If this returns PageReconstructResult::Continue, look up
+    /// is available. If this returns ValueReconstructResult::Continue, look up
    /// the predecessor layer and call again with the same 'reconstruct_data' to
    /// collect more data.
    fn get_value_reconstruct_data(
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -1,45 +1,39 @@
 //! This module contains functions to serve per-tenant background processes,
 //! such as compaction and GC

-use std::ops::ControlFlow;
 use std::sync::Arc;
 use std::time::Duration;

+use crate::context::{DownloadBehavior, RequestContext, TaskKind};
 use crate::metrics::TENANT_TASK_EVENTS;
 use crate::task_mgr;
-use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
-use crate::tenant::mgr;
-use crate::tenant::{Tenant, TenantState};
+use crate::task_mgr::BACKGROUND_RUNTIME;
+use crate::tenant::Tenant;
 use tracing::*;
-use utils::id::TenantId;

-pub fn start_background_loops(tenant_id: TenantId) {
+pub fn start_background_loops(tenant: &Arc<Tenant>) {
+    let tenant_id = tenant.tenant_id;
+
+    let tenant_clone = Arc::clone(tenant);
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
-        TaskKind::Compaction,
-        Some(tenant_id),
-        None,
        &format!("compactor for tenant {tenant_id}"),
        false,
        async move {
-            compaction_loop(tenant_id)
+            compaction_loop(&tenant_clone)
                .instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
                .await;
-            Ok(())
        },
    );
+    let tenant_clone = Arc::clone(tenant);
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
-        TaskKind::GarbageCollector,
-        Some(tenant_id),
-        None,
        &format!("garbage collector for tenant {tenant_id}"),
        false,
        async move {
-            gc_loop(tenant_id)
+            gc_loop(&tenant_clone)
                .instrument(info_span!("gc_loop", tenant_id = %tenant_id))
                .await;
-            Ok(())
        },
    );
 }
@@ -47,25 +41,27 @@ pub fn start_background_loops(tenant_id: TenantId) {
 ///
 /// Compaction task's main loop
 ///
-async fn compaction_loop(tenant_id: TenantId) {
+async fn compaction_loop(tenant: &Arc<Tenant>) {
    let wait_duration = Duration::from_secs(2);
    info!("starting");
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
+        let top_ctx = RequestContext::new(TaskKind::Compaction, DownloadBehavior::Download);
+
+        let tenant_ctx = match tenant.get_context(&top_ctx) {
+            Ok(ctx) => ctx,
+            Err(state) => {
+                // This could happen if the tenant is detached or the pageserver is shut
+                // down immediately after loading or attaching completed and the tenant
+                // was activated. It seems unlikely enough in practice that we log it as
+                // an error, as it could also be a bug.
+                error!("Not running compaction loop, tenant is not active: {state:?}");
+                return;
+            }
+        };
        loop {
            trace!("waking up");

-            let tenant = tokio::select! {
-                _ = task_mgr::shutdown_watcher() => {
-                    info!("received cancellation request");
-                    return;
-                },
-                tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result {
-                    ControlFlow::Break(()) => return,
-                    ControlFlow::Continue(tenant) => tenant,
-                },
-            };
-
            let mut sleep_duration = tenant.get_compaction_period();
            if sleep_duration == Duration::ZERO {
                info!("automatic compaction is disabled");
@@ -73,7 +69,7 @@ async fn compaction_loop(tenant_id: TenantId) {
                sleep_duration = Duration::from_secs(10);
            } else {
                // Run compaction
-                if let Err(e) = tenant.compaction_iteration().await {
+                if let Err(e) = tenant.compaction_iteration(&tenant_ctx).await {
                    sleep_duration = wait_duration;
                    error!("Compaction failed, retrying in {:?}: {e:?}", sleep_duration);
                }
@@ -81,9 +77,9 @@ async fn compaction_loop(tenant_id: TenantId) {

            // Sleep
            tokio::select! {
-                _ = task_mgr::shutdown_watcher() => {
+                _ = tenant_ctx.cancelled() => {
                    info!("received cancellation request during idling");
-                    break ;
+                    break;
                },
                _ = tokio::time::sleep(sleep_duration) => {},
            }
@@ -98,25 +94,28 @@ async fn compaction_loop(tenant_id: TenantId) {
 ///
 /// GC task's main loop
 ///
-async fn gc_loop(tenant_id: TenantId) {
+async fn gc_loop(tenant: &Arc<Tenant>) {
    let wait_duration = Duration::from_secs(2);
    info!("starting");
    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
    async {
+        // GC might require downloading, to find the cutoff LSN that corresponds to the
+        // cutoff specified as time.
+        let top_ctx = RequestContext::new(TaskKind::GarbageCollector, DownloadBehavior::Download);
+        let tenant_ctx = match tenant.get_context(&top_ctx) {
+            Ok(ctx) => ctx,
+            Err(state) => {
+                // This could happen if the tenant is detached or the pageserver is shut
+                // down immediately after loading or attaching completed and the tenant
+                // was activated. It seems unlikely enough in practice that we had better log
+                // an error, as it could also be a bug.
+                error!("Not running GC loop, tenant is not active: {state:?}");
+                return;
+            }
+        };
        loop {
            trace!("waking up");

-            let tenant = tokio::select! {
-                _ = task_mgr::shutdown_watcher() => {
-                    info!("received cancellation request");
-                    return;
-                },
-                tenant_wait_result = wait_for_active_tenant(tenant_id, wait_duration) => match tenant_wait_result {
-                    ControlFlow::Break(()) => return,
-                    ControlFlow::Continue(tenant) => tenant,
-                },
-            };
-
            let gc_period = tenant.get_gc_period();
            let gc_horizon = tenant.get_gc_horizon();
            let mut sleep_duration = gc_period;
@@ -127,7 +126,10 @@ async fn gc_loop(tenant_id: TenantId) {
            } else {
                // Run gc
                if gc_horizon > 0 {
-                    if let Err(e) = tenant.gc_iteration(None, gc_horizon, tenant.get_pitr_interval()).await
+                    // Run a GC iteration
+                    if let Err(e) = tenant
+                        .gc_iteration(None, gc_horizon, tenant.get_pitr_interval(), &tenant_ctx)
+                        .await
                    {
                        sleep_duration = wait_duration;
                        error!("Gc failed, retrying in {:?}: {e:?}", sleep_duration);
@@ -137,7 +139,7 @@ async fn gc_loop(tenant_id: TenantId) {

            // Sleep
            tokio::select! {
-                _ = task_mgr::shutdown_watcher() => {
+                _ = tenant_ctx.cancelled() => {
                    info!("received cancellation request during idling");
                    break;
                },
@@ -149,46 +151,3 @@ async fn gc_loop(tenant_id: TenantId) {
    TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
    trace!("GC loop stopped.");
 }
-
-async fn wait_for_active_tenant(
-    tenant_id: TenantId,
-    wait: Duration,
-) -> ControlFlow<(), Arc<Tenant>> {
-    let tenant = loop {
-        match mgr::get_tenant(tenant_id, false).await {
-            Ok(tenant) => break tenant,
-            Err(e) => {
-                error!("Failed to get a tenant {tenant_id}: {e:#}");
-                tokio::time::sleep(wait).await;
-            }
-        }
-    };
-
-    // if the tenant has a proper status already, no need to wait for anything
-    if tenant.current_state() == TenantState::Active {
-        ControlFlow::Continue(tenant)
-    } else {
-        let mut tenant_state_updates = tenant.subscribe_for_state_updates();
-        loop {
-            match tenant_state_updates.changed().await {
-                Ok(()) => {
-                    let new_state = *tenant_state_updates.borrow();
-                    match new_state {
-                        TenantState::Active => {
-                            debug!("Tenant state changed to active, continuing the task loop");
-                            return ControlFlow::Continue(tenant);
-                        }
-                        state => {
-                            debug!("Not running the task loop, tenant is not active: {state:?}");
-                            continue;
-                        }
-                    }
-                }
-                Err(_sender_dropped_error) => {
-                    info!("Tenant dropped the state updates sender, quitting waiting for tenant and the task loop");
-                    return ControlFlow::Break(());
-                }
-            }
-        }
-    }
-}
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
--- a/pageserver/src/tenant/upload_queue.rs
+++ b/pageserver/src/tenant/upload_queue.rs
@@ -4,6 +4,7 @@ use super::storage_layer::LayerFileName;
 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::remote_timeline_client::index::IndexPart;
 use crate::tenant::remote_timeline_client::index::LayerFileMetadata;
+use crate::tenant::TimelineRequestContext;
 use std::collections::{HashMap, VecDeque};
 use std::fmt::Debug;

@@ -73,6 +74,13 @@ pub(crate) struct UploadQueueInitialized {
    /// tasks to finish. For example, metadata upload cannot be performed before all
    /// preceding layer file uploads have completed.
    pub(crate) queued_operations: VecDeque<UploadOp>,
+
+    /// Context used for the upload tasks. Note that this is associated with the
+    /// Timeline, so this prevents the Timeline from being shut down. To ensure quick
+    /// shutdown, RemoteTimelineClient spawns a task to wait for cancellation on the
+    /// context and stop the queue. Otherwise we wouldn't notice the cancellation
+    /// until the next upload attempt.
+    pub(crate) upload_ctx: TimelineRequestContext,
 }

 pub(crate) struct UploadQueueStopped {
@@ -83,6 +91,7 @@ impl UploadQueue {
    pub(crate) fn initialize_empty_remote(
        &mut self,
        metadata: &TimelineMetadata,
+        upload_ctx: TimelineRequestContext,
    ) -> anyhow::Result<&mut UploadQueueInitialized> {
        match self {
            UploadQueue::Uninitialized => (),
@@ -108,6 +117,7 @@ impl UploadQueue {
            num_inprogress_deletions: 0,
            inprogress_tasks: HashMap::new(),
            queued_operations: VecDeque::new(),
+            upload_ctx,
        };

        *self = UploadQueue::Initialized(state);
@@ -117,6 +127,7 @@ impl UploadQueue {
    pub(crate) fn initialize_with_current_remote_index_part(
        &mut self,
        index_part: &IndexPart,
+        upload_ctx: TimelineRequestContext,
    ) -> anyhow::Result<&mut UploadQueueInitialized> {
        match self {
            UploadQueue::Uninitialized => (),
@@ -153,6 +164,7 @@ impl UploadQueue {
            num_inprogress_deletions: 0,
            inprogress_tasks: HashMap::new(),
            queued_operations: VecDeque::new(),
+            upload_ctx,
        };

        *self = UploadQueue::Initialized(state);
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -31,6 +31,7 @@ use once_cell::sync::OnceCell;
 use std::future::Future;
 use storage_broker::BrokerClientChannel;
 use tokio::sync::watch;
+use tokio_util::sync::CancellationToken;
 use tracing::*;

 pub use connection_manager::spawn_connection_manager_task;
@@ -76,7 +77,7 @@ pub fn is_broker_client_initialized() -> bool {

 /// A handle of an asynchronous task.
 /// The task has a channel that it can use to communicate its lifecycle events in a certain form, see [`TaskEvent`]
-/// and a cancellation channel that it can listen to for earlier interrupts.
+/// and a cancellation token that it can listen to for earlier interrupts.
 ///
 /// Note that the communication happens via the `watch` channel, that does not accumulate the events, replacing the old one with the never one on submission.
 /// That may lead to certain events not being observed by the listener.
@@ -84,7 +85,7 @@ pub fn is_broker_client_initialized() -> bool {
 pub struct TaskHandle<E> {
    join_handle: Option<tokio::task::JoinHandle<anyhow::Result<()>>>,
    events_receiver: watch::Receiver<TaskStateUpdate<E>>,
-    cancellation: watch::Sender<()>,
+    cancellation: CancellationToken,
 }

 pub enum TaskEvent<E> {
@@ -102,20 +103,18 @@ pub enum TaskStateUpdate<E> {
 impl<E: Clone> TaskHandle<E> {
    /// Initializes the task, starting it immediately after the creation.
    pub fn spawn<Fut>(
-        task: impl FnOnce(watch::Sender<TaskStateUpdate<E>>, watch::Receiver<()>) -> Fut
-            + Send
-            + 'static,
+        task: impl FnOnce(watch::Sender<TaskStateUpdate<E>>) -> Fut + Send + 'static,
+        cancellation: CancellationToken,
    ) -> Self
    where
        Fut: Future<Output = anyhow::Result<()>> + Send,
        E: Send + Sync + 'static,
    {
-        let (cancellation, cancellation_receiver) = watch::channel(());
        let (events_sender, events_receiver) = watch::channel(TaskStateUpdate::Started);

        let join_handle = WALRECEIVER_RUNTIME.spawn(async move {
            events_sender.send(TaskStateUpdate::Started).ok();
-            task(events_sender, cancellation_receiver).await
+            task(events_sender).await
        });

        TaskHandle {
@@ -157,7 +156,7 @@ impl<E: Clone> TaskHandle<E> {
    /// Aborts current task, waiting for it to finish.
    pub async fn shutdown(self) {
        if let Some(jh) = self.join_handle {
-            self.cancellation.send(()).ok();
+            self.cancellation.cancel();
            match jh.await {
                Ok(Ok(())) => debug!("Shutdown success"),
                Ok(Err(e)) => error!("Shutdown task error: {e:?}"),
--- a/pageserver/src/walreceiver/connection_manager.rs
+++ b/pageserver/src/walreceiver/connection_manager.rs
@@ -11,9 +11,9 @@

 use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, time::Duration};

-use crate::task_mgr::TaskKind;
+use crate::context::{DownloadBehavior, RequestContext, TaskKind};
 use crate::task_mgr::WALRECEIVER_RUNTIME;
-use crate::tenant::Timeline;
+use crate::tenant::{Timeline, TimelineRequestContext};
 use crate::{task_mgr, walreceiver::TaskStateUpdate};
 use anyhow::Context;
 use chrono::{NaiveDateTime, Utc};
@@ -46,6 +46,7 @@ pub fn spawn_connection_manager_task(
    lagging_wal_timeout: Duration,
    max_lsn_wal_lag: NonZeroU64,
    auth_token: Option<Arc<String>>,
+    ctx: TimelineRequestContext,
 ) {
    let mut broker_client = get_broker_client().clone();

@@ -54,9 +55,6 @@ pub fn spawn_connection_manager_task(

    task_mgr::spawn(
        WALRECEIVER_RUNTIME.handle(),
-        TaskKind::WalReceiverManager,
-        Some(tenant_id),
-        Some(timeline_id),
        &format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
        false,
        async move {
@@ -70,20 +68,21 @@ pub fn spawn_connection_manager_task(
            );
            loop {
                select! {
-                    _ = task_mgr::shutdown_watcher() => {
+                    _ = ctx.cancelled() => {
                        info!("WAL receiver shutdown requested, shutting down");
                        walreceiver_state.shutdown().await;
-                        return Ok(());
+                        return;
                    },
                    loop_step_result = connection_manager_loop_step(
                        &mut broker_client,
                        &mut walreceiver_state,
+                        &ctx,
                    ) => match loop_step_result {
                        ControlFlow::Continue(()) => continue,
                        ControlFlow::Break(()) => {
                            info!("Connection manager loop ended, shutting down");
                            walreceiver_state.shutdown().await;
-                            return Ok(());
+                            return;
                        }
                    },
                }
@@ -101,6 +100,7 @@ pub fn spawn_connection_manager_task(
 async fn connection_manager_loop_step(
    broker_client: &mut BrokerClientChannel,
    walreceiver_state: &mut WalreceiverState,
+    ctx: &TimelineRequestContext,
 ) -> ControlFlow<(), ()> {
    let mut timeline_state_updates = walreceiver_state.timeline.subscribe_for_state_updates();

@@ -226,6 +226,7 @@ async fn connection_manager_loop_step(
                .change_connection(
                    new_candidate.safekeeper_id,
                    new_candidate.wal_source_connconf,
+                    ctx,
                )
                .await
        }
@@ -389,26 +390,38 @@ impl WalreceiverState {
        &mut self,
        new_sk_id: NodeId,
        new_wal_source_connconf: PgConnectionConfig,
+        ctx: &TimelineRequestContext,
    ) {
        self.drop_old_connection(true).await;

        let id = self.id;
        let connect_timeout = self.wal_connect_timeout;
        let timeline = Arc::clone(&self.timeline);
-        let connection_handle = TaskHandle::spawn(move |events_sender, cancellation| {
-            async move {
-                super::walreceiver_connection::handle_walreceiver_connection(
-                    timeline,
-                    new_wal_source_connconf,
-                    events_sender,
-                    cancellation,
-                    connect_timeout,
-                )
-                .await
-                .context("walreceiver connection handling failure")
-            }
-            .instrument(info_span!("walreceiver_connection", id = %id, node_id = %new_sk_id))
-        });
+
+        let child_ctx = ctx.register_another(RequestContext::with_parent(
+            TaskKind::WalReceiverConnection,
+            DownloadBehavior::Download,
+            ctx,
+        ));
+        let cancellation_token = child_ctx.cancellation_token().clone();
+
+        let connection_handle = TaskHandle::spawn(
+            move |events_sender| {
+                async move {
+                    super::walreceiver_connection::handle_walreceiver_connection(
+                        timeline,
+                        new_wal_source_connconf,
+                        events_sender,
+                        connect_timeout,
+                        child_ctx,
+                    )
+                    .await
+                    .context("walreceiver connection handling failure")
+                }
+                .instrument(info_span!("walreceiver_connection", id = %id, node_id = %new_sk_id))
+            },
+            cancellation_token,
+        );

        let now = Utc::now().naive_utc();
        self.wal_connection = Some(WalConnection {
@@ -820,6 +833,7 @@ fn wal_stream_connection_config(
 mod tests {
    use super::*;
    use crate::tenant::harness::{TenantHarness, TIMELINE_ID};
+    use tokio_util::sync::CancellationToken;
    use url::Host;

    fn dummy_broker_sk_timeline(
@@ -900,12 +914,15 @@ mod tests {
            started_at: now,
            sk_id: connected_sk_id,
            status: connection_status,
-            connection_task: TaskHandle::spawn(move |sender, _| async move {
-                sender
-                    .send(TaskStateUpdate::Progress(connection_status))
-                    .ok();
-                Ok(())
-            }),
+            connection_task: TaskHandle::spawn(
+                move |sender| async move {
+                    sender
+                        .send(TaskStateUpdate::Progress(connection_status))
+                        .ok();
+                    Ok(())
+                },
+                CancellationToken::new(),
+            ),
            discovered_new_wal: None,
        });
        state.wal_stream_candidates = HashMap::from([
@@ -1062,12 +1079,15 @@ mod tests {
            started_at: now,
            sk_id: connected_sk_id,
            status: connection_status,
-            connection_task: TaskHandle::spawn(move |sender, _| async move {
-                sender
-                    .send(TaskStateUpdate::Progress(connection_status))
-                    .ok();
-                Ok(())
-            }),
+            connection_task: TaskHandle::spawn(
+                move |sender| async move {
+                    sender
+                        .send(TaskStateUpdate::Progress(connection_status))
+                        .ok();
+                    Ok(())
+                },
+                CancellationToken::new(),
+            ),
            discovered_new_wal: None,
        });
        state.wal_stream_candidates = HashMap::from([
@@ -1127,12 +1147,15 @@ mod tests {
            started_at: now,
            sk_id: NodeId(1),
            status: connection_status,
-            connection_task: TaskHandle::spawn(move |sender, _| async move {
-                sender
-                    .send(TaskStateUpdate::Progress(connection_status))
-                    .ok();
-                Ok(())
-            }),
+            connection_task: TaskHandle::spawn(
+                move |sender| async move {
+                    sender
+                        .send(TaskStateUpdate::Progress(connection_status))
+                        .ok();
+                    Ok(())
+                },
+                CancellationToken::new(),
+            ),
            discovered_new_wal: None,
        });
        state.wal_stream_candidates = HashMap::from([(
@@ -1189,7 +1212,10 @@ mod tests {
            started_at: now,
            sk_id: NodeId(1),
            status: connection_status,
-            connection_task: TaskHandle::spawn(move |_, _| async move { Ok(()) }),
+            connection_task: TaskHandle::spawn(
+                move |_| async move { Ok(()) },
+                CancellationToken::new(),
+            ),
            discovered_new_wal: Some(NewCommittedWAL {
                discovered_at: time_over_threshold,
                lsn: new_lsn,
@@ -1233,18 +1259,18 @@ mod tests {
    const DUMMY_SAFEKEEPER_HOST: &str = "safekeeper_connstr";

    async fn dummy_state(harness: &TenantHarness<'_>) -> WalreceiverState {
+        let (tenant, tenant_ctx) = harness.load().await;
+        let (timeline, timeline_ctx) = tenant
+            .create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION, &tenant_ctx)
+            .expect("Failed to create an empty timeline for dummy wal connection manager");
+        let timeline = timeline.initialize(&timeline_ctx).unwrap();
+
        WalreceiverState {
            id: TenantTimelineId {
                tenant_id: harness.tenant_id,
                timeline_id: TIMELINE_ID,
            },
-            timeline: harness
-                .load()
-                .await
-                .create_empty_timeline(TIMELINE_ID, Lsn(0), crate::DEFAULT_PG_VERSION)
-                .expect("Failed to create an empty timeline for dummy wal connection manager")
-                .initialize()
-                .unwrap(),
+            timeline,
            wal_connect_timeout: Duration::from_secs(1),
            lagging_wal_timeout: Duration::from_secs(1),
            max_lsn_wal_lag: NonZeroU64::new(1024 * 1024).unwrap(),
--- a/pageserver/src/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/walreceiver/walreceiver_connection.rs
@@ -1,6 +1,7 @@
 //! Actual Postgres connection handler to stream WAL to the server.

 use std::{
+    error::Error,
    str::FromStr,
    sync::Arc,
    time::{Duration, SystemTime},
@@ -11,7 +12,7 @@ use bytes::BytesMut;
 use chrono::{NaiveDateTime, Utc};
 use fail::fail_point;
 use futures::StreamExt;
-use postgres::{SimpleQueryMessage, SimpleQueryRow};
+use postgres::{error::SqlState, SimpleQueryMessage, SimpleQueryRow};
 use postgres_ffi::v14::xlog_utils::normalize_lsn;
 use postgres_ffi::WAL_SEGMENT_SIZE;
 use postgres_protocol::message::backend::ReplicationMessage;
@@ -20,21 +21,18 @@ use tokio::{pin, select, sync::watch, time};
 use tokio_postgres::{replication::ReplicationStream, Client};
 use tracing::{debug, error, info, trace, warn};

-use crate::{
-    metrics::LIVE_CONNECTIONS_COUNT, tenant::with_ondemand_download, walreceiver::TaskStateUpdate,
-};
+use crate::{metrics::LIVE_CONNECTIONS_COUNT, walreceiver::TaskStateUpdate};
 use crate::{
    task_mgr,
-    task_mgr::TaskKind,
    task_mgr::WALRECEIVER_RUNTIME,
-    tenant::{Timeline, WalReceiverInfo},
+    tenant::{Timeline, TimelineRequestContext, WalReceiverInfo},
    walingest::WalIngest,
    walrecord::DecodedWALRecord,
 };
 use postgres_connection::PgConnectionConfig;
 use postgres_ffi::waldecoder::WalStreamDecoder;
 use pq_proto::ReplicationFeedback;
-use utils::lsn::Lsn;
+use utils::{lsn::Lsn, postgres_backend_async::is_expected_io_error};

 /// Status of the connection.
 #[derive(Debug, Clone, Copy)]
@@ -60,8 +58,8 @@ pub async fn handle_walreceiver_connection(
    timeline: Arc<Timeline>,
    wal_source_connconf: PgConnectionConfig,
    events_sender: watch::Sender<TaskStateUpdate<WalConnectionStatus>>,
-    mut cancellation: watch::Receiver<()>,
    connect_timeout: Duration,
+    ctx: TimelineRequestContext,
 ) -> anyhow::Result<()> {
    // Connect to the database in replication mode.
    info!("connecting to {wal_source_connconf:?}");
@@ -70,10 +68,17 @@ pub async fn handle_walreceiver_connection(
        let mut config = wal_source_connconf.to_tokio_postgres_config();
        config.application_name("pageserver");
        config.replication_mode(tokio_postgres::config::ReplicationMode::Physical);
-        time::timeout(connect_timeout, config.connect(postgres::NoTls))
-            .await
-            .context("Timed out while waiting for walreceiver connection to open")?
-            .context("Failed to open walreceiver connection")?
+        match time::timeout(connect_timeout, config.connect(postgres::NoTls)).await {
+            Ok(Ok(client_and_conn)) => client_and_conn,
+            Ok(Err(conn_err)) => {
+                let expected_error = ignore_expected_errors(conn_err)?;
+                info!("DB connection stream finished: {expected_error}");
+                return Ok(());
+            }
+            Err(elapsed) => anyhow::bail!(
+                "Timed out while waiting {elapsed} for walreceiver connection to open"
+            ),
+        }
    };

    info!("connected!");
@@ -92,12 +97,9 @@ pub async fn handle_walreceiver_connection(

    // The connection object performs the actual communication with the database,
    // so spawn it off to run on its own.
-    let mut connection_cancellation = cancellation.clone();
+    let cancellation_token = ctx.cancellation_token().clone();
    task_mgr::spawn(
        WALRECEIVER_RUNTIME.handle(),
-        TaskKind::WalReceiverConnection,
-        Some(timeline.tenant_id),
-        Some(timeline.timeline_id),
        "walreceiver connection",
        false,
        async move {
@@ -105,17 +107,14 @@ pub async fn handle_walreceiver_connection(
                connection_result = connection => match connection_result{
                    Ok(()) => info!("Walreceiver db connection closed"),
                    Err(connection_error) => {
-                        if connection_error.is_closed() {
-                            info!("Connection closed regularly: {connection_error}")
-                        } else {
-                            warn!("Connection aborted: {connection_error}")
+                        if let Err(e) = ignore_expected_errors(connection_error) {
+                            warn!("Connection aborted: {e:#}")
                        }
                    }
                },

-                _ = connection_cancellation.changed() => info!("Connection cancelled"),
+                _ = cancellation_token.cancelled() => info!("Connection cancelled"),
            }
-            Ok(())
        },
    );

@@ -175,12 +174,13 @@ pub async fn handle_walreceiver_connection(

    let mut waldecoder = WalStreamDecoder::new(startpoint, timeline.pg_version);

-    let mut walingest =
-        with_ondemand_download(|| WalIngest::new(timeline.as_ref(), startpoint)).await?;
+    let mut walingest = WalIngest::new(timeline.as_ref(), startpoint, &ctx).await?;
+
+    let cancellation = ctx.cancellation_token().clone();

    while let Some(replication_message) = {
        select! {
-            _ = cancellation.changed() => {
+            _ = cancellation.cancelled() => {
                info!("walreceiver interrupted");
                None
            }
@@ -190,14 +190,9 @@ pub async fn handle_walreceiver_connection(
        let replication_message = match replication_message {
            Ok(message) => message,
            Err(replication_error) => {
-                if replication_error.is_closed() {
-                    info!("Replication stream got closed");
-                    return Ok(());
-                } else {
-                    return Err(
-                        anyhow::Error::new(replication_error).context("replication stream error")
-                    );
-                }
+                let expected_error = ignore_expected_errors(replication_error)?;
+                info!("Replication stream finished: {expected_error}");
+                return Ok(());
            }
        };

@@ -251,16 +246,10 @@ pub async fn handle_walreceiver_connection(
                        // at risk of hitting a deadlock.
                        ensure!(lsn.is_aligned());

-                        with_ondemand_download(|| {
-                            walingest.ingest_record(
-                                recdata.clone(),
-                                lsn,
-                                &mut modification,
-                                &mut decoded,
-                            )
-                        })
-                        .await
-                        .with_context(|| format!("could not ingest record at {lsn}"))?;
+                        walingest
+                            .ingest_record(recdata, lsn, &mut modification, &mut decoded, &ctx)
+                            .await
+                            .with_context(|| format!("could not ingest record at {lsn}"))?;

                        fail_point!("walreceiver-after-ingest");

@@ -335,10 +324,11 @@ pub async fn handle_walreceiver_connection(

            // Send the replication feedback message.
            // Regular standby_status_update fields are put into this message.
+            let (timeline_logical_size, _) = timeline
+                .get_current_logical_size(&ctx)
+                .context("Status update creation failed to get current logical size")?;
            let status_update = ReplicationFeedback {
-                current_timeline_size: timeline
-                    .get_current_logical_size()
-                    .context("Status update creation failed to get current logical size")?,
+                current_timeline_size: timeline_logical_size,
                ps_writelsn: write_lsn,
                ps_flushlsn: flush_lsn,
                ps_applylsn: apply_lsn,
@@ -408,3 +398,32 @@ async fn identify_system(client: &mut Client) -> anyhow::Result<IdentifySystem>
        Err(IdentifyError.into())
    }
 }
+
+/// Sorts a `postgres::Error` into "expected" connectivity noise versus a genuine failure.
+///
+/// Connectivity problems are not reported to the connection manager as real errors because
+/// 1. they happen frequently enough to make server logs hard to read and
+/// 2. the connection manager can simply retry with another safekeeper.
+///
+/// `Ok(pg_error)` hands such an expected error back: the caller should log it at info level
+/// and report that this connection is done, leaving reconnection to the connection manager.
+/// `Err(_)` carries anything else, wrapped with "connection error" context, for the caller
+/// to bubble up with `?`; the connection manager will log it at ERROR level.
+fn ignore_expected_errors(pg_error: postgres::Error) -> anyhow::Result<postgres::Error> {
+    // The connection is closed, or the source is an I/O error `is_expected_io_error` accepts.
+    let closed_or_expected_io = pg_error.is_closed()
+        || pg_error
+            .source()
+            .and_then(|source| source.downcast_ref::<std::io::Error>())
+            .map_or(false, is_expected_io_error);
+    // A database error with code CONNECTION_FAILURE whose message contains "end streaming".
+    let end_of_streaming = pg_error.as_db_error().map_or(false, |db_error| {
+        db_error.code() == &SqlState::CONNECTION_FAILURE
+            && db_error.message().contains("end streaming")
+    });
+    if closed_or_expected_io || end_of_streaming {
+        Ok(pg_error)
+    } else {
+        Err(pg_error).context("connection error")
+    }
+}
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -111,6 +111,7 @@ pageserver_connect()
 				PQfinish(pageserver_conn);
 				pageserver_conn = NULL;
 				FreeWaitEventSet(pageserver_conn_wes);
+				pageserver_conn_wes = NULL;

 				neon_log(ERROR, "could not complete handshake with pageserver: %s",
 						 msg);
@@ -179,7 +180,10 @@ pageserver_disconnect(void)
 		prefetch_on_ps_disconnect();
 	}
 	if (pageserver_conn_wes != NULL)
+	{
 		FreeWaitEventSet(pageserver_conn_wes);
+		pageserver_conn_wes = NULL;
+	}
 }

 static void
@@ -206,7 +210,7 @@ pageserver_send(NeonRequest * request)
 	 */
 	if (PQputCopyData(pageserver_conn, req_buff.data, req_buff.len) <= 0)
 	{
-		char	   *msg = PQerrorMessage(pageserver_conn);
+		char	   *msg = pchomp(PQerrorMessage(pageserver_conn));

 		pageserver_disconnect();
 		neon_log(ERROR, "failed to send page request: %s", msg);
@@ -239,29 +243,33 @@ pageserver_receive(void)
 	PG_TRY();
 	{
 		/* read response */
-		resp_buff.len = call_PQgetCopyData(&resp_buff.data);
-		resp_buff.cursor = 0;
+		int			rc;

-		if (resp_buff.len < 0)
+		rc = call_PQgetCopyData(&resp_buff.data);
+		if (rc >= 0)
 		{
-			if (resp_buff.len == -1)
+			resp_buff.len = rc;
+			resp_buff.cursor = 0;
+			resp = nm_unpack_response(&resp_buff);
+			PQfreemem(resp_buff.data);
+
+			if (message_level_is_interesting(PageStoreTrace))
 			{
-				pageserver_disconnect();
-				return NULL;
+				char	   *msg = nm_to_string((NeonMessage *) resp);
+
+				neon_log(PageStoreTrace, "got response: %s", msg);
+				pfree(msg);
 			}
-			else if (resp_buff.len == -2)
-				neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
 		}
-		resp = nm_unpack_response(&resp_buff);
-		PQfreemem(resp_buff.data);
-
-		if (message_level_is_interesting(PageStoreTrace))
+		else if (rc == -1)
 		{
-			char	   *msg = nm_to_string((NeonMessage *) resp);
-
-			neon_log(PageStoreTrace, "got response: %s", msg);
-			pfree(msg);
+			pageserver_disconnect();
+			resp = NULL;
 		}
+		else if (rc == -2)
+			neon_log(ERROR, "could not read COPY data: %s", PQerrorMessage(pageserver_conn));
+		else
+			neon_log(ERROR, "unexpected PQgetCopyData return value: %d", rc);
 	}
 	PG_CATCH();
 	{
@@ -420,7 +428,7 @@ pg_init_libpagestore(void)
 							   NULL, NULL, NULL);

    DefineCustomStringVariable("neon.safekeeper_token_env",
-                               "the environment variable containing JWT token for authentication with Safekeepers, the convention is to either unset or set to $ZENITH_AUTH_TOKEN",
+                               "the environment variable containing JWT token for authentication with Safekeepers, the convention is to either unset or set to $NEON_AUTH_TOKEN",
                               NULL,
                               &safekeeper_token_env,
                               NULL,
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -52,6 +52,7 @@
 #include "access/xlogdefs.h"
 #include "catalog/pg_class.h"
 #include "common/hashfn.h"
+#include "executor/instrument.h"
 #include "pagestore_client.h"
 #include "postmaster/interrupt.h"
 #include "postmaster/autovacuum.h"
@@ -250,11 +251,6 @@ PrefetchState *MyPState;
 	) \
 )

-int			n_prefetch_hits = 0;
-int			n_prefetch_misses = 0;
-int			n_prefetch_missed_caches = 0;
-int			n_prefetch_dupes = 0;
-
 XLogRecPtr	prefetch_lsn = 0;

 static bool compact_prefetch_buffers(void);
@@ -770,7 +766,7 @@ prefetch_register_buffer(BufferTag tag, bool *force_latest, XLogRecPtr *force_ls
 		else
 		{
 			/* The buffered request is good enough, return that index */
-			n_prefetch_dupes++;
+			pgBufferUsage.prefetch.duplicates++;
 			return ring_index;
 		}
 	}
@@ -1845,7 +1841,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
 		if (slot->effective_request_lsn >= request_lsn)
 		{
 			ring_index = slot->my_ring_index;
-			n_prefetch_hits += 1;
+			pgBufferUsage.prefetch.hits += 1;
 		}
 		else /* the current prefetch LSN is not large enough, so drop the prefetch */
 		{
@@ -1860,7 +1856,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
 			}
 			/* drop caches */
 			prefetch_set_unused(slot->my_ring_index);
-			n_prefetch_missed_caches += 1;
+			pgBufferUsage.prefetch.expired += 1;
 			/* make it look like a prefetch cache miss */
 			entry = NULL;
 		}
@@ -1870,7 +1866,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
 	{
 		if (entry == NULL)
 		{
-			n_prefetch_misses += 1;
+			pgBufferUsage.prefetch.misses += 1;

 			ring_index = prefetch_register_buffer(buftag, &request_latest,
 												  &request_lsn);
--- a/poetry.lock
+++ b/poetry.lock
@@ -1418,7 +1418,7 @@ pbr = "*"

 [[package]]
 name = "setuptools"
-version = "65.5.0"
+version = "65.5.1"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 category = "main"
 optional = false
@@ -1426,7 +1426,7 @@ python-versions = ">=3.7"

 [package.extras]
 docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mock", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8 (<5)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
 testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]

 [[package]]
@@ -2283,8 +2283,8 @@ sarif-om = [
    {file = "sarif_om-1.0.4.tar.gz", hash = "sha256:cd5f416b3083e00d402a92e449a7ff67af46f11241073eea0461802a3b5aef98"},
 ]
 setuptools = [
-    {file = "setuptools-65.5.0-py3-none-any.whl", hash = "sha256:f62ea9da9ed6289bfe868cd6845968a2c854d1427f8548d52cae02a42b4f0356"},
-    {file = "setuptools-65.5.0.tar.gz", hash = "sha256:512e5536220e38146176efb833d4a62aa726b7bbff82cfbc8ba9eaa3996e0b17"},
+    {file = "setuptools-65.5.1-py3-none-any.whl", hash = "sha256:d0b9a8433464d5800cbe05094acf5c6d52a91bfac9b52bcfc4d41382be5d5d31"},
+    {file = "setuptools-65.5.1.tar.gz", hash = "sha256:e197a19aa8ec9722928f2206f8de752def0e4c9fc6953527360d1c36d94ddb2f"},
 ]
 six = [
    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -2,6 +2,7 @@
 name = "proxy"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 anyhow = "1.0"
@@ -16,12 +17,14 @@ hashbrown = "0.12"
 hex = "0.4.3"
 hmac = "0.12.1"
 hyper = "0.14"
+hyper-tungstenite = "0.8.1"
 itertools = "0.10.3"
 md5 = "0.7.0"
 once_cell = "1.13.0"
 parking_lot = "0.12"
 pin-project-lite = "0.2.7"
 rand = "0.8.3"
+regex = "1.4.5"
 reqwest = { version = "0.11", default-features = false, features = [ "json", "rustls-tls" ] }
 routerify = "3"
 rustls = "0.20.0"
@@ -35,10 +38,12 @@ thiserror = "1.0.30"
 tokio = { version = "1.17", features = ["macros"] }
 tokio-postgres = { git = "https://github.com/neondatabase/rust-postgres.git", rev="43e6db254a97fdecbce33d8bc0890accfd74495e" }
 tokio-rustls = "0.23.0"
+tls-listener = { version = "0.5.1", features = ["rustls", "hyper-h1"] }
 tracing = "0.1.36"
 tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 url = "2.2.2"
 uuid = { version = "1.2", features = ["v4", "serde"] }
+webpki-roots = "0.22.5"
 x509-parser = "0.14"

 metrics = { path = "../libs/metrics" }
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -8,7 +8,9 @@ pub use console::{GetAuthInfoError, WakeComputeError};

 use crate::{
    auth::{self, AuthFlow, ClientCredentials},
-    compute, http, mgmt, stream, url,
+    compute,
+    console::messages::MetricsAuxInfo,
+    http, mgmt, stream, url,
    waiters::{self, Waiter, Waiters},
 };
 use once_cell::sync::Lazy;
@@ -126,25 +128,13 @@ pub struct AuthSuccess<T> {
    pub value: T,
 }

-impl<T> AuthSuccess<T> {
-    /// Very similar to [`std::option::Option::map`].
-    /// Maps [`AuthSuccess<T>`] to [`AuthSuccess<R>`] by applying
-    /// a function to a contained value.
-    pub fn map<R>(self, f: impl FnOnce(T) -> R) -> AuthSuccess<R> {
-        AuthSuccess {
-            reported_auth_ok: self.reported_auth_ok,
-            value: f(self.value),
-        }
-    }
-}
-
 /// Info for establishing a connection to a compute node.
 /// This is what we get after auth succeeded, but not before!
 pub struct NodeInfo {
-    /// Project from [`auth::ClientCredentials`].
-    pub project: String,
    /// Compute node connection params.
    pub config: compute::ConnCfg,
+    /// Labels for proxy's metrics.
+    pub aux: MetricsAuxInfo,
 }

 impl BackendType<'_, ClientCredentials<'_>> {
@@ -159,7 +149,7 @@ impl BackendType<'_, ClientCredentials<'_>> {
        // If there's no project so far, that entails that client doesn't
        // support SNI or other means of passing the project name.
        // We now expect to see a very specific payload in the place of password.
-        let fetch_magic_payload = async {
+        let fetch_magic_payload = |client| async {
            warn!("project name not specified, resorting to the password hack auth flow");
            let payload = AuthFlow::new(client)
                .begin(auth::PasswordHack)
@@ -171,38 +161,61 @@ impl BackendType<'_, ClientCredentials<'_>> {
            auth::Result::Ok(payload)
        };

+        // If we want to use cleartext password flow, we can read the password
+        // from the client and pretend that it's a magic payload (PasswordHack hack).
+        let fetch_plaintext_password = |client| async {
+            info!("using cleartext password flow");
+            let payload = AuthFlow::new(client)
+                .begin(auth::CleartextPassword)
+                .await?
+                .authenticate()
+                .await?;
+
+            auth::Result::Ok(auth::password_hack::PasswordHackPayload {
+                project: String::new(),
+                password: payload,
+            })
+        };
+
        // TODO: find a proper way to merge those very similar blocks.
-        let (mut config, payload) = match self {
+        let (mut node, payload) = match self {
            Console(endpoint, creds) if creds.project.is_none() => {
-                let payload = fetch_magic_payload.await?;
+                let payload = fetch_magic_payload(client).await?;

                let mut creds = creds.as_ref();
                creds.project = Some(payload.project.as_str().into());
-                let config = console::Api::new(endpoint, extra, &creds)
+                let node = console::Api::new(endpoint, extra, &creds)
                    .wake_compute()
                    .await?;

-                (config, payload)
+                (node, payload)
+            }
+            Console(endpoint, creds) if creds.use_cleartext_password_flow => {
+                // This is a hack to allow cleartext password in secure connections (wss).
+                let payload = fetch_plaintext_password(client).await?;
+                let creds = creds.as_ref();
+                let node = console::Api::new(endpoint, extra, &creds)
+                    .wake_compute()
+                    .await?;
+
+                (node, payload)
            }
            Postgres(endpoint, creds) if creds.project.is_none() => {
-                let payload = fetch_magic_payload.await?;
+                let payload = fetch_magic_payload(client).await?;

                let mut creds = creds.as_ref();
                creds.project = Some(payload.project.as_str().into());
-                let config = postgres::Api::new(endpoint, &creds).wake_compute().await?;
+                let node = postgres::Api::new(endpoint, &creds).wake_compute().await?;

-                (config, payload)
+                (node, payload)
            }
            _ => return Ok(None),
        };

-        config.password(payload.password);
+        node.config.password(payload.password);
        Ok(Some(AuthSuccess {
            reported_auth_ok: false,
-            value: NodeInfo {
-                project: payload.project,
-                config,
-            },
+            value: node,
        }))
    }

@@ -233,10 +246,6 @@ impl BackendType<'_, ClientCredentials<'_>> {
                console::Api::new(&endpoint, extra, &creds)
                    .handle_user(client)
                    .await?
-                    .map(|config| NodeInfo {
-                        project: creds.project.unwrap().into_owned(),
-                        config,
-                    })
            }
            Postgres(endpoint, creds) => {
                info!("performing mock authentication using a local postgres instance");
@@ -245,10 +254,6 @@ impl BackendType<'_, ClientCredentials<'_>> {
                postgres::Api::new(&endpoint, &creds)
                    .handle_user(client)
                    .await?
-                    .map(|config| NodeInfo {
-                        project: creds.project.unwrap().into_owned(),
-                        config,
-                    })
            }
            // NOTE: this auth backend doesn't use client credentials.
            Link(url) => {
--- a/proxy/src/auth/backend/console.rs
+++ b/proxy/src/auth/backend/console.rs
@@ -1,16 +1,16 @@
 //! Cloud API V2.

-use super::{AuthSuccess, ConsoleReqExtra};
+use super::{AuthSuccess, ConsoleReqExtra, NodeInfo};
 use crate::{
    auth::{self, AuthFlow, ClientCredentials},
    compute,
+    console::messages::{ConsoleError, GetRoleSecret, WakeCompute},
    error::{io_error, UserFacingError},
    http, sasl, scram,
    stream::PqStream,
 };
 use futures::TryFutureExt;
 use reqwest::StatusCode as HttpStatusCode;
-use serde::Deserialize;
 use std::future::Future;
 use thiserror::Error;
 use tokio::io::{AsyncRead, AsyncWrite};
@@ -136,24 +136,6 @@ impl UserFacingError for WakeComputeError {
    }
 }

-/// Console's response which holds client's auth secret.
-#[derive(Deserialize, Debug)]
-struct GetRoleSecret {
-    role_secret: Box<str>,
-}
-
-/// Console's response which holds compute node's `host:port` pair.
-#[derive(Deserialize, Debug)]
-struct WakeCompute {
-    address: Box<str>,
-}
-
-/// Console's error response with human-readable description.
-#[derive(Deserialize, Debug)]
-struct ConsoleError {
-    error: Box<str>,
-}
-
 /// Auth secret which is managed by the cloud.
 pub enum AuthInfo {
    /// Md5 hash of user's password.
@@ -194,7 +176,7 @@ impl<'a> Api<'a> {
    pub(super) async fn handle_user(
        &'a self,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    ) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
+    ) -> auth::Result<AuthSuccess<NodeInfo>> {
        handle_user(client, self, Self::get_auth_info, Self::wake_compute).await
    }
 }
@@ -238,7 +220,7 @@ impl Api<'_> {
    }

    /// Wake up the compute node and return the corresponding connection info.
-    pub async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
+    pub async fn wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
        let request_id = uuid::Uuid::new_v4().to_string();
        async {
            let request = self
@@ -269,7 +251,10 @@ impl Api<'_> {
                .dbname(self.creds.dbname)
                .user(self.creds.user);

-            Ok(config)
+            Ok(NodeInfo {
+                config,
+                aux: body.aux,
+            })
        }
        .map_err(crate::error::log_error)
        .instrument(info_span!("wake_compute", id = request_id))
@@ -284,11 +269,11 @@ pub(super) async fn handle_user<'a, Endpoint, GetAuthInfo, WakeCompute>(
    endpoint: &'a Endpoint,
    get_auth_info: impl FnOnce(&'a Endpoint) -> GetAuthInfo,
    wake_compute: impl FnOnce(&'a Endpoint) -> WakeCompute,
-) -> auth::Result<AuthSuccess<compute::ConnCfg>>
+) -> auth::Result<AuthSuccess<NodeInfo>>
 where
    Endpoint: AsRef<ClientCredentials<'a>>,
    GetAuthInfo: Future<Output = Result<Option<AuthInfo>, GetAuthInfoError>>,
-    WakeCompute: Future<Output = Result<compute::ConnCfg, WakeComputeError>>,
+    WakeCompute: Future<Output = Result<NodeInfo, WakeComputeError>>,
 {
    let creds = endpoint.as_ref();

@@ -325,19 +310,20 @@ where
        }
    };

-    let mut config = wake_compute(endpoint).await?;
+    let mut node = wake_compute(endpoint).await?;
    if let Some(keys) = scram_keys {
-        config.auth_keys(tokio_postgres::config::AuthKeys::ScramSha256(keys));
+        use tokio_postgres::config::AuthKeys;
+        node.config.auth_keys(AuthKeys::ScramSha256(keys));
    }

    Ok(AuthSuccess {
        reported_auth_ok: false,
-        value: config,
+        value: node,
    })
 }

 /// Parse http response body, taking status code into account.
-async fn parse_body<T: for<'a> Deserialize<'a>>(
+async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
    response: reqwest::Response,
 ) -> Result<T, ApiError> {
    let status = response.status();
--- a/proxy/src/auth/backend/link.rs
+++ b/proxy/src/auth/backend/link.rs
@@ -86,8 +86,8 @@ pub async fn handle_user(
    Ok(AuthSuccess {
        reported_auth_ok: true,
        value: NodeInfo {
-            project: db_info.project,
            config,
+            aux: db_info.aux,
        },
    })
 }
--- a/proxy/src/auth/backend/postgres.rs
+++ b/proxy/src/auth/backend/postgres.rs
@@ -2,7 +2,7 @@

 use super::{
    console::{self, AuthInfo, GetAuthInfoError, WakeComputeError},
-    AuthSuccess,
+    AuthSuccess, NodeInfo,
 };
 use crate::{
    auth::{self, ClientCredentials},
@@ -57,7 +57,7 @@ impl<'a> Api<'a> {
    pub(super) async fn handle_user(
        &'a self,
        client: &mut PqStream<impl AsyncRead + AsyncWrite + Unpin + Send>,
-    ) -> auth::Result<AuthSuccess<compute::ConnCfg>> {
+    ) -> auth::Result<AuthSuccess<NodeInfo>> {
        // We reuse user handling logic from a production module.
        console::handle_user(client, self, Self::get_auth_info, Self::wake_compute).await
    }
@@ -103,7 +103,7 @@ impl Api<'_> {
    }

    /// We don't need to wake anything locally, so we just return the connection info.
-    pub async fn wake_compute(&self) -> Result<compute::ConnCfg, WakeComputeError> {
+    pub async fn wake_compute(&self) -> Result<NodeInfo, WakeComputeError> {
        let mut config = compute::ConnCfg::new();
        config
            .host(self.endpoint.host_str().unwrap_or("localhost"))
@@ -111,7 +111,10 @@ impl Api<'_> {
            .dbname(self.creds.dbname)
            .user(self.creds.user);

-        Ok(config)
+        Ok(NodeInfo {
+            config,
+            aux: Default::default(),
+        })
    }
 }

--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -34,6 +34,9 @@ pub struct ClientCredentials<'a> {
    pub user: &'a str,
    pub dbname: &'a str,
    pub project: Option<Cow<'a, str>>,
+    /// If `true`, we'll use the old cleartext password flow. This is used for
+    /// websocket connections, which want to minimize the number of round trips.
+    pub use_cleartext_password_flow: bool,
 }

 impl ClientCredentials<'_> {
@@ -50,6 +53,7 @@ impl<'a> ClientCredentials<'a> {
            user: self.user,
            dbname: self.dbname,
            project: self.project().map(Cow::Borrowed),
+            use_cleartext_password_flow: self.use_cleartext_password_flow,
        }
    }
 }
@@ -59,6 +63,7 @@ impl<'a> ClientCredentials<'a> {
        params: &'a StartupMessageParams,
        sni: Option<&str>,
        common_name: Option<&str>,
+        use_cleartext_password_flow: bool,
    ) -> Result<Self, ClientCredsParseError> {
        use ClientCredsParseError::*;

@@ -108,6 +113,7 @@ impl<'a> ClientCredentials<'a> {
            user = user,
            dbname = dbname,
            project = project.as_deref(),
+            use_cleartext_password_flow = use_cleartext_password_flow,
            "credentials"
        );

@@ -115,6 +121,7 @@ impl<'a> ClientCredentials<'a> {
            user,
            dbname,
            project,
+            use_cleartext_password_flow,
        })
    }
 }
@@ -141,7 +148,7 @@ mod tests {
        let options = StartupMessageParams::new([("user", "john_doe")]);

        // TODO: check that `creds.dbname` is None.
-        let creds = ClientCredentials::parse(&options, None, None)?;
+        let creds = ClientCredentials::parse(&options, None, None, false)?;
        assert_eq!(creds.user, "john_doe");

        Ok(())
@@ -151,7 +158,7 @@ mod tests {
    fn parse_missing_project() -> anyhow::Result<()> {
        let options = StartupMessageParams::new([("user", "john_doe"), ("database", "world")]);

-        let creds = ClientCredentials::parse(&options, None, None)?;
+        let creds = ClientCredentials::parse(&options, None, None, false)?;
        assert_eq!(creds.user, "john_doe");
        assert_eq!(creds.dbname, "world");
        assert_eq!(creds.project, None);
@@ -166,7 +173,7 @@ mod tests {
        let sni = Some("foo.localhost");
        let common_name = Some("localhost");

-        let creds = ClientCredentials::parse(&options, sni, common_name)?;
+        let creds = ClientCredentials::parse(&options, sni, common_name, false)?;
        assert_eq!(creds.user, "john_doe");
        assert_eq!(creds.dbname, "world");
        assert_eq!(creds.project.as_deref(), Some("foo"));
@@ -182,7 +189,7 @@ mod tests {
            ("options", "-ckey=1 project=bar -c geqo=off"),
        ]);

-        let creds = ClientCredentials::parse(&options, None, None)?;
+        let creds = ClientCredentials::parse(&options, None, None, false)?;
        assert_eq!(creds.user, "john_doe");
        assert_eq!(creds.dbname, "world");
        assert_eq!(creds.project.as_deref(), Some("bar"));
@@ -201,7 +208,7 @@ mod tests {
        let sni = Some("baz.localhost");
        let common_name = Some("localhost");

-        let creds = ClientCredentials::parse(&options, sni, common_name)?;
+        let creds = ClientCredentials::parse(&options, sni, common_name, false)?;
        assert_eq!(creds.user, "john_doe");
        assert_eq!(creds.dbname, "world");
        assert_eq!(creds.project.as_deref(), Some("baz"));
@@ -220,7 +227,8 @@ mod tests {
        let sni = Some("second.localhost");
        let common_name = Some("localhost");

-        let err = ClientCredentials::parse(&options, sni, common_name).expect_err("should fail");
+        let err =
+            ClientCredentials::parse(&options, sni, common_name, false).expect_err("should fail");
        match err {
            InconsistentProjectNames { domain, option } => {
                assert_eq!(option, "first");
@@ -237,7 +245,8 @@ mod tests {
        let sni = Some("project.localhost");
        let common_name = Some("example.com");

-        let err = ClientCredentials::parse(&options, sni, common_name).expect_err("should fail");
+        let err =
+            ClientCredentials::parse(&options, sni, common_name, false).expect_err("should fail");
        match err {
            InconsistentSni { sni, cn } => {
                assert_eq!(sni, "project.localhost");
--- a/proxy/src/auth/flow.rs
+++ b/proxy/src/auth/flow.rs
@@ -37,6 +37,17 @@ impl AuthMethod for PasswordHack {
    }
 }

+/// Use clear-text password auth called `password` in docs
+/// <https://www.postgresql.org/docs/current/auth-password.html>
+pub struct CleartextPassword;
+
+impl AuthMethod for CleartextPassword {
+    #[inline(always)]
+    fn first_message(&self) -> BeMessage<'_> {
+        Be::AuthenticationCleartextPassword
+    }
+}
+
 /// This wrapper for [`PqStream`] performs client authentication.
 #[must_use]
 pub struct AuthFlow<'a, Stream, State> {
@@ -86,6 +97,18 @@ impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, PasswordHack> {
    }
 }

+impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, CleartextPassword> {
+    /// Read the password sent by the client, stripping its NUL terminator (error if absent).
+    pub async fn authenticate(self) -> super::Result<Vec<u8>> {
+        let msg = self.stream.read_password_message().await?;
+        let password = msg
+            .strip_suffix(&[0])
+            .ok_or(AuthErrorImpl::MalformedPassword("missing terminator"))?;
+
+        Ok(password.to_vec())
+    }
+}
+
 /// Stream wrapper for handling [SCRAM](crate::scram) auth.
 impl<S: AsyncRead + AsyncWrite + Unpin> AuthFlow<'_, S, Scram<'_>> {
    /// Perform user authentication. Raise an error in case authentication failed.
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -43,7 +43,7 @@ pub type ScramKeys = tokio_postgres::config::ScramKeys<32>;
 /// Eventually, `tokio_postgres` will be replaced with something better.
 /// Newtype allows us to implement methods on top of it.
 #[repr(transparent)]
-pub struct ConnCfg(pub tokio_postgres::Config);
+pub struct ConnCfg(Box<tokio_postgres::Config>);

 impl ConnCfg {
    /// Construct a new connection config.
--- a/proxy/src/console.rs
+++ b/proxy/src/console.rs
@@ -0,0 +1,5 @@
+//! Various stuff for dealing with the Neon Console.
+//! Later we might move some API wrappers here.
+
+/// Payloads used in the console's APIs.
+pub mod messages;
--- a/proxy/src/console/messages.rs
+++ b/proxy/src/console/messages.rs
@@ -0,0 +1,190 @@
+use serde::Deserialize;
+use std::fmt;
+
+/// Generic error response with a human-readable description.
+/// Note that we can't always present it to the user as is.
+#[derive(Debug, Deserialize)]
+pub struct ConsoleError {
+    pub error: Box<str>,
+}
+
+/// Response which holds client's auth secret, e.g. [`crate::scram::ServerSecret`].
+/// Returned by the `/proxy_get_role_secret` API method.
+#[derive(Deserialize)]
+pub struct GetRoleSecret {
+    pub role_secret: Box<str>,
+}
+
+// Manually implement debug to omit sensitive info.
+impl fmt::Debug for GetRoleSecret {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("GetRoleSecret").finish_non_exhaustive()
+    }
+}
+
+/// Response which holds compute node's `host:port` pair.
+/// Returned by the `/proxy_wake_compute` API method.
+#[derive(Debug, Deserialize)]
+pub struct WakeCompute {
+    pub address: Box<str>,
+    pub aux: MetricsAuxInfo,
+}
+
+/// Async response which concludes the link auth flow.
+/// Also known as `kickResponse` in the console.
+#[derive(Debug, Deserialize)]
+pub struct KickSession<'a> {
+    /// Session ID is assigned by the proxy.
+    pub session_id: &'a str,
+
+    /// Compute node connection params.
+    #[serde(deserialize_with = "KickSession::parse_db_info")]
+    pub result: DatabaseInfo,
+}
+
+impl KickSession<'_> {
+    fn parse_db_info<'de, D>(des: D) -> Result<DatabaseInfo, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        #[derive(Deserialize)]
+        enum Wrapper {
+            // Currently, console only reports `Success`.
+            // `Failure(String)` used to be here... RIP.
+            Success(DatabaseInfo),
+        }
+
+        Wrapper::deserialize(des).map(|x| match x {
+            Wrapper::Success(info) => info,
+        })
+    }
+}
+
+/// Compute node connection params.
+#[derive(Deserialize)]
+pub struct DatabaseInfo {
+    pub host: String,
+    pub port: u16,
+    pub dbname: String,
+    pub user: String,
+    /// Console always provides a password, but requiring it might
+    /// be inconvenient when debugging with a local PG instance.
+    pub password: Option<String>,
+    pub aux: MetricsAuxInfo,
+}
+
+// Manually implement debug to omit sensitive info.
+impl fmt::Debug for DatabaseInfo {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("DatabaseInfo")
+            .field("host", &self.host)
+            .field("port", &self.port)
+            .field("dbname", &self.dbname)
+            .field("user", &self.user)
+            .finish_non_exhaustive()
+    }
+}
+
+/// Various labels for prometheus metrics.
+/// Also known as `ProxyMetricsAuxInfo` in the console.
+#[derive(Debug, Deserialize, Default)]
+pub struct MetricsAuxInfo {
+    pub endpoint_id: Box<str>,
+    pub project_id: Box<str>,
+    pub branch_id: Box<str>,
+}
+
+impl MetricsAuxInfo {
+    /// Definitions of labels for traffic metric.
+    pub const TRAFFIC_LABELS: &'static [&'static str] = &[
+        // Received (rx) / sent (tx).
+        "direction",
+        // ID of a project.
+        "project_id",
+        // ID of an endpoint within a project.
+        "endpoint_id",
+        // ID of a branch within a project (snapshot).
+        "branch_id",
+    ];
+
+    /// Values of labels for traffic metric.
+    // TODO: add more type safety (validate arity & positions).
+    pub fn traffic_labels(&self, direction: &'static str) -> [&str; 4] {
+        [
+            direction,
+            &self.project_id,
+            &self.endpoint_id,
+            &self.branch_id,
+        ]
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    fn dummy_aux() -> serde_json::Value {
+        json!({
+            "endpoint_id": "endpoint",
+            "project_id": "project",
+            "branch_id": "branch",
+        })
+    }
+
+    #[test]
+    fn parse_kick_session() -> anyhow::Result<()> {
+        // This is what the console's kickResponse looks like.
+        let json = json!({
+            "session_id": "deadbeef",
+            "result": {
+                "Success": {
+                    "host": "localhost",
+                    "port": 5432,
+                    "dbname": "postgres",
+                    "user": "john_doe",
+                    "password": "password",
+                    "aux": dummy_aux(),
+                }
+            }
+        });
+        let _: KickSession = serde_json::from_str(&json.to_string())?;
+
+        Ok(())
+    }
+
+    #[test]
+    fn parse_db_info() -> anyhow::Result<()> {
+        // with password
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "password": "password",
+            "aux": dummy_aux(),
+        }))?;
+
+        // without password
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "aux": dummy_aux(),
+        }))?;
+
+        // new field (forward compatibility)
+        let _: DatabaseInfo = serde_json::from_value(json!({
+            "host": "localhost",
+            "port": 5432,
+            "dbname": "postgres",
+            "user": "john_doe",
+            "project": "hello_world",
+            "N.E.W": "forward compatibility check",
+            "aux": dummy_aux(),
+        }))?;
+
+        Ok(())
+    }
+}
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -1,4 +1,5 @@
 pub mod server;
+pub mod websocket;

 use crate::url::ApiUrl;

--- a/proxy/src/http/websocket.rs
+++ b/proxy/src/http/websocket.rs
@@ -0,0 +1,263 @@
+use bytes::{Buf, Bytes};
+use futures::{Sink, Stream, StreamExt};
+use hyper::server::accept::{self};
+use hyper::server::conn::AddrIncoming;
+use hyper::upgrade::Upgraded;
+use hyper::{Body, Request, Response, StatusCode};
+use hyper_tungstenite::{tungstenite, WebSocketStream};
+use hyper_tungstenite::{tungstenite::Message, HyperWebsocket};
+use pin_project_lite::pin_project;
+use tokio::net::TcpListener;
+
+use std::convert::Infallible;
+use std::future::ready;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+use tls_listener::TlsListener;
+
+use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
+
+use tracing::{error, info, info_span, warn, Instrument};
+use utils::http::{error::ApiError, json::json_response};
+
+use crate::cancellation::CancelMap;
+use crate::config::ProxyConfig;
+use crate::proxy::handle_ws_client;
+
+pin_project! {
+    /// Adapter exposing a `WebSocketStream` via `AsyncRead`, `AsyncBufRead` and `AsyncWrite`.
+    pub struct WebSocketRW {
+        #[pin]
+        stream: WebSocketStream<Upgraded>,
+        // Unread remainder of the most recently received binary message, if any.
+        chunk: Option<Bytes>,
+    }
+}
+
+// FIXME: explain why this is safe or try to remove `unsafe impl`.
+// NOTE(review): in this file `stream` is only used through `Pin<&mut Self>`, and the one
+// `&self` method (`has_chunk`) reads just `chunk`. Possibly the justification; confirm.
+unsafe impl Sync for WebSocketRW {}
+
+impl WebSocketRW {
+    pub fn new(stream: WebSocketStream<Upgraded>) -> Self {
+        Self {
+            stream,
+            chunk: None,
+        }
+    }
+
+    /// Whether a received chunk with at least one unread byte is buffered.
+    fn has_chunk(&self) -> bool {
+        matches!(&self.chunk, Some(chunk) if chunk.remaining() > 0)
+    }
+}
+
+fn ws_err_into(e: tungstenite::Error) -> io::Error {
+    io::Error::new(io::ErrorKind::Other, e) // wrap `e` itself (not its text) so it stays inspectable
+}
+
+impl AsyncWrite for WebSocketRW {
+    /// Queues all of `buf` as a single binary WebSocket message.
+    ///
+    /// Returns `Ready(Ok(buf.len()))` once the sink has accepted the message and
+    /// `Pending` while the sink is not ready for another one. Errors from the sink
+    /// are converted with `ws_err_into`. As with any `Sink`, a later `poll_flush`
+    /// is what ensures the message has actually been written out.
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<Result<usize, io::Error>> {
+        let mut stream = self.project().stream;
+        match stream.as_mut().poll_ready(cx) {
+            Poll::Ready(Ok(())) => {
+                let sent = stream.as_mut().start_send(Message::Binary(buf.to_vec()));
+                Poll::Ready(sent.map(|()| buf.len()).map_err(ws_err_into))
+            }
+            Poll::Ready(Err(e)) => Poll::Ready(Err(ws_err_into(e))),
+            // A pending `poll_ready` has already arranged a wake-up for `cx` (that is
+            // the `Sink` contract); waking the task again here only made it spin.
+            Poll::Pending => Poll::Pending,
+        }
+    }
+
+    /// Flushes messages queued in the sink.
+    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
+        self.project().stream.poll_flush(cx).map_err(ws_err_into)
+    }
+
+    /// Shuts the writer down by closing the sink.
+    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
+        self.project().stream.poll_close(cx).map_err(ws_err_into)
+    }
+}
+
+impl AsyncRead for WebSocketRW {
+    /// Copies buffered message bytes into `buf`, as many as fit.
+    ///
+    /// Completing without adding bytes happens when `poll_fill_buf` yields an empty slice.
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        // A full destination cannot take anything, so don't pull a message for it.
+        if buf.remaining() == 0 {
+            return Poll::Ready(Ok(()));
+        }
+
+        let available = std::task::ready!(self.as_mut().poll_fill_buf(cx))?;
+        let count = available.len().min(buf.remaining());
+        buf.put_slice(&available[..count]);
+
+        self.consume(count);
+        Poll::Ready(Ok(()))
+    }
+}
+
+impl AsyncBufRead for WebSocketRW {
+    /// Returns the unread part of the current binary message, receiving the next
+    /// message first when nothing is buffered.
+    ///
+    /// Text, ping and pong messages are skipped. A close message or the end of the
+    /// stream yields an empty slice; stream errors go through `ws_err_into`.
+    fn poll_fill_buf(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
+        loop {
+            if self.as_mut().has_chunk() {
+                let buf = self.project().chunk.as_ref().unwrap().chunk();
+                return Poll::Ready(Ok(buf));
+            }
+
+            let message = match self.as_mut().project().stream.poll_next(cx) {
+                Poll::Ready(Some(Ok(message))) => message,
+                Poll::Ready(Some(Err(err))) => return Poll::Ready(Err(ws_err_into(err))),
+                Poll::Ready(None) => return Poll::Ready(Ok(&[])),
+                Poll::Pending => return Poll::Pending,
+            };
+
+            match message {
+                Message::Binary(chunk) => {
+                    *self.as_mut().project().chunk = Some(Bytes::from(chunk));
+                }
+                // Replies to pings and close frames are left to tungstenite.
+                Message::Text(_) | Message::Ping(_) | Message::Pong(_) => {}
+                Message::Close(_) => return Poll::Ready(Ok(&[])),
+                Message::Frame(_) => unreachable!(),
+            }
+        }
+    }
+
+    /// Marks `amt` bytes of the buffered chunk as read; a zero `amt` is a no-op.
+    ///
+    /// Panics if `amt` is non-zero while no chunk is buffered.
+    fn consume(self: Pin<&mut Self>, amt: usize) {
+        if amt == 0 {
+            return;
+        }
+        let chunk = self.project().chunk.as_mut().expect("No chunk present");
+        chunk.advance(amt);
+    }
+}
+
+/// Completes the WebSocket upgrade, then runs the proxy's client handling over it.
+///
+/// The upgraded stream is wrapped in `WebSocketRW` and passed to `handle_ws_client`
+/// together with `config`, `cancel_map`, `session_id` and `hostname`.
+///
+/// # Errors
+///
+/// Returns the upgrade error, or whatever error `handle_ws_client` reports.
+async fn serve_websocket(
+    websocket: HyperWebsocket,
+    config: &ProxyConfig,
+    cancel_map: &CancelMap,
+    session_id: uuid::Uuid,
+    hostname: Option<String>,
+) -> anyhow::Result<()> {
+    let stream = WebSocketRW::new(websocket.await?);
+    handle_ws_client(config, cancel_map, session_id, stream, hostname).await
+}
+
+/// Answers one HTTP request: upgrade requests are handed to `serve_websocket` on a
+/// spawned task, anything else gets a `200 OK` JSON hint.
+async fn ws_handler(
+    mut request: Request<Body>,
+    config: &'static ProxyConfig,
+    cancel_map: Arc<CancelMap>,
+    session_id: uuid::Uuid,
+) -> Result<Response<Body>, ApiError> {
+    let host = request
+        .headers()
+        .get("host")
+        .and_then(|value| value.to_str().ok())
+        .and_then(|value| value.split(':').next())
+        .map(str::to_owned);
+
+    if !hyper_tungstenite::is_upgrade_request(&request) {
+        return json_response(StatusCode::OK, "Connect with a websocket client");
+    }
+
+    let (response, websocket) = hyper_tungstenite::upgrade(&mut request, None)
+        .map_err(|e| ApiError::BadRequest(e.into()))?;
+
+    // `response` has to be returned for the spawned future to be able to continue.
+    tokio::spawn(async move {
+        let served = serve_websocket(websocket, config, &cancel_map, session_id, host).await;
+        if let Err(e) = served {
+            error!("error in websocket connection: {:?}", e);
+        }
+    });
+    Ok(response)
+}
+
+/// Runs the WebSocket Secure (wss) server on `ws_listener` until the hyper server stops.
+/// Returns `Ok(())` immediately, with a warning, when `config` carries no TLS settings.
+pub async fn task_main(
+    ws_listener: TcpListener,
+    config: &'static ProxyConfig,
+) -> anyhow::Result<()> {
+    scopeguard::defer! {
+        info!("websocket server has shut down");
+    }
+
+    let tls_acceptor: tokio_rustls::TlsAcceptor = match config.tls_config.as_ref() {
+        Some(tls) => tls.to_server_config().into(),
+        None => {
+            warn!("TLS config is missing, WebSocket Secure server will not be started");
+            return Ok(());
+        }
+    };
+
+    let addr_incoming = AddrIncoming::from_listener(ws_listener)?;
+    // Connections whose TLS accept failed are logged and filtered out of the stream.
+    let tls_listener = TlsListener::new(tls_acceptor, addr_incoming).filter(|conn| {
+        if let Err(err) = conn {
+            error!("failed to accept TLS connection for websockets: {:?}", err);
+        }
+        ready(conn.is_ok())
+    });
+
+    // NOTE(review): every request gets its own empty `CancelMap` — confirm that is intended.
+    let make_svc = hyper::service::make_service_fn(|_stream| async move {
+        Ok::<_, Infallible>(hyper::service::service_fn(
+            move |req: Request<Body>| async move {
+                let cancel_map = Arc::new(CancelMap::default());
+                let session_id = uuid::Uuid::new_v4();
+                ws_handler(req, config, cancel_map, session_id)
+                    .instrument(info_span!(
+                        "ws-client",
+                        session = format_args!("{session_id}")
+                    ))
+                    .await
+            },
+        ))
+    });
+
+    hyper::Server::builder(accept::from_stream(tls_listener))
+        .serve(make_svc)
+        .await?;
+
+    Ok(())
+}
--- a/proxy/src/main.rs
+++ b/proxy/src/main.rs
@@ -8,6 +8,7 @@ mod auth;
 mod cancellation;
 mod compute;
 mod config;
+mod console;
 mod error;
 mod http;
 mod mgmt;
@@ -109,12 +110,23 @@ async fn main() -> anyhow::Result<()> {
    info!("Starting proxy on {proxy_address}");
    let proxy_listener = TcpListener::bind(proxy_address).await?;

-    let tasks = [
+    let mut tasks = vec![
        tokio::spawn(http::server::task_main(http_listener)),
        tokio::spawn(proxy::task_main(config, proxy_listener)),
        tokio::task::spawn_blocking(move || mgmt::thread_main(mgmt_listener)),
-    ]
-    .map(flatten_err);
+    ];
+
+    if let Some(wss_address) = arg_matches.get_one::<String>("wss") {
+        let wss_address: SocketAddr = wss_address.parse()?;
+        info!("Starting wss on {}", wss_address);
+        let wss_listener = TcpListener::bind(wss_address).await?;
+        tasks.push(tokio::spawn(http::websocket::task_main(
+            wss_listener,
+            config,
+        )));
+    }
+
+    let tasks = tasks.into_iter().map(flatten_err);

    set_build_info_metric(GIT_VERSION);
    // This will block until all tasks have completed.
@@ -154,6 +166,11 @@ fn cli() -> clap::Command {
                .help("listen for incoming http connections (metrics, etc) on ip:port")
                .default_value("127.0.0.1:7001"),
        )
+        .arg(
+            Arg::new("wss")
+                .long("wss")
+                .help("listen for incoming wss connections on ip:port"),
+        )
        .arg(
            Arg::new("uri")
                .short('u')
--- a/proxy/src/mgmt.rs
+++ b/proxy/src/mgmt.rs
@@ -1,13 +1,18 @@
-use crate::auth;
+use crate::{
+    auth,
+    console::messages::{DatabaseInfo, KickSession},
+};
 use anyhow::Context;
 use pq_proto::{BeMessage, SINGLE_COL_ROWDESC};
-use serde::Deserialize;
 use std::{
    net::{TcpListener, TcpStream},
    thread,
 };
 use tracing::{error, info, info_span};
-use utils::postgres_backend::{self, AuthType, PostgresBackend};
+use utils::{
+    postgres_backend::{self, AuthType, PostgresBackend},
+    postgres_backend_async::QueryError,
+};

 /// Console management API listener thread.
 /// It spawns console response handlers needed for the link auth.
@@ -45,68 +50,18 @@ pub fn thread_main(listener: TcpListener) -> anyhow::Result<()> {
    }
 }

-fn handle_connection(socket: TcpStream) -> anyhow::Result<()> {
+fn handle_connection(socket: TcpStream) -> Result<(), QueryError> {
    let pgbackend = PostgresBackend::new(socket, AuthType::Trust, None, true)?;
    pgbackend.run(&mut MgmtHandler)
 }

-/// Known as `kickResponse` in the console.
-#[derive(Debug, Deserialize)]
-struct PsqlSessionResponse {
-    session_id: String,
-    result: PsqlSessionResult,
-}
-
-#[derive(Debug, Deserialize)]
-enum PsqlSessionResult {
-    Success(DatabaseInfo),
-    Failure(String),
-}
-
 /// A message received by `mgmt` when a compute node is ready.
 pub type ComputeReady = Result<DatabaseInfo, String>;

-impl PsqlSessionResult {
-    fn into_compute_ready(self) -> ComputeReady {
-        match self {
-            Self::Success(db_info) => Ok(db_info),
-            Self::Failure(message) => Err(message),
-        }
-    }
-}
-
-/// Compute node connection params provided by the console.
-/// This struct and its parents are mgmt API implementation
-/// detail and thus should remain in this module.
-// TODO: restore deserialization tests from git history.
-#[derive(Deserialize)]
-pub struct DatabaseInfo {
-    pub host: String,
-    pub port: u16,
-    pub dbname: String,
-    pub user: String,
-    /// Console always provides a password, but it might
-    /// be inconvenient for debug with local PG instance.
-    pub password: Option<String>,
-    pub project: String,
-}
-
-// Manually implement debug to omit sensitive info.
-impl std::fmt::Debug for DatabaseInfo {
-    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
-        fmt.debug_struct("DatabaseInfo")
-            .field("host", &self.host)
-            .field("port", &self.port)
-            .field("dbname", &self.dbname)
-            .field("user", &self.user)
-            .finish_non_exhaustive()
-    }
-}
-
 // TODO: replace with an http-based protocol.
 struct MgmtHandler;
 impl postgres_backend::Handler for MgmtHandler {
-    fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
+    fn process_query(&mut self, pgb: &mut PostgresBackend, query: &str) -> Result<(), QueryError> {
        try_process_query(pgb, query).map_err(|e| {
            error!("failed to process response: {e:?}");
            e
@@ -114,14 +69,14 @@ impl postgres_backend::Handler for MgmtHandler {
    }
 }

-fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<()> {
-    let resp: PsqlSessionResponse = serde_json::from_str(query)?;
+fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> Result<(), QueryError> {
+    let resp: KickSession = serde_json::from_str(query).context("Failed to parse query as json")?;

    let span = info_span!("event", session_id = resp.session_id);
    let _enter = span.enter();
    info!("got response: {:?}", resp.result);

-    match auth::backend::notify(&resp.session_id, resp.result.into_compute_ready()) {
+    match auth::backend::notify(resp.session_id, Ok(resp.result)) {
        Ok(()) => {
            pgb.write_message_noflush(&SINGLE_COL_ROWDESC)?
                .write_message_noflush(&BeMessage::DataRow(&[Some(b"ok")]))?
@@ -129,49 +84,9 @@ fn try_process_query(pgb: &mut PostgresBackend, query: &str) -> anyhow::Result<(
        }
        Err(e) => {
            error!("failed to deliver response to per-client task");
-            pgb.write_message(&BeMessage::ErrorResponse(&e.to_string()))?;
+            pgb.write_message(&BeMessage::ErrorResponse(&e.to_string(), None))?;
        }
    }

    Ok(())
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use serde_json::json;
-
-    #[test]
-    fn parse_db_info() -> anyhow::Result<()> {
-        // with password
-        let _: DatabaseInfo = serde_json::from_value(json!({
-            "host": "localhost",
-            "port": 5432,
-            "dbname": "postgres",
-            "user": "john_doe",
-            "password": "password",
-            "project": "hello_world",
-        }))?;
-
-        // without password
-        let _: DatabaseInfo = serde_json::from_value(json!({
-            "host": "localhost",
-            "port": 5432,
-            "dbname": "postgres",
-            "user": "john_doe",
-            "project": "hello_world",
-        }))?;
-
-        // new field (forward compatibility)
-        let _: DatabaseInfo = serde_json::from_value(json!({
-            "host": "localhost",
-            "port": 5432,
-            "dbname": "postgres",
-            "user": "john_doe",
-            "project": "hello_world",
-            "N.E.W": "forward compatibility check",
-        }))?;
-
-        Ok(())
-    }
-}
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -11,7 +11,7 @@ use anyhow::{bail, Context};
 use futures::TryFutureExt;
 use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
-use pq_proto::{BeMessage as Be, *};
+use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
 use std::sync::Arc;
 use tokio::io::{AsyncRead, AsyncWrite};
 use tracing::{error, info, info_span, Instrument};
@@ -39,12 +39,7 @@ static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
    register_int_counter_vec!(
        "proxy_io_bytes_per_client",
        "Number of bytes sent/received between client and backend.",
-        &[
-            // Received (rx) / sent (tx).
-            "direction",
-            // Proxy can keep calling it `project` internally.
-            "endpoint_id"
-        ]
+        crate::console::messages::MetricsAuxInfo::TRAFFIC_LABELS,
    )
    .unwrap()
 });
@@ -87,6 +82,47 @@ pub async fn task_main(
    }
 }

+/// Handles one client whose connection arrives as an already-established stream
+/// (the WebSocket path); `hostname` is passed to credential parsing.
+pub async fn handle_ws_client(
+    config: &ProxyConfig,
+    cancel_map: &CancelMap,
+    session_id: uuid::Uuid,
+    stream: impl AsyncRead + AsyncWrite + Unpin + Send,
+    hostname: Option<String>,
+) -> anyhow::Result<()> {
+    // The `closed` counter will increase when this future is destroyed.
+    NUM_CONNECTIONS_ACCEPTED_COUNTER.inc();
+    scopeguard::defer! {
+        NUM_CONNECTIONS_CLOSED_COUNTER.inc();
+    }
+
+    // TLS is None here, because the connection is already encrypted.
+    let handshake_fut = handshake(stream, None, cancel_map).instrument(info_span!("handshake"));
+    let (mut stream, params) = match handshake_fut.await? {
+        Some(established) => established,
+        None => return Ok(()), // it's a cancellation request
+    };
+
+    // Extract credentials which we're going to use for auth.
+    let creds = {
+        let hostname = hostname.as_deref();
+        let tls = config.tls_config.as_ref();
+        let common_name = tls.and_then(|tls| tls.common_name.as_deref());
+        let result = config
+            .auth_backend
+            .as_ref()
+            .map(|_| auth::ClientCredentials::parse(&params, hostname, common_name, true))
+            .transpose();
+        async { result }.or_else(|e| stream.throw_error(e)).await?
+    };
+
+    let client = Client::new(stream, creds, &params, session_id);
+    cancel_map
+        .with_session(|session| client.connect_to_db(session))
+        .await
+}
+
 async fn handle_client(
    config: &ProxyConfig,
    cancel_map: &CancelMap,
@@ -113,7 +149,7 @@ async fn handle_client(
        let result = config
            .auth_backend
            .as_ref()
-            .map(|_| auth::ClientCredentials::parse(&params, sni, common_name))
+            .map(|_| auth::ClientCredentials::parse(&params, sni, common_name, false))
            .transpose();

        async { result }.or_else(|e| stream.throw_error(e)).await?
@@ -271,19 +307,16 @@ impl<S: AsyncRead + AsyncWrite + Unpin + Send> Client<'_, S> {

        stream
            .write_message_noflush(&Be::BackendKeyData(cancel_key_data))?
-            .write_message(&BeMessage::ReadyForQuery)
+            .write_message(&Be::ReadyForQuery)
            .await?;

-        // TODO: add more identifiers.
-        let metric_id = node.project;
-
-        let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["tx", &metric_id]);
+        let m_sent = NUM_BYTES_PROXIED_COUNTER.with_label_values(&node.aux.traffic_labels("tx"));
        let mut client = MeasuredStream::new(stream.into_inner(), |cnt| {
            // Number of bytes we sent to the client (outbound).
            m_sent.inc_by(cnt as u64);
        });

-        let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&["rx", &metric_id]);
+        let m_recv = NUM_BYTES_PROXIED_COUNTER.with_label_values(&node.aux.traffic_labels("rx"));
        let mut db = MeasuredStream::new(db.stream, |cnt| {
            // Number of bytes the client sent to the compute node (inbound).
            m_recv.inc_by(cnt as u64);
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -140,7 +140,7 @@ async fn dummy_proxy(
    stream
        .write_message_noflush(&Be::AuthenticationOk)?
        .write_message_noflush(&Be::CLIENT_ENCODING)?
-        .write_message(&BeMessage::ReadyForQuery)
+        .write_message(&Be::ReadyForQuery)
        .await?;

    Ok(())
--- a/proxy/src/stream.rs
+++ b/proxy/src/stream.rs
@@ -2,7 +2,7 @@ use crate::error::UserFacingError;
 use anyhow::bail;
 use bytes::BytesMut;
 use pin_project_lite::pin_project;
-use pq_proto::{BeMessage, FeMessage, FeStartupPacket};
+use pq_proto::{BeMessage, ConnectionError, FeMessage, FeStartupPacket};
 use rustls::ServerConfig;
 use std::pin::Pin;
 use std::sync::Arc;
@@ -47,18 +47,13 @@ fn err_connection() -> io::Error {
    io::Error::new(io::ErrorKind::ConnectionAborted, "connection is lost")
 }

-// TODO: change error type of `FeMessage::read_fut`
-fn from_anyhow(e: anyhow::Error) -> io::Error {
-    io::Error::new(io::ErrorKind::Other, e.to_string())
-}
-
 impl<S: AsyncRead + Unpin> PqStream<S> {
    /// Receive [`FeStartupPacket`], which is a first packet sent by a client.
    pub async fn read_startup_packet(&mut self) -> io::Result<FeStartupPacket> {
        // TODO: `FeStartupPacket::read_fut` should return `FeStartupPacket`
        let msg = FeStartupPacket::read_fut(&mut self.stream)
            .await
-            .map_err(from_anyhow)?
+            .map_err(ConnectionError::into_io_error)?
            .ok_or_else(err_connection)?;

        match msg {
@@ -80,7 +75,7 @@ impl<S: AsyncRead + Unpin> PqStream<S> {
    async fn read_message(&mut self) -> io::Result<FeMessage> {
        FeMessage::read_fut(&mut self.stream)
            .await
-            .map_err(from_anyhow)?
+            .map_err(ConnectionError::into_io_error)?
            .ok_or_else(err_connection)
    }
 }
@@ -112,7 +107,8 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
    /// This method exists due to `&str` not implementing `Into<anyhow::Error>`.
    pub async fn throw_error_str<T>(&mut self, error: &'static str) -> anyhow::Result<T> {
        tracing::info!("forwarding error to user: {error}");
-        self.write_message(&BeMessage::ErrorResponse(error)).await?;
+        self.write_message(&BeMessage::ErrorResponse(error, None))
+            .await?;
        bail!(error)
    }

@@ -124,7 +120,8 @@ impl<S: AsyncWrite + Unpin> PqStream<S> {
    {
        let msg = error.to_string_client();
        tracing::info!("forwarding error to user: {msg}");
-        self.write_message(&BeMessage::ErrorResponse(&msg)).await?;
+        self.write_message(&BeMessage::ErrorResponse(&msg, None))
+            .await?;
        bail!(error)
    }
 }
--- a/run_clippy.sh
+++ b/run_clippy.sh
@@ -9,8 +9,8 @@
 # In vscode, this setting is Rust-analyzer>Check On Save:Command


-# Not every feature is supported in macOS builds, e.g. `profiling`,
-# avoid running regular linting script that checks every feature.
+# Not every feature is supported in macOS builds. Avoid running regular linting
+# script that checks every feature.
 if [[ "$OSTYPE" == "darwin"* ]]; then
    # no extra features to test currently, add more here when needed
    cargo clippy --locked --all --all-targets --features testing -- -A unknown_lints -D warnings
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,10 +1,5 @@
 [toolchain]
-# We try to stick to a toolchain version that is widely available on popular distributions, so that most people
-# can use the toolchain that comes with their operating system. But if there's a feature we miss badly from a later
-# version, we can consider updating.
-# See https://tracker.debian.org/pkg/rustc for more details on Debian rustc package,
-# we use "unstable" version number as the highest version used in the project by default.
-channel = "1.62.1"
+channel = "1.66.1"
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/safekeeper/Cargo.toml
+++ b/safekeeper/Cargo.toml
@@ -2,6 +2,7 @@
 name = "safekeeper"
 version = "0.1.0"
 edition = "2021"
+license = "Apache-2.0"

 [dependencies]
 async-stream = "0.3"
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -143,6 +143,19 @@ fn main() -> anyhow::Result<()> {
        return Ok(());
    }

+    let auth = match args.auth_validation_public_key_path.as_ref() {
+        None => {
+            info!("auth is disabled");
+            None
+        }
+        Some(path) => {
+            info!("loading JWT auth key from {}", path.display());
+            Some(Arc::new(
+                JwtAuth::from_key_path(path).context("failed to load the auth key")?,
+            ))
+        }
+    };
+
    let conf = SafeKeeperConf {
        workdir,
        my_id: id,
@@ -156,7 +169,7 @@ fn main() -> anyhow::Result<()> {
        max_offloader_lag_bytes: args.max_offloader_lag,
        backup_runtime_threads: args.wal_backup_threads,
        wal_backup_enabled: !args.disable_wal_backup,
-        auth_validation_public_key_path: args.auth_validation_public_key_path,
+        auth,
    };

    // initialize sentry if SENTRY_DSN is provided
@@ -186,19 +199,6 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
        e
    })?;

-    let auth = match conf.auth_validation_public_key_path.as_ref() {
-        None => {
-            info!("auth is disabled");
-            None
-        }
-        Some(path) => {
-            info!("loading JWT auth key from {}", path.display());
-            Some(Arc::new(
-                JwtAuth::from_key_path(path).context("failed to load the auth key")?,
-            ))
-        }
-    };
-
    // Register metrics collector for active timelines. It's important to do this
    // after daemonizing, otherwise process collector will be upset.
    let timeline_collector = safekeeper::metrics::TimelineCollector::new();
@@ -212,12 +212,11 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
    GlobalTimelines::init(conf.clone(), wal_backup_launcher_tx)?;

    let conf_ = conf.clone();
-    let auth_ = auth.clone();
    threads.push(
        thread::Builder::new()
            .name("http_endpoint_thread".into())
            .spawn(|| {
-                let router = http::make_router(conf_, auth_);
+                let router = http::make_router(conf_);
                endpoint::serve_thread_main(
                    router,
                    http_listener,
@@ -230,11 +229,7 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
    let conf_cloned = conf.clone();
    let safekeeper_thread = thread::Builder::new()
        .name("safekeeper thread".into())
-        .spawn(|| {
-            if let Err(e) = wal_service::thread_main(conf_cloned, pg_listener, auth) {
-                info!("safekeeper thread terminated: {e}");
-            }
-        })
+        .spawn(|| wal_service::thread_main(conf_cloned, pg_listener))
        .unwrap();

    threads.push(safekeeper_thread);
@@ -244,7 +239,6 @@ fn start_safekeeper(conf: SafeKeeperConf) -> Result<()> {
        thread::Builder::new()
            .name("broker thread".into())
            .spawn(|| {
-                // TODO: add auth?
                broker::thread_main(conf_);
            })?,
    );
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -8,16 +8,16 @@ use crate::receive_wal::ReceiveWalConn;
 use crate::send_wal::ReplicationConn;

 use crate::{GlobalTimelines, SafeKeeperConf};
-use anyhow::{bail, ensure, Context, Result};
+use anyhow::Context;

 use postgres_ffi::PG_TLI;
 use regex::Regex;

 use pq_proto::{BeMessage, FeStartupPacket, RowDescriptor, INT4_OID, TEXT_OID};
 use std::str;
-use std::sync::Arc;
 use tracing::info;
-use utils::auth::{Claims, JwtAuth, Scope};
+use utils::auth::{Claims, Scope};
+use utils::postgres_backend_async::QueryError;
 use utils::{
    id::{TenantId, TenantTimelineId, TimelineId},
    lsn::Lsn,
@@ -32,7 +32,6 @@ pub struct SafekeeperPostgresHandler {
    pub tenant_id: Option<TenantId>,
    pub timeline_id: Option<TimelineId>,
    pub ttid: TenantTimelineId,
-    auth: Option<Arc<JwtAuth>>,
    claims: Option<Claims>,
 }

@@ -44,7 +43,7 @@ enum SafekeeperPostgresCommand {
    JSONCtrl { cmd: AppendLogicalMessage },
 }

-fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
+fn parse_cmd(cmd: &str) -> anyhow::Result<SafekeeperPostgresCommand> {
    if cmd.starts_with("START_WAL_PUSH") {
        Ok(SafekeeperPostgresCommand::StartWalPush)
    } else if cmd.starts_with("START_REPLICATION") {
@@ -64,13 +63,17 @@ fn parse_cmd(cmd: &str) -> Result<SafekeeperPostgresCommand> {
            cmd: serde_json::from_str(cmd)?,
        })
    } else {
-        bail!("unsupported command {}", cmd);
+        anyhow::bail!("unsupported command {cmd}");
    }
 }

 impl postgres_backend::Handler for SafekeeperPostgresHandler {
    // tenant_id and timeline_id are passed in connection string params
-    fn startup(&mut self, _pgb: &mut PostgresBackend, sm: &FeStartupPacket) -> Result<()> {
+    fn startup(
+        &mut self,
+        _pgb: &mut PostgresBackend,
+        sm: &FeStartupPacket,
+    ) -> Result<(), QueryError> {
        if let FeStartupPacket::StartupMessage { params, .. } = sm {
            if let Some(options) = params.options_raw() {
                for opt in options {
@@ -79,10 +82,14 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
                    // https://github.com/neondatabase/neon/pull/2433#discussion_r970005064
                    match opt.split_once('=') {
                        Some(("ztenantid", value)) | Some(("tenant_id", value)) => {
-                            self.tenant_id = Some(value.parse()?);
+                            self.tenant_id = Some(value.parse().with_context(|| {
+                                format!("Failed to parse {value} as tenant id")
+                            })?);
                        }
                        Some(("ztimelineid", value)) | Some(("timeline_id", value)) => {
-                            self.timeline_id = Some(value.parse()?);
+                            self.timeline_id = Some(value.parse().with_context(|| {
+                                format!("Failed to parse {value} as timeline id")
+                            })?);
                        }
                        _ => continue,
                    }
@@ -95,7 +102,9 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {

            Ok(())
        } else {
-            bail!("Safekeeper received unexpected initial message: {:?}", sm);
+            Err(QueryError::Other(anyhow::anyhow!(
+                "Safekeeper received unexpected initial message: {sm:?}"
+            )))
        }
    }

@@ -103,20 +112,20 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
        &mut self,
        _pgb: &mut PostgresBackend,
        jwt_response: &[u8],
-    ) -> anyhow::Result<()> {
+    ) -> Result<(), QueryError> {
        // this unwrap is never triggered, because check_auth_jwt only called when auth_type is NeonJWT
        // which requires auth to be present
        let data = self
+            .conf
            .auth
            .as_ref()
            .unwrap()
-            .decode(str::from_utf8(jwt_response)?)?;
+            .decode(str::from_utf8(jwt_response).context("jwt response is not UTF-8")?)?;

-        if matches!(data.claims.scope, Scope::Tenant) {
-            ensure!(
-                data.claims.tenant_id.is_some(),
+        if matches!(data.claims.scope, Scope::Tenant) && data.claims.tenant_id.is_none() {
+            return Err(QueryError::Other(anyhow::anyhow!(
                "jwt token scope is Tenant, but tenant id is missing"
-            )
+            )));
        }

        info!(
@@ -128,7 +137,11 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
        Ok(())
    }

-    fn process_query(&mut self, pgb: &mut PostgresBackend, query_string: &str) -> Result<()> {
+    fn process_query(
+        &mut self,
+        pgb: &mut PostgresBackend,
+        query_string: &str,
+    ) -> Result<(), QueryError> {
        if query_string
            .to_ascii_lowercase()
            .starts_with("set datestyle to ")
@@ -149,39 +162,45 @@ impl postgres_backend::Handler for SafekeeperPostgresHandler {
        self.check_permission(Some(tenant_id))?;
        self.ttid = TenantTimelineId::new(tenant_id, timeline_id);

-        match cmd {
+        let res = match cmd {
            SafekeeperPostgresCommand::StartWalPush => ReceiveWalConn::new(pgb).run(self),
            SafekeeperPostgresCommand::StartReplication { start_lsn } => {
                ReplicationConn::new(pgb).run(self, pgb, start_lsn)
            }
            SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb),
            SafekeeperPostgresCommand::JSONCtrl { ref cmd } => handle_json_ctrl(self, pgb, cmd),
-        }
-        .context(format!(
-            "Failed to process query for timeline {timeline_id}"
-        ))?;
+        };

-        Ok(())
+        match res {
+            Ok(()) => Ok(()),
+            Err(QueryError::Disconnected(connection_error)) => {
+                info!("Timeline {tenant_id}/{timeline_id} query failed with connection error: {connection_error}");
+                Err(QueryError::Disconnected(connection_error))
+            }
+            Err(QueryError::Other(e)) => Err(QueryError::Other(e.context(format!(
+                "Failed to process query for timeline {}",
+                self.ttid
+            )))),
+        }
    }
 }

 impl SafekeeperPostgresHandler {
-    pub fn new(conf: SafeKeeperConf, auth: Option<Arc<JwtAuth>>) -> Self {
+    pub fn new(conf: SafeKeeperConf) -> Self {
        SafekeeperPostgresHandler {
            conf,
            appname: None,
            tenant_id: None,
            timeline_id: None,
            ttid: TenantTimelineId::empty(),
-            auth,
            claims: None,
        }
    }

    // when accessing management api supply None as an argument
    // when using to authorize tenant pass corresponding tenant id
-    fn check_permission(&self, tenant_id: Option<TenantId>) -> Result<()> {
-        if self.auth.is_none() {
+    fn check_permission(&self, tenant_id: Option<TenantId>) -> anyhow::Result<()> {
+        if self.conf.auth.is_none() {
            // auth is set to Trust, nothing to check so just return ok
            return Ok(());
        }
@@ -198,7 +217,7 @@ impl SafekeeperPostgresHandler {
    ///
    /// Handle IDENTIFY_SYSTEM replication command
    ///
-    fn handle_identify_system(&mut self, pgb: &mut PostgresBackend) -> Result<()> {
+    fn handle_identify_system(&mut self, pgb: &mut PostgresBackend) -> Result<(), QueryError> {
        let tli = GlobalTimelines::get(self.ttid)?;

        let lsn = if self.is_walproposer_recovery() {
--- a/safekeeper/src/http/routes.rs
+++ b/safekeeper/src/http/routes.rs
@@ -277,12 +277,9 @@ async fn record_safekeeper_info(mut request: Request<Body>) -> Result<Response<B
 }

 /// Safekeeper http router.
-pub fn make_router(
-    conf: SafeKeeperConf,
-    auth: Option<Arc<JwtAuth>>,
-) -> RouterBuilder<hyper::Body, ApiError> {
+pub fn make_router(conf: SafeKeeperConf) -> RouterBuilder<hyper::Body, ApiError> {
    let mut router = endpoint::make_router();
-    if auth.is_some() {
+    if conf.auth.is_some() {
        router = router.middleware(auth_middleware(|request| {
            #[allow(clippy::mutable_key_type)]
            static ALLOWLIST_ROUTES: Lazy<HashSet<Uri>> =
@@ -298,6 +295,7 @@ pub fn make_router(

    // NB: on any changes do not forget to update the OpenAPI spec
    // located nearby (/safekeeper/src/http/openapi_spec.yaml).
+    let auth = conf.auth.clone();
    router
        .data(Arc::new(conf))
        .data(auth)
--- a/Show More
+++ b/Show More