diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml
index e2f15d96db..8bf12c31b1 100644
--- a/.github/workflows/benchmarking.yml
+++ b/.github/workflows/benchmarking.yml
@@ -11,7 +11,7 @@ on:
     #          │ │ ┌───────────── day of the month (1 - 31)
     #          │ │ │ ┌───────────── month (1 - 12 or JAN-DEC)
     #          │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT)
-    - cron:  '0 3 * * *' # run once a day, timezone is utc
+    - cron:   '0 3 * * *' # run once a day, timezone is utc
 
   workflow_dispatch: # adds ability to run this manually
     inputs:
@@ -23,6 +23,21 @@ on:
         type: boolean
         description: 'Publish perf report. If not set, the report will be published only for the main branch'
         required: false
+      collect_olap_explain:
+        type: boolean
+        description: 'Collect EXPLAIN ANALYZE for OLAP queries. If not set, EXPLAIN ANALYZE will not be collected'
+        required: false
+        default: false
+      collect_pg_stat_statements:
+        type: boolean
+        description: 'Collect pg_stat_statements for OLAP queries. If not set, pg_stat_statements will not be collected'
+        required: false
+        default: false
+      run_AWS_RDS_AND_AURORA:
+        type: boolean
+        description: 'AWS-RDS and AWS-AURORA normally only run on Saturday. Set this to true to run them on every workflow_dispatch'
+        required: false
+        default: false
 
 defaults:
   run:
@@ -113,6 +128,8 @@ jobs:
     # - neon-captest-reuse: Reusing existing project
     # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs
     # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage
+    env:
+      RUN_AWS_RDS_AND_AURORA: ${{ github.event.inputs.run_AWS_RDS_AND_AURORA || 'false' }}
     runs-on: ubuntu-latest
     outputs:
       pgbench-compare-matrix: ${{ steps.pgbench-compare-matrix.outputs.matrix }}
@@ -152,7 +169,7 @@ jobs:
           ]
         }'
 
-        if [ "$(date +%A)" = "Saturday" ]; then
+        if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
           matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres" },
                                                    { "platform": "rds-aurora"   }]')
         fi
@@ -171,9 +188,9 @@ jobs:
           ]
         }'
 
-        if [ "$(date +%A)" = "Saturday" ]; then
+        if [ "$(date +%A)" = "Saturday" ] || [ ${RUN_AWS_RDS_AND_AURORA} = "true" ]; then
           matrix=$(echo "$matrix" | jq '.include += [{ "platform": "rds-postgres", "scale": "10" },
-                                                   { "platform": "rds-aurora",   "scale": "10" }]')
+                                                    { "platform": "rds-aurora",   "scale": "10" }]')
         fi
 
         echo "matrix=$(echo "$matrix" | jq --compact-output '.')" >> $GITHUB_OUTPUT
@@ -337,6 +354,8 @@ jobs:
       POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
       DEFAULT_PG_VERSION: 14
       TEST_OUTPUT: /tmp/test_output
+      TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain }}
+      TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements }}
       BUILD_TYPE: remote
       SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }}
       PLATFORM: ${{ matrix.platform }}
@@ -399,6 +418,8 @@ jobs:
       env:
         VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}"
         PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}"
+        TEST_OLAP_COLLECT_EXPLAIN: ${{ github.event.inputs.collect_olap_explain || 'false' }}
+        TEST_OLAP_COLLECT_PG_STAT_STATEMENTS: ${{ github.event.inputs.collect_pg_stat_statements || 'false' }}
         BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }}
         TEST_OLAP_SCALE: 10
 
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 4283330128..96cc2997fa 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -863,7 +863,7 @@ jobs:
       run:
         shell: sh -eu {0}
     env:
-      VM_BUILDER_VERSION: v0.19.0
+      VM_BUILDER_VERSION: v0.21.0
 
     steps:
       - name: Checkout
diff --git a/Cargo.lock b/Cargo.lock
index e82c8641c6..6ebc2389c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -190,9 +190,9 @@ dependencies = [
 
 [[package]]
 name = "async-compression"
-version = "0.4.0"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b0122885821398cc923ece939e24d1056a2384ee719432397fa9db87230ff11"
+checksum = "bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5"
 dependencies = [
  "flate2",
  "futures-core",
@@ -233,7 +233,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -244,7 +244,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -694,9 +694,9 @@ dependencies = [
 
 [[package]]
 name = "azure_core"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ccd63c07d1fbfb3d4543d7ea800941bf5a30db1911b9b9e4db3b2c4210a434f"
+checksum = "a6218987c374650fdad0b476bfc675729762c28dfb35f58608a38a2b1ea337dd"
 dependencies = [
  "async-trait",
  "base64 0.21.1",
@@ -704,8 +704,10 @@ dependencies = [
  "dyn-clone",
  "futures",
  "getrandom 0.2.11",
+ "hmac",
  "http-types",
  "log",
+ "once_cell",
  "paste",
  "pin-project",
  "quick-xml",
@@ -714,6 +716,7 @@ dependencies = [
  "rustc_version",
  "serde",
  "serde_json",
+ "sha2",
  "time",
  "url",
  "uuid",
@@ -721,9 +724,9 @@ dependencies = [
 
 [[package]]
 name = "azure_identity"
-version = "0.17.0"
+version = "0.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bd7ea32ca7eb66ff4757f83baac702ff11d469e5de365b6bc6f79f9c25d3436"
+checksum = "9e1eacc4f7fb2a73d57c39139d0fc3aed78435606055779ddaef4b43cdf919a8"
 dependencies = [
  "async-lock",
  "async-trait",
@@ -733,7 +736,6 @@ dependencies = [
  "oauth2",
  "pin-project",
  "serde",
- "serde_json",
  "time",
  "tz-rs",
  "url",
@@ -742,21 +744,18 @@ dependencies = [
 
 [[package]]
 name = "azure_storage"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83ca0a07f89fd72a006da4713e93af3d6c44a693e61a1c3c2e7985de39c182e8"
+checksum = "ade8f2653e408de88b9eafec9f48c3c26b94026375e88adbd34523a7dd9795a1"
 dependencies = [
  "RustyXML",
+ "async-lock",
  "async-trait",
  "azure_core",
  "bytes",
- "futures",
- "hmac",
  "log",
  "serde",
  "serde_derive",
- "serde_json",
- "sha2",
  "time",
  "url",
  "uuid",
@@ -764,13 +763,14 @@ dependencies = [
 
 [[package]]
 name = "azure_storage_blobs"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8096c04d370118323c42b2752aa1883e4880a56ef65239f317b359f263b6e194"
+checksum = "025701c7cc5b523100f0f3b2b01723564ec5a86c03236521c06826337047e872"
 dependencies = [
  "RustyXML",
  "azure_core",
  "azure_storage",
+ "azure_svc_blobstorage",
  "bytes",
  "futures",
  "log",
@@ -782,6 +782,22 @@ dependencies = [
  "uuid",
 ]
 
+[[package]]
+name = "azure_svc_blobstorage"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76051e5bb67cea1055abe5e530a0878feac7e0ab4cbbcb4a6adc953a58993389"
+dependencies = [
+ "azure_core",
+ "bytes",
+ "futures",
+ "log",
+ "once_cell",
+ "serde",
+ "serde_json",
+ "time",
+]
+
 [[package]]
 name = "backtrace"
 version = "0.3.67"
@@ -865,7 +881,7 @@ dependencies = [
  "regex",
  "rustc-hash",
  "shlex",
- "syn 2.0.28",
+ "syn 2.0.32",
  "which",
 ]
 
@@ -1079,7 +1095,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -1229,16 +1245,19 @@ name = "control_plane"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "async-trait",
  "camino",
  "clap",
  "comfy-table",
  "compute_api",
+ "futures",
  "git-version",
  "hex",
  "hyper",
  "nix 0.26.2",
  "once_cell",
  "pageserver_api",
+ "pageserver_client",
  "postgres",
  "postgres_backend",
  "postgres_connection",
@@ -1252,6 +1271,8 @@ dependencies = [
  "tar",
  "thiserror",
  "tokio",
+ "tokio-postgres",
+ "tokio-util",
  "toml",
  "tracing",
  "url",
@@ -1465,7 +1486,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "strsim",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -1476,7 +1497,7 @@ checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a"
 dependencies = [
  "darling_core",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -1551,7 +1572,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -1645,7 +1666,7 @@ dependencies = [
  "darling",
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -1899,7 +1920,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -2476,13 +2497,14 @@ dependencies = [
 
 [[package]]
 name = "jsonwebtoken"
-version = "8.3.0"
+version = "9.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6971da4d9c3aa03c3d8f3ff0f4155b534aad021292003895a469716b2a230378"
+checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4"
 dependencies = [
  "base64 0.21.1",
- "pem 1.1.1",
- "ring 0.16.20",
+ "js-sys",
+ "pem 3.0.3",
+ "ring 0.17.6",
  "serde",
  "serde_json",
  "simple_asn1",
@@ -2895,7 +2917,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -3097,6 +3119,7 @@ dependencies = [
  "humantime-serde",
  "hyper",
  "itertools",
+ "md5",
  "metrics",
  "nix 0.26.2",
  "num-traits",
@@ -3134,6 +3157,7 @@ dependencies = [
  "tokio-epoll-uring",
  "tokio-io-timeout",
  "tokio-postgres",
+ "tokio-stream",
  "tokio-tar",
  "tokio-util",
  "toml_edit",
@@ -3156,6 +3180,7 @@ dependencies = [
  "enum-map",
  "hex",
  "postgres_ffi",
+ "rand 0.8.5",
  "serde",
  "serde_json",
  "serde_with",
@@ -3166,6 +3191,27 @@ dependencies = [
  "workspace_hack",
 ]
 
+[[package]]
+name = "pageserver_client"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "async-trait",
+ "bytes",
+ "futures",
+ "pageserver_api",
+ "postgres",
+ "reqwest",
+ "serde",
+ "thiserror",
+ "tokio",
+ "tokio-postgres",
+ "tokio-stream",
+ "tokio-util",
+ "utils",
+ "workspace_hack",
+]
+
 [[package]]
 name = "parking"
 version = "2.1.1"
@@ -3257,18 +3303,19 @@ checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
 
 [[package]]
 name = "pem"
-version = "1.1.1"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a8835c273a76a90455d7344889b0964598e3316e2a79ede8e36f16bdcf2228b8"
+checksum = "6b13fe415cdf3c8e44518e18a7c95a13431d9bdf6d15367d82b23c377fdd441a"
 dependencies = [
- "base64 0.13.1",
+ "base64 0.21.1",
+ "serde",
 ]
 
 [[package]]
 name = "pem"
-version = "2.0.1"
+version = "3.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b13fe415cdf3c8e44518e18a7c95a13431d9bdf6d15367d82b23c377fdd441a"
+checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310"
 dependencies = [
  "base64 0.21.1",
  "serde",
@@ -3325,7 +3372,7 @@ checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -3532,7 +3579,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3b69d39aab54d069e7f2fe8cb970493e7834601ca2d8c65fd7bbd183578080d1"
 dependencies = [
  "proc-macro2",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -4140,7 +4187,7 @@ dependencies = [
  "regex",
  "relative-path",
  "rustc_version",
- "syn 2.0.28",
+ "syn 2.0.32",
  "unicode-ident",
 ]
 
@@ -4286,6 +4333,7 @@ dependencies = [
  "histogram",
  "itertools",
  "pageserver",
+ "pageserver_api",
  "rand 0.8.5",
  "remote_storage",
  "reqwest",
@@ -4393,12 +4441,12 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
 
 [[package]]
 name = "sct"
-version = "0.7.0"
+version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4"
+checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
 dependencies = [
- "ring 0.16.20",
- "untrusted 0.7.1",
+ "ring 0.17.6",
+ "untrusted 0.9.0",
 ]
 
 [[package]]
@@ -4574,7 +4622,7 @@ checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -4655,7 +4703,7 @@ dependencies = [
  "darling",
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -4922,9 +4970,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.28"
+version = "2.0.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567"
+checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -5054,7 +5102,7 @@ checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -5187,7 +5235,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -5488,7 +5536,7 @@ checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -5783,6 +5831,7 @@ dependencies = [
  "serde",
  "serde_assert",
  "serde_json",
+ "serde_path_to_error",
  "serde_with",
  "signal-hook",
  "strum",
@@ -5941,7 +5990,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
  "wasm-bindgen-shared",
 ]
 
@@ -5975,7 +6024,7 @@ checksum = "e128beba882dd1eb6200e1dc92ae6c5dbaa4311aa7bb211ca035779e5efc39f8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -6312,7 +6361,7 @@ dependencies = [
  "smallvec",
  "subtle",
  "syn 1.0.109",
- "syn 2.0.28",
+ "syn 2.0.32",
  "time",
  "time-macros",
  "tokio",
@@ -6374,22 +6423,22 @@ dependencies = [
 
 [[package]]
 name = "zerocopy"
-version = "0.7.3"
+version = "0.7.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7a7af71d8643341260a65f89fa60c0eeaa907f34544d8f6d9b0df72f069b5e74"
+checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.7.3"
+version = "0.7.31"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9731702e2f0617ad526794ae28fbc6f6ca8849b5ba729666c2a5bc4b6ddee2cd"
+checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.28",
+ "syn 2.0.32",
 ]
 
 [[package]]
@@ -6400,30 +6449,28 @@ checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
 
 [[package]]
 name = "zstd"
-version = "0.12.4"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c"
+checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110"
 dependencies = [
  "zstd-safe",
 ]
 
 [[package]]
 name = "zstd-safe"
-version = "6.0.6"
+version = "7.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581"
+checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e"
 dependencies = [
- "libc",
  "zstd-sys",
 ]
 
 [[package]]
 name = "zstd-sys"
-version = "2.0.8+zstd.1.5.5"
+version = "2.0.9+zstd.1.5.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c"
+checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656"
 dependencies = [
  "cc",
- "libc",
  "pkg-config",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 4673c00213..aaccb405be 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,6 +5,7 @@ members = [
     "control_plane",
     "pageserver",
     "pageserver/ctl",
+    "pageserver/client",
     "proxy",
     "safekeeper",
     "storage_broker",
@@ -38,10 +39,10 @@ license = "Apache-2.0"
 anyhow = { version = "1.0", features = ["backtrace"] }
 arc-swap = "1.6"
 async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] }
-azure_core = "0.17"
-azure_identity = "0.17"
-azure_storage = "0.17"
-azure_storage_blobs = "0.17"
+azure_core = "0.18"
+azure_identity = "0.18"
+azure_storage = "0.18"
+azure_storage_blobs = "0.18"
 flate2 = "1.0.26"
 async-stream = "0.3"
 async-trait = "0.1"
@@ -90,7 +91,7 @@ hyper-tungstenite = "0.11"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
-jsonwebtoken = "8"
+jsonwebtoken = "9"
 libc = "0.2"
 md5 = "0.7.0"
 memoffset = "0.8"
@@ -184,6 +185,7 @@ compute_api = { version = "0.1", path = "./libs/compute_api/" }
 consumption_metrics = { version = "0.1", path = "./libs/consumption_metrics/" }
 metrics = { version = "0.1", path = "./libs/metrics/" }
 pageserver_api = { version = "0.1", path = "./libs/pageserver_api/" }
+pageserver_client = { path = "./pageserver/client" }
 postgres_backend = { version = "0.1", path = "./libs/postgres_backend/" }
 postgres_connection = { version = "0.1", path = "./libs/postgres_connection/" }
 postgres_ffi = { version = "0.1", path = "./libs/postgres_ffi/" }
diff --git a/Dockerfile.compute-node b/Dockerfile.compute-node
index 03280586f8..a23e930c48 100644
--- a/Dockerfile.compute-node
+++ b/Dockerfile.compute-node
@@ -569,6 +569,23 @@ RUN wget https://github.com/ChenHuajun/pg_roaringbitmap/archive/refs/tags/v0.5.4
     make -j $(getconf _NPROCESSORS_ONLN) install && \
     echo 'trusted = true' >> /usr/local/pgsql/share/extension/roaringbitmap.control
 
+#########################################################################################
+#
+# Layer "pg-semver-pg-build"
+# compile pg_semver extension
+#
+#########################################################################################
+FROM build-deps AS pg-semver-pg-build
+COPY --from=pg-build /usr/local/pgsql/ /usr/local/pgsql/
+
+ENV PATH "/usr/local/pgsql/bin/:$PATH"
+RUN wget https://github.com/theory/pg-semver/archive/refs/tags/v0.32.1.tar.gz -O pg_semver.tar.gz && \
+    echo "fbdaf7512026d62eec03fad8687c15ed509b6ba395bff140acd63d2e4fbe25d7 pg_semver.tar.gz" | sha256sum --check && \
+    mkdir pg_semver-src && cd pg_semver-src && tar xvzf ../pg_semver.tar.gz --strip-components=1 -C . && \
+    make -j $(getconf _NPROCESSORS_ONLN) && \
+    make -j $(getconf _NPROCESSORS_ONLN) install && \
+    echo 'trusted = true' >> /usr/local/pgsql/share/extension/semver.control
+
 #########################################################################################
 #
 # Layer "pg-embedding-pg-build"
@@ -768,6 +785,7 @@ COPY --from=pg-pgx-ulid-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=rdkit-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-uuidv7-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-roaringbitmap-pg-build /usr/local/pgsql/ /usr/local/pgsql/
+COPY --from=pg-semver-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=pg-embedding-pg-build /usr/local/pgsql/ /usr/local/pgsql/
 COPY --from=wal2json-pg-build /usr/local/pgsql /usr/local/pgsql
 COPY pgxn/ pgxn/
diff --git a/README.md b/README.md
index 3e3123f5ee..98af1edee6 100644
--- a/README.md
+++ b/README.md
@@ -29,13 +29,14 @@ See developer documentation in [SUMMARY.md](/docs/SUMMARY.md) for more informati
 ```bash
 apt install build-essential libtool libreadline-dev zlib1g-dev flex bison libseccomp-dev \
 libssl-dev clang pkg-config libpq-dev cmake postgresql-client protobuf-compiler \
-libcurl4-openssl-dev openssl python-poetry lsof libicu-dev
+libcurl4-openssl-dev openssl python3-poetry lsof libicu-dev
 ```
 * On Fedora, these packages are needed:
 ```bash
 dnf install flex bison readline-devel zlib-devel openssl-devel \
   libseccomp-devel perl clang cmake postgresql postgresql-contrib protobuf-compiler \
-  protobuf-devel libcurl-devel openssl poetry lsof libicu-devel
+  protobuf-devel libcurl-devel openssl poetry lsof libicu-devel libpq-devel python3-devel \
+  libffi-devel
 ```
 * On Arch based systems, these packages are needed:
 ```bash
diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml
index 47378f1910..18b30810b0 100644
--- a/compute_tools/Cargo.toml
+++ b/compute_tools/Cargo.toml
@@ -37,5 +37,5 @@ workspace_hack.workspace = true
 toml_edit.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
 vm_monitor = { version = "0.1", path = "../libs/vm_monitor/" }
-zstd = "0.12.4"
+zstd = "0.13"
 bytes = "1.0"
diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs
index ba1ee6d1b2..d545858dc2 100644
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -298,7 +298,7 @@ pub fn handle_roles(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                 // safe to add more permissions here. BYPASSRLS and REPLICATION are inherited
                 // from neon_superuser.
                 let mut query: String = format!(
-                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB IN ROLE neon_superuser",
+                    "CREATE ROLE {} INHERIT CREATEROLE CREATEDB BYPASSRLS REPLICATION IN ROLE neon_superuser",
                     name.pg_quote()
                 );
                 info!("role create query: '{}'", &query);
@@ -370,33 +370,49 @@ pub fn handle_role_deletions(spec: &ComputeSpec, connstr: &str, client: &mut Cli
     Ok(())
 }
 
+fn reassign_owned_objects_in_one_db(
+    conf: Config,
+    role_name: &PgIdent,
+    db_owner: &PgIdent,
+) -> Result<()> {
+    let mut client = conf.connect(NoTls)?;
+
+    // This will reassign all dependent objects to the db owner
+    let reassign_query = format!(
+        "REASSIGN OWNED BY {} TO {}",
+        role_name.pg_quote(),
+        db_owner.pg_quote()
+    );
+    info!(
+        "reassigning objects owned by '{}' in db '{}' to '{}'",
+        role_name,
+        conf.get_dbname().unwrap_or(""),
+        db_owner
+    );
+    client.simple_query(&reassign_query)?;
+
+    // This now will only drop privileges of the role
+    let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
+    client.simple_query(&drop_query)?;
+    Ok(())
+}
+
 // Reassign all owned objects in all databases to the owner of the database.
 fn reassign_owned_objects(spec: &ComputeSpec, connstr: &str, role_name: &PgIdent) -> Result<()> {
     for db in &spec.cluster.databases {
         if db.owner != *role_name {
             let mut conf = Config::from_str(connstr)?;
             conf.dbname(&db.name);
-
-            let mut client = conf.connect(NoTls)?;
-
-            // This will reassign all dependent objects to the db owner
-            let reassign_query = format!(
-                "REASSIGN OWNED BY {} TO {}",
-                role_name.pg_quote(),
-                db.owner.pg_quote()
-            );
-            info!(
-                "reassigning objects owned by '{}' in db '{}' to '{}'",
-                role_name, &db.name, &db.owner
-            );
-            client.simple_query(&reassign_query)?;
-
-            // This now will only drop privileges of the role
-            let drop_query = format!("DROP OWNED BY {}", role_name.pg_quote());
-            client.simple_query(&drop_query)?;
+            reassign_owned_objects_in_one_db(conf, role_name, &db.owner)?;
         }
     }
 
+    // Also handle case when there are no databases in the spec.
+    // In this case we need to reassign objects in the default database.
+    let conf = Config::from_str(connstr)?;
+    let db_owner = PgIdent::from_str("cloud_admin")?;
+    reassign_owned_objects_in_one_db(conf, role_name, &db_owner)?;
+
     Ok(())
 }
 
diff --git a/control_plane/Cargo.toml b/control_plane/Cargo.toml
index 7ccddc161e..898ad05add 100644
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -6,9 +6,11 @@ license.workspace = true
 
 [dependencies]
 anyhow.workspace = true
+async-trait.workspace = true
 camino.workspace = true
 clap.workspace = true
 comfy-table.workspace = true
+futures.workspace = true
 git-version.workspace = true
 nix.workspace = true
 once_cell.workspace = true
@@ -24,10 +26,11 @@ tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
 tokio.workspace = true
+tokio-postgres.workspace = true
+tokio-util.workspace = true
 url.workspace = true
-# Note: Do not directly depend on pageserver or safekeeper; use pageserver_api or safekeeper_api
-# instead, so that recompile times are better.
 pageserver_api.workspace = true
+pageserver_client.workspace = true
 postgres_backend.workspace = true
 safekeeper_api.workspace = true
 postgres_connection.workspace = true
diff --git a/control_plane/src/attachment_service.rs b/control_plane/src/attachment_service.rs
index 822ac7d8a6..731c05809e 100644
--- a/control_plane/src/attachment_service.rs
+++ b/control_plane/src/attachment_service.rs
@@ -9,7 +9,7 @@ pub struct AttachmentService {
     env: LocalEnv,
     listen: String,
     path: PathBuf,
-    client: reqwest::blocking::Client,
+    client: reqwest::Client,
 }
 
 const COMMAND: &str = "attachment_service";
@@ -53,7 +53,7 @@ impl AttachmentService {
             env: env.clone(),
             path,
             listen,
-            client: reqwest::blocking::ClientBuilder::new()
+            client: reqwest::ClientBuilder::new()
                 .build()
                 .expect("Failed to construct http client"),
         }
@@ -64,7 +64,7 @@ impl AttachmentService {
             .expect("non-Unicode path")
     }
 
-    pub fn start(&self) -> anyhow::Result<Child> {
+    pub async fn start(&self) -> anyhow::Result<Child> {
         let path_str = self.path.to_string_lossy();
 
         background_process::start_process(
@@ -73,10 +73,11 @@ impl AttachmentService {
             &self.env.attachment_service_bin(),
             ["-l", &self.listen, "-p", &path_str],
             [],
-            background_process::InitialPidFile::Create(&self.pid_file()),
+            background_process::InitialPidFile::Create(self.pid_file()),
             // TODO: a real status check
-            || Ok(true),
+            || async move { anyhow::Ok(true) },
         )
+        .await
     }
 
     pub fn stop(&self, immediate: bool) -> anyhow::Result<()> {
@@ -84,7 +85,7 @@ impl AttachmentService {
     }
 
     /// Call into the attach_hook API, for use before handing out attachments to pageservers
-    pub fn attach_hook(
+    pub async fn attach_hook(
         &self,
         tenant_id: TenantId,
         pageserver_id: NodeId,
@@ -104,16 +105,16 @@ impl AttachmentService {
             node_id: Some(pageserver_id),
         };
 
-        let response = self.client.post(url).json(&request).send()?;
+        let response = self.client.post(url).json(&request).send().await?;
         if response.status() != StatusCode::OK {
             return Err(anyhow!("Unexpected status {}", response.status()));
         }
 
-        let response = response.json::<AttachHookResponse>()?;
+        let response = response.json::<AttachHookResponse>().await?;
         Ok(response.gen)
     }
 
-    pub fn inspect(&self, tenant_id: TenantId) -> anyhow::Result<Option<(u32, NodeId)>> {
+    pub async fn inspect(&self, tenant_id: TenantId) -> anyhow::Result<Option<(u32, NodeId)>> {
         use hyper::StatusCode;
 
         let url = self
@@ -126,12 +127,12 @@ impl AttachmentService {
 
         let request = InspectRequest { tenant_id };
 
-        let response = self.client.post(url).json(&request).send()?;
+        let response = self.client.post(url).json(&request).send().await?;
         if response.status() != StatusCode::OK {
             return Err(anyhow!("Unexpected status {}", response.status()));
         }
 
-        let response = response.json::<InspectResponse>()?;
+        let response = response.json::<InspectResponse>().await?;
         Ok(response.attachment)
     }
 }
diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs
index 26fc08fc8f..20fa3af9b8 100644
--- a/control_plane/src/background_process.rs
+++ b/control_plane/src/background_process.rs
@@ -44,15 +44,15 @@ const NOTICE_AFTER_RETRIES: u64 = 50;
 
 /// Argument to `start_process`, to indicate whether it should create pidfile or if the process creates
 /// it itself.
-pub enum InitialPidFile<'t> {
+pub enum InitialPidFile {
     /// Create a pidfile, to allow future CLI invocations to manipulate the process.
-    Create(&'t Utf8Path),
+    Create(Utf8PathBuf),
     /// The process will create the pidfile itself, need to wait for that event.
-    Expect(&'t Utf8Path),
+    Expect(Utf8PathBuf),
 }
 
 /// Start a background child process using the parameters given.
-pub fn start_process<F, AI, A, EI>(
+pub async fn start_process<F, Fut, AI, A, EI>(
     process_name: &str,
     datadir: &Path,
     command: &Path,
@@ -62,7 +62,8 @@ pub fn start_process<F, AI, A, EI>(
     process_status_check: F,
 ) -> anyhow::Result<Child>
 where
-    F: Fn() -> anyhow::Result<bool>,
+    F: Fn() -> Fut,
+    Fut: std::future::Future<Output = anyhow::Result<bool>>,
     AI: IntoIterator<Item = A>,
     A: AsRef<OsStr>,
     // Not generic AsRef<OsStr>, otherwise empty `envs` prevents type inference
@@ -89,7 +90,7 @@ where
     let filled_cmd = fill_remote_storage_secrets_vars(fill_rust_env_vars(background_command));
     filled_cmd.envs(envs);
 
-    let pid_file_to_check = match initial_pid_file {
+    let pid_file_to_check = match &initial_pid_file {
         InitialPidFile::Create(path) => {
             pre_exec_create_pidfile(filled_cmd, path);
             path
@@ -107,7 +108,7 @@ where
     );
 
     for retries in 0..RETRIES {
-        match process_started(pid, Some(pid_file_to_check), &process_status_check) {
+        match process_started(pid, pid_file_to_check, &process_status_check).await {
             Ok(true) => {
                 println!("\n{process_name} started, pid: {pid}");
                 return Ok(spawned_process);
@@ -316,22 +317,20 @@ where
     cmd
 }
 
-fn process_started<F>(
+async fn process_started<F, Fut>(
     pid: Pid,
-    pid_file_to_check: Option<&Utf8Path>,
+    pid_file_to_check: &Utf8Path,
     status_check: &F,
 ) -> anyhow::Result<bool>
 where
-    F: Fn() -> anyhow::Result<bool>,
+    F: Fn() -> Fut,
+    Fut: std::future::Future<Output = anyhow::Result<bool>>,
 {
-    match status_check() {
-        Ok(true) => match pid_file_to_check {
-            Some(pid_file_path) => match pid_file::read(pid_file_path)? {
-                PidFileRead::NotExist => Ok(false),
-                PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
-                PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
-            },
-            None => Ok(true),
+    match status_check().await {
+        Ok(true) => match pid_file::read(pid_file_to_check)? {
+            PidFileRead::NotExist => Ok(false),
+            PidFileRead::LockedByOtherProcess(pid_in_file) => Ok(pid_in_file == pid),
+            PidFileRead::NotHeldByAnyProcess(_) => Ok(false),
         },
         Ok(false) => Ok(false),
         Err(e) => anyhow::bail!("process failed to start: {e}"),
diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs
index 8d53a6a658..03e69010f7 100644
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -120,15 +120,20 @@ fn main() -> Result<()> {
         let mut env = LocalEnv::load_config().context("Error loading config")?;
         let original_env = env.clone();
 
+        let rt = tokio::runtime::Builder::new_current_thread()
+            .enable_all()
+            .build()
+            .unwrap();
+
         let subcommand_result = match sub_name {
-            "tenant" => handle_tenant(sub_args, &mut env),
-            "timeline" => handle_timeline(sub_args, &mut env),
-            "start" => handle_start_all(sub_args, &env),
+            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
+            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
+            "start" => rt.block_on(handle_start_all(sub_args, &env)),
             "stop" => handle_stop_all(sub_args, &env),
-            "pageserver" => handle_pageserver(sub_args, &env),
-            "attachment_service" => handle_attachment_service(sub_args, &env),
-            "safekeeper" => handle_safekeeper(sub_args, &env),
-            "endpoint" => handle_endpoint(sub_args, &env),
+            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
+            "attachment_service" => rt.block_on(handle_attachment_service(sub_args, &env)),
+            "safekeeper" => rt.block_on(handle_safekeeper(sub_args, &env)),
+            "endpoint" => rt.block_on(handle_endpoint(sub_args, &env)),
             "mappings" => handle_mappings(sub_args, &mut env),
             "pg" => bail!("'pg' subcommand has been renamed to 'endpoint'"),
             _ => bail!("unexpected subcommand {sub_name}"),
@@ -168,7 +173,7 @@ fn print_timelines_tree(
                     info: t.clone(),
                     children: BTreeSet::new(),
                     name: timeline_name_mappings
-                        .remove(&TenantTimelineId::new(t.tenant_id, t.timeline_id)),
+                        .remove(&TenantTimelineId::new(t.tenant_id.tenant_id, t.timeline_id)),
                 },
             )
         })
@@ -269,12 +274,13 @@ fn print_timeline(
 
 /// Returns a map of timeline IDs to timeline_id@lsn strings.
 /// Connects to the pageserver to query this information.
-fn get_timeline_infos(
+async fn get_timeline_infos(
     env: &local_env::LocalEnv,
     tenant_id: &TenantId,
 ) -> Result<HashMap<TimelineId, TimelineInfo>> {
     Ok(get_default_pageserver(env)
-        .timeline_list(tenant_id)?
+        .timeline_list(tenant_id)
+        .await?
         .into_iter()
         .map(|timeline_info| (timeline_info.timeline_id, timeline_info))
         .collect())
@@ -373,11 +379,14 @@ fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
         .collect()
 }
 
-fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> anyhow::Result<()> {
+async fn handle_tenant(
+    tenant_match: &ArgMatches,
+    env: &mut local_env::LocalEnv,
+) -> anyhow::Result<()> {
     let pageserver = get_default_pageserver(env);
     match tenant_match.subcommand() {
         Some(("list", _)) => {
-            for t in pageserver.tenant_list()? {
+            for t in pageserver.tenant_list().await? {
                 println!("{} {:?}", t.id, t.state);
             }
         }
@@ -394,12 +403,16 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
                 // We must register the tenant with the attachment service, so
                 // that when the pageserver restarts, it will be re-attached.
                 let attachment_service = AttachmentService::from_env(env);
-                attachment_service.attach_hook(tenant_id, pageserver.conf.id)?
+                attachment_service
+                    .attach_hook(tenant_id, pageserver.conf.id)
+                    .await?
             } else {
                 None
             };
 
-            pageserver.tenant_create(tenant_id, generation, tenant_conf)?;
+            pageserver
+                .tenant_create(tenant_id, generation, tenant_conf)
+                .await?;
             println!("tenant {tenant_id} successfully created on the pageserver");
 
             // Create an initial timeline for the new tenant
@@ -409,14 +422,16 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
                 .copied()
                 .context("Failed to parse postgres version from the argument string")?;
 
-            let timeline_info = pageserver.timeline_create(
-                tenant_id,
-                new_timeline_id,
-                None,
-                None,
-                Some(pg_version),
-                None,
-            )?;
+            let timeline_info = pageserver
+                .timeline_create(
+                    tenant_id,
+                    new_timeline_id,
+                    None,
+                    None,
+                    Some(pg_version),
+                    None,
+                )
+                .await?;
             let new_timeline_id = timeline_info.timeline_id;
             let last_record_lsn = timeline_info.last_record_lsn;
 
@@ -450,6 +465,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
 
             pageserver
                 .tenant_config(tenant_id, tenant_conf)
+                .await
                 .with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?;
             println!("tenant {tenant_id} successfully configured on the pageserver");
         }
@@ -458,7 +474,7 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
             let new_pageserver = get_pageserver(env, matches)?;
             let new_pageserver_id = new_pageserver.conf.id;
 
-            migrate_tenant(env, tenant_id, new_pageserver)?;
+            migrate_tenant(env, tenant_id, new_pageserver).await?;
             println!("tenant {tenant_id} migrated to {}", new_pageserver_id);
         }
 
@@ -468,13 +484,13 @@ fn handle_tenant(tenant_match: &ArgMatches, env: &mut local_env::LocalEnv) -> an
     Ok(())
 }
 
-fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
+async fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -> Result<()> {
     let pageserver = get_default_pageserver(env);
 
     match timeline_match.subcommand() {
         Some(("list", list_match)) => {
             let tenant_id = get_tenant_id(list_match, env)?;
-            let timelines = pageserver.timeline_list(&tenant_id)?;
+            let timelines = pageserver.timeline_list(&tenant_id).await?;
             print_timelines_tree(timelines, env.timeline_name_mappings())?;
         }
         Some(("create", create_match)) => {
@@ -490,14 +506,16 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
 
             let new_timeline_id_opt = parse_timeline_id(create_match)?;
 
-            let timeline_info = pageserver.timeline_create(
-                tenant_id,
-                new_timeline_id_opt,
-                None,
-                None,
-                Some(pg_version),
-                None,
-            )?;
+            let timeline_info = pageserver
+                .timeline_create(
+                    tenant_id,
+                    new_timeline_id_opt,
+                    None,
+                    None,
+                    Some(pg_version),
+                    None,
+                )
+                .await?;
             let new_timeline_id = timeline_info.timeline_id;
 
             let last_record_lsn = timeline_info.last_record_lsn;
@@ -542,7 +560,9 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
 
             let mut cplane = ComputeControlPlane::load(env.clone())?;
             println!("Importing timeline into pageserver ...");
-            pageserver.timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)?;
+            pageserver
+                .timeline_import(tenant_id, timeline_id, base, pg_wal, pg_version)
+                .await?;
             env.register_branch_mapping(name.to_string(), tenant_id, timeline_id)?;
 
             println!("Creating endpoint for imported timeline ...");
@@ -578,14 +598,16 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
                 .map(|lsn_str| Lsn::from_str(lsn_str))
                 .transpose()
                 .context("Failed to parse ancestor start Lsn from the request")?;
-            let timeline_info = pageserver.timeline_create(
-                tenant_id,
-                None,
-                start_lsn,
-                Some(ancestor_timeline_id),
-                None,
-                None,
-            )?;
+            let timeline_info = pageserver
+                .timeline_create(
+                    tenant_id,
+                    None,
+                    start_lsn,
+                    Some(ancestor_timeline_id),
+                    None,
+                    None,
+                )
+                .await?;
             let new_timeline_id = timeline_info.timeline_id;
 
             let last_record_lsn = timeline_info.last_record_lsn;
@@ -604,7 +626,7 @@ fn handle_timeline(timeline_match: &ArgMatches, env: &mut local_env::LocalEnv) -
     Ok(())
 }
 
-fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
     let (sub_name, sub_args) = match ep_match.subcommand() {
         Some(ep_subcommand_data) => ep_subcommand_data,
         None => bail!("no endpoint subcommand provided"),
@@ -614,10 +636,12 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
     match sub_name {
         "list" => {
             let tenant_id = get_tenant_id(sub_args, env)?;
-            let timeline_infos = get_timeline_infos(env, &tenant_id).unwrap_or_else(|e| {
-                eprintln!("Failed to load timeline info: {}", e);
-                HashMap::new()
-            });
+            let timeline_infos = get_timeline_infos(env, &tenant_id)
+                .await
+                .unwrap_or_else(|e| {
+                    eprintln!("Failed to load timeline info: {}", e);
+                    HashMap::new()
+                });
 
             let timeline_name_mappings = env.timeline_name_mappings();
 
@@ -791,7 +815,9 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
             };
 
             println!("Starting existing endpoint {endpoint_id}...");
-            endpoint.start(&auth_token, safekeepers, remote_ext_config)?;
+            endpoint
+                .start(&auth_token, safekeepers, remote_ext_config)
+                .await?;
         }
         "reconfigure" => {
             let endpoint_id = sub_args
@@ -809,7 +835,7 @@ fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<(
                 } else {
                     None
                 };
-            endpoint.reconfigure(pageserver_id)?;
+            endpoint.reconfigure(pageserver_id).await?;
         }
         "stop" => {
             let endpoint_id = sub_args
@@ -875,11 +901,12 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
     ))
 }
 
-fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
     match sub_match.subcommand() {
         Some(("start", subcommand_args)) => {
             if let Err(e) = get_pageserver(env, subcommand_args)?
                 .start(&pageserver_config_overrides(subcommand_args))
+                .await
             {
                 eprintln!("pageserver start failed: {e}");
                 exit(1);
@@ -906,7 +933,10 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
                 exit(1);
             }
 
-            if let Err(e) = pageserver.start(&pageserver_config_overrides(subcommand_args)) {
+            if let Err(e) = pageserver
+                .start(&pageserver_config_overrides(subcommand_args))
+                .await
+            {
                 eprintln!("pageserver start failed: {e}");
                 exit(1);
             }
@@ -920,14 +950,17 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
                 exit(1);
             }
 
-            if let Err(e) = pageserver.start(&pageserver_config_overrides(subcommand_args)) {
+            if let Err(e) = pageserver
+                .start(&pageserver_config_overrides(subcommand_args))
+                .await
+            {
                 eprintln!("pageserver start failed: {e}");
                 exit(1);
             }
         }
 
         Some(("status", subcommand_args)) => {
-            match get_pageserver(env, subcommand_args)?.check_status() {
+            match get_pageserver(env, subcommand_args)?.check_status().await {
                 Ok(_) => println!("Page server is up and running"),
                 Err(err) => {
                     eprintln!("Page server is not available: {}", err);
@@ -942,11 +975,14 @@ fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
     Ok(())
 }
 
-fn handle_attachment_service(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+async fn handle_attachment_service(
+    sub_match: &ArgMatches,
+    env: &local_env::LocalEnv,
+) -> Result<()> {
     let svc = AttachmentService::from_env(env);
     match sub_match.subcommand() {
         Some(("start", _start_match)) => {
-            if let Err(e) = svc.start() {
+            if let Err(e) = svc.start().await {
                 eprintln!("start failed: {e}");
                 exit(1);
             }
@@ -987,7 +1023,7 @@ fn safekeeper_extra_opts(init_match: &ArgMatches) -> Vec<String> {
         .collect()
 }
 
-fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
+async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
     let (sub_name, sub_args) = match sub_match.subcommand() {
         Some(safekeeper_command_data) => safekeeper_command_data,
         None => bail!("no safekeeper subcommand provided"),
@@ -1005,7 +1041,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
         "start" => {
             let extra_opts = safekeeper_extra_opts(sub_args);
 
-            if let Err(e) = safekeeper.start(extra_opts) {
+            if let Err(e) = safekeeper.start(extra_opts).await {
                 eprintln!("safekeeper start failed: {}", e);
                 exit(1);
             }
@@ -1031,7 +1067,7 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
             }
 
             let extra_opts = safekeeper_extra_opts(sub_args);
-            if let Err(e) = safekeeper.start(extra_opts) {
+            if let Err(e) = safekeeper.start(extra_opts).await {
                 eprintln!("safekeeper start failed: {}", e);
                 exit(1);
             }
@@ -1044,15 +1080,15 @@ fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Resul
     Ok(())
 }
 
-fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
+async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
     // Endpoints are not started automatically
 
-    broker::start_broker_process(env)?;
+    broker::start_broker_process(env).await?;
 
     // Only start the attachment service if the pageserver is configured to need it
     if env.control_plane_api.is_some() {
         let attachment_service = AttachmentService::from_env(env);
-        if let Err(e) = attachment_service.start() {
+        if let Err(e) = attachment_service.start().await {
             eprintln!("attachment_service start failed: {:#}", e);
             try_stop_all(env, true);
             exit(1);
@@ -1061,7 +1097,10 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
 
     for ps_conf in &env.pageservers {
         let pageserver = PageServerNode::from_env(env, ps_conf);
-        if let Err(e) = pageserver.start(&pageserver_config_overrides(sub_match)) {
+        if let Err(e) = pageserver
+            .start(&pageserver_config_overrides(sub_match))
+            .await
+        {
             eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
             try_stop_all(env, true);
             exit(1);
@@ -1070,7 +1109,7 @@ fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow
 
     for node in env.safekeepers.iter() {
         let safekeeper = SafekeeperNode::from_env(env, node);
-        if let Err(e) = safekeeper.start(vec![]) {
+        if let Err(e) = safekeeper.start(vec![]).await {
             eprintln!("safekeeper {} start failed: {:#}", safekeeper.id, e);
             try_stop_all(env, false);
             exit(1);
diff --git a/control_plane/src/broker.rs b/control_plane/src/broker.rs
index 6be865cc2e..f40705863b 100644
--- a/control_plane/src/broker.rs
+++ b/control_plane/src/broker.rs
@@ -11,7 +11,7 @@ use camino::Utf8PathBuf;
 
 use crate::{background_process, local_env};
 
-pub fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
+pub async fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
     let broker = &env.broker;
     let listen_addr = &broker.listen_addr;
 
@@ -19,15 +19,15 @@ pub fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
 
     let args = [format!("--listen-addr={listen_addr}")];
 
-    let client = reqwest::blocking::Client::new();
+    let client = reqwest::Client::new();
     background_process::start_process(
         "storage_broker",
         &env.base_data_dir,
         &env.storage_broker_bin(),
         args,
         [],
-        background_process::InitialPidFile::Create(&storage_broker_pid_file_path(env)),
-        || {
+        background_process::InitialPidFile::Create(storage_broker_pid_file_path(env)),
+        || async {
             let url = broker.client_url();
             let status_url = url.join("status").with_context(|| {
                 format!("Failed to append /status path to broker endpoint {url}")
@@ -36,12 +36,13 @@ pub fn start_broker_process(env: &local_env::LocalEnv) -> anyhow::Result<()> {
                 .get(status_url)
                 .build()
                 .with_context(|| format!("Failed to construct request to broker endpoint {url}"))?;
-            match client.execute(request) {
+            match client.execute(request).await {
                 Ok(resp) => Ok(resp.status().is_success()),
                 Err(_) => Ok(false),
             }
         },
     )
+    .await
     .context("Failed to spawn storage_broker subprocess")?;
     Ok(())
 }
diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs
index a566f03db9..071f22dc2b 100644
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -464,7 +464,7 @@ impl Endpoint {
         }
     }
 
-    pub fn start(
+    pub async fn start(
         &self,
         auth_token: &Option<String>,
         safekeepers: Vec<NodeId>,
@@ -587,7 +587,7 @@ impl Endpoint {
         const MAX_ATTEMPTS: u32 = 10 * 30; // Wait up to 30 s
         loop {
             attempt += 1;
-            match self.get_status() {
+            match self.get_status().await {
                 Ok(state) => {
                     match state.status {
                         ComputeStatus::Init => {
@@ -629,8 +629,8 @@ impl Endpoint {
     }
 
     // Call the /status HTTP API
-    pub fn get_status(&self) -> Result<ComputeState> {
-        let client = reqwest::blocking::Client::new();
+    pub async fn get_status(&self) -> Result<ComputeState> {
+        let client = reqwest::Client::new();
 
         let response = client
             .request(
@@ -641,16 +641,17 @@ impl Endpoint {
                     self.http_address.port()
                 ),
             )
-            .send()?;
+            .send()
+            .await?;
 
         // Interpret the response
         let status = response.status();
         if !(status.is_client_error() || status.is_server_error()) {
-            Ok(response.json()?)
+            Ok(response.json().await?)
         } else {
             // reqwest does not export its error construction utility functions, so let's craft the message ourselves
             let url = response.url().to_owned();
-            let msg = match response.text() {
+            let msg = match response.text().await {
                 Ok(err_body) => format!("Error: {}", err_body),
                 Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
             };
@@ -658,7 +659,7 @@ impl Endpoint {
         }
     }
 
-    pub fn reconfigure(&self, pageserver_id: Option<NodeId>) -> Result<()> {
+    pub async fn reconfigure(&self, pageserver_id: Option<NodeId>) -> Result<()> {
         let mut spec: ComputeSpec = {
             let spec_path = self.endpoint_path().join("spec.json");
             let file = std::fs::File::open(spec_path)?;
@@ -687,7 +688,7 @@ impl Endpoint {
             spec.pageserver_connstring = Some(format!("postgresql://no_user@{host}:{port}"));
         }
 
-        let client = reqwest::blocking::Client::new();
+        let client = reqwest::Client::new();
         let response = client
             .post(format!(
                 "http://{}:{}/configure",
@@ -698,14 +699,15 @@ impl Endpoint {
                 "{{\"spec\":{}}}",
                 serde_json::to_string_pretty(&spec)?
             ))
-            .send()?;
+            .send()
+            .await?;
 
         let status = response.status();
         if !(status.is_client_error() || status.is_server_error()) {
             Ok(())
         } else {
             let url = response.url().to_owned();
-            let msg = match response.text() {
+            let msg = match response.text().await {
                 Ok(err_body) => format!("Error: {}", err_body),
                 Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
             };
diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs
index 96a41874fd..7d490016bf 100644
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -6,28 +6,24 @@
 //!
 use std::borrow::Cow;
 use std::collections::HashMap;
-use std::fs::File;
-use std::io::{BufReader, Write};
+
+use std::io;
+use std::io::Write;
 use std::num::NonZeroU64;
 use std::path::PathBuf;
 use std::process::{Child, Command};
 use std::time::Duration;
-use std::{io, result};
 
 use anyhow::{bail, Context};
 use camino::Utf8PathBuf;
-use pageserver_api::models::{
-    self, LocationConfig, TenantInfo, TenantLocationConfigRequest, TimelineInfo,
-};
+use futures::SinkExt;
+use pageserver_api::models::{self, LocationConfig, TenantInfo, TimelineInfo};
 use pageserver_api::shard::TenantShardId;
+use pageserver_client::mgmt_api;
 use postgres_backend::AuthType;
 use postgres_connection::{parse_host_port, PgConnectionConfig};
-use reqwest::blocking::{Client, RequestBuilder, Response};
-use reqwest::{IntoUrl, Method};
-use thiserror::Error;
 use utils::auth::{Claims, Scope};
 use utils::{
-    http::error::HttpErrorBody,
     id::{TenantId, TimelineId},
     lsn::Lsn,
 };
@@ -38,45 +34,6 @@ use crate::{background_process, local_env::LocalEnv};
 /// Directory within .neon which will be used by default for LocalFs remote storage.
 pub const PAGESERVER_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/pageserver";
 
-#[derive(Error, Debug)]
-pub enum PageserverHttpError {
-    #[error("Reqwest error: {0}")]
-    Transport(#[from] reqwest::Error),
-
-    #[error("Error: {0}")]
-    Response(String),
-}
-
-impl From<anyhow::Error> for PageserverHttpError {
-    fn from(e: anyhow::Error) -> Self {
-        Self::Response(e.to_string())
-    }
-}
-
-type Result<T> = result::Result<T, PageserverHttpError>;
-
-pub trait ResponseErrorMessageExt: Sized {
-    fn error_from_body(self) -> Result<Self>;
-}
-
-impl ResponseErrorMessageExt for Response {
-    fn error_from_body(self) -> Result<Self> {
-        let status = self.status();
-        if !(status.is_client_error() || status.is_server_error()) {
-            return Ok(self);
-        }
-
-        // reqwest does not export its error construction utility functions, so let's craft the message ourselves
-        let url = self.url().to_owned();
-        Err(PageserverHttpError::Response(
-            match self.json::<HttpErrorBody>() {
-                Ok(err_body) => format!("Error: {}", err_body.msg),
-                Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
-            },
-        ))
-    }
-}
-
 //
 // Control routines for pageserver.
 //
@@ -87,8 +44,7 @@ pub struct PageServerNode {
     pub pg_connection_config: PgConnectionConfig,
     pub conf: PageServerConf,
     pub env: LocalEnv,
-    pub http_client: Client,
-    pub http_base_url: String,
+    pub http_client: mgmt_api::Client,
 }
 
 impl PageServerNode {
@@ -100,8 +56,19 @@ impl PageServerNode {
             pg_connection_config: PgConnectionConfig::new_host_port(host, port),
             conf: conf.clone(),
             env: env.clone(),
-            http_client: Client::new(),
-            http_base_url: format!("http://{}/v1", conf.listen_http_addr),
+            http_client: mgmt_api::Client::new(
+                format!("http://{}", conf.listen_http_addr),
+                {
+                    match conf.http_auth_type {
+                        AuthType::Trust => None,
+                        AuthType::NeonJWT => Some(
+                            env.generate_auth_token(&Claims::new(None, Scope::PageServerApi))
+                                .unwrap(),
+                        ),
+                    }
+                }
+                .as_deref(),
+            ),
         }
     }
 
@@ -182,8 +149,8 @@ impl PageServerNode {
             .expect("non-Unicode path")
     }
 
-    pub fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
-        self.start_node(config_overrides, false)
+    pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<Child> {
+        self.start_node(config_overrides, false).await
     }
 
     fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
@@ -224,7 +191,12 @@ impl PageServerNode {
         Ok(())
     }
 
-    fn start_node(&self, config_overrides: &[&str], update_config: bool) -> anyhow::Result<Child> {
+    async fn start_node(
+        &self,
+        config_overrides: &[&str],
+        update_config: bool,
+    ) -> anyhow::Result<Child> {
+        // TODO: using a thread here because start_process() is not async but we need to call check_status()
         let datadir = self.repo_path();
         print!(
             "Starting pageserver node {} at '{}' in {:?}",
@@ -232,7 +204,7 @@ impl PageServerNode {
             self.pg_connection_config.raw_address(),
             datadir
         );
-        io::stdout().flush()?;
+        io::stdout().flush().context("flush stdout")?;
 
         let datadir_path_str = datadir.to_str().with_context(|| {
             format!(
@@ -244,20 +216,23 @@ impl PageServerNode {
         if update_config {
             args.push(Cow::Borrowed("--update-config"));
         }
-
         background_process::start_process(
             "pageserver",
             &datadir,
             &self.env.pageserver_bin(),
             args.iter().map(Cow::as_ref),
             self.pageserver_env_variables()?,
-            background_process::InitialPidFile::Expect(&self.pid_file()),
-            || match self.check_status() {
-                Ok(()) => Ok(true),
-                Err(PageserverHttpError::Transport(_)) => Ok(false),
-                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+            background_process::InitialPidFile::Expect(self.pid_file()),
+            || async {
+                let st = self.check_status().await;
+                match st {
+                    Ok(()) => Ok(true),
+                    Err(mgmt_api::Error::ReceiveBody(_)) => Ok(false),
+                    Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+                }
             },
         )
+        .await
     }
 
     fn pageserver_basic_args<'a>(
@@ -303,7 +278,12 @@ impl PageServerNode {
         background_process::stop_process(immediate, "pageserver", &self.pid_file())
     }
 
-    pub fn page_server_psql_client(&self) -> anyhow::Result<postgres::Client> {
+    pub async fn page_server_psql_client(
+        &self,
+    ) -> anyhow::Result<(
+        tokio_postgres::Client,
+        tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
+    )> {
         let mut config = self.pg_connection_config.clone();
         if self.conf.pg_auth_type == AuthType::NeonJWT {
             let token = self
@@ -311,36 +291,18 @@ impl PageServerNode {
                 .generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
             config = config.set_password(Some(token));
         }
-        Ok(config.connect_no_tls()?)
+        Ok(config.connect_no_tls().await?)
     }
 
-    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> anyhow::Result<RequestBuilder> {
-        let mut builder = self.http_client.request(method, url);
-        if self.conf.http_auth_type == AuthType::NeonJWT {
-            let token = self
-                .env
-                .generate_auth_token(&Claims::new(None, Scope::PageServerApi))?;
-            builder = builder.bearer_auth(token)
-        }
-        Ok(builder)
+    pub async fn check_status(&self) -> mgmt_api::Result<()> {
+        self.http_client.status().await
     }
 
-    pub fn check_status(&self) -> Result<()> {
-        self.http_request(Method::GET, format!("{}/status", self.http_base_url))?
-            .send()?
-            .error_from_body()?;
-        Ok(())
+    pub async fn tenant_list(&self) -> mgmt_api::Result<Vec<TenantInfo>> {
+        self.http_client.list_tenants().await
     }
 
-    pub fn tenant_list(&self) -> Result<Vec<TenantInfo>> {
-        Ok(self
-            .http_request(Method::GET, format!("{}/tenant", self.http_base_url))?
-            .send()?
-            .error_from_body()?
-            .json()?)
-    }
-
-    pub fn tenant_create(
+    pub async fn tenant_create(
         &self,
         new_tenant_id: TenantId,
         generation: Option<u32>,
@@ -407,6 +369,7 @@ impl PageServerNode {
                 .map(|x| x.parse::<bool>())
                 .transpose()
                 .context("Failed to parse 'gc_feedback' as bool")?,
+            heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
         };
 
         let request = models::TenantCreateRequest {
@@ -417,23 +380,10 @@ impl PageServerNode {
         if !settings.is_empty() {
             bail!("Unrecognized tenant settings: {settings:?}")
         }
-        self.http_request(Method::POST, format!("{}/tenant", self.http_base_url))?
-            .json(&request)
-            .send()?
-            .error_from_body()?
-            .json::<Option<String>>()
-            .with_context(|| {
-                format!("Failed to parse tenant creation response for tenant id: {new_tenant_id:?}")
-            })?
-            .context("No tenant id was found in the tenant creation response")
-            .and_then(|tenant_id_string| {
-                tenant_id_string.parse().with_context(|| {
-                    format!("Failed to parse response string as tenant id: '{tenant_id_string}'")
-                })
-            })
+        Ok(self.http_client.tenant_create(&request).await?)
     }
 
-    pub fn tenant_config(
+    pub async fn tenant_config(
         &self,
         tenant_id: TenantId,
         mut settings: HashMap<&str, &str>,
@@ -504,6 +454,7 @@ impl PageServerNode {
                     .map(|x| x.parse::<bool>())
                     .transpose()
                     .context("Failed to parse 'gc_feedback' as bool")?,
+                heatmap_period: settings.remove("heatmap_period").map(|x| x.to_string()),
             }
         };
 
@@ -511,54 +462,30 @@ impl PageServerNode {
             bail!("Unrecognized tenant settings: {settings:?}")
         }
 
-        self.http_request(Method::PUT, format!("{}/tenant/config", self.http_base_url))?
-            .json(&models::TenantConfigRequest { tenant_id, config })
-            .send()?
-            .error_from_body()?;
+        self.http_client
+            .tenant_config(&models::TenantConfigRequest { tenant_id, config })
+            .await?;
 
         Ok(())
     }
 
-    pub fn location_config(
+    pub async fn location_config(
         &self,
         tenant_id: TenantId,
         config: LocationConfig,
         flush_ms: Option<Duration>,
     ) -> anyhow::Result<()> {
-        let req_body = TenantLocationConfigRequest { tenant_id, config };
-
-        let path = format!(
-            "{}/tenant/{}/location_config",
-            self.http_base_url, tenant_id
-        );
-        let path = if let Some(flush_ms) = flush_ms {
-            format!("{}?flush_ms={}", path, flush_ms.as_millis())
-        } else {
-            path
-        };
-
-        self.http_request(Method::PUT, path)?
-            .json(&req_body)
-            .send()?
-            .error_from_body()?;
-
-        Ok(())
+        Ok(self
+            .http_client
+            .location_config(tenant_id, config, flush_ms)
+            .await?)
     }
 
-    pub fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
-        let timeline_infos: Vec<TimelineInfo> = self
-            .http_request(
-                Method::GET,
-                format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
-            )?
-            .send()?
-            .error_from_body()?
-            .json()?;
-
-        Ok(timeline_infos)
+    pub async fn timeline_list(&self, tenant_id: &TenantId) -> anyhow::Result<Vec<TimelineInfo>> {
+        Ok(self.http_client.list_timelines(*tenant_id).await?)
     }
 
-    pub fn timeline_create(
+    pub async fn timeline_create(
         &self,
         tenant_id: TenantId,
         new_timeline_id: Option<TimelineId>,
@@ -569,29 +496,14 @@ impl PageServerNode {
     ) -> anyhow::Result<TimelineInfo> {
         // If timeline ID was not specified, generate one
         let new_timeline_id = new_timeline_id.unwrap_or(TimelineId::generate());
-
-        self.http_request(
-            Method::POST,
-            format!("{}/tenant/{}/timeline", self.http_base_url, tenant_id),
-        )?
-        .json(&models::TimelineCreateRequest {
+        let req = models::TimelineCreateRequest {
             new_timeline_id,
             ancestor_start_lsn,
             ancestor_timeline_id,
             pg_version,
             existing_initdb_timeline_id,
-        })
-        .send()?
-        .error_from_body()?
-        .json::<Option<TimelineInfo>>()
-        .with_context(|| {
-            format!("Failed to parse timeline creation response for tenant id: {tenant_id}")
-        })?
-        .with_context(|| {
-            format!(
-                "No timeline id was found in the timeline creation response for tenant {tenant_id}"
-            )
-        })
+        };
+        Ok(self.http_client.timeline_create(tenant_id, &req).await?)
     }
 
     /// Import a basebackup prepared using either:
@@ -603,7 +515,7 @@ impl PageServerNode {
     /// * `timeline_id` - id to assign to imported timeline
     /// * `base` - (start lsn of basebackup, path to `base.tar` file)
     /// * `pg_wal` - if there's any wal to import: (end lsn, path to `pg_wal.tar`)
-    pub fn timeline_import(
+    pub async fn timeline_import(
         &self,
         tenant_id: TenantId,
         timeline_id: TimelineId,
@@ -611,36 +523,60 @@ impl PageServerNode {
         pg_wal: Option<(Lsn, PathBuf)>,
         pg_version: u32,
     ) -> anyhow::Result<()> {
-        let mut client = self.page_server_psql_client()?;
+        let (client, conn) = self.page_server_psql_client().await?;
+        // The connection object performs the actual communication with the database,
+        // so spawn it off to run on its own.
+        tokio::spawn(async move {
+            if let Err(e) = conn.await {
+                eprintln!("connection error: {}", e);
+            }
+        });
+        tokio::pin!(client);
 
         // Init base reader
         let (start_lsn, base_tarfile_path) = base;
-        let base_tarfile = File::open(base_tarfile_path)?;
-        let mut base_reader = BufReader::new(base_tarfile);
+        let base_tarfile = tokio::fs::File::open(base_tarfile_path).await?;
+        let base_tarfile = tokio_util::io::ReaderStream::new(base_tarfile);
 
         // Init wal reader if necessary
         let (end_lsn, wal_reader) = if let Some((end_lsn, wal_tarfile_path)) = pg_wal {
-            let wal_tarfile = File::open(wal_tarfile_path)?;
-            let wal_reader = BufReader::new(wal_tarfile);
+            let wal_tarfile = tokio::fs::File::open(wal_tarfile_path).await?;
+            let wal_reader = tokio_util::io::ReaderStream::new(wal_tarfile);
             (end_lsn, Some(wal_reader))
         } else {
             (start_lsn, None)
         };
 
-        // Import base
-        let import_cmd = format!(
-            "import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
-        );
-        let mut writer = client.copy_in(&import_cmd)?;
-        io::copy(&mut base_reader, &mut writer)?;
-        writer.finish()?;
+        let copy_in = |reader, cmd| {
+            let client = &client;
+            async move {
+                let writer = client.copy_in(&cmd).await?;
+                let writer = std::pin::pin!(writer);
+                let mut writer = writer.sink_map_err(|e| {
+                    std::io::Error::new(std::io::ErrorKind::Other, format!("{e}"))
+                });
+                let mut reader = std::pin::pin!(reader);
+                writer.send_all(&mut reader).await?;
+                writer.into_inner().finish().await?;
+                anyhow::Ok(())
+            }
+        };
 
+        // Import base
+        copy_in(
+            base_tarfile,
+            format!(
+                "import basebackup {tenant_id} {timeline_id} {start_lsn} {end_lsn} {pg_version}"
+            ),
+        )
+        .await?;
         // Import wal if necessary
-        if let Some(mut wal_reader) = wal_reader {
-            let import_cmd = format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}");
-            let mut writer = client.copy_in(&import_cmd)?;
-            io::copy(&mut wal_reader, &mut writer)?;
-            writer.finish()?;
+        if let Some(wal_reader) = wal_reader {
+            copy_in(
+                wal_reader,
+                format!("import wal {tenant_id} {timeline_id} {start_lsn} {end_lsn}"),
+            )
+            .await?;
         }
 
         Ok(())
diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs
index a8baa0ac53..4026ef0eb9 100644
--- a/control_plane/src/safekeeper.rs
+++ b/control_plane/src/safekeeper.rs
@@ -13,7 +13,6 @@ use std::{io, result};
 use anyhow::Context;
 use camino::Utf8PathBuf;
 use postgres_connection::PgConnectionConfig;
-use reqwest::blocking::{Client, RequestBuilder, Response};
 use reqwest::{IntoUrl, Method};
 use thiserror::Error;
 use utils::{http::error::HttpErrorBody, id::NodeId};
@@ -34,12 +33,14 @@ pub enum SafekeeperHttpError {
 
 type Result<T> = result::Result<T, SafekeeperHttpError>;
 
+#[async_trait::async_trait]
 pub trait ResponseErrorMessageExt: Sized {
-    fn error_from_body(self) -> Result<Self>;
+    async fn error_from_body(self) -> Result<Self>;
 }
 
-impl ResponseErrorMessageExt for Response {
-    fn error_from_body(self) -> Result<Self> {
+#[async_trait::async_trait]
+impl ResponseErrorMessageExt for reqwest::Response {
+    async fn error_from_body(self) -> Result<Self> {
         let status = self.status();
         if !(status.is_client_error() || status.is_server_error()) {
             return Ok(self);
@@ -48,7 +49,7 @@ impl ResponseErrorMessageExt for Response {
         // reqwest does not export its error construction utility functions, so let's craft the message ourselves
         let url = self.url().to_owned();
         Err(SafekeeperHttpError::Response(
-            match self.json::<HttpErrorBody>() {
+            match self.json::<HttpErrorBody>().await {
                 Ok(err_body) => format!("Error: {}", err_body.msg),
                 Err(_) => format!("Http error ({}) at {}.", status.as_u16(), url),
             },
@@ -69,7 +70,7 @@ pub struct SafekeeperNode {
 
     pub pg_connection_config: PgConnectionConfig,
     pub env: LocalEnv,
-    pub http_client: Client,
+    pub http_client: reqwest::Client,
     pub http_base_url: String,
 }
 
@@ -80,7 +81,7 @@ impl SafekeeperNode {
             conf: conf.clone(),
             pg_connection_config: Self::safekeeper_connection_config(conf.pg_port),
             env: env.clone(),
-            http_client: Client::new(),
+            http_client: reqwest::Client::new(),
             http_base_url: format!("http://127.0.0.1:{}/v1", conf.http_port),
         }
     }
@@ -103,7 +104,7 @@ impl SafekeeperNode {
             .expect("non-Unicode path")
     }
 
-    pub fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<Child> {
+    pub async fn start(&self, extra_opts: Vec<String>) -> anyhow::Result<Child> {
         print!(
             "Starting safekeeper at '{}' in '{}'",
             self.pg_connection_config.raw_address(),
@@ -191,13 +192,16 @@ impl SafekeeperNode {
             &self.env.safekeeper_bin(),
             &args,
             [],
-            background_process::InitialPidFile::Expect(&self.pid_file()),
-            || match self.check_status() {
-                Ok(()) => Ok(true),
-                Err(SafekeeperHttpError::Transport(_)) => Ok(false),
-                Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+            background_process::InitialPidFile::Expect(self.pid_file()),
+            || async {
+                match self.check_status().await {
+                    Ok(()) => Ok(true),
+                    Err(SafekeeperHttpError::Transport(_)) => Ok(false),
+                    Err(e) => Err(anyhow::anyhow!("Failed to check node status: {e}")),
+                }
             },
         )
+        .await
     }
 
     ///
@@ -216,7 +220,7 @@ impl SafekeeperNode {
         )
     }
 
-    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> RequestBuilder {
+    fn http_request<U: IntoUrl>(&self, method: Method, url: U) -> reqwest::RequestBuilder {
         // TODO: authentication
         //if self.env.auth_type == AuthType::NeonJWT {
         //    builder = builder.bearer_auth(&self.env.safekeeper_auth_token)
@@ -224,10 +228,12 @@ impl SafekeeperNode {
         self.http_client.request(method, url)
     }
 
-    pub fn check_status(&self) -> Result<()> {
+    pub async fn check_status(&self) -> Result<()> {
         self.http_request(Method::GET, format!("{}/{}", self.http_base_url, "status"))
-            .send()?
-            .error_from_body()?;
+            .send()
+            .await?
+            .error_from_body()
+            .await?;
         Ok(())
     }
 }
diff --git a/control_plane/src/tenant_migration.rs b/control_plane/src/tenant_migration.rs
index c0c44e279f..79df108896 100644
--- a/control_plane/src/tenant_migration.rs
+++ b/control_plane/src/tenant_migration.rs
@@ -19,11 +19,11 @@ use utils::{
 };
 
 /// Given an attached pageserver, retrieve the LSN for all timelines
-fn get_lsns(
+async fn get_lsns(
     tenant_id: TenantId,
     pageserver: &PageServerNode,
 ) -> anyhow::Result<HashMap<TimelineId, Lsn>> {
-    let timelines = pageserver.timeline_list(&tenant_id)?;
+    let timelines = pageserver.timeline_list(&tenant_id).await?;
     Ok(timelines
         .into_iter()
         .map(|t| (t.timeline_id, t.last_record_lsn))
@@ -32,13 +32,13 @@ fn get_lsns(
 
 /// Wait for the timeline LSNs on `pageserver` to catch up with or overtake
 /// `baseline`.
-fn await_lsn(
+async fn await_lsn(
     tenant_id: TenantId,
     pageserver: &PageServerNode,
     baseline: HashMap<TimelineId, Lsn>,
 ) -> anyhow::Result<()> {
     loop {
-        let latest = match get_lsns(tenant_id, pageserver) {
+        let latest = match get_lsns(tenant_id, pageserver).await {
             Ok(l) => l,
             Err(e) => {
                 println!(
@@ -84,7 +84,7 @@ fn await_lsn(
 ///  - Coordinate attach/secondary/detach on pageservers
 ///  - call into attachment_service for generations
 ///  - reconfigure compute endpoints to point to new attached pageserver
-pub fn migrate_tenant(
+pub async fn migrate_tenant(
     env: &LocalEnv,
     tenant_id: TenantId,
     dest_ps: PageServerNode,
@@ -108,16 +108,18 @@ pub fn migrate_tenant(
         }
     }
 
-    let previous = attachment_service.inspect(tenant_id)?;
+    let previous = attachment_service.inspect(tenant_id).await?;
     let mut baseline_lsns = None;
     if let Some((generation, origin_ps_id)) = &previous {
         let origin_ps = PageServerNode::from_env(env, env.get_pageserver_conf(*origin_ps_id)?);
 
         if origin_ps_id == &dest_ps.conf.id {
             println!("🔁 Already attached to {origin_ps_id}, freshening...");
-            let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?;
+            let gen = attachment_service
+                .attach_hook(tenant_id, dest_ps.conf.id)
+                .await?;
             let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
-            dest_ps.location_config(tenant_id, dest_conf, None)?;
+            dest_ps.location_config(tenant_id, dest_conf, None).await?;
             println!("✅ Migration complete");
             return Ok(());
         }
@@ -126,20 +128,24 @@ pub fn migrate_tenant(
 
         let stale_conf =
             build_location_config(LocationConfigMode::AttachedStale, Some(*generation), None);
-        origin_ps.location_config(tenant_id, stale_conf, Some(Duration::from_secs(10)))?;
+        origin_ps
+            .location_config(tenant_id, stale_conf, Some(Duration::from_secs(10)))
+            .await?;
 
-        baseline_lsns = Some(get_lsns(tenant_id, &origin_ps)?);
+        baseline_lsns = Some(get_lsns(tenant_id, &origin_ps).await?);
     }
 
-    let gen = attachment_service.attach_hook(tenant_id, dest_ps.conf.id)?;
+    let gen = attachment_service
+        .attach_hook(tenant_id, dest_ps.conf.id)
+        .await?;
     let dest_conf = build_location_config(LocationConfigMode::AttachedMulti, gen, None);
 
     println!("🔁 Attaching to pageserver {}", dest_ps.conf.id);
-    dest_ps.location_config(tenant_id, dest_conf, None)?;
+    dest_ps.location_config(tenant_id, dest_conf, None).await?;
 
     if let Some(baseline) = baseline_lsns {
         println!("🕑 Waiting for LSN to catch up...");
-        await_lsn(tenant_id, &dest_ps, baseline)?;
+        await_lsn(tenant_id, &dest_ps, baseline).await?;
     }
 
     let cplane = ComputeControlPlane::load(env.clone())?;
@@ -149,7 +155,7 @@ pub fn migrate_tenant(
                 "🔁 Reconfiguring endpoint {} to use pageserver {}",
                 endpoint_name, dest_ps.conf.id
             );
-            endpoint.reconfigure(Some(dest_ps.conf.id))?;
+            endpoint.reconfigure(Some(dest_ps.conf.id)).await?;
         }
     }
 
@@ -159,13 +165,13 @@ pub fn migrate_tenant(
         }
 
         let other_ps = PageServerNode::from_env(env, other_ps_conf);
-        let other_ps_tenants = other_ps.tenant_list()?;
+        let other_ps_tenants = other_ps.tenant_list().await?;
 
         // Check if this tenant is attached
         let found = other_ps_tenants
             .into_iter()
             .map(|t| t.id)
-            .any(|i| i == tenant_id);
+            .any(|i| i.tenant_id == tenant_id);
         if !found {
             continue;
         }
@@ -181,7 +187,9 @@ pub fn migrate_tenant(
             "💤 Switching to secondary mode on pageserver {}",
             other_ps.conf.id
         );
-        other_ps.location_config(tenant_id, secondary_conf, None)?;
+        other_ps
+            .location_config(tenant_id, secondary_conf, None)
+            .await?;
     }
 
     println!(
@@ -189,7 +197,7 @@ pub fn migrate_tenant(
         dest_ps.conf.id
     );
     let dest_conf = build_location_config(LocationConfigMode::AttachedSingle, gen, None);
-    dest_ps.location_config(tenant_id, dest_conf, None)?;
+    dest_ps.location_config(tenant_id, dest_conf, None).await?;
 
     println!("✅ Migration complete");
 
diff --git a/docs/rfcs/029-getpage-throttling.md b/docs/rfcs/029-getpage-throttling.md
new file mode 100644
index 0000000000..b4f9adefc5
--- /dev/null
+++ b/docs/rfcs/029-getpage-throttling.md
@@ -0,0 +1,197 @@
+# Per-Tenant GetPage@LSN Throttling
+
+Author: Christian Schwarz
+Date: Oct 24, 2023
+
+## Summary
+
+This RFC proposes per-tenant throttling of GetPage@LSN requests inside Pageserver
+and the interactions with its client, i.e., the neon_smgr component in Compute.
+
+The result of implementing & executing this RFC will be a fleet-wide upper limit for
+**"the highest GetPage/second that Pageserver can support for a single tenant/shard"**.
+
+## Background
+
+### GetPage@LSN Request Flow
+
+Pageserver exposes its `page_service.rs` as a libpq listener.
+The Computes' `neon_smgr` module connects to that libpq listener.
+Once a connection is established, the protocol allows Compute to request page images at a given LSN.
+We call these requests GetPage@LSN requests, or GetPage requests for short.
+Other request types can be sent, but these are low traffic compared to GetPage requests
+and are not the concern of this RFC.
+
+Pageserver associates one libpq connection with one tokio task.
+
+Per connection/task, the pq protocol is handled by the common `postgres_backend` crate.
+Its `run_message_loop` function invokes the `page_service` specific `impl<IO> postgres_backend::Handler<IO> for PageServerHandler`.
+Requests are processed in the order in which they arrive via the TCP-based pq protocol.
+So, there is no concurrent request processing within one connection/task.
+
+There is a degree of natural pipelining:
+Compute can "fill the pipe" by sending more than one GetPage request into the libpq TCP stream.
+And Pageserver can fill the pipe with responses in the other direction.
+Both directions are subject to the limit of tx/rx buffers, nodelay, TCP flow control, etc.
+
+### GetPage@LSN Access Pattern
+
+The Compute has its own hierarchy of caches, specifically `shared_buffers` and the `local file cache` (LFC).
+Compute only issues GetPage requests to Pageserver if it encounters a miss in these caches.
+
+If the working set stops fitting into Compute's caches, requests to Pageserver increase sharply -- the Compute starts *thrashing*.
+
+## Motivation
+
+In INC-69, a tenant issued 155k GetPage/second for a period of 10 minutes and 60k GetPage/second for a period of 3h,
+then dropping to ca 18k GetPage/second for a period of 9h.
+
+We noticed this because of an internal GetPage latency SLO burn rate alert, i.e.,
+the request latency profile during this period significantly exceeded what was acceptable according to the internal SLO.
+
+Sadly, we do not have the observability data to determine the impact of this tenant on other tenants on the same tenants.
+
+However, here are some illustrative data points for the 155k period:
+The tenant was responsible for >= 99% of the GetPage traffic and, frankly, the overall activity on this Pageserver instance.
+We were serving pages at 10 Gb/s (`155k x 8 kbyte (PAGE_SZ) per second is 1.12GiB/s = 9.4Gb/s.`)
+The CPU utilization of the instance was 75% user+system.
+Pageserver page cache served 1.75M accesses/second at a hit rate of ca 90%.
+The hit rate for materialized pages was ca. 40%.
+Curiously, IOPS to the Instance Store NVMe were very low, rarely exceeding 100.
+
+The fact that the IOPS were so low / the materialized page cache hit rate was so high suggests that **this tenant's compute's caches were thrashing**.
+The compute was of type `k8s-pod`; hence, auto-scaling could/would not have helped remediate the thrashing by provisioning more RAM.
+The consequence was that the **thrashing translated into excessive GetPage requests against Pageserver**.
+
+My claim is that it was **unhealthy to serve this workload at the pace we did**:
+* it is likely that other tenants were/would have experienced high latencies (again, we sadly don't have per-tenant latency data to confirm this)
+* more importantly, it was **unsustainable** to serve traffic at this pace for multiple reasons:
+    * **predictability of performance**: when the working set grows, the pageserver materialized page cache hit rate drops.
+      At some point, we're bound by the EC2 Instance Store NVMe drive's IOPS limit.
+      The result is an **uneven** performance profile from the Compute perspective.
+
+    * **economics**: Neon currently does not charge for IOPS, only capacity.
+      **We cannot afford to undercut the market in IOPS/$ this drastically; it leads to adverse selection and perverse incentives.**
+      For example, the 155k IOPS, which we served for 10min, would cost ca. 6.5k$/month when provisioned as an io2 EBS volume.
+      Even the 18k IOPS, which we served for 9h, would cost ca. 1.1k$/month when provisioned as an io2 EBS volume.
+      We charge 0$.
+      It could be economically advantageous to keep using a low-DRAM compute because Pageserver IOPS are fast enough and free.
+
+
+Note: It is helpful to think of Pageserver as a disk, because it's precisely where `neon_smgr` sits:
+vanilla Postgres gets its pages from disk, Neon Postgres gets them from Pageserver.
+So, regarding the above performance & economic arguments, it is fair to say that we currently provide an "as-fast-as-possible-IOPS" disk that we charge for only by capacity.
+
+## Solution: Throttling GetPage Requests
+
+**The consequence of the above analysis must be that Pageserver throttles GetPage@LSN requests**.
+That is, unless we want to start charging for provisioned GetPage@LSN/second.
+Throttling sets the correct incentive for a thrashing Compute to scale up its DRAM to the working set size.
+Neon Autoscaling will make this easy, [eventually](https://github.com/neondatabase/neon/pull/3913).
+
+## The Design Space
+
+What that remains is the question about *policy* and *mechanism*:
+
+**Policy** concerns itself with the question of what limit applies to a given connection|timeline|tenant.
+Candidates are:
+
+* hard limit, same limit value per connection|timeline|tenant
+    * Per-tenant will provide an upper bound for the impact of a tenant on a given Pageserver instance.
+      This is a major operational pain point / risk right now.
+* hard limit, configurable per connection|timeline|tenant
+    * This outsources policy to console/control plane, with obvious advantages for flexible structuring of what service we offer to customers.
+    * Note that this is not a mechanism to guarantee a minium provisioned rate, i.e., this is not a mechanism to guarantee a certain QoS for a tenant.
+* fair share among active connections|timelines|tenants per instance
+    * example: each connection|timeline|tenant gets a fair fraction of the machine's GetPage/second capacity
+    * NB: needs definition of "active", and knowledge of available GetPage/second capacity in advance
+* ...
+
+
+Regarding **mechanism**, it's clear that **backpressure** is the way to go.
+However, we must choose between
+* **implicit** backpressure through pq/TCP and
+* **explicit** rejection of requests + retries with exponential backoff
+
+Further, there is the question of how throttling GetPage@LSN will affect the **internal GetPage latency SLO**:
+where do we measure the SLI for Pageserver's internal getpage latency SLO? Before or after the throttling?
+
+And when we eventually move the measurement point into the Computes (to avoid coordinated omission),
+how do we avoid counting throttling-induced latency toward the internal getpage latency SLI/SLO?
+
+## Scope Of This RFC
+
+**This RFC proposes introducing a hard GetPage@LSN/second limit per tenant, with the same value applying to each tenant on a Pageserver**.
+
+This proposal is easy to implement and significantly de-risks operating large Pageservers,
+based on the assumption that extremely-high-GetPage-rate-episodes like the one from the "Motivation" section are uncorrelated between tenants.
+
+For example, suppose we pick a limit that allows up to 10 tenants to go at limit rate.
+Suppose our Pageserver can serve 100k GetPage/second total at a 100% page cache miss rate.
+If each tenant gets a hard limit of 10k GetPage/second, we can serve up to 10 tenants at limit speed without latency degradation.
+
+The mechanism for backpressure will be TCP-based implicit backpressure.
+The compute team isn't concerned about prefetch queue depth.
+Pageserver will implement it by delaying the reading of requests from the libpq connection(s).
+
+The rate limit will be implemented using a per-tenant token bucket.
+The bucket will be be shared among all connections to the tenant.
+The bucket implementation supports starvation-preventing `await`ing.
+The current candidate for the implementation is [`leaky_bucket`](https://docs.rs/leaky-bucket/).
+The getpage@lsn benchmark that's being added in https://github.com/neondatabase/neon/issues/5771
+can be used to evaluate the overhead of sharing the bucket among connections of a tenant.
+A possible technique to mitigate the impact of sharing the bucket would be to maintain a buffer of a few tokens per connection handler.
+
+Regarding metrics / the internal GetPage latency SLO:
+we will measure the GetPage latency SLO _after_ the throttler and introduce a new metric to measure the amount of throttling, quantified by:
+- histogram that records the tenants' observations of queue depth before they start waiting (one such histogram per pageserver)
+- histogram that records the tenants' observations of time spent waiting (one such histogram per pageserver)
+
+Further observability measures:
+- an INFO log message at frequency 1/min if the tenant/timeline/connection was throttled in that last minute.
+  The message will identify the tenant/timeline/connection to allow correlation with compute logs/stats.
+
+Rollout will happen as follows:
+- deploy 1: implementation + config: disabled by default, ability to enable it per tenant through tenant_conf
+- experimentation in staging and later production to study impact & interaction with auto-scaling
+- determination of a sensible global default value
+  - the value will be chosen as high as possible ...
+  - ... but low enough to work towards this RFC's goal that one tenant should not be able to dominate a pageserver instance.
+- deploy 2: implementation fixes if any + config: enabled by default with the aforementioned global default
+- reset of the experimental per-tenant overrides
+- gain experience & lower the limit over time
+  - we stop lowering the limit as soon as this RFC's goal is achieved, i.e.,
+    once we decide that in practice the chosen value sufficiently de-risks operating large pageservers
+
+The per-tenant override will remain for emergencies and testing.
+But since Console doesn't preserve it during tenant migrations, it isn't durably configurable for the tenant.
+
+Toward the upper layers of the Neon stack, the resulting limit will be
+**"the highest GetPage/second that Pageserver can support for a single tenant"**.
+
+### Rationale
+
+We decided against error + retry because of worries about starvation.
+
+## Future Work
+
+Enable per-tenant emergency override of the limit via Console.
+Should be part of a more general framework to specify tenant config overrides.
+**NB:** this is **not** the right mechanism to _sell_ different max GetPage/second levels to users,
+or _auto-scale_ the GetPage/second levels. Such functionality will require a separate RFC that
+concerns itself with GetPage/second capacity planning.
+
+Compute-side metrics for GetPage latency.
+
+Back-channel to inform Compute/Autoscaling/ControlPlane that the project is being throttled.
+
+Compute-side neon_smgr improvements to avoid sending the same GetPage request multiple times if multiple backends experience a cache miss.
+
+Dealing with read-only endpoints: users use read-only endpoints to scale reads for a single tenant.
+Possibly there are also assumptions around read-only endpoints not affecting the primary read-write endpoint's performance.
+With per-tenant rate limiting, we will not meet that expectation.
+However, we can currently only scale per tenant.
+Soon, we will have sharding (#5505), which will apply the throttling on a per-shard basis.
+But, that's orthogonal to scaling reads: if many endpoints hit one shard, they share the same throttling limit.
+To solve this properly, I think we'll need replicas for tenants / shard.
+To performance-isolate a tenant's endpoints from each other, we'd then route them to different replicas.
diff --git a/libs/metrics/src/lib.rs b/libs/metrics/src/lib.rs
index ed375a152f..d09ba11344 100644
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -3,8 +3,11 @@
 //! Otherwise, we might not see all metrics registered via
 //! a default registry.
 #![deny(clippy::undocumented_unsafe_blocks)]
+
 use once_cell::sync::Lazy;
-use prometheus::core::{AtomicU64, Collector, GenericGauge, GenericGaugeVec};
+use prometheus::core::{
+    Atomic, AtomicU64, Collector, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec,
+};
 pub use prometheus::opts;
 pub use prometheus::register;
 pub use prometheus::Error;
@@ -132,3 +135,137 @@ fn get_rusage_stats() -> libc::rusage {
         rusage.assume_init()
     }
 }
+
+/// Create an [`IntCounterPairVec`] and registers to default registry.
+#[macro_export(local_inner_macros)]
+macro_rules! register_int_counter_pair_vec {
+    ($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr, $LABELS_NAMES:expr $(,)?) => {{
+        match (
+            $crate::register_int_counter_vec!($NAME1, $HELP1, $LABELS_NAMES),
+            $crate::register_int_counter_vec!($NAME2, $HELP2, $LABELS_NAMES),
+        ) {
+            (Ok(inc), Ok(dec)) => Ok($crate::IntCounterPairVec::new(inc, dec)),
+            (Err(e), _) | (_, Err(e)) => Err(e),
+        }
+    }};
+}
+/// Create an [`IntCounterPair`] and registers to default registry.
+#[macro_export(local_inner_macros)]
+macro_rules! register_int_counter_pair {
+    ($NAME1:expr, $HELP1:expr, $NAME2:expr, $HELP2:expr $(,)?) => {{
+        match (
+            $crate::register_int_counter!($NAME1, $HELP1),
+            $crate::register_int_counter!($NAME2, $HELP2),
+        ) {
+            (Ok(inc), Ok(dec)) => Ok($crate::IntCounterPair::new(inc, dec)),
+            (Err(e), _) | (_, Err(e)) => Err(e),
+        }
+    }};
+}
+
+/// A Pair of [`GenericCounterVec`]s. Like an [`GenericGaugeVec`] but will always observe changes
+pub struct GenericCounterPairVec<P: Atomic> {
+    inc: GenericCounterVec<P>,
+    dec: GenericCounterVec<P>,
+}
+
+/// A Pair of [`GenericCounter`]s. Like an [`GenericGauge`] but will always observe changes
+pub struct GenericCounterPair<P: Atomic> {
+    inc: GenericCounter<P>,
+    dec: GenericCounter<P>,
+}
+
+impl<P: Atomic> GenericCounterPairVec<P> {
+    pub fn new(inc: GenericCounterVec<P>, dec: GenericCounterVec<P>) -> Self {
+        Self { inc, dec }
+    }
+
+    /// `get_metric_with_label_values` returns the [`GenericCounterPair<P>`] for the given slice
+    /// of label values (same order as the VariableLabels in Desc). If that combination of
+    /// label values is accessed for the first time, a new [`GenericCounterPair<P>`] is created.
+    ///
+    /// An error is returned if the number of label values is not the same as the
+    /// number of VariableLabels in Desc.
+    pub fn get_metric_with_label_values(&self, vals: &[&str]) -> Result<GenericCounterPair<P>> {
+        Ok(GenericCounterPair {
+            inc: self.inc.get_metric_with_label_values(vals)?,
+            dec: self.dec.get_metric_with_label_values(vals)?,
+        })
+    }
+
+    /// `with_label_values` works as `get_metric_with_label_values`, but panics if an error
+    /// occurs.
+    pub fn with_label_values(&self, vals: &[&str]) -> GenericCounterPair<P> {
+        self.get_metric_with_label_values(vals).unwrap()
+    }
+}
+
+impl<P: Atomic> GenericCounterPair<P> {
+    pub fn new(inc: GenericCounter<P>, dec: GenericCounter<P>) -> Self {
+        Self { inc, dec }
+    }
+
+    /// Increment the gauge by 1, returning a guard that decrements by 1 on drop.
+    pub fn guard(&self) -> GenericCounterPairGuard<P> {
+        self.inc.inc();
+        GenericCounterPairGuard(self.dec.clone())
+    }
+
+    /// Increment the gauge by n, returning a guard that decrements by n on drop.
+    pub fn guard_by(&self, n: P::T) -> GenericCounterPairGuardBy<P> {
+        self.inc.inc_by(n);
+        GenericCounterPairGuardBy(self.dec.clone(), n)
+    }
+
+    /// Increase the gauge by 1.
+    #[inline]
+    pub fn inc(&self) {
+        self.inc.inc();
+    }
+
+    /// Decrease the gauge by 1.
+    #[inline]
+    pub fn dec(&self) {
+        self.dec.inc();
+    }
+
+    /// Add the given value to the gauge. (The value can be
+    /// negative, resulting in a decrement of the gauge.)
+    #[inline]
+    pub fn inc_by(&self, v: P::T) {
+        self.inc.inc_by(v);
+    }
+
+    /// Subtract the given value from the gauge. (The value can be
+    /// negative, resulting in an increment of the gauge.)
+    #[inline]
+    pub fn dec_by(&self, v: P::T) {
+        self.dec.inc_by(v);
+    }
+}
+
+/// Guard returned by [`GenericCounterPair::guard`]
+pub struct GenericCounterPairGuard<P: Atomic>(GenericCounter<P>);
+
+impl<P: Atomic> Drop for GenericCounterPairGuard<P> {
+    fn drop(&mut self) {
+        self.0.inc();
+    }
+}
+/// Guard returned by [`GenericCounterPair::guard_by`]
+pub struct GenericCounterPairGuardBy<P: Atomic>(GenericCounter<P>, P::T);
+
+impl<P: Atomic> Drop for GenericCounterPairGuardBy<P> {
+    fn drop(&mut self) {
+        self.0.inc_by(self.1);
+    }
+}
+
+/// A Pair of [`IntCounterVec`]s. Like an [`IntGaugeVec`] but will always observe changes
+pub type IntCounterPairVec = GenericCounterPairVec<AtomicU64>;
+
+/// A Pair of [`IntCounter`]s. Like an [`IntGauge`] but will always observe changes
+pub type IntCounterPair = GenericCounterPair<AtomicU64>;
+
+/// A guard for [`IntCounterPair`] that will decrement the gauge on drop
+pub type IntCounterPairGuard = GenericCounterPairGuard<AtomicU64>;
diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml
index 4d08d78e87..4146597d8d 100644
--- a/libs/pageserver_api/Cargo.toml
+++ b/libs/pageserver_api/Cargo.toml
@@ -24,3 +24,4 @@ workspace_hack.workspace = true
 
 [dev-dependencies]
 bincode.workspace = true
+rand.workspace = true
diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs
index 16715bc667..d680a5600e 100644
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -144,3 +144,37 @@ impl Key {
 pub fn is_rel_block_key(key: &Key) -> bool {
     key.field1 == 0x00 && key.field4 != 0
 }
+
+impl std::str::FromStr for Key {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
+        Self::from_hex(s)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::str::FromStr;
+
+    use crate::key::Key;
+
+    use rand::Rng;
+    use rand::SeedableRng;
+
+    #[test]
+    fn display_fromstr_bijection() {
+        let mut rng = rand::rngs::StdRng::seed_from_u64(42);
+
+        let key = Key {
+            field1: rng.gen(),
+            field2: rng.gen(),
+            field3: rng.gen(),
+            field4: rng.gen(),
+            field5: rng.gen(),
+            field6: rng.gen(),
+        };
+
+        assert_eq!(key, Key::from_str(&format!("{key}")).unwrap());
+    }
+}
diff --git a/pageserver/src/keyspace.rs b/libs/pageserver_api/src/keyspace.rs
similarity index 93%
rename from pageserver/src/keyspace.rs
rename to libs/pageserver_api/src/keyspace.rs
index 20e6df9c7b..16651c322e 100644
--- a/pageserver/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -1,11 +1,12 @@
-use crate::repository::{key_range_size, singleton_range, Key};
 use postgres_ffi::BLCKSZ;
 use std::ops::Range;
 
+use crate::key::Key;
+
 ///
 /// Represents a set of Keys, in a compact form.
 ///
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct KeySpace {
     /// Contiguous ranges of keys that belong to the key space. In key order,
     /// and with no overlap.
@@ -186,6 +187,33 @@ impl KeySpaceRandomAccum {
     }
 }
 
+pub fn key_range_size(key_range: &Range<Key>) -> u32 {
+    let start = key_range.start;
+    let end = key_range.end;
+
+    if end.field1 != start.field1
+        || end.field2 != start.field2
+        || end.field3 != start.field3
+        || end.field4 != start.field4
+    {
+        return u32::MAX;
+    }
+
+    let start = (start.field5 as u64) << 32 | start.field6 as u64;
+    let end = (end.field5 as u64) << 32 | end.field6 as u64;
+
+    let diff = end - start;
+    if diff > u32::MAX as u64 {
+        u32::MAX
+    } else {
+        diff as u32
+    }
+}
+
+pub fn singleton_range(key: Key) -> Range<Key> {
+    key..key.next()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/libs/pageserver_api/src/lib.rs b/libs/pageserver_api/src/lib.rs
index 511c5ed208..b236b93428 100644
--- a/libs/pageserver_api/src/lib.rs
+++ b/libs/pageserver_api/src/lib.rs
@@ -5,6 +5,7 @@ use const_format::formatcp;
 /// Public API types
 pub mod control_api;
 pub mod key;
+pub mod keyspace;
 pub mod models;
 pub mod reltag;
 pub mod shard;
diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs
index 495a58e865..be41b610b8 100644
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1,5 +1,8 @@
+pub mod partitioning;
+
 use std::{
     collections::HashMap,
+    io::Read,
     num::{NonZeroU64, NonZeroUsize},
     time::SystemTime,
 };
@@ -17,7 +20,7 @@ use utils::{
 
 use crate::{reltag::RelTag, shard::TenantShardId};
 use anyhow::bail;
-use bytes::{BufMut, Bytes, BytesMut};
+use bytes::{Buf, BufMut, Bytes, BytesMut};
 
 /// The state of a tenant in this pageserver.
 ///
@@ -237,6 +240,7 @@ pub struct TenantConfig {
     pub min_resident_size_override: Option<u64>,
     pub evictions_low_residence_duration_metric_threshold: Option<String>,
     pub gc_feedback: Option<bool>,
+    pub heatmap_period: Option<String>,
 }
 
 /// A flattened analog of a `pagesever::tenant::LocationMode`, which
@@ -357,7 +361,7 @@ pub enum TenantAttachmentStatus {
 
 #[derive(Serialize, Deserialize, Clone)]
 pub struct TenantInfo {
-    pub id: TenantId,
+    pub id: TenantShardId,
     // NB: intentionally not part of OpenAPI, we don't want to commit to a specific set of TenantState's
     pub state: TenantState,
     /// Sum of the size of all layer files.
@@ -366,10 +370,18 @@ pub struct TenantInfo {
     pub attachment_status: TenantAttachmentStatus,
 }
 
+#[derive(Serialize, Deserialize, Clone)]
+pub struct TenantDetails {
+    #[serde(flatten)]
+    pub tenant_info: TenantInfo,
+
+    pub timelines: Vec<TimelineId>,
+}
+
 /// This represents the output of the "timeline_detail" and "timeline_list" API calls.
 #[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct TimelineInfo {
-    pub tenant_id: TenantId,
+    pub tenant_id: TenantShardId,
     pub timeline_id: TimelineId,
 
     pub ancestor_timeline_id: Option<TimelineId>,
@@ -385,6 +397,9 @@ pub struct TimelineInfo {
     /// The LSN that we are advertizing to safekeepers
     pub remote_consistent_lsn_visible: Lsn,
 
+    /// The LSN from the start of the root timeline (never changes)
+    pub initdb_lsn: Lsn,
+
     pub current_logical_size: u64,
     pub current_logical_size_is_accurate: bool,
 
@@ -570,6 +585,7 @@ pub enum PagestreamFeMessage {
 }
 
 // Wrapped in libpq CopyData
+#[derive(strum_macros::EnumProperty)]
 pub enum PagestreamBeMessage {
     Exists(PagestreamExistsResponse),
     Nblocks(PagestreamNblocksResponse),
@@ -578,6 +594,29 @@ pub enum PagestreamBeMessage {
     DbSize(PagestreamDbSizeResponse),
 }
 
+// Keep in sync with `pagestore_client.h`
+#[repr(u8)]
+enum PagestreamBeMessageTag {
+    Exists = 100,
+    Nblocks = 101,
+    GetPage = 102,
+    Error = 103,
+    DbSize = 104,
+}
+impl TryFrom<u8> for PagestreamBeMessageTag {
+    type Error = u8;
+    fn try_from(value: u8) -> Result<Self, u8> {
+        match value {
+            100 => Ok(PagestreamBeMessageTag::Exists),
+            101 => Ok(PagestreamBeMessageTag::Nblocks),
+            102 => Ok(PagestreamBeMessageTag::GetPage),
+            103 => Ok(PagestreamBeMessageTag::Error),
+            104 => Ok(PagestreamBeMessageTag::DbSize),
+            _ => Err(value),
+        }
+    }
+}
+
 #[derive(Debug, PartialEq, Eq)]
 pub struct PagestreamExistsRequest {
     pub latest: bool,
@@ -733,35 +772,91 @@ impl PagestreamBeMessage {
     pub fn serialize(&self) -> Bytes {
         let mut bytes = BytesMut::new();
 
+        use PagestreamBeMessageTag as Tag;
         match self {
             Self::Exists(resp) => {
-                bytes.put_u8(100); /* tag from pagestore_client.h */
+                bytes.put_u8(Tag::Exists as u8);
                 bytes.put_u8(resp.exists as u8);
             }
 
             Self::Nblocks(resp) => {
-                bytes.put_u8(101); /* tag from pagestore_client.h */
+                bytes.put_u8(Tag::Nblocks as u8);
                 bytes.put_u32(resp.n_blocks);
             }
 
             Self::GetPage(resp) => {
-                bytes.put_u8(102); /* tag from pagestore_client.h */
+                bytes.put_u8(Tag::GetPage as u8);
                 bytes.put(&resp.page[..]);
             }
 
             Self::Error(resp) => {
-                bytes.put_u8(103); /* tag from pagestore_client.h */
+                bytes.put_u8(Tag::Error as u8);
                 bytes.put(resp.message.as_bytes());
                 bytes.put_u8(0); // null terminator
             }
             Self::DbSize(resp) => {
-                bytes.put_u8(104); /* tag from pagestore_client.h */
+                bytes.put_u8(Tag::DbSize as u8);
                 bytes.put_i64(resp.db_size);
             }
         }
 
         bytes.into()
     }
+
+    pub fn deserialize(buf: Bytes) -> anyhow::Result<Self> {
+        let mut buf = buf.reader();
+        let msg_tag = buf.read_u8()?;
+
+        use PagestreamBeMessageTag as Tag;
+        let ok =
+            match Tag::try_from(msg_tag).map_err(|tag: u8| anyhow::anyhow!("invalid tag {tag}"))? {
+                Tag::Exists => {
+                    let exists = buf.read_u8()?;
+                    Self::Exists(PagestreamExistsResponse {
+                        exists: exists != 0,
+                    })
+                }
+                Tag::Nblocks => {
+                    let n_blocks = buf.read_u32::<BigEndian>()?;
+                    Self::Nblocks(PagestreamNblocksResponse { n_blocks })
+                }
+                Tag::GetPage => {
+                    let mut page = vec![0; 8192]; // TODO: use MaybeUninit
+                    buf.read_exact(&mut page)?;
+                    PagestreamBeMessage::GetPage(PagestreamGetPageResponse { page: page.into() })
+                }
+                Tag::Error => {
+                    let buf = buf.get_ref();
+                    let cstr = std::ffi::CStr::from_bytes_until_nul(buf)?;
+                    let rust_str = cstr.to_str()?;
+                    PagestreamBeMessage::Error(PagestreamErrorResponse {
+                        message: rust_str.to_owned(),
+                    })
+                }
+                Tag::DbSize => {
+                    let db_size = buf.read_i64::<BigEndian>()?;
+                    Self::DbSize(PagestreamDbSizeResponse { db_size })
+                }
+            };
+        let remaining = buf.into_inner();
+        if !remaining.is_empty() {
+            anyhow::bail!(
+                "remaining bytes in msg with tag={msg_tag}: {}",
+                remaining.len()
+            );
+        }
+        Ok(ok)
+    }
+
+    pub fn kind(&self) -> &'static str {
+        match self {
+            Self::Exists(_) => "Exists",
+            Self::Nblocks(_) => "Nblocks",
+            Self::GetPage(_) => "GetPage",
+            Self::Error(_) => "Error",
+            Self::DbSize(_) => "DbSize",
+        }
+    }
 }
 
 #[cfg(test)]
@@ -823,7 +918,7 @@ mod tests {
     fn test_tenantinfo_serde() {
         // Test serialization/deserialization of TenantInfo
         let original_active = TenantInfo {
-            id: TenantId::generate(),
+            id: TenantShardId::unsharded(TenantId::generate()),
             state: TenantState::Active,
             current_physical_size: Some(42),
             attachment_status: TenantAttachmentStatus::Attached,
@@ -840,7 +935,7 @@ mod tests {
         });
 
         let original_broken = TenantInfo {
-            id: TenantId::generate(),
+            id: TenantShardId::unsharded(TenantId::generate()),
             state: TenantState::Broken {
                 reason: "reason".into(),
                 backtrace: "backtrace info".into(),
diff --git a/libs/pageserver_api/src/models/partitioning.rs b/libs/pageserver_api/src/models/partitioning.rs
new file mode 100644
index 0000000000..0d287f7be0
--- /dev/null
+++ b/libs/pageserver_api/src/models/partitioning.rs
@@ -0,0 +1,151 @@
+use utils::lsn::Lsn;
+
+#[derive(Debug, PartialEq, Eq)]
+pub struct Partitioning {
+    pub keys: crate::keyspace::KeySpace,
+
+    pub at_lsn: Lsn,
+}
+
+impl serde::Serialize for Partitioning {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        pub struct KeySpace<'a>(&'a crate::keyspace::KeySpace);
+
+        impl<'a> serde::Serialize for KeySpace<'a> {
+            fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+            where
+                S: serde::Serializer,
+            {
+                use serde::ser::SerializeSeq;
+                let mut seq = serializer.serialize_seq(Some(self.0.ranges.len()))?;
+                for kr in &self.0.ranges {
+                    seq.serialize_element(&KeyRange(kr))?;
+                }
+                seq.end()
+            }
+        }
+
+        use serde::ser::SerializeMap;
+        let mut map = serializer.serialize_map(Some(2))?;
+        map.serialize_key("keys")?;
+        map.serialize_value(&KeySpace(&self.keys))?;
+        map.serialize_key("at_lsn")?;
+        map.serialize_value(&WithDisplay(&self.at_lsn))?;
+        map.end()
+    }
+}
+
+pub struct WithDisplay<'a, T>(&'a T);
+
+impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
+    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.collect_str(&self.0)
+    }
+}
+
+pub struct KeyRange<'a>(&'a std::ops::Range<crate::key::Key>);
+
+impl<'a> serde::Serialize for KeyRange<'a> {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeTuple;
+        let mut t = serializer.serialize_tuple(2)?;
+        t.serialize_element(&WithDisplay(&self.0.start))?;
+        t.serialize_element(&WithDisplay(&self.0.end))?;
+        t.end()
+    }
+}
+
+impl<'a> serde::Deserialize<'a> for Partitioning {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'a>,
+    {
+        pub struct KeySpace(crate::keyspace::KeySpace);
+
+        impl<'de> serde::Deserialize<'de> for KeySpace {
+            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+            where
+                D: serde::Deserializer<'de>,
+            {
+                #[serde_with::serde_as]
+                #[derive(serde::Deserialize)]
+                #[serde(transparent)]
+                struct Key(#[serde_as(as = "serde_with::DisplayFromStr")] crate::key::Key);
+
+                #[serde_with::serde_as]
+                #[derive(serde::Deserialize)]
+                struct Range(Key, Key);
+
+                let ranges: Vec<Range> = serde::Deserialize::deserialize(deserializer)?;
+                Ok(Self(crate::keyspace::KeySpace {
+                    ranges: ranges
+                        .into_iter()
+                        .map(|Range(start, end)| (start.0..end.0))
+                        .collect(),
+                }))
+            }
+        }
+
+        #[serde_with::serde_as]
+        #[derive(serde::Deserialize)]
+        struct De {
+            keys: KeySpace,
+            #[serde_as(as = "serde_with::DisplayFromStr")]
+            at_lsn: Lsn,
+        }
+
+        let de: De = serde::Deserialize::deserialize(deserializer)?;
+        Ok(Self {
+            at_lsn: de.at_lsn,
+            keys: de.keys.0,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_serialization_roundtrip() {
+        let reference = r#"
+        {
+            "keys": [
+              [
+                "000000000000000000000000000000000000",
+                "000000000000000000000000000000000001"
+              ],
+              [
+                "000000067F00000001000000000000000000",
+                "000000067F00000001000000000000000002"
+              ],
+              [
+                "030000000000000000000000000000000000",
+                "030000000000000000000000000000000003"
+              ]
+            ],
+            "at_lsn": "0/2240160"
+        }
+        "#;
+
+        let de: Partitioning = serde_json::from_str(reference).unwrap();
+
+        let ser = serde_json::to_string(&de).unwrap();
+
+        let ser_de: serde_json::Value = serde_json::from_str(&ser).unwrap();
+
+        assert_eq!(
+            ser_de,
+            serde_json::from_str::<'_, serde_json::Value>(reference).unwrap()
+        );
+    }
+}
diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs
index 9e83e0eee2..3668f7939d 100644
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -76,6 +76,11 @@ impl TenantShardId {
     pub fn shard_slug(&self) -> impl std::fmt::Display + '_ {
         ShardSlug(self)
     }
+
+    /// Convenience for code that has special behavior on the 0th shard.
+    pub fn is_zero(&self) -> bool {
+        self.shard_number == ShardNumber(0)
+    }
 }
 
 /// Formatting helper
@@ -154,7 +159,7 @@ impl From<[u8; 18]> for TenantShardId {
 /// shard we're dealing with, but do not need to know the full ShardIdentity (because
 /// we won't be doing any page->shard mapping), and do not need to know the fully qualified
 /// TenantShardId.
-#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy)]
+#[derive(Eq, PartialEq, PartialOrd, Ord, Clone, Copy, Hash)]
 pub struct ShardIndex {
     pub shard_number: ShardNumber,
     pub shard_count: ShardCount,
diff --git a/libs/postgres_connection/src/lib.rs b/libs/postgres_connection/src/lib.rs
index 35cb1a2691..ccf9108895 100644
--- a/libs/postgres_connection/src/lib.rs
+++ b/libs/postgres_connection/src/lib.rs
@@ -163,8 +163,18 @@ impl PgConnectionConfig {
     }
 
     /// Connect using postgres protocol with TLS disabled.
-    pub fn connect_no_tls(&self) -> Result<postgres::Client, postgres::Error> {
-        postgres::Config::from(self.to_tokio_postgres_config()).connect(postgres::NoTls)
+    pub async fn connect_no_tls(
+        &self,
+    ) -> Result<
+        (
+            tokio_postgres::Client,
+            tokio_postgres::Connection<tokio_postgres::Socket, tokio_postgres::tls::NoTlsStream>,
+        ),
+        postgres::Error,
+    > {
+        self.to_tokio_postgres_config()
+            .connect(postgres::NoTls)
+            .await
     }
 }
 
diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs
index e559d00ded..548bde02f6 100644
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -271,17 +271,12 @@ impl RemoteStorage for AzureBlobStorage {
 
         let mut builder = blob_client.get();
 
-        if let Some(end_exclusive) = end_exclusive {
-            builder = builder.range(Range::new(start_inclusive, end_exclusive));
+        let range: Range = if let Some(end_exclusive) = end_exclusive {
+            (start_inclusive..end_exclusive).into()
         } else {
-            // Open ranges are not supported by the SDK so we work around
-            // by setting the upper limit extremely high (but high enough
-            // to still be representable by signed 64 bit integers).
-            // TODO remove workaround once the SDK adds open range support
-            // https://github.com/Azure/azure-sdk-for-rust/issues/1438
-            let end_exclusive = u64::MAX / 4;
-            builder = builder.range(Range::new(start_inclusive, end_exclusive));
-        }
+            (start_inclusive..).into()
+        };
+        builder = builder.range(range);
 
         self.download_for_builder(builder).await
     }
diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs
index 97fa1bbf5b..d63a5ed99b 100644
--- a/libs/remote_storage/src/s3_bucket.rs
+++ b/libs/remote_storage/src/s3_bucket.rs
@@ -218,14 +218,6 @@ impl S3Bucket {
 
         let started_at = ScopeGuard::into_inner(started_at);
 
-        if get_object.is_err() {
-            metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
-                kind,
-                AttemptOutcome::Err,
-                started_at,
-            );
-        }
-
         match get_object {
             Ok(object_output) => {
                 let metadata = object_output.metadata().cloned().map(StorageMetadata);
@@ -241,11 +233,27 @@ impl S3Bucket {
                 })
             }
             Err(SdkError::ServiceError(e)) if matches!(e.err(), GetObjectError::NoSuchKey(_)) => {
+                // Count this in the AttemptOutcome::Ok bucket, because 404 is not
+                // an error: we expect to sometimes fetch an object and find it missing,
+                // e.g. when probing for timeline indices.
+                metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
+                    kind,
+                    AttemptOutcome::Ok,
+                    started_at,
+                );
                 Err(DownloadError::NotFound)
             }
-            Err(e) => Err(DownloadError::Other(
-                anyhow::Error::new(e).context("download s3 object"),
-            )),
+            Err(e) => {
+                metrics::BUCKET_METRICS.req_seconds.observe_elapsed(
+                    kind,
+                    AttemptOutcome::Err,
+                    started_at,
+                );
+
+                Err(DownloadError::Other(
+                    anyhow::Error::new(e).context("download s3 object"),
+                ))
+            }
         }
     }
 }
diff --git a/libs/remote_storage/tests/common/mod.rs b/libs/remote_storage/tests/common/mod.rs
new file mode 100644
index 0000000000..bca117ed1a
--- /dev/null
+++ b/libs/remote_storage/tests/common/mod.rs
@@ -0,0 +1,200 @@
+use std::collections::HashSet;
+use std::ops::ControlFlow;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use anyhow::Context;
+use bytes::Bytes;
+use camino::Utf8Path;
+use futures::stream::Stream;
+use once_cell::sync::OnceCell;
+use remote_storage::{Download, GenericRemoteStorage, RemotePath};
+use tokio::task::JoinSet;
+use tracing::{debug, error, info};
+
+static LOGGING_DONE: OnceCell<()> = OnceCell::new();
+
+pub(crate) fn upload_stream(
+    content: std::borrow::Cow<'static, [u8]>,
+) -> (
+    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+    usize,
+) {
+    use std::borrow::Cow;
+
+    let content = match content {
+        Cow::Borrowed(x) => Bytes::from_static(x),
+        Cow::Owned(vec) => Bytes::from(vec),
+    };
+    wrap_stream(content)
+}
+
+pub(crate) fn wrap_stream(
+    content: bytes::Bytes,
+) -> (
+    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
+    usize,
+) {
+    let len = content.len();
+    let content = futures::future::ready(Ok(content));
+
+    (futures::stream::once(content), len)
+}
+
+pub(crate) async fn download_to_vec(dl: Download) -> anyhow::Result<Vec<u8>> {
+    let mut buf = Vec::new();
+    tokio::io::copy_buf(
+        &mut tokio_util::io::StreamReader::new(dl.download_stream),
+        &mut buf,
+    )
+    .await?;
+    Ok(buf)
+}
+
+// Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
+pub(crate) async fn upload_simple_remote_data(
+    client: &Arc<GenericRemoteStorage>,
+    upload_tasks_count: usize,
+) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
+    info!("Creating {upload_tasks_count} remote files");
+    let mut upload_tasks = JoinSet::new();
+    for i in 1..upload_tasks_count + 1 {
+        let task_client = Arc::clone(client);
+        upload_tasks.spawn(async move {
+            let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
+            let blob_path = RemotePath::new(
+                Utf8Path::from_path(blob_path.as_path()).expect("must be valid blob path"),
+            )
+            .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
+            debug!("Creating remote item {i} at path {blob_path:?}");
+
+            let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
+            task_client.upload(data, len, &blob_path, None).await?;
+
+            Ok::<_, anyhow::Error>(blob_path)
+        });
+    }
+
+    let mut upload_tasks_failed = false;
+    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
+    while let Some(task_run_result) = upload_tasks.join_next().await {
+        match task_run_result
+            .context("task join failed")
+            .and_then(|task_result| task_result.context("upload task failed"))
+        {
+            Ok(upload_path) => {
+                uploaded_blobs.insert(upload_path);
+            }
+            Err(e) => {
+                error!("Upload task failed: {e:?}");
+                upload_tasks_failed = true;
+            }
+        }
+    }
+
+    if upload_tasks_failed {
+        ControlFlow::Break(uploaded_blobs)
+    } else {
+        ControlFlow::Continue(uploaded_blobs)
+    }
+}
+
+pub(crate) async fn cleanup(
+    client: &Arc<GenericRemoteStorage>,
+    objects_to_delete: HashSet<RemotePath>,
+) {
+    info!(
+        "Removing {} objects from the remote storage during cleanup",
+        objects_to_delete.len()
+    );
+    let mut delete_tasks = JoinSet::new();
+    for object_to_delete in objects_to_delete {
+        let task_client = Arc::clone(client);
+        delete_tasks.spawn(async move {
+            debug!("Deleting remote item at path {object_to_delete:?}");
+            task_client
+                .delete(&object_to_delete)
+                .await
+                .with_context(|| format!("{object_to_delete:?} removal"))
+        });
+    }
+
+    while let Some(task_run_result) = delete_tasks.join_next().await {
+        match task_run_result {
+            Ok(task_result) => match task_result {
+                Ok(()) => {}
+                Err(e) => error!("Delete task failed: {e:?}"),
+            },
+            Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
+        }
+    }
+}
+pub(crate) struct Uploads {
+    pub(crate) prefixes: HashSet<RemotePath>,
+    pub(crate) blobs: HashSet<RemotePath>,
+}
+
+pub(crate) async fn upload_remote_data(
+    client: &Arc<GenericRemoteStorage>,
+    base_prefix_str: &'static str,
+    upload_tasks_count: usize,
+) -> ControlFlow<Uploads, Uploads> {
+    info!("Creating {upload_tasks_count} remote files");
+    let mut upload_tasks = JoinSet::new();
+    for i in 1..upload_tasks_count + 1 {
+        let task_client = Arc::clone(client);
+        upload_tasks.spawn(async move {
+            let prefix = format!("{base_prefix_str}/sub_prefix_{i}/");
+            let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))
+                .with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
+            let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
+            debug!("Creating remote item {i} at path {blob_path:?}");
+
+            let (data, data_len) =
+                upload_stream(format!("remote blob data {i}").into_bytes().into());
+            task_client.upload(data, data_len, &blob_path, None).await?;
+
+            Ok::<_, anyhow::Error>((blob_prefix, blob_path))
+        });
+    }
+
+    let mut upload_tasks_failed = false;
+    let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
+    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
+    while let Some(task_run_result) = upload_tasks.join_next().await {
+        match task_run_result
+            .context("task join failed")
+            .and_then(|task_result| task_result.context("upload task failed"))
+        {
+            Ok((upload_prefix, upload_path)) => {
+                uploaded_prefixes.insert(upload_prefix);
+                uploaded_blobs.insert(upload_path);
+            }
+            Err(e) => {
+                error!("Upload task failed: {e:?}");
+                upload_tasks_failed = true;
+            }
+        }
+    }
+
+    let uploads = Uploads {
+        prefixes: uploaded_prefixes,
+        blobs: uploaded_blobs,
+    };
+    if upload_tasks_failed {
+        ControlFlow::Break(uploads)
+    } else {
+        ControlFlow::Continue(uploads)
+    }
+}
+
+pub(crate) fn ensure_logging_ready() {
+    LOGGING_DONE.get_or_init(|| {
+        utils::logging::init(
+            utils::logging::LogFormat::Test,
+            utils::logging::TracingErrorLayerEnablement::Disabled,
+            utils::logging::Output::Stdout,
+        )
+        .expect("logging init failed");
+    });
+}
diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs
index 7327803198..0387dc30e7 100644
--- a/libs/remote_storage/tests/test_real_azure.rs
+++ b/libs/remote_storage/tests/test_real_azure.rs
@@ -2,23 +2,23 @@ use std::collections::HashSet;
 use std::env;
 use std::num::NonZeroUsize;
 use std::ops::ControlFlow;
-use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::UNIX_EPOCH;
 
 use anyhow::Context;
-use bytes::Bytes;
 use camino::Utf8Path;
-use futures::stream::Stream;
-use once_cell::sync::OnceCell;
 use remote_storage::{
-    AzureConfig, Download, GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind,
+    AzureConfig, GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind,
 };
 use test_context::{test_context, AsyncTestContext};
-use tokio::task::JoinSet;
-use tracing::{debug, error, info};
+use tracing::{debug, info};
 
-static LOGGING_DONE: OnceCell<()> = OnceCell::new();
+mod common;
+
+use common::{
+    cleanup, download_to_vec, ensure_logging_ready, upload_remote_data, upload_simple_remote_data,
+    upload_stream, wrap_stream,
+};
 
 const ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_AZURE_REMOTE_STORAGE";
 
@@ -30,7 +30,7 @@ const BASE_PREFIX: &str = "test";
 /// If real Azure tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark the test ignored in runtime with the
 /// deafult test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
 ///
-/// First, the test creates a set of Azure blobs with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_azure_data`]
+/// First, the test creates a set of Azure blobs with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_remote_data`]
 /// where
 /// * `random_prefix_part` is set for the entire Azure client during the Azure client creation in [`create_azure_client`], to avoid multiple test runs interference
 /// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket
@@ -97,7 +97,7 @@ async fn azure_pagination_should_work(
 /// Uses real Azure and requires [`ENABLE_REAL_AZURE_REMOTE_STORAGE_ENV_VAR_NAME`] and related Azure cred env vars specified. Test will skip real code and pass if env vars not set.
 /// See `Azure_pagination_should_work` for more information.
 ///
-/// First, create a set of Azure objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_azure_data`]
+/// First, create a set of Azure objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`]
 /// Then performs the following queries:
 ///    1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
 ///    2. `list_files("folder1")`.  This  should return all files `random_prefix/folder1/blob_{i}.txt`
@@ -218,18 +218,9 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res
 
     ctx.client.upload(data, len, &path, None).await?;
 
-    async fn download_and_compare(dl: Download) -> anyhow::Result<Vec<u8>> {
-        let mut buf = Vec::new();
-        tokio::io::copy_buf(
-            &mut tokio_util::io::StreamReader::new(dl.download_stream),
-            &mut buf,
-        )
-        .await?;
-        Ok(buf)
-    }
     // Normal download request
     let dl = ctx.client.download(&path).await?;
-    let buf = download_and_compare(dl).await?;
+    let buf = download_to_vec(dl).await?;
     assert_eq!(&buf, &orig);
 
     // Full range (end specified)
@@ -237,12 +228,12 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res
         .client
         .download_byte_range(&path, 0, Some(len as u64))
         .await?;
-    let buf = download_and_compare(dl).await?;
+    let buf = download_to_vec(dl).await?;
     assert_eq!(&buf, &orig);
 
     // partial range (end specified)
     let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
-    let buf = download_and_compare(dl).await?;
+    let buf = download_to_vec(dl).await?;
     assert_eq!(&buf, &orig[4..10]);
 
     // partial range (end beyond real end)
@@ -250,17 +241,17 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res
         .client
         .download_byte_range(&path, 8, Some(len as u64 * 100))
         .await?;
-    let buf = download_and_compare(dl).await?;
+    let buf = download_to_vec(dl).await?;
     assert_eq!(&buf, &orig[8..]);
 
     // Partial range (end unspecified)
     let dl = ctx.client.download_byte_range(&path, 4, None).await?;
-    let buf = download_and_compare(dl).await?;
+    let buf = download_to_vec(dl).await?;
     assert_eq!(&buf, &orig[4..]);
 
     // Full range (end unspecified)
     let dl = ctx.client.download_byte_range(&path, 0, None).await?;
-    let buf = download_and_compare(dl).await?;
+    let buf = download_to_vec(dl).await?;
     assert_eq!(&buf, &orig);
 
     debug!("Cleanup: deleting file at path {path:?}");
@@ -272,17 +263,6 @@ async fn azure_upload_download_works(ctx: &mut MaybeEnabledAzure) -> anyhow::Res
     Ok(())
 }
 
-fn ensure_logging_ready() {
-    LOGGING_DONE.get_or_init(|| {
-        utils::logging::init(
-            utils::logging::LogFormat::Test,
-            utils::logging::TracingErrorLayerEnablement::Disabled,
-            utils::logging::Output::Stdout,
-        )
-        .expect("logging init failed");
-    });
-}
-
 struct EnabledAzure {
     client: Arc<GenericRemoteStorage>,
     base_prefix: &'static str,
@@ -352,7 +332,7 @@ impl AsyncTestContext for MaybeEnabledAzureWithTestBlobs {
 
         let enabled = EnabledAzure::setup(Some(max_keys_in_list_response)).await;
 
-        match upload_azure_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {
+        match upload_remote_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {
             ControlFlow::Continue(uploads) => {
                 info!("Remote objects created successfully");
 
@@ -414,7 +394,7 @@ impl AsyncTestContext for MaybeEnabledAzureWithSimpleTestBlobs {
 
         let enabled = EnabledAzure::setup(Some(max_keys_in_list_response)).await;
 
-        match upload_simple_azure_data(&enabled.client, upload_tasks_count).await {
+        match upload_simple_remote_data(&enabled.client, upload_tasks_count).await {
             ControlFlow::Continue(uploads) => {
                 info!("Remote objects created successfully");
 
@@ -478,166 +458,3 @@ fn create_azure_client(
         GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
     ))
 }
-
-struct Uploads {
-    prefixes: HashSet<RemotePath>,
-    blobs: HashSet<RemotePath>,
-}
-
-async fn upload_azure_data(
-    client: &Arc<GenericRemoteStorage>,
-    base_prefix_str: &'static str,
-    upload_tasks_count: usize,
-) -> ControlFlow<Uploads, Uploads> {
-    info!("Creating {upload_tasks_count} Azure files");
-    let mut upload_tasks = JoinSet::new();
-    for i in 1..upload_tasks_count + 1 {
-        let task_client = Arc::clone(client);
-        upload_tasks.spawn(async move {
-            let prefix = format!("{base_prefix_str}/sub_prefix_{i}/");
-            let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))
-                .with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
-            let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
-            debug!("Creating remote item {i} at path {blob_path:?}");
-
-            let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, len, &blob_path, None).await?;
-
-            Ok::<_, anyhow::Error>((blob_prefix, blob_path))
-        });
-    }
-
-    let mut upload_tasks_failed = false;
-    let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
-    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
-    while let Some(task_run_result) = upload_tasks.join_next().await {
-        match task_run_result
-            .context("task join failed")
-            .and_then(|task_result| task_result.context("upload task failed"))
-        {
-            Ok((upload_prefix, upload_path)) => {
-                uploaded_prefixes.insert(upload_prefix);
-                uploaded_blobs.insert(upload_path);
-            }
-            Err(e) => {
-                error!("Upload task failed: {e:?}");
-                upload_tasks_failed = true;
-            }
-        }
-    }
-
-    let uploads = Uploads {
-        prefixes: uploaded_prefixes,
-        blobs: uploaded_blobs,
-    };
-    if upload_tasks_failed {
-        ControlFlow::Break(uploads)
-    } else {
-        ControlFlow::Continue(uploads)
-    }
-}
-
-async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<RemotePath>) {
-    info!(
-        "Removing {} objects from the remote storage during cleanup",
-        objects_to_delete.len()
-    );
-    let mut delete_tasks = JoinSet::new();
-    for object_to_delete in objects_to_delete {
-        let task_client = Arc::clone(client);
-        delete_tasks.spawn(async move {
-            debug!("Deleting remote item at path {object_to_delete:?}");
-            task_client
-                .delete(&object_to_delete)
-                .await
-                .with_context(|| format!("{object_to_delete:?} removal"))
-        });
-    }
-
-    while let Some(task_run_result) = delete_tasks.join_next().await {
-        match task_run_result {
-            Ok(task_result) => match task_result {
-                Ok(()) => {}
-                Err(e) => error!("Delete task failed: {e:?}"),
-            },
-            Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
-        }
-    }
-}
-
-// Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
-async fn upload_simple_azure_data(
-    client: &Arc<GenericRemoteStorage>,
-    upload_tasks_count: usize,
-) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
-    info!("Creating {upload_tasks_count} Azure files");
-    let mut upload_tasks = JoinSet::new();
-    for i in 1..upload_tasks_count + 1 {
-        let task_client = Arc::clone(client);
-        upload_tasks.spawn(async move {
-            let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
-            let blob_path = RemotePath::new(
-                Utf8Path::from_path(blob_path.as_path()).expect("must be valid blob path"),
-            )
-            .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
-            debug!("Creating remote item {i} at path {blob_path:?}");
-
-            let (data, len) = upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, len, &blob_path, None).await?;
-
-            Ok::<_, anyhow::Error>(blob_path)
-        });
-    }
-
-    let mut upload_tasks_failed = false;
-    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
-    while let Some(task_run_result) = upload_tasks.join_next().await {
-        match task_run_result
-            .context("task join failed")
-            .and_then(|task_result| task_result.context("upload task failed"))
-        {
-            Ok(upload_path) => {
-                uploaded_blobs.insert(upload_path);
-            }
-            Err(e) => {
-                error!("Upload task failed: {e:?}");
-                upload_tasks_failed = true;
-            }
-        }
-    }
-
-    if upload_tasks_failed {
-        ControlFlow::Break(uploaded_blobs)
-    } else {
-        ControlFlow::Continue(uploaded_blobs)
-    }
-}
-
-// FIXME: copypasted from test_real_s3, can't remember how to share a module which is not compiled
-// to binary
-fn upload_stream(
-    content: std::borrow::Cow<'static, [u8]>,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    use std::borrow::Cow;
-
-    let content = match content {
-        Cow::Borrowed(x) => Bytes::from_static(x),
-        Cow::Owned(vec) => Bytes::from(vec),
-    };
-    wrap_stream(content)
-}
-
-fn wrap_stream(
-    content: bytes::Bytes,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    let len = content.len();
-    let content = futures::future::ready(Ok(content));
-
-    (futures::stream::once(content), len)
-}
diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs
index ecd834e61c..8f46b2abd6 100644
--- a/libs/remote_storage/tests/test_real_s3.rs
+++ b/libs/remote_storage/tests/test_real_s3.rs
@@ -2,23 +2,23 @@ use std::collections::HashSet;
 use std::env;
 use std::num::NonZeroUsize;
 use std::ops::ControlFlow;
-use std::path::PathBuf;
 use std::sync::Arc;
 use std::time::UNIX_EPOCH;
 
 use anyhow::Context;
-use bytes::Bytes;
 use camino::Utf8Path;
-use futures::stream::Stream;
-use once_cell::sync::OnceCell;
 use remote_storage::{
     GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config,
 };
 use test_context::{test_context, AsyncTestContext};
-use tokio::task::JoinSet;
-use tracing::{debug, error, info};
+use tracing::{debug, info};
 
-static LOGGING_DONE: OnceCell<()> = OnceCell::new();
+mod common;
+
+use common::{
+    cleanup, download_to_vec, ensure_logging_ready, upload_remote_data, upload_simple_remote_data,
+    upload_stream, wrap_stream,
+};
 
 const ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME: &str = "ENABLE_REAL_S3_REMOTE_STORAGE";
 
@@ -30,7 +30,7 @@ const BASE_PREFIX: &str = "test";
 /// If real S3 tests are disabled, the test passes, skipping any real test run: currently, there's no way to mark the test ignored in runtime with the
 /// deafult test framework, see https://github.com/rust-lang/rust/issues/68007 for details.
 ///
-/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_s3_data`]
+/// First, the test creates a set of S3 objects with keys `/${random_prefix_part}/${base_prefix_str}/sub_prefix_${i}/blob_${i}` in [`upload_remote_data`]
 /// where
 /// * `random_prefix_part` is set for the entire S3 client during the S3 client creation in [`create_s3_client`], to avoid multiple test runs interference
 /// * `base_prefix_str` is a common prefix to use in the client requests: we would want to ensure that the client is able to list nested prefixes inside the bucket
@@ -95,7 +95,7 @@ async fn s3_pagination_should_work(ctx: &mut MaybeEnabledS3WithTestBlobs) -> any
 /// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. Test will skip real code and pass if env vars not set.
 /// See `s3_pagination_should_work` for more information.
 ///
-/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_s3_data`]
+/// First, create a set of S3 objects with keys `random_prefix/folder{j}/blob_{i}.txt` in [`upload_remote_data`]
 /// Then performs the following queries:
 ///    1. `list_files(None)`. This should return all files `random_prefix/folder{j}/blob_{i}.txt`
 ///    2. `list_files("folder1")`.  This  should return all files `random_prefix/folder1/blob_{i}.txt`
@@ -198,15 +198,65 @@ async fn s3_delete_objects_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()>
     Ok(())
 }
 
-fn ensure_logging_ready() {
-    LOGGING_DONE.get_or_init(|| {
-        utils::logging::init(
-            utils::logging::LogFormat::Test,
-            utils::logging::TracingErrorLayerEnablement::Disabled,
-            utils::logging::Output::Stdout,
-        )
-        .expect("logging init failed");
-    });
+#[test_context(MaybeEnabledS3)]
+#[tokio::test]
+async fn s3_upload_download_works(ctx: &mut MaybeEnabledS3) -> anyhow::Result<()> {
+    let MaybeEnabledS3::Enabled(ctx) = ctx else {
+        return Ok(());
+    };
+
+    let path = RemotePath::new(Utf8Path::new(format!("{}/file", ctx.base_prefix).as_str()))
+        .with_context(|| "RemotePath conversion")?;
+
+    let orig = bytes::Bytes::from_static("remote blob data here".as_bytes());
+
+    let (data, len) = wrap_stream(orig.clone());
+
+    ctx.client.upload(data, len, &path, None).await?;
+
+    // Normal download request
+    let dl = ctx.client.download(&path).await?;
+    let buf = download_to_vec(dl).await?;
+    assert_eq!(&buf, &orig);
+
+    // Full range (end specified)
+    let dl = ctx
+        .client
+        .download_byte_range(&path, 0, Some(len as u64))
+        .await?;
+    let buf = download_to_vec(dl).await?;
+    assert_eq!(&buf, &orig);
+
+    // partial range (end specified)
+    let dl = ctx.client.download_byte_range(&path, 4, Some(10)).await?;
+    let buf = download_to_vec(dl).await?;
+    assert_eq!(&buf, &orig[4..10]);
+
+    // partial range (end beyond real end)
+    let dl = ctx
+        .client
+        .download_byte_range(&path, 8, Some(len as u64 * 100))
+        .await?;
+    let buf = download_to_vec(dl).await?;
+    assert_eq!(&buf, &orig[8..]);
+
+    // Partial range (end unspecified)
+    let dl = ctx.client.download_byte_range(&path, 4, None).await?;
+    let buf = download_to_vec(dl).await?;
+    assert_eq!(&buf, &orig[4..]);
+
+    // Full range (end unspecified)
+    let dl = ctx.client.download_byte_range(&path, 0, None).await?;
+    let buf = download_to_vec(dl).await?;
+    assert_eq!(&buf, &orig);
+
+    debug!("Cleanup: deleting file at path {path:?}");
+    ctx.client
+        .delete(&path)
+        .await
+        .with_context(|| format!("{path:?} removal"))?;
+
+    Ok(())
 }
 
 struct EnabledS3 {
@@ -278,7 +328,7 @@ impl AsyncTestContext for MaybeEnabledS3WithTestBlobs {
 
         let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;
 
-        match upload_s3_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {
+        match upload_remote_data(&enabled.client, enabled.base_prefix, upload_tasks_count).await {
             ControlFlow::Continue(uploads) => {
                 info!("Remote objects created successfully");
 
@@ -340,7 +390,7 @@ impl AsyncTestContext for MaybeEnabledS3WithSimpleTestBlobs {
 
         let enabled = EnabledS3::setup(Some(max_keys_in_list_response)).await;
 
-        match upload_simple_s3_data(&enabled.client, upload_tasks_count).await {
+        match upload_simple_remote_data(&enabled.client, upload_tasks_count).await {
             ControlFlow::Continue(uploads) => {
                 info!("Remote objects created successfully");
 
@@ -403,166 +453,3 @@ fn create_s3_client(
         GenericRemoteStorage::from_config(&remote_storage_config).context("remote storage init")?,
     ))
 }
-
-struct Uploads {
-    prefixes: HashSet<RemotePath>,
-    blobs: HashSet<RemotePath>,
-}
-
-async fn upload_s3_data(
-    client: &Arc<GenericRemoteStorage>,
-    base_prefix_str: &'static str,
-    upload_tasks_count: usize,
-) -> ControlFlow<Uploads, Uploads> {
-    info!("Creating {upload_tasks_count} S3 files");
-    let mut upload_tasks = JoinSet::new();
-    for i in 1..upload_tasks_count + 1 {
-        let task_client = Arc::clone(client);
-        upload_tasks.spawn(async move {
-            let prefix = format!("{base_prefix_str}/sub_prefix_{i}/");
-            let blob_prefix = RemotePath::new(Utf8Path::new(&prefix))
-                .with_context(|| format!("{prefix:?} to RemotePath conversion"))?;
-            let blob_path = blob_prefix.join(Utf8Path::new(&format!("blob_{i}")));
-            debug!("Creating remote item {i} at path {blob_path:?}");
-
-            let (data, data_len) =
-                upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, data_len, &blob_path, None).await?;
-
-            Ok::<_, anyhow::Error>((blob_prefix, blob_path))
-        });
-    }
-
-    let mut upload_tasks_failed = false;
-    let mut uploaded_prefixes = HashSet::with_capacity(upload_tasks_count);
-    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
-    while let Some(task_run_result) = upload_tasks.join_next().await {
-        match task_run_result
-            .context("task join failed")
-            .and_then(|task_result| task_result.context("upload task failed"))
-        {
-            Ok((upload_prefix, upload_path)) => {
-                uploaded_prefixes.insert(upload_prefix);
-                uploaded_blobs.insert(upload_path);
-            }
-            Err(e) => {
-                error!("Upload task failed: {e:?}");
-                upload_tasks_failed = true;
-            }
-        }
-    }
-
-    let uploads = Uploads {
-        prefixes: uploaded_prefixes,
-        blobs: uploaded_blobs,
-    };
-    if upload_tasks_failed {
-        ControlFlow::Break(uploads)
-    } else {
-        ControlFlow::Continue(uploads)
-    }
-}
-
-async fn cleanup(client: &Arc<GenericRemoteStorage>, objects_to_delete: HashSet<RemotePath>) {
-    info!(
-        "Removing {} objects from the remote storage during cleanup",
-        objects_to_delete.len()
-    );
-    let mut delete_tasks = JoinSet::new();
-    for object_to_delete in objects_to_delete {
-        let task_client = Arc::clone(client);
-        delete_tasks.spawn(async move {
-            debug!("Deleting remote item at path {object_to_delete:?}");
-            task_client
-                .delete(&object_to_delete)
-                .await
-                .with_context(|| format!("{object_to_delete:?} removal"))
-        });
-    }
-
-    while let Some(task_run_result) = delete_tasks.join_next().await {
-        match task_run_result {
-            Ok(task_result) => match task_result {
-                Ok(()) => {}
-                Err(e) => error!("Delete task failed: {e:?}"),
-            },
-            Err(join_err) => error!("Delete task did not finish correctly: {join_err}"),
-        }
-    }
-}
-
-// Uploads files `folder{j}/blob{i}.txt`. See test description for more details.
-async fn upload_simple_s3_data(
-    client: &Arc<GenericRemoteStorage>,
-    upload_tasks_count: usize,
-) -> ControlFlow<HashSet<RemotePath>, HashSet<RemotePath>> {
-    info!("Creating {upload_tasks_count} S3 files");
-    let mut upload_tasks = JoinSet::new();
-    for i in 1..upload_tasks_count + 1 {
-        let task_client = Arc::clone(client);
-        upload_tasks.spawn(async move {
-            let blob_path = PathBuf::from(format!("folder{}/blob_{}.txt", i / 7, i));
-            let blob_path = RemotePath::new(
-                Utf8Path::from_path(blob_path.as_path()).expect("must be valid blob path"),
-            )
-            .with_context(|| format!("{blob_path:?} to RemotePath conversion"))?;
-            debug!("Creating remote item {i} at path {blob_path:?}");
-
-            let (data, data_len) =
-                upload_stream(format!("remote blob data {i}").into_bytes().into());
-            task_client.upload(data, data_len, &blob_path, None).await?;
-
-            Ok::<_, anyhow::Error>(blob_path)
-        });
-    }
-
-    let mut upload_tasks_failed = false;
-    let mut uploaded_blobs = HashSet::with_capacity(upload_tasks_count);
-    while let Some(task_run_result) = upload_tasks.join_next().await {
-        match task_run_result
-            .context("task join failed")
-            .and_then(|task_result| task_result.context("upload task failed"))
-        {
-            Ok(upload_path) => {
-                uploaded_blobs.insert(upload_path);
-            }
-            Err(e) => {
-                error!("Upload task failed: {e:?}");
-                upload_tasks_failed = true;
-            }
-        }
-    }
-
-    if upload_tasks_failed {
-        ControlFlow::Break(uploaded_blobs)
-    } else {
-        ControlFlow::Continue(uploaded_blobs)
-    }
-}
-
-fn upload_stream(
-    content: std::borrow::Cow<'static, [u8]>,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    use std::borrow::Cow;
-
-    let content = match content {
-        Cow::Borrowed(x) => Bytes::from_static(x),
-        Cow::Owned(vec) => Bytes::from(vec),
-    };
-    wrap_stream(content)
-}
-
-fn wrap_stream(
-    content: bytes::Bytes,
-) -> (
-    impl Stream<Item = std::io::Result<Bytes>> + Send + Sync + 'static,
-    usize,
-) {
-    let len = content.len();
-    let content = futures::future::ready(Ok(content));
-
-    (futures::stream::once(content), len)
-}
diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml
index ccf6f4f2d7..af0414daa2 100644
--- a/libs/utils/Cargo.toml
+++ b/libs/utils/Cargo.toml
@@ -50,6 +50,8 @@ const_format.workspace = true
 # why is it only here? no other crate should use it, streams are rarely needed.
 tokio-stream = { version = "0.1.14" }
 
+serde_path_to_error.workspace = true
+
 [dev-dependencies]
 byteorder.workspace = true
 bytes.workspace = true
diff --git a/libs/utils/src/http/json.rs b/libs/utils/src/http/json.rs
index 70e682cb76..7ca62561fe 100644
--- a/libs/utils/src/http/json.rs
+++ b/libs/utils/src/http/json.rs
@@ -25,8 +25,12 @@ pub async fn json_request_or_empty_body<T: for<'de> Deserialize<'de>>(
     if body.remaining() == 0 {
         return Ok(None);
     }
-    serde_json::from_reader(body.reader())
-        .context("Failed to parse json request")
+
+    let mut deser = serde_json::de::Deserializer::from_reader(body.reader());
+
+    serde_path_to_error::deserialize(&mut deser)
+        // intentionally stringify because the debug version is not helpful in python logs
+        .map_err(|e| anyhow::anyhow!("Failed to parse json request: {e}"))
         .map(Some)
         .map_err(ApiError::BadRequest)
 }
diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs
index 2f09c2f3ea..f7b73dc984 100644
--- a/libs/utils/src/logging.rs
+++ b/libs/utils/src/logging.rs
@@ -1,6 +1,7 @@
 use std::str::FromStr;
 
 use anyhow::Context;
+use metrics::{IntCounter, IntCounterVec};
 use once_cell::sync::Lazy;
 use strum_macros::{EnumString, EnumVariantNames};
 
@@ -24,16 +25,48 @@ impl LogFormat {
     }
 }
 
-static TRACING_EVENT_COUNT: Lazy<metrics::IntCounterVec> = Lazy::new(|| {
-    metrics::register_int_counter_vec!(
+struct TracingEventCountMetric {
+    error: IntCounter,
+    warn: IntCounter,
+    info: IntCounter,
+    debug: IntCounter,
+    trace: IntCounter,
+}
+
+static TRACING_EVENT_COUNT_METRIC: Lazy<TracingEventCountMetric> = Lazy::new(|| {
+    let vec = metrics::register_int_counter_vec!(
         "libmetrics_tracing_event_count",
         "Number of tracing events, by level",
         &["level"]
     )
-    .expect("failed to define metric")
+    .expect("failed to define metric");
+    TracingEventCountMetric::new(vec)
 });
 
-struct TracingEventCountLayer(&'static metrics::IntCounterVec);
+impl TracingEventCountMetric {
+    fn new(vec: IntCounterVec) -> Self {
+        Self {
+            error: vec.with_label_values(&["error"]),
+            warn: vec.with_label_values(&["warn"]),
+            info: vec.with_label_values(&["info"]),
+            debug: vec.with_label_values(&["debug"]),
+            trace: vec.with_label_values(&["trace"]),
+        }
+    }
+
+    fn inc_for_level(&self, level: tracing::Level) {
+        let counter = match level {
+            tracing::Level::ERROR => &self.error,
+            tracing::Level::WARN => &self.warn,
+            tracing::Level::INFO => &self.info,
+            tracing::Level::DEBUG => &self.debug,
+            tracing::Level::TRACE => &self.trace,
+        };
+        counter.inc();
+    }
+}
+
+struct TracingEventCountLayer(&'static TracingEventCountMetric);
 
 impl<S> tracing_subscriber::layer::Layer<S> for TracingEventCountLayer
 where
@@ -44,15 +77,7 @@ where
         event: &tracing::Event<'_>,
         _ctx: tracing_subscriber::layer::Context<'_, S>,
     ) {
-        let level = event.metadata().level();
-        let level = match *level {
-            tracing::Level::ERROR => "error",
-            tracing::Level::WARN => "warn",
-            tracing::Level::INFO => "info",
-            tracing::Level::DEBUG => "debug",
-            tracing::Level::TRACE => "trace",
-        };
-        self.0.with_label_values(&[level]).inc();
+        self.0.inc_for_level(*event.metadata().level());
     }
 }
 
@@ -106,7 +131,9 @@ pub fn init(
         };
         log_layer.with_filter(rust_log_env_filter())
     });
-    let r = r.with(TracingEventCountLayer(&TRACING_EVENT_COUNT).with_filter(rust_log_env_filter()));
+    let r = r.with(
+        TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()),
+    );
     match tracing_error_layer_enablement {
         TracingErrorLayerEnablement::EnableWithRustLogFilter => r
             .with(tracing_error::ErrorLayer::default().with_filter(rust_log_env_filter()))
@@ -257,14 +284,14 @@ impl std::fmt::Debug for SecretString {
 mod tests {
     use metrics::{core::Opts, IntCounterVec};
 
-    use super::TracingEventCountLayer;
+    use crate::logging::{TracingEventCountLayer, TracingEventCountMetric};
 
     #[test]
     fn tracing_event_count_metric() {
         let counter_vec =
             IntCounterVec::new(Opts::new("testmetric", "testhelp"), &["level"]).unwrap();
-        let counter_vec = Box::leak(Box::new(counter_vec)); // make it 'static
-        let layer = TracingEventCountLayer(counter_vec);
+        let metric = Box::leak(Box::new(TracingEventCountMetric::new(counter_vec.clone())));
+        let layer = TracingEventCountLayer(metric);
         use tracing_subscriber::prelude::*;
 
         tracing::subscriber::with_default(tracing_subscriber::registry().with(layer), || {
diff --git a/libs/utils/src/sync/gate.rs b/libs/utils/src/sync/gate.rs
index 9aad0af22d..31c76d2f74 100644
--- a/libs/utils/src/sync/gate.rs
+++ b/libs/utils/src/sync/gate.rs
@@ -30,18 +30,32 @@ async fn warn_if_stuck<Fut: std::future::Future>(
 
     let mut fut = std::pin::pin!(fut);
 
-    loop {
+    let mut warned = false;
+    let ret = loop {
         match tokio::time::timeout(warn_period, &mut fut).await {
-            Ok(ret) => return ret,
+            Ok(ret) => break ret,
             Err(_) => {
                 tracing::warn!(
                     gate = name,
                     elapsed_ms = started.elapsed().as_millis(),
                     "still waiting, taking longer than expected..."
                 );
+                warned = true;
             }
         }
+    };
+
+    // If we emitted a warning for slowness, also emit a message when we complete, so that
+    // someone debugging a shutdown can know for sure whether we have moved past this operation.
+    if warned {
+        tracing::info!(
+            gate = name,
+            elapsed_ms = started.elapsed().as_millis(),
+            "completed, after taking longer than expected"
+        )
     }
+
+    ret
 }
 
 #[derive(Debug)]
diff --git a/libs/utils/src/timeout.rs b/libs/utils/src/timeout.rs
index 11fa417242..56bf57a900 100644
--- a/libs/utils/src/timeout.rs
+++ b/libs/utils/src/timeout.rs
@@ -2,8 +2,11 @@ use std::time::Duration;
 
 use tokio_util::sync::CancellationToken;
 
+#[derive(thiserror::Error, Debug)]
 pub enum TimeoutCancellableError {
+    #[error("Timed out")]
     Timeout,
+    #[error("Cancelled")]
     Cancelled,
 }
 
diff --git a/libs/walproposer/bindgen_deps.h b/libs/walproposer/bindgen_deps.h
index b95788347c..41ee1cd4a3 100644
--- a/libs/walproposer/bindgen_deps.h
+++ b/libs/walproposer/bindgen_deps.h
@@ -1 +1,2 @@
+#include "postgres.h"
 #include "walproposer.h"
diff --git a/libs/walproposer/build.rs b/libs/walproposer/build.rs
index d32c8ab299..fd09030dbd 100644
--- a/libs/walproposer/build.rs
+++ b/libs/walproposer/build.rs
@@ -1,3 +1,6 @@
+//! Links with walproposer, pgcommon, pgport and runs bindgen on walproposer.h
+//! to generate Rust bindings for it.
+
 use std::{env, path::PathBuf, process::Command};
 
 use anyhow::{anyhow, Context};
diff --git a/libs/walproposer/src/api_bindings.rs b/libs/walproposer/src/api_bindings.rs
index 7f1bbc3b80..77afe1e686 100644
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -1,3 +1,6 @@
+//! A C-Rust shim: defines implementation of C walproposer API, assuming wp
+//! callback_data stores Box to some Rust implementation.
+
 #![allow(dead_code)]
 
 use std::ffi::CStr;
diff --git a/libs/walproposer/src/walproposer.rs b/libs/walproposer/src/walproposer.rs
index 0661d3a969..f5723018d7 100644
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -436,9 +436,9 @@ mod tests {
                 event_mask: 0,
             }),
             expected_messages: vec![
-                // Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160000, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
+                // Greeting(ProposerGreeting { protocol_version: 2, pg_version: 160001, proposer_id: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], system_id: 0, timeline_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tenant_id: 9e4c8f36063c6c6e93bc20d65a820f3d, tli: 1, wal_seg_size: 16777216 })
                 vec![
-                    103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                    103, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 113, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158, 76, 143, 54, 6, 60, 108, 110,
                     147, 188, 32, 214, 90, 130, 15, 61, 158, 76, 143, 54, 6, 60, 108, 110, 147,
                     188, 32, 214, 90, 130, 15, 61, 1, 0, 0, 0, 0, 0, 0, 1,
@@ -478,7 +478,7 @@ mod tests {
         // walproposer will panic when it finishes sync_safekeepers
         std::panic::catch_unwind(|| wp.start()).unwrap_err();
         // validate the resulting LSN
-        assert_eq!(receiver.recv()?, 1337);
+        assert_eq!(receiver.try_recv(), Ok(1337));
         Ok(())
         // drop() will free up resources here
     }
diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml
index 26460d3926..e44501d1ed 100644
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -36,6 +36,7 @@ humantime.workspace = true
 humantime-serde.workspace = true
 hyper.workspace = true
 itertools.workspace = true
+md5.workspace = true
 nix.workspace = true
 # hack to get the number of worker threads tokio uses
 num_cpus = { version = "1.15" }
@@ -63,6 +64,7 @@ tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util"
 tokio-epoll-uring.workspace = true
 tokio-io-timeout.workspace = true
 tokio-postgres.workspace = true
+tokio-stream.workspace = true
 tokio-util.workspace = true
 toml_edit = { workspace = true, features = [ "serde" ] }
 tracing.workspace = true
diff --git a/pageserver/client/Cargo.toml b/pageserver/client/Cargo.toml
new file mode 100644
index 0000000000..0ed27602cd
--- /dev/null
+++ b/pageserver/client/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "pageserver_client"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+pageserver_api.workspace = true
+thiserror.workspace = true
+async-trait.workspace = true
+reqwest.workspace = true
+utils.workspace = true
+serde.workspace = true
+workspace_hack = { version = "0.1", path = "../../workspace_hack" }
+tokio-postgres.workspace = true
+tokio-stream.workspace = true
+tokio.workspace = true
+futures.workspace = true
+tokio-util.workspace = true
+anyhow.workspace = true
+postgres.workspace = true
+bytes.workspace = true
diff --git a/pageserver/client/src/lib.rs b/pageserver/client/src/lib.rs
new file mode 100644
index 0000000000..4a3f4dea47
--- /dev/null
+++ b/pageserver/client/src/lib.rs
@@ -0,0 +1,2 @@
+pub mod mgmt_api;
+pub mod page_service;
diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs
new file mode 100644
index 0000000000..0ad4e1551e
--- /dev/null
+++ b/pageserver/client/src/mgmt_api.rs
@@ -0,0 +1,200 @@
+use pageserver_api::models::*;
+use reqwest::{IntoUrl, Method};
+use utils::{
+    http::error::HttpErrorBody,
+    id::{TenantId, TimelineId},
+};
+
+#[derive(Debug)]
+pub struct Client {
+    mgmt_api_endpoint: String,
+    authorization_header: Option<String>,
+    client: reqwest::Client,
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum Error {
+    #[error("receive body: {0}")]
+    ReceiveBody(reqwest::Error),
+
+    #[error("receive error body: {0}")]
+    ReceiveErrorBody(String),
+
+    #[error("pageserver API: {0}")]
+    ApiError(String),
+}
+
+pub type Result<T> = std::result::Result<T, Error>;
+
+#[async_trait::async_trait]
+pub trait ResponseErrorMessageExt: Sized {
+    async fn error_from_body(self) -> Result<Self>;
+}
+
+#[async_trait::async_trait]
+impl ResponseErrorMessageExt for reqwest::Response {
+    async fn error_from_body(mut self) -> Result<Self> {
+        let status = self.status();
+        if !(status.is_client_error() || status.is_server_error()) {
+            return Ok(self);
+        }
+
+        let url = self.url().to_owned();
+        Err(match self.json::<HttpErrorBody>().await {
+            Ok(HttpErrorBody { msg }) => Error::ApiError(msg),
+            Err(_) => {
+                Error::ReceiveErrorBody(format!("Http error ({}) at {}.", status.as_u16(), url))
+            }
+        })
+    }
+}
+
+impl Client {
+    pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self {
+        Self {
+            mgmt_api_endpoint,
+            authorization_header: jwt.map(|jwt| format!("Bearer {jwt}")),
+            client: reqwest::Client::new(),
+        }
+    }
+
+    pub async fn list_tenants(&self) -> Result<Vec<pageserver_api::models::TenantInfo>> {
+        let uri = format!("{}/v1/tenant", self.mgmt_api_endpoint);
+        let resp = self.get(&uri).await?;
+        resp.json().await.map_err(Error::ReceiveBody)
+    }
+
+    pub async fn tenant_details(
+        &self,
+        tenant_id: TenantId,
+    ) -> Result<pageserver_api::models::TenantDetails> {
+        let uri = format!("{}/v1/tenant/{tenant_id}", self.mgmt_api_endpoint);
+        self.get(uri)
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
+    pub async fn list_timelines(
+        &self,
+        tenant_id: TenantId,
+    ) -> Result<Vec<pageserver_api::models::TimelineInfo>> {
+        let uri = format!("{}/v1/tenant/{tenant_id}/timeline", self.mgmt_api_endpoint);
+        self.get(&uri)
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
+    pub async fn timeline_info(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Result<pageserver_api::models::TimelineInfo> {
+        let uri = format!(
+            "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}",
+            self.mgmt_api_endpoint
+        );
+        self.get(&uri)
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
+    pub async fn keyspace(
+        &self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> Result<pageserver_api::models::partitioning::Partitioning> {
+        let uri = format!(
+            "{}/v1/tenant/{tenant_id}/timeline/{timeline_id}/keyspace",
+            self.mgmt_api_endpoint
+        );
+        self.get(&uri)
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
+    async fn get<U: IntoUrl>(&self, uri: U) -> Result<reqwest::Response> {
+        self.request(Method::GET, uri, ()).await
+    }
+
+    async fn request<B: serde::Serialize, U: reqwest::IntoUrl>(
+        &self,
+        method: Method,
+        uri: U,
+        body: B,
+    ) -> Result<reqwest::Response> {
+        let req = self.client.request(method, uri);
+        let req = if let Some(value) = &self.authorization_header {
+            req.header(reqwest::header::AUTHORIZATION, value)
+        } else {
+            req
+        };
+        let res = req.json(&body).send().await.map_err(Error::ReceiveBody)?;
+        let response = res.error_from_body().await?;
+        Ok(response)
+    }
+
+    pub async fn status(&self) -> Result<()> {
+        let uri = format!("{}/v1/status", self.mgmt_api_endpoint);
+        self.get(&uri).await?;
+        Ok(())
+    }
+
+    pub async fn tenant_create(&self, req: &TenantCreateRequest) -> Result<TenantId> {
+        let uri = format!("{}/v1/tenant", self.mgmt_api_endpoint);
+        self.request(Method::POST, &uri, req)
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+
+    pub async fn tenant_config(&self, req: &TenantConfigRequest) -> Result<()> {
+        let uri = format!("{}/v1/tenant/config", self.mgmt_api_endpoint);
+        self.request(Method::PUT, &uri, req).await?;
+        Ok(())
+    }
+
+    pub async fn location_config(
+        &self,
+        tenant_id: TenantId,
+        config: LocationConfig,
+        flush_ms: Option<std::time::Duration>,
+    ) -> Result<()> {
+        let req_body = TenantLocationConfigRequest { tenant_id, config };
+        let path = format!(
+            "{}/v1/tenant/{}/location_config",
+            self.mgmt_api_endpoint, tenant_id
+        );
+        let path = if let Some(flush_ms) = flush_ms {
+            format!("{}?flush_ms={}", path, flush_ms.as_millis())
+        } else {
+            path
+        };
+        self.request(Method::PUT, &path, &req_body).await?;
+        Ok(())
+    }
+
+    pub async fn timeline_create(
+        &self,
+        tenant_id: TenantId,
+        req: &TimelineCreateRequest,
+    ) -> Result<TimelineInfo> {
+        let uri = format!(
+            "{}/v1/tenant/{}/timeline",
+            self.mgmt_api_endpoint, tenant_id
+        );
+        self.request(Method::POST, &uri, req)
+            .await?
+            .json()
+            .await
+            .map_err(Error::ReceiveBody)
+    }
+}
diff --git a/pageserver/client/src/page_service.rs b/pageserver/client/src/page_service.rs
new file mode 100644
index 0000000000..fc0d2311f7
--- /dev/null
+++ b/pageserver/client/src/page_service.rs
@@ -0,0 +1,151 @@
+use std::pin::Pin;
+
+use futures::SinkExt;
+use pageserver_api::{
+    models::{
+        PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest,
+        PagestreamGetPageResponse,
+    },
+    reltag::RelTag,
+};
+use tokio::task::JoinHandle;
+use tokio_postgres::CopyOutStream;
+use tokio_stream::StreamExt;
+use tokio_util::sync::CancellationToken;
+use utils::{
+    id::{TenantId, TimelineId},
+    lsn::Lsn,
+};
+
+pub struct Client {
+    client: tokio_postgres::Client,
+    cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
+    conn_task: JoinHandle<()>,
+}
+
+pub struct BasebackupRequest {
+    pub tenant_id: TenantId,
+    pub timeline_id: TimelineId,
+    pub lsn: Option<Lsn>,
+    pub gzip: bool,
+}
+
+impl Client {
+    pub async fn new(connstring: String) -> anyhow::Result<Self> {
+        let (client, connection) = tokio_postgres::connect(&connstring, postgres::NoTls).await?;
+
+        let conn_task_cancel = CancellationToken::new();
+        let conn_task = tokio::spawn({
+            let conn_task_cancel = conn_task_cancel.clone();
+            async move {
+                tokio::select! {
+                    _ = conn_task_cancel.cancelled() => { }
+                    res = connection => {
+                        res.unwrap();
+                    }
+                }
+            }
+        });
+        Ok(Self {
+            cancel_on_client_drop: Some(conn_task_cancel.drop_guard()),
+            conn_task,
+            client,
+        })
+    }
+
+    pub async fn pagestream(
+        self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+    ) -> anyhow::Result<PagestreamClient> {
+        let copy_both: tokio_postgres::CopyBothDuplex<bytes::Bytes> = self
+            .client
+            .copy_both_simple(&format!("pagestream {tenant_id} {timeline_id}"))
+            .await?;
+        let Client {
+            cancel_on_client_drop,
+            conn_task,
+            client: _,
+        } = self;
+        Ok(PagestreamClient {
+            copy_both: Box::pin(copy_both),
+            conn_task,
+            cancel_on_client_drop,
+        })
+    }
+
+    pub async fn basebackup(&self, req: &BasebackupRequest) -> anyhow::Result<CopyOutStream> {
+        let BasebackupRequest {
+            tenant_id,
+            timeline_id,
+            lsn,
+            gzip,
+        } = req;
+        let mut args = Vec::with_capacity(5);
+        args.push("basebackup".to_string());
+        args.push(format!("{tenant_id}"));
+        args.push(format!("{timeline_id}"));
+        if let Some(lsn) = lsn {
+            args.push(format!("{lsn}"));
+        }
+        if *gzip {
+            args.push("--gzip".to_string())
+        }
+        Ok(self.client.copy_out(&args.join(" ")).await?)
+    }
+}
+
+/// Create using [`Client::pagestream`].
+pub struct PagestreamClient {
+    copy_both: Pin<Box<tokio_postgres::CopyBothDuplex<bytes::Bytes>>>,
+    cancel_on_client_drop: Option<tokio_util::sync::DropGuard>,
+    conn_task: JoinHandle<()>,
+}
+
+pub struct RelTagBlockNo {
+    pub rel_tag: RelTag,
+    pub block_no: u32,
+}
+
+impl PagestreamClient {
+    pub async fn shutdown(mut self) {
+        let _ = self.cancel_on_client_drop.take();
+        self.conn_task.await.unwrap();
+    }
+
+    pub async fn getpage(
+        &mut self,
+        key: RelTagBlockNo,
+        lsn: Lsn,
+    ) -> anyhow::Result<PagestreamGetPageResponse> {
+        let req = PagestreamGetPageRequest {
+            latest: false,
+            rel: key.rel_tag,
+            blkno: key.block_no,
+            lsn,
+        };
+        let req = PagestreamFeMessage::GetPage(req);
+        let req: bytes::Bytes = req.serialize();
+        // let mut req = tokio_util::io::ReaderStream::new(&req);
+        let mut req = tokio_stream::once(Ok(req));
+
+        self.copy_both.send_all(&mut req).await?;
+
+        let next: Option<Result<bytes::Bytes, _>> = self.copy_both.next().await;
+        let next: bytes::Bytes = next.unwrap()?;
+
+        let msg = PagestreamBeMessage::deserialize(next)?;
+        match msg {
+            PagestreamBeMessage::GetPage(p) => Ok(p),
+            PagestreamBeMessage::Error(e) => anyhow::bail!("Error: {:?}", e),
+            PagestreamBeMessage::Exists(_)
+            | PagestreamBeMessage::Nblocks(_)
+            | PagestreamBeMessage::DbSize(_) => {
+                anyhow::bail!(
+                    "unexpected be message kind in response to getpage request: {}",
+                    msg.kind()
+                )
+            }
+        }
+    }
+}
diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs
index 8da1d858e4..4531b9d989 100644
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -14,7 +14,7 @@ use pageserver::control_plane_client::ControlPlaneClient;
 use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task};
 use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING};
 use pageserver::task_mgr::WALRECEIVER_RUNTIME;
-use pageserver::tenant::TenantSharedResources;
+use pageserver::tenant::{secondary, TenantSharedResources};
 use remote_storage::GenericRemoteStorage;
 use tokio::time::Instant;
 use tracing::*;
@@ -504,6 +504,17 @@ fn start_pageserver(
         }
     });
 
+    let secondary_controller = if let Some(remote_storage) = &remote_storage {
+        secondary::spawn_tasks(
+            tenant_manager.clone(),
+            remote_storage.clone(),
+            background_jobs_barrier.clone(),
+            shutdown_pageserver.clone(),
+        )
+    } else {
+        secondary::null_controller()
+    };
+
     // shared state between the disk-usage backed eviction background task and the http endpoint
     // that allows triggering disk-usage based eviction manually. note that the http endpoint
     // is still accessible even if background task is not configured as long as remote storage has
@@ -533,6 +544,7 @@ fn start_pageserver(
                 broker_client.clone(),
                 disk_usage_eviction_state,
                 deletion_queue.new_client(),
+                secondary_controller,
             )
             .context("Failed to initialize router state")?,
         );
diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs
index 8675dfcfee..b91f137cdb 100644
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -42,6 +42,8 @@ use crate::{
     TIMELINE_DELETE_MARK_SUFFIX, TIMELINE_UNINIT_MARK_SUFFIX,
 };
 
+use self::defaults::DEFAULT_CONCURRENT_TENANT_WARMUP;
+
 use self::defaults::DEFAULT_VIRTUAL_FILE_IO_ENGINE;
 
 pub mod defaults {
@@ -64,6 +66,8 @@ pub mod defaults {
 
     pub const DEFAULT_LOG_FORMAT: &str = "plain";
 
+    pub const DEFAULT_CONCURRENT_TENANT_WARMUP: usize = 8;
+
     pub const DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES: usize =
         super::ConfigurableSemaphore::DEFAULT_INITIAL.get();
 
@@ -73,6 +77,8 @@ pub mod defaults {
     pub const DEFAULT_SYNTHETIC_SIZE_CALCULATION_INTERVAL: &str = "10 min";
     pub const DEFAULT_BACKGROUND_TASK_MAXIMUM_DELAY: &str = "10s";
 
+    pub const DEFAULT_HEATMAP_UPLOAD_CONCURRENCY: usize = 8;
+
     pub const DEFAULT_VIRTUAL_FILE_IO_ENGINE: &str = "std-fs";
 
     ///
@@ -97,6 +103,7 @@ pub mod defaults {
 #log_format = '{DEFAULT_LOG_FORMAT}'
 
 #concurrent_tenant_size_logical_size_queries = '{DEFAULT_CONCURRENT_TENANT_SIZE_LOGICAL_SIZE_QUERIES}'
+#concurrent_tenant_warmup = '{DEFAULT_CONCURRENT_TENANT_WARMUP}'
 
 #metric_collection_interval = '{DEFAULT_METRIC_COLLECTION_INTERVAL}'
 #cached_metric_collection_interval = '{DEFAULT_CACHED_METRIC_COLLECTION_INTERVAL}'
@@ -124,6 +131,8 @@ pub mod defaults {
 #evictions_low_residence_duration_metric_threshold = '{DEFAULT_EVICTIONS_LOW_RESIDENCE_DURATION_METRIC_THRESHOLD}'
 #gc_feedback = false
 
+#heatmap_upload_concurrency = {DEFAULT_HEATMAP_UPLOAD_CONCURRENCY}
+
 [remote_storage]
 
 "#
@@ -183,6 +192,11 @@ pub struct PageServerConf {
 
     pub log_format: LogFormat,
 
+    /// Number of tenants which will be concurrently loaded from remote storage proactively on startup,
+    /// does not limit tenants loaded in response to client I/O.  A lower value implicitly deprioritizes
+    /// loading such tenants, vs. other work in the system.
+    pub concurrent_tenant_warmup: ConfigurableSemaphore,
+
     /// Number of concurrent [`Tenant::gather_size_inputs`](crate::tenant::Tenant::gather_size_inputs) allowed.
     pub concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore,
     /// Limit of concurrent [`Tenant::gather_size_inputs`] issued by module `eviction_task`.
@@ -223,6 +237,10 @@ pub struct PageServerConf {
     /// for use in major incidents.
     pub control_plane_emergency_mode: bool,
 
+    /// How many heatmap uploads may be done concurrency: lower values implicitly deprioritize
+    /// heatmap uploads vs. other remote storage operations.
+    pub heatmap_upload_concurrency: usize,
+
     pub virtual_file_io_engine: virtual_file::IoEngineKind,
 }
 
@@ -284,6 +302,7 @@ struct PageServerConfigBuilder {
 
     log_format: BuilderValue<LogFormat>,
 
+    concurrent_tenant_warmup: BuilderValue<NonZeroUsize>,
     concurrent_tenant_size_logical_size_queries: BuilderValue<NonZeroUsize>,
 
     metric_collection_interval: BuilderValue<Duration>,
@@ -303,6 +322,8 @@ struct PageServerConfigBuilder {
     control_plane_api_token: BuilderValue<Option<SecretString>>,
     control_plane_emergency_mode: BuilderValue<bool>,
 
+    heatmap_upload_concurrency: BuilderValue<usize>,
+
     virtual_file_io_engine: BuilderValue<virtual_file::IoEngineKind>,
 }
 
@@ -341,6 +362,8 @@ impl Default for PageServerConfigBuilder {
             .expect("cannot parse default keepalive interval")),
             log_format: Set(LogFormat::from_str(DEFAULT_LOG_FORMAT).unwrap()),
 
+            concurrent_tenant_warmup: Set(NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
+                .expect("Invalid default constant")),
             concurrent_tenant_size_logical_size_queries: Set(
                 ConfigurableSemaphore::DEFAULT_INITIAL,
             ),
@@ -373,6 +396,8 @@ impl Default for PageServerConfigBuilder {
             control_plane_api_token: Set(None),
             control_plane_emergency_mode: Set(false),
 
+            heatmap_upload_concurrency: Set(DEFAULT_HEATMAP_UPLOAD_CONCURRENCY),
+
             virtual_file_io_engine: Set(DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap()),
         }
     }
@@ -454,6 +479,10 @@ impl PageServerConfigBuilder {
         self.log_format = BuilderValue::Set(log_format)
     }
 
+    pub fn concurrent_tenant_warmup(&mut self, u: NonZeroUsize) {
+        self.concurrent_tenant_warmup = BuilderValue::Set(u);
+    }
+
     pub fn concurrent_tenant_size_logical_size_queries(&mut self, u: NonZeroUsize) {
         self.concurrent_tenant_size_logical_size_queries = BuilderValue::Set(u);
     }
@@ -514,11 +543,18 @@ impl PageServerConfigBuilder {
         self.control_plane_emergency_mode = BuilderValue::Set(enabled)
     }
 
+    pub fn heatmap_upload_concurrency(&mut self, value: usize) {
+        self.heatmap_upload_concurrency = BuilderValue::Set(value)
+    }
+
     pub fn virtual_file_io_engine(&mut self, value: virtual_file::IoEngineKind) {
         self.virtual_file_io_engine = BuilderValue::Set(value);
     }
 
     pub fn build(self) -> anyhow::Result<PageServerConf> {
+        let concurrent_tenant_warmup = self
+            .concurrent_tenant_warmup
+            .ok_or(anyhow!("missing concurrent_tenant_warmup"))?;
         let concurrent_tenant_size_logical_size_queries = self
             .concurrent_tenant_size_logical_size_queries
             .ok_or(anyhow!(
@@ -571,6 +607,7 @@ impl PageServerConfigBuilder {
                 .broker_keepalive_interval
                 .ok_or(anyhow!("No broker keepalive interval provided"))?,
             log_format: self.log_format.ok_or(anyhow!("missing log_format"))?,
+            concurrent_tenant_warmup: ConfigurableSemaphore::new(concurrent_tenant_warmup),
             concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::new(
                 concurrent_tenant_size_logical_size_queries,
             ),
@@ -612,6 +649,10 @@ impl PageServerConfigBuilder {
             control_plane_emergency_mode: self
                 .control_plane_emergency_mode
                 .ok_or(anyhow!("missing control_plane_emergency_mode"))?,
+
+            heatmap_upload_concurrency: self
+                .heatmap_upload_concurrency
+                .ok_or(anyhow!("missing heatmap_upload_concurrency"))?,
             virtual_file_io_engine: self
                 .virtual_file_io_engine
                 .ok_or(anyhow!("missing virtual_file_io_engine"))?,
@@ -807,6 +848,11 @@ impl PageServerConf {
                 "log_format" => builder.log_format(
                     LogFormat::from_config(&parse_toml_string(key, item)?)?
                 ),
+                "concurrent_tenant_warmup" => builder.concurrent_tenant_warmup({
+                    let input = parse_toml_string(key, item)?;
+                    let permits = input.parse::<usize>().context("expected a number of initial permits, not {s:?}")?;
+                    NonZeroUsize::new(permits).context("initial semaphore permits out of range: 0, use other configuration to disable a feature")?
+                }),
                 "concurrent_tenant_size_logical_size_queries" => builder.concurrent_tenant_size_logical_size_queries({
                     let input = parse_toml_string(key, item)?;
                     let permits = input.parse::<usize>().context("expected a number of initial permits, not {s:?}")?;
@@ -849,6 +895,9 @@ impl PageServerConf {
                 "control_plane_emergency_mode" => {
                     builder.control_plane_emergency_mode(parse_toml_bool(key, item)?)
                 },
+                "heatmap_upload_concurrency" => {
+                    builder.heatmap_upload_concurrency(parse_toml_u64(key, item)? as usize)
+                },
                 "virtual_file_io_engine" => {
                     builder.virtual_file_io_engine(parse_toml_from_str("virtual_file_io_engine", item)?)
                 }
@@ -904,6 +953,10 @@ impl PageServerConf {
             broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
             broker_keepalive_interval: Duration::from_secs(5000),
             log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
+            concurrent_tenant_warmup: ConfigurableSemaphore::new(
+                NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP)
+                    .expect("Invalid default constant"),
+            ),
             concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
             eviction_task_immitated_concurrent_logical_size_queries: ConfigurableSemaphore::default(
             ),
@@ -918,6 +971,7 @@ impl PageServerConf {
             control_plane_api: None,
             control_plane_api_token: None,
             control_plane_emergency_mode: false,
+            heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
             virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
         }
     }
@@ -1122,6 +1176,9 @@ background_task_maximum_delay = '334 s'
                     storage_broker::DEFAULT_KEEPALIVE_INTERVAL
                 )?,
                 log_format: LogFormat::from_str(defaults::DEFAULT_LOG_FORMAT).unwrap(),
+                concurrent_tenant_warmup: ConfigurableSemaphore::new(
+                    NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP).unwrap()
+                ),
                 concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
                 eviction_task_immitated_concurrent_logical_size_queries:
                     ConfigurableSemaphore::default(),
@@ -1144,6 +1201,7 @@ background_task_maximum_delay = '334 s'
                 control_plane_api: None,
                 control_plane_api_token: None,
                 control_plane_emergency_mode: false,
+                heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
                 virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
             },
             "Correct defaults should be used when no config values are provided"
@@ -1188,6 +1246,9 @@ background_task_maximum_delay = '334 s'
                 broker_endpoint: storage_broker::DEFAULT_ENDPOINT.parse().unwrap(),
                 broker_keepalive_interval: Duration::from_secs(5),
                 log_format: LogFormat::Json,
+                concurrent_tenant_warmup: ConfigurableSemaphore::new(
+                    NonZeroUsize::new(DEFAULT_CONCURRENT_TENANT_WARMUP).unwrap()
+                ),
                 concurrent_tenant_size_logical_size_queries: ConfigurableSemaphore::default(),
                 eviction_task_immitated_concurrent_logical_size_queries:
                     ConfigurableSemaphore::default(),
@@ -1202,6 +1263,7 @@ background_task_maximum_delay = '334 s'
                 control_plane_api: None,
                 control_plane_api_token: None,
                 control_plane_emergency_mode: false,
+                heatmap_upload_concurrency: defaults::DEFAULT_HEATMAP_UPLOAD_CONCURRENCY,
                 virtual_file_io_engine: DEFAULT_VIRTUAL_FILE_IO_ENGINE.parse().unwrap(),
             },
             "Should be able to parse all basic config values correctly"
diff --git a/pageserver/src/consumption_metrics.rs b/pageserver/src/consumption_metrics.rs
index 7ad6a0f890..bde2cedca7 100644
--- a/pageserver/src/consumption_metrics.rs
+++ b/pageserver/src/consumption_metrics.rs
@@ -3,7 +3,7 @@
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
 use crate::tenant::tasks::BackgroundLoopKind;
-use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError};
+use crate::tenant::{mgr, LogicalSizeCalculationCause, PageReconstructError, Tenant};
 use camino::Utf8PathBuf;
 use consumption_metrics::EventType;
 use pageserver_api::models::TenantState;
@@ -256,8 +256,6 @@ async fn calculate_synthetic_size_worker(
         info!("calculate_synthetic_size_worker stopped");
     };
 
-    let cause = LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
-
     loop {
         let started_at = Instant::now();
 
@@ -269,26 +267,25 @@ async fn calculate_synthetic_size_worker(
             }
         };
 
-        for (tenant_id, tenant_state) in tenants {
+        for (tenant_shard_id, tenant_state) in tenants {
             if tenant_state != TenantState::Active {
                 continue;
             }
 
-            if let Ok(tenant) = mgr::get_tenant(tenant_id, true) {
-                // TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
-                // We can put in some prioritization for consumption metrics.
-                // Same for the loop that fetches computed metrics.
-                // By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
-                // which turns out is really handy to understand the system.
-                if let Err(e) = tenant.calculate_synthetic_size(cause, cancel, ctx).await {
-                    if let Some(PageReconstructError::Cancelled) =
-                        e.downcast_ref::<PageReconstructError>()
-                    {
-                        return Ok(());
-                    }
-                    error!("failed to calculate synthetic size for tenant {tenant_id}: {e:#}");
-                }
+            if !tenant_shard_id.is_zero() {
+                // We only send consumption metrics from shard 0, so don't waste time calculating
+                // synthetic size on other shards.
+                continue;
             }
+
+            let Ok(tenant) = mgr::get_tenant(tenant_shard_id, true) else {
+                continue;
+            };
+
+            // there is never any reason to exit calculate_synthetic_size_worker following any
+            // return value -- we don't need to care about shutdown because no tenant is found when
+            // pageserver is shut down.
+            calculate_and_log(&tenant, cancel, ctx).await;
         }
 
         crate::tenant::tasks::warn_when_period_overrun(
@@ -299,7 +296,7 @@ async fn calculate_synthetic_size_worker(
 
         let res = tokio::time::timeout_at(
             started_at + synthetic_size_calculation_interval,
-            task_mgr::shutdown_token().cancelled(),
+            cancel.cancelled(),
         )
         .await;
         if res.is_ok() {
@@ -307,3 +304,31 @@ async fn calculate_synthetic_size_worker(
         }
     }
 }
+
+async fn calculate_and_log(tenant: &Tenant, cancel: &CancellationToken, ctx: &RequestContext) {
+    const CAUSE: LogicalSizeCalculationCause =
+        LogicalSizeCalculationCause::ConsumptionMetricsSyntheticSize;
+
+    // TODO should we use concurrent_background_tasks_rate_limit() here, like the other background tasks?
+    // We can put in some prioritization for consumption metrics.
+    // Same for the loop that fetches computed metrics.
+    // By using the same limiter, we centralize metrics collection for "start" and "finished" counters,
+    // which turns out is really handy to understand the system.
+    let Err(e) = tenant.calculate_synthetic_size(CAUSE, cancel, ctx).await else {
+        return;
+    };
+
+    // this error can be returned if timeline is shutting down, but it does not
+    // mean the synthetic size worker should terminate. we do not need any checks
+    // in this function because `mgr::get_tenant` will error out after shutdown has
+    // progressed to shutting down tenants.
+    let shutting_down = matches!(
+        e.downcast_ref::<PageReconstructError>(),
+        Some(PageReconstructError::Cancelled | PageReconstructError::AncestorStopping(_))
+    );
+
+    if !shutting_down {
+        let tenant_shard_id = tenant.tenant_shard_id();
+        error!("failed to calculate synthetic size for tenant {tenant_shard_id}: {e:#}");
+    }
+}
diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs
index 918e45ea9e..0b827816bc 100644
--- a/pageserver/src/consumption_metrics/metrics.rs
+++ b/pageserver/src/consumption_metrics/metrics.rs
@@ -2,7 +2,6 @@ use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogi
 use chrono::{DateTime, Utc};
 use consumption_metrics::EventType;
 use futures::stream::StreamExt;
-use pageserver_api::shard::ShardNumber;
 use std::{sync::Arc, time::SystemTime};
 use utils::{
     id::{TenantId, TimelineId},
@@ -198,12 +197,12 @@ pub(super) async fn collect_all_metrics(
     };
 
     let tenants = futures::stream::iter(tenants).filter_map(|(id, state)| async move {
-        if state != TenantState::Active {
+        if state != TenantState::Active || !id.is_zero() {
             None
         } else {
             crate::tenant::mgr::get_tenant(id, true)
                 .ok()
-                .map(|tenant| (id, tenant))
+                .map(|tenant| (id.tenant_id, tenant))
         }
     });
 
@@ -229,11 +228,6 @@ where
     while let Some((tenant_id, tenant)) = tenants.next().await {
         let mut tenant_resident_size = 0;
 
-        // Sharded tenants report all consumption metrics from shard zero
-        if tenant.tenant_shard_id().shard_number != ShardNumber(0) {
-            continue;
-        }
-
         for timeline in tenant.list_timelines() {
             let timeline_id = timeline.timeline_id;
 
diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs
index f01cd1cf8c..76906cfaf7 100644
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -42,7 +42,6 @@
 //   reading these fields. We use the Debug impl for semi-structured logging, though.
 
 use std::{
-    collections::HashMap,
     sync::Arc,
     time::{Duration, SystemTime},
 };
@@ -125,7 +124,7 @@ pub fn launch_disk_usage_global_eviction_task(
 async fn disk_usage_eviction_task(
     state: &State,
     task_config: &DiskUsageEvictionTaskConfig,
-    _storage: &GenericRemoteStorage,
+    storage: &GenericRemoteStorage,
     tenants_dir: &Utf8Path,
     cancel: CancellationToken,
 ) {
@@ -149,8 +148,14 @@ async fn disk_usage_eviction_task(
         let start = Instant::now();
 
         async {
-            let res =
-                disk_usage_eviction_task_iteration(state, task_config, tenants_dir, &cancel).await;
+            let res = disk_usage_eviction_task_iteration(
+                state,
+                task_config,
+                storage,
+                tenants_dir,
+                &cancel,
+            )
+            .await;
 
             match res {
                 Ok(()) => {}
@@ -181,12 +186,13 @@ pub trait Usage: Clone + Copy + std::fmt::Debug {
 async fn disk_usage_eviction_task_iteration(
     state: &State,
     task_config: &DiskUsageEvictionTaskConfig,
+    storage: &GenericRemoteStorage,
     tenants_dir: &Utf8Path,
     cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
     let usage_pre = filesystem_level_usage::get(tenants_dir, task_config)
         .context("get filesystem-level disk usage before evictions")?;
-    let res = disk_usage_eviction_task_iteration_impl(state, usage_pre, cancel).await;
+    let res = disk_usage_eviction_task_iteration_impl(state, storage, usage_pre, cancel).await;
     match res {
         Ok(outcome) => {
             debug!(?outcome, "disk_usage_eviction_iteration finished");
@@ -268,8 +274,9 @@ struct LayerCount {
     count: usize,
 }
 
-pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
+pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
     state: &State,
+    _storage: &GenericRemoteStorage,
     usage_pre: U,
     cancel: &CancellationToken,
 ) -> anyhow::Result<IterationOutcome<U>> {
@@ -321,16 +328,16 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
     // Walk through the list of candidates, until we have accumulated enough layers to get
     // us back under the pressure threshold. 'usage_planned' is updated so that it tracks
     // how much disk space would be used after evicting all the layers up to the current
-    // point in the list. The layers are collected in 'batched', grouped per timeline.
+    // point in the list.
     //
     // If we get far enough in the list that we start to evict layers that are below
     // the tenant's min-resident-size threshold, print a warning, and memorize the disk
     // usage at that point, in 'usage_planned_min_resident_size_respecting'.
-    let mut batched: HashMap<_, Vec<_>> = HashMap::new();
     let mut warned = None;
     let mut usage_planned = usage_pre;
-    let mut max_batch_size = 0;
-    for (i, (partition, candidate)) in candidates.into_iter().enumerate() {
+    let mut evicted_amount = 0;
+
+    for (i, (partition, candidate)) in candidates.iter().enumerate() {
         if !usage_planned.has_pressure() {
             debug!(
                 no_candidates_evicted = i,
@@ -339,25 +346,13 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
             break;
         }
 
-        if partition == MinResidentSizePartition::Below && warned.is_none() {
+        if partition == &MinResidentSizePartition::Below && warned.is_none() {
             warn!(?usage_pre, ?usage_planned, candidate_no=i, "tenant_min_resident_size-respecting LRU would not relieve pressure, evicting more following global LRU policy");
             warned = Some(usage_planned);
         }
 
         usage_planned.add_available_bytes(candidate.layer.layer_desc().file_size);
-
-        // FIXME: batching makes no sense anymore because of no layermap locking, should just spawn
-        // tasks to evict all seen layers until we have evicted enough
-
-        let batch = batched.entry(TimelineKey(candidate.timeline)).or_default();
-
-        // semaphore will later be used to limit eviction concurrency, and we can express at
-        // most u32 number of permits. unlikely we would have u32::MAX layers to be evicted,
-        // but fail gracefully by not making batches larger.
-        if batch.len() < u32::MAX as usize {
-            batch.push(candidate.layer);
-            max_batch_size = max_batch_size.max(batch.len());
-        }
+        evicted_amount += 1;
     }
 
     let usage_planned = match warned {
@@ -372,100 +367,79 @@ pub async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
     };
     debug!(?usage_planned, "usage planned");
 
-    // phase2: evict victims batched by timeline
+    // phase2: evict layers
 
     let mut js = tokio::task::JoinSet::new();
+    let limit = 1000;
 
-    // ratelimit to 1k files or any higher max batch size
-    let limit = Arc::new(tokio::sync::Semaphore::new(1000.max(max_batch_size)));
+    let mut evicted = candidates.into_iter().take(evicted_amount).fuse();
+    let mut consumed_all = false;
 
-    for (timeline, batch) in batched {
-        let tenant_shard_id = timeline.tenant_shard_id;
-        let timeline_id = timeline.timeline_id;
-        let batch_size =
-            u32::try_from(batch.len()).expect("batch size limited to u32::MAX during partitioning");
+    // After the evictions, `usage_assumed` is the post-eviction usage,
+    // according to internal accounting.
+    let mut usage_assumed = usage_pre;
+    let mut evictions_failed = LayerCount::default();
 
-        // I dislike naming of `available_permits` but it means current total amount of permits
-        // because permits can be added
-        assert!(batch_size as usize <= limit.available_permits());
+    let evict_layers = async move {
+        loop {
+            let next = if js.len() >= limit || consumed_all {
+                js.join_next().await
+            } else if !js.is_empty() {
+                // opportunistically consume ready result, one per each new evicted
+                futures::future::FutureExt::now_or_never(js.join_next()).and_then(|x| x)
+            } else {
+                None
+            };
 
-        debug!(%timeline_id, "evicting batch for timeline");
-
-        let evict = {
-            let limit = limit.clone();
-            let cancel = cancel.clone();
-            async move {
-                let mut evicted_bytes = 0;
-                let mut evictions_failed = LayerCount::default();
-
-                let Ok(_permit) = limit.acquire_many_owned(batch_size).await else {
-                    // semaphore closing means cancelled
-                    return (evicted_bytes, evictions_failed);
-                };
-
-                let results = timeline.evict_layers(&batch).await;
-
-                match results {
-                    Ok(results) => {
-                        assert_eq!(results.len(), batch.len());
-                        for (result, layer) in results.into_iter().zip(batch.iter()) {
-                            let file_size = layer.layer_desc().file_size;
-                            match result {
-                                Some(Ok(())) => {
-                                    evicted_bytes += file_size;
-                                }
-                                Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
-                                    evictions_failed.file_sizes += file_size;
-                                    evictions_failed.count += 1;
-                                }
-                                None => {
-                                    assert!(cancel.is_cancelled());
-                                }
-                            }
-                        }
+            if let Some(next) = next {
+                match next {
+                    Ok(Ok(file_size)) => {
+                        usage_assumed.add_available_bytes(file_size);
                     }
-                    Err(e) => {
-                        warn!("failed to evict batch: {:#}", e);
+                    Ok(Err((file_size, EvictionError::NotFound | EvictionError::Downloaded))) => {
+                        evictions_failed.file_sizes += file_size;
+                        evictions_failed.count += 1;
                     }
+                    Err(je) if je.is_cancelled() => unreachable!("not used"),
+                    Err(je) if je.is_panic() => { /* already logged */ }
+                    Err(je) => tracing::error!("unknown JoinError: {je:?}"),
                 }
-                (evicted_bytes, evictions_failed)
             }
-        }
-        .instrument(tracing::info_span!("evict_batch", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id, batch_size));
 
-        js.spawn(evict);
-
-        // spwaning multiple thousands of these is essentially blocking, so give already spawned a
-        // chance of making progress
-        tokio::task::yield_now().await;
-    }
-
-    let join_all = async move {
-        // After the evictions, `usage_assumed` is the post-eviction usage,
-        // according to internal accounting.
-        let mut usage_assumed = usage_pre;
-        let mut evictions_failed = LayerCount::default();
-
-        while let Some(res) = js.join_next().await {
-            match res {
-                Ok((evicted_bytes, failed)) => {
-                    usage_assumed.add_available_bytes(evicted_bytes);
-                    evictions_failed.file_sizes += failed.file_sizes;
-                    evictions_failed.count += failed.count;
-                }
-                Err(je) if je.is_cancelled() => unreachable!("not used"),
-                Err(je) if je.is_panic() => { /* already logged */ }
-                Err(je) => tracing::error!("unknown JoinError: {je:?}"),
+            if consumed_all && js.is_empty() {
+                break;
             }
+
+            // calling again when consumed_all is fine as evicted is fused.
+            let Some((_partition, candidate)) = evicted.next() else {
+                consumed_all = true;
+                continue;
+            };
+
+            js.spawn(async move {
+                let rtc = candidate.timeline.remote_client.as_ref().expect(
+                    "holding the witness, all timelines must have a remote timeline client",
+                );
+                let file_size = candidate.layer.layer_desc().file_size;
+                candidate
+                    .layer
+                    .evict_and_wait(rtc)
+                    .await
+                    .map(|()| file_size)
+                    .map_err(|e| (file_size, e))
+            });
+
+            tokio::task::yield_now().await;
         }
+
         (usage_assumed, evictions_failed)
     };
 
     let (usage_assumed, evictions_failed) = tokio::select! {
-        tuple = join_all => { tuple },
+        tuple = evict_layers => { tuple },
         _ = cancel.cancelled() => {
-            // close the semaphore to stop any pending acquires
-            limit.close();
+            // dropping joinset will abort all pending evict_and_waits and that is fine, our
+            // requests will still stand
             return Ok(IterationOutcome::Cancelled);
         }
     };
diff --git a/pageserver/src/http/mod.rs b/pageserver/src/http/mod.rs
index 1c083bd382..c82d1c0362 100644
--- a/pageserver/src/http/mod.rs
+++ b/pageserver/src/http/mod.rs
@@ -1,4 +1,2 @@
 pub mod routes;
 pub use routes::make_router;
-
-pub use pageserver_api::models;
diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml
index 237109abfe..b79c5ada9a 100644
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -84,7 +84,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: Get tenant status
       responses:
@@ -181,7 +180,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: Get timelines for tenant
       responses:
@@ -232,7 +230,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -338,7 +335,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -401,7 +397,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -469,7 +464,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: timeline_id
         in: path
         required: true
@@ -523,7 +517,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Schedules attach operation to happen in the background for the given tenant.
@@ -631,7 +624,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: flush_ms
         in: query
         required: false
@@ -724,7 +716,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: detach_ignored
         in: query
         required: false
@@ -784,7 +775,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Remove tenant data (including all corresponding timelines) from pageserver's memory.
@@ -833,7 +823,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
         Schedules an operation that attempts to load a tenant from the local disk and
@@ -890,7 +879,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: |
         Calculate tenant's synthetic size
@@ -933,7 +921,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
       - name: inputs_only
         in: query
         required: false
@@ -1003,11 +990,10 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     post:
       description: |
-        Create a timeline. Returns new timeline id on success.\
-        If no new timeline id is specified in parameters, it would be generated. It's an error to recreate the same timeline.
+        Create a timeline. Returns new timeline id on success.
+        Recreating the same timeline will succeed if the parameters match the existing timeline.
         If no pg_version is specified, assume DEFAULT_PG_VERSION hardcoded in the pageserver.
       requestBody:
         content:
@@ -1137,7 +1123,6 @@ paths:
             application/json:
               schema:
                 type: string
-                format: hex
         "400":
           description: Malformed tenant create request
           content:
@@ -1234,7 +1219,6 @@ paths:
         required: true
         schema:
           type: string
-          format: hex
     get:
       description: |
         Returns tenant's config description: specific config overrides a tenant has
@@ -1340,7 +1324,6 @@ components:
           properties:
             new_tenant_id:
               type: string
-              format: hex
             generation:
               type: integer
               description: Attachment generation number.
@@ -1369,7 +1352,6 @@ components:
           properties:
             tenant_id:
               type: string
-              format: hex
     TenantLocationConfigRequest:
       type: object
       required:
@@ -1377,7 +1359,6 @@ components:
       properties:
         tenant_id:
           type: string
-          format: hex
         mode:
           type: string
           enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
@@ -1424,6 +1405,8 @@ components:
           type: integer
         trace_read_requests:
           type: boolean
+        heatmap_period:
+          type: integer
     TenantConfigResponse:
       type: object
       properties:
@@ -1446,7 +1429,6 @@ components:
           format: hex
         tenant_id:
           type: string
-          format: hex
         last_record_lsn:
           type: string
           format: hex
diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs
index 9e41d912c2..e641e44b08 100644
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -14,6 +14,7 @@ use hyper::header;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use metrics::launch_timestamp::LaunchTimestamp;
+use pageserver_api::models::TenantDetails;
 use pageserver_api::models::{
     DownloadRemoteLayersTaskSpawnRequest, LocationConfigMode, TenantAttachRequest,
     TenantLoadRequest, TenantLocationConfigRequest,
@@ -28,20 +29,18 @@ use utils::http::endpoint::request_span;
 use utils::http::json::json_request_or_empty_body;
 use utils::http::request::{get_request_param, must_get_query_param, parse_query_param};
 
-use super::models::{
-    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
-    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
-};
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::deletion_queue::DeletionQueueClient;
 use crate::metrics::{StorageTimeOperation, STORAGE_TIME_GLOBAL};
 use crate::pgdatadir_mapping::LsnForTimestamp;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::{LocationConf, TenantConfOpt};
+use crate::tenant::mgr::GetActiveTenantError;
 use crate::tenant::mgr::{
     GetTenantError, SetNewTenantConfigError, TenantManager, TenantMapError, TenantMapInsertError,
     TenantSlotError, TenantSlotUpsertError, TenantStateError,
 };
+use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
 use crate::tenant::timeline::CompactFlags;
@@ -49,6 +48,10 @@ use crate::tenant::timeline::Timeline;
 use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError, TenantSharedResources};
 use crate::{config::PageServerConf, tenant::mgr};
 use crate::{disk_usage_eviction_task, tenant};
+use pageserver_api::models::{
+    StatusResponse, TenantConfigRequest, TenantCreateRequest, TenantCreateResponse, TenantInfo,
+    TimelineCreateRequest, TimelineGcRequest, TimelineInfo,
+};
 use utils::{
     auth::SwappableJwtAuth,
     generation::Generation,
@@ -64,7 +67,12 @@ use utils::{
 };
 
 // Imports only used for testing APIs
-use super::models::ConfigureFailpointsRequest;
+use pageserver_api::models::ConfigureFailpointsRequest;
+
+// For APIs that require an Active tenant, how long should we block waiting for that state?
+// This is not functionally necessary (clients will retry), but avoids generating a lot of
+// failed API calls while tenants are activating.
+const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(5000);
 
 pub struct State {
     conf: &'static PageServerConf,
@@ -75,9 +83,11 @@ pub struct State {
     broker_client: storage_broker::BrokerClientChannel,
     disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
     deletion_queue_client: DeletionQueueClient,
+    secondary_controller: SecondaryController,
 }
 
 impl State {
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
         conf: &'static PageServerConf,
         tenant_manager: Arc<TenantManager>,
@@ -86,6 +96,7 @@ impl State {
         broker_client: storage_broker::BrokerClientChannel,
         disk_usage_eviction_state: Arc<disk_usage_eviction_task::State>,
         deletion_queue_client: DeletionQueueClient,
+        secondary_controller: SecondaryController,
     ) -> anyhow::Result<Self> {
         let allowlist_routes = ["/v1/status", "/v1/doc", "/swagger.yml", "/metrics"]
             .iter()
@@ -100,6 +111,7 @@ impl State {
             broker_client,
             disk_usage_eviction_state,
             deletion_queue_client,
+            secondary_controller,
         })
     }
 
@@ -136,11 +148,6 @@ impl From<PageReconstructError> for ApiError {
     fn from(pre: PageReconstructError) -> ApiError {
         match pre {
             PageReconstructError::Other(pre) => ApiError::InternalServerError(pre),
-            PageReconstructError::NeedsDownload(_, _) => {
-                // This shouldn't happen, because we use a RequestContext that requests to
-                // download any missing layer files on-demand.
-                ApiError::InternalServerError(anyhow::anyhow!("need to download remote layer file"))
-            }
             PageReconstructError::Cancelled => {
                 ApiError::InternalServerError(anyhow::anyhow!("request was cancelled"))
             }
@@ -233,6 +240,19 @@ impl From<GetTenantError> for ApiError {
     }
 }
 
+impl From<GetActiveTenantError> for ApiError {
+    fn from(e: GetActiveTenantError) -> ApiError {
+        match e {
+            GetActiveTenantError::WillNotBecomeActive(_) => ApiError::Conflict(format!("{}", e)),
+            GetActiveTenantError::Cancelled => ApiError::ShuttingDown,
+            GetActiveTenantError::NotFound(gte) => gte.into(),
+            GetActiveTenantError::WaitForActiveTimeout { .. } => {
+                ApiError::ResourceUnavailable(format!("{}", e).into())
+            }
+        }
+    }
+}
+
 impl From<SetNewTenantConfigError> for ApiError {
     fn from(e: SetNewTenantConfigError) -> ApiError {
         match e {
@@ -319,6 +339,7 @@ async fn build_timeline_info_common(
     ctx: &RequestContext,
 ) -> anyhow::Result<TimelineInfo> {
     crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id();
+    let initdb_lsn = timeline.initdb_lsn;
     let last_record_lsn = timeline.get_last_record_lsn();
     let (wal_source_connstr, last_received_msg_lsn, last_received_msg_ts) = {
         let guard = timeline.last_received_wal.lock().unwrap();
@@ -352,14 +373,14 @@ async fn build_timeline_info_common(
     let walreceiver_status = timeline.walreceiver_status();
 
     let info = TimelineInfo {
-        // TODO(sharding): add a shard_id field, or make tenant_id into a tenant_shard_id
-        tenant_id: timeline.tenant_shard_id.tenant_id,
+        tenant_id: timeline.tenant_shard_id,
         timeline_id: timeline.timeline_id,
         ancestor_timeline_id,
         ancestor_lsn,
         disk_consistent_lsn: timeline.get_disk_consistent_lsn(),
         remote_consistent_lsn: remote_consistent_lsn_projected,
         remote_consistent_lsn_visible,
+        initdb_lsn,
         last_record_lsn,
         prev_record_lsn: Some(timeline.get_prev_record_lsn()),
         latest_gc_cutoff_lsn: *timeline.get_latest_gc_cutoff_lsn(),
@@ -434,7 +455,10 @@ async fn timeline_create_handler(
     let state = get_state(&request);
 
     async {
-        let tenant = state.tenant_manager.get_attached_tenant_shard(tenant_shard_id, true)?;
+        let tenant = state.tenant_manager.get_attached_tenant_shard(tenant_shard_id, false)?;
+
+        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
+
         match tenant.create_timeline(
             new_timeline_id,
             request_data.ancestor_timeline_id.map(TimelineId::from),
@@ -452,7 +476,7 @@ async fn timeline_create_handler(
                     .map_err(ApiError::InternalServerError)?;
                 json_response(StatusCode::CREATED, timeline_info)
             }
-            Err(tenant::CreateTimelineError::AlreadyExists) => {
+            Err(tenant::CreateTimelineError::Conflict | tenant::CreateTimelineError::AlreadyCreating) => {
                 json_response(StatusCode::CONFLICT, ())
             }
             Err(tenant::CreateTimelineError::AncestorLsn(err)) => {
@@ -480,15 +504,15 @@ async fn timeline_list_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let include_non_incremental_logical_size: Option<bool> =
         parse_query_param(&request, "include-non-incremental-logical-size")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
 
     let response_data = async {
-        let tenant = mgr::get_tenant(tenant_id, true)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;
         let timelines = tenant.list_timelines();
 
         let mut response_data = Vec::with_capacity(timelines.len());
@@ -507,7 +531,9 @@ async fn timeline_list_handler(
         }
         Ok::<Vec<TimelineInfo>, ApiError>(response_data)
     }
-    .instrument(info_span!("timeline_list", %tenant_id))
+    .instrument(info_span!("timeline_list",
+                tenant_id = %tenant_shard_id.tenant_id,
+                shard_id = %tenant_shard_id.shard_slug()))
     .await?;
 
     json_response(StatusCode::OK, response_data)
@@ -517,17 +543,17 @@ async fn timeline_detail_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let include_non_incremental_logical_size: Option<bool> =
         parse_query_param(&request, "include-non-incremental-logical-size")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     // Logical size calculation needs downloading.
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
 
     let timeline_info = async {
-        let tenant = mgr::get_tenant(tenant_id, true)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, true)?;
 
         let timeline = tenant
             .get_timeline(timeline_id, false)
@@ -544,7 +570,10 @@ async fn timeline_detail_handler(
 
         Ok::<_, ApiError>(timeline_info)
     }
-    .instrument(info_span!("timeline_detail", %tenant_id, %timeline_id))
+    .instrument(info_span!("timeline_detail",
+                tenant_id = %tenant_shard_id.tenant_id,
+                shard_id = %tenant_shard_id.shard_slug(),
+                %timeline_id))
     .await?;
 
     json_response(StatusCode::OK, timeline_info)
@@ -554,10 +583,15 @@ async fn get_lsn_by_timestamp_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let version: Option<u8> = parse_query_param(&request, "version")?;
+    if !tenant_shard_id.is_zero() {
+        // Requires SLRU contents, which are only stored on shard zero
+        return Err(ApiError::BadRequest(anyhow!(
+            "Size calculations are only available on shard zero"
+        )));
+    }
 
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let timestamp_raw = must_get_query_param(&request, "timestamp")?;
@@ -567,43 +601,37 @@ async fn get_lsn_by_timestamp_handler(
     let timestamp_pg = postgres_ffi::to_pg_timestamp(timestamp);
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let result = timeline
         .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx)
         .await?;
-
-    if version.unwrap_or(0) > 1 {
-        #[derive(serde::Serialize)]
-        struct Result {
-            lsn: Lsn,
-            kind: &'static str,
-        }
-        let (lsn, kind) = match result {
-            LsnForTimestamp::Present(lsn) => (lsn, "present"),
-            LsnForTimestamp::Future(lsn) => (lsn, "future"),
-            LsnForTimestamp::Past(lsn) => (lsn, "past"),
-            LsnForTimestamp::NoData(lsn) => (lsn, "nodata"),
-        };
-        json_response(StatusCode::OK, Result { lsn, kind })
-    } else {
-        // FIXME: this is a temporary crutch not to break backwards compatibility
-        // See https://github.com/neondatabase/neon/pull/5608
-        let result = match result {
-            LsnForTimestamp::Present(lsn) => format!("{lsn}"),
-            LsnForTimestamp::Future(_lsn) => "future".into(),
-            LsnForTimestamp::Past(_lsn) => "past".into(),
-            LsnForTimestamp::NoData(_lsn) => "nodata".into(),
-        };
-        json_response(StatusCode::OK, result)
+    #[derive(serde::Serialize)]
+    struct Result {
+        lsn: Lsn,
+        kind: &'static str,
     }
+    let (lsn, kind) = match result {
+        LsnForTimestamp::Present(lsn) => (lsn, "present"),
+        LsnForTimestamp::Future(lsn) => (lsn, "future"),
+        LsnForTimestamp::Past(lsn) => (lsn, "past"),
+        LsnForTimestamp::NoData(lsn) => (lsn, "nodata"),
+    };
+    json_response(StatusCode::OK, Result { lsn, kind })
 }
 
 async fn get_timestamp_of_lsn_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
+
+    if !tenant_shard_id.is_zero() {
+        // Requires SLRU contents, which are only stored on shard zero
+        return Err(ApiError::BadRequest(anyhow!(
+            "Size calculations are only available on shard zero"
+        )));
+    }
 
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
 
@@ -613,7 +641,7 @@ async fn get_timestamp_of_lsn_handler(
         .map_err(ApiError::BadRequest)?;
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?;
 
     match result {
@@ -674,11 +702,23 @@ async fn timeline_delete_handler(
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Warn);
     let state = get_state(&request);
 
-    state.tenant_manager.delete_timeline(tenant_shard_id, timeline_id, &ctx)
-        .instrument(info_span!("timeline_delete", tenant_id=%tenant_shard_id.tenant_id, shard=%tenant_shard_id.shard_slug(), %timeline_id))
+    let tenant = state
+        .tenant_manager
+        .get_attached_tenant_shard(tenant_shard_id, false)
+        .map_err(|e| {
+            match e {
+                // GetTenantError has a built-in conversion to ApiError, but in this context we don't
+                // want to treat missing tenants as 404, to avoid ambiguity with successful deletions.
+                GetTenantError::NotFound(_) => ApiError::PreconditionFailed(
+                    "Requested tenant is missing".to_string().into_boxed_str(),
+                ),
+                e => e.into(),
+            }
+        })?;
+    tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
+    tenant.delete_timeline(timeline_id).instrument(info_span!("timeline_delete", tenant_id=%tenant_shard_id.tenant_id, shard=%tenant_shard_id.shard_slug(), %timeline_id))
         .await?;
 
     json_response(StatusCode::ACCEPTED, ())
@@ -805,11 +845,11 @@ async fn tenant_status(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let tenant_info = async {
-        let tenant = mgr::get_tenant(tenant_id, false)?;
+        let tenant = mgr::get_tenant(tenant_shard_id, false)?;
 
         // Calculate total physical size of all timelines
         let mut current_physical_size = 0;
@@ -818,14 +858,19 @@ async fn tenant_status(
         }
 
         let state = tenant.current_state();
-        Result::<_, ApiError>::Ok(TenantInfo {
-            id: tenant_id,
-            state: state.clone(),
-            current_physical_size: Some(current_physical_size),
-            attachment_status: state.attachment_status(),
+        Result::<_, ApiError>::Ok(TenantDetails {
+            tenant_info: TenantInfo {
+                id: tenant_shard_id,
+                state: state.clone(),
+                current_physical_size: Some(current_physical_size),
+                attachment_status: state.attachment_status(),
+            },
+            timelines: tenant.list_timeline_ids(),
         })
     }
-    .instrument(info_span!("tenant_status_handler", %tenant_id))
+    .instrument(info_span!("tenant_status_handler",
+                tenant_id = %tenant_shard_id.tenant_id,
+                shard_id = %tenant_shard_id.shard_slug()))
     .await?;
 
     json_response(StatusCode::OK, tenant_info)
@@ -868,14 +913,20 @@ async fn tenant_size_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let inputs_only: Option<bool> = parse_query_param(&request, "inputs_only")?;
     let retention_period: Option<u64> = parse_query_param(&request, "retention_period")?;
     let headers = request.headers();
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let tenant = mgr::get_tenant(tenant_id, true)?;
+    let tenant = mgr::get_tenant(tenant_shard_id, true)?;
+
+    if !tenant_shard_id.is_zero() {
+        return Err(ApiError::BadRequest(anyhow!(
+            "Size calculations are only available on shard zero"
+        )));
+    }
 
     // this can be long operation
     let inputs = tenant
@@ -927,7 +978,7 @@ async fn tenant_size_handler(
     json_response(
         StatusCode::OK,
         TenantHistorySize {
-            id: tenant_id,
+            id: tenant_shard_id.tenant_id,
             size: sizes.as_ref().map(|x| x.total_size),
             segment_sizes: sizes.map(|x| x.segments),
             inputs,
@@ -939,14 +990,14 @@ async fn layer_map_info_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let reset: LayerAccessStatsReset =
         parse_query_param(&request, "reset")?.unwrap_or(LayerAccessStatsReset::NoReset);
 
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let layer_map_info = timeline.layer_map_info(reset).await;
 
     json_response(StatusCode::OK, layer_map_info)
@@ -956,13 +1007,12 @@ async fn layer_download_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let layer_file_name = get_request_param(&request, "layer_file_name")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let downloaded = timeline
         .download_layer(layer_file_name)
         .await
@@ -973,7 +1023,7 @@ async fn layer_download_handler(
         Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
         None => json_response(
             StatusCode::BAD_REQUEST,
-            format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
+            format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
         ),
     }
 }
@@ -982,12 +1032,12 @@ async fn evict_timeline_layer_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let layer_file_name = get_request_param(&request, "layer_file_name")?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let evicted = timeline
         .evict_layer(layer_file_name)
         .await
@@ -998,7 +1048,7 @@ async fn evict_timeline_layer_handler(
         Some(false) => json_response(StatusCode::NOT_MODIFIED, ()),
         None => json_response(
             StatusCode::BAD_REQUEST,
-            format!("Layer {tenant_id}/{timeline_id}/{layer_file_name} not found"),
+            format!("Layer {tenant_shard_id}/{timeline_id}/{layer_file_name} not found"),
         ),
     }
 }
@@ -1109,7 +1159,10 @@ async fn tenant_create_handler(
 
     // We created the tenant. Existing API semantics are that the tenant
     // is Active when this function returns.
-    if let res @ Err(_) = new_tenant.wait_to_become_active().await {
+    if let res @ Err(_) = new_tenant
+        .wait_to_become_active(ACTIVE_TENANT_TIMEOUT)
+        .await
+    {
         // This shouldn't happen because we just created the tenant directory
         // in tenant::mgr::create_tenant, and there aren't any remote timelines
         // to load, so, nothing can really fail during load.
@@ -1130,10 +1183,10 @@ async fn get_tenant_config_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let tenant = mgr::get_tenant(tenant_id, false)?;
+    let tenant = mgr::get_tenant(tenant_shard_id, false)?;
 
     let response = HashMap::from([
         (
@@ -1227,9 +1280,9 @@ async fn handle_tenant_break(
     r: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&r, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&r, "tenant_shard_id")?;
 
-    let tenant = crate::tenant::mgr::get_tenant(tenant_id, true)
+    let tenant = crate::tenant::mgr::get_tenant(tenant_shard_id, true)
         .map_err(|_| ApiError::Conflict(String::from("no active tenant found")))?;
 
     tenant.set_broken("broken from test".to_owned()).await;
@@ -1270,14 +1323,15 @@ async fn timeline_gc_handler(
     mut request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let gc_req: TimelineGcRequest = json_request(&mut request).await?;
 
     let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-    let wait_task_done = mgr::immediate_gc(tenant_id, timeline_id, gc_req, cancel, &ctx).await?;
+    let wait_task_done =
+        mgr::immediate_gc(tenant_shard_id, timeline_id, gc_req, cancel, &ctx).await?;
     let gc_result = wait_task_done
         .await
         .context("wait for gc task")
@@ -1292,9 +1346,9 @@ async fn timeline_compact_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let mut flags = EnumSet::empty();
     if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1302,14 +1356,14 @@ async fn timeline_compact_handler(
     }
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
         timeline
             .compact(&cancel, flags, &ctx)
             .await
             .map_err(|e| ApiError::InternalServerError(e.into()))?;
         json_response(StatusCode::OK, ())
     }
-    .instrument(info_span!("manual_compaction", %tenant_id, %timeline_id))
+    .instrument(info_span!("manual_compaction", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
@@ -1318,9 +1372,9 @@ async fn timeline_checkpoint_handler(
     request: Request<Body>,
     cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let mut flags = EnumSet::empty();
     if Some(true) == parse_query_param::<_, bool>(&request, "force_repartition")? {
@@ -1328,7 +1382,7 @@ async fn timeline_checkpoint_handler(
     }
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
         timeline
             .freeze_and_flush()
             .await
@@ -1340,7 +1394,7 @@ async fn timeline_checkpoint_handler(
 
         json_response(StatusCode::OK, ())
     }
-    .instrument(info_span!("manual_checkpoint", %tenant_id, %timeline_id))
+    .instrument(info_span!("manual_checkpoint", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
@@ -1348,12 +1402,12 @@ async fn timeline_download_remote_layers_handler_post(
     mut request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
     let body: DownloadRemoteLayersTaskSpawnRequest = json_request(&mut request).await?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     match timeline.spawn_download_all_remote_layers(body).await {
         Ok(st) => json_response(StatusCode::ACCEPTED, st),
         Err(st) => json_response(StatusCode::CONFLICT, st),
@@ -1364,11 +1418,11 @@ async fn timeline_download_remote_layers_handler_get(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
 
-    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+    let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
     let info = timeline
         .get_download_all_remote_layers_task_info()
         .context("task never started since last pageserver process start")
@@ -1414,9 +1468,9 @@ async fn getpage_at_lsn_handler(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     struct Key(crate::repository::Key);
 
@@ -1435,7 +1489,7 @@ async fn getpage_at_lsn_handler(
 
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
 
         let page = timeline.get(key.0, lsn, &ctx).await?;
 
@@ -1447,7 +1501,7 @@ async fn getpage_at_lsn_handler(
                 .unwrap(),
         )
     }
-    .instrument(info_span!("timeline_get", %tenant_id, %timeline_id))
+    .instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
@@ -1455,95 +1509,34 @@ async fn timeline_collect_keyspace(
     request: Request<Body>,
     _cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
-    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
     let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-    check_permission(&request, Some(tenant_id))?;
-
-    struct Partitioning {
-        keys: crate::keyspace::KeySpace,
-
-        at_lsn: Lsn,
-    }
-
-    impl serde::Serialize for Partitioning {
-        fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            use serde::ser::SerializeMap;
-            let mut map = serializer.serialize_map(Some(2))?;
-            map.serialize_key("keys")?;
-            map.serialize_value(&KeySpace(&self.keys))?;
-            map.serialize_key("at_lsn")?;
-            map.serialize_value(&WithDisplay(&self.at_lsn))?;
-            map.end()
-        }
-    }
-
-    struct WithDisplay<'a, T>(&'a T);
-
-    impl<'a, T: std::fmt::Display> serde::Serialize for WithDisplay<'a, T> {
-        fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            serializer.collect_str(&self.0)
-        }
-    }
-
-    struct KeySpace<'a>(&'a crate::keyspace::KeySpace);
-
-    impl<'a> serde::Serialize for KeySpace<'a> {
-        fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            use serde::ser::SerializeSeq;
-            let mut seq = serializer.serialize_seq(Some(self.0.ranges.len()))?;
-            for kr in &self.0.ranges {
-                seq.serialize_element(&KeyRange(kr))?;
-            }
-            seq.end()
-        }
-    }
-
-    struct KeyRange<'a>(&'a std::ops::Range<crate::repository::Key>);
-
-    impl<'a> serde::Serialize for KeyRange<'a> {
-        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-        where
-            S: serde::Serializer,
-        {
-            use serde::ser::SerializeTuple;
-            let mut t = serializer.serialize_tuple(2)?;
-            t.serialize_element(&WithDisplay(&self.0.start))?;
-            t.serialize_element(&WithDisplay(&self.0.end))?;
-            t.end()
-        }
-    }
+    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
 
     let at_lsn: Option<Lsn> = parse_query_param(&request, "at_lsn")?;
 
     async {
         let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
-        let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
+        let timeline = active_timeline_of_active_tenant(tenant_shard_id, timeline_id).await?;
         let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
         let keys = timeline
             .collect_keyspace(at_lsn, &ctx)
             .await
             .map_err(|e| ApiError::InternalServerError(e.into()))?;
 
-        json_response(StatusCode::OK, Partitioning { keys, at_lsn })
+        let res = pageserver_api::models::partitioning::Partitioning { keys, at_lsn };
+
+        json_response(StatusCode::OK, res)
     }
-    .instrument(info_span!("timeline_collect_keyspace", %tenant_id, %timeline_id))
+    .instrument(info_span!("timeline_collect_keyspace", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
     .await
 }
 
 async fn active_timeline_of_active_tenant(
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
     timeline_id: TimelineId,
 ) -> Result<Arc<Timeline>, ApiError> {
-    let tenant = mgr::get_tenant(tenant_id, true)?;
+    let tenant = mgr::get_tenant(tenant_shard_id, true)?;
     tenant
         .get_timeline(timeline_id, true)
         .map_err(|e| ApiError::NotFound(e.into()))
@@ -1565,7 +1558,7 @@ async fn always_panic_handler(
 
 async fn disk_usage_eviction_run(
     mut r: Request<Body>,
-    _cancel: CancellationToken,
+    cancel: CancellationToken,
 ) -> Result<Response<Body>, ApiError> {
     check_permission(&r, None)?;
 
@@ -1593,57 +1586,48 @@ async fn disk_usage_eviction_run(
         }
     }
 
-    let config = json_request::<Config>(&mut r)
-        .await
-        .map_err(|_| ApiError::BadRequest(anyhow::anyhow!("invalid JSON body")))?;
+    let config = json_request::<Config>(&mut r).await?;
 
     let usage = Usage {
         config,
         freed_bytes: 0,
     };
 
-    let (tx, rx) = tokio::sync::oneshot::channel();
-
     let state = get_state(&r);
 
-    if state.remote_storage.as_ref().is_none() {
+    let Some(storage) = state.remote_storage.as_ref() else {
         return Err(ApiError::InternalServerError(anyhow::anyhow!(
             "remote storage not configured, cannot run eviction iteration"
         )));
-    }
+    };
 
     let state = state.disk_usage_eviction_state.clone();
 
-    let cancel = CancellationToken::new();
-    let child_cancel = cancel.clone();
-    let _g = cancel.drop_guard();
+    let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
+        &state, storage, usage, &cancel,
+    )
+    .await;
 
-    crate::task_mgr::spawn(
-        crate::task_mgr::BACKGROUND_RUNTIME.handle(),
-        TaskKind::DiskUsageEviction,
-        None,
-        None,
-        "ondemand disk usage eviction",
-        false,
-        async move {
-            let res = crate::disk_usage_eviction_task::disk_usage_eviction_task_iteration_impl(
-                &state,
-                usage,
-                &child_cancel,
-            )
-            .await;
+    info!(?res, "disk_usage_eviction_task_iteration_impl finished");
 
-            info!(?res, "disk_usage_eviction_task_iteration_impl finished");
+    let res = res.map_err(ApiError::InternalServerError)?;
 
-            let _ = tx.send(res);
-            Ok(())
-        }
-        .in_current_span(),
-    );
+    json_response(StatusCode::OK, res)
+}
 
-    let response = rx.await.unwrap().map_err(ApiError::InternalServerError)?;
+async fn secondary_upload_handler(
+    request: Request<Body>,
+    _cancel: CancellationToken,
+) -> Result<Response<Body>, ApiError> {
+    let state = get_state(&request);
+    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
+    state
+        .secondary_controller
+        .upload_tenant(tenant_shard_id)
+        .await
+        .map_err(ApiError::InternalServerError)?;
 
-    json_response(StatusCode::OK, response)
+    json_response(StatusCode::OK, ())
 }
 
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
@@ -1820,23 +1804,25 @@ pub fn make_router(
         })
         .get("/v1/tenant", |r| api_handler(r, tenant_list_handler))
         .post("/v1/tenant", |r| api_handler(r, tenant_create_handler))
-        .get("/v1/tenant/:tenant_id", |r| api_handler(r, tenant_status))
+        .get("/v1/tenant/:tenant_shard_id", |r| {
+            api_handler(r, tenant_status)
+        })
         .delete("/v1/tenant/:tenant_shard_id", |r| {
             api_handler(r, tenant_delete_handler)
         })
-        .get("/v1/tenant/:tenant_id/synthetic_size", |r| {
+        .get("/v1/tenant/:tenant_shard_id/synthetic_size", |r| {
             api_handler(r, tenant_size_handler)
         })
         .put("/v1/tenant/config", |r| {
             api_handler(r, update_tenant_config_handler)
         })
-        .get("/v1/tenant/:tenant_id/config", |r| {
+        .get("/v1/tenant/:tenant_shard_id/config", |r| {
             api_handler(r, get_tenant_config_handler)
         })
         .put("/v1/tenant/:tenant_shard_id/location_config", |r| {
             api_handler(r, put_tenant_location_config_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline", |r| {
+        .get("/v1/tenant/:tenant_shard_id/timeline", |r| {
             api_handler(r, timeline_list_handler)
         })
         .post("/v1/tenant/:tenant_shard_id/timeline", |r| {
@@ -1857,67 +1843,74 @@ pub fn make_router(
         .post("/v1/tenant/:tenant_id/ignore", |r| {
             api_handler(r, tenant_ignore_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline/:timeline_id", |r| {
+        .get("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
             api_handler(r, timeline_detail_handler)
         })
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/get_lsn_by_timestamp",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_lsn_by_timestamp",
             |r| api_handler(r, get_lsn_by_timestamp_handler),
         )
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/get_timestamp_of_lsn",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn",
             |r| api_handler(r, get_timestamp_of_lsn_handler),
         )
-        .put("/v1/tenant/:tenant_id/timeline/:timeline_id/do_gc", |r| {
-            api_handler(r, timeline_gc_handler)
-        })
-        .put("/v1/tenant/:tenant_id/timeline/:timeline_id/compact", |r| {
-            testing_api_handler("run timeline compaction", r, timeline_compact_handler)
-        })
         .put(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/checkpoint",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/do_gc",
+            |r| api_handler(r, timeline_gc_handler),
+        )
+        .put(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/compact",
+            |r| testing_api_handler("run timeline compaction", r, timeline_compact_handler),
+        )
+        .put(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/checkpoint",
             |r| testing_api_handler("run timeline checkpoint", r, timeline_checkpoint_handler),
         )
         .post(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
             |r| api_handler(r, timeline_download_remote_layers_handler_post),
         )
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/download_remote_layers",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
             |r| api_handler(r, timeline_download_remote_layers_handler_get),
         )
         .delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
             api_handler(r, timeline_delete_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline/:timeline_id/layer", |r| {
-            api_handler(r, layer_map_info_handler)
-        })
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer",
+            |r| api_handler(r, layer_map_info_handler),
+        )
+        .get(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
             |r| api_handler(r, layer_download_handler),
         )
         .delete(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/layer/:layer_file_name",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/layer/:layer_file_name",
             |r| api_handler(r, evict_timeline_layer_handler),
         )
+        .post("/v1/tenant/:tenant_shard_id/heatmap_upload", |r| {
+            api_handler(r, secondary_upload_handler)
+        })
         .put("/v1/disk_usage_eviction/run", |r| {
             api_handler(r, disk_usage_eviction_run)
         })
         .put("/v1/deletion_queue/flush", |r| {
             api_handler(r, deletion_queue_flush)
         })
-        .put("/v1/tenant/:tenant_id/break", |r| {
+        .put("/v1/tenant/:tenant_shard_id/break", |r| {
             testing_api_handler("set tenant state to broken", r, handle_tenant_break)
         })
         .get("/v1/panic", |r| api_handler(r, always_panic_handler))
         .post("/v1/tracing/event", |r| {
             testing_api_handler("emit a tracing event", r, post_tracing_event_handler)
         })
-        .get("/v1/tenant/:tenant_id/timeline/:timeline_id/getpage", |r| {
-            testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler)
-        })
         .get(
-            "/v1/tenant/:tenant_id/timeline/:timeline_id/keyspace",
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
+            |r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
+        )
+        .get(
+            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
             |r| testing_api_handler("read out the keyspace", r, timeline_collect_keyspace),
         )
         .any(handler_404))
diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs
index 797cb6f944..58adf6e8c4 100644
--- a/pageserver/src/lib.rs
+++ b/pageserver/src/lib.rs
@@ -10,7 +10,7 @@ pub mod deletion_queue;
 pub mod disk_usage_eviction_task;
 pub mod http;
 pub mod import_datadir;
-pub mod keyspace;
+pub use pageserver_api::keyspace;
 pub mod metrics;
 pub mod page_cache;
 pub mod page_service;
diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs
index f2a5d01902..f2bf65da24 100644
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -2,9 +2,10 @@ use enum_map::EnumMap;
 use metrics::metric_vec_duration::DurationResultObserver;
 use metrics::{
     register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
-    register_int_counter, register_int_counter_vec, register_int_gauge, register_int_gauge_vec,
-    register_uint_gauge, register_uint_gauge_vec, Counter, CounterVec, GaugeVec, Histogram,
-    HistogramVec, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
+    register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
+    register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
+    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPairVec,
+    IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
 use pageserver_api::shard::TenantShardId;
@@ -521,14 +522,18 @@ pub(crate) mod initial_logical_size {
     impl StartCalculation {
         pub(crate) fn first(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
             let circumstances_label: &'static str = circumstances.into();
-            self.0.with_label_values(&["first", circumstances_label]);
+            self.0
+                .with_label_values(&["first", circumstances_label])
+                .inc();
             OngoingCalculationGuard {
                 inc_drop_calculation: Some(DROP_CALCULATION.first.clone()),
             }
         }
         pub(crate) fn retry(&self, circumstances: StartCircumstances) -> OngoingCalculationGuard {
             let circumstances_label: &'static str = circumstances.into();
-            self.0.with_label_values(&["retry", circumstances_label]);
+            self.0
+                .with_label_values(&["retry", circumstances_label])
+                .inc();
             OngoingCalculationGuard {
                 inc_drop_calculation: Some(DROP_CALCULATION.retry.clone()),
             }
@@ -650,7 +655,7 @@ static EVICTIONS_WITH_LOW_RESIDENCE_DURATION: Lazy<IntCounterVec> = Lazy::new(||
         "pageserver_evictions_with_low_residence_duration",
         "If a layer is evicted that was resident for less than `low_threshold`, it is counted to this counter. \
          Residence duration is determined using the `residence_duration_data_source`.",
-        &["tenant_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
+        &["tenant_id", "shard_id", "timeline_id", "residence_duration_data_source", "low_threshold_secs"]
     )
     .expect("failed to define a metric")
 });
@@ -683,14 +688,54 @@ pub static STARTUP_IS_LOADING: Lazy<UIntGauge> = Lazy::new(|| {
     .expect("Failed to register pageserver_startup_is_loading")
 });
 
-/// How long did tenants take to go from construction to active state?
-pub(crate) static TENANT_ACTIVATION: Lazy<Histogram> = Lazy::new(|| {
-    register_histogram!(
+/// Metrics related to the lifecycle of a [`crate::tenant::Tenant`] object: things
+/// like how long it took to load.
+///
+/// Note that these are process-global metrics, _not_ per-tenant metrics.  Per-tenant
+/// metrics are rather expensive, and usually fine grained stuff makes more sense
+/// at a timeline level than tenant level.
+pub(crate) struct TenantMetrics {
+    /// How long did tenants take to go from construction to active state?
+    pub(crate) activation: Histogram,
+    pub(crate) preload: Histogram,
+    pub(crate) attach: Histogram,
+
+    /// How many tenants are included in the initial startup of the pagesrever?
+    pub(crate) startup_scheduled: IntCounter,
+    pub(crate) startup_complete: IntCounter,
+}
+
+pub(crate) static TENANT: Lazy<TenantMetrics> = Lazy::new(|| {
+    TenantMetrics {
+    activation: register_histogram!(
         "pageserver_tenant_activation_seconds",
         "Time taken by tenants to activate, in seconds",
         CRITICAL_OP_BUCKETS.into()
     )
-    .expect("Failed to register pageserver_tenant_activation_seconds metric")
+    .expect("Failed to register metric"),
+    preload: register_histogram!(
+        "pageserver_tenant_preload_seconds",
+        "Time taken by tenants to load remote metadata on startup/attach, in seconds",
+        CRITICAL_OP_BUCKETS.into()
+    )
+    .expect("Failed to register metric"),
+    attach: register_histogram!(
+        "pageserver_tenant_attach_seconds",
+        "Time taken by tenants to intialize, after remote metadata is already loaded",
+        CRITICAL_OP_BUCKETS.into()
+    )
+    .expect("Failed to register metric"),
+    startup_scheduled: register_int_counter!(
+        "pageserver_tenant_startup_scheduled",
+        "Number of tenants included in pageserver startup (doesn't count tenants attached later)"
+    ).expect("Failed to register metric"),
+    startup_complete: register_int_counter!(
+        "pageserver_tenant_startup_complete",
+        "Number of tenants that have completed warm-up, or activated on-demand during initial startup: \
+         should eventually reach `pageserver_tenant_startup_scheduled_total`.  Does not include broken \
+         tenants: such cases will lead to this metric never reaching the scheduled count."
+    ).expect("Failed to register metric"),
+}
 });
 
 /// Each `Timeline`'s  [`EVICTIONS_WITH_LOW_RESIDENCE_DURATION`] metric.
@@ -714,10 +759,16 @@ impl EvictionsWithLowResidenceDurationBuilder {
         }
     }
 
-    fn build(&self, tenant_id: &str, timeline_id: &str) -> EvictionsWithLowResidenceDuration {
+    fn build(
+        &self,
+        tenant_id: &str,
+        shard_id: &str,
+        timeline_id: &str,
+    ) -> EvictionsWithLowResidenceDuration {
         let counter = EVICTIONS_WITH_LOW_RESIDENCE_DURATION
             .get_metric_with_label_values(&[
                 tenant_id,
+                shard_id,
                 timeline_id,
                 self.data_source,
                 &EvictionsWithLowResidenceDuration::threshold_label_value(self.threshold),
@@ -748,21 +799,24 @@ impl EvictionsWithLowResidenceDuration {
     pub fn change_threshold(
         &mut self,
         tenant_id: &str,
+        shard_id: &str,
         timeline_id: &str,
         new_threshold: Duration,
     ) {
         if new_threshold == self.threshold {
             return;
         }
-        let mut with_new =
-            EvictionsWithLowResidenceDurationBuilder::new(self.data_source, new_threshold)
-                .build(tenant_id, timeline_id);
+        let mut with_new = EvictionsWithLowResidenceDurationBuilder::new(
+            self.data_source,
+            new_threshold,
+        )
+        .build(tenant_id, shard_id, timeline_id);
         std::mem::swap(self, &mut with_new);
-        with_new.remove(tenant_id, timeline_id);
+        with_new.remove(tenant_id, shard_id, timeline_id);
     }
 
     // This could be a `Drop` impl, but, we need the `tenant_id` and `timeline_id`.
-    fn remove(&mut self, tenant_id: &str, timeline_id: &str) {
+    fn remove(&mut self, tenant_id: &str, shard_id: &str, timeline_id: &str) {
         let Some(_counter) = self.counter.take() else {
             return;
         };
@@ -771,6 +825,7 @@ impl EvictionsWithLowResidenceDuration {
 
         let removed = EVICTIONS_WITH_LOW_RESIDENCE_DURATION.remove_label_values(&[
             tenant_id,
+            shard_id,
             timeline_id,
             self.data_source,
             &threshold,
@@ -969,12 +1024,62 @@ static SMGR_QUERY_TIME_PER_TENANT_TIMELINE: Lazy<HistogramVec> = Lazy::new(|| {
     .expect("failed to define a metric")
 });
 
+static SMGR_QUERY_TIME_GLOBAL_BUCKETS: Lazy<Vec<f64>> = Lazy::new(|| {
+    [
+        1,
+        10,
+        20,
+        40,
+        60,
+        80,
+        100,
+        200,
+        300,
+        400,
+        500,
+        600,
+        700,
+        800,
+        900,
+        1_000, // 1ms
+        2_000,
+        4_000,
+        6_000,
+        8_000,
+        10_000, // 10ms
+        20_000,
+        40_000,
+        60_000,
+        80_000,
+        100_000,
+        200_000,
+        400_000,
+        600_000,
+        800_000,
+        1_000_000, // 1s
+        2_000_000,
+        4_000_000,
+        6_000_000,
+        8_000_000,
+        10_000_000, // 10s
+        20_000_000,
+        50_000_000,
+        100_000_000,
+        200_000_000,
+        1_000_000_000, // 1000s
+    ]
+    .into_iter()
+    .map(Duration::from_micros)
+    .map(|d| d.as_secs_f64())
+    .collect()
+});
+
 static SMGR_QUERY_TIME_GLOBAL: Lazy<HistogramVec> = Lazy::new(|| {
     register_histogram_vec!(
         "pageserver_smgr_query_seconds_global",
         "Time spent on smgr query handling, aggregated by query type.",
         &["smgr_query_type"],
-        CRITICAL_OP_BUCKETS.into(),
+        SMGR_QUERY_TIME_GLOBAL_BUCKETS.clone(),
     )
     .expect("failed to define a metric")
 });
@@ -1261,6 +1366,28 @@ pub(crate) static WAL_INGEST: Lazy<WalIngestMetrics> = Lazy::new(|| WalIngestMet
     )
     .expect("failed to define a metric"),
 });
+pub(crate) struct SecondaryModeMetrics {
+    pub(crate) upload_heatmap: IntCounter,
+    pub(crate) upload_heatmap_errors: IntCounter,
+    pub(crate) upload_heatmap_duration: Histogram,
+}
+pub(crate) static SECONDARY_MODE: Lazy<SecondaryModeMetrics> = Lazy::new(|| SecondaryModeMetrics {
+    upload_heatmap: register_int_counter!(
+        "pageserver_secondary_upload_heatmap",
+        "Number of heatmaps written to remote storage by attached tenants"
+    )
+    .expect("failed to define a metric"),
+    upload_heatmap_errors: register_int_counter!(
+        "pageserver_secondary_upload_heatmap_errors",
+        "Failures writing heatmap to remote storage"
+    )
+    .expect("failed to define a metric"),
+    upload_heatmap_duration: register_histogram!(
+        "pageserver_secondary_upload_heatmap_duration",
+        "Time to build and upload a heatmap, including any waiting inside the S3 client"
+    )
+    .expect("failed to define a metric"),
+});
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 pub enum RemoteOpKind {
@@ -1312,25 +1439,16 @@ pub(crate) static TENANT_TASK_EVENTS: Lazy<IntCounterVec> = Lazy::new(|| {
     .expect("Failed to register tenant_task_events metric")
 });
 
-pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT: Lazy<IntCounterVec> =
-    Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_background_loop_semaphore_wait_start_count",
-            "Counter for background loop concurrency-limiting semaphore acquire calls started",
-            &["task"],
-        )
-        .unwrap()
-    });
-
-pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT: Lazy<IntCounterVec> =
-    Lazy::new(|| {
-        register_int_counter_vec!(
-            "pageserver_background_loop_semaphore_wait_finish_count",
-            "Counter for background loop concurrency-limiting semaphore acquire calls finished",
-            &["task"],
-        )
-        .unwrap()
-    });
+pub(crate) static BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "pageserver_background_loop_semaphore_wait_start_count",
+        "Counter for background loop concurrency-limiting semaphore acquire calls started",
+        "pageserver_background_loop_semaphore_wait_finish_count",
+        "Counter for background loop concurrency-limiting semaphore acquire calls finished",
+        &["task"],
+    )
+    .unwrap()
+});
 
 pub(crate) static BACKGROUND_LOOP_PERIOD_OVERRUN_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
     register_int_counter_vec!(
@@ -1604,6 +1722,7 @@ impl StorageTimeMetrics {
 #[derive(Debug)]
 pub struct TimelineMetrics {
     tenant_id: String,
+    shard_id: String,
     timeline_id: String,
     pub flush_time_histo: StorageTimeMetrics,
     pub compact_time_histo: StorageTimeMetrics,
@@ -1624,11 +1743,12 @@ pub struct TimelineMetrics {
 
 impl TimelineMetrics {
     pub fn new(
-        tenant_id: &TenantId,
+        tenant_shard_id: &TenantShardId,
         timeline_id: &TimelineId,
         evictions_with_low_residence_duration_builder: EvictionsWithLowResidenceDurationBuilder,
     ) -> Self {
-        let tenant_id = tenant_id.to_string();
+        let tenant_id = tenant_shard_id.tenant_id.to_string();
+        let shard_id = format!("{}", tenant_shard_id.shard_slug());
         let timeline_id = timeline_id.to_string();
         let flush_time_histo =
             StorageTimeMetrics::new(StorageTimeOperation::LayerFlush, &tenant_id, &timeline_id);
@@ -1665,11 +1785,12 @@ impl TimelineMetrics {
         let evictions = EVICTIONS
             .get_metric_with_label_values(&[&tenant_id, &timeline_id])
             .unwrap();
-        let evictions_with_low_residence_duration =
-            evictions_with_low_residence_duration_builder.build(&tenant_id, &timeline_id);
+        let evictions_with_low_residence_duration = evictions_with_low_residence_duration_builder
+            .build(&tenant_id, &shard_id, &timeline_id);
 
         TimelineMetrics {
             tenant_id,
+            shard_id,
             timeline_id,
             flush_time_histo,
             compact_time_histo,
@@ -1715,6 +1836,7 @@ impl Drop for TimelineMetrics {
     fn drop(&mut self) {
         let tenant_id = &self.tenant_id;
         let timeline_id = &self.timeline_id;
+        let shard_id = &self.shard_id;
         let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, timeline_id]);
         {
             RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());
@@ -1728,7 +1850,7 @@ impl Drop for TimelineMetrics {
         self.evictions_with_low_residence_duration
             .write()
             .unwrap()
-            .remove(tenant_id, timeline_id);
+            .remove(tenant_id, shard_id, timeline_id);
 
         // The following metrics are born outside of the TimelineMetrics lifecycle but still
         // removed at the end of it. The idea is to have the metrics outlive the
@@ -2186,6 +2308,9 @@ pub fn preinitialize_metrics() {
     // Deletion queue stats
     Lazy::force(&DELETION_QUEUE);
 
+    // Tenant stats
+    Lazy::force(&TENANT);
+
     // Tenant manager stats
     Lazy::force(&TENANT_MANAGER);
 
diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs
index dbd85d2dcf..c3c98af406 100644
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -28,7 +28,7 @@
 //! Page cache maps from a cache key to a buffer slot.
 //! The cache key uniquely identifies the piece of data that is being cached.
 //!
-//! The cache key for **materialized pages** is  [`TenantId`], [`TimelineId`], [`Key`], and [`Lsn`].
+//! The cache key for **materialized pages** is  [`TenantShardId`], [`TimelineId`], [`Key`], and [`Lsn`].
 //! Use [`PageCache::memorize_materialized_page`] and [`PageCache::lookup_materialized_page`] for fill & access.
 //!
 //! The cache key for **immutable file** pages is [`FileId`] and a block number.
@@ -83,10 +83,8 @@ use std::{
 
 use anyhow::Context;
 use once_cell::sync::OnceCell;
-use utils::{
-    id::{TenantId, TimelineId},
-    lsn::Lsn,
-};
+use pageserver_api::shard::TenantShardId;
+use utils::{id::TimelineId, lsn::Lsn};
 
 use crate::{
     context::RequestContext,
@@ -154,7 +152,13 @@ enum CacheKey {
 
 #[derive(Debug, PartialEq, Eq, Hash, Clone)]
 struct MaterializedPageHashKey {
-    tenant_id: TenantId,
+    /// Why is this TenantShardId rather than TenantId?
+    ///
+    /// Usually, the materialized value of a page@lsn is identical on any shard in the same tenant.  However, this
+    /// this not the case for certain internally-generated pages (e.g. relation sizes).  In future, we may make this
+    /// key smaller by omitting the shard, if we ensure that reads to such pages always skip the cache, or are
+    /// special-cased in some other way.
+    tenant_shard_id: TenantShardId,
     timeline_id: TimelineId,
     key: Key,
 }
@@ -378,7 +382,7 @@ impl PageCache {
     /// returned page.
     pub async fn lookup_materialized_page(
         &self,
-        tenant_id: TenantId,
+        tenant_shard_id: TenantShardId,
         timeline_id: TimelineId,
         key: &Key,
         lsn: Lsn,
@@ -395,7 +399,7 @@ impl PageCache {
 
         let mut cache_key = CacheKey::MaterializedPage {
             hash_key: MaterializedPageHashKey {
-                tenant_id,
+                tenant_shard_id,
                 timeline_id,
                 key: *key,
             },
@@ -436,7 +440,7 @@ impl PageCache {
     ///
     pub async fn memorize_materialized_page(
         &self,
-        tenant_id: TenantId,
+        tenant_shard_id: TenantShardId,
         timeline_id: TimelineId,
         key: Key,
         lsn: Lsn,
@@ -444,7 +448,7 @@ impl PageCache {
     ) -> anyhow::Result<()> {
         let cache_key = CacheKey::MaterializedPage {
             hash_key: MaterializedPageHashKey {
-                tenant_id,
+                tenant_shard_id,
                 timeline_id,
                 key,
             },
diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs
index c653f0b7ea..b81037ae47 100644
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -822,10 +822,7 @@ impl<'a> DatadirModification<'a> {
         self.put(DBDIR_KEY, Value::Image(buf.into()));
 
         // Create AuxFilesDirectory
-        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
-            files: HashMap::new(),
-        })?;
-        self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
+        self.init_aux_dir()?;
 
         let buf = TwoPhaseDirectory::ser(&TwoPhaseDirectory {
             xids: HashSet::new(),
@@ -933,10 +930,7 @@ impl<'a> DatadirModification<'a> {
             self.put(DBDIR_KEY, Value::Image(buf.into()));
 
             // Create AuxFilesDirectory as well
-            let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
-                files: HashMap::new(),
-            })?;
-            self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
+            self.init_aux_dir()?;
         }
         if r.is_none() {
             // Create RelDirectory
@@ -1261,6 +1255,14 @@ impl<'a> DatadirModification<'a> {
         Ok(())
     }
 
+    pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
+        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
+            files: HashMap::new(),
+        })?;
+        self.put(AUX_FILES_KEY, Value::Image(Bytes::from(buf)));
+        Ok(())
+    }
+
     pub async fn put_file(
         &mut self,
         path: &str,
@@ -1767,6 +1769,13 @@ const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.
 
+// AUX_FILES currently stores only data for logical replication (slots etc), and
+// we don't preserve these on a branch because safekeepers can't follow timeline
+// switch (and generally it likely should be optional), so ignore these.
+pub fn is_inherited_key(key: Key) -> bool {
+    key != AUX_FILES_KEY
+}
+
 pub fn key_to_rel_block(key: Key) -> anyhow::Result<(RelTag, BlockNumber)> {
     Ok(match key.field1 {
         0x00 => (
diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs
index 97d731bf49..c726139524 100644
--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -2,38 +2,11 @@ use crate::walrecord::NeonWalRecord;
 use anyhow::Result;
 use bytes::Bytes;
 use serde::{Deserialize, Serialize};
-use std::ops::{AddAssign, Range};
+use std::ops::AddAssign;
 use std::time::Duration;
 
 pub use pageserver_api::key::{Key, KEY_SIZE};
 
-pub fn key_range_size(key_range: &Range<Key>) -> u32 {
-    let start = key_range.start;
-    let end = key_range.end;
-
-    if end.field1 != start.field1
-        || end.field2 != start.field2
-        || end.field3 != start.field3
-        || end.field4 != start.field4
-    {
-        return u32::MAX;
-    }
-
-    let start = (start.field5 as u64) << 32 | start.field6 as u64;
-    let end = (end.field5 as u64) << 32 | end.field6 as u64;
-
-    let diff = end - start;
-    if diff > u32::MAX as u64 {
-        u32::MAX
-    } else {
-        diff as u32
-    }
-}
-
-pub fn singleton_range(key: Key) -> Range<Key> {
-    key..key.next()
-}
-
 /// A 'value' stored for a one Key.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 #[cfg_attr(test, derive(PartialEq))]
diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs
index 4270b6edb0..cb1b2b8011 100644
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -42,6 +42,7 @@ use std::sync::atomic::{AtomicU64, Ordering};
 use std::sync::{Arc, Mutex};
 
 use futures::FutureExt;
+use pageserver_api::shard::TenantShardId;
 use tokio::runtime::Runtime;
 use tokio::task::JoinHandle;
 use tokio::task_local;
@@ -51,7 +52,7 @@ use tracing::{debug, error, info, warn};
 
 use once_cell::sync::Lazy;
 
-use utils::id::{TenantId, TimelineId};
+use utils::id::TimelineId;
 
 use crate::shutdown_pageserver;
 
@@ -257,6 +258,9 @@ pub enum TaskKind {
     /// See [`crate::disk_usage_eviction_task`].
     DiskUsageEviction,
 
+    /// See [`crate::tenant::secondary`].
+    SecondaryUploads,
+
     // Initial logical size calculation
     InitialLogicalSizeCalculation,
 
@@ -317,7 +321,7 @@ struct PageServerTask {
 
     /// Tasks may optionally be launched for a particular tenant/timeline, enabling
     /// later cancelling tasks for that tenant/timeline in [`shutdown_tasks`]
-    tenant_id: Option<TenantId>,
+    tenant_shard_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
 
     mutable: Mutex<MutableTaskState>,
@@ -329,7 +333,7 @@ struct PageServerTask {
 pub fn spawn<F>(
     runtime: &tokio::runtime::Handle,
     kind: TaskKind,
-    tenant_id: Option<TenantId>,
+    tenant_shard_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
     name: &str,
     shutdown_process_on_error: bool,
@@ -345,7 +349,7 @@ where
         kind,
         name: name.to_string(),
         cancel: cancel.clone(),
-        tenant_id,
+        tenant_shard_id,
         timeline_id,
         mutable: Mutex::new(MutableTaskState { join_handle: None }),
     });
@@ -424,28 +428,28 @@ async fn task_finish(
             Ok(Err(err)) => {
                 if shutdown_process_on_error {
                     error!(
-                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                     shutdown_process = true;
                 } else {
                     error!(
-                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} exited with error: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} exited with error: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                 }
             }
             Err(err) => {
                 if shutdown_process_on_error {
                     error!(
-                        "Shutting down: task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Shutting down: task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                     shutdown_process = true;
                 } else {
                     error!(
-                        "Task '{}' tenant_id: {:?}, timeline_id: {:?} panicked: {:?}",
-                        task_name, task.tenant_id, task.timeline_id, err
+                        "Task '{}' tenant_shard_id: {:?}, timeline_id: {:?} panicked: {:?}",
+                        task_name, task.tenant_shard_id, task.timeline_id, err
                     );
                 }
             }
@@ -467,11 +471,11 @@ async fn task_finish(
 ///
 /// Or to shut down all tasks for given timeline:
 ///
-///   shutdown_tasks(None, Some(tenant_id), Some(timeline_id))
+///   shutdown_tasks(None, Some(tenant_shard_id), Some(timeline_id))
 ///
 pub async fn shutdown_tasks(
     kind: Option<TaskKind>,
-    tenant_id: Option<TenantId>,
+    tenant_shard_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
 ) {
     let mut victim_tasks = Vec::new();
@@ -480,35 +484,35 @@ pub async fn shutdown_tasks(
         let tasks = TASKS.lock().unwrap();
         for task in tasks.values() {
             if (kind.is_none() || Some(task.kind) == kind)
-                && (tenant_id.is_none() || task.tenant_id == tenant_id)
+                && (tenant_shard_id.is_none() || task.tenant_shard_id == tenant_shard_id)
                 && (timeline_id.is_none() || task.timeline_id == timeline_id)
             {
                 task.cancel.cancel();
                 victim_tasks.push((
                     Arc::clone(task),
                     task.kind,
-                    task.tenant_id,
+                    task.tenant_shard_id,
                     task.timeline_id,
                 ));
             }
         }
     }
 
-    let log_all = kind.is_none() && tenant_id.is_none() && timeline_id.is_none();
+    let log_all = kind.is_none() && tenant_shard_id.is_none() && timeline_id.is_none();
 
-    for (task, task_kind, tenant_id, timeline_id) in victim_tasks {
+    for (task, task_kind, tenant_shard_id, timeline_id) in victim_tasks {
         let join_handle = {
             let mut task_mut = task.mutable.lock().unwrap();
             task_mut.join_handle.take()
         };
         if let Some(mut join_handle) = join_handle {
             if log_all {
-                if tenant_id.is_none() {
+                if tenant_shard_id.is_none() {
                     // there are quite few of these
                     info!(name = task.name, kind = ?task_kind, "stopping global task");
                 } else {
                     // warn to catch these in tests; there shouldn't be any
-                    warn!(name = task.name, tenant_id = ?tenant_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
+                    warn!(name = task.name, tenant_shard_id = ?tenant_shard_id, timeline_id = ?timeline_id, kind = ?task_kind, "stopping left-over");
                 }
             }
             if tokio::time::timeout(std::time::Duration::from_secs(1), &mut join_handle)
@@ -517,12 +521,13 @@ pub async fn shutdown_tasks(
             {
                 // allow some time to elapse before logging to cut down the number of log
                 // lines.
-                info!("waiting for {} to shut down", task.name);
+                info!("waiting for task {} to shut down", task.name);
                 // we never handled this return value, but:
                 // - we don't deschedule which would lead to is_cancelled
                 // - panics are already logged (is_panicked)
                 // - task errors are already logged in the wrapper
                 let _ = join_handle.await;
+                info!("task {} completed", task.name);
             }
         } else {
             // Possibly one of:
@@ -556,9 +561,14 @@ pub async fn shutdown_watcher() {
 /// cancelled. It can however be moved to other tasks, such as `tokio::task::spawn_blocking` or
 /// `tokio::task::JoinSet::spawn`.
 pub fn shutdown_token() -> CancellationToken {
-    SHUTDOWN_TOKEN
-        .try_with(|t| t.clone())
-        .expect("shutdown_token() called in an unexpected task or thread")
+    let res = SHUTDOWN_TOKEN.try_with(|t| t.clone());
+
+    if cfg!(test) {
+        // in tests this method is called from non-taskmgr spawned tasks, and that is all ok.
+        res.unwrap_or_default()
+    } else {
+        res.expect("shutdown_token() called in an unexpected task or thread")
+    }
 }
 
 /// Has the current task been requested to shut down?
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 48f71d7747..1d6f1001db 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -36,6 +36,8 @@ use utils::crashsafe::path_with_suffix_extension;
 use utils::fs_ext;
 use utils::sync::gate::Gate;
 use utils::sync::gate::GateGuard;
+use utils::timeout::timeout_cancellable;
+use utils::timeout::TimeoutCancellableError;
 
 use self::config::AttachedLocationConfig;
 use self::config::AttachmentMode;
@@ -48,6 +50,7 @@ use self::mgr::GetActiveTenantError;
 use self::mgr::GetTenantError;
 use self::mgr::TenantsMap;
 use self::remote_timeline_client::RemoteTimelineClient;
+use self::timeline::uninit::TimelineExclusionError;
 use self::timeline::uninit::TimelineUninitMark;
 use self::timeline::uninit::UninitializedTimeline;
 use self::timeline::EvictionTaskTenantState;
@@ -58,7 +61,7 @@ use crate::deletion_queue::DeletionQueueClient;
 use crate::deletion_queue::DeletionQueueError;
 use crate::import_datadir;
 use crate::is_uninit_mark;
-use crate::metrics::TENANT_ACTIVATION;
+use crate::metrics::TENANT;
 use crate::metrics::{remove_tenant_metrics, TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC};
 use crate::repository::GcResult;
 use crate::task_mgr;
@@ -87,7 +90,6 @@ use std::process::Stdio;
 use std::sync::atomic::AtomicU64;
 use std::sync::atomic::Ordering;
 use std::sync::Arc;
-use std::sync::MutexGuard;
 use std::sync::{Mutex, RwLock};
 use std::time::{Duration, Instant};
 
@@ -144,6 +146,7 @@ pub mod storage_layer;
 pub mod config;
 pub mod delete;
 pub mod mgr;
+pub mod secondary;
 pub mod tasks;
 pub mod upload_queue;
 
@@ -225,7 +228,7 @@ pub struct Tenant {
 
     /// The value creation timestamp, used to measure activation delay, see:
     /// <https://github.com/neondatabase/neon/issues/4025>
-    loading_started_at: Instant,
+    constructed_at: Instant,
 
     state: watch::Sender<TenantState>,
 
@@ -248,6 +251,12 @@ pub struct Tenant {
     generation: Generation,
 
     timelines: Mutex<HashMap<TimelineId, Arc<Timeline>>>,
+
+    /// During timeline creation, we first insert the TimelineId to the
+    /// creating map, then `timelines`, then remove it from the creating map.
+    /// **Lock order**: if acquring both, acquire`timelines` before `timelines_creating`
+    timelines_creating: std::sync::Mutex<HashSet<TimelineId>>,
+
     // This mutex prevents creation of new timelines during GC.
     // Adding yet another mutex (in addition to `timelines`) is needed because holding
     // `timelines` mutex during all GC iteration
@@ -269,6 +278,11 @@ pub struct Tenant {
 
     eviction_task_tenant_state: tokio::sync::Mutex<EvictionTaskTenantState>,
 
+    /// If the tenant is in Activating state, notify this to encourage it
+    /// to proceed to Active as soon as possible, rather than waiting for lazy
+    /// background warmup.
+    pub(crate) activate_now_sem: tokio::sync::Semaphore,
+
     pub(crate) delete_progress: Arc<tokio::sync::Mutex<DeleteTenantFlow>>,
 
     // Cancellation token fires when we have entered shutdown().  This is a parent of
@@ -406,8 +420,10 @@ impl Debug for SetStoppingError {
 
 #[derive(thiserror::Error, Debug)]
 pub enum CreateTimelineError {
-    #[error("a timeline with the given ID already exists")]
-    AlreadyExists,
+    #[error("creation of timeline with the given ID is in progress")]
+    AlreadyCreating,
+    #[error("timeline already exists with different parameters")]
+    Conflict,
     #[error(transparent)]
     AncestorLsn(anyhow::Error),
     #[error("ancestor timeline is not active")]
@@ -608,11 +624,19 @@ impl Tenant {
         task_mgr::spawn(
             &tokio::runtime::Handle::current(),
             TaskKind::Attach,
-            Some(tenant_shard_id.tenant_id),
+            Some(tenant_shard_id),
             None,
             "attach tenant",
             false,
             async move {
+                // Is this tenant being spawned as part of process startup?
+                let starting_up = init_order.is_some();
+                scopeguard::defer! {
+                    if starting_up {
+                        TENANT.startup_complete.inc();
+                    }
+                }
+
                 // Ideally we should use Tenant::set_broken_no_wait, but it is not supposed to be used when tenant is in loading state.
                 let make_broken =
                     |t: &Tenant, err: anyhow::Error| {
@@ -639,8 +663,62 @@ impl Tenant {
                     .as_mut()
                     .and_then(|x| x.initial_tenant_load_remote.take());
 
+                enum AttachType<'a> {
+                    // During pageserver startup, we are attaching this tenant lazily in the background
+                    Warmup(tokio::sync::SemaphorePermit<'a>),
+                    // During pageserver startup, we are attaching this tenant as soon as we can,
+                    // because a client tried to access it.
+                    OnDemand,
+                    // During normal operations after startup, we are attaching a tenant.
+                    Normal,
+                }
+
+                // Before doing any I/O, wait for either or:
+                // - A client to attempt to access to this tenant (on-demand loading)
+                // - A permit to become available in the warmup semaphore (background warmup)
+                //
+                // Some-ness of init_order is how we know if we're attaching during startup or later
+                // in process lifetime.
+                let attach_type = if init_order.is_some() {
+                    tokio::select!(
+                        _ = tenant_clone.activate_now_sem.acquire() => {
+                            tracing::info!("Activating tenant (on-demand)");
+                            AttachType::OnDemand
+                        },
+                        permit_result = conf.concurrent_tenant_warmup.inner().acquire() => {
+                            match permit_result {
+                                Ok(p) => {
+                                    tracing::info!("Activating tenant (warmup)");
+                                    AttachType::Warmup(p)
+                                }
+                                Err(_) => {
+                                    // This is unexpected: the warmup semaphore should stay alive
+                                    // for the lifetime of init_order.  Log a warning and proceed.
+                                    tracing::warn!("warmup_limit semaphore unexpectedly closed");
+                                    AttachType::Normal
+                                }
+                            }
+
+                        }
+                        _ = tenant_clone.cancel.cancelled() => {
+                            // This is safe, but should be pretty rare: it is interesting if a tenant
+                            // stayed in Activating for such a long time that shutdown found it in
+                            // that state.
+                            tracing::info!(state=%tenant_clone.current_state(), "Tenant shut down before activation");
+                            return Ok(());
+                        },
+                    )
+                } else {
+                    AttachType::Normal
+                };
+
+                let preload_timer = TENANT.preload.start_timer();
                 let preload = match mode {
-                    SpawnMode::Create => {None},
+                    SpawnMode::Create => {
+                        // Don't count the skipped preload into the histogram of preload durations
+                        preload_timer.stop_and_discard();
+                        None
+                    },
                     SpawnMode::Normal => {
                         match &remote_storage {
                             Some(remote_storage) => Some(
@@ -650,7 +728,11 @@ impl Tenant {
                                         tracing::info_span!(parent: None, "attach_preload", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()),
                                     )
                                     .await {
-                                        Ok(p) => p,
+                                        Ok(p) => {
+                                            preload_timer.observe_duration();
+                                            p
+                                        }
+                                            ,
                                         Err(e) => {
                                             make_broken(&tenant_clone, anyhow::anyhow!(e));
                                                 return Ok(());
@@ -712,15 +794,43 @@ impl Tenant {
                     }
                 }
 
+                // We will time the duration of the attach phase unless this is a creation (attach will do no work)
+                let attach_timer = match mode {
+                    SpawnMode::Create => None,
+                    SpawnMode::Normal => {Some(TENANT.attach.start_timer())}
+                };
                 match tenant_clone.attach(preload, &ctx).await {
                     Ok(()) => {
                         info!("attach finished, activating");
+                        if let Some(t)=  attach_timer {t.observe_duration();}
                         tenant_clone.activate(broker_client, None, &ctx);
                     }
                     Err(e) => {
+                        if let Some(t)=  attach_timer {t.observe_duration();}
                         make_broken(&tenant_clone, anyhow::anyhow!(e));
                     }
                 }
+
+                // If we are doing an opportunistic warmup attachment at startup, initialize
+                // logical size at the same time.  This is better than starting a bunch of idle tenants
+                // with cold caches and then coming back later to initialize their logical sizes.
+                //
+                // It also prevents the warmup proccess competing with the concurrency limit on
+                // logical size calculations: if logical size calculation semaphore is saturated,
+                // then warmup will wait for that before proceeding to the next tenant.
+                if let AttachType::Warmup(_permit) = attach_type {
+                    let mut futs = FuturesUnordered::new();
+                    let timelines: Vec<_> = tenant_clone.timelines.lock().unwrap().values().cloned().collect();
+                    for t in timelines {
+                        futs.push(t.await_initial_logical_size())
+                    }
+                    tracing::info!("Waiting for initial logical sizes while warming up...");
+                    while futs.next().await.is_some() {
+
+                    }
+                    tracing::info!("Warm-up complete");
+                }
+
                 Ok(())
             }
             .instrument({
@@ -1442,6 +1552,10 @@ impl Tenant {
             .collect()
     }
 
+    pub fn list_timeline_ids(&self) -> Vec<TimelineId> {
+        self.timelines.lock().unwrap().keys().cloned().collect()
+    }
+
     /// This is used to create the initial 'main' timeline during bootstrapping,
     /// or when importing a new base backup. The caller is expected to load an
     /// initial image of the datadir to the new timeline after this.
@@ -1457,7 +1571,7 @@ impl Tenant {
     /// For tests, use `DatadirModification::init_empty_test_timeline` + `commit` to setup the
     /// minimum amount of keys required to get a writable timeline.
     /// (Without it, `put` might fail due to `repartition` failing.)
-    pub async fn create_empty_timeline(
+    pub(crate) async fn create_empty_timeline(
         &self,
         new_timeline_id: TimelineId,
         initdb_lsn: Lsn,
@@ -1469,10 +1583,7 @@ impl Tenant {
             "Cannot create empty timelines on inactive tenant"
         );
 
-        let timeline_uninit_mark = {
-            let timelines = self.timelines.lock().unwrap();
-            self.create_timeline_uninit_mark(new_timeline_id, &timelines)?
-        };
+        let timeline_uninit_mark = self.create_timeline_uninit_mark(new_timeline_id)?;
         let new_metadata = TimelineMetadata::new(
             // Initialize disk_consistent LSN to 0, The caller must import some data to
             // make it valid, before calling finish_creation()
@@ -1549,7 +1660,7 @@ impl Tenant {
     /// If the caller specified the timeline ID to use (`new_timeline_id`), and timeline with
     /// the same timeline ID already exists, returns CreateTimelineError::AlreadyExists.
     #[allow(clippy::too_many_arguments)]
-    pub async fn create_timeline(
+    pub(crate) async fn create_timeline(
         &self,
         new_timeline_id: TimelineId,
         ancestor_timeline_id: Option<TimelineId>,
@@ -1570,26 +1681,51 @@ impl Tenant {
             .enter()
             .map_err(|_| CreateTimelineError::ShuttingDown)?;
 
-        if let Ok(existing) = self.get_timeline(new_timeline_id, false) {
-            debug!("timeline {new_timeline_id} already exists");
-
-            if let Some(remote_client) = existing.remote_client.as_ref() {
-                // Wait for uploads to complete, so that when we return Ok, the timeline
-                // is known to be durable on remote storage. Just like we do at the end of
-                // this function, after we have created the timeline ourselves.
-                //
-                // We only really care that the initial version of `index_part.json` has
-                // been uploaded. That's enough to remember that the timeline
-                // exists. However, there is no function to wait specifically for that so
-                // we just wait for all in-progress uploads to finish.
-                remote_client
-                    .wait_completion()
-                    .await
-                    .context("wait for timeline uploads to complete")?;
+        // Get exclusive access to the timeline ID: this ensures that it does not already exist,
+        // and that no other creation attempts will be allowed in while we are working.  The
+        // uninit_mark is a guard.
+        let uninit_mark = match self.create_timeline_uninit_mark(new_timeline_id) {
+            Ok(m) => m,
+            Err(TimelineExclusionError::AlreadyCreating) => {
+                // Creation is in progress, we cannot create it again, and we cannot
+                // check if this request matches the existing one, so caller must try
+                // again later.
+                return Err(CreateTimelineError::AlreadyCreating);
             }
+            Err(TimelineExclusionError::Other(e)) => {
+                return Err(CreateTimelineError::Other(e));
+            }
+            Err(TimelineExclusionError::AlreadyExists(existing)) => {
+                debug!("timeline {new_timeline_id} already exists");
 
-            return Err(CreateTimelineError::AlreadyExists);
-        }
+                // Idempotency: creating the same timeline twice is not an error, unless
+                // the second creation has different parameters.
+                if existing.get_ancestor_timeline_id() != ancestor_timeline_id
+                    || existing.pg_version != pg_version
+                    || (ancestor_start_lsn.is_some()
+                        && ancestor_start_lsn != Some(existing.get_ancestor_lsn()))
+                {
+                    return Err(CreateTimelineError::Conflict);
+                }
+
+                if let Some(remote_client) = existing.remote_client.as_ref() {
+                    // Wait for uploads to complete, so that when we return Ok, the timeline
+                    // is known to be durable on remote storage. Just like we do at the end of
+                    // this function, after we have created the timeline ourselves.
+                    //
+                    // We only really care that the initial version of `index_part.json` has
+                    // been uploaded. That's enough to remember that the timeline
+                    // exists. However, there is no function to wait specifically for that so
+                    // we just wait for all in-progress uploads to finish.
+                    remote_client
+                        .wait_completion()
+                        .await
+                        .context("wait for timeline uploads to complete")?;
+                }
+
+                return Ok(existing);
+            }
+        };
 
         let loaded_timeline = match ancestor_timeline_id {
             Some(ancestor_timeline_id) => {
@@ -1626,18 +1762,32 @@ impl Tenant {
                     ancestor_timeline.wait_lsn(*lsn, ctx).await?;
                 }
 
-                self.branch_timeline(&ancestor_timeline, new_timeline_id, ancestor_start_lsn, ctx)
-                    .await?
+                self.branch_timeline(
+                    &ancestor_timeline,
+                    new_timeline_id,
+                    ancestor_start_lsn,
+                    uninit_mark,
+                    ctx,
+                )
+                .await?
             }
             None => {
-                self.bootstrap_timeline(new_timeline_id, pg_version, load_existing_initdb, ctx)
-                    .await?
+                self.bootstrap_timeline(
+                    new_timeline_id,
+                    pg_version,
+                    load_existing_initdb,
+                    uninit_mark,
+                    ctx,
+                )
+                .await?
             }
         };
 
+        // At this point we have dropped our guard on [`Self::timelines_creating`], and
+        // the timeline is visible in [`Self::timelines`], but it is _not_ durable yet.  We must
+        // not send a success to the caller until it is.  The same applies to handling retries,
+        // see the handling of [`TimelineExclusionError::AlreadyExists`] above.
         if let Some(remote_client) = loaded_timeline.remote_client.as_ref() {
-            // Wait for the upload of the 'index_part.json` file to finish, so that when we return
-            // Ok, the timeline is durable in remote storage.
             let kind = ancestor_timeline_id
                 .map(|_| "branched")
                 .unwrap_or("bootstrapped");
@@ -1651,6 +1801,15 @@ impl Tenant {
         Ok(loaded_timeline)
     }
 
+    pub(crate) async fn delete_timeline(
+        self: Arc<Self>,
+        timeline_id: TimelineId,
+    ) -> Result<(), DeleteTimelineError> {
+        DeleteTimelineFlow::run(&self, timeline_id, false).await?;
+
+        Ok(())
+    }
+
     /// perform one garbage collection iteration, removing old data files from disk.
     /// this function is periodically called by gc task.
     /// also it can be explicitly requested through page server api 'do_gc' command.
@@ -1812,7 +1971,7 @@ impl Tenant {
                 );
                 *current_state = TenantState::Active;
 
-                let elapsed = self.loading_started_at.elapsed();
+                let elapsed = self.constructed_at.elapsed();
                 let total_timelines = timelines_accessor.len();
 
                 // log a lot of stuff, because some tenants sometimes suffer from user-visible
@@ -1827,7 +1986,7 @@ impl Tenant {
                     "activation attempt finished"
                 );
 
-                TENANT_ACTIVATION.observe(elapsed.as_secs_f64());
+                TENANT.activation.observe(elapsed.as_secs_f64());
             });
         }
     }
@@ -1917,7 +2076,7 @@ impl Tenant {
         //
         // this will additionally shutdown and await all timeline tasks.
         tracing::debug!("Waiting for tasks...");
-        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id.tenant_id), None).await;
+        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), None).await;
 
         // Wait for any in-flight operations to complete
         self.gate.close().await;
@@ -2082,18 +2241,41 @@ impl Tenant {
         self.state.subscribe()
     }
 
-    pub(crate) async fn wait_to_become_active(&self) -> Result<(), GetActiveTenantError> {
+    /// The activate_now semaphore is initialized with zero units.  As soon as
+    /// we add a unit, waiters will be able to acquire a unit and proceed.
+    pub(crate) fn activate_now(&self) {
+        self.activate_now_sem.add_permits(1);
+    }
+
+    pub(crate) async fn wait_to_become_active(
+        &self,
+        timeout: Duration,
+    ) -> Result<(), GetActiveTenantError> {
         let mut receiver = self.state.subscribe();
         loop {
             let current_state = receiver.borrow_and_update().clone();
             match current_state {
                 TenantState::Loading | TenantState::Attaching | TenantState::Activating(_) => {
                     // in these states, there's a chance that we can reach ::Active
-                    receiver.changed().await.map_err(
-                        |_e: tokio::sync::watch::error::RecvError|
-                            // Tenant existed but was dropped: report it as non-existent
-                            GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_shard_id.tenant_id))
-                    )?;
+                    self.activate_now();
+                    match timeout_cancellable(timeout, &self.cancel, receiver.changed()).await {
+                        Ok(r) => {
+                            r.map_err(
+                            |_e: tokio::sync::watch::error::RecvError|
+                                // Tenant existed but was dropped: report it as non-existent
+                                GetActiveTenantError::NotFound(GetTenantError::NotFound(self.tenant_shard_id.tenant_id))
+                        )?
+                        }
+                        Err(TimeoutCancellableError::Cancelled) => {
+                            return Err(GetActiveTenantError::Cancelled);
+                        }
+                        Err(TimeoutCancellableError::Timeout) => {
+                            return Err(GetActiveTenantError::WaitForActiveTimeout {
+                                latest_state: Some(self.current_state()),
+                                wait_time: timeout,
+                            });
+                        }
+                    }
                 }
                 TenantState::Active { .. } => {
                     return Ok(());
@@ -2114,6 +2296,14 @@ impl Tenant {
             .attach_mode
             .clone()
     }
+
+    pub(crate) fn get_tenant_shard_id(&self) -> &TenantShardId {
+        &self.tenant_shard_id
+    }
+
+    pub(crate) fn get_generation(&self) -> Generation {
+        self.generation
+    }
 }
 
 /// Given a Vec of timelines and their ancestors (timeline_id, ancestor_id),
@@ -2252,6 +2442,18 @@ impl Tenant {
             .or(self.conf.default_tenant_conf.min_resident_size_override)
     }
 
+    pub fn get_heatmap_period(&self) -> Option<Duration> {
+        let tenant_conf = self.tenant_conf.read().unwrap().tenant_conf;
+        let heatmap_period = tenant_conf
+            .heatmap_period
+            .unwrap_or(self.conf.default_tenant_conf.heatmap_period);
+        if heatmap_period.is_zero() {
+            None
+        } else {
+            Some(heatmap_period)
+        }
+    }
+
     pub fn set_new_tenant_config(&self, new_tenant_conf: TenantConfOpt) {
         self.tenant_conf.write().unwrap().tenant_conf = new_tenant_conf;
         // Don't hold self.timelines.lock() during the notifies.
@@ -2398,9 +2600,10 @@ impl Tenant {
             conf,
             // using now here is good enough approximation to catch tenants with really long
             // activation times.
-            loading_started_at: Instant::now(),
+            constructed_at: Instant::now(),
             tenant_conf: Arc::new(RwLock::new(attached_conf)),
             timelines: Mutex::new(HashMap::new()),
+            timelines_creating: Mutex::new(HashSet::new()),
             gc_cs: tokio::sync::Mutex::new(()),
             walredo_mgr,
             remote_storage,
@@ -2409,6 +2612,7 @@ impl Tenant {
             cached_logical_sizes: tokio::sync::Mutex::new(HashMap::new()),
             cached_synthetic_tenant_size: Arc::new(AtomicU64::new(0)),
             eviction_task_tenant_state: tokio::sync::Mutex::new(EvictionTaskTenantState::default()),
+            activate_now_sem: tokio::sync::Semaphore::new(0),
             delete_progress: Arc::new(tokio::sync::Mutex::new(DeleteTenantFlow::default())),
             cancel: CancellationToken::default(),
             gate: Gate::new(format!("Tenant<{tenant_shard_id}>")),
@@ -2792,8 +2996,9 @@ impl Tenant {
         start_lsn: Option<Lsn>,
         ctx: &RequestContext,
     ) -> Result<Arc<Timeline>, CreateTimelineError> {
+        let uninit_mark = self.create_timeline_uninit_mark(dst_id).unwrap();
         let tl = self
-            .branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx)
+            .branch_timeline_impl(src_timeline, dst_id, start_lsn, uninit_mark, ctx)
             .await?;
         tl.set_state(TimelineState::Active);
         Ok(tl)
@@ -2807,9 +3012,10 @@ impl Tenant {
         src_timeline: &Arc<Timeline>,
         dst_id: TimelineId,
         start_lsn: Option<Lsn>,
+        timeline_uninit_mark: TimelineUninitMark<'_>,
         ctx: &RequestContext,
     ) -> Result<Arc<Timeline>, CreateTimelineError> {
-        self.branch_timeline_impl(src_timeline, dst_id, start_lsn, ctx)
+        self.branch_timeline_impl(src_timeline, dst_id, start_lsn, timeline_uninit_mark, ctx)
             .await
     }
 
@@ -2818,13 +3024,14 @@ impl Tenant {
         src_timeline: &Arc<Timeline>,
         dst_id: TimelineId,
         start_lsn: Option<Lsn>,
+        timeline_uninit_mark: TimelineUninitMark<'_>,
         _ctx: &RequestContext,
     ) -> Result<Arc<Timeline>, CreateTimelineError> {
         let src_id = src_timeline.timeline_id;
 
-        // First acquire the GC lock so that another task cannot advance the GC
-        // cutoff in 'gc_info', and make 'start_lsn' invalid, while we are
-        // creating the branch.
+        // We will validate our ancestor LSN in this function.  Acquire the GC lock so that
+        // this check cannot race with GC, and the ancestor LSN is guaranteed to remain
+        // valid while we are creating the branch.
         let _gc_cs = self.gc_cs.lock().await;
 
         // If no start LSN is specified, we branch the new timeline from the source timeline's last record LSN
@@ -2834,13 +3041,6 @@ impl Tenant {
             lsn
         });
 
-        // Create a placeholder for the new branch. This will error
-        // out if the new timeline ID is already in use.
-        let timeline_uninit_mark = {
-            let timelines = self.timelines.lock().unwrap();
-            self.create_timeline_uninit_mark(dst_id, &timelines)?
-        };
-
         // Ensure that `start_lsn` is valid, i.e. the LSN is within the PITR
         // horizon on the source timeline
         //
@@ -2932,21 +3132,38 @@ impl Tenant {
         Ok(new_timeline)
     }
 
-    /// - run initdb to init temporary instance and get bootstrap data
-    /// - after initialization completes, tar up the temp dir and upload it to S3.
-    ///
-    /// The caller is responsible for activating the returned timeline.
-    pub(crate) async fn bootstrap_timeline(
+    /// For unit tests, make this visible so that other modules can directly create timelines
+    #[cfg(test)]
+    pub(crate) async fn bootstrap_timeline_test(
         &self,
         timeline_id: TimelineId,
         pg_version: u32,
         load_existing_initdb: Option<TimelineId>,
         ctx: &RequestContext,
     ) -> anyhow::Result<Arc<Timeline>> {
-        let timeline_uninit_mark = {
-            let timelines = self.timelines.lock().unwrap();
-            self.create_timeline_uninit_mark(timeline_id, &timelines)?
-        };
+        let uninit_mark = self.create_timeline_uninit_mark(timeline_id).unwrap();
+        self.bootstrap_timeline(
+            timeline_id,
+            pg_version,
+            load_existing_initdb,
+            uninit_mark,
+            ctx,
+        )
+        .await
+    }
+
+    /// - run initdb to init temporary instance and get bootstrap data
+    /// - after initialization completes, tar up the temp dir and upload it to S3.
+    ///
+    /// The caller is responsible for activating the returned timeline.
+    async fn bootstrap_timeline(
+        &self,
+        timeline_id: TimelineId,
+        pg_version: u32,
+        load_existing_initdb: Option<TimelineId>,
+        timeline_uninit_mark: TimelineUninitMark<'_>,
+        ctx: &RequestContext,
+    ) -> anyhow::Result<Arc<Timeline>> {
         // create a `tenant/{tenant_id}/timelines/basebackup-{timeline_id}.{TEMP_FILE_SUFFIX}/`
         // temporary directory for basebackup files for the given timeline.
 
@@ -2980,6 +3197,7 @@ impl Tenant {
                     storage,
                     &self.tenant_shard_id,
                     &existing_initdb_timeline_id,
+                    &self.cancel,
                 )
                 .await
                 .context("download initdb tar")?;
@@ -3020,6 +3238,7 @@ impl Tenant {
                             &timeline_id,
                             pgdata_zstd.try_clone().await?,
                             tar_zst_size,
+                            &self.cancel,
                         )
                         .await
                     },
@@ -3027,8 +3246,7 @@ impl Tenant {
                     3,
                     u32::MAX,
                     "persist_initdb_tar_zst",
-                    // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-                    backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
+                    backoff::Cancel::new(self.cancel.clone(), || anyhow::anyhow!("Cancelled")),
                 )
                 .await?;
 
@@ -3143,11 +3361,11 @@ impl Tenant {
     /// at 'disk_consistent_lsn'. After any initial data has been imported, call
     /// `finish_creation` to insert the Timeline into the timelines map and to remove the
     /// uninit mark file.
-    async fn prepare_new_timeline(
-        &self,
+    async fn prepare_new_timeline<'a>(
+        &'a self,
         new_timeline_id: TimelineId,
         new_metadata: &TimelineMetadata,
-        uninit_mark: TimelineUninitMark,
+        uninit_mark: TimelineUninitMark<'a>,
         start_lsn: Lsn,
         ancestor: Option<Arc<Timeline>>,
     ) -> anyhow::Result<UninitializedTimeline> {
@@ -3220,23 +3438,38 @@ impl Tenant {
     fn create_timeline_uninit_mark(
         &self,
         timeline_id: TimelineId,
-        timelines: &MutexGuard<HashMap<TimelineId, Arc<Timeline>>>,
-    ) -> anyhow::Result<TimelineUninitMark> {
+    ) -> Result<TimelineUninitMark, TimelineExclusionError> {
         let tenant_shard_id = self.tenant_shard_id;
 
-        anyhow::ensure!(
-            timelines.get(&timeline_id).is_none(),
-            "Timeline {tenant_shard_id}/{timeline_id} already exists in pageserver's memory"
-        );
-        let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id);
-        anyhow::ensure!(
-            !timeline_path.exists(),
-            "Timeline {timeline_path} already exists, cannot create its uninit mark file",
-        );
-
         let uninit_mark_path = self
             .conf
             .timeline_uninit_mark_file_path(tenant_shard_id, timeline_id);
+        let timeline_path = self.conf.timeline_path(&tenant_shard_id, &timeline_id);
+
+        let uninit_mark = TimelineUninitMark::new(
+            self,
+            timeline_id,
+            uninit_mark_path.clone(),
+            timeline_path.clone(),
+        )?;
+
+        // At this stage, we have got exclusive access to in-memory state for this timeline ID
+        // for creation.
+        // A timeline directory should never exist on disk already:
+        // - a previous failed creation would have cleaned up after itself
+        // - a pageserver restart would clean up timeline directories that don't have valid remote state
+        //
+        // Therefore it is an unexpected internal error to encounter a timeline directory already existing here,
+        // this error may indicate a bug in cleanup on failed creations.
+        if timeline_path.exists() {
+            return Err(TimelineExclusionError::Other(anyhow::anyhow!(
+                "Timeline directory already exists! This is a bug."
+            )));
+        }
+
+        // Create the on-disk uninit mark _after_ the in-memory acquisition of the tenant ID: guarantees
+        // that during process runtime, colliding creations will be caught in-memory without getting
+        // as far as failing to write a file.
         fs::OpenOptions::new()
             .write(true)
             .create_new(true)
@@ -3250,8 +3483,6 @@ impl Tenant {
                 format!("Failed to crate uninit mark for timeline {tenant_shard_id}/{timeline_id}")
             })?;
 
-        let uninit_mark = TimelineUninitMark::new(uninit_mark_path, timeline_path);
-
         Ok(uninit_mark)
     }
 
@@ -3694,6 +3925,7 @@ pub(crate) mod harness {
                     tenant_conf.evictions_low_residence_duration_metric_threshold,
                 ),
                 gc_feedback: Some(tenant_conf.gc_feedback),
+                heatmap_period: Some(tenant_conf.heatmap_period),
             }
         }
     }
@@ -4000,13 +4232,7 @@ mod tests {
             .await
         {
             Ok(_) => panic!("duplicate timeline creation should fail"),
-            Err(e) => assert_eq!(
-                e.to_string(),
-                format!(
-                    "Timeline {}/{} already exists in pageserver's memory",
-                    tenant.tenant_shard_id, TIMELINE_ID
-                )
-            ),
+            Err(e) => assert_eq!(e.to_string(), "Already exists".to_string()),
         }
 
         Ok(())
diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs
index 7a454b53d2..25d97f51ce 100644
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -334,6 +334,11 @@ pub struct TenantConf {
     #[serde(with = "humantime_serde")]
     pub evictions_low_residence_duration_metric_threshold: Duration,
     pub gc_feedback: bool,
+
+    /// If non-zero, the period between uploads of a heatmap from attached tenants.  This
+    /// may be disabled if a Tenant will not have secondary locations: only secondary
+    /// locations will use the heatmap uploaded by attached locations.
+    pub heatmap_period: Duration,
 }
 
 /// Same as TenantConf, but this struct preserves the information about
@@ -414,6 +419,11 @@ pub struct TenantConfOpt {
     #[serde(skip_serializing_if = "Option::is_none")]
     #[serde(default)]
     pub gc_feedback: Option<bool>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(with = "humantime_serde")]
+    #[serde(default)]
+    pub heatmap_period: Option<Duration>,
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -482,6 +492,7 @@ impl TenantConfOpt {
                 .evictions_low_residence_duration_metric_threshold
                 .unwrap_or(global_conf.evictions_low_residence_duration_metric_threshold),
             gc_feedback: self.gc_feedback.unwrap_or(global_conf.gc_feedback),
+            heatmap_period: self.heatmap_period.unwrap_or(global_conf.heatmap_period),
         }
     }
 }
@@ -519,6 +530,7 @@ impl Default for TenantConf {
             )
             .expect("cannot parse default evictions_low_residence_duration_metric_threshold"),
             gc_feedback: false,
+            heatmap_period: Duration::ZERO,
         }
     }
 }
diff --git a/pageserver/src/tenant/delete.rs b/pageserver/src/tenant/delete.rs
index b8d6d0a321..e8491f26db 100644
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -71,6 +71,7 @@ async fn create_remote_delete_mark(
     conf: &PageServerConf,
     remote_storage: &GenericRemoteStorage,
     tenant_shard_id: &TenantShardId,
+    cancel: &CancellationToken,
 ) -> Result<(), DeleteTenantError> {
     let remote_mark_path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
 
@@ -87,8 +88,7 @@ async fn create_remote_delete_mark(
         FAILED_UPLOAD_WARN_THRESHOLD,
         FAILED_REMOTE_OP_RETRIES,
         "mark_upload",
-        // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-        backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
+        backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
     )
     .await
     .context("mark_upload")?;
@@ -170,6 +170,7 @@ async fn remove_tenant_remote_delete_mark(
     conf: &PageServerConf,
     remote_storage: Option<&GenericRemoteStorage>,
     tenant_shard_id: &TenantShardId,
+    cancel: &CancellationToken,
 ) -> Result<(), DeleteTenantError> {
     if let Some(remote_storage) = remote_storage {
         let path = remote_tenant_delete_mark_path(conf, tenant_shard_id)?;
@@ -179,8 +180,7 @@ async fn remove_tenant_remote_delete_mark(
             FAILED_UPLOAD_WARN_THRESHOLD,
             FAILED_REMOTE_OP_RETRIES,
             "remove_tenant_remote_delete_mark",
-            // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-            backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
+            backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
         )
         .await
         .context("remove_tenant_remote_delete_mark")?;
@@ -322,9 +322,15 @@ impl DeleteTenantFlow {
         // Though sounds scary, different mark name?
         // Detach currently uses remove_dir_all so in case of a crash we can end up in a weird state.
         if let Some(remote_storage) = &remote_storage {
-            create_remote_delete_mark(conf, remote_storage, &tenant.tenant_shard_id)
-                .await
-                .context("remote_mark")?
+            create_remote_delete_mark(
+                conf,
+                remote_storage,
+                &tenant.tenant_shard_id,
+                // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
+                &CancellationToken::new(),
+            )
+            .await
+            .context("remote_mark")?
         }
 
         fail::fail_point!("tenant-delete-before-create-local-mark", |_| {
@@ -463,7 +469,7 @@ impl DeleteTenantFlow {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             TaskKind::TimelineDeletionWorker,
-            Some(tenant_shard_id.tenant_id),
+            Some(tenant_shard_id),
             None,
             "tenant_delete",
             false,
@@ -524,8 +530,14 @@ impl DeleteTenantFlow {
                 .context("timelines dir not empty")?;
         }
 
-        remove_tenant_remote_delete_mark(conf, remote_storage.as_ref(), &tenant.tenant_shard_id)
-            .await?;
+        remove_tenant_remote_delete_mark(
+            conf,
+            remote_storage.as_ref(),
+            &tenant.tenant_shard_id,
+            // Can't use tenant.cancel, it's already shut down.  TODO: wire in an appropriate token
+            &CancellationToken::new(),
+        )
+        .await?;
 
         fail::fail_point!("tenant-delete-before-cleanup-remaining-fs-traces", |_| {
             Err(anyhow::anyhow!(
@@ -550,7 +562,7 @@ impl DeleteTenantFlow {
                 // we encounter an InProgress marker, yield the barrier it contains and wait on it.
                 let barrier = {
                     let mut locked = tenants.write().unwrap();
-                    let removed = locked.remove(&tenant.tenant_shard_id.tenant_id);
+                    let removed = locked.remove(tenant.tenant_shard_id);
 
                     // FIXME: we should not be modifying this from outside of mgr.rs.
                     // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 8466fe7fca..31d80026f0 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -28,7 +28,7 @@ use crate::control_plane_client::{
     ControlPlaneClient, ControlPlaneGenerationsApi, RetryForeverError,
 };
 use crate::deletion_queue::DeletionQueueClient;
-use crate::metrics::TENANT_MANAGER as METRICS;
+use crate::metrics::{TENANT, TENANT_MANAGER as METRICS};
 use crate::task_mgr::{self, TaskKind};
 use crate::tenant::config::{
     AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, TenantConfOpt,
@@ -44,7 +44,6 @@ use utils::generation::Generation;
 use utils::id::{TenantId, TimelineId};
 
 use super::delete::DeleteTenantError;
-use super::timeline::delete::DeleteTimelineFlow;
 use super::TenantSharedResources;
 
 /// For a tenant that appears in TenantsMap, it may either be
@@ -98,33 +97,6 @@ pub(crate) enum TenantsMap {
     ShuttingDown(BTreeMap<TenantShardId, TenantSlot>),
 }
 
-/// Helper for mapping shard-unaware functions to a sharding-aware map
-/// TODO(sharding): all users of this must be made shard-aware.
-fn exactly_one_or_none<'a>(
-    map: &'a BTreeMap<TenantShardId, TenantSlot>,
-    tenant_id: &TenantId,
-) -> Option<(&'a TenantShardId, &'a TenantSlot)> {
-    let mut slots = map.range(TenantShardId::tenant_range(*tenant_id));
-
-    // Retrieve the first two slots in the range: if both are populated, we must panic because the caller
-    // needs a shard-naive view of the world in which only one slot can exist for a TenantId at a time.
-    let slot_a = slots.next();
-    let slot_b = slots.next();
-    match (slot_a, slot_b) {
-        (None, None) => None,
-        (Some(slot), None) => {
-            // Exactly one matching slot
-            Some(slot)
-        }
-        (Some(_slot_a), Some(_slot_b)) => {
-            // Multiple shards for this tenant: cannot handle this yet.
-            // TODO(sharding): callers of get() should be shard-aware.
-            todo!("Attaching multiple shards in teh same tenant to the same pageserver")
-        }
-        (None, Some(_)) => unreachable!(),
-    }
-}
-
 pub(crate) enum TenantsMapRemoveResult {
     Occupied(TenantSlot),
     Vacant,
@@ -147,12 +119,11 @@ impl TenantsMap {
     /// Convenience function for typical usage, where we want to get a `Tenant` object, for
     /// working with attached tenants.  If the TenantId is in the map but in Secondary state,
     /// None is returned.
-    pub(crate) fn get(&self, tenant_id: &TenantId) -> Option<&Arc<Tenant>> {
+    pub(crate) fn get(&self, tenant_shard_id: &TenantShardId) -> Option<&Arc<Tenant>> {
         match self {
             TenantsMap::Initializing => None,
             TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
-                // TODO(sharding): callers of get() should be shard-aware.
-                exactly_one_or_none(m, tenant_id).and_then(|(_, slot)| slot.get_attached())
+                m.get(tenant_shard_id).and_then(|slot| slot.get_attached())
             }
         }
     }
@@ -204,25 +175,19 @@ impl TenantsMap {
     ///
     /// The normal way to remove a tenant is using a SlotGuard, which will gracefully remove the guarded
     /// slot if the enclosed tenant is shutdown.
-    pub(crate) fn remove(&mut self, tenant_id: &TenantId) -> TenantsMapRemoveResult {
+    pub(crate) fn remove(&mut self, tenant_shard_id: TenantShardId) -> TenantsMapRemoveResult {
         use std::collections::btree_map::Entry;
         match self {
             TenantsMap::Initializing => TenantsMapRemoveResult::Vacant,
-            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => {
-                let key = exactly_one_or_none(m, tenant_id).map(|(k, _)| *k);
-                match key {
-                    Some(key) => match m.entry(key) {
-                        Entry::Occupied(entry) => match entry.get() {
-                            TenantSlot::InProgress(barrier) => {
-                                TenantsMapRemoveResult::InProgress(barrier.clone())
-                            }
-                            _ => TenantsMapRemoveResult::Occupied(entry.remove()),
-                        },
-                        Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
-                    },
-                    None => TenantsMapRemoveResult::Vacant,
-                }
-            }
+            TenantsMap::Open(m) | TenantsMap::ShuttingDown(m) => match m.entry(tenant_shard_id) {
+                Entry::Occupied(entry) => match entry.get() {
+                    TenantSlot::InProgress(barrier) => {
+                        TenantsMapRemoveResult::InProgress(barrier.clone())
+                    }
+                    _ => TenantsMapRemoveResult::Occupied(entry.remove()),
+                },
+                Entry::Vacant(_entry) => TenantsMapRemoveResult::Vacant,
+            },
         }
     }
 
@@ -464,6 +429,13 @@ pub async fn init_tenant_mgr(
     let tenant_generations =
         init_load_generations(conf, &tenant_configs, &resources, &cancel).await?;
 
+    tracing::info!(
+        "Attaching {} tenants at startup, warming up {} at a time",
+        tenant_configs.len(),
+        conf.concurrent_tenant_warmup.initial_permits()
+    );
+    TENANT.startup_scheduled.inc_by(tenant_configs.len() as u64);
+
     // Construct `Tenant` objects and start them running
     for (tenant_shard_id, location_conf) in tenant_configs {
         let tenant_dir_path = conf.tenant_path(&tenant_shard_id);
@@ -542,10 +514,7 @@ pub async fn init_tenant_mgr(
             &ctx,
         ) {
             Ok(tenant) => {
-                tenants.insert(
-                    TenantShardId::unsharded(tenant.tenant_id()),
-                    TenantSlot::Attached(tenant),
-                );
+                tenants.insert(tenant_shard_id, TenantSlot::Attached(tenant));
             }
             Err(e) => {
                 error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Failed to start tenant: {e:#}");
@@ -822,14 +791,16 @@ pub(crate) async fn set_new_tenant_config(
     new_tenant_conf: TenantConfOpt,
     tenant_id: TenantId,
 ) -> Result<(), SetNewTenantConfigError> {
+    // Legacy API: does not support sharding
+    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
+
     info!("configuring tenant {tenant_id}");
-    let tenant = get_tenant(tenant_id, true)?;
+    let tenant = get_tenant(tenant_shard_id, true)?;
 
     // This is a legacy API that only operates on attached tenants: the preferred
     // API to use is the location_config/ endpoint, which lets the caller provide
     // the full LocationConf.
     let location_conf = LocationConf::attached_single(new_tenant_conf, tenant.generation);
-    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
 
     Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf)
         .await
@@ -839,6 +810,12 @@ pub(crate) async fn set_new_tenant_config(
 }
 
 impl TenantManager {
+    /// Convenience function so that anyone with a TenantManager can get at the global configuration, without
+    /// having to pass it around everywhere as a separate object.
+    pub(crate) fn get_conf(&self) -> &'static PageServerConf {
+        self.conf
+    }
+
     /// Gets the attached tenant from the in-memory data, erroring if it's absent, in secondary mode, or is not fitting to the query.
     /// `active_only = true` allows to query only tenants that are ready for operations, erroring on other kinds of tenants.
     pub(crate) fn get_attached_tenant_shard(
@@ -874,17 +851,6 @@ impl TenantManager {
         }
     }
 
-    pub(crate) async fn delete_timeline(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        _ctx: &RequestContext,
-    ) -> Result<(), DeleteTimelineError> {
-        let tenant = self.get_attached_tenant_shard(tenant_shard_id, true)?;
-        DeleteTimelineFlow::run(&tenant, timeline_id, false).await?;
-        Ok(())
-    }
-
     #[instrument(skip_all, fields(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug()))]
     pub(crate) async fn upsert_location(
         &self,
@@ -993,35 +959,27 @@ impl TenantManager {
         }
 
         let tenant_path = self.conf.tenant_path(&tenant_shard_id);
+        let timelines_path = self.conf.timelines_path(&tenant_shard_id);
+
+        // Directory structure is the same for attached and secondary modes:
+        // create it if it doesn't exist.  Timeline load/creation expects the
+        // timelines/ subdir to already exist.
+        //
+        // Does not need to be fsync'd because local storage is just a cache.
+        tokio::fs::create_dir_all(&timelines_path)
+            .await
+            .with_context(|| format!("Creating {timelines_path}"))?;
+
+        // Before activating either secondary or attached mode, persist the
+        // configuration, so that on restart we will re-attach (or re-start
+        // secondary) on the tenant.
+        Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
+            .await
+            .map_err(SetNewTenantConfigError::Persist)?;
 
         let new_slot = match &new_location_config.mode {
-            LocationMode::Secondary(_) => {
-                // Directory doesn't need to be fsync'd because if we crash it can
-                // safely be recreated next time this tenant location is configured.
-                tokio::fs::create_dir_all(&tenant_path)
-                    .await
-                    .with_context(|| format!("Creating {tenant_path}"))?;
-
-                Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
-                    .await
-                    .map_err(SetNewTenantConfigError::Persist)?;
-
-                TenantSlot::Secondary
-            }
+            LocationMode::Secondary(_) => TenantSlot::Secondary,
             LocationMode::Attached(_attach_config) => {
-                let timelines_path = self.conf.timelines_path(&tenant_shard_id);
-
-                // Directory doesn't need to be fsync'd because we do not depend on
-                // it to exist after crashes: it may be recreated when tenant is
-                // re-attached, see https://github.com/neondatabase/neon/issues/5550
-                tokio::fs::create_dir_all(&tenant_path)
-                    .await
-                    .with_context(|| format!("Creating {timelines_path}"))?;
-
-                Tenant::persist_tenant_config(self.conf, &tenant_shard_id, &new_location_config)
-                    .await
-                    .map_err(SetNewTenantConfigError::Persist)?;
-
                 let shard_identity = new_location_config.shard;
                 let tenant = tenant_spawn(
                     self.conf,
@@ -1119,6 +1077,20 @@ impl TenantManager {
 
         Ok(())
     }
+
+    pub(crate) fn get_attached_active_tenant_shards(&self) -> Vec<Arc<Tenant>> {
+        let locked = self.tenants.read().unwrap();
+        match &*locked {
+            TenantsMap::Initializing => Vec::new(),
+            TenantsMap::Open(map) | TenantsMap::ShuttingDown(map) => map
+                .values()
+                .filter_map(|slot| {
+                    slot.get_attached()
+                        .and_then(|t| if t.is_active() { Some(t.clone()) } else { None })
+                })
+                .collect(),
+        }
+    }
 }
 
 #[derive(Debug, thiserror::Error)]
@@ -1143,14 +1115,11 @@ pub(crate) enum GetTenantError {
 ///
 /// This method is cancel-safe.
 pub(crate) fn get_tenant(
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
     active_only: bool,
 ) -> Result<Arc<Tenant>, GetTenantError> {
     let locked = TENANTS.read().unwrap();
 
-    // TODO(sharding): make all callers of get_tenant shard-aware
-    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
-
     let peek_slot = tenant_map_peek_slot(&locked, &tenant_shard_id, TenantSlotPeekMode::Read)?;
 
     match peek_slot {
@@ -1162,14 +1131,18 @@ pub(crate) fn get_tenant(
             TenantState::Active => Ok(Arc::clone(tenant)),
             _ => {
                 if active_only {
-                    Err(GetTenantError::NotActive(tenant_id))
+                    Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
                 } else {
                     Ok(Arc::clone(tenant))
                 }
             }
         },
-        Some(TenantSlot::InProgress(_)) => Err(GetTenantError::NotActive(tenant_id)),
-        None | Some(TenantSlot::Secondary) => Err(GetTenantError::NotFound(tenant_id)),
+        Some(TenantSlot::InProgress(_)) => {
+            Err(GetTenantError::NotActive(tenant_shard_id.tenant_id))
+        }
+        None | Some(TenantSlot::Secondary) => {
+            Err(GetTenantError::NotFound(tenant_shard_id.tenant_id))
+        }
     }
 }
 
@@ -1232,7 +1205,10 @@ pub(crate) async fn get_active_tenant_with_timeout(
                         // Fast path: we don't need to do any async waiting.
                         return Ok(tenant.clone());
                     }
-                    _ => (WaitFor::Tenant(tenant.clone()), tenant_shard_id),
+                    _ => {
+                        tenant.activate_now();
+                        (WaitFor::Tenant(tenant.clone()), tenant_shard_id)
+                    }
                 }
             }
             Some(TenantSlot::Secondary) => {
@@ -1286,28 +1262,10 @@ pub(crate) async fn get_active_tenant_with_timeout(
     };
 
     tracing::debug!("Waiting for tenant to enter active state...");
-    match timeout_cancellable(
-        deadline.duration_since(Instant::now()),
-        cancel,
-        tenant.wait_to_become_active(),
-    )
-    .await
-    {
-        Ok(Ok(())) => Ok(tenant),
-        Ok(Err(e)) => Err(e),
-        Err(TimeoutCancellableError::Timeout) => {
-            let latest_state = tenant.current_state();
-            if latest_state == TenantState::Active {
-                Ok(tenant)
-            } else {
-                Err(GetActiveTenantError::WaitForActiveTimeout {
-                    latest_state: Some(latest_state),
-                    wait_time: timeout,
-                })
-            }
-        }
-        Err(TimeoutCancellableError::Cancelled) => Err(GetActiveTenantError::Cancelled),
-    }
+    tenant
+        .wait_to_become_active(deadline.duration_since(Instant::now()))
+        .await?;
+    Ok(tenant)
 }
 
 pub(crate) async fn delete_tenant(
@@ -1542,7 +1500,8 @@ pub(crate) enum TenantMapListError {
 ///
 /// Get list of tenants, for the mgmt API
 ///
-pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, TenantMapListError> {
+pub(crate) async fn list_tenants() -> Result<Vec<(TenantShardId, TenantState)>, TenantMapListError>
+{
     let tenants = TENANTS.read().unwrap();
     let m = match &*tenants {
         TenantsMap::Initializing => return Err(TenantMapListError::Initializing),
@@ -1550,12 +1509,10 @@ pub(crate) async fn list_tenants() -> Result<Vec<(TenantId, TenantState)>, Tenan
     };
     Ok(m.iter()
         .filter_map(|(id, tenant)| match tenant {
-            TenantSlot::Attached(tenant) => Some((id, tenant.current_state())),
+            TenantSlot::Attached(tenant) => Some((*id, tenant.current_state())),
             TenantSlot::Secondary => None,
             TenantSlot::InProgress(_) => None,
         })
-        // TODO(sharding): make callers of this function shard-aware
-        .map(|(k, v)| (k.tenant_id, v))
         .collect())
 }
 
@@ -2089,21 +2046,19 @@ use {
 };
 
 pub(crate) async fn immediate_gc(
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
     timeline_id: TimelineId,
     gc_req: TimelineGcRequest,
     cancel: CancellationToken,
     ctx: &RequestContext,
 ) -> Result<tokio::sync::oneshot::Receiver<Result<GcResult, anyhow::Error>>, ApiError> {
     let guard = TENANTS.read().unwrap();
-    let tenant = guard
-        .get(&tenant_id)
-        .map(Arc::clone)
-        .with_context(|| format!("tenant {tenant_id}"))
-        .map_err(|e| ApiError::NotFound(e.into()))?;
 
-    // TODO(sharding): make callers of this function shard-aware
-    let tenant_shard_id = TenantShardId::unsharded(tenant_id);
+    let tenant = guard
+        .get(&tenant_shard_id)
+        .map(Arc::clone)
+        .with_context(|| format!("tenant {tenant_shard_id}"))
+        .map_err(|e| ApiError::NotFound(e.into()))?;
 
     let gc_horizon = gc_req.gc_horizon.unwrap_or_else(|| tenant.get_gc_horizon());
     // Use tenant's pitr setting
@@ -2116,9 +2071,9 @@ pub(crate) async fn immediate_gc(
     task_mgr::spawn(
         &tokio::runtime::Handle::current(),
         TaskKind::GarbageCollector,
-        Some(tenant_id),
+        Some(tenant_shard_id),
         Some(timeline_id),
-        &format!("timeline_gc_handler garbage collection run for tenant {tenant_id} timeline {timeline_id}"),
+        &format!("timeline_gc_handler garbage collection run for tenant {tenant_shard_id} timeline {timeline_id}"),
         false,
         async move {
             fail::fail_point!("immediate_gc_task_pre");
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 03600cf5ae..52ee8f49ce 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -180,7 +180,7 @@
 //! [`Tenant::timeline_init_and_sync`]: super::Tenant::timeline_init_and_sync
 //! [`Timeline::load_layer_map`]: super::Timeline::load_layer_map
 
-mod download;
+pub(crate) mod download;
 pub mod index;
 mod upload;
 
@@ -196,10 +196,12 @@ pub(crate) use upload::upload_initdb_dir;
 use utils::backoff::{
     self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS,
 };
+use utils::timeout::{timeout_cancellable, TimeoutCancellableError};
 
 use std::collections::{HashMap, VecDeque};
 use std::sync::atomic::{AtomicU32, Ordering};
 use std::sync::{Arc, Mutex};
+use std::time::Duration;
 
 use remote_storage::{DownloadError, GenericRemoteStorage, RemotePath};
 use std::ops::DerefMut;
@@ -316,6 +318,47 @@ pub struct RemoteTimelineClient {
     storage_impl: GenericRemoteStorage,
 
     deletion_queue_client: DeletionQueueClient,
+
+    cancel: CancellationToken,
+}
+
+/// This timeout is intended to deal with hangs in lower layers, e.g. stuck TCP flows.  It is not
+/// intended to be snappy enough for prompt shutdown, as we have a CancellationToken for that.
+const UPLOAD_TIMEOUT: Duration = Duration::from_secs(120);
+const DOWNLOAD_TIMEOUT: Duration = Duration::from_secs(120);
+
+/// Wrapper for timeout_cancellable that flattens result and converts TimeoutCancellableError to anyhow.
+///
+/// This is a convenience for the various upload functions.  In future
+/// the anyhow::Error result should be replaced with a more structured type that
+/// enables callers to avoid handling shutdown as an error.
+async fn upload_cancellable<F>(cancel: &CancellationToken, future: F) -> anyhow::Result<()>
+where
+    F: std::future::Future<Output = anyhow::Result<()>>,
+{
+    match timeout_cancellable(UPLOAD_TIMEOUT, cancel, future).await {
+        Ok(Ok(())) => Ok(()),
+        Ok(Err(e)) => Err(e),
+        Err(TimeoutCancellableError::Timeout) => Err(anyhow::anyhow!("Timeout")),
+        Err(TimeoutCancellableError::Cancelled) => Err(anyhow::anyhow!("Shutting down")),
+    }
+}
+/// Wrapper for timeout_cancellable that flattens result and converts TimeoutCancellableError to DownloaDError.
+async fn download_cancellable<F, R>(
+    cancel: &CancellationToken,
+    future: F,
+) -> Result<R, DownloadError>
+where
+    F: std::future::Future<Output = Result<R, DownloadError>>,
+{
+    match timeout_cancellable(DOWNLOAD_TIMEOUT, cancel, future).await {
+        Ok(Ok(r)) => Ok(r),
+        Ok(Err(e)) => Err(e),
+        Err(TimeoutCancellableError::Timeout) => {
+            Err(DownloadError::Other(anyhow::anyhow!("Timed out")))
+        }
+        Err(TimeoutCancellableError::Cancelled) => Err(DownloadError::Cancelled),
+    }
 }
 
 impl RemoteTimelineClient {
@@ -351,6 +394,7 @@ impl RemoteTimelineClient {
                 &tenant_shard_id,
                 &timeline_id,
             )),
+            cancel: CancellationToken::new(),
         }
     }
 
@@ -501,6 +545,7 @@ impl RemoteTimelineClient {
         &self,
         layer_file_name: &LayerFileName,
         layer_metadata: &LayerFileMetadata,
+        cancel: &CancellationToken,
     ) -> anyhow::Result<u64> {
         let downloaded_size = {
             let _unfinished_gauge_guard = self.metrics.call_begin(
@@ -517,6 +562,7 @@ impl RemoteTimelineClient {
                 self.timeline_id,
                 layer_file_name,
                 layer_metadata,
+                cancel,
             )
             .measure_remote_op(
                 self.tenant_shard_id.tenant_id,
@@ -971,6 +1017,7 @@ impl RemoteTimelineClient {
                     &self.timeline_id,
                     self.generation,
                     &index_part_with_deleted_at,
+                    &self.cancel,
                 )
             },
             |_e| false,
@@ -980,8 +1027,7 @@ impl RemoteTimelineClient {
             // when executed as part of tenant deletion this happens in the background
             2,
             "persist_index_part_with_deleted_flag",
-            // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-            backoff::Cancel::new(CancellationToken::new(), || unreachable!()),
+            backoff::Cancel::new(self.cancel.clone(), || anyhow::anyhow!("Cancelled")),
         )
         .await?;
 
@@ -1223,7 +1269,7 @@ impl RemoteTimelineClient {
             task_mgr::spawn(
                 &self.runtime,
                 TaskKind::RemoteUploadTask,
-                Some(self.tenant_shard_id.tenant_id),
+                Some(self.tenant_shard_id),
                 Some(self.timeline_id),
                 "remote upload",
                 false,
@@ -1281,6 +1327,7 @@ impl RemoteTimelineClient {
                         path,
                         layer_metadata,
                         self.generation,
+                        &self.cancel,
                     )
                     .measure_remote_op(
                         self.tenant_shard_id.tenant_id,
@@ -1307,6 +1354,7 @@ impl RemoteTimelineClient {
                         &self.timeline_id,
                         self.generation,
                         index_part,
+                        &self.cancel,
                     )
                     .measure_remote_op(
                         self.tenant_shard_id.tenant_id,
@@ -1604,6 +1652,23 @@ impl RemoteTimelineClient {
             }
         }
     }
+
+    pub(crate) fn get_layers_metadata(
+        &self,
+        layers: Vec<LayerFileName>,
+    ) -> anyhow::Result<Vec<Option<LayerFileMetadata>>> {
+        let q = self.upload_queue.lock().unwrap();
+        let q = match &*q {
+            UploadQueue::Stopped(_) | UploadQueue::Uninitialized => {
+                anyhow::bail!("queue is in state {}", q.as_str())
+            }
+            UploadQueue::Initialized(inner) => inner,
+        };
+
+        let decorated = layers.into_iter().map(|l| q.latest_files.get(&l).cloned());
+
+        Ok(decorated.collect())
+    }
 }
 
 pub fn remote_timelines_path(tenant_shard_id: &TenantShardId) -> RemotePath {
@@ -1659,6 +1724,13 @@ pub fn remote_index_path(
     .expect("Failed to construct path")
 }
 
+pub const HEATMAP_BASENAME: &str = "heatmap-v1.json";
+
+pub(crate) fn remote_heatmap_path(tenant_shard_id: &TenantShardId) -> RemotePath {
+    RemotePath::from_string(&format!("tenants/{tenant_shard_id}/{HEATMAP_BASENAME}"))
+        .expect("Failed to construct path")
+}
+
 /// Given the key of an index, parse out the generation part of the name
 pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
     let file_name = match path.get_path().file_name() {
@@ -1804,6 +1876,7 @@ mod tests {
                     &self.harness.tenant_shard_id,
                     &TIMELINE_ID,
                 )),
+                cancel: CancellationToken::new(),
             })
         }
 
diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs
index ed32c4eed9..d3956163c8 100644
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -5,7 +5,6 @@
 
 use std::collections::HashSet;
 use std::future::Future;
-use std::time::Duration;
 
 use anyhow::{anyhow, Context};
 use camino::{Utf8Path, Utf8PathBuf};
@@ -14,13 +13,17 @@ use tokio::fs::{self, File, OpenOptions};
 use tokio::io::{AsyncSeekExt, AsyncWriteExt};
 use tokio_util::sync::CancellationToken;
 use tracing::warn;
+use utils::timeout::timeout_cancellable;
 use utils::{backoff, crashsafe};
 
 use crate::config::PageServerConf;
-use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
+use crate::tenant::remote_timeline_client::{
+    download_cancellable, remote_layer_path, remote_timelines_path, DOWNLOAD_TIMEOUT,
+};
 use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::timeline::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::Generation;
+use crate::virtual_file::on_fatal_io_error;
 use crate::TEMP_FILE_SUFFIX;
 use remote_storage::{DownloadError, GenericRemoteStorage, ListingMode};
 use utils::crashsafe::path_with_suffix_extension;
@@ -32,8 +35,6 @@ use super::{
     FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH,
 };
 
-static MAX_DOWNLOAD_DURATION: Duration = Duration::from_secs(120);
-
 ///
 /// If 'metadata' is given, we will validate that the downloaded file's size matches that
 /// in the metadata. (In the future, we might do more cross-checks, like CRC validation)
@@ -46,6 +47,7 @@ pub async fn download_layer_file<'a>(
     timeline_id: TimelineId,
     layer_file_name: &'a LayerFileName,
     layer_metadata: &'a LayerFileMetadata,
+    cancel: &CancellationToken,
 ) -> Result<u64, DownloadError> {
     debug_assert_current_span_has_tenant_and_timeline_id();
 
@@ -73,14 +75,18 @@ pub async fn download_layer_file<'a>(
     // If pageserver crashes the temp file will be deleted on startup and re-downloaded.
     let temp_file_path = path_with_suffix_extension(&local_path, TEMP_DOWNLOAD_EXTENSION);
 
+    let cancel_inner = cancel.clone();
     let (mut destination_file, bytes_amount) = download_retry(
         || async {
             let destination_file = tokio::fs::File::create(&temp_file_path)
                 .await
                 .with_context(|| format!("create a destination file for layer '{temp_file_path}'"))
                 .map_err(DownloadError::Other)?;
-            let download = storage
-                .download(&remote_path)
+
+            // Cancellation safety: it is safe to cancel this future, because it isn't writing to a local
+            // file: the write to local file doesn't start until after the request header is returned
+            // and we start draining the body stream below
+            let download = download_cancellable(&cancel_inner, storage.download(&remote_path))
                 .await
                 .with_context(|| {
                     format!(
@@ -94,12 +100,33 @@ pub async fn download_layer_file<'a>(
 
             let mut reader = tokio_util::io::StreamReader::new(download.download_stream);
 
-            let bytes_amount = tokio::time::timeout(
-                MAX_DOWNLOAD_DURATION,
+            // Cancellation safety: it is safe to cancel this future because it is writing into a temporary file,
+            // and we will unlink the temporary file if there is an error.  This unlink is important because we
+            // are in a retry loop, and we wouldn't want to leave behind a rogue write I/O to a file that
+            // we will imminiently try and write to again.
+            let bytes_amount: u64 = match timeout_cancellable(
+                DOWNLOAD_TIMEOUT,
+                &cancel_inner,
                 tokio::io::copy_buf(&mut reader, &mut destination_file),
             )
             .await
-            .map_err(|e| DownloadError::Other(anyhow::anyhow!("Timed out  {:?}", e)))?
+            .with_context(|| {
+                format!(
+                    "download layer at remote path '{remote_path:?}' into file {temp_file_path:?}"
+                )
+            })
+            .map_err(DownloadError::Other)?
+            {
+                Ok(b) => Ok(b),
+                Err(e) => {
+                    // Remove incomplete files: on restart Timeline would do this anyway, but we must
+                    // do it here for the retry case.
+                    if let Err(e) = tokio::fs::remove_file(&temp_file_path).await {
+                        on_fatal_io_error(&e, &format!("Removing temporary file {temp_file_path}"));
+                    }
+                    Err(e)
+                }
+            }
             .with_context(|| {
                 format!(
                     "download layer at remote path '{remote_path:?}' into file {temp_file_path:?}"
@@ -112,6 +139,7 @@ pub async fn download_layer_file<'a>(
             Ok((destination_file, bytes_amount))
         },
         &format!("download {remote_path:?}"),
+        cancel,
     )
     .await?;
 
@@ -188,8 +216,14 @@ pub async fn list_remote_timelines(
         anyhow::bail!("storage-sync-list-remote-timelines");
     });
 
+    let cancel_inner = cancel.clone();
     let listing = download_retry_forever(
-        || storage.list(Some(&remote_path), ListingMode::WithDelimiter),
+        || {
+            download_cancellable(
+                &cancel_inner,
+                storage.list(Some(&remote_path), ListingMode::WithDelimiter),
+            )
+        },
         &format!("list timelines for {tenant_shard_id}"),
         cancel,
     )
@@ -230,9 +264,13 @@ async fn do_download_index_part(
 
     let remote_path = remote_index_path(tenant_shard_id, timeline_id, index_generation);
 
+    let cancel_inner = cancel.clone();
     let index_part_bytes = download_retry_forever(
         || async {
-            let index_part_download = storage.download(&remote_path).await?;
+            // Cancellation: if is safe to cancel this future because we're just downloading into
+            // a memory buffer, not touching local disk.
+            let index_part_download =
+                download_cancellable(&cancel_inner, storage.download(&remote_path)).await?;
 
             let mut index_part_bytes = Vec::new();
             let mut stream = std::pin::pin!(index_part_download.download_stream);
@@ -347,10 +385,7 @@ pub(super) async fn download_index_part(
         FAILED_DOWNLOAD_WARN_THRESHOLD,
         FAILED_REMOTE_OP_RETRIES,
         "listing index_part files",
-        // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-        backoff::Cancel::new(CancellationToken::new(), || -> anyhow::Error {
-            unreachable!()
-        }),
+        backoff::Cancel::new(cancel.clone(), || anyhow::anyhow!("Cancelled")),
     )
     .await
     .map_err(DownloadError::Other)?;
@@ -389,6 +424,7 @@ pub(crate) async fn download_initdb_tar_zst(
     storage: &GenericRemoteStorage,
     tenant_shard_id: &TenantShardId,
     timeline_id: &TimelineId,
+    cancel: &CancellationToken,
 ) -> Result<(Utf8PathBuf, File), DownloadError> {
     debug_assert_current_span_has_tenant_and_timeline_id();
 
@@ -406,6 +442,8 @@ pub(crate) async fn download_initdb_tar_zst(
         "{INITDB_PATH}.download-{timeline_id}.{TEMP_FILE_SUFFIX}"
     ));
 
+    let cancel_inner = cancel.clone();
+
     let file = download_retry(
         || async {
             let file = OpenOptions::new()
@@ -418,10 +456,14 @@ pub(crate) async fn download_initdb_tar_zst(
                 .with_context(|| format!("tempfile creation {temp_path}"))
                 .map_err(DownloadError::Other)?;
 
-            let download = storage.download(&remote_path).await?;
+            let download =
+                download_cancellable(&cancel_inner, storage.download(&remote_path)).await?;
             let mut download = tokio_util::io::StreamReader::new(download.download_stream);
             let mut writer = tokio::io::BufWriter::with_capacity(8 * 1024, file);
 
+            // TODO: this consumption of the response body should be subject to timeout + cancellation, but
+            // not without thinking carefully about how to recover safely from cancelling a write to
+            // local storage (e.g. by writing into a temp file as we do in download_layer)
             tokio::io::copy_buf(&mut download, &mut writer)
                 .await
                 .with_context(|| format!("download initdb.tar.zst at {remote_path:?}"))
@@ -437,6 +479,7 @@ pub(crate) async fn download_initdb_tar_zst(
             Ok(file)
         },
         &format!("download {remote_path}"),
+        cancel,
     )
     .await
     .map_err(|e| {
@@ -460,7 +503,11 @@ pub(crate) async fn download_initdb_tar_zst(
 /// with backoff.
 ///
 /// (See similar logic for uploads in `perform_upload_task`)
-async fn download_retry<T, O, F>(op: O, description: &str) -> Result<T, DownloadError>
+async fn download_retry<T, O, F>(
+    op: O,
+    description: &str,
+    cancel: &CancellationToken,
+) -> Result<T, DownloadError>
 where
     O: FnMut() -> F,
     F: Future<Output = Result<T, DownloadError>>,
@@ -471,10 +518,7 @@ where
         FAILED_DOWNLOAD_WARN_THRESHOLD,
         FAILED_REMOTE_OP_RETRIES,
         description,
-        // TODO: use a cancellation token (https://github.com/neondatabase/neon/issues/5066)
-        backoff::Cancel::new(CancellationToken::new(), || -> DownloadError {
-            unreachable!()
-        }),
+        backoff::Cancel::new(cancel.clone(), || DownloadError::Cancelled),
     )
     .await
 }
diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs
index d0744e7c83..11c6956875 100644
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -4,14 +4,17 @@ use anyhow::{bail, Context};
 use camino::Utf8Path;
 use fail::fail_point;
 use pageserver_api::shard::TenantShardId;
-use std::io::ErrorKind;
+use std::io::{ErrorKind, SeekFrom};
 use tokio::fs::{self, File};
+use tokio::io::AsyncSeekExt;
+use tokio_util::sync::CancellationToken;
 
 use super::Generation;
 use crate::{
     config::PageServerConf,
     tenant::remote_timeline_client::{
         index::IndexPart, remote_index_path, remote_initdb_archive_path, remote_path,
+        upload_cancellable,
     },
 };
 use remote_storage::GenericRemoteStorage;
@@ -28,6 +31,7 @@ pub(super) async fn upload_index_part<'a>(
     timeline_id: &TimelineId,
     generation: Generation,
     index_part: &'a IndexPart,
+    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
     tracing::trace!("uploading new index part");
 
@@ -43,14 +47,16 @@ pub(super) async fn upload_index_part<'a>(
     let index_part_bytes = bytes::Bytes::from(index_part_bytes);
 
     let remote_path = remote_index_path(tenant_shard_id, timeline_id, generation);
-    storage
-        .upload_storage_object(
+    upload_cancellable(
+        cancel,
+        storage.upload_storage_object(
             futures::stream::once(futures::future::ready(Ok(index_part_bytes))),
             index_part_size,
             &remote_path,
-        )
-        .await
-        .with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
+        ),
+    )
+    .await
+    .with_context(|| format!("upload index part for '{tenant_shard_id} / {timeline_id}'"))
 }
 
 /// Attempts to upload given layer files.
@@ -63,6 +69,7 @@ pub(super) async fn upload_timeline_layer<'a>(
     source_path: &'a Utf8Path,
     known_metadata: &'a LayerFileMetadata,
     generation: Generation,
+    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
     fail_point!("before-upload-layer", |_| {
         bail!("failpoint before-upload-layer")
@@ -106,8 +113,7 @@ pub(super) async fn upload_timeline_layer<'a>(
 
     let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);
 
-    storage
-        .upload(reader, fs_size, &storage_path, None)
+    upload_cancellable(cancel, storage.upload(reader, fs_size, &storage_path, None))
         .await
         .with_context(|| format!("upload layer from local path '{source_path}'"))?;
 
@@ -119,16 +125,22 @@ pub(crate) async fn upload_initdb_dir(
     storage: &GenericRemoteStorage,
     tenant_id: &TenantId,
     timeline_id: &TimelineId,
-    initdb_tar_zst: File,
+    mut initdb_tar_zst: File,
     size: u64,
+    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
     tracing::trace!("uploading initdb dir");
 
+    // We might have read somewhat into the file already in the prior retry attempt
+    initdb_tar_zst.seek(SeekFrom::Start(0)).await?;
+
     let file = tokio_util::io::ReaderStream::with_capacity(initdb_tar_zst, super::BUFFER_SIZE);
 
     let remote_path = remote_initdb_archive_path(tenant_id, timeline_id);
-    storage
-        .upload_storage_object(file, size as usize, &remote_path)
-        .await
-        .with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
+    upload_cancellable(
+        cancel,
+        storage.upload_storage_object(file, size as usize, &remote_path),
+    )
+    .await
+    .with_context(|| format!("upload initdb dir for '{tenant_id} / {timeline_id}'"))
 }
diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs
new file mode 100644
index 0000000000..d25fe56b92
--- /dev/null
+++ b/pageserver/src/tenant/secondary.rs
@@ -0,0 +1,104 @@
+pub mod heatmap;
+mod heatmap_uploader;
+
+use std::sync::Arc;
+
+use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME};
+
+use self::heatmap_uploader::heatmap_uploader_task;
+
+use super::mgr::TenantManager;
+
+use pageserver_api::shard::TenantShardId;
+use remote_storage::GenericRemoteStorage;
+
+use tokio_util::sync::CancellationToken;
+use utils::completion::Barrier;
+
+enum UploadCommand {
+    Upload(TenantShardId),
+}
+
+struct CommandRequest<T> {
+    payload: T,
+    response_tx: tokio::sync::oneshot::Sender<CommandResponse>,
+}
+
+struct CommandResponse {
+    result: anyhow::Result<()>,
+}
+
+/// The SecondaryController is a pseudo-rpc client for administrative control of secondary mode downloads,
+/// and heatmap uploads.  This is not a hot data path: it's primarily a hook for tests,
+/// where we want to immediately upload/download for a particular tenant.  In normal operation
+/// uploads & downloads are autonomous and not driven by this interface.
+pub struct SecondaryController {
+    upload_req_tx: tokio::sync::mpsc::Sender<CommandRequest<UploadCommand>>,
+}
+
+impl SecondaryController {
+    async fn dispatch<T>(
+        &self,
+        queue: &tokio::sync::mpsc::Sender<CommandRequest<T>>,
+        payload: T,
+    ) -> anyhow::Result<()> {
+        let (response_tx, response_rx) = tokio::sync::oneshot::channel();
+
+        queue
+            .send(CommandRequest {
+                payload,
+                response_tx,
+            })
+            .await
+            .map_err(|_| anyhow::anyhow!("Receiver shut down"))?;
+
+        let response = response_rx
+            .await
+            .map_err(|_| anyhow::anyhow!("Request dropped"))?;
+
+        response.result
+    }
+
+    pub async fn upload_tenant(&self, tenant_shard_id: TenantShardId) -> anyhow::Result<()> {
+        self.dispatch(&self.upload_req_tx, UploadCommand::Upload(tenant_shard_id))
+            .await
+    }
+}
+
+pub fn spawn_tasks(
+    tenant_manager: Arc<TenantManager>,
+    remote_storage: GenericRemoteStorage,
+    background_jobs_can_start: Barrier,
+    cancel: CancellationToken,
+) -> SecondaryController {
+    let (upload_req_tx, upload_req_rx) =
+        tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);
+
+    task_mgr::spawn(
+        BACKGROUND_RUNTIME.handle(),
+        TaskKind::SecondaryUploads,
+        None,
+        None,
+        "heatmap uploads",
+        false,
+        async move {
+            heatmap_uploader_task(
+                tenant_manager,
+                remote_storage,
+                upload_req_rx,
+                background_jobs_can_start,
+                cancel,
+            )
+            .await
+        },
+    );
+
+    SecondaryController { upload_req_tx }
+}
+
+/// For running with remote storage disabled: a SecondaryController that is connected to nothing.
+pub fn null_controller() -> SecondaryController {
+    let (upload_req_tx, _upload_req_rx) =
+        tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);
+    SecondaryController { upload_req_tx }
+}
diff --git a/pageserver/src/tenant/secondary/heatmap.rs b/pageserver/src/tenant/secondary/heatmap.rs
new file mode 100644
index 0000000000..99aaaeb8c8
--- /dev/null
+++ b/pageserver/src/tenant/secondary/heatmap.rs
@@ -0,0 +1,64 @@
+use std::time::SystemTime;
+
+use crate::tenant::{
+    remote_timeline_client::index::IndexLayerMetadata, storage_layer::LayerFileName,
+};
+
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, DisplayFromStr, TimestampSeconds};
+
+use utils::{generation::Generation, id::TimelineId};
+
+#[derive(Serialize, Deserialize)]
+pub(super) struct HeatMapTenant {
+    /// Generation of the attached location that uploaded the heatmap: this is not required
+    /// for correctness, but acts as a hint to secondary locations in order to detect thrashing
+    /// in the unlikely event that two attached locations are both uploading conflicting heatmaps.
+    pub(super) generation: Generation,
+
+    pub(super) timelines: Vec<HeatMapTimeline>,
+}
+
+#[serde_as]
+#[derive(Serialize, Deserialize)]
+pub(crate) struct HeatMapTimeline {
+    #[serde_as(as = "DisplayFromStr")]
+    pub(super) timeline_id: TimelineId,
+
+    pub(super) layers: Vec<HeatMapLayer>,
+}
+
+#[serde_as]
+#[derive(Serialize, Deserialize)]
+pub(crate) struct HeatMapLayer {
+    pub(super) name: LayerFileName,
+    pub(super) metadata: IndexLayerMetadata,
+
+    #[serde_as(as = "TimestampSeconds<i64>")]
+    pub(super) access_time: SystemTime,
+    // TODO: an actual 'heat' score that would let secondary locations prioritize downloading
+    // the hottest layers, rather than trying to simply mirror whatever layers are on-disk on the primary.
+}
+
+impl HeatMapLayer {
+    pub(crate) fn new(
+        name: LayerFileName,
+        metadata: IndexLayerMetadata,
+        access_time: SystemTime,
+    ) -> Self {
+        Self {
+            name,
+            metadata,
+            access_time,
+        }
+    }
+}
+
+impl HeatMapTimeline {
+    pub(crate) fn new(timeline_id: TimelineId, layers: Vec<HeatMapLayer>) -> Self {
+        Self {
+            timeline_id,
+            layers,
+        }
+    }
+}
diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs
new file mode 100644
index 0000000000..ece2b93ce1
--- /dev/null
+++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs
@@ -0,0 +1,582 @@
+use std::{
+    collections::HashMap,
+    sync::{Arc, Weak},
+    time::{Duration, Instant},
+};
+
+use crate::{
+    metrics::SECONDARY_MODE,
+    tenant::{
+        config::AttachmentMode, mgr::TenantManager, remote_timeline_client::remote_heatmap_path,
+        secondary::CommandResponse, span::debug_assert_current_span_has_tenant_id, Tenant,
+    },
+};
+
+use md5;
+use pageserver_api::shard::TenantShardId;
+use remote_storage::GenericRemoteStorage;
+
+use tokio::task::JoinSet;
+use tokio_util::sync::CancellationToken;
+use tracing::instrument;
+use utils::{backoff, completion::Barrier};
+
+use super::{heatmap::HeatMapTenant, CommandRequest, UploadCommand};
+
+/// Period between heatmap uploader walking Tenants to look for work to do.
+/// If any tenants have a heatmap upload period lower than this, it will be adjusted
+/// downward to match.
+const DEFAULT_SCHEDULING_INTERVAL: Duration = Duration::from_millis(60000);
+const MIN_SCHEDULING_INTERVAL: Duration = Duration::from_millis(1000);
+
+struct WriteInProgress {
+    barrier: Barrier,
+}
+
+struct UploadPending {
+    tenant: Arc<Tenant>,
+    last_digest: Option<md5::Digest>,
+}
+
+struct WriteComplete {
+    tenant_shard_id: TenantShardId,
+    completed_at: Instant,
+    digest: Option<md5::Digest>,
+    next_upload: Option<Instant>,
+}
+
+/// The heatmap uploader keeps a little bit of per-tenant state, mainly to remember
+/// when we last did a write.  We only populate this after doing at least one
+/// write for a tenant -- this avoids holding state for tenants that have
+/// uploads disabled.
+
+struct UploaderTenantState {
+    // This Weak only exists to enable culling idle instances of this type
+    // when the Tenant has been deallocated.
+    tenant: Weak<Tenant>,
+
+    /// Digest of the serialized heatmap that we last successfully uploaded
+    ///
+    /// md5 is generally a bad hash.  We use it because it's convenient for interop with AWS S3's ETag,
+    /// which is also an md5sum.
+    last_digest: Option<md5::Digest>,
+
+    /// When the last upload attempt completed (may have been successful or failed)
+    last_upload: Option<Instant>,
+
+    /// When should we next do an upload?  None means never.
+    next_upload: Option<Instant>,
+}
+
+/// This type is owned by a single task ([`heatmap_uploader_task`]) which runs an event
+/// handling loop and mutates it as needed: there are no locks here, because that event loop
+/// can hold &mut references to this type throughout.
+struct HeatmapUploader {
+    tenant_manager: Arc<TenantManager>,
+    remote_storage: GenericRemoteStorage,
+    cancel: CancellationToken,
+
+    tenants: HashMap<TenantShardId, UploaderTenantState>,
+
+    /// Tenants with work to do, for which tasks should be spawned as soon as concurrency
+    /// limits permit it.
+    tenants_pending: std::collections::VecDeque<UploadPending>,
+
+    /// Tenants for which a task in `tasks` has been spawned.
+    tenants_uploading: HashMap<TenantShardId, WriteInProgress>,
+
+    tasks: JoinSet<()>,
+
+    /// Channel for our child tasks to send results to: we use a channel for results rather than
+    /// just getting task results via JoinSet because we need the channel's recv() "sleep until something
+    /// is available" semantic, rather than JoinSet::join_next()'s "sleep until next thing is available _or_ I'm empty"
+    /// behavior.
+    task_result_tx: tokio::sync::mpsc::UnboundedSender<WriteComplete>,
+    task_result_rx: tokio::sync::mpsc::UnboundedReceiver<WriteComplete>,
+
+    concurrent_uploads: usize,
+
+    scheduling_interval: Duration,
+}
+
+/// The uploader task runs a loop that periodically wakes up and schedules tasks for
+/// tenants that require an upload, or handles any commands that have been sent into
+/// `command_queue`.  No I/O is done in this loop: that all happens in the tasks we
+/// spawn.
+///
+/// Scheduling iterations are somewhat infrequent.  However, each one will enqueue
+/// all tenants that require an upload, and in between scheduling iterations we will
+/// continue to spawn new tasks for pending tenants, as our concurrency limit permits.
+///
+/// While we take a CancellationToken here, it is subordinate to the CancellationTokens
+/// of tenants: i.e. we expect all Tenants to have been shut down before we are shut down, otherwise
+/// we might block waiting on a Tenant.
+pub(super) async fn heatmap_uploader_task(
+    tenant_manager: Arc<TenantManager>,
+    remote_storage: GenericRemoteStorage,
+    mut command_queue: tokio::sync::mpsc::Receiver<CommandRequest<UploadCommand>>,
+    background_jobs_can_start: Barrier,
+    cancel: CancellationToken,
+) -> anyhow::Result<()> {
+    let concurrent_uploads = tenant_manager.get_conf().heatmap_upload_concurrency;
+
+    let (result_tx, result_rx) = tokio::sync::mpsc::unbounded_channel();
+
+    let mut uploader = HeatmapUploader {
+        tenant_manager,
+        remote_storage,
+        cancel: cancel.clone(),
+        tasks: JoinSet::new(),
+        tenants: HashMap::new(),
+        tenants_pending: std::collections::VecDeque::new(),
+        tenants_uploading: HashMap::new(),
+        task_result_tx: result_tx,
+        task_result_rx: result_rx,
+        concurrent_uploads,
+        scheduling_interval: DEFAULT_SCHEDULING_INTERVAL,
+    };
+
+    tracing::info!("Waiting for background_jobs_can start...");
+    background_jobs_can_start.wait().await;
+    tracing::info!("background_jobs_can is ready, proceeding.");
+
+    while !cancel.is_cancelled() {
+        // Look for new work: this is relatively expensive because we have to go acquire the lock on
+        // the tenant manager to retrieve tenants, and then iterate over them to figure out which ones
+        // require an upload.
+        uploader.schedule_iteration().await?;
+
+        // Between scheduling iterations, we will:
+        //  - Drain any complete tasks and spawn pending tasks
+        //  - Handle incoming administrative commands
+        //  - Check our cancellation token
+        let next_scheduling_iteration = Instant::now()
+            .checked_add(uploader.scheduling_interval)
+            .unwrap_or_else(|| {
+                tracing::warn!(
+                    "Scheduling interval invalid ({}s), running immediately!",
+                    uploader.scheduling_interval.as_secs_f64()
+                );
+                Instant::now()
+            });
+        loop {
+            tokio::select! {
+                _ = cancel.cancelled() => {
+                    // We do not simply drop the JoinSet, in order to have an orderly shutdown without cancellation.
+                    tracing::info!("Heatmap uploader joining tasks");
+                    while let Some(_r) = uploader.tasks.join_next().await {};
+                    tracing::info!("Heatmap uploader terminating");
+
+                    break;
+                },
+                _ = tokio::time::sleep(next_scheduling_iteration.duration_since(Instant::now())) => {
+                    tracing::debug!("heatmap_uploader_task: woke for scheduling interval");
+                    break;},
+                cmd = command_queue.recv() => {
+                    tracing::debug!("heatmap_uploader_task: woke for command queue");
+                    let cmd = match cmd {
+                        Some(c) =>c,
+                        None => {
+                            // SecondaryController was destroyed, and this has raced with
+                            // our CancellationToken
+                            tracing::info!("Heatmap uploader terminating");
+                            cancel.cancel();
+                            break;
+                        }
+                    };
+
+                    let CommandRequest{
+                        response_tx,
+                        payload
+                    } = cmd;
+                    uploader.handle_command(payload, response_tx);
+                },
+                _ = uploader.process_next_completion() => {
+                    if !cancel.is_cancelled() {
+                        uploader.spawn_pending();
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+impl HeatmapUploader {
+    /// Periodic execution phase: inspect all attached tenants and schedule any work they require.
+    async fn schedule_iteration(&mut self) -> anyhow::Result<()> {
+        // Cull any entries in self.tenants whose Arc<Tenant> is gone
+        self.tenants
+            .retain(|_k, v| v.tenant.upgrade().is_some() && v.next_upload.is_some());
+
+        // The priority order of previously scheduled work may be invalidated by current state: drop
+        // all pending work (it will be re-scheduled if still needed)
+        self.tenants_pending.clear();
+
+        // Used a fixed 'now' through the following loop, for efficiency and fairness.
+        let now = Instant::now();
+
+        // While iterating over the potentially-long list of tenants, we will periodically yield
+        // to avoid blocking executor.
+        const YIELD_ITERATIONS: usize = 1000;
+
+        // Iterate over tenants looking for work to do.
+        let tenants = self.tenant_manager.get_attached_active_tenant_shards();
+        for (i, tenant) in tenants.into_iter().enumerate() {
+            // Process is shutting down, drop out
+            if self.cancel.is_cancelled() {
+                return Ok(());
+            }
+
+            // Skip tenants that already have a write in flight
+            if self
+                .tenants_uploading
+                .contains_key(tenant.get_tenant_shard_id())
+            {
+                continue;
+            }
+
+            self.maybe_schedule_upload(&now, tenant);
+
+            if i + 1 % YIELD_ITERATIONS == 0 {
+                tokio::task::yield_now().await;
+            }
+        }
+
+        // Spawn tasks for as many of our pending tenants as we can.
+        self.spawn_pending();
+
+        Ok(())
+    }
+
+    ///
+    /// Cancellation: this method is cancel-safe.
+    async fn process_next_completion(&mut self) {
+        match self.task_result_rx.recv().await {
+            Some(r) => {
+                self.on_completion(r);
+            }
+            None => {
+                unreachable!("Result sender is stored on Self");
+            }
+        }
+    }
+
+    /// The 'maybe' refers to the tenant's state: whether it is configured
+    /// for heatmap uploads at all, and whether sufficient time has passed
+    /// since the last upload.
+    fn maybe_schedule_upload(&mut self, now: &Instant, tenant: Arc<Tenant>) {
+        match tenant.get_heatmap_period() {
+            None => {
+                // Heatmaps are disabled for this tenant
+                return;
+            }
+            Some(period) => {
+                // If any tenant has asked for uploads more frequent than our scheduling interval,
+                // reduce it to match so that we can keep up.  This is mainly useful in testing, where
+                // we may set rather short intervals.
+                if period < self.scheduling_interval {
+                    self.scheduling_interval = std::cmp::max(period, MIN_SCHEDULING_INTERVAL);
+                }
+            }
+        }
+
+        // Stale attachments do not upload anything: if we are in this state, there is probably some
+        // other attachment in mode Single or Multi running on another pageserver, and we don't
+        // want to thrash and overwrite their heatmap uploads.
+        if tenant.get_attach_mode() == AttachmentMode::Stale {
+            return;
+        }
+
+        // Create an entry in self.tenants if one doesn't already exist: this will later be updated
+        // with the completion time in on_completion.
+        let state = self
+            .tenants
+            .entry(*tenant.get_tenant_shard_id())
+            .or_insert_with(|| UploaderTenantState {
+                tenant: Arc::downgrade(&tenant),
+                last_upload: None,
+                next_upload: Some(Instant::now()),
+                last_digest: None,
+            });
+
+        // Decline to do the upload if insufficient time has passed
+        if state.next_upload.map(|nu| &nu > now).unwrap_or(false) {
+            return;
+        }
+
+        let last_digest = state.last_digest;
+        self.tenants_pending.push_back(UploadPending {
+            tenant,
+            last_digest,
+        })
+    }
+
+    fn spawn_pending(&mut self) {
+        while !self.tenants_pending.is_empty()
+            && self.tenants_uploading.len() < self.concurrent_uploads
+        {
+            // unwrap: loop condition includes !is_empty()
+            let pending = self.tenants_pending.pop_front().unwrap();
+            self.spawn_upload(pending.tenant, pending.last_digest);
+        }
+    }
+
+    fn spawn_upload(&mut self, tenant: Arc<Tenant>, last_digest: Option<md5::Digest>) {
+        let remote_storage = self.remote_storage.clone();
+        let tenant_shard_id = *tenant.get_tenant_shard_id();
+        let (completion, barrier) = utils::completion::channel();
+        let result_tx = self.task_result_tx.clone();
+        self.tasks.spawn(async move {
+            // Guard for the barrier in [`WriteInProgress`]
+            let _completion = completion;
+
+            let started_at = Instant::now();
+            let digest = match upload_tenant_heatmap(remote_storage, &tenant, last_digest).await {
+                Ok(UploadHeatmapOutcome::Uploaded(digest)) => {
+                    let duration = Instant::now().duration_since(started_at);
+                    SECONDARY_MODE
+                        .upload_heatmap_duration
+                        .observe(duration.as_secs_f64());
+                    SECONDARY_MODE.upload_heatmap.inc();
+                    Some(digest)
+                }
+                Ok(UploadHeatmapOutcome::NoChange | UploadHeatmapOutcome::Skipped) => last_digest,
+                Err(UploadHeatmapError::Upload(e)) => {
+                    tracing::warn!(
+                        "Failed to upload heatmap for tenant {}: {e:#}",
+                        tenant.get_tenant_shard_id(),
+                    );
+                    let duration = Instant::now().duration_since(started_at);
+                    SECONDARY_MODE
+                        .upload_heatmap_duration
+                        .observe(duration.as_secs_f64());
+                    SECONDARY_MODE.upload_heatmap_errors.inc();
+                    last_digest
+                }
+                Err(UploadHeatmapError::Cancelled) => {
+                    tracing::info!("Cancelled heatmap upload, shutting down");
+                    last_digest
+                }
+            };
+
+            let now = Instant::now();
+            let next_upload = tenant
+                .get_heatmap_period()
+                .and_then(|period| now.checked_add(period));
+
+            result_tx
+                .send(WriteComplete {
+                    tenant_shard_id: *tenant.get_tenant_shard_id(),
+                    completed_at: now,
+                    digest,
+                    next_upload,
+                })
+                .ok();
+        });
+
+        self.tenants_uploading
+            .insert(tenant_shard_id, WriteInProgress { barrier });
+    }
+
+    #[instrument(skip_all, fields(tenant_id=%completion.tenant_shard_id.tenant_id, shard_id=%completion.tenant_shard_id.shard_slug()))]
+    fn on_completion(&mut self, completion: WriteComplete) {
+        tracing::debug!("Heatmap upload completed");
+        let WriteComplete {
+            tenant_shard_id,
+            completed_at,
+            digest,
+            next_upload,
+        } = completion;
+        self.tenants_uploading.remove(&tenant_shard_id);
+        use std::collections::hash_map::Entry;
+        match self.tenants.entry(tenant_shard_id) {
+            Entry::Vacant(_) => {
+                // Tenant state was dropped, nothing to update.
+            }
+            Entry::Occupied(mut entry) => {
+                entry.get_mut().last_upload = Some(completed_at);
+                entry.get_mut().last_digest = digest;
+                entry.get_mut().next_upload = next_upload
+            }
+        }
+    }
+
+    fn handle_command(
+        &mut self,
+        command: UploadCommand,
+        response_tx: tokio::sync::oneshot::Sender<CommandResponse>,
+    ) {
+        match command {
+            UploadCommand::Upload(tenant_shard_id) => {
+                // If an upload was ongoing for this tenant, let it finish first.
+                let barrier = if let Some(writing_state) =
+                    self.tenants_uploading.get(&tenant_shard_id)
+                {
+                    tracing::info!(
+                        tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
+                        "Waiting for heatmap write to complete");
+                    writing_state.barrier.clone()
+                } else {
+                    // Spawn the upload then immediately wait for it.  This will block processing of other commands and
+                    // starting of other background work.
+                    tracing::info!(
+                        tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
+                        "Starting heatmap write on command");
+                    let tenant = match self
+                        .tenant_manager
+                        .get_attached_tenant_shard(tenant_shard_id, true)
+                    {
+                        Ok(t) => t,
+                        Err(e) => {
+                            // Drop result of send: we don't care if caller dropped their receiver
+                            drop(response_tx.send(CommandResponse {
+                                result: Err(e.into()),
+                            }));
+                            return;
+                        }
+                    };
+                    self.spawn_upload(tenant, None);
+                    let writing_state = self
+                        .tenants_uploading
+                        .get(&tenant_shard_id)
+                        .expect("We just inserted this");
+                    tracing::info!(
+                        tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
+                        "Waiting for heatmap upload to complete");
+
+                    writing_state.barrier.clone()
+                };
+
+                // This task does no I/O: it only listens for a barrier's completion and then
+                // sends to the command response channel.  It is therefore safe to spawn this without
+                // any gates/task_mgr hooks.
+                tokio::task::spawn(async move {
+                    barrier.wait().await;
+
+                    tracing::info!(
+                        tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
+                        "Heatmap upload complete");
+
+                    // Drop result of send: we don't care if caller dropped their receiver
+                    drop(response_tx.send(CommandResponse { result: Ok(()) }))
+                });
+            }
+        }
+    }
+}
+
+enum UploadHeatmapOutcome {
+    /// We successfully wrote to remote storage, with this digest.
+    Uploaded(md5::Digest),
+    /// We did not upload because the heatmap digest was unchanged since the last upload
+    NoChange,
+    /// We skipped the upload for some reason, such as tenant/timeline not ready
+    Skipped,
+}
+
+#[derive(thiserror::Error, Debug)]
+enum UploadHeatmapError {
+    #[error("Cancelled")]
+    Cancelled,
+
+    #[error(transparent)]
+    Upload(#[from] anyhow::Error),
+}
+
+/// The inner upload operation.  This will skip if `last_digest` is Some and matches the digest
+/// of the object we would have uploaded.
+#[instrument(skip_all, fields(tenant_id = %tenant.get_tenant_shard_id().tenant_id, shard_id = %tenant.get_tenant_shard_id().shard_slug()))]
+async fn upload_tenant_heatmap(
+    remote_storage: GenericRemoteStorage,
+    tenant: &Arc<Tenant>,
+    last_digest: Option<md5::Digest>,
+) -> Result<UploadHeatmapOutcome, UploadHeatmapError> {
+    debug_assert_current_span_has_tenant_id();
+
+    let generation = tenant.get_generation();
+    if generation.is_none() {
+        // We do not expect this: generations were implemented before heatmap uploads.  However,
+        // handle it so that we don't have to make the generation in the heatmap an Option<>
+        // (Generation::none is not serializable)
+        tracing::warn!("Skipping heatmap upload for tenant with generation==None");
+        return Ok(UploadHeatmapOutcome::Skipped);
+    }
+
+    let mut heatmap = HeatMapTenant {
+        timelines: Vec::new(),
+        generation,
+    };
+    let timelines = tenant.timelines.lock().unwrap().clone();
+
+    let tenant_cancel = tenant.cancel.clone();
+
+    // Ensure that Tenant::shutdown waits for any upload in flight: this is needed because otherwise
+    // when we delete a tenant, we might race with an upload in flight and end up leaving a heatmap behind
+    // in remote storage.
+    let _guard = match tenant.gate.enter() {
+        Ok(g) => g,
+        Err(_) => {
+            tracing::info!("Skipping heatmap upload for tenant which is shutting down");
+            return Err(UploadHeatmapError::Cancelled);
+        }
+    };
+
+    for (timeline_id, timeline) in timelines {
+        let heatmap_timeline = timeline.generate_heatmap().await;
+        match heatmap_timeline {
+            None => {
+                tracing::debug!(
+                    "Skipping heatmap upload because timeline {timeline_id} is not ready"
+                );
+                return Ok(UploadHeatmapOutcome::Skipped);
+            }
+            Some(heatmap_timeline) => {
+                heatmap.timelines.push(heatmap_timeline);
+            }
+        }
+    }
+
+    // Serialize the heatmap
+    let bytes = serde_json::to_vec(&heatmap).map_err(|e| anyhow::anyhow!(e))?;
+    let size = bytes.len();
+
+    // Drop out early if nothing changed since our last upload
+    let digest = md5::compute(&bytes);
+    if Some(digest) == last_digest {
+        return Ok(UploadHeatmapOutcome::NoChange);
+    }
+
+    let path = remote_heatmap_path(tenant.get_tenant_shard_id());
+
+    // Write the heatmap.
+    tracing::debug!("Uploading {size} byte heatmap to {path}");
+    if let Err(e) = backoff::retry(
+        || async {
+            let bytes = futures::stream::once(futures::future::ready(Ok(bytes::Bytes::from(
+                bytes.clone(),
+            ))));
+            remote_storage
+                .upload_storage_object(bytes, size, &path)
+                .await
+        },
+        |_| false,
+        3,
+        u32::MAX,
+        "Uploading heatmap",
+        backoff::Cancel::new(tenant_cancel.clone(), || anyhow::anyhow!("Shutting down")),
+    )
+    .await
+    {
+        if tenant_cancel.is_cancelled() {
+            return Err(UploadHeatmapError::Cancelled);
+        } else {
+            return Err(e.into());
+        }
+    }
+
+    tracing::info!("Successfully uploaded {size} byte heatmap to {path}");
+
+    Ok(UploadHeatmapOutcome::Uploaded(digest))
+}
diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs
index 126d4d5563..8ae911b31e 100644
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -259,8 +259,9 @@ impl Layer {
 
         layer
             .get_value_reconstruct_data(key, lsn_range, reconstruct_data, &self.0, ctx)
-            .instrument(tracing::info_span!("get_value_reconstruct_data", layer=%self))
+            .instrument(tracing::debug_span!("get_value_reconstruct_data", layer=%self))
             .await
+            .with_context(|| format!("get_value_reconstruct_data for layer {self}"))
     }
 
     /// Download the layer if evicted.
@@ -457,6 +458,8 @@ struct LayerInner {
     /// For loaded layers, this may be some other value if the tenant has undergone
     /// a shard split since the layer was originally written.
     shard: ShardIndex,
+
+    last_evicted_at: std::sync::Mutex<Option<std::time::Instant>>,
 }
 
 impl std::fmt::Display for LayerInner {
@@ -587,6 +590,7 @@ impl LayerInner {
             consecutive_failures: AtomicUsize::new(0),
             generation,
             shard,
+            last_evicted_at: std::sync::Mutex::default(),
         }
     }
 
@@ -651,7 +655,6 @@ impl LayerInner {
     }
 
     /// Cancellation safe.
-    #[tracing::instrument(skip_all, fields(layer=%self))]
     async fn get_or_maybe_download(
         self: &Arc<Self>,
         allow_download: bool,
@@ -660,87 +663,101 @@ impl LayerInner {
         let mut init_permit = None;
 
         loop {
-            let download = move |permit| async move {
-                // disable any scheduled but not yet running eviction deletions for this
-                let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);
+            let download = move |permit| {
+                async move {
+                    // disable any scheduled but not yet running eviction deletions for this
+                    let next_version = 1 + self.version.fetch_add(1, Ordering::Relaxed);
 
-                // count cancellations, which currently remain largely unexpected
-                let init_cancelled =
-                    scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
+                    // count cancellations, which currently remain largely unexpected
+                    let init_cancelled =
+                        scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
 
-                // no need to make the evict_and_wait wait for the actual download to complete
-                drop(self.status.send(Status::Downloaded));
+                    // no need to make the evict_and_wait wait for the actual download to complete
+                    drop(self.status.send(Status::Downloaded));
 
-                let timeline = self
-                    .timeline
-                    .upgrade()
-                    .ok_or_else(|| DownloadError::TimelineShutdown)?;
+                    let timeline = self
+                        .timeline
+                        .upgrade()
+                        .ok_or_else(|| DownloadError::TimelineShutdown)?;
 
-                // FIXME: grab a gate
+                    // FIXME: grab a gate
 
-                let can_ever_evict = timeline.remote_client.as_ref().is_some();
+                    let can_ever_evict = timeline.remote_client.as_ref().is_some();
 
-                // check if we really need to be downloaded; could have been already downloaded by a
-                // cancelled previous attempt.
-                let needs_download = self
-                    .needs_download()
-                    .await
-                    .map_err(DownloadError::PreStatFailed)?;
+                    // check if we really need to be downloaded; could have been already downloaded by a
+                    // cancelled previous attempt.
+                    let needs_download = self
+                        .needs_download()
+                        .await
+                        .map_err(DownloadError::PreStatFailed)?;
 
-                let permit = if let Some(reason) = needs_download {
-                    if let NeedsDownload::NotFile(ft) = reason {
-                        return Err(DownloadError::NotFile(ft));
+                    let permit = if let Some(reason) = needs_download {
+                        if let NeedsDownload::NotFile(ft) = reason {
+                            return Err(DownloadError::NotFile(ft));
+                        }
+
+                        // only reset this after we've decided we really need to download. otherwise it'd
+                        // be impossible to mark cancelled downloads for eviction, like one could imagine
+                        // we would like to do for prefetching which was not needed.
+                        self.wanted_evicted.store(false, Ordering::Release);
+
+                        if !can_ever_evict {
+                            return Err(DownloadError::NoRemoteStorage);
+                        }
+
+                        if let Some(ctx) = ctx {
+                            self.check_expected_download(ctx)?;
+                        }
+
+                        if !allow_download {
+                            // this does look weird, but for LayerInner the "downloading" means also changing
+                            // internal once related state ...
+                            return Err(DownloadError::DownloadRequired);
+                        }
+
+                        tracing::info!(%reason, "downloading on-demand");
+
+                        self.spawn_download_and_wait(timeline, permit).await?
+                    } else {
+                        // the file is present locally, probably by a previous but cancelled call to
+                        // get_or_maybe_download. alternatively we might be running without remote storage.
+                        LAYER_IMPL_METRICS.inc_init_needed_no_download();
+
+                        permit
+                    };
+
+                    let since_last_eviction =
+                        self.last_evicted_at.lock().unwrap().map(|ts| ts.elapsed());
+                    if let Some(since_last_eviction) = since_last_eviction {
+                        // FIXME: this will not always be recorded correctly until #6028 (the no
+                        // download needed branch above)
+                        LAYER_IMPL_METRICS.record_redownloaded_after(since_last_eviction);
                     }
 
-                    // only reset this after we've decided we really need to download. otherwise it'd
-                    // be impossible to mark cancelled downloads for eviction, like one could imagine
-                    // we would like to do for prefetching which was not needed.
-                    self.wanted_evicted.store(false, Ordering::Release);
+                    let res = Arc::new(DownloadedLayer {
+                        owner: Arc::downgrade(self),
+                        kind: tokio::sync::OnceCell::default(),
+                        version: next_version,
+                    });
 
-                    if !can_ever_evict {
-                        return Err(DownloadError::NoRemoteStorage);
+                    self.access_stats.record_residence_event(
+                        LayerResidenceStatus::Resident,
+                        LayerResidenceEventReason::ResidenceChange,
+                    );
+
+                    let waiters = self.inner.initializer_count();
+                    if waiters > 0 {
+                        tracing::info!(
+                            waiters,
+                            "completing the on-demand download for other tasks"
+                        );
                     }
 
-                    if let Some(ctx) = ctx {
-                        self.check_expected_download(ctx)?;
-                    }
+                    scopeguard::ScopeGuard::into_inner(init_cancelled);
 
-                    if !allow_download {
-                        // this does look weird, but for LayerInner the "downloading" means also changing
-                        // internal once related state ...
-                        return Err(DownloadError::DownloadRequired);
-                    }
-
-                    tracing::info!(%reason, "downloading on-demand");
-
-                    self.spawn_download_and_wait(timeline, permit).await?
-                } else {
-                    // the file is present locally, probably by a previous but cancelled call to
-                    // get_or_maybe_download. alternatively we might be running without remote storage.
-                    LAYER_IMPL_METRICS.inc_init_needed_no_download();
-
-                    permit
-                };
-
-                let res = Arc::new(DownloadedLayer {
-                    owner: Arc::downgrade(self),
-                    kind: tokio::sync::OnceCell::default(),
-                    version: next_version,
-                });
-
-                self.access_stats.record_residence_event(
-                    LayerResidenceStatus::Resident,
-                    LayerResidenceEventReason::ResidenceChange,
-                );
-
-                let waiters = self.inner.initializer_count();
-                if waiters > 0 {
-                    tracing::info!(waiters, "completing the on-demand download for other tasks");
+                    Ok((ResidentOrWantedEvicted::Resident(res), permit))
                 }
-
-                scopeguard::ScopeGuard::into_inner(init_cancelled);
-
-                Ok((ResidentOrWantedEvicted::Resident(res), permit))
+                .instrument(tracing::info_span!("get_or_maybe_download", layer=%self))
             };
 
             if let Some(init_permit) = init_permit.take() {
@@ -837,7 +854,7 @@ impl LayerInner {
         crate::task_mgr::spawn(
             &tokio::runtime::Handle::current(),
             crate::task_mgr::TaskKind::RemoteDownloadTask,
-            Some(self.desc.tenant_shard_id.tenant_id),
+            Some(self.desc.tenant_shard_id),
             Some(self.desc.timeline_id),
             &task_name,
             false,
@@ -851,6 +868,7 @@ impl LayerInner {
                 let result = client.download_layer_file(
                     &this.desc.filename(),
                     &this.metadata(),
+                    &crate::task_mgr::shutdown_token()
                 )
                 .await;
 
@@ -860,6 +878,23 @@ impl LayerInner {
                         Ok(())
                     }
                     Err(e) => {
+                        let consecutive_failures =
+                            this.consecutive_failures.fetch_add(1, Ordering::Relaxed);
+
+                        let backoff = utils::backoff::exponential_backoff_duration_seconds(
+                            consecutive_failures.min(u32::MAX as usize) as u32,
+                            1.5,
+                            60.0,
+                        );
+
+                        let backoff = std::time::Duration::from_secs_f64(backoff);
+
+                        tokio::select! {
+                            _ = tokio::time::sleep(backoff) => {},
+                            _ = crate::task_mgr::shutdown_token().cancelled_owned() => {},
+                            _ = timeline.cancel.cancelled() => {},
+                        };
+
                         Err(e)
                     }
                 };
@@ -908,21 +943,9 @@ impl LayerInner {
                 Ok(permit)
             }
             Ok((Err(e), _permit)) => {
-                // FIXME: this should be with the spawned task and be cancellation sensitive
-                //
-                // while we should not need this, this backoff has turned out to be useful with
-                // a bug of unexpectedly deleted remote layer file (#5787).
-                let consecutive_failures =
-                    self.consecutive_failures.fetch_add(1, Ordering::Relaxed);
+                // sleep already happened in the spawned task, if it was not cancelled
+                let consecutive_failures = self.consecutive_failures.load(Ordering::Relaxed);
                 tracing::error!(consecutive_failures, "layer file download failed: {e:#}");
-                let backoff = utils::backoff::exponential_backoff_duration_seconds(
-                    consecutive_failures.min(u32::MAX as usize) as u32,
-                    1.5,
-                    60.0,
-                );
-                let backoff = std::time::Duration::from_secs_f64(backoff);
-
-                tokio::time::sleep(backoff).await;
                 Err(DownloadError::DownloadFailed)
             }
             Err(_gone) => Err(DownloadError::DownloadCancelled),
@@ -1117,6 +1140,8 @@ impl LayerInner {
         // we are still holding the permit, so no new spawn_download_and_wait can happen
         drop(self.status.send(Status::Evicted));
 
+        *self.last_evicted_at.lock().unwrap() = Some(std::time::Instant::now());
+
         res
     }
 
@@ -1421,6 +1446,7 @@ pub(crate) struct LayerImplMetrics {
 
     rare_counters: enum_map::EnumMap<RareEvent, IntCounter>,
     inits_cancelled: metrics::core::GenericCounter<metrics::core::AtomicU64>,
+    redownload_after: metrics::Histogram,
 }
 
 impl Default for LayerImplMetrics {
@@ -1496,6 +1522,26 @@ impl Default for LayerImplMetrics {
         )
         .unwrap();
 
+        let redownload_after = {
+            let minute = 60.0;
+            let hour = 60.0 * minute;
+            metrics::register_histogram!(
+                "pageserver_layer_redownloaded_after",
+                "Time between evicting and re-downloading.",
+                vec![
+                    10.0,
+                    30.0,
+                    minute,
+                    5.0 * minute,
+                    15.0 * minute,
+                    30.0 * minute,
+                    hour,
+                    12.0 * hour,
+                ]
+            )
+            .unwrap()
+        };
+
         Self {
             started_evictions,
             completed_evictions,
@@ -1507,6 +1553,7 @@ impl Default for LayerImplMetrics {
 
             rare_counters,
             inits_cancelled,
+            redownload_after,
         }
     }
 }
@@ -1574,6 +1621,10 @@ impl LayerImplMetrics {
     fn inc_init_cancelled(&self) {
         self.inits_cancelled.inc()
     }
+
+    fn record_redownloaded_after(&self, duration: std::time::Duration) {
+        self.redownload_after.observe(duration.as_secs_f64())
+    }
 }
 
 #[derive(enum_map::Enum)]
diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs
index bc404c41a0..7ff1873eda 100644
--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -54,31 +54,18 @@ impl BackgroundLoopKind {
     }
 }
 
-pub(crate) enum RateLimitError {
-    Cancelled,
-}
-
-pub(crate) async fn concurrent_background_tasks_rate_limit(
+/// Cancellation safe.
+pub(crate) async fn concurrent_background_tasks_rate_limit_permit(
     loop_kind: BackgroundLoopKind,
     _ctx: &RequestContext,
-    cancel: &CancellationToken,
-) -> Result<impl Drop, RateLimitError> {
-    crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_START_COUNT
+) -> impl Drop {
+    let _guard = crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_GAUGE
         .with_label_values(&[loop_kind.as_static_str()])
-        .inc();
-    scopeguard::defer!(
-        crate::metrics::BACKGROUND_LOOP_SEMAPHORE_WAIT_FINISH_COUNT.with_label_values(&[loop_kind.as_static_str()]).inc();
-    );
-    tokio::select! {
-        permit = CONCURRENT_BACKGROUND_TASKS.acquire() => {
-            match permit {
-                Ok(permit) => Ok(permit),
-                Err(_closed) => unreachable!("we never close the semaphore"),
-            }
-        },
-        _ = cancel.cancelled() => {
-            Err(RateLimitError::Cancelled)
-        }
+        .guard();
+
+    match CONCURRENT_BACKGROUND_TASKS.acquire().await {
+        Ok(permit) => permit,
+        Err(_closed) => unreachable!("we never close the semaphore"),
     }
 }
 
@@ -87,13 +74,13 @@ pub fn start_background_loops(
     tenant: &Arc<Tenant>,
     background_jobs_can_start: Option<&completion::Barrier>,
 ) {
-    let tenant_id = tenant.tenant_shard_id.tenant_id;
+    let tenant_shard_id = tenant.tenant_shard_id;
     task_mgr::spawn(
         BACKGROUND_RUNTIME.handle(),
         TaskKind::Compaction,
-        Some(tenant_id),
+        Some(tenant_shard_id),
         None,
-        &format!("compactor for tenant {tenant_id}"),
+        &format!("compactor for tenant {tenant_shard_id}"),
         false,
         {
             let tenant = Arc::clone(tenant);
@@ -105,7 +92,7 @@ pub fn start_background_loops(
                     _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
                 };
                 compaction_loop(tenant, cancel)
-                    .instrument(info_span!("compaction_loop", tenant_id = %tenant_id))
+                    .instrument(info_span!("compaction_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
                     .await;
                 Ok(())
             }
@@ -114,9 +101,9 @@ pub fn start_background_loops(
     task_mgr::spawn(
         BACKGROUND_RUNTIME.handle(),
         TaskKind::GarbageCollector,
-        Some(tenant_id),
+        Some(tenant_shard_id),
         None,
-        &format!("garbage collector for tenant {tenant_id}"),
+        &format!("garbage collector for tenant {tenant_shard_id}"),
         false,
         {
             let tenant = Arc::clone(tenant);
@@ -128,7 +115,7 @@ pub fn start_background_loops(
                     _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
                 };
                 gc_loop(tenant, cancel)
-                    .instrument(info_span!("gc_loop", tenant_id = %tenant_id))
+                    .instrument(info_span!("gc_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
                     .await;
                 Ok(())
             }
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index 551b66b77d..1e84fa1848 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -29,7 +29,7 @@ use tokio::{
 };
 use tokio_util::sync::CancellationToken;
 use tracing::*;
-use utils::{id::TenantTimelineId, sync::gate::Gate};
+use utils::sync::gate::Gate;
 
 use std::collections::{BinaryHeap, HashMap, HashSet};
 use std::ops::{Deref, Range};
@@ -51,7 +51,7 @@ use crate::tenant::storage_layer::{
     LayerAccessStatsReset, LayerFileName, ResidentLayer, ValueReconstructResult,
     ValueReconstructState,
 };
-use crate::tenant::tasks::{BackgroundLoopKind, RateLimitError};
+use crate::tenant::tasks::BackgroundLoopKind;
 use crate::tenant::timeline::logical_size::CurrentLogicalSize;
 use crate::tenant::{
     layer_map::{LayerMap, SearchResult},
@@ -66,7 +66,7 @@ use crate::metrics::{
     TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
 };
 use crate::pgdatadir_mapping::LsnForTimestamp;
-use crate::pgdatadir_mapping::{is_rel_fsm_block_key, is_rel_vm_block_key};
+use crate::pgdatadir_mapping::{is_inherited_key, is_rel_fsm_block_key, is_rel_vm_block_key};
 use crate::pgdatadir_mapping::{BlockNumber, CalculateLogicalSizeError};
 use crate::tenant::config::{EvictionPolicy, TenantConfOpt};
 use pageserver_api::reltag::RelTag;
@@ -77,7 +77,7 @@ use postgres_ffi::to_pg_timestamp;
 use utils::{
     completion,
     generation::Generation,
-    id::{TenantId, TimelineId},
+    id::TimelineId,
     lsn::{AtomicLsn, Lsn, RecordLsn},
     seqwait::SeqWait,
     simple_rcu::{Rcu, RcuReadGuard},
@@ -98,8 +98,9 @@ use self::logical_size::LogicalSize;
 use self::walreceiver::{WalReceiver, WalReceiverConf};
 
 use super::config::TenantConf;
-use super::remote_timeline_client::index::IndexPart;
+use super::remote_timeline_client::index::{IndexLayerMetadata, IndexPart};
 use super::remote_timeline_client::RemoteTimelineClient;
+use super::secondary::heatmap::{HeatMapLayer, HeatMapTimeline};
 use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf};
 
 #[derive(Debug, PartialEq, Eq, Clone, Copy)]
@@ -377,9 +378,6 @@ pub enum PageReconstructError {
     #[error(transparent)]
     Other(#[from] anyhow::Error),
 
-    /// The operation would require downloading a layer that is missing locally.
-    NeedsDownload(TenantTimelineId, LayerFileName),
-
     /// The operation was cancelled
     Cancelled,
 
@@ -408,14 +406,6 @@ impl std::fmt::Debug for PageReconstructError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
         match self {
             Self::Other(err) => err.fmt(f),
-            Self::NeedsDownload(tenant_timeline_id, layer_file_name) => {
-                write!(
-                    f,
-                    "layer {}/{} needs download",
-                    tenant_timeline_id,
-                    layer_file_name.file_name()
-                )
-            }
             Self::Cancelled => write!(f, "cancelled"),
             Self::AncestorStopping(timeline_id) => {
                 write!(f, "ancestor timeline {timeline_id} is being stopped")
@@ -429,14 +419,6 @@ impl std::fmt::Display for PageReconstructError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
         match self {
             Self::Other(err) => err.fmt(f),
-            Self::NeedsDownload(tenant_timeline_id, layer_file_name) => {
-                write!(
-                    f,
-                    "layer {}/{} needs download",
-                    tenant_timeline_id,
-                    layer_file_name.file_name()
-                )
-            }
             Self::Cancelled => write!(f, "cancelled"),
             Self::AncestorStopping(timeline_id) => {
                 write!(f, "ancestor timeline {timeline_id} is being stopped")
@@ -464,6 +446,12 @@ pub(crate) enum CompactFlags {
     ForceRepartition,
 }
 
+impl std::fmt::Debug for Timeline {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "Timeline<{}>", self.timeline_id)
+    }
+}
+
 /// Public interface functions
 impl Timeline {
     /// Get the LSN where this branch was created
@@ -727,19 +715,27 @@ impl Timeline {
         flags: EnumSet<CompactFlags>,
         ctx: &RequestContext,
     ) -> Result<(), CompactionError> {
-        let _g = self.compaction_lock.lock().await;
+        // most likely the cancellation token is from background task, but in tests it could be the
+        // request task as well.
+
+        let prepare = async move {
+            let guard = self.compaction_lock.lock().await;
+
+            let permit = super::tasks::concurrent_background_tasks_rate_limit_permit(
+                BackgroundLoopKind::Compaction,
+                ctx,
+            )
+            .await;
+
+            (guard, permit)
+        };
 
         // this wait probably never needs any "long time spent" logging, because we already nag if
         // compaction task goes over it's period (20s) which is quite often in production.
-        let _permit = match super::tasks::concurrent_background_tasks_rate_limit(
-            BackgroundLoopKind::Compaction,
-            ctx,
-            cancel,
-        )
-        .await
-        {
-            Ok(permit) => permit,
-            Err(RateLimitError::Cancelled) => return Ok(()),
+        let (_guard, _permit) = tokio::select! {
+            tuple = prepare => { tuple },
+            _ = self.cancel.cancelled() => return Ok(()),
+            _ = cancel.cancelled() => return Ok(()),
         };
 
         let last_record_lsn = self.get_last_record_lsn();
@@ -926,7 +922,7 @@ impl Timeline {
         tracing::debug!("Waiting for WalReceiverManager...");
         task_mgr::shutdown_tasks(
             Some(TaskKind::WalReceiverManager),
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
         )
         .await;
@@ -977,7 +973,7 @@ impl Timeline {
         // Shut down the layer flush task before the remote client, as one depends on the other
         task_mgr::shutdown_tasks(
             Some(TaskKind::LayerFlushTask),
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
         )
         .await;
@@ -995,12 +991,7 @@ impl Timeline {
 
         tracing::debug!("Waiting for tasks...");
 
-        task_mgr::shutdown_tasks(
-            None,
-            Some(self.tenant_shard_id.tenant_id),
-            Some(self.timeline_id),
-        )
-        .await;
+        task_mgr::shutdown_tasks(None, Some(self.tenant_shard_id), Some(self.timeline_id)).await;
 
         // Finally wait until any gate-holders are complete
         self.gate.close().await;
@@ -1123,8 +1114,9 @@ impl Timeline {
         Ok(Some(true))
     }
 
-    /// Like [`evict_layer_batch`](Self::evict_layer_batch), but for just one layer.
-    /// Additional case `Ok(None)` covers the case where the layer could not be found by its `layer_file_name`.
+    /// Evict just one layer.
+    ///
+    /// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`.
     pub async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
         let _gate = self
             .gate
@@ -1135,109 +1127,17 @@ impl Timeline {
             return Ok(None);
         };
 
-        let Some(local_layer) = local_layer.keep_resident().await? else {
-            return Ok(Some(false));
-        };
-
-        let local_layer: Layer = local_layer.into();
-
-        let remote_client = self
+        let rtc = self
             .remote_client
             .as_ref()
             .ok_or_else(|| anyhow::anyhow!("remote storage not configured; cannot evict"))?;
 
-        let results = self
-            .evict_layer_batch(remote_client, &[local_layer])
-            .await?;
-        assert_eq!(results.len(), 1);
-        let result: Option<Result<(), EvictionError>> = results.into_iter().next().unwrap();
-        match result {
-            None => anyhow::bail!("task_mgr shutdown requested"),
-            Some(Ok(())) => Ok(Some(true)),
-            Some(Err(e)) => Err(anyhow::Error::new(e)),
+        match local_layer.evict_and_wait(rtc).await {
+            Ok(()) => Ok(Some(true)),
+            Err(EvictionError::NotFound) => Ok(Some(false)),
+            Err(EvictionError::Downloaded) => Ok(Some(false)),
         }
     }
-
-    /// Evict a batch of layers.
-    pub(crate) async fn evict_layers(
-        &self,
-        layers_to_evict: &[Layer],
-    ) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
-        let _gate = self
-            .gate
-            .enter()
-            .map_err(|_| anyhow::anyhow!("Shutting down"))?;
-
-        let remote_client = self
-            .remote_client
-            .as_ref()
-            .context("timeline must have RemoteTimelineClient")?;
-
-        self.evict_layer_batch(remote_client, layers_to_evict).await
-    }
-
-    /// Evict multiple layers at once, continuing through errors.
-    ///
-    /// The `remote_client` should be this timeline's `self.remote_client`.
-    /// We make the caller provide it so that they are responsible for handling the case
-    /// where someone wants to evict the layer but no remote storage is configured.
-    ///
-    /// Returns either `Err()` or `Ok(results)` where `results.len() == layers_to_evict.len()`.
-    /// If `Err()` is returned, no eviction was attempted.
-    /// Each position of `Ok(results)` corresponds to the layer in `layers_to_evict`.
-    /// Meaning of each `result[i]`:
-    /// - `Some(Err(...))` if layer replacement failed for some reason
-    ///    - replacement failed for an expectable reason (e.g., layer removed by GC before we grabbed all locks)
-    /// - `Some(Ok(()))` if everything went well.
-    /// - `None` if no eviction attempt was made for the layer because `cancel.is_cancelled() == true`.
-    async fn evict_layer_batch(
-        &self,
-        remote_client: &Arc<RemoteTimelineClient>,
-        layers_to_evict: &[Layer],
-    ) -> anyhow::Result<Vec<Option<Result<(), EvictionError>>>> {
-        {
-            // to avoid racing with detach and delete_timeline
-            let state = self.current_state();
-            anyhow::ensure!(
-                state == TimelineState::Active,
-                "timeline is not active but {state:?}"
-            );
-        }
-
-        let mut results = Vec::with_capacity(layers_to_evict.len());
-        for _ in 0..layers_to_evict.len() {
-            results.push(None);
-        }
-
-        let mut js = tokio::task::JoinSet::new();
-
-        for (i, l) in layers_to_evict.iter().enumerate() {
-            js.spawn({
-                let l = l.to_owned();
-                let remote_client = remote_client.clone();
-                async move { (i, l.evict_and_wait(&remote_client).await) }
-            });
-        }
-
-        let join = async {
-            while let Some(next) = js.join_next().await {
-                match next {
-                    Ok((i, res)) => results[i] = Some(res),
-                    Err(je) if je.is_cancelled() => unreachable!("not used"),
-                    Err(je) if je.is_panic() => { /* already logged */ }
-                    Err(je) => tracing::error!("unknown JoinError: {je:?}"),
-                }
-            }
-        };
-
-        tokio::select! {
-            _ = self.cancel.cancelled() => {},
-            _ = join => {}
-        }
-
-        assert_eq!(results.len(), layers_to_evict.len());
-        Ok(results)
-    }
 }
 
 /// Number of times we will compute partition within a checkpoint distance.
@@ -1314,16 +1214,20 @@ impl Timeline {
                 &self.conf.default_tenant_conf,
             );
 
-            // TODO(sharding): make evictions state shard aware
-            // (https://github.com/neondatabase/neon/issues/5953)
             let tenant_id_str = self.tenant_shard_id.tenant_id.to_string();
+            let shard_id_str = format!("{}", self.tenant_shard_id.shard_slug());
 
             let timeline_id_str = self.timeline_id.to_string();
             self.metrics
                 .evictions_with_low_residence_duration
                 .write()
                 .unwrap()
-                .change_threshold(&tenant_id_str, &timeline_id_str, new_threshold);
+                .change_threshold(
+                    &tenant_id_str,
+                    &shard_id_str,
+                    &timeline_id_str,
+                    new_threshold,
+                );
         }
     }
 
@@ -1395,7 +1299,7 @@ impl Timeline {
                 ancestor_lsn: metadata.ancestor_lsn(),
 
                 metrics: TimelineMetrics::new(
-                    &tenant_shard_id.tenant_id,
+                    &tenant_shard_id,
                     &timeline_id,
                     crate::metrics::EvictionsWithLowResidenceDurationBuilder::new(
                         "mtime",
@@ -1496,7 +1400,7 @@ impl Timeline {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::LayerFlushTask,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "layer flush task",
             false,
@@ -1830,6 +1734,7 @@ impl Timeline {
                 self.current_logical_size.current_size().accuracy(),
                 logical_size::Accuracy::Exact,
             );
+            self.current_logical_size.initialized.add_permits(1);
             return;
         };
 
@@ -1847,7 +1752,7 @@ impl Timeline {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::InitialLogicalSizeCalculation,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "initial size calculation",
             false,
@@ -1875,6 +1780,11 @@ impl Timeline {
         cancel: CancellationToken,
         background_ctx: RequestContext,
     ) {
+        scopeguard::defer! {
+            // Irrespective of the outcome of this operation, we should unblock anyone waiting for it.
+            self.current_logical_size.initialized.add_permits(1);
+        }
+
         enum BackgroundCalculationError {
             Cancelled,
             Other(anyhow::Error),
@@ -1886,22 +1796,22 @@ impl Timeline {
             let skip_concurrency_limiter = &skip_concurrency_limiter;
             async move {
                 let cancel = task_mgr::shutdown_token();
-                let wait_for_permit = super::tasks::concurrent_background_tasks_rate_limit(
+                let wait_for_permit = super::tasks::concurrent_background_tasks_rate_limit_permit(
                     BackgroundLoopKind::InitialLogicalSizeCalculation,
                     background_ctx,
-                    &cancel,
                 );
 
                 use crate::metrics::initial_logical_size::StartCircumstances;
                 let (_maybe_permit, circumstances) = tokio::select! {
-                    res = wait_for_permit => {
-                        match res {
-                            Ok(permit) => (Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit),
-                            Err(RateLimitError::Cancelled) => {
-                                return Err(BackgroundCalculationError::Cancelled);
-                            }
-                        }
+                    permit = wait_for_permit => {
+                        (Some(permit), StartCircumstances::AfterBackgroundTasksRateLimit)
                     }
+                    _ = self_ref.cancel.cancelled() => {
+                        return Err(BackgroundCalculationError::Cancelled);
+                    }
+                    _ = cancel.cancelled() => {
+                        return Err(BackgroundCalculationError::Cancelled);
+                    },
                     () = skip_concurrency_limiter.cancelled() => {
                         // Some action that is part of a end user interaction requested logical size
                         // => break out of the rate limit
@@ -2020,7 +1930,7 @@ impl Timeline {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::OndemandLogicalSizeCalculation,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "ondemand logical size calculation",
             false,
@@ -2166,6 +2076,55 @@ impl Timeline {
 
         None
     }
+
+    /// The timeline heatmap is a hint to secondary locations from the primary location,
+    /// indicating which layers are currently on-disk on the primary.
+    ///
+    /// None is returned if the Timeline is in a state where uploading a heatmap
+    /// doesn't make sense, such as shutting down or initializing.  The caller
+    /// should treat this as a cue to simply skip doing any heatmap uploading
+    /// for this timeline.
+    pub(crate) async fn generate_heatmap(&self) -> Option<HeatMapTimeline> {
+        let eviction_info = self.get_local_layers_for_disk_usage_eviction().await;
+
+        let remote_client = match &self.remote_client {
+            Some(c) => c,
+            None => return None,
+        };
+
+        let layer_file_names = eviction_info
+            .resident_layers
+            .iter()
+            .map(|l| l.layer.layer_desc().filename())
+            .collect::<Vec<_>>();
+
+        let decorated = match remote_client.get_layers_metadata(layer_file_names) {
+            Ok(d) => d,
+            Err(_) => {
+                // Getting metadata only fails on Timeline in bad state.
+                return None;
+            }
+        };
+
+        let heatmap_layers = std::iter::zip(
+            eviction_info.resident_layers.into_iter(),
+            decorated.into_iter(),
+        )
+        .filter_map(|(layer, remote_info)| {
+            remote_info.map(|remote_info| {
+                HeatMapLayer::new(
+                    layer.layer.layer_desc().filename(),
+                    IndexLayerMetadata::from(remote_info),
+                    layer.last_activity_ts,
+                )
+            })
+        });
+
+        Some(HeatMapTimeline::new(
+            self.timeline_id,
+            heatmap_layers.collect(),
+        ))
+    }
 }
 
 type TraversalId = String;
@@ -2279,7 +2238,7 @@ impl Timeline {
             }
 
             // Recurse into ancestor if needed
-            if Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
+            if is_inherited_key(key) && Lsn(cont_lsn.0 - 1) <= timeline.ancestor_lsn {
                 trace!(
                     "going into ancestor {}, cont_lsn is {}",
                     timeline.ancestor_lsn,
@@ -2461,13 +2420,7 @@ impl Timeline {
         // FIXME: It's pointless to check the cache for things that are not 8kB pages.
         // We should look at the key to determine if it's a cacheable object
         let (lsn, read_guard) = cache
-            .lookup_materialized_page(
-                self.tenant_shard_id.tenant_id,
-                self.timeline_id,
-                key,
-                lsn,
-                ctx,
-            )
+            .lookup_materialized_page(self.tenant_shard_id, self.timeline_id, key, lsn, ctx)
             .await?;
         let img = Bytes::from(read_guard.to_vec());
         Some((lsn, img))
@@ -3157,6 +3110,32 @@ impl Timeline {
 
         Ok(image_layers)
     }
+
+    /// Wait until the background initial logical size calculation is complete, or
+    /// this Timeline is shut down.  Calling this function will cause the initial
+    /// logical size calculation to skip waiting for the background jobs barrier.
+    pub(crate) async fn await_initial_logical_size(self: Arc<Self>) {
+        if let Some(await_bg_cancel) = self
+            .current_logical_size
+            .cancel_wait_for_background_loop_concurrency_limit_semaphore
+            .get()
+        {
+            await_bg_cancel.cancel();
+        } else {
+            // We should not wait if we were not able to explicitly instruct
+            // the logical size cancellation to skip the concurrency limit semaphore.
+            // TODO: this is an unexpected case.  We should restructure so that it
+            // can't happen.
+            tracing::info!(
+                "await_initial_logical_size: can't get semaphore cancel token, skipping"
+            );
+        }
+
+        tokio::select!(
+            _ = self.current_logical_size.initialized.acquire() => {},
+            _ = self.cancel.cancelled() => {}
+        )
+    }
 }
 
 #[derive(Default)]
@@ -3209,7 +3188,7 @@ impl DurationRecorder {
 #[derive(Default)]
 struct CompactLevel0Phase1StatsBuilder {
     version: Option<u64>,
-    tenant_id: Option<TenantId>,
+    tenant_id: Option<TenantShardId>,
     timeline_id: Option<TimelineId>,
     read_lock_acquisition_micros: DurationRecorder,
     read_lock_held_spawn_blocking_startup_micros: DurationRecorder,
@@ -3226,7 +3205,7 @@ struct CompactLevel0Phase1StatsBuilder {
 #[derive(serde::Serialize)]
 struct CompactLevel0Phase1Stats {
     version: u64,
-    tenant_id: TenantId,
+    tenant_id: TenantShardId,
     timeline_id: TimelineId,
     read_lock_acquisition_micros: RecordedDuration,
     read_lock_held_spawn_blocking_startup_micros: RecordedDuration,
@@ -3745,7 +3724,7 @@ impl Timeline {
             let ctx = ctx.attached_child();
             let mut stats = CompactLevel0Phase1StatsBuilder {
                 version: Some(2),
-                tenant_id: Some(self.tenant_shard_id.tenant_id),
+                tenant_id: Some(self.tenant_shard_id),
                 timeline_id: Some(self.timeline_id),
                 ..Default::default()
             };
@@ -3913,7 +3892,14 @@ impl Timeline {
     /// within a layer file. We can only remove the whole file if it's fully
     /// obsolete.
     pub(super) async fn gc(&self) -> anyhow::Result<GcResult> {
-        let _g = self.gc_lock.lock().await;
+        // this is most likely the background tasks, but it might be the spawned task from
+        // immediate_gc
+        let cancel = crate::task_mgr::shutdown_token();
+        let _g = tokio::select! {
+            guard = self.gc_lock.lock() => guard,
+            _ = self.cancel.cancelled() => return Ok(GcResult::default()),
+            _ = cancel.cancelled() => return Ok(GcResult::default()),
+        };
         let timer = self.metrics.garbage_collect_histo.start_timer();
 
         fail_point!("before-timeline-gc");
@@ -4207,7 +4193,7 @@ impl Timeline {
                     let cache = page_cache::get();
                     if let Err(e) = cache
                         .memorize_materialized_page(
-                            self.tenant_shard_id.tenant_id,
+                            self.tenant_shard_id,
                             self.timeline_id,
                             key,
                             last_rec_lsn,
@@ -4251,7 +4237,7 @@ impl Timeline {
         let task_id = task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             task_mgr::TaskKind::DownloadAllRemoteLayers,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             "download all remote layers task",
             false,
@@ -4612,7 +4598,7 @@ mod tests {
             .await
             .unwrap();
 
-        let rc = timeline
+        let rtc = timeline
             .remote_client
             .clone()
             .expect("just configured this");
@@ -4625,16 +4611,12 @@ mod tests {
             .expect("should had been resident")
             .drop_eviction_guard();
 
-        let batch = [layer];
-
-        let first = async { timeline.evict_layer_batch(&rc, &batch).await.unwrap() };
-        let second = async { timeline.evict_layer_batch(&rc, &batch).await.unwrap() };
+        let first = async { layer.evict_and_wait(&rtc).await };
+        let second = async { layer.evict_and_wait(&rtc).await };
 
         let (first, second) = tokio::join!(first, second);
 
-        let (first, second) = (only_one(first), only_one(second));
-
-        let res = batch[0].keep_resident().await;
+        let res = layer.keep_resident().await;
         assert!(matches!(res, Ok(None)), "{res:?}");
 
         match (first, second) {
@@ -4655,14 +4637,6 @@ mod tests {
         RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
     }
 
-    fn only_one<T>(mut input: Vec<Option<T>>) -> T {
-        assert_eq!(1, input.len());
-        input
-            .pop()
-            .expect("length just checked")
-            .expect("no cancellation")
-    }
-
     async fn find_some_layer(timeline: &Timeline) -> Layer {
         let layers = timeline.layers.read().await;
         let desc = layers
diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs
index 2a103a7ff4..be873181d9 100644
--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -43,7 +43,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
     // Shut down the layer flush task before the remote client, as one depends on the other
     task_mgr::shutdown_tasks(
         Some(TaskKind::LayerFlushTask),
-        Some(timeline.tenant_shard_id.tenant_id),
+        Some(timeline.tenant_shard_id),
         Some(timeline.timeline_id),
     )
     .await;
@@ -71,7 +71,7 @@ async fn stop_tasks(timeline: &Timeline) -> Result<(), DeleteTimelineError> {
     info!("waiting for timeline tasks to shutdown");
     task_mgr::shutdown_tasks(
         None,
-        Some(timeline.tenant_shard_id.tenant_id),
+        Some(timeline.tenant_shard_id),
         Some(timeline.timeline_id),
     )
     .await;
@@ -528,7 +528,7 @@ impl DeleteTimelineFlow {
         task_mgr::spawn(
             task_mgr::BACKGROUND_RUNTIME.handle(),
             TaskKind::TimelineDeletionWorker,
-            Some(tenant_shard_id.tenant_id),
+            Some(tenant_shard_id),
             Some(timeline_id),
             "timeline_delete",
             false,
diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs
index 3fe4bc0f83..ea5f5f5fa7 100644
--- a/pageserver/src/tenant/timeline/eviction_task.rs
+++ b/pageserver/src/tenant/timeline/eviction_task.rs
@@ -30,7 +30,7 @@ use crate::{
     task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
     tenant::{
         config::{EvictionPolicy, EvictionPolicyLayerAccessThreshold},
-        tasks::{BackgroundLoopKind, RateLimitError},
+        tasks::BackgroundLoopKind,
         timeline::EvictionError,
         LogicalSizeCalculationCause, Tenant,
     },
@@ -60,7 +60,7 @@ impl Timeline {
         task_mgr::spawn(
             BACKGROUND_RUNTIME.handle(),
             TaskKind::Eviction,
-            Some(self.tenant_shard_id.tenant_id),
+            Some(self.tenant_shard_id),
             Some(self.timeline_id),
             &format!(
                 "layer eviction for {}/{}",
@@ -158,15 +158,15 @@ impl Timeline {
     ) -> ControlFlow<()> {
         let now = SystemTime::now();
 
-        let _permit = match crate::tenant::tasks::concurrent_background_tasks_rate_limit(
+        let acquire_permit = crate::tenant::tasks::concurrent_background_tasks_rate_limit_permit(
             BackgroundLoopKind::Eviction,
             ctx,
-            cancel,
-        )
-        .await
-        {
-            Ok(permit) => permit,
-            Err(RateLimitError::Cancelled) => return ControlFlow::Break(()),
+        );
+
+        let _permit = tokio::select! {
+            permit = acquire_permit => permit,
+            _ = cancel.cancelled() => return ControlFlow::Break(()),
+            _ = self.cancel.cancelled() => return ControlFlow::Break(()),
         };
 
         // If we evict layers but keep cached values derived from those layers, then
@@ -212,11 +212,21 @@ impl Timeline {
         // Gather layers for eviction.
         // NB: all the checks can be invalidated as soon as we release the layer map lock.
         // We don't want to hold the layer map lock during eviction.
+
         // So, we just need to deal with this.
-        let candidates: Vec<_> = {
+
+        let remote_client = match self.remote_client.as_ref() {
+            Some(c) => c,
+            None => {
+                error!("no remote storage configured, cannot evict layers");
+                return ControlFlow::Continue(());
+            }
+        };
+
+        let mut js = tokio::task::JoinSet::new();
+        {
             let guard = self.layers.read().await;
             let layers = guard.layer_map();
-            let mut candidates = Vec::new();
             for hist_layer in layers.iter_historic_layers() {
                 let hist_layer = guard.get_from_desc(&hist_layer);
 
@@ -262,54 +272,49 @@ impl Timeline {
                         continue;
                     }
                 };
+                let layer = guard.drop_eviction_guard();
                 if no_activity_for > p.threshold {
-                    candidates.push(guard.drop_eviction_guard())
+                    let remote_client = remote_client.clone();
+                    // this could cause a lot of allocations in some cases
+                    js.spawn(async move { layer.evict_and_wait(&remote_client).await });
+                    stats.candidates += 1;
                 }
             }
-            candidates
-        };
-        stats.candidates = candidates.len();
-
-        let remote_client = match self.remote_client.as_ref() {
-            None => {
-                error!(
-                    num_candidates = candidates.len(),
-                    "no remote storage configured, cannot evict layers"
-                );
-                return ControlFlow::Continue(());
-            }
-            Some(c) => c,
         };
 
-        let results = match self.evict_layer_batch(remote_client, &candidates).await {
-            Err(pre_err) => {
-                stats.errors += candidates.len();
-                error!("could not do any evictions: {pre_err:#}");
-                return ControlFlow::Continue(());
+        let join_all = async move {
+            while let Some(next) = js.join_next().await {
+                match next {
+                    Ok(Ok(())) => stats.evicted += 1,
+                    Ok(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
+                        stats.not_evictable += 1;
+                    }
+                    Err(je) if je.is_cancelled() => unreachable!("not used"),
+                    Err(je) if je.is_panic() => {
+                        /* already logged */
+                        stats.errors += 1;
+                    }
+                    Err(je) => tracing::error!("unknown JoinError: {je:?}"),
+                }
             }
-            Ok(results) => results,
+            stats
         };
-        assert_eq!(results.len(), candidates.len());
-        for result in results {
-            match result {
-                None => {
-                    stats.skipped_for_shutdown += 1;
-                }
-                Some(Ok(())) => {
-                    stats.evicted += 1;
-                }
-                Some(Err(EvictionError::NotFound | EvictionError::Downloaded)) => {
-                    stats.not_evictable += 1;
+
+        tokio::select! {
+            stats = join_all => {
+                if stats.candidates == stats.not_evictable {
+                    debug!(stats=?stats, "eviction iteration complete");
+                } else if stats.errors > 0 || stats.not_evictable > 0 {
+                    warn!(stats=?stats, "eviction iteration complete");
+                } else {
+                    info!(stats=?stats, "eviction iteration complete");
                 }
             }
+            _ = cancel.cancelled() => {
+                // just drop the joinset to "abort"
+            }
         }
-        if stats.candidates == stats.not_evictable {
-            debug!(stats=?stats, "eviction iteration complete");
-        } else if stats.errors > 0 || stats.not_evictable > 0 {
-            warn!(stats=?stats, "eviction iteration complete");
-        } else {
-            info!(stats=?stats, "eviction iteration complete");
-        }
+
         ControlFlow::Continue(())
     }
 
@@ -343,7 +348,7 @@ impl Timeline {
         // Make one of the tenant's timelines draw the short straw and run the calculation.
         // The others wait until the calculation is done so that they take into account the
         // imitated accesses that the winner made.
-        let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id.tenant_id, true) {
+        let tenant = match crate::tenant::mgr::get_tenant(self.tenant_shard_id, true) {
             Ok(t) => t,
             Err(_) => {
                 return ControlFlow::Break(());
diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs
index f2db8c91fc..03bc59ea38 100644
--- a/pageserver/src/tenant/timeline/logical_size.rs
+++ b/pageserver/src/tenant/timeline/logical_size.rs
@@ -34,6 +34,9 @@ pub(super) struct LogicalSize {
     pub(crate) cancel_wait_for_background_loop_concurrency_limit_semaphore:
         OnceCell<CancellationToken>,
 
+    /// Once the initial logical size is initialized, this is notified.
+    pub(crate) initialized: tokio::sync::Semaphore,
+
     /// Latest Lsn that has its size uncalculated, could be absent for freshly created timelines.
     pub initial_part_end: Option<Lsn>,
 
@@ -125,6 +128,7 @@ impl LogicalSize {
             initial_part_end: None,
             size_added_after_initial: AtomicI64::new(0),
             did_return_approximate_to_walreceiver: AtomicBool::new(false),
+            initialized: tokio::sync::Semaphore::new(0),
         }
     }
 
@@ -135,6 +139,7 @@ impl LogicalSize {
             initial_part_end: Some(compute_to),
             size_added_after_initial: AtomicI64::new(0),
             did_return_approximate_to_walreceiver: AtomicBool::new(false),
+            initialized: tokio::sync::Semaphore::new(0),
         }
     }
 
diff --git a/pageserver/src/tenant/timeline/uninit.rs b/pageserver/src/tenant/timeline/uninit.rs
index 61130f541a..27d6fd9c28 100644
--- a/pageserver/src/tenant/timeline/uninit.rs
+++ b/pageserver/src/tenant/timeline/uninit.rs
@@ -19,14 +19,14 @@ use super::Timeline;
 pub struct UninitializedTimeline<'t> {
     pub(crate) owning_tenant: &'t Tenant,
     timeline_id: TimelineId,
-    raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark)>,
+    raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
 }
 
 impl<'t> UninitializedTimeline<'t> {
     pub(crate) fn new(
         owning_tenant: &'t Tenant,
         timeline_id: TimelineId,
-        raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark)>,
+        raw_timeline: Option<(Arc<Timeline>, TimelineUninitMark<'t>)>,
     ) -> Self {
         Self {
             owning_tenant,
@@ -169,18 +169,55 @@ pub(crate) fn cleanup_timeline_directory(uninit_mark: TimelineUninitMark) {
 ///
 /// XXX: it's important to create it near the timeline dir, not inside it to ensure timeline dir gets removed first.
 #[must_use]
-pub(crate) struct TimelineUninitMark {
+pub(crate) struct TimelineUninitMark<'t> {
+    owning_tenant: &'t Tenant,
+    timeline_id: TimelineId,
     uninit_mark_deleted: bool,
     uninit_mark_path: Utf8PathBuf,
     pub(crate) timeline_path: Utf8PathBuf,
 }
 
-impl TimelineUninitMark {
-    pub(crate) fn new(uninit_mark_path: Utf8PathBuf, timeline_path: Utf8PathBuf) -> Self {
-        Self {
-            uninit_mark_deleted: false,
-            uninit_mark_path,
-            timeline_path,
+/// Errors when acquiring exclusive access to a timeline ID for creation
+#[derive(thiserror::Error, Debug)]
+pub(crate) enum TimelineExclusionError {
+    #[error("Already exists")]
+    AlreadyExists(Arc<Timeline>),
+    #[error("Already creating")]
+    AlreadyCreating,
+
+    // e.g. I/O errors, or some failure deep in postgres initdb
+    #[error(transparent)]
+    Other(#[from] anyhow::Error),
+}
+
+impl<'t> TimelineUninitMark<'t> {
+    pub(crate) fn new(
+        owning_tenant: &'t Tenant,
+        timeline_id: TimelineId,
+        uninit_mark_path: Utf8PathBuf,
+        timeline_path: Utf8PathBuf,
+    ) -> Result<Self, TimelineExclusionError> {
+        // Lock order: this is the only place we take both locks.  During drop() we only
+        // lock creating_timelines
+        let timelines = owning_tenant.timelines.lock().unwrap();
+        let mut creating_timelines: std::sync::MutexGuard<
+            '_,
+            std::collections::HashSet<TimelineId>,
+        > = owning_tenant.timelines_creating.lock().unwrap();
+
+        if let Some(existing) = timelines.get(&timeline_id) {
+            Err(TimelineExclusionError::AlreadyExists(existing.clone()))
+        } else if creating_timelines.contains(&timeline_id) {
+            Err(TimelineExclusionError::AlreadyCreating)
+        } else {
+            creating_timelines.insert(timeline_id);
+            Ok(Self {
+                owning_tenant,
+                timeline_id,
+                uninit_mark_deleted: false,
+                uninit_mark_path,
+                timeline_path,
+            })
         }
     }
 
@@ -207,7 +244,7 @@ impl TimelineUninitMark {
     }
 }
 
-impl Drop for TimelineUninitMark {
+impl Drop for TimelineUninitMark<'_> {
     fn drop(&mut self) {
         if !self.uninit_mark_deleted {
             if self.timeline_path.exists() {
@@ -226,5 +263,11 @@ impl Drop for TimelineUninitMark {
                 }
             }
         }
+
+        self.owning_tenant
+            .timelines_creating
+            .lock()
+            .unwrap()
+            .remove(&self.timeline_id);
     }
 }
diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs
index 04ff8602d6..e32265afb5 100644
--- a/pageserver/src/tenant/timeline/walreceiver.rs
+++ b/pageserver/src/tenant/timeline/walreceiver.rs
@@ -30,6 +30,7 @@ use crate::tenant::timeline::walreceiver::connection_manager::{
     connection_manager_loop_step, ConnectionManagerState,
 };
 
+use pageserver_api::shard::TenantShardId;
 use std::future::Future;
 use std::num::NonZeroU64;
 use std::ops::ControlFlow;
@@ -41,7 +42,7 @@ use tokio::sync::watch;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 
-use utils::id::TenantTimelineId;
+use utils::id::TimelineId;
 
 use self::connection_manager::ConnectionManagerStatus;
 
@@ -60,7 +61,8 @@ pub struct WalReceiverConf {
 }
 
 pub struct WalReceiver {
-    timeline: TenantTimelineId,
+    tenant_shard_id: TenantShardId,
+    timeline_id: TimelineId,
     manager_status: Arc<std::sync::RwLock<Option<ConnectionManagerStatus>>>,
 }
 
@@ -71,7 +73,7 @@ impl WalReceiver {
         mut broker_client: BrokerClientChannel,
         ctx: &RequestContext,
     ) -> Self {
-        let tenant_id = timeline.tenant_shard_id.tenant_id;
+        let tenant_shard_id = timeline.tenant_shard_id;
         let timeline_id = timeline.timeline_id;
         let walreceiver_ctx =
             ctx.detached_child(TaskKind::WalReceiverManager, DownloadBehavior::Error);
@@ -81,9 +83,9 @@ impl WalReceiver {
         task_mgr::spawn(
             WALRECEIVER_RUNTIME.handle(),
             TaskKind::WalReceiverManager,
-            Some(tenant_id),
+            Some(timeline.tenant_shard_id),
             Some(timeline_id),
-            &format!("walreceiver for timeline {tenant_id}/{timeline_id}"),
+            &format!("walreceiver for timeline {tenant_shard_id}/{timeline_id}"),
             false,
             async move {
                 debug_assert_current_span_has_tenant_and_timeline_id();
@@ -117,11 +119,12 @@ impl WalReceiver {
                 *loop_status.write().unwrap() = None;
                 Ok(())
             }
-            .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_id, timeline_id = %timeline_id))
+            .instrument(info_span!(parent: None, "wal_connection_manager", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), timeline_id = %timeline_id))
         );
 
         Self {
-            timeline: TenantTimelineId::new(tenant_id, timeline_id),
+            tenant_shard_id,
+            timeline_id,
             manager_status,
         }
     }
@@ -129,8 +132,8 @@ impl WalReceiver {
     pub async fn stop(self) {
         task_mgr::shutdown_tasks(
             Some(TaskKind::WalReceiverManager),
-            Some(self.timeline.tenant_id),
-            Some(self.timeline.timeline_id),
+            Some(self.tenant_shard_id),
+            Some(self.timeline_id),
         )
         .await;
     }
diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
index 7bfa246eeb..5a5b3d7586 100644
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -138,7 +138,7 @@ pub(super) async fn connection_manager_loop_step(
                     Ok(Some(broker_update)) => connection_manager_state.register_timeline_update(broker_update),
                     Err(status) => {
                         match status.code() {
-                            Code::Unknown if status.message().contains("stream closed because of a broken pipe") => {
+                            Code::Unknown if status.message().contains("stream closed because of a broken pipe") || status.message().contains("connection reset") => {
                                 // tonic's error handling doesn't provide a clear code for disconnections: we get
                                 // "h2 protocol error: error reading a body from connection: stream closed because of a broken pipe"
                                 info!("broker disconnected: {status}");
diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
index 3bcb7ff891..61ab236322 100644
--- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs
@@ -163,7 +163,7 @@ pub(super) async fn handle_walreceiver_connection(
     task_mgr::spawn(
         WALRECEIVER_RUNTIME.handle(),
         TaskKind::WalReceiverConnectionPoller,
-        Some(timeline.tenant_shard_id.tenant_id),
+        Some(timeline.tenant_shard_id),
         Some(timeline.timeline_id),
         "walreceiver connection",
         false,
diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs
index 75b29a2fed..16b245c488 100644
--- a/pageserver/src/walingest.rs
+++ b/pageserver/src/walingest.rs
@@ -458,8 +458,10 @@ impl<'a> WalIngest<'a> {
             && decoded.xl_rmid == pg_constants::RM_XLOG_ID
             && (decoded.xl_info == pg_constants::XLOG_FPI
                 || decoded.xl_info == pg_constants::XLOG_FPI_FOR_HINT)
-        // compression of WAL is not yet supported: fall back to storing the original WAL record
+            // compression of WAL is not yet supported: fall back to storing the original WAL record
             && !postgres_ffi::bkpimage_is_compressed(blk.bimg_info, self.timeline.pg_version)?
+            // do not materialize null pages because them most likely be soon replaced with real data
+            && blk.bimg_len != 0
         {
             // Extract page image from FPI record
             let img_len = blk.bimg_len as usize;
@@ -2189,7 +2191,7 @@ mod tests {
             .load()
             .await;
         let tline = tenant
-            .bootstrap_timeline(TIMELINE_ID, pg_version, None, &ctx)
+            .bootstrap_timeline_test(TIMELINE_ID, pg_version, None, &ctx)
             .await
             .unwrap();
 
diff --git a/pgxn/neon/control_plane_connector.c b/pgxn/neon/control_plane_connector.c
index 2e7da671f9..e467a9c43a 100644
--- a/pgxn/neon/control_plane_connector.c
+++ b/pgxn/neon/control_plane_connector.c
@@ -19,20 +19,21 @@
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
+
+#include <curl/curl.h>
+
+#include "access/xact.h"
+#include "commands/defrem.h"
+#include "fmgr.h"
+#include "libpq/crypt.h"
+#include "miscadmin.h"
 #include "tcop/pquery.h"
 #include "tcop/utility.h"
-#include "access/xact.h"
+#include "utils/acl.h"
+#include "utils/guc.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
-#include "commands/defrem.h"
-#include "miscadmin.h"
-#include "utils/acl.h"
-#include "fmgr.h"
-#include "utils/guc.h"
-#include "port.h"
-#include <curl/curl.h>
 #include "utils/jsonb.h"
-#include "libpq/crypt.h"
 
 static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL;
 
diff --git a/pgxn/neon/extension_server.c b/pgxn/neon/extension_server.c
index fbbb8fd448..d9a75142f1 100644
--- a/pgxn/neon/extension_server.c
+++ b/pgxn/neon/extension_server.c
@@ -1,4 +1,3 @@
-
 /*-------------------------------------------------------------------------
  *
  * extension_server.c
@@ -10,21 +9,11 @@
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
-#include "tcop/pquery.h"
-#include "tcop/utility.h"
-#include "access/xact.h"
-#include "utils/hsearch.h"
-#include "utils/memutils.h"
-#include "commands/defrem.h"
-#include "miscadmin.h"
-#include "utils/acl.h"
-#include "fmgr.h"
-#include "utils/guc.h"
-#include "port.h"
-#include "fmgr.h"
 
 #include <curl/curl.h>
 
+#include "utils/guc.h"
+
 static int	extension_server_port = 0;
 
 static download_extension_file_hook_type prev_download_extension_file_hook = NULL;
diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c
index 54b3661e66..6725ce8fff 100644
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -13,32 +13,30 @@
  *-------------------------------------------------------------------------
  */
 
+#include "postgres.h"
+
 #include <sys/file.h>
 #include <unistd.h>
 #include <fcntl.h>
 
-#include "postgres.h"
-
 #include "neon_pgversioncompat.h"
 
+#include "access/parallel.h"
 #include "funcapi.h"
 #include "miscadmin.h"
-#include "pgstat.h"
 #include "pagestore_client.h"
-#include "access/parallel.h"
+#include "pgstat.h"
 #include "postmaster/bgworker.h"
 #include RELFILEINFO_HDR
 #include "storage/buf_internals.h"
-#include "storage/latch.h"
+#include "storage/fd.h"
 #include "storage/ipc.h"
+#include "storage/latch.h"
 #include "storage/lwlock.h"
+#include "storage/pg_shmem.h"
 #include "utils/builtins.h"
 #include "utils/dynahash.h"
 #include "utils/guc.h"
-#include "storage/fd.h"
-#include "storage/pg_shmem.h"
-#include "storage/buf_internals.h"
-#include "pgstat.h"
 
 /*
  * Local file cache is used to temporary store relations pages in local file system.
@@ -102,8 +100,6 @@ static shmem_request_hook_type prev_shmem_request_hook;
 
 #define LFC_ENABLED() (lfc_ctl->limit != 0)
 
-void		PGDLLEXPORT FileCacheMonitorMain(Datum main_arg);
-
 /*
  * Local file cache is optional and Neon can work without it.
  * In case of any any errors with this cache, we should disable it but to not throw error.
diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c
index 16406ce8a3..3b038f906f 100644
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -14,28 +14,24 @@
  */
 #include "postgres.h"
 
-#include "pagestore_client.h"
-#include "fmgr.h"
 #include "access/xlog.h"
-#include "access/xlogutils.h"
-#include "storage/buf_internals.h"
-#include "storage/lwlock.h"
-#include "storage/ipc.h"
-#include "storage/pg_shmem.h"
-#include "c.h"
-#include "postmaster/interrupt.h"
-
+#include "fmgr.h"
 #include "libpq-fe.h"
-#include "libpq/pqformat.h"
 #include "libpq/libpq.h"
-
+#include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "postmaster/interrupt.h"
+#include "storage/buf_internals.h"
+#include "storage/ipc.h"
+#include "storage/lwlock.h"
+#include "storage/pg_shmem.h"
 #include "utils/guc.h"
 
 #include "neon.h"
-#include "walproposer.h"
 #include "neon_utils.h"
+#include "pagestore_client.h"
+#include "walproposer.h"
 
 #define PageStoreTrace DEBUG5
 
@@ -62,8 +58,8 @@ char	   *neon_auth_token;
 int			readahead_buffer_size = 128;
 int			flush_every_n_requests = 8;
 
-int			n_reconnect_attempts = 0;
-int			max_reconnect_attempts = 60;
+static int n_reconnect_attempts = 0;
+static int max_reconnect_attempts = 60;
 
 #define MAX_PAGESERVER_CONNSTRING_SIZE 256
 
@@ -83,8 +79,6 @@ static PagestoreShmemState *pagestore_shared;
 static uint64 pagestore_local_counter = 0;
 static char local_pageserver_connstring[MAX_PAGESERVER_CONNSTRING_SIZE];
 
-bool		(*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
-
 static bool pageserver_flush(void);
 static void pageserver_disconnect(void);
 
@@ -627,8 +621,6 @@ pg_init_libpagestore(void)
 		smgr_hook = smgr_neon;
 		smgr_init_hook = smgr_init_neon;
 		dbsize_hook = neon_dbsize;
-		old_redo_read_buffer_filter = redo_read_buffer_filter;
-		redo_read_buffer_filter = neon_redo_read_buffer_filter;
 	}
 
 	lfc_init();
diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h
index 897a8373a1..c3afecc679 100644
--- a/pgxn/neon/neon.h
+++ b/pgxn/neon/neon.h
@@ -27,13 +27,6 @@ extern void pg_init_walproposer(void);
 
 extern void pg_init_extension_server(void);
 
-/*
- * Returns true if we shouldn't do REDO on that block in record indicated by
- * block_id; false otherwise.
- */
-extern bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
-extern bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id);
-
 extern uint64 BackpressureThrottlingTime(void);
 extern void replication_feedback_get_lsns(XLogRecPtr *writeLsn, XLogRecPtr *flushLsn, XLogRecPtr *applyLsn);
 
diff --git a/pgxn/neon/neon_utils.c b/pgxn/neon/neon_utils.c
index 06faea7490..9135847aaf 100644
--- a/pgxn/neon/neon_utils.c
+++ b/pgxn/neon/neon_utils.c
@@ -1,32 +1,10 @@
+
+#include <sys/resource.h>
+
 #include "postgres.h"
 
-#include "access/timeline.h"
-#include "access/xlogutils.h"
-#include "common/logging.h"
-#include "common/ip.h"
-#include "funcapi.h"
-#include "libpq/libpq.h"
+#include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
-#include "miscadmin.h"
-#include "postmaster/interrupt.h"
-#include "replication/slot.h"
-#include "replication/walsender_private.h"
-
-#include "storage/ipc.h"
-#include "utils/builtins.h"
-#include "utils/ps_status.h"
-
-#include "libpq-fe.h"
-#include <netinet/tcp.h>
-#include <unistd.h>
-
-#if PG_VERSION_NUM >= 150000
-#include "access/xlogutils.h"
-#include "access/xlogrecovery.h"
-#endif
-#if PG_MAJORVERSION_NUM >= 16
-#include "utils/guc.h"
-#endif
 
 /*
  * Convert a character which represents a hexadecimal digit to an integer.
@@ -114,3 +92,25 @@ pq_sendint64_le(StringInfo buf, uint64 i)
 	memcpy(buf->data + buf->len, &i, sizeof(uint64));
 	buf->len += sizeof(uint64);
 }
+
+/*
+ * Disables core dump for the current process.
+ */
+void
+disable_core_dump()
+{
+	struct rlimit rlim;
+
+#ifdef WALPROPOSER_LIB			/* skip in simulation mode */
+	return;
+#endif
+
+	rlim.rlim_cur = 0;
+	rlim.rlim_max = 0;
+	if (setrlimit(RLIMIT_CORE, &rlim))
+	{
+		int			save_errno = errno;
+
+		fprintf(stderr, "WARNING: disable cores setrlimit failed: %s", strerror(save_errno));
+	}
+}
diff --git a/pgxn/neon/neon_utils.h b/pgxn/neon/neon_utils.h
index e3fafc8d0f..a86f1e061c 100644
--- a/pgxn/neon/neon_utils.h
+++ b/pgxn/neon/neon_utils.h
@@ -1,12 +1,11 @@
 #ifndef __NEON_UTILS_H__
 #define __NEON_UTILS_H__
 
-#include "postgres.h"
-
 bool		HexDecodeString(uint8 *result, char *input, int nbytes);
 uint32		pq_getmsgint32_le(StringInfo msg);
 uint64		pq_getmsgint64_le(StringInfo msg);
 void		pq_sendint32_le(StringInfo buf, uint32 i);
 void		pq_sendint64_le(StringInfo buf, uint64 i);
+extern void disable_core_dump();
 
 #endif							/* __NEON_UTILS_H__ */
diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h
index ecfadb01d6..3fcaab0bee 100644
--- a/pgxn/neon/pagestore_client.h
+++ b/pgxn/neon/pagestore_client.h
@@ -13,19 +13,16 @@
 #ifndef pageserver_h
 #define pageserver_h
 
-#include "postgres.h"
 #include "neon_pgversioncompat.h"
 
 #include "access/xlogdefs.h"
 #include RELFILEINFO_HDR
-#include "storage/block.h"
-#include "storage/smgr.h"
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
+#include "storage/block.h"
+#include "storage/smgr.h"
 #include "utils/memutils.h"
 
-#include "pg_config.h"
-
 typedef enum
 {
 	/* pagestore_client -> pagestore */
@@ -158,11 +155,8 @@ extern page_server_api *page_server;
 extern char *page_server_connstring;
 extern int	flush_every_n_requests;
 extern int	readahead_buffer_size;
-extern bool seqscan_prefetch_enabled;
-extern int	seqscan_prefetch_distance;
 extern char *neon_timeline;
 extern char *neon_tenant;
-extern bool wal_redo;
 extern int32 max_cluster_size;
 
 extern const f_smgr *smgr_neon(BackendId backend, NRelFileInfo rinfo);
diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c
index 609d80588c..8888cd89c6 100644
--- a/pgxn/neon/pagestore_smgr.c
+++ b/pgxn/neon/pagestore_smgr.c
@@ -47,25 +47,26 @@
 
 #include "access/xact.h"
 #include "access/xlog.h"
+#include "access/xlogdefs.h"
 #include "access/xloginsert.h"
 #include "access/xlog_internal.h"
-#include "access/xlogdefs.h"
+#include "access/xlogutils.h"
 #include "catalog/pg_class.h"
 #include "common/hashfn.h"
 #include "executor/instrument.h"
-#include "pagestore_client.h"
-#include "postmaster/interrupt.h"
+#include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "postmaster/interrupt.h"
 #include "replication/walsender.h"
 #include "storage/bufmgr.h"
 #include "storage/buf_internals.h"
 #include "storage/fsm_internals.h"
-#include "storage/smgr.h"
 #include "storage/md.h"
-#include "pgstat.h"
+#include "storage/smgr.h"
+
+#include "pagestore_client.h"
 
 #if PG_VERSION_NUM >= 150000
-#include "access/xlogutils.h"
 #include "access/xlogrecovery.h"
 #endif
 
@@ -106,6 +107,9 @@ typedef enum
 static SMgrRelation unlogged_build_rel = NULL;
 static UnloggedBuildPhase unlogged_build_phase = UNLOGGED_BUILD_NOT_IN_PROGRESS;
 
+static bool neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id);
+static bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
+
 /*
  * Prefetch implementation:
  *
@@ -239,7 +243,7 @@ typedef struct PrefetchState
 	PrefetchRequest prf_buffer[];	/* prefetch buffers */
 } PrefetchState;
 
-PrefetchState *MyPState;
+static PrefetchState *MyPState;
 
 #define GetPrfSlot(ring_index) ( \
 	( \
@@ -257,7 +261,7 @@ PrefetchState *MyPState;
 	) \
 )
 
-XLogRecPtr	prefetch_lsn = 0;
+static XLogRecPtr prefetch_lsn = 0;
 
 static bool compact_prefetch_buffers(void);
 static void consume_prefetch_responses(void);
@@ -1371,6 +1375,9 @@ neon_init(void)
 	MyPState->prf_hash = prfh_create(MyPState->hashctx,
 									 readahead_buffer_size, NULL);
 
+	old_redo_read_buffer_filter = redo_read_buffer_filter;
+	redo_read_buffer_filter = neon_redo_read_buffer_filter;
+
 #ifdef DEBUG_COMPARE_LOCAL
 	mdinit();
 #endif
@@ -2869,7 +2876,7 @@ get_fsm_physical_block(BlockNumber heapblk)
  * contents, where with REDO locking it would wait on block 1 and see
  * block 3 with post-REDO contents only.
  */
-bool
+static bool
 neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id)
 {
 	XLogRecPtr	end_recptr = record->EndRecPtr;
diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c
index 7d9dbfdb7f..fc3332612c 100644
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -35,6 +35,8 @@
  *
  *-------------------------------------------------------------------------
  */
+#include <sys/resource.h>
+
 #include "postgres.h"
 #include "libpq/pqformat.h"
 #include "neon.h"
@@ -1069,6 +1071,12 @@ DetermineEpochStartLsn(WalProposer *wp)
 			if (!((dth->n_entries >= 1) && (dth->entries[dth->n_entries - 1].term ==
 											walprop_shared->mineLastElectedTerm)))
 			{
+				/*
+				 * Panic to restart PG as we need to retake basebackup.
+				 * However, don't dump core as this is kinda expected
+				 * scenario.
+				 */
+				disable_core_dump();
 				walprop_log(PANIC,
 							"collected propEpochStartLsn %X/%X, but basebackup LSN %X/%X",
 							LSN_FORMAT_ARGS(wp->propEpochStartLsn),
@@ -1445,7 +1453,12 @@ RecvAppendResponses(Safekeeper *sk)
 
 		if (sk->appendResponse.term > wp->propTerm)
 		{
-			/* Another compute with higher term is running. */
+			/*
+			 * Another compute with higher term is running. Panic to restart
+			 * PG as we likely need to retake basebackup. However, don't dump
+			 * core as this is kinda expected scenario.
+			 */
+			disable_core_dump();
 			walprop_log(PANIC, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT "",
 						sk->host, sk->port,
 						sk->appendResponse.term, wp->propTerm);
diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h
index 615018c58e..6ba2aae75b 100644
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -1,14 +1,12 @@
 #ifndef __NEON_WALPROPOSER_H__
 #define __NEON_WALPROPOSER_H__
 
-#include "postgres.h"
-#include "access/xlogdefs.h"
-#include "port.h"
-#include "access/xlog_internal.h"
 #include "access/transam.h"
+#include "access/xlogdefs.h"
+#include "access/xlog_internal.h"
 #include "nodes/replnodes.h"
-#include "utils/uuid.h"
 #include "replication/walreceiver.h"
+#include "utils/uuid.h"
 
 #define SK_MAGIC 0xCafeCeefu
 #define SK_PROTOCOL_VERSION 2
diff --git a/pgxn/neon/walproposer_compat.c b/pgxn/neon/walproposer_compat.c
index 04b519ab15..35d984c52e 100644
--- a/pgxn/neon/walproposer_compat.c
+++ b/pgxn/neon/walproposer_compat.c
@@ -3,11 +3,13 @@
  * This is needed to avoid linking to full postgres server installation. This file
  * is compiled as a part of libwalproposer static library.
  */
+#include "postgres.h"
 
 #include <stdio.h>
-#include "walproposer.h"
-#include "utils/datetime.h"
+
 #include "miscadmin.h"
+#include "utils/datetime.h"
+#include "walproposer.h"
 
 void
 ExceptionalCondition(const char *conditionName,
diff --git a/pgxn/neon/walproposer_pg.c b/pgxn/neon/walproposer_pg.c
index 551d56d416..9361f08ad2 100644
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -1482,6 +1482,21 @@ walprop_pg_wait_event_set(WalProposer *wp, long timeout, Safekeeper **sk, uint32
 #if PG_MAJORVERSION_NUM >= 16
 	if (WalSndCtl != NULL)
 		ConditionVariablePrepareToSleep(&WalSndCtl->wal_flush_cv);
+
+	/*
+	 * Now that we prepared the condvar, check flush ptr again -- it might have
+	 * changed before we subscribed to cv so we missed the wakeup.
+	 *
+	 * Do that only when we're interested in new WAL: without sync-safekeepers
+	 * and if election already passed.
+	 */
+	if (!wp->config->syncSafekeepers && wp->availableLsn != InvalidXLogRecPtr && GetFlushRecPtr(NULL) > wp->availableLsn)
+	{
+		ConditionVariableCancelSleep();
+		ResetLatch(MyLatch);
+		*events = WL_LATCH_SET;
+		return 1;
+	}
 #endif
 
 	/*
@@ -1697,9 +1712,9 @@ walprop_pg_after_election(WalProposer *wp)
 	f = fopen("restart.lsn", "rb");
 	if (f != NULL && !wp->config->syncSafekeepers)
 	{
-		fread(&lrRestartLsn, sizeof(lrRestartLsn), 1, f);
+		size_t rc = fread(&lrRestartLsn, sizeof(lrRestartLsn), 1, f);
 		fclose(f);
-		if (lrRestartLsn != InvalidXLogRecPtr)
+		if (rc == 1 && lrRestartLsn != InvalidXLogRecPtr)
 		{
 			elog(LOG, "Logical replication restart LSN %X/%X", LSN_FORMAT_ARGS(lrRestartLsn));
 
diff --git a/poetry.lock b/poetry.lock
index 8583a71f85..76dfd6d37d 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -2092,51 +2092,61 @@ files = [
 
 [[package]]
 name = "pyyaml"
-version = "6.0"
+version = "6.0.1"
 description = "YAML parser and emitter for Python"
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
-    {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
-    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
-    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
-    {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
-    {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
-    {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
-    {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
-    {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
-    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
-    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
-    {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
-    {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
-    {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
-    {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
-    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
-    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
-    {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
-    {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
-    {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
-    {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
-    {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
-    {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
-    {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
-    {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
-    {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
-    {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
-    {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
-    {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
-    {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
-    {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
-    {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
-    {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
-    {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
-    {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
-    {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
-    {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
-    {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
-    {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
-    {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
+    {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"},
+    {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"},
+    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
+    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
+    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
+    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
+    {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
+    {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
+    {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
+    {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"},
+    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
+    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
+    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
+    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
+    {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
+    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
+    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
+    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
+    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
+    {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
+    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
+    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
+    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"},
+    {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"},
+    {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"},
+    {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"},
+    {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"},
+    {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"},
+    {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"},
+    {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"},
+    {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"},
+    {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"},
+    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
+    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
+    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
+    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
+    {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
+    {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
+    {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
+    {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"},
+    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
+    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
+    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
+    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
+    {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
+    {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
+    {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
 ]
 
 [[package]]
@@ -2553,85 +2563,101 @@ files = [
 
 [[package]]
 name = "yarl"
-version = "1.8.2"
+version = "1.9.4"
 description = "Yet another URL library"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bb81f753c815f6b8e2ddd2eef3c855cf7da193b82396ac013c661aaa6cc6b0a5"},
-    {file = "yarl-1.8.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:47d49ac96156f0928f002e2424299b2c91d9db73e08c4cd6742923a086f1c863"},
-    {file = "yarl-1.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3fc056e35fa6fba63248d93ff6e672c096f95f7836938241ebc8260e062832fe"},
-    {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58a3c13d1c3005dbbac5c9f0d3210b60220a65a999b1833aa46bd6677c69b08e"},
-    {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:10b08293cda921157f1e7c2790999d903b3fd28cd5c208cf8826b3b508026996"},
-    {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de986979bbd87272fe557e0a8fcb66fd40ae2ddfe28a8b1ce4eae22681728fef"},
-    {file = "yarl-1.8.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c4fcfa71e2c6a3cb568cf81aadc12768b9995323186a10827beccf5fa23d4f8"},
-    {file = "yarl-1.8.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae4d7ff1049f36accde9e1ef7301912a751e5bae0a9d142459646114c70ecba6"},
-    {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bf071f797aec5b96abfc735ab97da9fd8f8768b43ce2abd85356a3127909d146"},
-    {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:74dece2bfc60f0f70907c34b857ee98f2c6dd0f75185db133770cd67300d505f"},
-    {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:df60a94d332158b444301c7f569659c926168e4d4aad2cfbf4bce0e8fb8be826"},
-    {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:63243b21c6e28ec2375f932a10ce7eda65139b5b854c0f6b82ed945ba526bff3"},
-    {file = "yarl-1.8.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cfa2bbca929aa742b5084fd4663dd4b87c191c844326fcb21c3afd2d11497f80"},
-    {file = "yarl-1.8.2-cp310-cp310-win32.whl", hash = "sha256:b05df9ea7496df11b710081bd90ecc3a3db6adb4fee36f6a411e7bc91a18aa42"},
-    {file = "yarl-1.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:24ad1d10c9db1953291f56b5fe76203977f1ed05f82d09ec97acb623a7976574"},
-    {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2a1fca9588f360036242f379bfea2b8b44cae2721859b1c56d033adfd5893634"},
-    {file = "yarl-1.8.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f37db05c6051eff17bc832914fe46869f8849de5b92dc4a3466cd63095d23dfd"},
-    {file = "yarl-1.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:77e913b846a6b9c5f767b14dc1e759e5aff05502fe73079f6f4176359d832581"},
-    {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0978f29222e649c351b173da2b9b4665ad1feb8d1daa9d971eb90df08702668a"},
-    {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388a45dc77198b2460eac0aca1efd6a7c09e976ee768b0d5109173e521a19daf"},
-    {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2305517e332a862ef75be8fad3606ea10108662bc6fe08509d5ca99503ac2aee"},
-    {file = "yarl-1.8.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42430ff511571940d51e75cf42f1e4dbdded477e71c1b7a17f4da76c1da8ea76"},
-    {file = "yarl-1.8.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3150078118f62371375e1e69b13b48288e44f6691c1069340081c3fd12c94d5b"},
-    {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c15163b6125db87c8f53c98baa5e785782078fbd2dbeaa04c6141935eb6dab7a"},
-    {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4d04acba75c72e6eb90745447d69f84e6c9056390f7a9724605ca9c56b4afcc6"},
-    {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e7fd20d6576c10306dea2d6a5765f46f0ac5d6f53436217913e952d19237efc4"},
-    {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:75c16b2a900b3536dfc7014905a128a2bea8fb01f9ee26d2d7d8db0a08e7cb2c"},
-    {file = "yarl-1.8.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6d88056a04860a98341a0cf53e950e3ac9f4e51d1b6f61a53b0609df342cc8b2"},
-    {file = "yarl-1.8.2-cp311-cp311-win32.whl", hash = "sha256:fb742dcdd5eec9f26b61224c23baea46c9055cf16f62475e11b9b15dfd5c117b"},
-    {file = "yarl-1.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:8c46d3d89902c393a1d1e243ac847e0442d0196bbd81aecc94fcebbc2fd5857c"},
-    {file = "yarl-1.8.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ceff9722e0df2e0a9e8a79c610842004fa54e5b309fe6d218e47cd52f791d7ef"},
-    {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f6b4aca43b602ba0f1459de647af954769919c4714706be36af670a5f44c9c1"},
-    {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1684a9bd9077e922300ecd48003ddae7a7474e0412bea38d4631443a91d61077"},
-    {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ebb78745273e51b9832ef90c0898501006670d6e059f2cdb0e999494eb1450c2"},
-    {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3adeef150d528ded2a8e734ebf9ae2e658f4c49bf413f5f157a470e17a4a2e89"},
-    {file = "yarl-1.8.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57a7c87927a468e5a1dc60c17caf9597161d66457a34273ab1760219953f7f4c"},
-    {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:efff27bd8cbe1f9bd127e7894942ccc20c857aa8b5a0327874f30201e5ce83d0"},
-    {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:a783cd344113cb88c5ff7ca32f1f16532a6f2142185147822187913eb989f739"},
-    {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:705227dccbe96ab02c7cb2c43e1228e2826e7ead880bb19ec94ef279e9555b5b"},
-    {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:34c09b43bd538bf6c4b891ecce94b6fa4f1f10663a8d4ca589a079a5018f6ed7"},
-    {file = "yarl-1.8.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a48f4f7fea9a51098b02209d90297ac324241bf37ff6be6d2b0149ab2bd51b37"},
-    {file = "yarl-1.8.2-cp37-cp37m-win32.whl", hash = "sha256:0414fd91ce0b763d4eadb4456795b307a71524dbacd015c657bb2a39db2eab89"},
-    {file = "yarl-1.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:d881d152ae0007809c2c02e22aa534e702f12071e6b285e90945aa3c376463c5"},
-    {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5df5e3d04101c1e5c3b1d69710b0574171cc02fddc4b23d1b2813e75f35a30b1"},
-    {file = "yarl-1.8.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7a66c506ec67eb3159eea5096acd05f5e788ceec7b96087d30c7d2865a243918"},
-    {file = "yarl-1.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2b4fa2606adf392051d990c3b3877d768771adc3faf2e117b9de7eb977741229"},
-    {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e21fb44e1eff06dd6ef971d4bdc611807d6bd3691223d9c01a18cec3677939e"},
-    {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93202666046d9edadfe9f2e7bf5e0782ea0d497b6d63da322e541665d65a044e"},
-    {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc77086ce244453e074e445104f0ecb27530d6fd3a46698e33f6c38951d5a0f1"},
-    {file = "yarl-1.8.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dd68a92cab699a233641f5929a40f02a4ede8c009068ca8aa1fe87b8c20ae3"},
-    {file = "yarl-1.8.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1b372aad2b5f81db66ee7ec085cbad72c4da660d994e8e590c997e9b01e44901"},
-    {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:e6f3515aafe0209dd17fb9bdd3b4e892963370b3de781f53e1746a521fb39fc0"},
-    {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dfef7350ee369197106805e193d420b75467b6cceac646ea5ed3049fcc950a05"},
-    {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:728be34f70a190566d20aa13dc1f01dc44b6aa74580e10a3fb159691bc76909d"},
-    {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:ff205b58dc2929191f68162633d5e10e8044398d7a45265f90a0f1d51f85f72c"},
-    {file = "yarl-1.8.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baf211dcad448a87a0d9047dc8282d7de59473ade7d7fdf22150b1d23859f946"},
-    {file = "yarl-1.8.2-cp38-cp38-win32.whl", hash = "sha256:272b4f1599f1b621bf2aabe4e5b54f39a933971f4e7c9aa311d6d7dc06965165"},
-    {file = "yarl-1.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:326dd1d3caf910cd26a26ccbfb84c03b608ba32499b5d6eeb09252c920bcbe4f"},
-    {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f8ca8ad414c85bbc50f49c0a106f951613dfa5f948ab69c10ce9b128d368baf8"},
-    {file = "yarl-1.8.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:418857f837347e8aaef682679f41e36c24250097f9e2f315d39bae3a99a34cbf"},
-    {file = "yarl-1.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ae0eec05ab49e91a78700761777f284c2df119376e391db42c38ab46fd662b77"},
-    {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:009a028127e0a1755c38b03244c0bea9d5565630db9c4cf9572496e947137a87"},
-    {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3edac5d74bb3209c418805bda77f973117836e1de7c000e9755e572c1f7850d0"},
-    {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da65c3f263729e47351261351b8679c6429151ef9649bba08ef2528ff2c423b2"},
-    {file = "yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ef8fb25e52663a1c85d608f6dd72e19bd390e2ecaf29c17fb08f730226e3a08"},
-    {file = "yarl-1.8.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bcd7bb1e5c45274af9a1dd7494d3c52b2be5e6bd8d7e49c612705fd45420b12d"},
-    {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:44ceac0450e648de86da8e42674f9b7077d763ea80c8ceb9d1c3e41f0f0a9951"},
-    {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:97209cc91189b48e7cfe777237c04af8e7cc51eb369004e061809bcdf4e55220"},
-    {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:48dd18adcf98ea9cd721a25313aef49d70d413a999d7d89df44f469edfb38a06"},
-    {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e59399dda559688461762800d7fb34d9e8a6a7444fd76ec33220a926c8be1516"},
-    {file = "yarl-1.8.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d617c241c8c3ad5c4e78a08429fa49e4b04bedfc507b34b4d8dceb83b4af3588"},
-    {file = "yarl-1.8.2-cp39-cp39-win32.whl", hash = "sha256:cb6d48d80a41f68de41212f3dfd1a9d9898d7841c8f7ce6696cf2fd9cb57ef83"},
-    {file = "yarl-1.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:6604711362f2dbf7160df21c416f81fac0de6dbcf0b5445a2ef25478ecc4c778"},
-    {file = "yarl-1.8.2.tar.gz", hash = "sha256:49d43402c6e3013ad0978602bf6bf5328535c48d192304b91b97a3c6790b1562"},
+    {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"},
+    {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"},
+    {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"},
+    {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"},
+    {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"},
+    {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"},
+    {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"},
+    {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"},
+    {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"},
+    {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"},
+    {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"},
+    {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"},
+    {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"},
+    {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"},
+    {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"},
+    {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"},
+    {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"},
+    {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"},
+    {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"},
+    {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"},
+    {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"},
+    {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"},
+    {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"},
+    {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"},
+    {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"},
+    {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"},
+    {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"},
+    {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"},
+    {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"},
+    {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"},
 ]
 
 [package.dependencies]
diff --git a/proxy/src/auth.rs b/proxy/src/auth.rs
index 7d79d34045..64ef108e11 100644
--- a/proxy/src/auth.rs
+++ b/proxy/src/auth.rs
@@ -62,6 +62,9 @@ pub enum AuthErrorImpl {
         Please add it to the allowed list in the Neon console."
     )]
     IpAddressNotAllowed,
+
+    #[error("Too many connections to this endpoint. Please try again later.")]
+    TooManyConnections,
 }
 
 #[derive(Debug, Error)]
@@ -80,6 +83,14 @@ impl AuthError {
     pub fn ip_address_not_allowed() -> Self {
         AuthErrorImpl::IpAddressNotAllowed.into()
     }
+
+    pub fn too_many_connections() -> Self {
+        AuthErrorImpl::TooManyConnections.into()
+    }
+
+    pub fn is_auth_failed(&self) -> bool {
+        matches!(self.0.as_ref(), AuthErrorImpl::AuthFailed(_))
+    }
 }
 
 impl<E: Into<AuthErrorImpl>> From<E> for AuthError {
@@ -102,6 +113,7 @@ impl UserFacingError for AuthError {
             MissingEndpointName => self.to_string(),
             Io(_) => "Internal error".to_string(),
             IpAddressNotAllowed => self.to_string(),
+            TooManyConnections => self.to_string(),
         }
     }
 }
diff --git a/proxy/src/auth/backend.rs b/proxy/src/auth/backend.rs
index 649b3f40f2..923bd02560 100644
--- a/proxy/src/auth/backend.rs
+++ b/proxy/src/auth/backend.rs
@@ -9,9 +9,9 @@ use tokio_postgres::config::AuthKeys;
 use crate::auth::credentials::check_peer_addr_is_in_list;
 use crate::auth::validate_password_and_exchange;
 use crate::console::errors::GetAuthInfoError;
-use crate::console::provider::AuthInfo;
 use crate::console::AuthSecret;
-use crate::proxy::{handle_try_wake, retry_after, LatencyTimer};
+use crate::proxy::connect_compute::handle_try_wake;
+use crate::proxy::retry::retry_after;
 use crate::scram;
 use crate::stream::Stream;
 use crate::{
@@ -22,6 +22,7 @@ use crate::{
         provider::{CachedNodeInfo, ConsoleReqExtra},
         Api,
     },
+    metrics::LatencyTimer,
     stream, url,
 };
 use futures::TryFutureExt;
@@ -166,7 +167,7 @@ impl TryFrom<ClientCredentials> for ComputeUserInfo {
 /// All authentication flows will emit an AuthenticationOk message if successful.
 async fn auth_quirks(
     api: &impl console::Api,
-    extra: &ConsoleReqExtra<'_>,
+    extra: &ConsoleReqExtra,
     creds: ClientCredentials,
     client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
     allow_cleartext: bool,
@@ -185,24 +186,52 @@ async fn auth_quirks(
     };
 
     info!("fetching user's authentication info");
-    // TODO(anna): this will slow down both "hacks" below; we probably need a cache.
-    let AuthInfo {
-        secret,
-        allowed_ips,
-    } = api.get_auth_info(extra, &info).await?;
+    let allowed_ips = api.get_allowed_ips(extra, &info).await?;
 
     // check allowed list
     if !check_peer_addr_is_in_list(&info.inner.peer_addr, &allowed_ips) {
         return Err(auth::AuthError::ip_address_not_allowed());
     }
-    let secret = secret.unwrap_or_else(|| {
+    let cached_secret = api.get_role_secret(extra, &info).await?;
+
+    let secret = cached_secret.clone().unwrap_or_else(|| {
         // If we don't have an authentication secret, we mock one to
         // prevent malicious probing (possible due to missing protocol steps).
         // This mocked secret will never lead to successful authentication.
         info!("authentication info not found, mocking it");
         AuthSecret::Scram(scram::ServerSecret::mock(&info.inner.user, rand::random()))
     });
+    match authenticate_with_secret(
+        secret,
+        info,
+        client,
+        unauthenticated_password,
+        allow_cleartext,
+        config,
+        latency_timer,
+    )
+    .await
+    {
+        Ok(keys) => Ok(keys),
+        Err(e) => {
+            if e.is_auth_failed() {
+                // The password could have been changed, so we invalidate the cache.
+                cached_secret.invalidate();
+            }
+            Err(e)
+        }
+    }
+}
 
+async fn authenticate_with_secret(
+    secret: AuthSecret,
+    info: ComputeUserInfo,
+    client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
+    unauthenticated_password: Option<Vec<u8>>,
+    allow_cleartext: bool,
+    config: &'static AuthenticationConfig,
+    latency_timer: &mut LatencyTimer,
+) -> auth::Result<ComputeCredentials<ComputeCredentialKeys>> {
     if let Some(password) = unauthenticated_password {
         let auth_outcome = validate_password_and_exchange(&password, secret)?;
         let keys = match auth_outcome {
@@ -235,7 +264,7 @@ async fn auth_quirks(
 /// only if authentication was successfuly.
 async fn auth_and_wake_compute(
     api: &impl console::Api,
-    extra: &ConsoleReqExtra<'_>,
+    extra: &ConsoleReqExtra,
     creds: ClientCredentials,
     client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
     allow_cleartext: bool,
@@ -314,7 +343,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
     #[tracing::instrument(fields(allow_cleartext = allow_cleartext), skip_all)]
     pub async fn authenticate(
         self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         client: &mut stream::PqStream<Stream<impl AsyncRead + AsyncWrite + Unpin>>,
         allow_cleartext: bool,
         config: &'static AuthenticationConfig,
@@ -387,7 +416,7 @@ impl<'a> BackendType<'a, ClientCredentials> {
 impl BackendType<'_, ComputeUserInfo> {
     pub async fn get_allowed_ips(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
     ) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
         use BackendType::*;
         match self {
@@ -404,7 +433,7 @@ impl BackendType<'_, ComputeUserInfo> {
     /// The link auth flow doesn't support this, so we return [`None`] in that case.
     pub async fn wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
     ) -> Result<Option<CachedNodeInfo>, console::errors::WakeComputeError> {
         use BackendType::*;
 
diff --git a/proxy/src/auth/backend/classic.rs b/proxy/src/auth/backend/classic.rs
index ce52daf16c..5c394ec649 100644
--- a/proxy/src/auth/backend/classic.rs
+++ b/proxy/src/auth/backend/classic.rs
@@ -4,7 +4,7 @@ use crate::{
     compute,
     config::AuthenticationConfig,
     console::AuthSecret,
-    proxy::LatencyTimer,
+    metrics::LatencyTimer,
     sasl,
     stream::{PqStream, Stream},
 };
diff --git a/proxy/src/auth/backend/hacks.rs b/proxy/src/auth/backend/hacks.rs
index abbd25008b..5dde514bca 100644
--- a/proxy/src/auth/backend/hacks.rs
+++ b/proxy/src/auth/backend/hacks.rs
@@ -4,7 +4,7 @@ use super::{
 use crate::{
     auth::{self, AuthFlow},
     console::AuthSecret,
-    proxy::LatencyTimer,
+    metrics::LatencyTimer,
     sasl,
     stream::{self, Stream},
 };
diff --git a/proxy/src/auth/credentials.rs b/proxy/src/auth/credentials.rs
index 72149e8e29..c04769a199 100644
--- a/proxy/src/auth/credentials.rs
+++ b/proxy/src/auth/credentials.rs
@@ -1,9 +1,8 @@
 //! User credentials used in authentication.
 
 use crate::{
-    auth::password_hack::parse_endpoint_param,
-    error::UserFacingError,
-    proxy::{neon_options_str, NUM_CONNECTION_ACCEPTED_BY_SNI},
+    auth::password_hack::parse_endpoint_param, error::UserFacingError,
+    metrics::NUM_CONNECTION_ACCEPTED_BY_SNI, proxy::neon_options_str,
 };
 use itertools::Itertools;
 use pq_proto::StartupMessageParams;
diff --git a/proxy/src/bin/proxy.rs b/proxy/src/bin/proxy.rs
index fc1c44809a..be3989d387 100644
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -6,7 +6,10 @@ use proxy::config::HttpConfig;
 use proxy::console;
 use proxy::console::provider::AllowedIpsCache;
 use proxy::console::provider::NodeInfoCache;
+use proxy::console::provider::RoleSecretCache;
 use proxy::http;
+use proxy::rate_limiter::EndpointRateLimiter;
+use proxy::rate_limiter::RateBucketInfo;
 use proxy::rate_limiter::RateLimiterConfig;
 use proxy::usage_metrics;
 
@@ -14,6 +17,7 @@ use anyhow::bail;
 use proxy::config::{self, ProxyConfig};
 use proxy::serverless;
 use std::pin::pin;
+use std::sync::Arc;
 use std::{borrow::Cow, net::SocketAddr};
 use tokio::net::TcpListener;
 use tokio::task::JoinSet;
@@ -83,7 +87,7 @@ struct ProxyCliArgs {
     #[clap(long)]
     metric_collection_interval: Option<String>,
     /// cache for `wake_compute` api method (use `size=0` to disable)
-    #[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)]
+    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
     wake_compute_cache: String,
     /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
     #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
@@ -112,14 +116,23 @@ struct ProxyCliArgs {
     /// Timeout for rate limiter. If it didn't manage to aquire a permit in this time, it will return an error.
     #[clap(long, default_value = "15s", value_parser = humantime::parse_duration)]
     rate_limiter_timeout: tokio::time::Duration,
+    /// Endpoint rate limiter max number of requests per second.
+    ///
+    /// Provided in the form '<Requests Per Second>@<Bucket Duration Size>'.
+    /// Can be given multiple times for different bucket sizes.
+    #[clap(long, default_values_t = RateBucketInfo::DEFAULT_SET)]
+    endpoint_rps_limit: Vec<RateBucketInfo>,
     /// Initial limit for dynamic rate limiter. Makes sense only if `rate_limit_algorithm` is *not* `None`.
     #[clap(long, default_value_t = 100)]
     initial_limit: usize,
     #[clap(flatten)]
     aimd_config: proxy::rate_limiter::AimdConfig,
     /// cache for `allowed_ips` (use `size=0` to disable)
-    #[clap(long, default_value = config::CacheOptions::DEFAULT_OPTIONS_NODE_INFO)]
+    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
     allowed_ips_cache: String,
+    /// cache for `role_secret` (use `size=0` to disable)
+    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
+    role_secret_cache: String,
     /// disable ip check for http requests. If it is too time consuming, it could be turned off.
     #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
     disable_ip_check_for_http: bool,
@@ -154,6 +167,8 @@ async fn main() -> anyhow::Result<()> {
     let proxy_listener = TcpListener::bind(proxy_address).await?;
     let cancellation_token = CancellationToken::new();
 
+    let endpoint_rate_limiter = Arc::new(EndpointRateLimiter::new(&config.endpoint_rps_limit));
+
     // client facing tasks. these will exit on error or on cancellation
     // cancellation returns Ok(())
     let mut client_tasks = JoinSet::new();
@@ -161,6 +176,7 @@ async fn main() -> anyhow::Result<()> {
         config,
         proxy_listener,
         cancellation_token.clone(),
+        endpoint_rate_limiter.clone(),
     ));
 
     // TODO: rename the argument to something like serverless.
@@ -174,6 +190,7 @@ async fn main() -> anyhow::Result<()> {
             config,
             serverless_listener,
             cancellation_token.clone(),
+            endpoint_rate_limiter.clone(),
         ));
     }
 
@@ -253,9 +270,11 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
         AuthBackend::Console => {
             let wake_compute_cache_config: CacheOptions = args.wake_compute_cache.parse()?;
             let allowed_ips_cache_config: CacheOptions = args.allowed_ips_cache.parse()?;
+            let role_secret_cache_config: CacheOptions = args.role_secret_cache.parse()?;
 
             info!("Using NodeInfoCache (wake_compute) with options={wake_compute_cache_config:?}");
             info!("Using AllowedIpsCache (wake_compute) with options={allowed_ips_cache_config:?}");
+            info!("Using RoleSecretCache (wake_compute) with options={role_secret_cache_config:?}");
             let caches = Box::leak(Box::new(console::caches::ApiCaches {
                 node_info: NodeInfoCache::new(
                     "node_info_cache",
@@ -269,6 +288,12 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
                     allowed_ips_cache_config.ttl,
                     false,
                 ),
+                role_secret: RoleSecretCache::new(
+                    "role_secret_cache",
+                    role_secret_cache_config.size,
+                    role_secret_cache_config.ttl,
+                    false,
+                ),
             }));
 
             let config::WakeComputeLockOptions {
@@ -308,6 +333,10 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
     let authentication_config = AuthenticationConfig {
         scram_protocol_timeout: args.scram_protocol_timeout,
     };
+
+    let mut endpoint_rps_limit = args.endpoint_rps_limit.clone();
+    RateBucketInfo::validate(&mut endpoint_rps_limit)?;
+
     let config = Box::leak(Box::new(ProxyConfig {
         tls_config,
         auth_backend,
@@ -317,7 +346,35 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
         authentication_config,
         require_client_ip: args.require_client_ip,
         disable_ip_check_for_http: args.disable_ip_check_for_http,
+        endpoint_rps_limit,
     }));
 
     Ok(config)
 }
+
+#[cfg(test)]
+mod tests {
+    use std::time::Duration;
+
+    use clap::Parser;
+    use proxy::rate_limiter::RateBucketInfo;
+
+    #[test]
+    fn parse_endpoint_rps_limit() {
+        let config = super::ProxyCliArgs::parse_from([
+            "proxy",
+            "--endpoint-rps-limit",
+            "100@1s",
+            "--endpoint-rps-limit",
+            "20@30s",
+        ]);
+
+        assert_eq!(
+            config.endpoint_rps_limit,
+            vec![
+                RateBucketInfo::new(100, Duration::from_secs(1)),
+                RateBucketInfo::new(20, Duration::from_secs(30)),
+            ]
+        );
+    }
+}
diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs
index 78c56300a5..a54ba56e43 100644
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -1,9 +1,10 @@
 use crate::{
     auth::parse_endpoint_param, cancellation::CancelClosure, console::errors::WakeComputeError,
-    error::UserFacingError, proxy::neon_option,
+    error::UserFacingError, metrics::NUM_DB_CONNECTIONS_GAUGE, proxy::neon_option,
 };
 use futures::{FutureExt, TryFutureExt};
 use itertools::Itertools;
+use metrics::IntCounterPairGuard;
 use pq_proto::StartupMessageParams;
 use std::{io, net::SocketAddr, time::Duration};
 use thiserror::Error;
@@ -223,6 +224,8 @@ pub struct PostgresConnection {
     pub params: std::collections::HashMap<String, String>,
     /// Query cancellation token.
     pub cancel_closure: CancelClosure,
+
+    _guage: IntCounterPairGuard,
 }
 
 impl ConnCfg {
@@ -231,6 +234,7 @@ impl ConnCfg {
         &self,
         allow_self_signed_compute: bool,
         timeout: Duration,
+        proto: &'static str,
     ) -> Result<PostgresConnection, ConnectionError> {
         let (socket_addr, stream, host) = self.connect_raw(timeout).await?;
 
@@ -264,6 +268,7 @@ impl ConnCfg {
             stream,
             params,
             cancel_closure,
+            _guage: NUM_DB_CONNECTIONS_GAUGE.with_label_values(&[proto]).guard(),
         };
 
         Ok(connection)
diff --git a/proxy/src/config.rs b/proxy/src/config.rs
index 182d71f9be..2ed248af8d 100644
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,4 +1,4 @@
-use crate::auth;
+use crate::{auth, rate_limiter::RateBucketInfo};
 use anyhow::{bail, ensure, Context, Ok};
 use rustls::{sign, Certificate, PrivateKey};
 use sha2::{Digest, Sha256};
@@ -20,6 +20,7 @@ pub struct ProxyConfig {
     pub authentication_config: AuthenticationConfig,
     pub require_client_ip: bool,
     pub disable_ip_check_for_http: bool,
+    pub endpoint_rps_limit: Vec<RateBucketInfo>,
 }
 
 #[derive(Debug)]
@@ -309,10 +310,10 @@ pub struct CacheOptions {
 
 impl CacheOptions {
     /// Default options for [`crate::console::provider::NodeInfoCache`].
-    pub const DEFAULT_OPTIONS_NODE_INFO: &'static str = "size=4000,ttl=4m";
+    pub const CACHE_DEFAULT_OPTIONS: &'static str = "size=4000,ttl=4m";
 
     /// Parse cache options passed via cmdline.
-    /// Example: [`Self::DEFAULT_OPTIONS_NODE_INFO`].
+    /// Example: [`Self::CACHE_DEFAULT_OPTIONS`].
     fn parse(options: &str) -> anyhow::Result<Self> {
         let mut size = None;
         let mut ttl = None;
diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs
index b0a73fd03d..e4cf1e8c8e 100644
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -10,6 +10,7 @@ use crate::{
 };
 use async_trait::async_trait;
 use dashmap::DashMap;
+use smol_str::SmolStr;
 use std::{sync::Arc, time::Duration};
 use tokio::{
     sync::{OwnedSemaphorePermit, Semaphore},
@@ -21,7 +22,7 @@ pub mod errors {
     use crate::{
         error::{io_error, UserFacingError},
         http,
-        proxy::ShouldRetry,
+        proxy::retry::ShouldRetry,
     };
     use thiserror::Error;
 
@@ -196,15 +197,15 @@ pub mod errors {
 }
 
 /// Extra query params we'd like to pass to the console.
-pub struct ConsoleReqExtra<'a> {
+pub struct ConsoleReqExtra {
     /// A unique identifier for a connection.
     pub session_id: uuid::Uuid,
     /// Name of client application, if set.
-    pub application_name: Option<&'a str>,
+    pub application_name: String,
     pub options: Vec<(String, String)>,
 }
 
-impl<'a> ConsoleReqExtra<'a> {
+impl ConsoleReqExtra {
     // https://swagger.io/docs/specification/serialization/ DeepObject format
     // paramName[prop1]=value1&paramName[prop2]=value2&....
     pub fn options_as_deep_object(&self) -> Vec<(String, String)> {
@@ -216,6 +217,7 @@ impl<'a> ConsoleReqExtra<'a> {
 }
 
 /// Auth secret which is managed by the cloud.
+#[derive(Clone)]
 pub enum AuthSecret {
     #[cfg(feature = "testing")]
     /// Md5 hash of user's password.
@@ -250,29 +252,31 @@ pub struct NodeInfo {
 
 pub type NodeInfoCache = TimedLru<Arc<str>, NodeInfo>;
 pub type CachedNodeInfo = timed_lru::Cached<&'static NodeInfoCache>;
-pub type AllowedIpsCache = TimedLru<Arc<str>, Arc<Vec<String>>>;
+pub type AllowedIpsCache = TimedLru<SmolStr, Arc<Vec<String>>>;
+pub type RoleSecretCache = TimedLru<(SmolStr, SmolStr), Option<AuthSecret>>;
+pub type CachedRoleSecret = timed_lru::Cached<&'static RoleSecretCache>;
 
 /// This will allocate per each call, but the http requests alone
 /// already require a few allocations, so it should be fine.
 #[async_trait]
 pub trait Api {
     /// Get the client's auth secret for authentication.
-    async fn get_auth_info(
+    async fn get_role_secret(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
-    ) -> Result<AuthInfo, errors::GetAuthInfoError>;
+    ) -> Result<CachedRoleSecret, errors::GetAuthInfoError>;
 
     async fn get_allowed_ips(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<Arc<Vec<String>>, errors::GetAuthInfoError>;
 
     /// Wake up the compute node and return the corresponding connection info.
     async fn wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<CachedNodeInfo, errors::WakeComputeError>;
 }
@@ -282,7 +286,9 @@ pub struct ApiCaches {
     /// Cache for the `wake_compute` API method.
     pub node_info: NodeInfoCache,
     /// Cache for the `get_allowed_ips`. TODO(anna): use notifications listener instead.
-    pub allowed_ips: TimedLru<Arc<str>, Arc<Vec<String>>>,
+    pub allowed_ips: AllowedIpsCache,
+    /// Cache for the `get_role_secret`. TODO(anna): use notifications listener instead.
+    pub role_secret: RoleSecretCache,
 }
 
 /// Various caches for [`console`](super).
diff --git a/proxy/src/console/provider/mock.rs b/proxy/src/console/provider/mock.rs
index 8aad8c06bc..dba5e5863f 100644
--- a/proxy/src/console/provider/mock.rs
+++ b/proxy/src/console/provider/mock.rs
@@ -6,6 +6,7 @@ use super::{
     errors::{ApiError, GetAuthInfoError, WakeComputeError},
     AuthInfo, AuthSecret, CachedNodeInfo, ConsoleReqExtra, NodeInfo,
 };
+use crate::console::provider::CachedRoleSecret;
 use crate::{auth::backend::ComputeUserInfo, compute, error::io_error, scram, url::ApiUrl};
 use async_trait::async_trait;
 use futures::TryFutureExt;
@@ -142,17 +143,19 @@ async fn get_execute_postgres_query(
 #[async_trait]
 impl super::Api for Api {
     #[tracing::instrument(skip_all)]
-    async fn get_auth_info(
+    async fn get_role_secret(
         &self,
-        _extra: &ConsoleReqExtra<'_>,
+        _extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
-    ) -> Result<AuthInfo, GetAuthInfoError> {
-        self.do_get_auth_info(creds).await
+    ) -> Result<CachedRoleSecret, GetAuthInfoError> {
+        Ok(CachedRoleSecret::new_uncached(
+            self.do_get_auth_info(creds).await?.secret,
+        ))
     }
 
     async fn get_allowed_ips(
         &self,
-        _extra: &ConsoleReqExtra<'_>,
+        _extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
         Ok(Arc::new(self.do_get_auth_info(creds).await?.allowed_ips))
@@ -161,7 +164,7 @@ impl super::Api for Api {
     #[tracing::instrument(skip_all)]
     async fn wake_compute(
         &self,
-        _extra: &ConsoleReqExtra<'_>,
+        _extra: &ConsoleReqExtra,
         _creds: &ComputeUserInfo,
     ) -> Result<CachedNodeInfo, WakeComputeError> {
         self.do_wake_compute()
diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs
index f8c3ee5b58..628d98df49 100644
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -3,9 +3,10 @@
 use super::{
     super::messages::{ConsoleError, GetRoleSecret, WakeCompute},
     errors::{ApiError, GetAuthInfoError, WakeComputeError},
-    ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedNodeInfo, ConsoleReqExtra, NodeInfo,
+    ApiCaches, ApiLocks, AuthInfo, AuthSecret, CachedNodeInfo, CachedRoleSecret, ConsoleReqExtra,
+    NodeInfo,
 };
-use crate::proxy::{ALLOWED_IPS_BY_CACHE_OUTCOME, ALLOWED_IPS_NUMBER};
+use crate::metrics::{ALLOWED_IPS_BY_CACHE_OUTCOME, ALLOWED_IPS_NUMBER};
 use crate::{auth::backend::ComputeUserInfo, compute, http, scram};
 use async_trait::async_trait;
 use futures::TryFutureExt;
@@ -48,7 +49,7 @@ impl Api {
 
     async fn do_get_auth_info(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<AuthInfo, GetAuthInfoError> {
         let request_id = uuid::Uuid::new_v4().to_string();
@@ -60,9 +61,9 @@ impl Api {
                 .header("Authorization", format!("Bearer {}", &self.jwt))
                 .query(&[("session_id", extra.session_id)])
                 .query(&[
-                    ("application_name", extra.application_name),
-                    ("project", Some(&creds.endpoint)),
-                    ("role", Some(&creds.inner.user)),
+                    ("application_name", extra.application_name.as_str()),
+                    ("project", creds.endpoint.as_str()),
+                    ("role", creds.inner.user.as_str()),
                 ])
                 .build()?;
 
@@ -101,7 +102,7 @@ impl Api {
 
     async fn do_wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<NodeInfo, WakeComputeError> {
         let request_id = uuid::Uuid::new_v4().to_string();
@@ -113,8 +114,8 @@ impl Api {
                 .header("Authorization", format!("Bearer {}", &self.jwt))
                 .query(&[("session_id", extra.session_id)])
                 .query(&[
-                    ("application_name", extra.application_name),
-                    ("project", Some(&creds.endpoint)),
+                    ("application_name", extra.application_name.as_str()),
+                    ("project", creds.endpoint.as_str()),
                 ]);
 
             request_builder = if extra.options.is_empty() {
@@ -159,21 +160,33 @@ impl Api {
 #[async_trait]
 impl super::Api for Api {
     #[tracing::instrument(skip_all)]
-    async fn get_auth_info(
+    async fn get_role_secret(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
-    ) -> Result<AuthInfo, GetAuthInfoError> {
-        self.do_get_auth_info(extra, creds).await
+    ) -> Result<CachedRoleSecret, GetAuthInfoError> {
+        let ep = creds.endpoint.clone();
+        let user = creds.inner.user.clone();
+        if let Some(role_secret) = self.caches.role_secret.get(&(ep.clone(), user.clone())) {
+            return Ok(role_secret);
+        }
+        let auth_info = self.do_get_auth_info(extra, creds).await?;
+        let (_, secret) = self
+            .caches
+            .role_secret
+            .insert((ep.clone(), user), auth_info.secret.clone());
+        self.caches
+            .allowed_ips
+            .insert(ep, Arc::new(auth_info.allowed_ips));
+        Ok(secret)
     }
 
     async fn get_allowed_ips(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<Arc<Vec<String>>, GetAuthInfoError> {
-        let key: &str = &creds.endpoint;
-        if let Some(allowed_ips) = self.caches.allowed_ips.get(key) {
+        if let Some(allowed_ips) = self.caches.allowed_ips.get(&creds.endpoint) {
             ALLOWED_IPS_BY_CACHE_OUTCOME
                 .with_label_values(&["hit"])
                 .inc();
@@ -182,17 +195,21 @@ impl super::Api for Api {
         ALLOWED_IPS_BY_CACHE_OUTCOME
             .with_label_values(&["miss"])
             .inc();
-        let allowed_ips = Arc::new(self.do_get_auth_info(extra, creds).await?.allowed_ips);
+        let auth_info = self.do_get_auth_info(extra, creds).await?;
+        let allowed_ips = Arc::new(auth_info.allowed_ips);
+        let ep = creds.endpoint.clone();
+        let user = creds.inner.user.clone();
         self.caches
-            .allowed_ips
-            .insert(key.into(), allowed_ips.clone());
+            .role_secret
+            .insert((ep.clone(), user), auth_info.secret);
+        self.caches.allowed_ips.insert(ep, allowed_ips.clone());
         Ok(allowed_ips)
     }
 
     #[tracing::instrument(skip_all)]
     async fn wake_compute(
         &self,
-        extra: &ConsoleReqExtra<'_>,
+        extra: &ConsoleReqExtra,
         creds: &ComputeUserInfo,
     ) -> Result<CachedNodeInfo, WakeComputeError> {
         let key: &str = &creds.inner.cache_key;
diff --git a/proxy/src/http.rs b/proxy/src/http.rs
index 09423eca77..59e1492ed4 100644
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -13,7 +13,7 @@ pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
 use tokio::time::Instant;
 use tracing::trace;
 
-use crate::{proxy::CONSOLE_REQUEST_LATENCY, rate_limiter, url::ApiUrl};
+use crate::{metrics::CONSOLE_REQUEST_LATENCY, rate_limiter, url::ApiUrl};
 use reqwest_middleware::RequestBuilder;
 
 /// This is the preferred way to create new http clients,
diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs
index a22600cbb3..2da1eaf482 100644
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -16,6 +16,7 @@ pub mod console;
 pub mod error;
 pub mod http;
 pub mod logging;
+pub mod metrics;
 pub mod parse;
 pub mod protocol2;
 pub mod proxy;
diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs
new file mode 100644
index 0000000000..8e2a6105b1
--- /dev/null
+++ b/proxy/src/metrics.rs
@@ -0,0 +1,232 @@
+use ::metrics::{
+    exponential_buckets, register_int_counter_pair_vec, register_int_counter_vec,
+    IntCounterPairVec, IntCounterVec,
+};
+use prometheus::{
+    register_histogram, register_histogram_vec, register_int_gauge_vec, Histogram, HistogramVec,
+    IntGaugeVec,
+};
+
+use once_cell::sync::Lazy;
+use tokio::time;
+
+pub static NUM_DB_CONNECTIONS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "proxy_opened_db_connections_total",
+        "Number of opened connections to a database.",
+        "proxy_closed_db_connections_total",
+        "Number of closed connections to a database.",
+        &["protocol"],
+    )
+    .unwrap()
+});
+
+pub static NUM_CLIENT_CONNECTION_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "proxy_opened_client_connections_total",
+        "Number of opened connections from a client.",
+        "proxy_closed_client_connections_total",
+        "Number of closed connections from a client.",
+        &["protocol"],
+    )
+    .unwrap()
+});
+
+pub static NUM_CONNECTION_REQUESTS_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
+        "proxy_accepted_connections_total",
+        "Number of client connections accepted.",
+        "proxy_closed_connections_total",
+        "Number of client connections closed.",
+        &["protocol"],
+    )
+    .unwrap()
+});
+
+pub static COMPUTE_CONNECTION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "proxy_compute_connection_latency_seconds",
+        "Time it took for proxy to establish a connection to the compute endpoint",
+        // http/ws/tcp, true/false, true/false, success/failure
+        // 3 * 2 * 2 * 2 = 24 counters
+        &["protocol", "cache_miss", "pool_miss", "outcome"],
+        // largest bucket = 2^16 * 0.5ms = 32s
+        exponential_buckets(0.0005, 2.0, 16).unwrap(),
+    )
+    .unwrap()
+});
+
+pub static CONSOLE_REQUEST_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
+    register_histogram_vec!(
+        "proxy_console_request_latency",
+        "Time it took for proxy to establish a connection to the compute endpoint",
+        // proxy_wake_compute/proxy_get_role_info
+        &["request"],
+        // largest bucket = 2^16 * 0.2ms = 13s
+        exponential_buckets(0.0002, 2.0, 16).unwrap(),
+    )
+    .unwrap()
+});
+
+pub static ALLOWED_IPS_BY_CACHE_OUTCOME: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_allowed_ips_cache_misses",
+        "Number of cache hits/misses for allowed ips",
+        // hit/miss
+        &["outcome"],
+    )
+    .unwrap()
+});
+
+pub static RATE_LIMITER_ACQUIRE_LATENCY: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "proxy_control_plane_token_acquire_seconds",
+        "Time it took for proxy to establish a connection to the compute endpoint",
+        // largest bucket = 3^16 * 0.05ms = 2.15s
+        exponential_buckets(0.00005, 3.0, 16).unwrap(),
+    )
+    .unwrap()
+});
+
+pub static RATE_LIMITER_LIMIT: Lazy<IntGaugeVec> = Lazy::new(|| {
+    register_int_gauge_vec!(
+        "semaphore_control_plane_limit",
+        "Current limit of the semaphore control plane",
+        &["limit"], // 2 counters
+    )
+    .unwrap()
+});
+
+pub static NUM_CONNECTION_ACCEPTED_BY_SNI: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_accepted_connections_by_sni",
+        "Number of connections (per sni).",
+        &["kind"],
+    )
+    .unwrap()
+});
+
+pub static ALLOWED_IPS_NUMBER: Lazy<Histogram> = Lazy::new(|| {
+    register_histogram!(
+        "proxy_allowed_ips_number",
+        "Number of allowed ips",
+        vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0],
+    )
+    .unwrap()
+});
+
+pub struct LatencyTimer {
+    // time since the stopwatch was started
+    start: Option<time::Instant>,
+    // accumulated time on the stopwatch
+    accumulated: std::time::Duration,
+    // label data
+    protocol: &'static str,
+    cache_miss: bool,
+    pool_miss: bool,
+    outcome: &'static str,
+}
+
+pub struct LatencyTimerPause<'a> {
+    timer: &'a mut LatencyTimer,
+}
+
+impl LatencyTimer {
+    pub fn new(protocol: &'static str) -> Self {
+        Self {
+            start: Some(time::Instant::now()),
+            accumulated: std::time::Duration::ZERO,
+            protocol,
+            cache_miss: false,
+            // by default we don't do pooling
+            pool_miss: true,
+            // assume failed unless otherwise specified
+            outcome: "failed",
+        }
+    }
+
+    pub fn pause(&mut self) -> LatencyTimerPause<'_> {
+        // stop the stopwatch and record the time that we have accumulated
+        let start = self.start.take().expect("latency timer should be started");
+        self.accumulated += start.elapsed();
+        LatencyTimerPause { timer: self }
+    }
+
+    pub fn cache_miss(&mut self) {
+        self.cache_miss = true;
+    }
+
+    pub fn pool_hit(&mut self) {
+        self.pool_miss = false;
+    }
+
+    pub fn success(mut self) {
+        self.outcome = "success";
+    }
+}
+
+impl Drop for LatencyTimerPause<'_> {
+    fn drop(&mut self) {
+        // start the stopwatch again
+        self.timer.start = Some(time::Instant::now());
+    }
+}
+
+impl Drop for LatencyTimer {
+    fn drop(&mut self) {
+        let duration =
+            self.start.map(|start| start.elapsed()).unwrap_or_default() + self.accumulated;
+        COMPUTE_CONNECTION_LATENCY
+            .with_label_values(&[
+                self.protocol,
+                bool_to_str(self.cache_miss),
+                bool_to_str(self.pool_miss),
+                self.outcome,
+            ])
+            .observe(duration.as_secs_f64())
+    }
+}
+
+pub static NUM_CONNECTION_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_connection_failures_total",
+        "Number of connection failures (per kind).",
+        &["kind"],
+    )
+    .unwrap()
+});
+
+pub static NUM_WAKEUP_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_connection_failures_breakdown",
+        "Number of wake-up failures (per kind).",
+        &["retry", "kind"],
+    )
+    .unwrap()
+});
+
+pub static NUM_BYTES_PROXIED_PER_CLIENT_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_io_bytes_per_client",
+        "Number of bytes sent/received between client and backend.",
+        crate::console::messages::MetricsAuxInfo::TRAFFIC_LABELS,
+    )
+    .unwrap()
+});
+
+pub static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
+    register_int_counter_vec!(
+        "proxy_io_bytes",
+        "Number of bytes sent/received between all clients and backends.",
+        &["direction"],
+    )
+    .unwrap()
+});
+
+pub const fn bool_to_str(x: bool) -> &'static str {
+    if x {
+        "true"
+    } else {
+        "false"
+    }
+}
diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs
index 018f774c7e..17e910860c 100644
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -1,282 +1,41 @@
 #[cfg(test)]
 mod tests;
 
+pub mod connect_compute;
+pub mod retry;
+
 use crate::{
     auth,
     cancellation::{self, CancelMap},
-    compute::{self, PostgresConnection},
+    compute,
     config::{AuthenticationConfig, ProxyConfig, TlsConfig},
-    console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
-    http::StatusCode,
+    console::{self, messages::MetricsAuxInfo},
+    metrics::{
+        LatencyTimer, NUM_BYTES_PROXIED_COUNTER, NUM_BYTES_PROXIED_PER_CLIENT_COUNTER,
+        NUM_CLIENT_CONNECTION_GAUGE, NUM_CONNECTION_REQUESTS_GAUGE,
+    },
     protocol2::WithClientIp,
+    rate_limiter::EndpointRateLimiter,
     stream::{PqStream, Stream},
     usage_metrics::{Ids, USAGE_METRICS},
 };
 use anyhow::{bail, Context};
-use async_trait::async_trait;
 use futures::TryFutureExt;
 use itertools::Itertools;
-use metrics::{exponential_buckets, register_int_counter_vec, IntCounterVec};
-use once_cell::sync::{Lazy, OnceCell};
+use once_cell::sync::OnceCell;
 use pq_proto::{BeMessage as Be, FeStartupPacket, StartupMessageParams};
-use prometheus::{
-    register_histogram, register_histogram_vec, register_int_gauge_vec, Histogram, HistogramVec,
-    IntGaugeVec,
-};
 use regex::Regex;
-use std::{error::Error, io, net::IpAddr, ops::ControlFlow, sync::Arc, time::Instant};
-use tokio::{
-    io::{AsyncRead, AsyncWrite, AsyncWriteExt},
-    time,
-};
+use std::{net::IpAddr, sync::Arc};
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
 use tokio_util::sync::CancellationToken;
-use tracing::{error, info, info_span, warn, Instrument};
+use tracing::{error, info, info_span, Instrument};
 use utils::measured_stream::MeasuredStream;
 
-/// Number of times we should retry the `/proxy_wake_compute` http request.
-/// Retry duration is BASE_RETRY_WAIT_DURATION * RETRY_WAIT_EXPONENT_BASE ^ n, where n starts at 0
-pub const NUM_RETRIES_CONNECT: u32 = 16;
-const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
-const BASE_RETRY_WAIT_DURATION: time::Duration = time::Duration::from_millis(25);
-const RETRY_WAIT_EXPONENT_BASE: f64 = std::f64::consts::SQRT_2;
+use self::connect_compute::{connect_to_compute, TcpMechanism};
 
 const ERR_INSECURE_CONNECTION: &str = "connection is insecure (try using `sslmode=require`)";
 const ERR_PROTO_VIOLATION: &str = "protocol violation";
 
-pub static NUM_DB_CONNECTIONS_OPENED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_opened_db_connections_total",
-        "Number of opened connections to a database.",
-        &["protocol"],
-    )
-    .unwrap()
-});
-
-pub static NUM_DB_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_closed_db_connections_total",
-        "Number of closed connections to a database.",
-        &["protocol"],
-    )
-    .unwrap()
-});
-
-pub static NUM_CLIENT_CONNECTION_OPENED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_opened_client_connections_total",
-        "Number of opened connections from a client.",
-        &["protocol"],
-    )
-    .unwrap()
-});
-
-pub static NUM_CLIENT_CONNECTION_CLOSED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_closed_client_connections_total",
-        "Number of closed connections from a client.",
-        &["protocol"],
-    )
-    .unwrap()
-});
-
-pub static NUM_CONNECTIONS_ACCEPTED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_accepted_connections_total",
-        "Number of client connections accepted.",
-        &["protocol"],
-    )
-    .unwrap()
-});
-
-pub static NUM_CONNECTIONS_CLOSED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_closed_connections_total",
-        "Number of client connections closed.",
-        &["protocol"],
-    )
-    .unwrap()
-});
-
-static COMPUTE_CONNECTION_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
-    register_histogram_vec!(
-        "proxy_compute_connection_latency_seconds",
-        "Time it took for proxy to establish a connection to the compute endpoint",
-        // http/ws/tcp, true/false, true/false, success/failure
-        // 3 * 2 * 2 * 2 = 24 counters
-        &["protocol", "cache_miss", "pool_miss", "outcome"],
-        // largest bucket = 2^16 * 0.5ms = 32s
-        exponential_buckets(0.0005, 2.0, 16).unwrap(),
-    )
-    .unwrap()
-});
-
-pub static CONSOLE_REQUEST_LATENCY: Lazy<HistogramVec> = Lazy::new(|| {
-    register_histogram_vec!(
-        "proxy_console_request_latency",
-        "Time it took for proxy to establish a connection to the compute endpoint",
-        // proxy_wake_compute/proxy_get_role_info
-        &["request"],
-        // largest bucket = 2^16 * 0.2ms = 13s
-        exponential_buckets(0.0002, 2.0, 16).unwrap(),
-    )
-    .unwrap()
-});
-
-pub static ALLOWED_IPS_BY_CACHE_OUTCOME: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_allowed_ips_cache_misses",
-        "Number of cache hits/misses for allowed ips",
-        // hit/miss
-        &["outcome"],
-    )
-    .unwrap()
-});
-
-pub static RATE_LIMITER_ACQUIRE_LATENCY: Lazy<Histogram> = Lazy::new(|| {
-    register_histogram!(
-        "proxy_control_plane_token_acquire_seconds",
-        "Time it took for proxy to establish a connection to the compute endpoint",
-        // largest bucket = 3^16 * 0.05ms = 2.15s
-        exponential_buckets(0.00005, 3.0, 16).unwrap(),
-    )
-    .unwrap()
-});
-
-pub static RATE_LIMITER_LIMIT: Lazy<IntGaugeVec> = Lazy::new(|| {
-    register_int_gauge_vec!(
-        "semaphore_control_plane_limit",
-        "Current limit of the semaphore control plane",
-        &["limit"], // 2 counters
-    )
-    .unwrap()
-});
-
-pub static NUM_CONNECTION_ACCEPTED_BY_SNI: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_accepted_connections_by_sni",
-        "Number of connections (per sni).",
-        &["kind"],
-    )
-    .unwrap()
-});
-
-pub static ALLOWED_IPS_NUMBER: Lazy<Histogram> = Lazy::new(|| {
-    register_histogram!(
-        "proxy_allowed_ips_number",
-        "Number of allowed ips",
-        vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 50.0, 100.0],
-    )
-    .unwrap()
-});
-
-pub struct LatencyTimer {
-    // time since the stopwatch was started
-    start: Option<Instant>,
-    // accumulated time on the stopwatch
-    accumulated: std::time::Duration,
-    // label data
-    protocol: &'static str,
-    cache_miss: bool,
-    pool_miss: bool,
-    outcome: &'static str,
-}
-
-pub struct LatencyTimerPause<'a> {
-    timer: &'a mut LatencyTimer,
-}
-
-impl LatencyTimer {
-    pub fn new(protocol: &'static str) -> Self {
-        Self {
-            start: Some(Instant::now()),
-            accumulated: std::time::Duration::ZERO,
-            protocol,
-            cache_miss: false,
-            // by default we don't do pooling
-            pool_miss: true,
-            // assume failed unless otherwise specified
-            outcome: "failed",
-        }
-    }
-
-    pub fn pause(&mut self) -> LatencyTimerPause<'_> {
-        // stop the stopwatch and record the time that we have accumulated
-        let start = self.start.take().expect("latency timer should be started");
-        self.accumulated += start.elapsed();
-        LatencyTimerPause { timer: self }
-    }
-
-    pub fn cache_miss(&mut self) {
-        self.cache_miss = true;
-    }
-
-    pub fn pool_hit(&mut self) {
-        self.pool_miss = false;
-    }
-
-    pub fn success(mut self) {
-        self.outcome = "success";
-    }
-}
-
-impl Drop for LatencyTimerPause<'_> {
-    fn drop(&mut self) {
-        // start the stopwatch again
-        self.timer.start = Some(Instant::now());
-    }
-}
-
-impl Drop for LatencyTimer {
-    fn drop(&mut self) {
-        let duration =
-            self.start.map(|start| start.elapsed()).unwrap_or_default() + self.accumulated;
-        COMPUTE_CONNECTION_LATENCY
-            .with_label_values(&[
-                self.protocol,
-                bool_to_str(self.cache_miss),
-                bool_to_str(self.pool_miss),
-                self.outcome,
-            ])
-            .observe(duration.as_secs_f64())
-    }
-}
-
-static NUM_CONNECTION_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_connection_failures_total",
-        "Number of connection failures (per kind).",
-        &["kind"],
-    )
-    .unwrap()
-});
-
-static NUM_WAKEUP_FAILURES: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_connection_failures_breakdown",
-        "Number of wake-up failures (per kind).",
-        &["retry", "kind"],
-    )
-    .unwrap()
-});
-
-static NUM_BYTES_PROXIED_PER_CLIENT_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_io_bytes_per_client",
-        "Number of bytes sent/received between client and backend.",
-        crate::console::messages::MetricsAuxInfo::TRAFFIC_LABELS,
-    )
-    .unwrap()
-});
-
-static NUM_BYTES_PROXIED_COUNTER: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
-        "proxy_io_bytes",
-        "Number of bytes sent/received between all clients and backends.",
-        &["direction"],
-    )
-    .unwrap()
-});
-
 pub async fn run_until_cancelled<F: std::future::Future>(
     f: F,
     cancellation_token: &CancellationToken,
@@ -296,6 +55,7 @@ pub async fn task_main(
     config: &'static ProxyConfig,
     listener: tokio::net::TcpListener,
     cancellation_token: CancellationToken,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
     scopeguard::defer! {
         info!("proxy has shut down");
@@ -315,6 +75,8 @@ pub async fn task_main(
 
         let session_id = uuid::Uuid::new_v4();
         let cancel_map = Arc::clone(&cancel_map);
+        let endpoint_rate_limiter = endpoint_rate_limiter.clone();
+
         connections.spawn(
             async move {
                 info!("accepted postgres client connection");
@@ -340,6 +102,7 @@ pub async fn task_main(
                     socket,
                     ClientMode::Tcp,
                     peer_addr.ip(),
+                    endpoint_rate_limiter,
                 )
                 .await
             }
@@ -415,6 +178,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
     stream: S,
     mode: ClientMode,
     peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
     info!(
         protocol = mode.protocol_label(),
@@ -422,16 +186,12 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
     );
 
     let proto = mode.protocol_label();
-    NUM_CLIENT_CONNECTION_OPENED_COUNTER
+    let _client_gauge = NUM_CLIENT_CONNECTION_GAUGE
         .with_label_values(&[proto])
-        .inc();
-    NUM_CONNECTIONS_ACCEPTED_COUNTER
+        .guard();
+    let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
         .with_label_values(&[proto])
-        .inc();
-    scopeguard::defer! {
-        NUM_CLIENT_CONNECTION_CLOSED_COUNTER.with_label_values(&[proto]).inc();
-        NUM_CONNECTIONS_CLOSED_COUNTER.with_label_values(&[proto]).inc();
-    }
+        .guard();
 
     let tls = config.tls_config.as_ref();
 
@@ -463,6 +223,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(
         &params,
         session_id,
         mode.allow_self_signed_compute(config),
+        endpoint_rate_limiter,
     );
     cancel_map
         .with_session(|session| client.connect_to_db(session, mode, &config.authentication_config))
@@ -554,294 +315,6 @@ async fn handshake<S: AsyncRead + AsyncWrite + Unpin>(
     }
 }
 
-/// If we couldn't connect, a cached connection info might be to blame
-/// (e.g. the compute node's address might've changed at the wrong time).
-/// Invalidate the cache entry (if any) to prevent subsequent errors.
-#[tracing::instrument(name = "invalidate_cache", skip_all)]
-pub fn invalidate_cache(node_info: console::CachedNodeInfo) -> compute::ConnCfg {
-    let is_cached = node_info.cached();
-    if is_cached {
-        warn!("invalidating stalled compute node info cache entry");
-    }
-    let label = match is_cached {
-        true => "compute_cached",
-        false => "compute_uncached",
-    };
-    NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
-
-    node_info.invalidate().config
-}
-
-/// Try to connect to the compute node once.
-#[tracing::instrument(name = "connect_once", fields(pid = tracing::field::Empty), skip_all)]
-async fn connect_to_compute_once(
-    node_info: &console::CachedNodeInfo,
-    timeout: time::Duration,
-) -> Result<PostgresConnection, compute::ConnectionError> {
-    let allow_self_signed_compute = node_info.allow_self_signed_compute;
-
-    node_info
-        .config
-        .connect(allow_self_signed_compute, timeout)
-        .await
-}
-
-#[async_trait]
-pub trait ConnectMechanism {
-    type Connection;
-    type ConnectError;
-    type Error: From<Self::ConnectError>;
-    async fn connect_once(
-        &self,
-        node_info: &console::CachedNodeInfo,
-        timeout: time::Duration,
-    ) -> Result<Self::Connection, Self::ConnectError>;
-
-    fn update_connect_config(&self, conf: &mut compute::ConnCfg);
-}
-
-pub struct TcpMechanism<'a> {
-    /// KV-dictionary with PostgreSQL connection params.
-    pub params: &'a StartupMessageParams,
-}
-
-#[async_trait]
-impl ConnectMechanism for TcpMechanism<'_> {
-    type Connection = PostgresConnection;
-    type ConnectError = compute::ConnectionError;
-    type Error = compute::ConnectionError;
-
-    async fn connect_once(
-        &self,
-        node_info: &console::CachedNodeInfo,
-        timeout: time::Duration,
-    ) -> Result<PostgresConnection, Self::Error> {
-        connect_to_compute_once(node_info, timeout).await
-    }
-
-    fn update_connect_config(&self, config: &mut compute::ConnCfg) {
-        config.set_startup_params(self.params);
-    }
-}
-
-const fn bool_to_str(x: bool) -> &'static str {
-    if x {
-        "true"
-    } else {
-        "false"
-    }
-}
-
-fn report_error(e: &WakeComputeError, retry: bool) {
-    use crate::console::errors::ApiError;
-    let retry = bool_to_str(retry);
-    let kind = match e {
-        WakeComputeError::BadComputeAddress(_) => "bad_compute_address",
-        WakeComputeError::ApiError(ApiError::Transport(_)) => "api_transport_error",
-        WakeComputeError::ApiError(ApiError::Console {
-            status: StatusCode::LOCKED,
-            ref text,
-        }) if text.contains("written data quota exceeded")
-            || text.contains("the limit for current plan reached") =>
-        {
-            "quota_exceeded"
-        }
-        WakeComputeError::ApiError(ApiError::Console {
-            status: StatusCode::LOCKED,
-            ..
-        }) => "api_console_locked",
-        WakeComputeError::ApiError(ApiError::Console {
-            status: StatusCode::BAD_REQUEST,
-            ..
-        }) => "api_console_bad_request",
-        WakeComputeError::ApiError(ApiError::Console { status, .. })
-            if status.is_server_error() =>
-        {
-            "api_console_other_server_error"
-        }
-        WakeComputeError::ApiError(ApiError::Console { .. }) => "api_console_other_error",
-        WakeComputeError::TimeoutError => "timeout_error",
-    };
-    NUM_WAKEUP_FAILURES.with_label_values(&[retry, kind]).inc();
-}
-
-/// Try to connect to the compute node, retrying if necessary.
-/// This function might update `node_info`, so we take it by `&mut`.
-#[tracing::instrument(skip_all)]
-pub async fn connect_to_compute<M: ConnectMechanism>(
-    mechanism: &M,
-    mut node_info: console::CachedNodeInfo,
-    extra: &console::ConsoleReqExtra<'_>,
-    creds: &auth::BackendType<'_, auth::backend::ComputeUserInfo>,
-    mut latency_timer: LatencyTimer,
-) -> Result<M::Connection, M::Error>
-where
-    M::ConnectError: ShouldRetry + std::fmt::Debug,
-    M::Error: From<WakeComputeError>,
-{
-    mechanism.update_connect_config(&mut node_info.config);
-
-    // try once
-    let (config, err) = match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await {
-        Ok(res) => {
-            latency_timer.success();
-            return Ok(res);
-        }
-        Err(e) => {
-            error!(error = ?e, "could not connect to compute node");
-            (invalidate_cache(node_info), e)
-        }
-    };
-
-    latency_timer.cache_miss();
-
-    let mut num_retries = 1;
-
-    // if we failed to connect, it's likely that the compute node was suspended, wake a new compute node
-    info!("compute node's state has likely changed; requesting a wake-up");
-    let node_info = loop {
-        let wake_res = match creds {
-            auth::BackendType::Console(api, creds) => api.wake_compute(extra, creds).await,
-            #[cfg(feature = "testing")]
-            auth::BackendType::Postgres(api, creds) => api.wake_compute(extra, creds).await,
-            // nothing to do?
-            auth::BackendType::Link(_) => return Err(err.into()),
-            // test backend
-            #[cfg(test)]
-            auth::BackendType::Test(x) => x.wake_compute(),
-        };
-
-        match handle_try_wake(wake_res, num_retries) {
-            Err(e) => {
-                error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node");
-                report_error(&e, false);
-                return Err(e.into());
-            }
-            // failed to wake up but we can continue to retry
-            Ok(ControlFlow::Continue(e)) => {
-                report_error(&e, true);
-                warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node");
-            }
-            // successfully woke up a compute node and can break the wakeup loop
-            Ok(ControlFlow::Break(mut node_info)) => {
-                node_info.config.reuse_password(&config);
-                mechanism.update_connect_config(&mut node_info.config);
-                break node_info;
-            }
-        }
-
-        let wait_duration = retry_after(num_retries);
-        num_retries += 1;
-
-        time::sleep(wait_duration).await;
-    };
-
-    // now that we have a new node, try connect to it repeatedly.
-    // this can error for a few reasons, for instance:
-    // * DNS connection settings haven't quite propagated yet
-    info!("wake_compute success. attempting to connect");
-    loop {
-        match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await {
-            Ok(res) => {
-                latency_timer.success();
-                return Ok(res);
-            }
-            Err(e) => {
-                let retriable = e.should_retry(num_retries);
-                if !retriable {
-                    error!(error = ?e, num_retries, retriable, "couldn't connect to compute node");
-                    return Err(e.into());
-                }
-                warn!(error = ?e, num_retries, retriable, "couldn't connect to compute node");
-            }
-        }
-
-        let wait_duration = retry_after(num_retries);
-        num_retries += 1;
-
-        time::sleep(wait_duration).await;
-    }
-}
-
-/// Attempts to wake up the compute node.
-/// * Returns Ok(Continue(e)) if there was an error waking but retries are acceptable
-/// * Returns Ok(Break(node)) if the wakeup succeeded
-/// * Returns Err(e) if there was an error
-pub fn handle_try_wake(
-    result: Result<console::CachedNodeInfo, WakeComputeError>,
-    num_retries: u32,
-) -> Result<ControlFlow<console::CachedNodeInfo, WakeComputeError>, WakeComputeError> {
-    match result {
-        Err(err) => match &err {
-            WakeComputeError::ApiError(api) if api.should_retry(num_retries) => {
-                Ok(ControlFlow::Continue(err))
-            }
-            _ => Err(err),
-        },
-        // Ready to try again.
-        Ok(new) => Ok(ControlFlow::Break(new)),
-    }
-}
-
-pub trait ShouldRetry {
-    fn could_retry(&self) -> bool;
-    fn should_retry(&self, num_retries: u32) -> bool {
-        match self {
-            _ if num_retries >= NUM_RETRIES_CONNECT => false,
-            err => err.could_retry(),
-        }
-    }
-}
-
-impl ShouldRetry for io::Error {
-    fn could_retry(&self) -> bool {
-        use std::io::ErrorKind;
-        matches!(
-            self.kind(),
-            ErrorKind::ConnectionRefused | ErrorKind::AddrNotAvailable | ErrorKind::TimedOut
-        )
-    }
-}
-
-impl ShouldRetry for tokio_postgres::error::DbError {
-    fn could_retry(&self) -> bool {
-        use tokio_postgres::error::SqlState;
-        matches!(
-            self.code(),
-            &SqlState::CONNECTION_FAILURE
-                | &SqlState::CONNECTION_EXCEPTION
-                | &SqlState::CONNECTION_DOES_NOT_EXIST
-                | &SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION,
-        )
-    }
-}
-
-impl ShouldRetry for tokio_postgres::Error {
-    fn could_retry(&self) -> bool {
-        if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) {
-            io::Error::could_retry(io_err)
-        } else if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {
-            tokio_postgres::error::DbError::could_retry(db_err)
-        } else {
-            false
-        }
-    }
-}
-
-impl ShouldRetry for compute::ConnectionError {
-    fn could_retry(&self) -> bool {
-        match self {
-            compute::ConnectionError::Postgres(err) => err.could_retry(),
-            compute::ConnectionError::CouldNotConnect(err) => err.could_retry(),
-            _ => false,
-        }
-    }
-}
-
-pub fn retry_after(num_retries: u32) -> time::Duration {
-    BASE_RETRY_WAIT_DURATION.mul_f64(RETRY_WAIT_EXPONENT_BASE.powi((num_retries as i32) - 1))
-}
-
 /// Finish client connection initialization: confirm auth success, send params, etc.
 #[tracing::instrument(skip_all)]
 async fn prepare_client_connection(
@@ -928,6 +401,8 @@ struct Client<'a, S> {
     session_id: uuid::Uuid,
     /// Allow self-signed certificates (for testing).
     allow_self_signed_compute: bool,
+    /// Rate limiter for endpoints
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 }
 
 impl<'a, S> Client<'a, S> {
@@ -938,6 +413,7 @@ impl<'a, S> Client<'a, S> {
         params: &'a StartupMessageParams,
         session_id: uuid::Uuid,
         allow_self_signed_compute: bool,
+        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
     ) -> Self {
         Self {
             stream,
@@ -945,6 +421,7 @@ impl<'a, S> Client<'a, S> {
             params,
             session_id,
             allow_self_signed_compute,
+            endpoint_rate_limiter,
         }
     }
 }
@@ -966,15 +443,29 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
             params,
             session_id,
             allow_self_signed_compute,
+            endpoint_rate_limiter,
         } = self;
 
+        // check rate limit
+        if let Some(ep) = creds.get_endpoint() {
+            if !endpoint_rate_limiter.check(ep) {
+                return stream
+                    .throw_error(auth::AuthError::too_many_connections())
+                    .await;
+            }
+        }
+
+        let proto = mode.protocol_label();
         let extra = console::ConsoleReqExtra {
             session_id, // aka this connection's id
-            application_name: params.get("application_name"),
+            application_name: format!(
+                "{}/{}",
+                params.get("application_name").unwrap_or_default(),
+                proto
+            ),
             options: neon_options(params),
         };
-
-        let mut latency_timer = LatencyTimer::new(mode.protocol_label());
+        let mut latency_timer = LatencyTimer::new(proto);
 
         let user = creds.get_user().to_owned();
         let auth_result = match creds
@@ -1003,7 +494,7 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
 
         let aux = node_info.aux.clone();
         let mut node = connect_to_compute(
-            &TcpMechanism { params },
+            &TcpMechanism { params, proto },
             node_info,
             &extra,
             &creds,
@@ -1012,14 +503,6 @@ impl<S: AsyncRead + AsyncWrite + Unpin> Client<'_, S> {
         .or_else(|e| stream.throw_error(e))
         .await?;
 
-        let proto = mode.protocol_label();
-        NUM_DB_CONNECTIONS_OPENED_COUNTER
-            .with_label_values(&[proto])
-            .inc();
-        scopeguard::defer! {
-            NUM_DB_CONNECTIONS_CLOSED_COUNTER.with_label_values(&[proto]).inc();
-        }
-
         prepare_client_connection(&node, session, &mut stream).await?;
         // Before proxy passing, forward to compute whatever data is left in the
         // PqStream input buffer. Normally there is none, but our serverless npm
diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs
new file mode 100644
index 0000000000..88b0019c49
--- /dev/null
+++ b/proxy/src/proxy/connect_compute.rs
@@ -0,0 +1,238 @@
+use crate::{
+    auth,
+    compute::{self, PostgresConnection},
+    console::{self, errors::WakeComputeError, Api},
+    metrics::{bool_to_str, LatencyTimer, NUM_CONNECTION_FAILURES, NUM_WAKEUP_FAILURES},
+    proxy::retry::{retry_after, ShouldRetry},
+};
+use async_trait::async_trait;
+use hyper::StatusCode;
+use pq_proto::StartupMessageParams;
+use std::ops::ControlFlow;
+use tokio::time;
+use tracing::{error, info, warn};
+
+const CONNECT_TIMEOUT: time::Duration = time::Duration::from_secs(2);
+
+/// If we couldn't connect, a cached connection info might be to blame
+/// (e.g. the compute node's address might've changed at the wrong time).
+/// Invalidate the cache entry (if any) to prevent subsequent errors.
+#[tracing::instrument(name = "invalidate_cache", skip_all)]
+pub fn invalidate_cache(node_info: console::CachedNodeInfo) -> compute::ConnCfg {
+    let is_cached = node_info.cached();
+    if is_cached {
+        warn!("invalidating stalled compute node info cache entry");
+    }
+    let label = match is_cached {
+        true => "compute_cached",
+        false => "compute_uncached",
+    };
+    NUM_CONNECTION_FAILURES.with_label_values(&[label]).inc();
+
+    node_info.invalidate().config
+}
+
+/// Try to connect to the compute node once.
+#[tracing::instrument(name = "connect_once", fields(pid = tracing::field::Empty), skip_all)]
+async fn connect_to_compute_once(
+    node_info: &console::CachedNodeInfo,
+    timeout: time::Duration,
+    proto: &'static str,
+) -> Result<PostgresConnection, compute::ConnectionError> {
+    let allow_self_signed_compute = node_info.allow_self_signed_compute;
+
+    node_info
+        .config
+        .connect(allow_self_signed_compute, timeout, proto)
+        .await
+}
+
+#[async_trait]
+pub trait ConnectMechanism {
+    type Connection;
+    type ConnectError;
+    type Error: From<Self::ConnectError>;
+    async fn connect_once(
+        &self,
+        node_info: &console::CachedNodeInfo,
+        timeout: time::Duration,
+    ) -> Result<Self::Connection, Self::ConnectError>;
+
+    fn update_connect_config(&self, conf: &mut compute::ConnCfg);
+}
+
+pub struct TcpMechanism<'a> {
+    /// KV-dictionary with PostgreSQL connection params.
+    pub params: &'a StartupMessageParams,
+    pub proto: &'static str,
+}
+
+#[async_trait]
+impl ConnectMechanism for TcpMechanism<'_> {
+    type Connection = PostgresConnection;
+    type ConnectError = compute::ConnectionError;
+    type Error = compute::ConnectionError;
+
+    async fn connect_once(
+        &self,
+        node_info: &console::CachedNodeInfo,
+        timeout: time::Duration,
+    ) -> Result<PostgresConnection, Self::Error> {
+        connect_to_compute_once(node_info, timeout, self.proto).await
+    }
+
+    fn update_connect_config(&self, config: &mut compute::ConnCfg) {
+        config.set_startup_params(self.params);
+    }
+}
+
+fn report_error(e: &WakeComputeError, retry: bool) {
+    use crate::console::errors::ApiError;
+    let retry = bool_to_str(retry);
+    let kind = match e {
+        WakeComputeError::BadComputeAddress(_) => "bad_compute_address",
+        WakeComputeError::ApiError(ApiError::Transport(_)) => "api_transport_error",
+        WakeComputeError::ApiError(ApiError::Console {
+            status: StatusCode::LOCKED,
+            ref text,
+        }) if text.contains("written data quota exceeded")
+            || text.contains("the limit for current plan reached") =>
+        {
+            "quota_exceeded"
+        }
+        WakeComputeError::ApiError(ApiError::Console {
+            status: StatusCode::LOCKED,
+            ..
+        }) => "api_console_locked",
+        WakeComputeError::ApiError(ApiError::Console {
+            status: StatusCode::BAD_REQUEST,
+            ..
+        }) => "api_console_bad_request",
+        WakeComputeError::ApiError(ApiError::Console { status, .. })
+            if status.is_server_error() =>
+        {
+            "api_console_other_server_error"
+        }
+        WakeComputeError::ApiError(ApiError::Console { .. }) => "api_console_other_error",
+        WakeComputeError::TimeoutError => "timeout_error",
+    };
+    NUM_WAKEUP_FAILURES.with_label_values(&[retry, kind]).inc();
+}
+
+/// Try to connect to the compute node, retrying if necessary.
+/// This function might update `node_info`, so we take it by `&mut`.
+#[tracing::instrument(skip_all)]
+pub async fn connect_to_compute<M: ConnectMechanism>(
+    mechanism: &M,
+    mut node_info: console::CachedNodeInfo,
+    extra: &console::ConsoleReqExtra,
+    creds: &auth::BackendType<'_, auth::backend::ComputeUserInfo>,
+    mut latency_timer: LatencyTimer,
+) -> Result<M::Connection, M::Error>
+where
+    M::ConnectError: ShouldRetry + std::fmt::Debug,
+    M::Error: From<WakeComputeError>,
+{
+    mechanism.update_connect_config(&mut node_info.config);
+
+    // try once
+    let (config, err) = match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await {
+        Ok(res) => {
+            latency_timer.success();
+            return Ok(res);
+        }
+        Err(e) => {
+            error!(error = ?e, "could not connect to compute node");
+            (invalidate_cache(node_info), e)
+        }
+    };
+
+    latency_timer.cache_miss();
+
+    let mut num_retries = 1;
+
+    // if we failed to connect, it's likely that the compute node was suspended, wake a new compute node
+    info!("compute node's state has likely changed; requesting a wake-up");
+    let node_info = loop {
+        let wake_res = match creds {
+            auth::BackendType::Console(api, creds) => api.wake_compute(extra, creds).await,
+            #[cfg(feature = "testing")]
+            auth::BackendType::Postgres(api, creds) => api.wake_compute(extra, creds).await,
+            // nothing to do?
+            auth::BackendType::Link(_) => return Err(err.into()),
+            // test backend
+            #[cfg(test)]
+            auth::BackendType::Test(x) => x.wake_compute(),
+        };
+
+        match handle_try_wake(wake_res, num_retries) {
+            Err(e) => {
+                error!(error = ?e, num_retries, retriable = false, "couldn't wake compute node");
+                report_error(&e, false);
+                return Err(e.into());
+            }
+            // failed to wake up but we can continue to retry
+            Ok(ControlFlow::Continue(e)) => {
+                report_error(&e, true);
+                warn!(error = ?e, num_retries, retriable = true, "couldn't wake compute node");
+            }
+            // successfully woke up a compute node and can break the wakeup loop
+            Ok(ControlFlow::Break(mut node_info)) => {
+                node_info.config.reuse_password(&config);
+                mechanism.update_connect_config(&mut node_info.config);
+                break node_info;
+            }
+        }
+
+        let wait_duration = retry_after(num_retries);
+        num_retries += 1;
+
+        time::sleep(wait_duration).await;
+    };
+
+    // now that we have a new node, try connect to it repeatedly.
+    // this can error for a few reasons, for instance:
+    // * DNS connection settings haven't quite propagated yet
+    info!("wake_compute success. attempting to connect");
+    loop {
+        match mechanism.connect_once(&node_info, CONNECT_TIMEOUT).await {
+            Ok(res) => {
+                latency_timer.success();
+                return Ok(res);
+            }
+            Err(e) => {
+                let retriable = e.should_retry(num_retries);
+                if !retriable {
+                    error!(error = ?e, num_retries, retriable, "couldn't connect to compute node");
+                    return Err(e.into());
+                }
+                warn!(error = ?e, num_retries, retriable, "couldn't connect to compute node");
+            }
+        }
+
+        let wait_duration = retry_after(num_retries);
+        num_retries += 1;
+
+        time::sleep(wait_duration).await;
+    }
+}
+
+/// Attempts to wake up the compute node.
+/// * Returns Ok(Continue(e)) if there was an error waking but retries are acceptable
+/// * Returns Ok(Break(node)) if the wakeup succeeded
+/// * Returns Err(e) if there was an error
+pub fn handle_try_wake(
+    result: Result<console::CachedNodeInfo, WakeComputeError>,
+    num_retries: u32,
+) -> Result<ControlFlow<console::CachedNodeInfo, WakeComputeError>, WakeComputeError> {
+    match result {
+        Err(err) => match &err {
+            WakeComputeError::ApiError(api) if api.should_retry(num_retries) => {
+                Ok(ControlFlow::Continue(err))
+            }
+            _ => Err(err),
+        },
+        // Ready to try again.
+        Ok(new) => Ok(ControlFlow::Break(new)),
+    }
+}
diff --git a/proxy/src/proxy/retry.rs b/proxy/src/proxy/retry.rs
new file mode 100644
index 0000000000..a85ed380b0
--- /dev/null
+++ b/proxy/src/proxy/retry.rs
@@ -0,0 +1,68 @@
+use crate::compute;
+use std::{error::Error, io};
+use tokio::time;
+
+/// Number of times we should retry the `/proxy_wake_compute` http request.
+/// Retry duration is BASE_RETRY_WAIT_DURATION * RETRY_WAIT_EXPONENT_BASE ^ n, where n starts at 0
+pub const NUM_RETRIES_CONNECT: u32 = 16;
+const BASE_RETRY_WAIT_DURATION: time::Duration = time::Duration::from_millis(25);
+const RETRY_WAIT_EXPONENT_BASE: f64 = std::f64::consts::SQRT_2;
+
+pub trait ShouldRetry {
+    fn could_retry(&self) -> bool;
+    fn should_retry(&self, num_retries: u32) -> bool {
+        match self {
+            _ if num_retries >= NUM_RETRIES_CONNECT => false,
+            err => err.could_retry(),
+        }
+    }
+}
+
+impl ShouldRetry for io::Error {
+    fn could_retry(&self) -> bool {
+        use std::io::ErrorKind;
+        matches!(
+            self.kind(),
+            ErrorKind::ConnectionRefused | ErrorKind::AddrNotAvailable | ErrorKind::TimedOut
+        )
+    }
+}
+
+impl ShouldRetry for tokio_postgres::error::DbError {
+    fn could_retry(&self) -> bool {
+        use tokio_postgres::error::SqlState;
+        matches!(
+            self.code(),
+            &SqlState::CONNECTION_FAILURE
+                | &SqlState::CONNECTION_EXCEPTION
+                | &SqlState::CONNECTION_DOES_NOT_EXIST
+                | &SqlState::SQLCLIENT_UNABLE_TO_ESTABLISH_SQLCONNECTION,
+        )
+    }
+}
+
+impl ShouldRetry for tokio_postgres::Error {
+    fn could_retry(&self) -> bool {
+        if let Some(io_err) = self.source().and_then(|x| x.downcast_ref()) {
+            io::Error::could_retry(io_err)
+        } else if let Some(db_err) = self.source().and_then(|x| x.downcast_ref()) {
+            tokio_postgres::error::DbError::could_retry(db_err)
+        } else {
+            false
+        }
+    }
+}
+
+impl ShouldRetry for compute::ConnectionError {
+    fn could_retry(&self) -> bool {
+        match self {
+            compute::ConnectionError::Postgres(err) => err.could_retry(),
+            compute::ConnectionError::CouldNotConnect(err) => err.could_retry(),
+            _ => false,
+        }
+    }
+}
+
+pub fn retry_after(num_retries: u32) -> time::Duration {
+    BASE_RETRY_WAIT_DURATION.mul_f64(RETRY_WAIT_EXPONENT_BASE.powi((num_retries as i32) - 1))
+}
diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs
index 31c3ad1055..3c483c59ee 100644
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -2,10 +2,13 @@
 
 mod mitm;
 
+use super::connect_compute::ConnectMechanism;
+use super::retry::ShouldRetry;
 use super::*;
 use crate::auth::backend::{ComputeUserInfo, TestBackend};
 use crate::config::CertResolver;
 use crate::console::{CachedNodeInfo, NodeInfo};
+use crate::proxy::retry::{retry_after, NUM_RETRIES_CONNECT};
 use crate::{auth, http, sasl, scram};
 use async_trait::async_trait;
 use rstest::rstest;
@@ -423,7 +426,7 @@ impl ConnectMechanism for TestConnectMechanism {
     async fn connect_once(
         &self,
         _node_info: &console::CachedNodeInfo,
-        _timeout: time::Duration,
+        _timeout: std::time::Duration,
     ) -> Result<Self::Connection, Self::ConnectError> {
         let mut counter = self.counter.lock().unwrap();
         let action = self.sequence[*counter];
@@ -484,13 +487,13 @@ fn helper_create_connect_info(
     mechanism: &TestConnectMechanism,
 ) -> (
     CachedNodeInfo,
-    console::ConsoleReqExtra<'static>,
+    console::ConsoleReqExtra,
     auth::BackendType<'_, ComputeUserInfo>,
 ) {
     let cache = helper_create_cached_node_info();
     let extra = console::ConsoleReqExtra {
         session_id: uuid::Uuid::new_v4(),
-        application_name: Some("TEST"),
+        application_name: "TEST".into(),
         options: vec![],
     };
     let creds = auth::BackendType::Test(mechanism);
diff --git a/proxy/src/proxy/tests/mitm.rs b/proxy/src/proxy/tests/mitm.rs
index 50b3034936..a0a84a1dc0 100644
--- a/proxy/src/proxy/tests/mitm.rs
+++ b/proxy/src/proxy/tests/mitm.rs
@@ -120,7 +120,7 @@ where
 struct PgFrame;
 impl Decoder for PgFrame {
     type Item = Bytes;
-    type Error = io::Error;
+    type Error = std::io::Error;
 
     fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
         if src.len() < 5 {
@@ -136,7 +136,7 @@ impl Decoder for PgFrame {
     }
 }
 impl Encoder<Bytes> for PgFrame {
-    type Error = io::Error;
+    type Error = std::io::Error;
 
     fn encode(&mut self, item: Bytes, dst: &mut BytesMut) -> Result<(), Self::Error> {
         dst.extend_from_slice(&item);
diff --git a/proxy/src/rate_limiter.rs b/proxy/src/rate_limiter.rs
index 5622c44a68..b26386d159 100644
--- a/proxy/src/rate_limiter.rs
+++ b/proxy/src/rate_limiter.rs
@@ -4,3 +4,4 @@ mod limiter;
 pub use aimd::Aimd;
 pub use limit_algorithm::{AimdConfig, Fixed, RateLimitAlgorithm, RateLimiterConfig};
 pub use limiter::Limiter;
+pub use limiter::{EndpointRateLimiter, RateBucketInfo};
diff --git a/proxy/src/rate_limiter/aimd.rs b/proxy/src/rate_limiter/aimd.rs
index c6c532ae53..2c14a54a6c 100644
--- a/proxy/src/rate_limiter/aimd.rs
+++ b/proxy/src/rate_limiter/aimd.rs
@@ -33,39 +33,6 @@ impl Aimd {
             min_utilisation_threshold: config.aimd_min_utilisation_threshold,
         }
     }
-
-    pub fn decrease_factor(self, factor: f32) -> Self {
-        assert!((0.5..1.0).contains(&factor));
-        Self {
-            decrease_factor: factor,
-            ..self
-        }
-    }
-
-    pub fn increase_by(self, increase: usize) -> Self {
-        assert!(increase > 0);
-        Self {
-            increase_by: increase,
-            ..self
-        }
-    }
-
-    pub fn with_max_limit(self, max: usize) -> Self {
-        assert!(max > 0);
-        Self {
-            max_limit: max,
-            ..self
-        }
-    }
-
-    /// A threshold below which the limit won't be increased. 0.5 = 50%.
-    pub fn with_min_utilisation_threshold(self, min_util: f32) -> Self {
-        assert!(min_util > 0. && min_util < 1.);
-        Self {
-            min_utilisation_threshold: min_util,
-            ..self
-        }
-    }
 }
 
 #[async_trait]
diff --git a/proxy/src/rate_limiter/limiter.rs b/proxy/src/rate_limiter/limiter.rs
index 3a9fed3919..a190b2cf8f 100644
--- a/proxy/src/rate_limiter/limiter.rs
+++ b/proxy/src/rate_limiter/limiter.rs
@@ -1,13 +1,19 @@
 use std::{
+    collections::hash_map::RandomState,
+    hash::BuildHasher,
     sync::{
         atomic::{AtomicUsize, Ordering},
-        Arc,
+        Arc, Mutex,
     },
-    time::Duration,
 };
 
+use anyhow::bail;
+use dashmap::DashMap;
+use itertools::Itertools;
+use rand::{rngs::StdRng, Rng, SeedableRng};
+use smol_str::SmolStr;
 use tokio::sync::{Mutex as AsyncMutex, Semaphore, SemaphorePermit};
-use tokio::time::{timeout, Instant};
+use tokio::time::{timeout, Duration, Instant};
 use tracing::info;
 
 use super::{
@@ -15,6 +21,180 @@ use super::{
     RateLimiterConfig,
 };
 
+// Simple per-endpoint rate limiter.
+//
+// Check that number of connections to the endpoint is below `max_rps` rps.
+// Purposefully ignore user name and database name as clients can reconnect
+// with different names, so we'll end up sending some http requests to
+// the control plane.
+//
+// We also may save quite a lot of CPU (I think) by bailing out right after we
+// saw SNI, before doing TLS handshake. User-side error messages in that case
+// does not look very nice (`SSL SYSCALL error: Undefined error: 0`), so for now
+// I went with a more expensive way that yields user-friendlier error messages.
+pub struct EndpointRateLimiter<Rand = StdRng, Hasher = RandomState> {
+    map: DashMap<SmolStr, Vec<RateBucket>, Hasher>,
+    info: &'static [RateBucketInfo],
+    access_count: AtomicUsize,
+    rand: Mutex<Rand>,
+}
+
+#[derive(Clone, Copy)]
+struct RateBucket {
+    start: Instant,
+    count: u32,
+}
+
+impl RateBucket {
+    fn should_allow_request(&mut self, info: &RateBucketInfo, now: Instant) -> bool {
+        if now - self.start < info.interval {
+            self.count < info.max_rpi
+        } else {
+            // bucket expired, reset
+            self.count = 0;
+            self.start = now;
+
+            true
+        }
+    }
+
+    fn inc(&mut self) {
+        self.count += 1;
+    }
+}
+
+#[derive(Clone, Copy, PartialEq)]
+pub struct RateBucketInfo {
+    pub interval: Duration,
+    // requests per interval
+    pub max_rpi: u32,
+}
+
+impl std::fmt::Display for RateBucketInfo {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let rps = self.max_rpi * 1000 / self.interval.as_millis() as u32;
+        write!(f, "{rps}@{}", humantime::format_duration(self.interval))
+    }
+}
+
+impl std::fmt::Debug for RateBucketInfo {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{self}")
+    }
+}
+
+impl std::str::FromStr for RateBucketInfo {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let Some((max_rps, interval)) = s.split_once('@') else {
+            bail!("invalid rate info")
+        };
+        let max_rps = max_rps.parse()?;
+        let interval = humantime::parse_duration(interval)?;
+        Ok(Self::new(max_rps, interval))
+    }
+}
+
+impl RateBucketInfo {
+    pub const DEFAULT_SET: [Self; 3] = [
+        Self::new(300, Duration::from_secs(1)),
+        Self::new(200, Duration::from_secs(60)),
+        Self::new(100, Duration::from_secs(600)),
+    ];
+
+    pub fn validate(info: &mut [Self]) -> anyhow::Result<()> {
+        info.sort_unstable_by_key(|info| info.interval);
+        let invalid = info
+            .iter()
+            .tuple_windows()
+            .find(|(a, b)| a.max_rpi > b.max_rpi);
+        if let Some((a, b)) = invalid {
+            bail!(
+                "invalid endpoint RPS limits. {b} allows fewer requests per bucket than {a} ({} vs {})",
+                b.max_rpi,
+                a.max_rpi,
+            );
+        }
+
+        Ok(())
+    }
+
+    pub const fn new(max_rps: u32, interval: Duration) -> Self {
+        Self {
+            interval,
+            max_rpi: max_rps * interval.as_millis() as u32 / 1000,
+        }
+    }
+}
+
+impl EndpointRateLimiter {
+    pub fn new(info: &'static [RateBucketInfo]) -> Self {
+        Self::new_with_rand_and_hasher(info, StdRng::from_entropy(), RandomState::new())
+    }
+}
+
+impl<R: Rng, S: BuildHasher + Clone> EndpointRateLimiter<R, S> {
+    fn new_with_rand_and_hasher(info: &'static [RateBucketInfo], rand: R, hasher: S) -> Self {
+        info!(buckets = ?info, "endpoint rate limiter");
+        Self {
+            info,
+            map: DashMap::with_hasher_and_shard_amount(hasher, 64),
+            access_count: AtomicUsize::new(1), // start from 1 to avoid GC on the first request
+            rand: Mutex::new(rand),
+        }
+    }
+
+    /// Check that number of connections to the endpoint is below `max_rps` rps.
+    pub fn check(&self, endpoint: SmolStr) -> bool {
+        // do a partial GC every 2k requests. This cleans up ~ 1/64th of the map.
+        // worst case memory usage is about:
+        //    = 2 * 2048 * 64 * (48B + 72B)
+        //    = 30MB
+        if self.access_count.fetch_add(1, Ordering::AcqRel) % 2048 == 0 {
+            self.do_gc();
+        }
+
+        let now = Instant::now();
+        let mut entry = self.map.entry(endpoint).or_insert_with(|| {
+            vec![
+                RateBucket {
+                    start: now,
+                    count: 0,
+                };
+                self.info.len()
+            ]
+        });
+
+        let should_allow_request = entry
+            .iter_mut()
+            .zip(self.info)
+            .all(|(bucket, info)| bucket.should_allow_request(info, now));
+
+        if should_allow_request {
+            // only increment the bucket counts if the request will actually be accepted
+            entry.iter_mut().for_each(RateBucket::inc);
+        }
+
+        should_allow_request
+    }
+
+    /// Clean the map. Simple strategy: remove all entries in a random shard.
+    /// At worst, we'll double the effective max_rps during the cleanup.
+    /// But that way deletion does not aquire mutex on each entry access.
+    pub fn do_gc(&self) {
+        info!(
+            "cleaning up endpoint rate limiter, current size = {}",
+            self.map.len()
+        );
+        let n = self.map.shards().len();
+        // this lock is ok as the periodic cycle of do_gc makes this very unlikely to collide
+        // (impossible, infact, unless we have 2048 threads)
+        let shard = self.rand.lock().unwrap().gen_range(0..n);
+        self.map.shards()[shard].write().clear();
+    }
+}
+
 /// Limits the number of concurrent jobs.
 ///
 /// Concurrency is limited through the use of [Token]s. Acquire a token to run a job, and release the
@@ -53,7 +233,6 @@ pub struct Token<'t> {
 #[derive(Debug, Clone, Copy)]
 pub struct LimiterState {
     limit: usize,
-    available: usize,
     in_flight: usize,
 }
 
@@ -214,10 +393,10 @@ impl Limiter {
             }
             new_limit
         };
-        crate::proxy::RATE_LIMITER_LIMIT
+        crate::metrics::RATE_LIMITER_LIMIT
             .with_label_values(&["expected"])
             .set(new_limit as i64);
-        crate::proxy::RATE_LIMITER_LIMIT
+        crate::metrics::RATE_LIMITER_LIMIT
             .with_label_values(&["actual"])
             .set(actual_limit as i64);
         self.limits.store(new_limit, Ordering::Release);
@@ -231,11 +410,7 @@ impl Limiter {
     pub fn state(&self) -> LimiterState {
         let limit = self.limits.load(Ordering::Relaxed);
         let in_flight = self.in_flight.load(Ordering::Relaxed);
-        LimiterState {
-            limit,
-            available: limit.saturating_sub(in_flight),
-            in_flight,
-        }
+        LimiterState { limit, in_flight }
     }
 }
 
@@ -248,13 +423,6 @@ impl<'t> Token<'t> {
         }
     }
 
-    #[cfg(test)]
-    pub fn set_latency(&mut self, latency: Duration) {
-        use std::ops::Sub;
-
-        self.start = Instant::now().sub(latency);
-    }
-
     pub fn forget(&mut self) {
         if let Some(permit) = self.permit.take() {
             permit.forget();
@@ -273,10 +441,6 @@ impl LimiterState {
     pub fn limit(&self) -> usize {
         self.limit
     }
-    /// The amount of concurrency available to use.
-    pub fn available(&self) -> usize {
-        self.available
-    }
     /// The number of jobs in flight.
     pub fn in_flight(&self) -> usize {
         self.in_flight
@@ -306,7 +470,7 @@ impl reqwest_middleware::Middleware for Limiter {
                 )
             })?;
         info!(duration = ?start.elapsed(), "waiting for token to connect to the control plane");
-        crate::proxy::RATE_LIMITER_ACQUIRE_LATENCY.observe(start.elapsed().as_secs_f64());
+        crate::metrics::RATE_LIMITER_ACQUIRE_LATENCY.observe(start.elapsed().as_secs_f64());
         match next.run(req, extensions).await {
             Ok(response) => {
                 self.release(token, Some(Outcome::from_reqwest_response(&response)))
@@ -324,12 +488,16 @@ impl reqwest_middleware::Middleware for Limiter {
 
 #[cfg(test)]
 mod tests {
-    use std::{pin::pin, task::Context, time::Duration};
+    use std::{hash::BuildHasherDefault, pin::pin, task::Context, time::Duration};
 
     use futures::{task::noop_waker_ref, Future};
+    use rand::SeedableRng;
+    use rustc_hash::FxHasher;
+    use smol_str::SmolStr;
+    use tokio::time;
 
-    use super::{Limiter, Outcome};
-    use crate::rate_limiter::RateLimitAlgorithm;
+    use super::{EndpointRateLimiter, Limiter, Outcome};
+    use crate::rate_limiter::{RateBucketInfo, RateLimitAlgorithm};
 
     #[tokio::test]
     async fn it_works() {
@@ -438,4 +606,105 @@ mod tests {
         limiter.release(token1, None).await;
         limiter.release(token2, None).await;
     }
+
+    #[test]
+    fn rate_bucket_rpi() {
+        let rate_bucket = RateBucketInfo::new(50, Duration::from_secs(5));
+        assert_eq!(rate_bucket.max_rpi, 50 * 5);
+
+        let rate_bucket = RateBucketInfo::new(50, Duration::from_millis(500));
+        assert_eq!(rate_bucket.max_rpi, 50 / 2);
+    }
+
+    #[test]
+    fn rate_bucket_parse() {
+        let rate_bucket: RateBucketInfo = "100@10s".parse().unwrap();
+        assert_eq!(rate_bucket.interval, Duration::from_secs(10));
+        assert_eq!(rate_bucket.max_rpi, 100 * 10);
+        assert_eq!(rate_bucket.to_string(), "100@10s");
+
+        let rate_bucket: RateBucketInfo = "100@1m".parse().unwrap();
+        assert_eq!(rate_bucket.interval, Duration::from_secs(60));
+        assert_eq!(rate_bucket.max_rpi, 100 * 60);
+        assert_eq!(rate_bucket.to_string(), "100@1m");
+    }
+
+    #[test]
+    fn default_rate_buckets() {
+        let mut defaults = RateBucketInfo::DEFAULT_SET;
+        RateBucketInfo::validate(&mut defaults[..]).unwrap();
+    }
+
+    #[test]
+    #[should_panic = "invalid endpoint RPS limits. 10@10s allows fewer requests per bucket than 300@1s (100 vs 300)"]
+    fn rate_buckets_validate() {
+        let mut rates: Vec<RateBucketInfo> = ["300@1s", "10@10s"]
+            .into_iter()
+            .map(|s| s.parse().unwrap())
+            .collect();
+        RateBucketInfo::validate(&mut rates).unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_rate_limits() {
+        let mut rates: Vec<RateBucketInfo> = ["100@1s", "20@30s"]
+            .into_iter()
+            .map(|s| s.parse().unwrap())
+            .collect();
+        RateBucketInfo::validate(&mut rates).unwrap();
+        let limiter = EndpointRateLimiter::new(Vec::leak(rates));
+
+        let endpoint = SmolStr::from("ep-my-endpoint-1234");
+
+        time::pause();
+
+        for _ in 0..100 {
+            assert!(limiter.check(endpoint.clone()));
+        }
+        // more connections fail
+        assert!(!limiter.check(endpoint.clone()));
+
+        // fail even after 500ms as it's in the same bucket
+        time::advance(time::Duration::from_millis(500)).await;
+        assert!(!limiter.check(endpoint.clone()));
+
+        // after a full 1s, 100 requests are allowed again
+        time::advance(time::Duration::from_millis(500)).await;
+        for _ in 1..6 {
+            for _ in 0..100 {
+                assert!(limiter.check(endpoint.clone()));
+            }
+            time::advance(time::Duration::from_millis(1000)).await;
+        }
+
+        // more connections after 600 will exceed the 20rps@30s limit
+        assert!(!limiter.check(endpoint.clone()));
+
+        // will still fail before the 30 second limit
+        time::advance(time::Duration::from_millis(30_000 - 6_000 - 1)).await;
+        assert!(!limiter.check(endpoint.clone()));
+
+        // after the full 30 seconds, 100 requests are allowed again
+        time::advance(time::Duration::from_millis(1)).await;
+        for _ in 0..100 {
+            assert!(limiter.check(endpoint.clone()));
+        }
+    }
+
+    #[tokio::test]
+    async fn test_rate_limits_gc() {
+        // fixed seeded random/hasher to ensure that the test is not flaky
+        let rand = rand::rngs::StdRng::from_seed([1; 32]);
+        let hasher = BuildHasherDefault::<FxHasher>::default();
+
+        let limiter = EndpointRateLimiter::new_with_rand_and_hasher(
+            &RateBucketInfo::DEFAULT_SET,
+            rand,
+            hasher,
+        );
+        for i in 0..1_000_000 {
+            limiter.check(format!("{i}").into());
+        }
+        assert!(limiter.map.len() < 150_000);
+    }
 }
diff --git a/proxy/src/scram/key.rs b/proxy/src/scram/key.rs
index e9c65fcef3..bd93fb2b70 100644
--- a/proxy/src/scram/key.rs
+++ b/proxy/src/scram/key.rs
@@ -6,7 +6,7 @@ pub const SCRAM_KEY_LEN: usize = 32;
 /// One of the keys derived from the [password](super::password::SaltedPassword).
 /// We use the same structure for all keys, i.e.
 /// `ClientKey`, `StoredKey`, and `ServerKey`.
-#[derive(Default, PartialEq, Eq)]
+#[derive(Clone, Default, PartialEq, Eq)]
 #[repr(transparent)]
 pub struct ScramKey {
     bytes: [u8; SCRAM_KEY_LEN],
diff --git a/proxy/src/scram/secret.rs b/proxy/src/scram/secret.rs
index 424beccec9..9e74e07af1 100644
--- a/proxy/src/scram/secret.rs
+++ b/proxy/src/scram/secret.rs
@@ -5,6 +5,7 @@ use super::key::ScramKey;
 
 /// Server secret is produced from [password](super::password::SaltedPassword)
 /// and is used throughout the authentication process.
+#[derive(Clone)]
 pub struct ServerSecret {
     /// Number of iterations for `PBKDF2` function.
     pub iterations: u32,
diff --git a/proxy/src/serverless.rs b/proxy/src/serverless.rs
index cd496ff01e..e358a0712f 100644
--- a/proxy/src/serverless.rs
+++ b/proxy/src/serverless.rs
@@ -8,12 +8,14 @@ mod websocket;
 
 use anyhow::bail;
 use hyper::StatusCode;
+use metrics::IntCounterPairGuard;
 pub use reqwest_middleware::{ClientWithMiddleware, Error};
 pub use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
 use tokio_util::task::TaskTracker;
 
+use crate::metrics::NUM_CLIENT_CONNECTION_GAUGE;
 use crate::protocol2::{ProxyProtocolAccept, WithClientIp};
-use crate::proxy::{NUM_CLIENT_CONNECTION_CLOSED_COUNTER, NUM_CLIENT_CONNECTION_OPENED_COUNTER};
+use crate::rate_limiter::EndpointRateLimiter;
 use crate::{cancellation::CancelMap, config::ProxyConfig};
 use futures::StreamExt;
 use hyper::{
@@ -37,6 +39,7 @@ pub async fn task_main(
     config: &'static ProxyConfig,
     ws_listener: TcpListener,
     cancellation_token: CancellationToken,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
     scopeguard::defer! {
         info!("websocket server has shut down");
@@ -91,6 +94,7 @@ pub async fn task_main(
             let sni_name = tls.server_name().map(|s| s.to_string());
             let conn_pool = conn_pool.clone();
             let ws_connections = ws_connections.clone();
+            let endpoint_rate_limiter = endpoint_rate_limiter.clone();
 
             async move {
                 let peer_addr = match client_addr {
@@ -103,6 +107,7 @@ pub async fn task_main(
                         let sni_name = sni_name.clone();
                         let conn_pool = conn_pool.clone();
                         let ws_connections = ws_connections.clone();
+                        let endpoint_rate_limiter = endpoint_rate_limiter.clone();
 
                         async move {
                             let cancel_map = Arc::new(CancelMap::default());
@@ -117,6 +122,7 @@ pub async fn task_main(
                                 session_id,
                                 sni_name,
                                 peer_addr.ip(),
+                                endpoint_rate_limiter,
                             )
                             .instrument(info_span!(
                                 "serverless",
@@ -144,22 +150,17 @@ pub async fn task_main(
 
 struct MetricService<S> {
     inner: S,
+    _gauge: IntCounterPairGuard,
 }
 
 impl<S> MetricService<S> {
     fn new(inner: S) -> MetricService<S> {
-        NUM_CLIENT_CONNECTION_OPENED_COUNTER
-            .with_label_values(&["http"])
-            .inc();
-        MetricService { inner }
-    }
-}
-
-impl<S> Drop for MetricService<S> {
-    fn drop(&mut self) {
-        NUM_CLIENT_CONNECTION_CLOSED_COUNTER
-            .with_label_values(&["http"])
-            .inc();
+        MetricService {
+            inner,
+            _gauge: NUM_CLIENT_CONNECTION_GAUGE
+                .with_label_values(&["http"])
+                .guard(),
+        }
     }
 }
 
@@ -190,6 +191,7 @@ async fn request_handler(
     session_id: uuid::Uuid,
     sni_hostname: Option<String>,
     peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> Result<Response<Body>, ApiError> {
     let host = request
         .headers()
@@ -214,6 +216,7 @@ async fn request_handler(
                     session_id,
                     host,
                     peer_addr,
+                    endpoint_rate_limiter,
                 )
                 .await
                 {
@@ -241,7 +244,7 @@ async fn request_handler(
             .header("Access-Control-Allow-Origin", "*")
             .header(
                 "Access-Control-Allow-Headers",
-                "Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In",
+                "Neon-Connection-String, Neon-Raw-Text-Output, Neon-Array-Mode, Neon-Pool-Opt-In, Neon-Batch-Read-Only, Neon-Batch-Isolation-Level",
             )
             .header("Access-Control-Max-Age", "86400" /* 24 hours */)
             .status(StatusCode::OK) // 204 is also valid, but see: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/OPTIONS#status_code
diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs
index 734df11368..df2d1bea32 100644
--- a/proxy/src/serverless/conn_pool.rs
+++ b/proxy/src/serverless/conn_pool.rs
@@ -24,20 +24,16 @@ use tokio_postgres::{AsyncMessage, ReadyForQueryStatus};
 use crate::{
     auth::{self, backend::ComputeUserInfo, check_peer_addr_is_in_list},
     console,
-    proxy::{
-        neon_options, LatencyTimer, NUM_DB_CONNECTIONS_CLOSED_COUNTER,
-        NUM_DB_CONNECTIONS_OPENED_COUNTER,
-    },
+    metrics::{LatencyTimer, NUM_DB_CONNECTIONS_GAUGE},
+    proxy::{connect_compute::ConnectMechanism, neon_options},
     usage_metrics::{Ids, MetricCounter, USAGE_METRICS},
 };
 use crate::{compute, config};
 
-use crate::proxy::ConnectMechanism;
-
 use tracing::{error, warn, Span};
 use tracing::{info, info_span, Instrument};
 
-pub const APP_NAME: &str = "sql_over_http";
+pub const APP_NAME: &str = "/sql_over_http";
 const MAX_CONNS_PER_ENDPOINT: usize = 20;
 
 #[derive(Debug, Clone)]
@@ -432,10 +428,9 @@ async fn connect_to_compute(
 
     let extra = console::ConsoleReqExtra {
         session_id: uuid::Uuid::new_v4(),
-        application_name: Some(APP_NAME),
+        application_name: APP_NAME.to_string(),
         options: console_options,
     };
-    // TODO(anna): this is a bit hacky way, consider using console notification listener.
     if !config.disable_ip_check_for_http {
         let allowed_ips = backend.get_allowed_ips(&extra).await?;
         if !check_peer_addr_is_in_list(&peer_addr, &allowed_ips) {
@@ -447,7 +442,7 @@ async fn connect_to_compute(
         .await?
         .context("missing cache entry from wake_compute")?;
 
-    crate::proxy::connect_to_compute(
+    crate::proxy::connect_compute::connect_to_compute(
         &TokioMechanism {
             conn_id,
             conn_info,
@@ -477,6 +472,11 @@ async fn connect_to_compute_once(
         .connect_timeout(timeout)
         .connect(tokio_postgres::NoTls)
         .await?;
+
+    let conn_gauge = NUM_DB_CONNECTIONS_GAUGE
+        .with_label_values(&["http"])
+        .guard();
+
     tracing::Span::current().record("pid", &tracing::field::display(client.get_process_id()));
 
     let (tx, mut rx) = tokio::sync::watch::channel(session);
@@ -492,10 +492,7 @@ async fn connect_to_compute_once(
 
     tokio::spawn(
         async move {
-            NUM_DB_CONNECTIONS_OPENED_COUNTER.with_label_values(&["http"]).inc();
-            scopeguard::defer! {
-                NUM_DB_CONNECTIONS_CLOSED_COUNTER.with_label_values(&["http"]).inc();
-            }
+            let _conn_gauge = conn_gauge;
             poll_fn(move |cx| {
                 if matches!(rx.has_changed(), Ok(true)) {
                     session = *rx.borrow_and_update();
diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs
index 6e80260193..307b085ce0 100644
--- a/proxy/src/serverless/sql_over_http.rs
+++ b/proxy/src/serverless/sql_over_http.rs
@@ -29,7 +29,7 @@ use utils::http::error::ApiError;
 use utils::http::json::json_response;
 
 use crate::config::HttpConfig;
-use crate::proxy::{NUM_CONNECTIONS_ACCEPTED_COUNTER, NUM_CONNECTIONS_CLOSED_COUNTER};
+use crate::metrics::NUM_CONNECTION_REQUESTS_GAUGE;
 
 use super::conn_pool::ConnInfo;
 use super::conn_pool::GlobalConnPool;
@@ -303,12 +303,9 @@ async fn handle_inner(
     session_id: uuid::Uuid,
     peer_addr: IpAddr,
 ) -> anyhow::Result<Response<Body>> {
-    NUM_CONNECTIONS_ACCEPTED_COUNTER
+    let _request_gauge = NUM_CONNECTION_REQUESTS_GAUGE
         .with_label_values(&["http"])
-        .inc();
-    scopeguard::defer! {
-        NUM_CONNECTIONS_CLOSED_COUNTER.with_label_values(&["http"]).inc();
-    }
+        .guard();
 
     //
     // Determine the destination and connection params
diff --git a/proxy/src/serverless/websocket.rs b/proxy/src/serverless/websocket.rs
index 199b03550d..071add3bca 100644
--- a/proxy/src/serverless/websocket.rs
+++ b/proxy/src/serverless/websocket.rs
@@ -3,6 +3,7 @@ use crate::{
     config::ProxyConfig,
     error::io_error,
     proxy::{handle_client, ClientMode},
+    rate_limiter::EndpointRateLimiter,
 };
 use bytes::{Buf, Bytes};
 use futures::{Sink, Stream};
@@ -13,6 +14,7 @@ use pin_project_lite::pin_project;
 use std::{
     net::IpAddr,
     pin::Pin,
+    sync::Arc,
     task::{ready, Context, Poll},
 };
 use tokio::io::{self, AsyncBufRead, AsyncRead, AsyncWrite, ReadBuf};
@@ -25,15 +27,15 @@ use sync_wrapper::SyncWrapper;
 pin_project! {
     /// This is a wrapper around a [`WebSocketStream`] that
     /// implements [`AsyncRead`] and [`AsyncWrite`].
-    pub struct WebSocketRw {
+    pub struct WebSocketRw<S = Upgraded> {
         #[pin]
-        stream: SyncWrapper<WebSocketStream<Upgraded>>,
+        stream: SyncWrapper<WebSocketStream<S>>,
         bytes: Bytes,
     }
 }
 
-impl WebSocketRw {
-    pub fn new(stream: WebSocketStream<Upgraded>) -> Self {
+impl<S> WebSocketRw<S> {
+    pub fn new(stream: WebSocketStream<S>) -> Self {
         Self {
             stream: stream.into(),
             bytes: Bytes::new(),
@@ -41,7 +43,7 @@ impl WebSocketRw {
     }
 }
 
-impl AsyncWrite for WebSocketRw {
+impl<S: AsyncRead + AsyncWrite + Unpin> AsyncWrite for WebSocketRw<S> {
     fn poll_write(
         self: Pin<&mut Self>,
         cx: &mut Context<'_>,
@@ -67,7 +69,7 @@ impl AsyncWrite for WebSocketRw {
     }
 }
 
-impl AsyncRead for WebSocketRw {
+impl<S: AsyncRead + AsyncWrite + Unpin> AsyncRead for WebSocketRw<S> {
     fn poll_read(
         mut self: Pin<&mut Self>,
         cx: &mut Context<'_>,
@@ -84,7 +86,7 @@ impl AsyncRead for WebSocketRw {
     }
 }
 
-impl AsyncBufRead for WebSocketRw {
+impl<S: AsyncRead + AsyncWrite + Unpin> AsyncBufRead for WebSocketRw<S> {
     fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
         // Please refer to poll_fill_buf's documentation.
         const EOF: Poll<io::Result<&[u8]>> = Poll::Ready(Ok(&[]));
@@ -134,6 +136,7 @@ pub async fn serve_websocket(
     session_id: uuid::Uuid,
     hostname: Option<String>,
     peer_addr: IpAddr,
+    endpoint_rate_limiter: Arc<EndpointRateLimiter>,
 ) -> anyhow::Result<()> {
     let websocket = websocket.await?;
     handle_client(
@@ -143,7 +146,65 @@ pub async fn serve_websocket(
         WebSocketRw::new(websocket),
         ClientMode::Websockets { hostname },
         peer_addr,
+        endpoint_rate_limiter,
     )
     .await?;
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use std::pin::pin;
+
+    use futures::{SinkExt, StreamExt};
+    use hyper_tungstenite::{
+        tungstenite::{protocol::Role, Message},
+        WebSocketStream,
+    };
+    use tokio::{
+        io::{duplex, AsyncReadExt, AsyncWriteExt},
+        task::JoinSet,
+    };
+
+    use super::WebSocketRw;
+
+    #[tokio::test]
+    async fn websocket_stream_wrapper_happy_path() {
+        let (stream1, stream2) = duplex(1024);
+
+        let mut js = JoinSet::new();
+
+        js.spawn(async move {
+            let mut client = WebSocketStream::from_raw_socket(stream1, Role::Client, None).await;
+
+            client
+                .send(Message::Binary(b"hello world".to_vec()))
+                .await
+                .unwrap();
+
+            let message = client.next().await.unwrap().unwrap();
+            assert_eq!(message, Message::Binary(b"websockets are cool".to_vec()));
+
+            client.close(None).await.unwrap();
+        });
+
+        js.spawn(async move {
+            let mut rw = pin!(WebSocketRw::new(
+                WebSocketStream::from_raw_socket(stream2, Role::Server, None).await
+            ));
+
+            let mut buf = vec![0; 1024];
+            let n = rw.read(&mut buf).await.unwrap();
+            assert_eq!(&buf[..n], b"hello world");
+
+            rw.write_all(b"websockets are cool").await.unwrap();
+            rw.flush().await.unwrap();
+
+            let n = rw.read_to_end(&mut buf).await.unwrap();
+            assert_eq!(n, 0);
+        });
+
+        js.join_next().await.unwrap().unwrap();
+        js.join_next().await.unwrap().unwrap();
+    }
+}
diff --git a/s3_scrubber/Cargo.toml b/s3_scrubber/Cargo.toml
index e26f2c6d6b..fdae378d55 100644
--- a/s3_scrubber/Cargo.toml
+++ b/s3_scrubber/Cargo.toml
@@ -31,6 +31,7 @@ reqwest = { workspace = true, default-features = false, features = ["rustls-tls"
 aws-config = { workspace = true, default-features = false, features = ["rustls", "sso"] }
 
 pageserver = { path = "../pageserver" }
+pageserver_api = { path = "../libs/pageserver_api" }
 remote_storage = { path = "../libs/remote_storage" }
 
 tracing.workspace = true
diff --git a/s3_scrubber/src/checks.rs b/s3_scrubber/src/checks.rs
index 510a128663..7b9f96dce3 100644
--- a/s3_scrubber/src/checks.rs
+++ b/s3_scrubber/src/checks.rs
@@ -1,19 +1,21 @@
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 
 use anyhow::Context;
 use aws_sdk_s3::{types::ObjectIdentifier, Client};
+use pageserver::tenant::remote_timeline_client::index::IndexLayerMetadata;
+use pageserver_api::shard::ShardIndex;
 use tracing::{error, info, warn};
 use utils::generation::Generation;
+use utils::id::TimelineId;
 
 use crate::cloud_admin_api::BranchData;
 use crate::metadata_stream::stream_listing;
-use crate::{download_object_with_retries, RootTarget};
+use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId};
 use futures_util::{pin_mut, StreamExt};
 use pageserver::tenant::remote_timeline_client::parse_remote_index_path;
 use pageserver::tenant::storage_layer::LayerFileName;
 use pageserver::tenant::IndexPart;
 use remote_storage::RemotePath;
-use utils::id::TenantTimelineId;
 
 pub(crate) struct TimelineAnalysis {
     /// Anomalies detected
@@ -39,9 +41,9 @@ impl TimelineAnalysis {
     }
 }
 
-pub(crate) async fn branch_cleanup_and_check_errors(
-    id: &TenantTimelineId,
-    s3_root: &RootTarget,
+pub(crate) fn branch_cleanup_and_check_errors(
+    id: &TenantShardTimelineId,
+    tenant_objects: &mut TenantObjectListing,
     s3_active_branch: Option<&BranchData>,
     console_branch: Option<BranchData>,
     s3_data: Option<S3TimelineBlobData>,
@@ -73,8 +75,8 @@ pub(crate) async fn branch_cleanup_and_check_errors(
             match s3_data.blob_data {
                 BlobDataParseResult::Parsed {
                     index_part,
-                    index_part_generation,
-                    mut s3_layers,
+                    index_part_generation: _index_part_generation,
+                    s3_layers: _s3_layers,
                 } => {
                     if !IndexPart::KNOWN_VERSIONS.contains(&index_part.get_version()) {
                         result.errors.push(format!(
@@ -112,64 +114,21 @@ pub(crate) async fn branch_cleanup_and_check_errors(
                             ))
                         }
 
-                        let layer_map_key = (layer, metadata.generation);
-                        if !s3_layers.remove(&layer_map_key) {
+                        if !tenant_objects.check_ref(id.timeline_id, &layer, &metadata) {
                             // FIXME: this will emit false positives if an index was
                             // uploaded concurrently with our scan.  To make this check
                             // correct, we need to try sending a HEAD request for the
                             // layer we think is missing.
                             result.errors.push(format!(
-                                "index_part.json contains a layer {}{} that is not present in remote storage",
-                                layer_map_key.0.file_name(),
-                                layer_map_key.1.get_suffix()
+                                "index_part.json contains a layer {}{} (shard {}) that is not present in remote storage",
+                                layer.file_name(),
+                                metadata.generation.get_suffix(),
+                                metadata.shard
                             ))
                         }
                     }
-
-                    let orphan_layers: Vec<(LayerFileName, Generation)> = s3_layers
-                        .into_iter()
-                        .filter(|(_layer_name, gen)|
-                            // A layer is only considered orphaned if it has a generation below
-                            // the index.  If the generation is >= the index, then the layer may
-                            // be an upload from a running pageserver, or even an upload from
-                            // a new generation that didn't upload an index yet.
-                            //
-                            // Even so, a layer that is not referenced by the index could just
-                            // be something enqueued for deletion, so while this check is valid
-                            // for indicating that a layer is garbage, it is not an indicator
-                            // of a problem.
-                            gen < &index_part_generation)
-                        .collect();
-
-                    if !orphan_layers.is_empty() {
-                        result.errors.push(format!(
-                            "index_part.json does not contain layers from S3: {:?}",
-                            orphan_layers
-                                .iter()
-                                .map(|(layer_name, gen)| format!(
-                                    "{}{}",
-                                    layer_name.file_name(),
-                                    gen.get_suffix()
-                                ))
-                                .collect::<Vec<_>>(),
-                        ));
-                        result.garbage_keys.extend(orphan_layers.iter().map(
-                            |(layer_name, layer_gen)| {
-                                let mut key = s3_root.timeline_root(id).prefix_in_bucket;
-                                let delimiter = s3_root.delimiter();
-                                if !key.ends_with(delimiter) {
-                                    key.push_str(delimiter);
-                                }
-                                key.push_str(&format!(
-                                    "{}{}",
-                                    &layer_name.file_name(),
-                                    layer_gen.get_suffix()
-                                ));
-                                key
-                            },
-                        ));
-                    }
                 }
+                BlobDataParseResult::Relic => {}
                 BlobDataParseResult::Incorrect(parse_errors) => result.errors.extend(
                     parse_errors
                         .into_iter()
@@ -202,6 +161,83 @@ pub(crate) async fn branch_cleanup_and_check_errors(
     result
 }
 
+#[derive(Default)]
+pub(crate) struct LayerRef {
+    ref_count: usize,
+}
+
+/// Top-level index of objects in a tenant.  This may be used by any shard-timeline within
+/// the tenant to query whether an object exists.
+#[derive(Default)]
+pub(crate) struct TenantObjectListing {
+    shard_timelines:
+        HashMap<(ShardIndex, TimelineId), HashMap<(LayerFileName, Generation), LayerRef>>,
+}
+
+impl TenantObjectListing {
+    /// Having done an S3 listing of the keys within a timeline prefix, merge them into the overall
+    /// list of layer keys for the Tenant.
+    pub(crate) fn push(
+        &mut self,
+        ttid: TenantShardTimelineId,
+        layers: HashSet<(LayerFileName, Generation)>,
+    ) {
+        let shard_index = ShardIndex::new(
+            ttid.tenant_shard_id.shard_number,
+            ttid.tenant_shard_id.shard_count,
+        );
+        let replaced = self.shard_timelines.insert(
+            (shard_index, ttid.timeline_id),
+            layers
+                .into_iter()
+                .map(|l| (l, LayerRef::default()))
+                .collect(),
+        );
+
+        assert!(
+            replaced.is_none(),
+            "Built from an S3 object listing, which should never repeat a key"
+        );
+    }
+
+    /// Having loaded a timeline index, check if a layer referenced by the index exists.  If it does,
+    /// the layer's refcount will be incremented.  Later, after calling this for all references in all indices
+    /// in a tenant, orphan layers may be detected by their zero refcounts.
+    ///
+    /// Returns true if the layer exists
+    pub(crate) fn check_ref(
+        &mut self,
+        timeline_id: TimelineId,
+        layer_file: &LayerFileName,
+        metadata: &IndexLayerMetadata,
+    ) -> bool {
+        let Some(shard_tl) = self.shard_timelines.get_mut(&(metadata.shard, timeline_id)) else {
+            return false;
+        };
+
+        let Some(layer_ref) = shard_tl.get_mut(&(layer_file.clone(), metadata.generation)) else {
+            return false;
+        };
+
+        layer_ref.ref_count += 1;
+
+        true
+    }
+
+    pub(crate) fn get_orphans(&self) -> Vec<(ShardIndex, TimelineId, LayerFileName, Generation)> {
+        let mut result = Vec::new();
+        for ((shard_index, timeline_id), layers) in &self.shard_timelines {
+            for ((layer_file, generation), layer_ref) in layers {
+                if layer_ref.ref_count == 0 {
+                    result.push((*shard_index, *timeline_id, layer_file.clone(), *generation))
+                }
+            }
+        }
+
+        result
+    }
+}
+
 #[derive(Debug)]
 pub(crate) struct S3TimelineBlobData {
     pub(crate) blob_data: BlobDataParseResult,
@@ -215,6 +251,8 @@ pub(crate) enum BlobDataParseResult {
         index_part_generation: Generation,
         s3_layers: HashSet<(LayerFileName, Generation)>,
     },
+    /// The remains of a deleted Timeline (i.e. an initdb archive only)
+    Relic,
     Incorrect(Vec<String>),
 }
 
@@ -233,7 +271,7 @@ fn parse_layer_object_name(name: &str) -> Result<(LayerFileName, Generation), St
 
 pub(crate) async fn list_timeline_blobs(
     s3_client: &Client,
-    id: TenantTimelineId,
+    id: TenantShardTimelineId,
     s3_root: &RootTarget,
 ) -> anyhow::Result<S3TimelineBlobData> {
     let mut s3_layers = HashSet::new();
@@ -245,6 +283,7 @@ pub(crate) async fn list_timeline_blobs(
     timeline_dir_target.delimiter = String::new();
 
     let mut index_parts: Vec<ObjectIdentifier> = Vec::new();
+    let mut initdb_archive: bool = false;
 
     let stream = stream_listing(s3_client, &timeline_dir_target);
     pin_mut!(stream);
@@ -258,6 +297,10 @@ pub(crate) async fn list_timeline_blobs(
                 tracing::info!("Index key {key}");
                 index_parts.push(obj)
             }
+            Some("initdb.tar.zst") => {
+                tracing::info!("initdb archive {key}");
+                initdb_archive = true;
+            }
             Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) {
                 Ok((new_layer, gen)) => {
                     tracing::info!("Parsed layer key: {} {:?}", new_layer, gen);
@@ -279,6 +322,16 @@ pub(crate) async fn list_timeline_blobs(
         }
     }
 
+    if index_parts.is_empty() && s3_layers.is_empty() && initdb_archive {
+        tracing::info!(
+            "Timeline is empty apart from initdb archive: expected post-deletion state."
+        );
+        return Ok(S3TimelineBlobData {
+            blob_data: BlobDataParseResult::Relic,
+            keys_to_remove: Vec::new(),
+        });
+    }
+
     // Choose the index_part with the highest generation
     let (index_part_object, index_part_generation) = match index_parts
         .iter()
diff --git a/s3_scrubber/src/garbage.rs b/s3_scrubber/src/garbage.rs
index f27e1d7f65..7192afb91b 100644
--- a/s3_scrubber/src/garbage.rs
+++ b/s3_scrubber/src/garbage.rs
@@ -10,15 +10,16 @@ use aws_sdk_s3::{
     Client,
 };
 use futures_util::{pin_mut, TryStreamExt};
+use pageserver_api::shard::TenantShardId;
 use serde::{Deserialize, Serialize};
 use tokio_stream::StreamExt;
-use utils::id::{TenantId, TenantTimelineId};
+use utils::id::TenantId;
 
 use crate::{
     cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData},
     init_remote,
     metadata_stream::{stream_listing, stream_tenant_timelines, stream_tenants},
-    BucketConfig, ConsoleConfig, NodeKind, RootTarget, TraversingDepth,
+    BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId, TraversingDepth,
 };
 
 #[derive(Serialize, Deserialize, Debug)]
@@ -29,8 +30,8 @@ enum GarbageReason {
 
 #[derive(Serialize, Deserialize, Debug)]
 enum GarbageEntity {
-    Tenant(TenantId),
-    Timeline(TenantTimelineId),
+    Tenant(TenantShardId),
+    Timeline(TenantShardTimelineId),
 }
 
 #[derive(Serialize, Deserialize, Debug)]
@@ -142,6 +143,9 @@ async fn find_garbage_inner(
         console_projects.len()
     );
 
+    // TODO(sharding): batch calls into Console so that we only call once for each TenantId,
+    // rather than checking the same TenantId for multiple TenantShardId
+
     // Enumerate Tenants in S3, and check if each one exists in Console
     tracing::info!("Finding all tenants in bucket {}...", bucket_config.bucket);
     let tenants = stream_tenants(&s3_client, &target);
@@ -149,10 +153,10 @@ async fn find_garbage_inner(
         let api_client = cloud_admin_api_client.clone();
         let console_projects = &console_projects;
         async move {
-            match console_projects.get(&t) {
+            match console_projects.get(&t.tenant_id) {
                 Some(project_data) => Ok((t, Some(project_data.clone()))),
                 None => api_client
-                    .find_tenant_project(t)
+                    .find_tenant_project(t.tenant_id)
                     .await
                     .map_err(|e| anyhow::anyhow!(e))
                     .map(|r| (t, r)),
@@ -166,21 +170,21 @@ async fn find_garbage_inner(
     // checks if they are enabled by the `depth` parameter.
     pin_mut!(tenants_checked);
     let mut garbage = GarbageList::new(node_kind, bucket_config);
-    let mut active_tenants: Vec<TenantId> = vec![];
+    let mut active_tenants: Vec<TenantShardId> = vec![];
     let mut counter = 0;
     while let Some(result) = tenants_checked.next().await {
-        let (tenant_id, console_result) = result?;
+        let (tenant_shard_id, console_result) = result?;
 
         // Paranoia check
         if let Some(project) = &console_result {
-            assert!(project.tenant == tenant_id);
+            assert!(project.tenant == tenant_shard_id.tenant_id);
         }
 
-        if garbage.maybe_append(GarbageEntity::Tenant(tenant_id), console_result) {
-            tracing::debug!("Tenant {tenant_id} is garbage");
+        if garbage.maybe_append(GarbageEntity::Tenant(tenant_shard_id), console_result) {
+            tracing::debug!("Tenant {tenant_shard_id} is garbage");
         } else {
-            tracing::debug!("Tenant {tenant_id} is active");
-            active_tenants.push(tenant_id);
+            tracing::debug!("Tenant {tenant_shard_id} is active");
+            active_tenants.push(tenant_shard_id);
         }
 
         counter += 1;
@@ -266,13 +270,13 @@ impl std::fmt::Display for PurgeMode {
 pub async fn get_tenant_objects(
     s3_client: &Arc<Client>,
     target: RootTarget,
-    tenant_id: TenantId,
+    tenant_shard_id: TenantShardId,
 ) -> anyhow::Result<Vec<ObjectIdentifier>> {
-    tracing::debug!("Listing objects in tenant {tenant_id}");
+    tracing::debug!("Listing objects in tenant {tenant_shard_id}");
     // TODO: apply extra validation based on object modification time.  Don't purge
     // tenants where any timeline's index_part.json has been touched recently.
 
-    let mut tenant_root = target.tenant_root(&tenant_id);
+    let mut tenant_root = target.tenant_root(&tenant_shard_id);
 
     // Remove delimiter, so that object listing lists all keys in the prefix and not just
     // common prefixes.
@@ -285,7 +289,7 @@ pub async fn get_tenant_objects(
 pub async fn get_timeline_objects(
     s3_client: &Arc<Client>,
     target: RootTarget,
-    ttid: TenantTimelineId,
+    ttid: TenantShardTimelineId,
 ) -> anyhow::Result<Vec<ObjectIdentifier>> {
     tracing::debug!("Listing objects in timeline {ttid}");
     let mut timeline_root = target.timeline_root(&ttid);
diff --git a/s3_scrubber/src/lib.rs b/s3_scrubber/src/lib.rs
index e5465952fb..d2338c21e5 100644
--- a/s3_scrubber/src/lib.rs
+++ b/s3_scrubber/src/lib.rs
@@ -22,6 +22,7 @@ use aws_sdk_s3::{Client, Config};
 
 use clap::ValueEnum;
 use pageserver::tenant::TENANTS_SEGMENT_NAME;
+use pageserver_api::shard::TenantShardId;
 use reqwest::Url;
 use serde::{Deserialize, Serialize};
 use std::io::IsTerminal;
@@ -29,7 +30,7 @@ use tokio::io::AsyncReadExt;
 use tracing::error;
 use tracing_appender::non_blocking::WorkerGuard;
 use tracing_subscriber::{fmt, prelude::*, EnvFilter};
-use utils::id::{TenantId, TenantTimelineId};
+use utils::id::TimelineId;
 
 const MAX_RETRIES: usize = 20;
 const CLOUD_ADMIN_API_TOKEN_ENV_VAR: &str = "CLOUD_ADMIN_API_TOKEN";
@@ -44,6 +45,35 @@ pub struct S3Target {
     pub delimiter: String,
 }
 
+/// Convenience for referring to timelines within a particular shard: more ergonomic
+/// than using a 2-tuple.
+///
+/// This is the shard-aware equivalent of TenantTimelineId.  It's defined here rather
+/// than somewhere more broadly exposed, because this kind of thing is rarely needed
+/// in the pageserver, as all timeline objects existing in the scope of a particular
+/// tenant: the scrubber is different in that it handles collections of data referring to many
+/// TenantShardTimelineIds in on place.
+#[derive(Serialize, Deserialize, Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub struct TenantShardTimelineId {
+    tenant_shard_id: TenantShardId,
+    timeline_id: TimelineId,
+}
+
+impl TenantShardTimelineId {
+    fn new(tenant_shard_id: TenantShardId, timeline_id: TimelineId) -> Self {
+        Self {
+            tenant_shard_id,
+            timeline_id,
+        }
+    }
+}
+
+impl Display for TenantShardTimelineId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}/{}", self.tenant_shard_id, self.timeline_id)
+    }
+}
+
 #[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq)]
 pub enum TraversingDepth {
     Tenant,
@@ -86,7 +116,9 @@ impl S3Target {
         if new_self.prefix_in_bucket.is_empty() {
             new_self.prefix_in_bucket = format!("/{}/", new_segment);
         } else {
-            let _ = new_self.prefix_in_bucket.pop();
+            if new_self.prefix_in_bucket.ends_with('/') {
+                new_self.prefix_in_bucket.pop();
+            }
             new_self.prefix_in_bucket =
                 [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
         }
@@ -108,19 +140,19 @@ impl RootTarget {
         }
     }
 
-    pub fn tenant_root(&self, tenant_id: &TenantId) -> S3Target {
+    pub fn tenant_root(&self, tenant_id: &TenantShardId) -> S3Target {
         self.tenants_root().with_sub_segment(&tenant_id.to_string())
     }
 
-    pub fn timelines_root(&self, tenant_id: &TenantId) -> S3Target {
+    pub fn timelines_root(&self, tenant_id: &TenantShardId) -> S3Target {
         match self {
             Self::Pageserver(_) => self.tenant_root(tenant_id).with_sub_segment("timelines"),
             Self::Safekeeper(_) => self.tenant_root(tenant_id),
         }
     }
 
-    pub fn timeline_root(&self, id: &TenantTimelineId) -> S3Target {
-        self.timelines_root(&id.tenant_id)
+    pub fn timeline_root(&self, id: &TenantShardTimelineId) -> S3Target {
+        self.timelines_root(&id.tenant_shard_id)
             .with_sub_segment(&id.timeline_id.to_string())
     }
 
diff --git a/s3_scrubber/src/main.rs b/s3_scrubber/src/main.rs
index 1f0ceebdaf..ef020edc2a 100644
--- a/s3_scrubber/src/main.rs
+++ b/s3_scrubber/src/main.rs
@@ -57,7 +57,7 @@ async fn main() -> anyhow::Result<()> {
     ));
 
     match cli.command {
-        Command::ScanMetadata { json } => match scan_metadata(bucket_config).await {
+        Command::ScanMetadata { json } => match scan_metadata(bucket_config.clone()).await {
             Err(e) => {
                 tracing::error!("Failed: {e}");
                 Err(e)
@@ -70,6 +70,17 @@ async fn main() -> anyhow::Result<()> {
                 }
                 if summary.is_fatal() {
                     Err(anyhow::anyhow!("Fatal scrub errors detected"))
+                } else if summary.is_empty() {
+                    // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
+                    // scrubber they were likely expecting to scan something, and if we see no timelines
+                    // at all then it's likely due to some configuration issues like a bad prefix
+                    Err(anyhow::anyhow!(
+                        "No timelines found in bucket {} prefix {}",
+                        bucket_config.bucket,
+                        bucket_config
+                            .prefix_in_bucket
+                            .unwrap_or("<none>".to_string())
+                    ))
                 } else {
                     Ok(())
                 }
diff --git a/s3_scrubber/src/metadata_stream.rs b/s3_scrubber/src/metadata_stream.rs
index 4cfa77cfc1..073f37f319 100644
--- a/s3_scrubber/src/metadata_stream.rs
+++ b/s3_scrubber/src/metadata_stream.rs
@@ -3,14 +3,15 @@ use async_stream::{stream, try_stream};
 use aws_sdk_s3::{types::ObjectIdentifier, Client};
 use tokio_stream::Stream;
 
-use crate::{list_objects_with_retries, RootTarget, S3Target, TenantId};
-use utils::id::{TenantTimelineId, TimelineId};
+use crate::{list_objects_with_retries, RootTarget, S3Target, TenantShardTimelineId};
+use pageserver_api::shard::TenantShardId;
+use utils::id::TimelineId;
 
 /// Given an S3 bucket, output a stream of TenantIds discovered via ListObjectsv2
 pub fn stream_tenants<'a>(
     s3_client: &'a Client,
     target: &'a RootTarget,
-) -> impl Stream<Item = anyhow::Result<TenantId>> + 'a {
+) -> impl Stream<Item = anyhow::Result<TenantShardId>> + 'a {
     try_stream! {
         let mut continuation_token = None;
         let tenants_target = target.tenants_root();
@@ -44,14 +45,14 @@ pub fn stream_tenants<'a>(
     }
 }
 
-/// Given a TenantId, output a stream of the timelines within that tenant, discovered
+/// Given a TenantShardId, output a stream of the timelines within that tenant, discovered
 /// using ListObjectsv2.  The listing is done before the stream is built, so that this
 /// function can be used to generate concurrency on a stream using buffer_unordered.
 pub async fn stream_tenant_timelines<'a>(
     s3_client: &'a Client,
     target: &'a RootTarget,
-    tenant: TenantId,
-) -> anyhow::Result<impl Stream<Item = Result<TenantTimelineId, anyhow::Error>> + 'a> {
+    tenant: TenantShardId,
+) -> anyhow::Result<impl Stream<Item = Result<TenantShardTimelineId, anyhow::Error>> + 'a> {
     let mut timeline_ids: Vec<Result<TimelineId, anyhow::Error>> = Vec::new();
     let mut continuation_token = None;
     let timelines_target = target.timelines_root(&tenant);
@@ -98,7 +99,7 @@ pub async fn stream_tenant_timelines<'a>(
     Ok(stream! {
         for i in timeline_ids {
             let id = i?;
-            yield Ok(TenantTimelineId::new(tenant, id));
+            yield Ok(TenantShardTimelineId::new(tenant, id));
         }
     })
 }
diff --git a/s3_scrubber/src/scan_metadata.rs b/s3_scrubber/src/scan_metadata.rs
index ad82db1e76..bcc4d2e618 100644
--- a/s3_scrubber/src/scan_metadata.rs
+++ b/s3_scrubber/src/scan_metadata.rs
@@ -2,23 +2,25 @@ use std::collections::{HashMap, HashSet};
 
 use crate::checks::{
     branch_cleanup_and_check_errors, list_timeline_blobs, BlobDataParseResult, S3TimelineBlobData,
-    TimelineAnalysis,
+    TenantObjectListing, TimelineAnalysis,
 };
 use crate::metadata_stream::{stream_tenant_timelines, stream_tenants};
-use crate::{init_remote, BucketConfig, NodeKind, RootTarget};
+use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId};
 use aws_sdk_s3::Client;
 use futures_util::{pin_mut, StreamExt, TryStreamExt};
 use histogram::Histogram;
+use pageserver::tenant::remote_timeline_client::remote_layer_path;
 use pageserver::tenant::IndexPart;
+use pageserver_api::shard::TenantShardId;
 use serde::Serialize;
-use utils::id::TenantTimelineId;
+use utils::id::TenantId;
 
 #[derive(Serialize)]
 pub struct MetadataSummary {
     count: usize,
-    with_errors: HashSet<TenantTimelineId>,
-    with_warnings: HashSet<TenantTimelineId>,
-    with_garbage: HashSet<TenantTimelineId>,
+    with_errors: HashSet<TenantShardTimelineId>,
+    with_warnings: HashSet<TenantShardTimelineId>,
+    with_orphans: HashSet<TenantShardTimelineId>,
     indices_by_version: HashMap<usize, usize>,
 
     layer_count: MinMaxHisto,
@@ -88,7 +90,7 @@ impl MetadataSummary {
             count: 0,
             with_errors: HashSet::new(),
             with_warnings: HashSet::new(),
-            with_garbage: HashSet::new(),
+            with_orphans: HashSet::new(),
             indices_by_version: HashMap::new(),
             layer_count: MinMaxHisto::new(),
             timeline_size_bytes: MinMaxHisto::new(),
@@ -132,7 +134,7 @@ impl MetadataSummary {
         }
     }
 
-    fn update_analysis(&mut self, id: &TenantTimelineId, analysis: &TimelineAnalysis) {
+    fn update_analysis(&mut self, id: &TenantShardTimelineId, analysis: &TimelineAnalysis) {
         if !analysis.errors.is_empty() {
             self.with_errors.insert(*id);
         }
@@ -142,6 +144,10 @@ impl MetadataSummary {
         }
     }
 
+    fn notify_timeline_orphan(&mut self, ttid: &TenantShardTimelineId) {
+        self.with_orphans.insert(*ttid);
+    }
+
     /// Long-form output for printing at end of a scan
     pub fn summary_string(&self) -> String {
         let version_summary: String = itertools::join(
@@ -155,7 +161,7 @@ impl MetadataSummary {
             "Timelines: {0}
 With errors: {1}
 With warnings: {2}
-With garbage: {3}
+With orphan layers: {3}
 Index versions: {version_summary}
 Timeline size bytes: {4}
 Layer size bytes: {5}
@@ -164,7 +170,7 @@ Timeline layer count: {6}
             self.count,
             self.with_errors.len(),
             self.with_warnings.len(),
-            self.with_garbage.len(),
+            self.with_orphans.len(),
             self.timeline_size_bytes.oneline(),
             self.layer_size_bytes.oneline(),
             self.layer_count.oneline(),
@@ -174,6 +180,10 @@ Timeline layer count: {6}
     pub fn is_fatal(&self) -> bool {
         !self.with_errors.is_empty()
     }
+
+    pub fn is_empty(&self) -> bool {
+        self.count == 0
+    }
 }
 
 /// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics.
@@ -188,31 +198,131 @@ pub async fn scan_metadata(bucket_config: BucketConfig) -> anyhow::Result<Metada
 
     // Generate a stream of TenantTimelineId
     let timelines = tenants.map_ok(|t| stream_tenant_timelines(&s3_client, &target, t));
-    let timelines = timelines.try_buffer_unordered(CONCURRENCY);
+    let timelines = timelines.try_buffered(CONCURRENCY);
     let timelines = timelines.try_flatten();
 
     // Generate a stream of S3TimelineBlobData
     async fn report_on_timeline(
         s3_client: &Client,
         target: &RootTarget,
-        ttid: TenantTimelineId,
-    ) -> anyhow::Result<(TenantTimelineId, S3TimelineBlobData)> {
+        ttid: TenantShardTimelineId,
+    ) -> anyhow::Result<(TenantShardTimelineId, S3TimelineBlobData)> {
         let data = list_timeline_blobs(s3_client, ttid, target).await?;
         Ok((ttid, data))
     }
     let timelines = timelines.map_ok(|ttid| report_on_timeline(&s3_client, &target, ttid));
-    let timelines = timelines.try_buffer_unordered(CONCURRENCY);
+    let timelines = timelines.try_buffered(CONCURRENCY);
 
+    // We must gather all the TenantShardTimelineId->S3TimelineBlobData for each tenant, because different
+    // shards in the same tenant might refer to one anothers' keys if a shard split has happened.
+
+    let mut tenant_id = None;
+    let mut tenant_objects = TenantObjectListing::default();
+    let mut tenant_timeline_results = Vec::new();
+
+    fn analyze_tenant(
+        tenant_id: TenantId,
+        summary: &mut MetadataSummary,
+        mut tenant_objects: TenantObjectListing,
+        timelines: Vec<(TenantShardTimelineId, S3TimelineBlobData)>,
+    ) {
+        let mut timeline_generations = HashMap::new();
+        for (ttid, data) in timelines {
+            // Stash the generation of each timeline, for later use identifying orphan layers
+            if let BlobDataParseResult::Parsed {
+                index_part: _index_part,
+                index_part_generation,
+                s3_layers: _s3_layers,
+            } = &data.blob_data
+            {
+                timeline_generations.insert(ttid, *index_part_generation);
+            }
+
+            // Apply checks to this timeline shard's metadata, and in the process update `tenant_objects`
+            // reference counts for layers across the tenant.
+            let analysis =
+                branch_cleanup_and_check_errors(&ttid, &mut tenant_objects, None, None, Some(data));
+            summary.update_analysis(&ttid, &analysis);
+        }
+
+        // Identifying orphan layers must be done on a tenant-wide basis, because individual
+        // shards' layers may be referenced by other shards.
+        //
+        // Orphan layers are not a corruption, and not an indication of a problem.  They are just
+        // consuming some space in remote storage, and may be cleaned up at leisure.
+        for (shard_index, timeline_id, layer_file, generation) in tenant_objects.get_orphans() {
+            let ttid = TenantShardTimelineId {
+                tenant_shard_id: TenantShardId {
+                    tenant_id,
+                    shard_count: shard_index.shard_count,
+                    shard_number: shard_index.shard_number,
+                },
+                timeline_id,
+            };
+
+            if let Some(timeline_generation) = timeline_generations.get(&ttid) {
+                if &generation >= timeline_generation {
+                    // Candidate orphan layer is in the current or future generation relative
+                    // to the index we read for this timeline shard, so its absence from the index
+                    // doesn't make it an orphan: more likely, it is a case where the layer was
+                    // uploaded, but the index referencing the layer wasn't written yet.
+                    continue;
+                }
+            }
+
+            let orphan_path = remote_layer_path(
+                &tenant_id,
+                &timeline_id,
+                shard_index,
+                &layer_file,
+                generation,
+            );
+
+            tracing::info!("Orphan layer detected: {orphan_path}");
+
+            summary.notify_timeline_orphan(&ttid);
+        }
+    }
+
+    // Iterate through  all the timeline results.  These are in key-order, so
+    // all results for the same tenant will be adjacent.  We accumulate these,
+    // and then call `analyze_tenant` to flush, when we see the next tenant ID.
     let mut summary = MetadataSummary::new();
     pin_mut!(timelines);
     while let Some(i) = timelines.next().await {
         let (ttid, data) = i?;
         summary.update_data(&data);
 
-        let analysis =
-            branch_cleanup_and_check_errors(&ttid, &target, None, None, Some(data)).await;
+        match tenant_id {
+            None => tenant_id = Some(ttid.tenant_shard_id.tenant_id),
+            Some(prev_tenant_id) => {
+                if prev_tenant_id != ttid.tenant_shard_id.tenant_id {
+                    let tenant_objects = std::mem::take(&mut tenant_objects);
+                    let timelines = std::mem::take(&mut tenant_timeline_results);
+                    analyze_tenant(prev_tenant_id, &mut summary, tenant_objects, timelines);
+                    tenant_id = Some(ttid.tenant_shard_id.tenant_id);
+                }
+            }
+        }
 
-        summary.update_analysis(&ttid, &analysis);
+        if let BlobDataParseResult::Parsed {
+            index_part: _index_part,
+            index_part_generation: _index_part_generation,
+            s3_layers,
+        } = &data.blob_data
+        {
+            tenant_objects.push(ttid, s3_layers.clone());
+        }
+        tenant_timeline_results.push((ttid, data));
+    }
+
+    if !tenant_timeline_results.is_empty() {
+        analyze_tenant(
+            tenant_id.expect("Must be set if results are present"),
+            &mut summary,
+            tenant_objects,
+            tenant_timeline_results,
+        );
     }
 
     Ok(summary)
diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs
index d5333abae6..761541168c 100644
--- a/safekeeper/src/handler.rs
+++ b/safekeeper/src/handler.rs
@@ -11,7 +11,7 @@ use tracing::{debug, info, info_span, Instrument};
 use crate::auth::check_permission;
 use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage};
 
-use crate::metrics::{TrafficMetrics, PG_QUERIES_FINISHED, PG_QUERIES_RECEIVED};
+use crate::metrics::{TrafficMetrics, PG_QUERIES_GAUGE};
 use crate::safekeeper::Term;
 use crate::timeline::TimelineError;
 use crate::wal_service::ConnectionId;
@@ -210,10 +210,7 @@ impl<IO: AsyncRead + AsyncWrite + Unpin + Send> postgres_backend::Handler<IO>
         let cmd = parse_cmd(query_string)?;
         let cmd_str = cmd_to_string(&cmd);
 
-        PG_QUERIES_RECEIVED.with_label_values(&[cmd_str]).inc();
-        scopeguard::defer! {
-            PG_QUERIES_FINISHED.with_label_values(&[cmd_str]).inc();
-        }
+        let _guard = PG_QUERIES_GAUGE.with_label_values(&[cmd_str]).guard();
 
         info!("got query {:?}", query_string);
 
diff --git a/safekeeper/src/metrics.rs b/safekeeper/src/metrics.rs
index 0711beb290..11a3f48922 100644
--- a/safekeeper/src/metrics.rs
+++ b/safekeeper/src/metrics.rs
@@ -11,7 +11,8 @@ use futures::Future;
 use metrics::{
     core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts},
     proto::MetricFamily,
-    register_int_counter, register_int_counter_vec, Gauge, IntCounter, IntCounterVec, IntGaugeVec,
+    register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, Gauge,
+    IntCounter, IntCounterPairVec, IntCounterVec, IntGaugeVec,
 };
 use once_cell::sync::Lazy;
 
@@ -89,16 +90,10 @@ pub static BROKER_PULLED_UPDATES: Lazy<IntCounterVec> = Lazy::new(|| {
     )
     .expect("Failed to register safekeeper_broker_pulled_updates_total counter")
 });
-pub static PG_QUERIES_RECEIVED: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
+pub static PG_QUERIES_GAUGE: Lazy<IntCounterPairVec> = Lazy::new(|| {
+    register_int_counter_pair_vec!(
         "safekeeper_pg_queries_received_total",
         "Number of queries received through pg protocol",
-        &["query"]
-    )
-    .expect("Failed to register safekeeper_pg_queries_received_total counter")
-});
-pub static PG_QUERIES_FINISHED: Lazy<IntCounterVec> = Lazy::new(|| {
-    register_int_counter_vec!(
         "safekeeper_pg_queries_finished_total",
         "Number of queries finished through pg protocol",
         &["query"]
diff --git a/scripts/sk_collect_dumps/.gitignore b/scripts/sk_collect_dumps/.gitignore
index d9d4d0296a..cdf99aefd7 100644
--- a/scripts/sk_collect_dumps/.gitignore
+++ b/scripts/sk_collect_dumps/.gitignore
@@ -1,2 +1,4 @@
 result
 *.json
+hosts
+poetry.lock
diff --git a/scripts/sk_collect_dumps/ansible.cfg b/scripts/sk_collect_dumps/ansible.cfg
new file mode 100644
index 0000000000..150986ab79
--- /dev/null
+++ b/scripts/sk_collect_dumps/ansible.cfg
@@ -0,0 +1,11 @@
+[defaults]
+host_key_checking = False
+inventory=./hosts
+remote_tmp=/tmp
+remote_user=developer
+callbacks_enabled = profile_tasks
+
+[ssh_connection]
+scp_if_ssh = True
+ssh_args = -F ./ssh.cfg
+pipelining = True
diff --git a/scripts/sk_collect_dumps/pyproject.toml b/scripts/sk_collect_dumps/pyproject.toml
new file mode 100644
index 0000000000..c6f6adafe2
--- /dev/null
+++ b/scripts/sk_collect_dumps/pyproject.toml
@@ -0,0 +1,16 @@
+[tool.poetry]
+name = "sk-collect-dumps"
+version = "0.1.0"
+description = ""
+authors = ["Arseny Sher <sher-ars@yandex.ru>"]
+readme = "README.md"
+packages = [{include = "sk_collect_dumps"}]
+
+[tool.poetry.dependencies]
+python = "^3.11"
+ansible = "^9.1.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/scripts/sk_collect_dumps/readme.md b/scripts/sk_collect_dumps/readme.md
index 52b73e9495..7494a6cb78 100644
--- a/scripts/sk_collect_dumps/readme.md
+++ b/scripts/sk_collect_dumps/readme.md
@@ -1,25 +1,43 @@
 # Collect /v1/debug_dump from all safekeeper nodes
 
-1. Run ansible playbooks to collect .json dumps from all safekeepers and store them in `./result` directory.
-2. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload dumps to `prod_feb30` table in specified postgres database.
-
-## How to use ansible (staging)
-
+3. Issue admin token (add/remove .stage from url for staging/prod and setting proper API key):
 ```
-AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+# staging:
+AUTH_TOKEN=$(curl https://console.stage.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_STAGING_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
+# prod:
+AUTH_TOKEN=$(curl https://console.neon.tech/regions/console/api/v1/admin/issue_token -H "Accept: application/json" -H "Content-Type: application/json" -H "Authorization: Bearer $NEON_PROD_KEY" -X POST -d '{"ttl_seconds": 43200, "scope": "safekeeperdata"}' 2>/dev/null | jq --raw-output '.jwt')
+# check
+echo $AUTH_TOKEN
+```
+2. Run ansible playbooks to collect .json dumps from all safekeepers and store them in `./result` directory.
 
-AWS_DEFAULT_PROFILE=dev ansible-playbook -i ../../.github/ansible/staging.eu-west-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+There are two ways to do that, with ssm or tsh. ssm:
+```
+# in aws repo, cd .github/ansible and run e.g. (adjusting profile and region in vars and limit):
+AWS_DEFAULT_PROFILE=dev ansible-playbook -i inventory_aws_ec2.yaml -i staging.us-east-2.vars.yaml -e @ssm_config -l 'safekeeper:&us_east_2' -e "auth_token=${AUTH_TOKEN}" ~/neon/neon/scripts/sk_collect_dumps/remote.yaml
+```
+It will put the results to .results directory *near the playbook*.
+
+tsh:
+
+Update the inventory, if needed, selecting .build/.tech and optionally region:
+```
+rm -f hosts && echo '[safekeeper]' >> hosts
+# staging:
+tsh ls | awk '{print $1}' | grep safekeeper | grep "neon.build" | grep us-east-2 >> hosts
+# prod:
+tsh ls | awk '{print $1}' | grep safekeeper | grep "neon.tech" | grep us-east-2 >> hosts
 ```
 
-## How to use ansible (prod)
-
+Test ansible connection:
 ```
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-west-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.us-east-2.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.eu-central-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
-
-AWS_DEFAULT_PROFILE=prod ansible-playbook -i ../../.github/ansible/prod.ap-southeast-1.hosts.yaml -e @../../.github/ansible/ssm_config remote.yaml
+ansible all -m ping -v
 ```
 
+Download the dumps:
+```
+mkdir -p result && rm -f result/*
+ansible-playbook -e "auth_token=${AUTH_TOKEN}" remote.yaml
+```
+
+3. Run `DB_CONNSTR=... ./upload.sh prod_feb30` to upload dumps to `prod_feb30` table in specified postgres database.
diff --git a/scripts/sk_collect_dumps/remote.yaml b/scripts/sk_collect_dumps/remote.yaml
index 29ce83efde..f214d0ae2c 100644
--- a/scripts/sk_collect_dumps/remote.yaml
+++ b/scripts/sk_collect_dumps/remote.yaml
@@ -1,18 +1,37 @@
 - name: Fetch state dumps from safekeepers
-  hosts: safekeepers
+  hosts: safekeeper
   gather_facts: False
-  remote_user: "{{ remote_user }}"
     
   tasks:
-    - name: Download file
+    - name: Dump file
       get_url:
         url: "http://{{ inventory_hostname }}:7676/v1/debug_dump?dump_all=true&dump_disk_content=false"
-        dest: "/tmp/{{ inventory_hostname }}.json"
+        dest: "/tmp/{{ inventory_hostname }}-dump.json"
+        headers:
+          Authorization: "Bearer {{ auth_token }}"
 
-    - name: Fetch file from remote hosts
+    - name: install rsync
+      ansible.builtin.apt:
+        name: rsync
+        update_cache: yes
+      become: yes
+      ignore_errors: true # it can be already installed and we don't always have sudo
+
+    - name: Fetch file from remote hosts (works only with ssm)
       fetch:
-        src: "/tmp/{{ inventory_hostname }}.json"
-        dest: "./result/{{ inventory_hostname }}.json"
+        src: "/tmp/{{ inventory_hostname }}-dump.json"
+        dest: "./result/{{ inventory_hostname }}-dump.json"
         flat: yes
         fail_on_missing: no
+      when: ansible_connection == "aws_ssm"
 
+    # xxx not sure how to make ansible 'synchronize' work with tsh
+    - name: Fetch file from remote hosts
+      shell: rsync -e 'tsh ssh' -azvP "developer@{{ inventory_hostname }}:/tmp/{{ inventory_hostname }}-dump.json"  "./result/{{ inventory_hostname }}-dump.json"
+      delegate_to: localhost
+      when: ansible_connection != "aws_ssm"
+
+    - name: remove remote dumps
+      ansible.builtin.file:
+        path: "/tmp/{{ inventory_hostname }}-dump.json"
+        state: absent
diff --git a/scripts/sk_collect_dumps/ssh.cfg b/scripts/sk_collect_dumps/ssh.cfg
new file mode 100644
index 0000000000..827c5d9286
--- /dev/null
+++ b/scripts/sk_collect_dumps/ssh.cfg
@@ -0,0 +1,13 @@
+# Begin generated Teleport configuration for teleport.aws.neon.tech by tsh
+
+# Common flags for all teleport.aws.neon.tech hosts
+Host *
+    HostKeyAlgorithms rsa-sha2-512-cert-v01@openssh.com,rsa-sha2-256-cert-v01@openssh.com,ssh-rsa-cert-v01@openssh.com
+
+# Flags for all teleport.aws.neon.tech hosts except the proxy
+Host * !teleport.aws.neon.tech
+    Port 3022
+    ProxyCommand "/usr/local/bin/tsh" proxy ssh --cluster=teleport.aws.neon.tech --proxy=teleport.aws.neon.tech:443 %r@%h:%p
+    User developer
+
+# End generated Teleport configuration
\ No newline at end of file
diff --git a/scripts/sk_collect_dumps/upload.sh b/scripts/sk_collect_dumps/upload.sh
index 2e54ecba1c..5189883fcb 100755
--- a/scripts/sk_collect_dumps/upload.sh
+++ b/scripts/sk_collect_dumps/upload.sh
@@ -31,22 +31,22 @@ SELECT
   (data->>'tenant_id') AS tenant_id,
   (data->>'timeline_id') AS timeline_id,
   (data->'memory'->>'active')::bool AS active,
-  (data->'memory'->>'flush_lsn')::bigint AS flush_lsn,
-  (data->'memory'->'mem_state'->>'backup_lsn')::bigint AS backup_lsn,
-  (data->'memory'->'mem_state'->>'commit_lsn')::bigint AS commit_lsn,
-  (data->'memory'->'mem_state'->>'peer_horizon_lsn')::bigint AS peer_horizon_lsn,
-  (data->'memory'->'mem_state'->>'remote_consistent_lsn')::bigint AS remote_consistent_lsn,
-  (data->'memory'->>'write_lsn')::bigint AS write_lsn,
+  (data->'memory'->>'flush_lsn')::pg_lsn AS flush_lsn,
+  (data->'memory'->'mem_state'->>'backup_lsn')::pg_lsn AS backup_lsn,
+  (data->'memory'->'mem_state'->>'commit_lsn')::pg_lsn AS commit_lsn,
+  (data->'memory'->'mem_state'->>'peer_horizon_lsn')::pg_lsn AS peer_horizon_lsn,
+  (data->'memory'->'mem_state'->>'remote_consistent_lsn')::pg_lsn AS remote_consistent_lsn,
+  (data->'memory'->>'write_lsn')::pg_lsn AS write_lsn,
   (data->'memory'->>'num_computes')::bigint AS num_computes,
-  (data->'memory'->>'epoch_start_lsn')::bigint AS epoch_start_lsn,
+  (data->'memory'->>'epoch_start_lsn')::pg_lsn AS epoch_start_lsn,
   (data->'memory'->>'last_removed_segno')::bigint AS last_removed_segno,
   (data->'memory'->>'is_cancelled')::bool AS is_cancelled,
-  (data->'control_file'->>'backup_lsn')::bigint AS disk_backup_lsn,
-  (data->'control_file'->>'commit_lsn')::bigint AS disk_commit_lsn,
+  (data->'control_file'->>'backup_lsn')::pg_lsn AS disk_backup_lsn,
+  (data->'control_file'->>'commit_lsn')::pg_lsn AS disk_commit_lsn,
   (data->'control_file'->'acceptor_state'->>'term')::bigint AS disk_term,
-  (data->'control_file'->>'local_start_lsn')::bigint AS local_start_lsn,
-  (data->'control_file'->>'peer_horizon_lsn')::bigint AS disk_peer_horizon_lsn,
-  (data->'control_file'->>'timeline_start_lsn')::bigint AS timeline_start_lsn,
-  (data->'control_file'->>'remote_consistent_lsn')::bigint AS disk_remote_consistent_lsn
+  (data->'control_file'->>'local_start_lsn')::pg_lsn AS local_start_lsn,
+  (data->'control_file'->>'peer_horizon_lsn')::pg_lsn AS disk_peer_horizon_lsn,
+  (data->'control_file'->>'timeline_start_lsn')::pg_lsn AS timeline_start_lsn,
+  (data->'control_file'->>'remote_consistent_lsn')::pg_lsn AS disk_remote_consistent_lsn
 FROM tmp_json
 EOF
diff --git a/storage_broker/benches/rps.rs b/storage_broker/benches/rps.rs
index a0c8e1f749..d66cbefa45 100644
--- a/storage_broker/benches/rps.rs
+++ b/storage_broker/benches/rps.rs
@@ -3,9 +3,12 @@ use std::sync::Arc;
 use std::time::{Duration, Instant};
 
 use clap::Parser;
-use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey;
-use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
-use storage_broker::proto::{SafekeeperTimelineInfo, SubscribeSafekeeperInfoRequest};
+
+use storage_broker::proto::SafekeeperTimelineInfo;
+use storage_broker::proto::{
+    FilterTenantTimelineId, MessageType, SubscribeByFilterRequest,
+    TenantTimelineId as ProtoTenantTimelineId, TypeSubscription, TypedMessage,
+};
 
 use storage_broker::{BrokerClientChannel, DEFAULT_ENDPOINT};
 use tokio::time;
@@ -91,15 +94,23 @@ async fn subscribe(client: Option<BrokerClientChannel>, counter: Arc<AtomicU64>,
         None => storage_broker::connect(DEFAULT_ENDPOINT, Duration::from_secs(5)).unwrap(),
     };
 
-    let key = SubscriptionKey::TenantTimelineId(ProtoTenantTimelineId {
+    let ttid = ProtoTenantTimelineId {
         tenant_id: vec![0xFF; 16],
         timeline_id: tli_from_u64(i),
-    });
-    let request = SubscribeSafekeeperInfoRequest {
-        subscription_key: Some(key),
     };
-    let mut stream = client
-        .subscribe_safekeeper_info(request)
+
+    let request = SubscribeByFilterRequest {
+        types: vec![TypeSubscription {
+            r#type: MessageType::SafekeeperTimelineInfo.into(),
+        }],
+        tenant_timeline_id: Some(FilterTenantTimelineId {
+            enabled: true,
+            tenant_timeline_id: Some(ttid),
+        }),
+    };
+
+    let mut stream: tonic::Streaming<TypedMessage> = client
+        .subscribe_by_filter(request)
         .await
         .unwrap()
         .into_inner();
diff --git a/storage_broker/proto/broker.proto b/storage_broker/proto/broker.proto
index aa9d62a29f..7d1b63d23f 100644
--- a/storage_broker/proto/broker.proto
+++ b/storage_broker/proto/broker.proto
@@ -10,6 +10,12 @@ service BrokerService {
 
     // Publish safekeeper updates.
     rpc PublishSafekeeperInfo(stream SafekeeperTimelineInfo) returns (google.protobuf.Empty) {};
+
+    // Subscribe to all messages, limited by a filter.
+    rpc SubscribeByFilter(SubscribeByFilterRequest) returns (stream TypedMessage) {};
+
+    // Publish one message.
+    rpc PublishOne(TypedMessage) returns (google.protobuf.Empty) {};
 }
 
 message SubscribeSafekeeperInfoRequest {
@@ -48,3 +54,55 @@ message TenantTimelineId {
     bytes tenant_id = 1;
     bytes timeline_id = 2;
 }
+
+message FilterTenantTimelineId {
+    // If true, only messages related to `tenant_timeline_id` will be emitted.
+    // Otherwise, messages for all timelines will be emitted.
+    bool enabled = 1;
+    TenantTimelineId tenant_timeline_id = 2;
+}
+
+message TypeSubscription {
+    MessageType type = 1;
+}
+
+message SubscribeByFilterRequest {
+    // Subscription will emit messages only of the specified types. You need to specify
+    // at least one type to receive any messages.
+    repeated TypeSubscription types = 1;
+
+    // If set and enabled, subscription will emit messages only for the specified tenant/timeline.
+    optional FilterTenantTimelineId tenant_timeline_id = 2;
+}
+
+enum MessageType {
+    UNKNOWN = 0;
+    SAFEKEEPER_TIMELINE_INFO = 2;
+    SAFEKEEPER_DISCOVERY_REQUEST = 3;
+    SAFEKEEPER_DISCOVERY_RESPONSE = 4;
+}
+
+// A message with a type.
+message TypedMessage {
+    MessageType type = 1;
+
+    optional SafekeeperTimelineInfo safekeeper_timeline_info = 2;
+    optional SafekeeperDiscoveryRequest safekeeper_discovery_request = 3;
+    optional SafekeeperDiscoveryResponse safekeeper_discovery_response = 4;
+}
+
+message SafekeeperDiscoveryRequest {
+    TenantTimelineId tenant_timeline_id = 1;
+}
+
+// Shorter version of SafekeeperTimelineInfo, contains only necessary fields.
+message SafekeeperDiscoveryResponse {
+    uint64 safekeeper_id = 1;
+    TenantTimelineId tenant_timeline_id = 2;
+    // WAL available to download.
+    uint64 commit_lsn = 3;
+    // A connection string to use for WAL downloading.
+    string safekeeper_connstr = 4;
+    // Availability zone of a safekeeper.
+    optional string availability_zone = 5;
+}
diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs
index 9f81ac6cac..4e5f8ed724 100644
--- a/storage_broker/src/bin/storage_broker.rs
+++ b/storage_broker/src/bin/storage_broker.rs
@@ -35,10 +35,16 @@ use tracing::*;
 use utils::signals::ShutdownSignals;
 
 use metrics::{Encoder, TextEncoder};
-use storage_broker::metrics::{NUM_PUBS, NUM_SUBS_ALL, NUM_SUBS_TIMELINE};
+use storage_broker::metrics::{
+    BROADCASTED_MESSAGES_TOTAL, BROADCAST_DROPPED_MESSAGES_TOTAL, NUM_PUBS, NUM_SUBS_ALL,
+    NUM_SUBS_TIMELINE, PROCESSED_MESSAGES_TOTAL, PUBLISHED_ONEOFF_MESSAGES_TOTAL,
+};
 use storage_broker::proto::broker_service_server::{BrokerService, BrokerServiceServer};
 use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey;
-use storage_broker::proto::{SafekeeperTimelineInfo, SubscribeSafekeeperInfoRequest};
+use storage_broker::proto::{
+    FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse,
+    SafekeeperTimelineInfo, SubscribeByFilterRequest, SubscribeSafekeeperInfoRequest, TypedMessage,
+};
 use storage_broker::{
     parse_proto_ttid, EitherBody, DEFAULT_KEEPALIVE_INTERVAL, DEFAULT_LISTEN_ADDR,
 };
@@ -73,8 +79,103 @@ struct Args {
     log_format: String,
 }
 
-type PubId = u64; // id of publisher for registering in maps
-type SubId = u64; // id of subscriber for registering in maps
+/// Id of publisher for registering in maps
+type PubId = u64;
+
+/// Id of subscriber for registering in maps
+type SubId = u64;
+
+/// Single enum type for all messages.
+#[derive(Clone, Debug, PartialEq)]
+#[allow(clippy::enum_variant_names)]
+enum Message {
+    SafekeeperTimelineInfo(SafekeeperTimelineInfo),
+    SafekeeperDiscoveryRequest(SafekeeperDiscoveryRequest),
+    SafekeeperDiscoveryResponse(SafekeeperDiscoveryResponse),
+}
+
+impl Message {
+    /// Convert proto message to internal message.
+    pub fn from(proto_msg: TypedMessage) -> Result<Self, Status> {
+        match proto_msg.r#type() {
+            MessageType::SafekeeperTimelineInfo => Ok(Message::SafekeeperTimelineInfo(
+                proto_msg.safekeeper_timeline_info.ok_or_else(|| {
+                    Status::new(Code::InvalidArgument, "missing safekeeper_timeline_info")
+                })?,
+            )),
+            MessageType::SafekeeperDiscoveryRequest => Ok(Message::SafekeeperDiscoveryRequest(
+                proto_msg.safekeeper_discovery_request.ok_or_else(|| {
+                    Status::new(
+                        Code::InvalidArgument,
+                        "missing safekeeper_discovery_request",
+                    )
+                })?,
+            )),
+            MessageType::SafekeeperDiscoveryResponse => Ok(Message::SafekeeperDiscoveryResponse(
+                proto_msg.safekeeper_discovery_response.ok_or_else(|| {
+                    Status::new(
+                        Code::InvalidArgument,
+                        "missing safekeeper_discovery_response",
+                    )
+                })?,
+            )),
+            MessageType::Unknown => Err(Status::new(
+                Code::InvalidArgument,
+                format!("invalid message type: {:?}", proto_msg.r#type),
+            )),
+        }
+    }
+
+    /// Get the tenant_timeline_id from the message.
+    pub fn tenant_timeline_id(&self) -> Result<Option<TenantTimelineId>, Status> {
+        match self {
+            Message::SafekeeperTimelineInfo(msg) => Ok(msg
+                .tenant_timeline_id
+                .as_ref()
+                .map(parse_proto_ttid)
+                .transpose()?),
+            Message::SafekeeperDiscoveryRequest(msg) => Ok(msg
+                .tenant_timeline_id
+                .as_ref()
+                .map(parse_proto_ttid)
+                .transpose()?),
+            Message::SafekeeperDiscoveryResponse(msg) => Ok(msg
+                .tenant_timeline_id
+                .as_ref()
+                .map(parse_proto_ttid)
+                .transpose()?),
+        }
+    }
+
+    /// Convert internal message to the protobuf struct.
+    pub fn as_typed_message(&self) -> TypedMessage {
+        let mut res = TypedMessage {
+            r#type: self.message_type() as i32,
+            ..Default::default()
+        };
+        match self {
+            Message::SafekeeperTimelineInfo(msg) => {
+                res.safekeeper_timeline_info = Some(msg.clone())
+            }
+            Message::SafekeeperDiscoveryRequest(msg) => {
+                res.safekeeper_discovery_request = Some(msg.clone())
+            }
+            Message::SafekeeperDiscoveryResponse(msg) => {
+                res.safekeeper_discovery_response = Some(msg.clone())
+            }
+        }
+        res
+    }
+
+    /// Get the message type.
+    pub fn message_type(&self) -> MessageType {
+        match self {
+            Message::SafekeeperTimelineInfo(_) => MessageType::SafekeeperTimelineInfo,
+            Message::SafekeeperDiscoveryRequest(_) => MessageType::SafekeeperDiscoveryRequest,
+            Message::SafekeeperDiscoveryResponse(_) => MessageType::SafekeeperDiscoveryResponse,
+        }
+    }
+}
 
 #[derive(Copy, Clone, Debug)]
 enum SubscriptionKey {
@@ -83,7 +184,7 @@ enum SubscriptionKey {
 }
 
 impl SubscriptionKey {
-    // Parse protobuf subkey (protobuf doesn't have fixed size bytes, we get vectors).
+    /// Parse protobuf subkey (protobuf doesn't have fixed size bytes, we get vectors).
     pub fn from_proto_subscription_key(key: ProtoSubscriptionKey) -> Result<Self, Status> {
         match key {
             ProtoSubscriptionKey::All(_) => Ok(SubscriptionKey::All),
@@ -92,14 +193,29 @@ impl SubscriptionKey {
             }
         }
     }
+
+    /// Parse from FilterTenantTimelineId
+    pub fn from_proto_filter_tenant_timeline_id(
+        f: &FilterTenantTimelineId,
+    ) -> Result<Self, Status> {
+        if !f.enabled {
+            return Ok(SubscriptionKey::All);
+        }
+
+        let ttid =
+            parse_proto_ttid(f.tenant_timeline_id.as_ref().ok_or_else(|| {
+                Status::new(Code::InvalidArgument, "missing tenant_timeline_id")
+            })?)?;
+        Ok(SubscriptionKey::Timeline(ttid))
+    }
 }
 
-// Channel to timeline subscribers.
+/// Channel to timeline subscribers.
 struct ChanToTimelineSub {
-    chan: broadcast::Sender<SafekeeperTimelineInfo>,
-    // Tracked separately to know when delete the shmem entry. receiver_count()
-    // is unhandy for that as unregistering and dropping the receiver side
-    // happens at different moments.
+    chan: broadcast::Sender<Message>,
+    /// Tracked separately to know when delete the shmem entry. receiver_count()
+    /// is unhandy for that as unregistering and dropping the receiver side
+    /// happens at different moments.
     num_subscribers: u64,
 }
 
@@ -110,7 +226,7 @@ struct SharedState {
     num_subs_to_timelines: i64,
     chans_to_timeline_subs: HashMap<TenantTimelineId, ChanToTimelineSub>,
     num_subs_to_all: i64,
-    chan_to_all_subs: broadcast::Sender<SafekeeperTimelineInfo>,
+    chan_to_all_subs: broadcast::Sender<Message>,
 }
 
 impl SharedState {
@@ -146,7 +262,7 @@ impl SharedState {
         &mut self,
         sub_key: SubscriptionKey,
         timeline_chan_size: usize,
-    ) -> (SubId, broadcast::Receiver<SafekeeperTimelineInfo>) {
+    ) -> (SubId, broadcast::Receiver<Message>) {
         let sub_id = self.next_sub_id;
         self.next_sub_id += 1;
         let sub_rx = match sub_key {
@@ -262,6 +378,29 @@ impl Registry {
             subscriber.id, subscriber.key, subscriber.remote_addr
         );
     }
+
+    /// Send msg to relevant subscribers.
+    pub fn send_msg(&self, msg: &Message) -> Result<(), Status> {
+        PROCESSED_MESSAGES_TOTAL.inc();
+
+        // send message to subscribers for everything
+        let shared_state = self.shared_state.read();
+        // Err means there is no subscribers, it is fine.
+        shared_state.chan_to_all_subs.send(msg.clone()).ok();
+
+        // send message to per timeline subscribers, if there is ttid
+        let ttid = msg.tenant_timeline_id()?;
+        if let Some(ttid) = ttid {
+            if let Some(subs) = shared_state.chans_to_timeline_subs.get(&ttid) {
+                // Err can't happen here, as tx is destroyed only after removing
+                // from the map the last subscriber along with tx.
+                subs.chan
+                    .send(msg.clone())
+                    .expect("rx is still in the map with zero subscribers");
+            }
+        }
+        Ok(())
+    }
 }
 
 // Private subscriber state.
@@ -269,7 +408,7 @@ struct Subscriber {
     id: SubId,
     key: SubscriptionKey,
     // Subscriber receives messages from publishers here.
-    sub_rx: broadcast::Receiver<SafekeeperTimelineInfo>,
+    sub_rx: broadcast::Receiver<Message>,
     // to unregister itself from shared state in Drop
     registry: Registry,
     // for logging
@@ -291,26 +430,9 @@ struct Publisher {
 }
 
 impl Publisher {
-    // Send msg to relevant subscribers.
-    pub fn send_msg(&mut self, msg: &SafekeeperTimelineInfo) -> Result<(), Status> {
-        // send message to subscribers for everything
-        let shared_state = self.registry.shared_state.read();
-        // Err means there is no subscribers, it is fine.
-        shared_state.chan_to_all_subs.send(msg.clone()).ok();
-
-        // send message to per timeline subscribers
-        let ttid =
-            parse_proto_ttid(msg.tenant_timeline_id.as_ref().ok_or_else(|| {
-                Status::new(Code::InvalidArgument, "missing tenant_timeline_id")
-            })?)?;
-        if let Some(subs) = shared_state.chans_to_timeline_subs.get(&ttid) {
-            // Err can't happen here, as tx is destroyed only after removing
-            // from the map the last subscriber along with tx.
-            subs.chan
-                .send(msg.clone())
-                .expect("rx is still in the map with zero subscribers");
-        }
-        Ok(())
+    /// Send msg to relevant subscribers.
+    pub fn send_msg(&mut self, msg: &Message) -> Result<(), Status> {
+        self.registry.send_msg(msg)
     }
 }
 
@@ -339,7 +461,7 @@ impl BrokerService for Broker {
 
         loop {
             match stream.next().await {
-                Some(Ok(msg)) => publisher.send_msg(&msg)?,
+                Some(Ok(msg)) => publisher.send_msg(&Message::SafekeeperTimelineInfo(msg))?,
                 Some(Err(e)) => return Err(e), // grpc error from the stream
                 None => break,                 // closed stream
             }
@@ -371,8 +493,15 @@ impl BrokerService for Broker {
             let mut missed_msgs: u64 = 0;
             loop {
                 match subscriber.sub_rx.recv().await {
-                    Ok(info) => yield info,
+                    Ok(info) => {
+                        match info {
+                            Message::SafekeeperTimelineInfo(info) => yield info,
+                            _ => {},
+                        }
+                        BROADCASTED_MESSAGES_TOTAL.inc();
+                    },
                     Err(RecvError::Lagged(skipped_msg)) => {
+                        BROADCAST_DROPPED_MESSAGES_TOTAL.inc_by(skipped_msg);
                         missed_msgs += skipped_msg;
                         if (futures::poll!(Box::pin(warn_interval.tick()))).is_ready() {
                             warn!("subscription id={}, key={:?} addr={:?} dropped {} messages, channel is full",
@@ -392,6 +521,78 @@ impl BrokerService for Broker {
             Box::pin(output) as Self::SubscribeSafekeeperInfoStream
         ))
     }
+
+    type SubscribeByFilterStream =
+        Pin<Box<dyn Stream<Item = Result<TypedMessage, Status>> + Send + 'static>>;
+
+    /// Subscribe to all messages, limited by a filter.
+    async fn subscribe_by_filter(
+        &self,
+        request: Request<SubscribeByFilterRequest>,
+    ) -> std::result::Result<Response<Self::SubscribeByFilterStream>, Status> {
+        let remote_addr = request
+            .remote_addr()
+            .expect("TCPConnectInfo inserted by handler");
+        let proto_filter = request.into_inner();
+        let ttid_filter = proto_filter
+            .tenant_timeline_id
+            .as_ref()
+            .ok_or_else(|| Status::new(Code::InvalidArgument, "missing tenant_timeline_id"))?;
+
+        let sub_key = SubscriptionKey::from_proto_filter_tenant_timeline_id(ttid_filter)?;
+        let types_set = proto_filter
+            .types
+            .iter()
+            .map(|t| t.r#type)
+            .collect::<std::collections::HashSet<_>>();
+
+        let mut subscriber = self.registry.register_subscriber(sub_key, remote_addr);
+
+        // transform rx into stream with item = Result, as method result demands
+        let output = async_stream::try_stream! {
+            let mut warn_interval = time::interval(Duration::from_millis(1000));
+            let mut missed_msgs: u64 = 0;
+            loop {
+                match subscriber.sub_rx.recv().await {
+                    Ok(msg) => {
+                        let msg_type = msg.message_type() as i32;
+                        if types_set.contains(&msg_type) {
+                            yield msg.as_typed_message();
+                            BROADCASTED_MESSAGES_TOTAL.inc();
+                        }
+                    },
+                    Err(RecvError::Lagged(skipped_msg)) => {
+                        BROADCAST_DROPPED_MESSAGES_TOTAL.inc_by(skipped_msg);
+                        missed_msgs += skipped_msg;
+                        if (futures::poll!(Box::pin(warn_interval.tick()))).is_ready() {
+                            warn!("subscription id={}, key={:?} addr={:?} dropped {} messages, channel is full",
+                                subscriber.id, subscriber.key, subscriber.remote_addr, missed_msgs);
+                            missed_msgs = 0;
+                        }
+                    }
+                    Err(RecvError::Closed) => {
+                        // can't happen, we never drop the channel while there is a subscriber
+                        Err(Status::new(Code::Internal, "channel unexpectantly closed"))?;
+                    }
+                }
+            }
+        };
+
+        Ok(Response::new(
+            Box::pin(output) as Self::SubscribeByFilterStream
+        ))
+    }
+
+    /// Publish one message.
+    async fn publish_one(
+        &self,
+        request: Request<TypedMessage>,
+    ) -> std::result::Result<Response<()>, Status> {
+        let msg = Message::from(request.into_inner())?;
+        PUBLISHED_ONEOFF_MESSAGES_TOTAL.inc();
+        self.registry.send_msg(&msg)?;
+        Ok(Response::new(()))
+    }
 }
 
 // We serve only metrics and healthcheck through http1.
@@ -515,8 +716,8 @@ mod tests {
     use tokio::sync::broadcast::error::TryRecvError;
     use utils::id::{TenantId, TimelineId};
 
-    fn msg(timeline_id: Vec<u8>) -> SafekeeperTimelineInfo {
-        SafekeeperTimelineInfo {
+    fn msg(timeline_id: Vec<u8>) -> Message {
+        Message::SafekeeperTimelineInfo(SafekeeperTimelineInfo {
             safekeeper_id: 1,
             tenant_timeline_id: Some(ProtoTenantTimelineId {
                 tenant_id: vec![0x00; 16],
@@ -533,7 +734,7 @@ mod tests {
             http_connstr: "neon-1-sk-1.local:7677".to_owned(),
             local_start_lsn: 0,
             availability_zone: None,
-        }
+        })
     }
 
     fn tli_from_u64(i: u64) -> Vec<u8> {
diff --git a/storage_broker/src/metrics.rs b/storage_broker/src/metrics.rs
index f0649d0f68..1fd3dd5ad6 100644
--- a/storage_broker/src/metrics.rs
+++ b/storage_broker/src/metrics.rs
@@ -1,6 +1,6 @@
 //! Broker metrics.
 
-use metrics::{register_int_gauge, IntGauge};
+use metrics::{register_int_counter, register_int_gauge, IntCounter, IntGauge};
 use once_cell::sync::Lazy;
 
 pub static NUM_PUBS: Lazy<IntGauge> = Lazy::new(|| {
@@ -23,3 +23,35 @@ pub static NUM_SUBS_ALL: Lazy<IntGauge> = Lazy::new(|| {
     )
     .expect("Failed to register metric")
 });
+
+pub static PROCESSED_MESSAGES_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "storage_broker_processed_messages_total",
+        "Number of messages received by storage broker, before routing and broadcasting"
+    )
+    .expect("Failed to register metric")
+});
+
+pub static BROADCASTED_MESSAGES_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "storage_broker_broadcasted_messages_total",
+        "Number of messages broadcasted (sent over network) to subscribers"
+    )
+    .expect("Failed to register metric")
+});
+
+pub static BROADCAST_DROPPED_MESSAGES_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "storage_broker_broadcast_dropped_messages_total",
+        "Number of messages dropped due to channel capacity overflow"
+    )
+    .expect("Failed to register metric")
+});
+
+pub static PUBLISHED_ONEOFF_MESSAGES_TOTAL: Lazy<IntCounter> = Lazy::new(|| {
+    register_int_counter!(
+        "storage_broker_published_oneoff_messages_total",
+        "Number of one-off messages sent via PublishOne method"
+    )
+    .expect("Failed to register metric")
+});
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index 75dc0d9f69..bec51ccbd3 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -28,6 +28,7 @@ import jwt
 import psycopg2
 import pytest
 import requests
+import toml
 from _pytest.config import Config
 from _pytest.config.argparsing import Parser
 from _pytest.fixtures import FixtureRequest
@@ -437,7 +438,7 @@ class NeonEnvBuilder:
         # Pageserver remote storage
         self.pageserver_remote_storage = pageserver_remote_storage
         # Safekeepers remote storage
-        self.sk_remote_storage: Optional[RemoteStorage] = None
+        self.safekeepers_remote_storage: Optional[RemoteStorage] = None
 
         self.broker = broker
         self.run_id = run_id
@@ -457,7 +458,6 @@ class NeonEnvBuilder:
         self.preserve_database_files = preserve_database_files
         self.initial_tenant = initial_tenant or TenantId.generate()
         self.initial_timeline = initial_timeline or TimelineId.generate()
-        self.enable_generations = True
         self.scrub_on_exit = False
         self.test_output_dir = test_output_dir
 
@@ -509,6 +509,66 @@ class NeonEnvBuilder:
 
         return env
 
+    def from_repo_dir(
+        self,
+        repo_dir: Path,
+        neon_binpath: Optional[Path] = None,
+        pg_distrib_dir: Optional[Path] = None,
+    ) -> NeonEnv:
+        """
+        A simple method to import data into the current NeonEnvBuilder from a snapshot of a repo dir.
+        """
+
+        # Setting custom `neon_binpath` and `pg_distrib_dir` is useful for compatibility tests
+        self.neon_binpath = neon_binpath or self.neon_binpath
+        self.pg_distrib_dir = pg_distrib_dir or self.pg_distrib_dir
+
+        # Get the initial tenant and timeline from the snapshot config
+        snapshot_config_toml = repo_dir / "config"
+        with snapshot_config_toml.open("r") as f:
+            snapshot_config = toml.load(f)
+
+        self.initial_tenant = TenantId(snapshot_config["default_tenant_id"])
+        self.initial_timeline = TimelineId(
+            dict(snapshot_config["branch_name_mappings"][DEFAULT_BRANCH_NAME])[
+                str(self.initial_tenant)
+            ]
+        )
+        self.env = self.init_configs()
+
+        for ps_dir in repo_dir.glob("pageserver_*"):
+            tenants_from_dir = ps_dir / "tenants"
+            tenants_to_dir = self.repo_dir / ps_dir.name / "tenants"
+
+            log.info(f"Copying pageserver tenants directory {tenants_from_dir} to {tenants_to_dir}")
+            shutil.copytree(tenants_from_dir, tenants_to_dir)
+
+        for sk_from_dir in (repo_dir / "safekeepers").glob("sk*"):
+            sk_to_dir = self.repo_dir / "safekeepers" / sk_from_dir.name
+            log.info(f"Copying safekeeper directory {sk_from_dir} to {sk_to_dir}")
+            sk_to_dir.rmdir()
+            shutil.copytree(sk_from_dir, sk_to_dir, ignore=shutil.ignore_patterns("*.log", "*.pid"))
+
+        shutil.rmtree(self.repo_dir / "local_fs_remote_storage", ignore_errors=True)
+        shutil.copytree(
+            repo_dir / "local_fs_remote_storage", self.repo_dir / "local_fs_remote_storage"
+        )
+
+        if (attachments_json := Path(repo_dir / "attachments.json")).exists():
+            shutil.copyfile(attachments_json, self.repo_dir / attachments_json.name)
+
+        # Update the config with info about tenants and timelines
+        with (self.repo_dir / "config").open("r") as f:
+            config = toml.load(f)
+
+        config["default_tenant_id"] = snapshot_config["default_tenant_id"]
+        config["branch_name_mappings"] = snapshot_config["branch_name_mappings"]
+
+        with (self.repo_dir / "config").open("w") as f:
+            toml.dump(config, f)
+
+        return self.env
+
     def enable_scrub_on_exit(self):
         """
         Call this if you would like the fixture to automatically run
@@ -537,9 +597,11 @@ class NeonEnvBuilder:
         self.pageserver_remote_storage = ret
 
     def enable_safekeeper_remote_storage(self, kind: RemoteStorageKind):
-        assert self.sk_remote_storage is None, "sk_remote_storage already configured"
+        assert (
+            self.safekeepers_remote_storage is None
+        ), "safekeepers_remote_storage already configured"
 
-        self.sk_remote_storage = self._configure_and_create_remote_storage(
+        self.safekeepers_remote_storage = self._configure_and_create_remote_storage(
             kind, RemoteStorageUser.SAFEKEEPER
         )
 
@@ -592,7 +654,7 @@ class NeonEnvBuilder:
                 directory_to_clean.rmdir()
 
     def cleanup_remote_storage(self):
-        for x in [self.pageserver_remote_storage, self.sk_remote_storage]:
+        for x in [self.pageserver_remote_storage, self.safekeepers_remote_storage]:
             if isinstance(x, S3Storage):
                 x.do_cleanup()
 
@@ -617,8 +679,7 @@ class NeonEnvBuilder:
 
                 pageserver.stop(immediate=True)
 
-            if self.env.attachment_service is not None:
-                self.env.attachment_service.stop(immediate=True)
+            self.env.attachment_service.stop(immediate=True)
 
             cleanup_error = None
 
@@ -696,7 +757,7 @@ class NeonEnv:
         self.pageservers: List[NeonPageserver] = []
         self.broker = config.broker
         self.pageserver_remote_storage = config.pageserver_remote_storage
-        self.safekeepers_remote_storage = config.sk_remote_storage
+        self.safekeepers_remote_storage = config.safekeepers_remote_storage
         self.pg_version = config.pg_version
         # Binary path for pageserver, safekeeper, etc
         self.neon_binpath = config.neon_binpath
@@ -712,36 +773,24 @@ class NeonEnv:
         self.initial_tenant = config.initial_tenant
         self.initial_timeline = config.initial_timeline
 
-        if config.enable_generations:
-            attachment_service_port = self.port_distributor.get_port()
-            self.control_plane_api: Optional[str] = f"http://127.0.0.1:{attachment_service_port}"
-            self.attachment_service: Optional[NeonAttachmentService] = NeonAttachmentService(self)
-        else:
-            self.control_plane_api = None
-            self.attachment_service = None
+        attachment_service_port = self.port_distributor.get_port()
+        self.control_plane_api: str = f"http://127.0.0.1:{attachment_service_port}"
+        self.attachment_service: NeonAttachmentService = NeonAttachmentService(self)
 
         self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine
 
         # Create a config file corresponding to the options
-        toml = textwrap.dedent(
-            f"""
-            default_tenant_id = '{config.initial_tenant}'
-        """
-        )
+        cfg: Dict[str, Any] = {
+            "default_tenant_id": str(self.initial_tenant),
+            "broker": {
+                "listen_addr": self.broker.listen_addr(),
+            },
+            "pageservers": [],
+            "safekeepers": [],
+        }
 
         if self.control_plane_api is not None:
-            toml += textwrap.dedent(
-                f"""
-                control_plane_api = '{self.control_plane_api}'
-            """
-            )
-
-        toml += textwrap.dedent(
-            f"""
-            [broker]
-            listen_addr = '{self.broker.listen_addr()}'
-        """
-        )
+            cfg["control_plane_api"] = self.control_plane_api
 
         # Create config for pageserver
         http_auth_type = "NeonJWT" if config.auth_enabled else "Trust"
@@ -754,22 +803,15 @@ class NeonEnv:
                 http=self.port_distributor.get_port(),
             )
 
-            toml += textwrap.dedent(
-                f"""
-                [[pageservers]]
-                id={ps_id}
-                listen_pg_addr = 'localhost:{pageserver_port.pg}'
-                listen_http_addr = 'localhost:{pageserver_port.http}'
-                pg_auth_type = '{pg_auth_type}'
-                http_auth_type = '{http_auth_type}'
-            """
-            )
+            ps_cfg: Dict[str, Any] = {
+                "id": ps_id,
+                "listen_pg_addr": f"localhost:{pageserver_port.pg}",
+                "listen_http_addr": f"localhost:{pageserver_port.http}",
+                "pg_auth_type": pg_auth_type,
+                "http_auth_type": http_auth_type,
+            }
             if self.pageserver_virtual_file_io_engine is not None:
-                toml += textwrap.dedent(
-                    f"""
-                    virtual_file_io_engine = '{self.pageserver_virtual_file_io_engine}'
-                """
-                )
+                ps_cfg["virtual_file_io_engine"] = self.pageserver_virtual_file_io_engine
 
             # Create a corresponding NeonPageserver object
             self.pageservers.append(
@@ -777,9 +819,11 @@ class NeonEnv:
                     self,
                     ps_id,
                     port=pageserver_port,
-                    config_override=config.pageserver_config_override,
+                    config_override=self.pageserver_config_override,
                 )
             )
+            cfg["pageservers"].append(ps_cfg)
+
         # Create config and a Safekeeper object for each safekeeper
         for i in range(1, config.num_safekeepers + 1):
             port = SafekeeperPort(
@@ -788,39 +832,28 @@ class NeonEnv:
                 http=self.port_distributor.get_port(),
             )
             id = config.safekeepers_id_start + i  # assign ids sequentially
-            toml += textwrap.dedent(
-                f"""
-                [[safekeepers]]
-                id = {id}
-                pg_port = {port.pg}
-                pg_tenant_only_port = {port.pg_tenant_only}
-                http_port = {port.http}
-                sync = {'true' if config.safekeepers_enable_fsync else 'false'}"""
-            )
+            sk_cfg: Dict[str, Any] = {
+                "id": id,
+                "pg_port": port.pg,
+                "pg_tenant_only_port": port.pg_tenant_only,
+                "http_port": port.http,
+                "sync": config.safekeepers_enable_fsync,
+            }
             if config.auth_enabled:
-                toml += textwrap.dedent(
-                    """
-                auth_enabled = true
-                """
-                )
-            if config.sk_remote_storage is not None:
-                toml += textwrap.dedent(
-                    f"""
-                remote_storage = "{remote_storage_to_toml_inline_table(config.sk_remote_storage)}"
-                """
-                )
-            safekeeper = Safekeeper(env=self, id=id, port=port)
-            self.safekeepers.append(safekeeper)
+                sk_cfg["auth_enabled"] = True
+            if self.safekeepers_remote_storage is not None:
+                sk_cfg["remote_storage"] = self.safekeepers_remote_storage.to_toml_inline_table()
+            self.safekeepers.append(Safekeeper(env=self, id=id, port=port))
+            cfg["safekeepers"].append(sk_cfg)
 
-        log.info(f"Config: {toml}")
-        self.neon_cli.init(toml)
+        log.info(f"Config: {cfg}")
+        self.neon_cli.init(cfg)
 
     def start(self):
         # Start up broker, pageserver and all safekeepers
         self.broker.try_start()
 
-        if self.attachment_service is not None:
-            self.attachment_service.start()
+        self.attachment_service.start()
 
         for pageserver in self.pageservers:
             pageserver.start()
@@ -1303,10 +1336,10 @@ class NeonCli(AbstractNeonCli):
 
     def init(
         self,
-        config_toml: str,
+        config: Dict[str, Any],
     ) -> "subprocess.CompletedProcess[str]":
         with tempfile.NamedTemporaryFile(mode="w+") as tmp:
-            tmp.write(config_toml)
+            tmp.write(toml.dumps(config))
             tmp.flush()
 
             cmd = ["init", f"--config={tmp.name}", "--pg-version", self.env.pg_version]
@@ -1744,11 +1777,16 @@ class NeonPageserver(PgProtocol):
 
     @property
     def workdir(self) -> Path:
-        return Path(os.path.join(self.env.repo_dir, f"pageserver_{self.id}"))
+        return self.env.repo_dir / f"pageserver_{self.id}"
 
     def assert_no_errors(self):
-        logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
-        errors = scan_pageserver_log_for_errors(logfile, self.allowed_errors)
+        logfile = self.workdir / "pageserver.log"
+        if not logfile.exists():
+            log.warning(f"Skipping log check: {logfile} does not exist")
+            return
+
+        with logfile.open("r") as f:
+            errors = scan_pageserver_log_for_errors(f, self.allowed_errors)
 
         for _lineno, error in errors:
             log.info(f"not allowed error: {error.strip()}")
@@ -1772,7 +1810,10 @@ class NeonPageserver(PgProtocol):
 
     def log_contains(self, pattern: str) -> Optional[str]:
         """Check that the pageserver log contains a line that matches the given regex"""
-        logfile = open(os.path.join(self.workdir, "pageserver.log"), "r")
+        logfile = self.workdir / "pageserver.log"
+        if not logfile.exists():
+            log.warning(f"Skipping log check: {logfile} does not exist")
+            return None
 
         contains_re = re.compile(pattern)
 
@@ -1781,14 +1822,11 @@ class NeonPageserver(PgProtocol):
         # no guarantee it is already present in the log file. This hasn't
         # been a problem in practice, our python tests are not fast enough
         # to hit that race condition.
-        while True:
-            line = logfile.readline()
-            if not line:
-                break
-
-            if contains_re.search(line):
-                # found it!
-                return line
+        with logfile.open("r") as f:
+            for line in f:
+                if contains_re.search(line):
+                    # found it!
+                    return line
 
         return None
 
@@ -1801,43 +1839,53 @@ class NeonPageserver(PgProtocol):
         """
         client = self.http_client()
         return client.tenant_attach(
-            tenant_id, config, config_null, generation=self.maybe_get_generation(tenant_id)
+            tenant_id,
+            config,
+            config_null,
+            generation=self.env.attachment_service.attach_hook_issue(tenant_id, self.id),
         )
 
     def tenant_detach(self, tenant_id: TenantId):
-        if self.env.attachment_service is not None:
-            self.env.attachment_service.attach_hook_drop(tenant_id)
+        self.env.attachment_service.attach_hook_drop(tenant_id)
 
         client = self.http_client()
         return client.tenant_detach(tenant_id)
 
+    def tenant_location_configure(self, tenant_id: TenantId, config: dict[str, Any], **kwargs):
+        if config["mode"].startswith("Attached") and "generation" not in config:
+            config["generation"] = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+
+        client = self.http_client()
+        return client.tenant_location_conf(tenant_id, config, **kwargs)
+
+    def read_tenant_location_conf(self, tenant_id: TenantId) -> dict[str, Any]:
+        path = self.tenant_dir(tenant_id) / "config-v1"
+        log.info(f"Reading location conf from {path}")
+        bytes = open(path, "r").read()
+        try:
+            decoded: dict[str, Any] = toml.loads(bytes)
+            return decoded
+        except:
+            log.error(f"Failed to decode LocationConf, raw content ({len(bytes)} bytes): {bytes}")
+            raise
+
     def tenant_create(
         self,
         tenant_id: TenantId,
         conf: Optional[Dict[str, Any]] = None,
         auth_token: Optional[str] = None,
+        generation: Optional[int] = None,
     ) -> TenantId:
+        if generation is None:
+            generation = self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
         client = self.http_client(auth_token=auth_token)
-        return client.tenant_create(
-            tenant_id, conf, generation=self.maybe_get_generation(tenant_id)
-        )
+        return client.tenant_create(tenant_id, conf, generation=generation)
 
     def tenant_load(self, tenant_id: TenantId):
         client = self.http_client()
-        return client.tenant_load(tenant_id, generation=self.maybe_get_generation(tenant_id))
-
-    def maybe_get_generation(self, tenant_id: TenantId):
-        """
-        For tests that would like to use an HTTP client directly instead of using
-        the `tenant_attach` and `tenant_create` helpers here: issue a generation
-        number for a tenant.
-
-        Returns None if the attachment service is not enabled (legacy mode)
-        """
-        if self.env.attachment_service is not None:
-            return self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
-        else:
-            return None
+        return client.tenant_load(
+            tenant_id, generation=self.env.attachment_service.attach_hook_issue(tenant_id, self.id)
+        )
 
 
 def append_pageserver_param_overrides(
@@ -2744,6 +2792,7 @@ class EndpointFactory:
         lsn: Optional[Lsn] = None,
         hot_standby: bool = False,
         config_lines: Optional[List[str]] = None,
+        pageserver_id: Optional[int] = None,
     ) -> Endpoint:
         ep = Endpoint(
             self.env,
@@ -2763,6 +2812,7 @@ class EndpointFactory:
             lsn=lsn,
             hot_standby=hot_standby,
             config_lines=config_lines,
+            pageserver_id=pageserver_id,
         )
 
     def stop_all(self) -> "EndpointFactory":
@@ -2888,7 +2938,7 @@ class Safekeeper:
         tli_dir = self.timeline_dir(tenant_id, timeline_id)
         segments = []
         for _, _, filenames in os.walk(tli_dir):
-            segments.extend([f for f in filenames if f != "safekeeper.control"])
+            segments.extend([f for f in filenames if not f.startswith("safekeeper.control")])
         segments.sort()
         return segments
 
@@ -3109,7 +3159,7 @@ def pytest_addoption(parser: Parser):
 
 
 SMALL_DB_FILE_NAME_REGEX: re.Pattern = re.compile(  # type: ignore[type-arg]
-    r"config|metadata|.+\.(?:toml|pid|json|sql)"
+    r"config|config-v1|heatmap-v1|metadata|.+\.(?:toml|pid|json|sql)"
 )
 
 
@@ -3370,8 +3420,6 @@ def parse_project_git_version_output(s: str) -> str:
 
     The information is generated by utils::project_git_version!
     """
-    import re
-
     res = re.search(r"git(-env)?:([0-9a-fA-F]{8,40})(-\S+)?", s)
     if res and (commit := res.group(2)):
         return commit
diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py
index 5c3ae3ce4b..74c6bddf23 100755
--- a/test_runner/fixtures/pageserver/allowed_errors.py
+++ b/test_runner/fixtures/pageserver/allowed_errors.py
@@ -79,6 +79,9 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = (
     # AWS S3 may emit 500 errors for keys in a DeleteObjects response: we retry these
     # and it is not a failure of our code when it happens.
     ".*DeleteObjects.*We encountered an internal error. Please try again.*",
+    # During shutdown, DownloadError::Cancelled may be logged as an error.  Cleaning this
+    # up is tracked in https://github.com/neondatabase/neon/issues/6096
+    ".*Cancelled, shutting down.*",
 )
 
 
diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py
index 3e75bac424..add6c4288a 100644
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -150,7 +150,7 @@ class PageserverHttpClient(requests.Session):
                 # (this may change in future if we do fault injection of a kind that causes
                 #  requests TCP flows to stick)
                 read=False,
-                backoff_factor=0,
+                backoff_factor=0.2,
                 status_forcelist=[503],
                 allowed_methods=None,
                 remove_headers_on_redirect=[],
@@ -277,6 +277,23 @@ class PageserverHttpClient(requests.Session):
         res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params)
         self.verbose_error(res)
 
+    def tenant_location_conf(
+        self, tenant_id: TenantId, location_conf=dict[str, Any], flush_ms=None
+    ):
+        body = location_conf.copy()
+        body["tenant_id"] = str(tenant_id)
+
+        params = {}
+        if flush_ms is not None:
+            params["flush_ms"] = str(flush_ms)
+
+        res = self.put(
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/location_config",
+            json=body,
+            params=params,
+        )
+        self.verbose_error(res)
+
     def tenant_delete(self, tenant_id: TenantId):
         res = self.delete(f"http://localhost:{self.port}/v1/tenant/{tenant_id}")
         self.verbose_error(res)
@@ -305,6 +322,10 @@ class PageserverHttpClient(requests.Session):
         self.verbose_error(res)
         return TenantConfig.from_json(res.json())
 
+    def tenant_heatmap_upload(self, tenant_id: TenantId):
+        res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/heatmap_upload")
+        self.verbose_error(res)
+
     def set_tenant_config(self, tenant_id: TenantId, config: dict[str, Any]):
         assert "tenant_id" not in config.keys()
         res = self.put(
@@ -489,13 +510,21 @@ class PageserverHttpClient(requests.Session):
         assert res_json is None
 
     def timeline_get_lsn_by_timestamp(
-        self, tenant_id: TenantId, timeline_id: TimelineId, timestamp, version: int
+        self,
+        tenant_id: TenantId,
+        timeline_id: TimelineId,
+        timestamp,
+        version: Optional[int] = None,
     ):
         log.info(
             f"Requesting lsn by timestamp {timestamp}, tenant {tenant_id}, timeline {timeline_id}"
         )
+        if version is None:
+            version_str = ""
+        else:
+            version_str = f"&version={version}"
         res = self.get(
-            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}&version={version}",
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}{version_str}",
         )
         self.verbose_error(res)
         res_json = res.json()
diff --git a/test_runner/fixtures/remote_storage.py b/test_runner/fixtures/remote_storage.py
index 565e5fa7f8..c0c2383feb 100644
--- a/test_runner/fixtures/remote_storage.py
+++ b/test_runner/fixtures/remote_storage.py
@@ -9,12 +9,14 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 import boto3
+import toml
 from mypy_boto3_s3 import S3Client
 
 from fixtures.log_helper import log
 from fixtures.types import TenantId, TimelineId
 
 TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
+TENANT_HEATMAP_FILE_NAME = "heatmap-v1.json"
 
 
 @enum.unique
@@ -132,8 +134,18 @@ class LocalFsStorage:
         with self.index_path(tenant_id, timeline_id).open("r") as f:
             return json.load(f)
 
+    def heatmap_path(self, tenant_id: TenantId) -> Path:
+        return self.tenant_path(tenant_id) / TENANT_HEATMAP_FILE_NAME
+
+    def heatmap_content(self, tenant_id):
+        with self.heatmap_path(tenant_id).open("r") as f:
+            return json.load(f)
+
     def to_toml_inline_table(self) -> str:
-        return f"local_path='{self.root}'"
+        rv = {
+            "local_path": str(self.root),
+        }
+        return toml.TomlEncoder().dump_inline_table(rv)
 
     def cleanup(self):
         # no cleanup is done here, because there's NeonEnvBuilder.cleanup_local_storage which will remove everything, including localfs files
@@ -174,18 +186,18 @@ class S3Storage:
         )
 
     def to_toml_inline_table(self) -> str:
-        s = [
-            f"bucket_name='{self.bucket_name}'",
-            f"bucket_region='{self.bucket_region}'",
-        ]
+        rv = {
+            "bucket_name": self.bucket_name,
+            "bucket_region": self.bucket_region,
+        }
 
         if self.prefix_in_bucket is not None:
-            s.append(f"prefix_in_bucket='{self.prefix_in_bucket}'")
+            rv["prefix_in_bucket"] = self.prefix_in_bucket
 
         if self.endpoint is not None:
-            s.append(f"endpoint='{self.endpoint}'")
+            rv["endpoint"] = self.endpoint
 
-        return ",".join(s)
+        return toml.TomlEncoder().dump_inline_table(rv)
 
     def do_cleanup(self):
         if not self.cleanup:
@@ -384,4 +396,4 @@ def remote_storage_to_toml_inline_table(remote_storage: RemoteStorage) -> str:
     if not isinstance(remote_storage, (LocalFsStorage, S3Storage)):
         raise Exception("invalid remote storage type")
 
-    return f"{{{remote_storage.to_toml_inline_table()}}}"
+    return remote_storage.to_toml_inline_table()
diff --git a/test_runner/fixtures/types.py b/test_runner/fixtures/types.py
index d95368f990..ea648e460d 100644
--- a/test_runner/fixtures/types.py
+++ b/test_runner/fixtures/types.py
@@ -125,3 +125,51 @@ class TenantId(Id):
 class TimelineId(Id):
     def __repr__(self) -> str:
         return f'TimelineId("{self.id.hex()}")'
+
+
+# Workaround for compat with python 3.9, which does not have `typing.Self`
+TTenantShardId = TypeVar("TTenantShardId", bound="TenantShardId")
+
+
+class TenantShardId:
+    def __init__(self, tenant_id: TenantId, shard_number: int, shard_count: int):
+        self.tenant_id = tenant_id
+        self.shard_number = shard_number
+        self.shard_count = shard_count
+        assert self.shard_number < self.shard_count or self.shard_count == 0
+
+    @classmethod
+    def parse(cls: Type[TTenantShardId], input) -> TTenantShardId:
+        if len(input) == 32:
+            return cls(
+                tenant_id=TenantId(input),
+                shard_number=0,
+                shard_count=0,
+            )
+        elif len(input) == 37:
+            return cls(
+                tenant_id=TenantId(input[0:32]),
+                shard_number=int(input[33:35], 16),
+                shard_count=int(input[35:37], 16),
+            )
+        else:
+            raise ValueError(f"Invalid TenantShardId '{input}'")
+
+    def __str__(self):
+        return f"{self.tenant_id}-{self.shard_number:02x}{self.shard_count:02x}"
+
+    def _tuple(self) -> tuple[TenantId, int, int]:
+        return (self.tenant_id, self.shard_number, self.shard_count)
+
+    def __lt__(self, other) -> bool:
+        if not isinstance(other, type(self)):
+            return NotImplemented
+        return self._tuple() < other._tuple()
+
+    def __eq__(self, other) -> bool:
+        if not isinstance(other, type(self)):
+            return NotImplemented
+        return self._tuple() == other._tuple()
+
+    def __hash__(self) -> int:
+        return hash(self._tuple())
diff --git a/test_runner/fixtures/workload.py b/test_runner/fixtures/workload.py
new file mode 100644
index 0000000000..241531437c
--- /dev/null
+++ b/test_runner/fixtures/workload.py
@@ -0,0 +1,148 @@
+from typing import Optional
+
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import (
+    Endpoint,
+    NeonEnv,
+    last_flush_lsn_upload,
+    wait_for_last_flush_lsn,
+)
+from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload
+from fixtures.types import TenantId, TimelineId
+
+
+class Workload:
+    """
+    This is not a general purpose load generator: it exists for storage tests that need to inject some
+    high level types of storage work via the postgres interface:
+    - layer writes (`write_rows`)
+    - work for compaction (`churn_rows`)
+    - reads, checking we get the right data (`validate`)
+    """
+
+    def __init__(self, env: NeonEnv, tenant_id: TenantId, timeline_id: TimelineId):
+        self.env = env
+        self.tenant_id = tenant_id
+        self.timeline_id = timeline_id
+        self.table = "foo"
+
+        self.expect_rows = 0
+        self.churn_cursor = 0
+
+        self._endpoint: Optional[Endpoint] = None
+
+    def endpoint(self, pageserver_id: int) -> Endpoint:
+        if self._endpoint is None:
+            self._endpoint = self.env.endpoints.create(
+                "main",
+                tenant_id=self.tenant_id,
+                pageserver_id=pageserver_id,
+                endpoint_id="ep-workload",
+            )
+            self._endpoint.start(pageserver_id=pageserver_id)
+        else:
+            self._endpoint.reconfigure(pageserver_id=pageserver_id)
+
+        connstring = self._endpoint.safe_psql(
+            "SELECT setting FROM pg_settings WHERE name='neon.pageserver_connstring'"
+        )
+        log.info(f"Workload.endpoint: connstr={connstring}")
+
+        return self._endpoint
+
+    def __del__(self):
+        if self._endpoint is not None:
+            self._endpoint.stop()
+
+    def init(self, pageserver_id: int):
+        endpoint = self.endpoint(pageserver_id)
+
+        endpoint.safe_psql(f"CREATE TABLE {self.table} (id INTEGER PRIMARY KEY, val text);")
+        endpoint.safe_psql("CREATE EXTENSION IF NOT EXISTS neon_test_utils;")
+        last_flush_lsn_upload(
+            self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
+        )
+
+    def write_rows(self, n, pageserver_id):
+        endpoint = self.endpoint(pageserver_id)
+        start = self.expect_rows
+        end = start + n - 1
+        self.expect_rows += n
+        dummy_value = "blah"
+        endpoint.safe_psql(
+            f"""
+            INSERT INTO {self.table} (id, val)
+            SELECT g, '{dummy_value}'
+            FROM generate_series({start}, {end}) g
+            """
+        )
+
+        return last_flush_lsn_upload(
+            self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
+        )
+
+    def churn_rows(self, n, pageserver_id, upload=True):
+        assert self.expect_rows >= n
+
+        max_iters = 10
+        endpoint = self.endpoint(pageserver_id)
+        todo = n
+        i = 0
+        while todo > 0:
+            i += 1
+            if i > max_iters:
+                raise RuntimeError("oops")
+            start = self.churn_cursor % self.expect_rows
+            n_iter = min((self.expect_rows - start), todo)
+            todo -= n_iter
+
+            end = start + n_iter - 1
+
+            log.info(
+                f"start,end = {start},{end}, cursor={self.churn_cursor}, expect_rows={self.expect_rows}"
+            )
+
+            assert end < self.expect_rows
+
+            self.churn_cursor += n_iter
+            dummy_value = "blah"
+            endpoint.safe_psql_many(
+                [
+                    f"""
+                INSERT INTO {self.table} (id, val)
+                SELECT g, '{dummy_value}'
+                FROM generate_series({start}, {end}) g
+                ON CONFLICT (id) DO UPDATE
+                SET val = EXCLUDED.val
+                """,
+                    f"VACUUM {self.table}",
+                ]
+            )
+
+        last_flush_lsn = wait_for_last_flush_lsn(
+            self.env, endpoint, self.tenant_id, self.timeline_id, pageserver_id=pageserver_id
+        )
+        ps_http = self.env.get_pageserver(pageserver_id).http_client()
+        wait_for_last_record_lsn(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
+
+        if upload:
+            # force a checkpoint to trigger upload
+            ps_http.timeline_checkpoint(self.tenant_id, self.timeline_id)
+            wait_for_upload(ps_http, self.tenant_id, self.timeline_id, last_flush_lsn)
+            log.info(f"Churn: waiting for remote LSN {last_flush_lsn}")
+        else:
+            log.info(f"Churn: not waiting for upload, disk LSN {last_flush_lsn}")
+
+    def validate(self, pageserver_id):
+        endpoint = self.endpoint(pageserver_id)
+        result = endpoint.safe_psql_many(
+            [
+                "select clear_buffer_cache()",
+                f"""
+            SELECT COUNT(*) FROM {self.table}
+            """,
+            ]
+        )
+
+        log.info(f"validate({self.expect_rows}): {result}")
+        assert result == [[("",)], [(self.expect_rows,)]]
diff --git a/test_runner/performance/test_bulk_insert.py b/test_runner/performance/test_bulk_insert.py
index a146e011cc..edc23b29ba 100644
--- a/test_runner/performance/test_bulk_insert.py
+++ b/test_runner/performance/test_bulk_insert.py
@@ -55,9 +55,19 @@ def measure_recovery_time(env: NeonCompare):
 
     # Delete the Tenant in the pageserver: this will drop local and remote layers, such that
     # when we "create" the Tenant again, we will replay the WAL from the beginning.
+    #
+    # This is a "weird" thing to do, and can confuse the attachment service as we're re-using
+    # the same tenant ID for a tenant that is logically different from the pageserver's point
+    # of view, but the same as far as the safekeeper/WAL is concerned.  To work around that,
+    # we will explicitly create the tenant in the same generation that it was previously
+    # attached in.
+    attach_status = env.env.attachment_service.inspect(tenant_id=env.tenant)
+    assert attach_status is not None
+    (attach_gen, _) = attach_status
+
     client.tenant_delete(env.tenant)
     wait_tenant_status_404(client, env.tenant, iterations=60, interval=0.5)
-    env.env.pageserver.tenant_create(tenant_id=env.tenant)
+    env.env.pageserver.tenant_create(tenant_id=env.tenant, generation=attach_gen)
 
     # Measure recovery time
     with env.record_duration("wal_recovery"):
diff --git a/test_runner/performance/test_perf_olap.py b/test_runner/performance/test_perf_olap.py
index 0f7615f7ed..1de7e95bbe 100644
--- a/test_runner/performance/test_perf_olap.py
+++ b/test_runner/performance/test_perf_olap.py
@@ -17,6 +17,27 @@ class LabelledQuery:
     query: str
 
 
+# This must run before all tests in this module
+# create extension pg_stat_statements if it does not exist
+# and TEST_OLAP_COLLECT_PG_STAT_STATEMENTS is set to true (default false)
+# Theoretically this could be in a module or session scope fixture,
+# however the code depends on other fixtures that have function scope
+@pytest.mark.skipif(
+    os.getenv("TEST_OLAP_COLLECT_PG_STAT_STATEMENTS", "false").lower() == "false",
+    reason="Skipping - Creating extension pg_stat_statements",
+)
+@pytest.mark.remote_cluster
+def test_clickbench_create_pg_stat_statements(remote_compare: RemoteCompare):
+    log.info("Creating extension pg_stat_statements")
+    query = LabelledQuery(
+        "Q_CREATE_EXTENSION", r"CREATE EXTENSION IF NOT EXISTS pg_stat_statements;"
+    )
+    run_psql(remote_compare, query, times=1, explain=False)
+    log.info("Reset pg_stat_statements")
+    query = LabelledQuery("Q_RESET", r"SELECT pg_stat_statements_reset();")
+    run_psql(remote_compare, query, times=1, explain=False)
+
+
 # A list of queries to run.
 # Please do not alter the label for the query, as it is used to identify it.
 # Labels for ClickBench queries match the labels in ClickBench reports
@@ -78,6 +99,8 @@ QUERIES: Tuple[LabelledQuery, ...] = (
     # fmt: on
 )
 
+EXPLAIN_STRING: str = "EXPLAIN (ANALYZE, VERBOSE, BUFFERS, COSTS, SETTINGS, FORMAT JSON)"
+
 
 def get_scale() -> List[str]:
     # We parametrize each tpc-h and clickbench test with scale
@@ -88,7 +111,10 @@ def get_scale() -> List[str]:
     return [scale]
 
 
-def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> None:
+# run the query times times plus once with EXPLAIN VERBOSE if explain is requestd
+def run_psql(
+    env: RemoteCompare, labelled_query: LabelledQuery, times: int, explain: bool = False
+) -> None:
     # prepare connstr:
     # - cut out password from connstr to pass it via env
     # - add options to connstr
@@ -108,6 +134,13 @@ def run_psql(env: RemoteCompare, labelled_query: LabelledQuery, times: int) -> N
         log.info(f"Run {run}/{times}")
         with env.zenbenchmark.record_duration(f"{label}/{run}"):
             env.pg_bin.run_capture(["psql", connstr, "-c", query], env=environ)
+    if explain:
+        log.info(f"Explaining query {label}")
+        run += 1
+        with env.zenbenchmark.record_duration(f"{label}/EXPLAIN"):
+            env.pg_bin.run_capture(
+                ["psql", connstr, "-c", f"{EXPLAIN_STRING} {query}"], env=environ
+            )
 
 
 @pytest.mark.parametrize("scale", get_scale())
@@ -120,8 +153,9 @@ def test_clickbench(query: LabelledQuery, remote_compare: RemoteCompare, scale:
     Based on https://github.com/ClickHouse/ClickBench/tree/c00135ca5b6a0d86fedcdbf998fdaa8ed85c1c3b/aurora-postgresql
     The DB prepared manually in advance
     """
+    explain: bool = os.getenv("TEST_OLAP_COLLECT_EXPLAIN", "false").lower() == "true"
 
-    run_psql(remote_compare, query, times=3)
+    run_psql(remote_compare, query, times=3, explain=explain)
 
 
 def tpch_queuies() -> Tuple[ParameterSet, ...]:
@@ -195,3 +229,16 @@ def test_user_examples(remote_compare: RemoteCompare):
         """,
     )
     run_psql(remote_compare, query, times=3)
+
+
+# This must run after all tests in this module
+# Collect pg_stat_statements after running the tests if TEST_OLAP_COLLECT_PG_STAT_STATEMENTS is set to true (default false)
+@pytest.mark.skipif(
+    os.getenv("TEST_OLAP_COLLECT_PG_STAT_STATEMENTS", "false").lower() == "false",
+    reason="Skipping - Collecting pg_stat_statements",
+)
+@pytest.mark.remote_cluster
+def test_clickbench_collect_pg_stat_statements(remote_compare: RemoteCompare):
+    log.info("Collecting pg_stat_statements")
+    query = LabelledQuery("Q_COLLECT_PG_STAT_STATEMENTS", r"SELECT * from pg_stat_statements;")
+    run_psql(remote_compare, query, times=1, explain=False)
diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py
index 70d386a566..32397bbcc1 100644
--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -52,7 +52,16 @@ def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, N
         TenantId(t["id"]) for t in ps_http.tenant_list()
     ], "tenant should not be attached after negative test"
 
-    env.pageserver.allowed_errors.append(".*Error processing HTTP request: Bad request")
+    env.pageserver.allowed_errors.extend(
+        [
+            # This fixture detaches the tenant, and tests using it will tend to re-attach it
+            # shortly after. There may be un-processed deletion_queue validations from the
+            # initial attachment
+            ".*Dropped remote consistent LSN updates.*",
+            # This fixture is for tests that will intentionally generate 400 responses
+            ".*Error processing HTTP request: Bad request",
+        ]
+    )
 
     def log_contains_bad_request():
         env.pageserver.log_contains(".*Error processing HTTP request: Bad request")
@@ -127,10 +136,7 @@ def test_no_config(positive_env: NeonEnv, content_type: Optional[str]):
     ps_http.tenant_detach(tenant_id)
     assert tenant_id not in [TenantId(t["id"]) for t in ps_http.tenant_list()]
 
-    body = {}
-    gen = env.pageserver.maybe_get_generation(tenant_id)
-    if gen is not None:
-        body["generation"] = gen
+    body = {"generation": env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)}
 
     ps_http.post(
         f"{ps_http.base_url}/v1/tenant/{tenant_id}/attach",
@@ -163,6 +169,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
         "gc_feedback": True,
         "gc_horizon": 23 * (1024 * 1024),
         "gc_period": "2h 13m",
+        "heatmap_period": "10m",
         "image_creation_threshold": 7,
         "pitr_interval": "1m",
         "lagging_wal_timeout": "23m",
diff --git a/test_runner/regress/test_auth.py b/test_runner/regress/test_auth.py
index 7487106c44..bd87ff3efd 100644
--- a/test_runner/regress/test_auth.py
+++ b/test_runner/regress/test_auth.py
@@ -92,8 +92,9 @@ def test_compute_auth_to_pageserver(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.auth_enabled = True
     env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
-    env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
+    env.pageserver.allowed_errors.extend(
+        [".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
+    )
 
     pageserver_token_old = env.auth_keys.generate_pageserver_token()
     pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
@@ -145,9 +146,9 @@ def test_pageserver_multiple_keys(neon_env_builder: NeonEnvBuilder):
 def test_pageserver_key_reload(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.auth_enabled = True
     env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(".*Authentication error: InvalidSignature.*")
-    env.pageserver.allowed_errors.append(".*Unauthorized: malformed jwt token.*")
-
+    env.pageserver.allowed_errors.extend(
+        [".*Authentication error: InvalidSignature.*", ".*Unauthorized: malformed jwt token.*"]
+    )
     pageserver_token_old = env.auth_keys.generate_pageserver_token()
     pageserver_http_client_old = env.pageserver.http_client(pageserver_token_old)
 
diff --git a/test_runner/regress/test_branch_behind.py b/test_runner/regress/test_branch_behind.py
index a19b2862f8..9879254897 100644
--- a/test_runner/regress/test_branch_behind.py
+++ b/test_runner/regress/test_branch_behind.py
@@ -14,8 +14,9 @@ def test_branch_behind(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.pageserver_config_override = "tenant_config={pitr_interval = '0 sec'}"
     env = neon_env_builder.init_start()
 
-    env.pageserver.allowed_errors.append(".*invalid branch start lsn.*")
-    env.pageserver.allowed_errors.append(".*invalid start lsn .* for ancestor timeline.*")
+    env.pageserver.allowed_errors.extend(
+        [".*invalid branch start lsn.*", ".*invalid start lsn .* for ancestor timeline.*"]
+    )
 
     # Branch at the point where only 100 rows were inserted
     branch_behind_timeline_id = env.neon_cli.create_branch("test_branch_behind")
diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py
index a908dd713a..9a0b91b54e 100644
--- a/test_runner/regress/test_branching.py
+++ b/test_runner/regress/test_branching.py
@@ -1,8 +1,7 @@
 import random
 import threading
 import time
-from queue import SimpleQueue
-from typing import Any, Dict, List, Union
+from typing import List
 
 import pytest
 from fixtures.log_helper import log
@@ -148,11 +147,11 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE
     env = neon_env_builder.init_configs()
     env.start()
 
-    env.pageserver.allowed_errors.append(
-        ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*",
+            ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading",
+        ]
     )
     ps_http = env.pageserver.http_client()
 
@@ -239,92 +238,6 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder
         t.join()
 
 
-def test_competing_branchings_from_loading_race_to_ok_or_err(neon_env_builder: NeonEnvBuilder):
-    """
-    If the activate only after upload is used, then retries could become competing.
-    """
-
-    env = neon_env_builder.init_configs()
-    env.start()
-
-    env.pageserver.allowed_errors.append(
-        ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*Error processing HTTP request: InternalServerError\\(Timeline .*/.* already exists in pageserver's memory"
-    )
-    ps_http = env.pageserver.http_client()
-
-    # pause all uploads
-    ps_http.configure_failpoints(("before-upload-index-pausable", "pause"))
-    env.pageserver.tenant_create(env.initial_tenant)
-
-    def start_creating_timeline():
-        ps_http.timeline_create(
-            env.pg_version, env.initial_tenant, env.initial_timeline, timeout=60
-        )
-
-    create_root = threading.Thread(target=start_creating_timeline)
-
-    branch_id = TimelineId.generate()
-
-    queue: SimpleQueue[Union[Dict[Any, Any], Exception]] = SimpleQueue()
-    barrier = threading.Barrier(3)
-
-    def try_branch():
-        barrier.wait()
-        barrier.wait()
-        try:
-            ret = ps_http.timeline_create(
-                env.pg_version,
-                env.initial_tenant,
-                branch_id,
-                ancestor_timeline_id=env.initial_timeline,
-                timeout=5,
-            )
-            queue.put(ret)
-        except Exception as e:
-            queue.put(e)
-
-    threads = [threading.Thread(target=try_branch) for _ in range(2)]
-
-    try:
-        create_root.start()
-
-        for t in threads:
-            t.start()
-
-        wait_until_paused(env, "before-upload-index-pausable")
-
-        barrier.wait()
-        ps_http.configure_failpoints(("before-upload-index-pausable", "off"))
-        barrier.wait()
-
-        # now both requests race to branch, only one can win because they take gc_cs, Tenant::timelines or marker files
-        first = queue.get()
-        second = queue.get()
-
-        log.info(first)
-        log.info(second)
-
-        (succeeded, failed) = (first, second) if isinstance(second, Exception) else (second, first)
-        assert isinstance(failed, Exception)
-        assert isinstance(succeeded, Dict)
-
-        # there's multiple valid status codes:
-        # - Timeline x/y already exists
-        # - whatever 409 response says, but that is a subclass of PageserverApiException
-        assert isinstance(failed, PageserverApiException)
-        assert succeeded["state"] == "Active"
-    finally:
-        # we might still have the failpoint active
-        env.pageserver.stop(immediate=True)
-
-        for t in threads:
-            t.join()
-        create_root.join()
-
-
 def test_non_uploaded_root_timeline_is_deleted_after_restart(neon_env_builder: NeonEnvBuilder):
     """
     Check that a timeline is deleted locally on subsequent restart if it never successfully uploaded during creation.
diff --git a/test_runner/regress/test_broken_timeline.py b/test_runner/regress/test_broken_timeline.py
index 53eeb8bbe9..4da0ba7b20 100644
--- a/test_runner/regress/test_broken_timeline.py
+++ b/test_runner/regress/test_broken_timeline.py
@@ -20,7 +20,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
 
     env.pageserver.allowed_errors.extend(
         [
-            ".*layer loading failed:.*",
+            ".*get_value_reconstruct_data for layer .*",
             ".*could not find data for key.*",
             ".*is not active. Current state: Broken.*",
             ".*will not become active. Current state: Broken.*",
@@ -83,7 +83,7 @@ def test_local_corruption(neon_env_builder: NeonEnvBuilder):
     # (We don't check layer file contents on startup, when loading the timeline)
     #
     # This will change when we implement checksums for layers
-    with pytest.raises(Exception, match="layer loading failed:") as err:
+    with pytest.raises(Exception, match="get_value_reconstruct_data for layer ") as err:
         pg2.start()
     log.info(
         f"As expected, compute startup failed for timeline {tenant2}/{timeline2} with corrupt layers: {err}"
diff --git a/test_runner/regress/test_change_pageserver.py b/test_runner/regress/test_change_pageserver.py
index 1b6c982850..adb67a579e 100644
--- a/test_runner/regress/test_change_pageserver.py
+++ b/test_runner/regress/test_change_pageserver.py
@@ -87,7 +87,6 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder):
     #
     # Since we're dual-attached, need to tip-off attachment service to treat the one we're
     # about to start as the attached pageserver
-    assert env.attachment_service is not None
     env.attachment_service.attach_hook_issue(env.initial_tenant, env.pageservers[0].id)
     env.pageservers[0].start()
     env.pageservers[1].stop()
diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py
index f3c6af4427..5a9c2782e6 100644
--- a/test_runner/regress/test_compatibility.py
+++ b/test_runner/regress/test_compatibility.py
@@ -1,30 +1,25 @@
-import copy
 import os
 import shutil
 import subprocess
 import tempfile
 from pathlib import Path
-from typing import Any, List, Optional
+from typing import List, Optional
 
 import pytest
-import toml  # TODO: replace with tomllib for Python >= 3.11
-from fixtures.log_helper import log
+import toml
 from fixtures.neon_fixtures import (
-    NeonCli,
+    NeonEnv,
     NeonEnvBuilder,
     PgBin,
 )
-from fixtures.pageserver.http import PageserverHttpClient
 from fixtures.pageserver.utils import (
     timeline_delete_wait_completed,
     wait_for_last_record_lsn,
     wait_for_upload,
 )
 from fixtures.pg_version import PgVersion
-from fixtures.port_distributor import PortDistributor
-from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind, RemoteStorageUser
+from fixtures.remote_storage import RemoteStorageKind
 from fixtures.types import Lsn
-from pytest import FixtureRequest
 
 #
 # A test suite that help to prevent unintentionally breaking backward or forward compatibility between Neon releases.
@@ -37,8 +32,8 @@ from pytest import FixtureRequest
 #   If the breakage is intentional, the test can be xfaild with setting ALLOW_FORWARD_COMPATIBILITY_BREAKAGE=true.
 #
 # The file contains a couple of helper functions:
-# - prepare_snapshot copies the snapshot, cleans it up and makes it ready for the current version of Neon (replaces paths and ports in config files).
 # - check_neon_works performs the test itself, feel free to add more checks there.
+# - dump_differs compares two SQL dumps and writes the diff to a file.
 #
 #
 # How to run `test_backward_compatibility` locally:
@@ -46,6 +41,7 @@ from pytest import FixtureRequest
 #    export DEFAULT_PG_VERSION=15
 #    export BUILD_TYPE=release
 #    export CHECK_ONDISK_DATA_COMPATIBILITY=true
+#    export COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION}
 #
 #    # Build previous version of binaries and create a data snapshot:
 #    rm -rf pg_install target
@@ -59,8 +55,7 @@ from pytest import FixtureRequest
 #    CARGO_BUILD_FLAGS="--features=testing" make -s -j`nproc`
 #
 #    # Run backward compatibility test
-#    COMPATIBILITY_SNAPSHOT_DIR=test_output/compatibility_snapshot_pgv${DEFAULT_PG_VERSION} \
-#       ./scripts/pytest -k test_backward_compatibility
+#    ./scripts/pytest -k test_backward_compatibility
 #
 #
 # How to run `test_forward_compatibility` locally:
@@ -68,6 +63,8 @@ from pytest import FixtureRequest
 #    export DEFAULT_PG_VERSION=15
 #    export BUILD_TYPE=release
 #    export CHECK_ONDISK_DATA_COMPATIBILITY=true
+#    export COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE}
+#    export COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install
 #
 #    # Build previous version of binaries and store them somewhere:
 #    rm -rf pg_install target
@@ -84,9 +81,7 @@ from pytest import FixtureRequest
 #    ./scripts/pytest -k test_create_snapshot
 #
 #    # Run forward compatibility test
-#    COMPATIBILITY_NEON_BIN=neon_previous/target/${BUILD_TYPE} \
-#    COMPATIBILITY_POSTGRES_DISTRIB_DIR=neon_previous/pg_install \
-#       ./scripts/pytest -k test_forward_compatibility
+#    ./scripts/pytest -k test_forward_compatibility
 #
 
 check_ondisk_data_compatibility_if_enabled = pytest.mark.skipif(
@@ -155,13 +150,9 @@ def test_create_snapshot(
 @pytest.mark.xdist_group("compatibility")
 @pytest.mark.order(after="test_create_snapshot")
 def test_backward_compatibility(
-    pg_bin: PgBin,
-    port_distributor: PortDistributor,
+    neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
-    neon_binpath: Path,
-    pg_distrib_dir: Path,
     pg_version: PgVersion,
-    request: FixtureRequest,
 ):
     """
     Test that the new binaries can read old data
@@ -177,23 +168,15 @@ def test_backward_compatibility(
     )
 
     try:
-        # Copy the snapshot to current directory, and prepare for the test
-        prepare_snapshot(
-            from_dir=compatibility_snapshot_dir,
-            to_dir=test_output_dir / "compatibility_snapshot",
-            port_distributor=port_distributor,
-        )
+        neon_env_builder.num_safekeepers = 3
+        env = neon_env_builder.from_repo_dir(compatibility_snapshot_dir / "repo")
+        neon_env_builder.start()
 
         check_neon_works(
-            test_output_dir / "compatibility_snapshot" / "repo",
-            neon_binpath,
-            neon_binpath,
-            pg_distrib_dir,
-            pg_version,
-            port_distributor,
-            test_output_dir,
-            pg_bin,
-            request,
+            env,
+            test_output_dir=test_output_dir,
+            sql_dump_path=compatibility_snapshot_dir / "dump.sql",
+            repo_dir=env.repo_dir,
         )
     except Exception:
         if breaking_changes_allowed:
@@ -212,12 +195,10 @@ def test_backward_compatibility(
 @pytest.mark.xdist_group("compatibility")
 @pytest.mark.order(after="test_create_snapshot")
 def test_forward_compatibility(
+    neon_env_builder: NeonEnvBuilder,
     test_output_dir: Path,
     top_output_dir: Path,
-    port_distributor: PortDistributor,
     pg_version: PgVersion,
-    request: FixtureRequest,
-    neon_binpath: Path,
 ):
     """
     Test that the old binaries can read new data
@@ -244,24 +225,19 @@ def test_forward_compatibility(
     )
 
     try:
-        # Copy the snapshot to current directory, and prepare for the test
-        prepare_snapshot(
-            from_dir=compatibility_snapshot_dir,
-            to_dir=test_output_dir / "compatibility_snapshot",
-            port_distributor=port_distributor,
+        neon_env_builder.num_safekeepers = 3
+        env = neon_env_builder.from_repo_dir(
+            compatibility_snapshot_dir / "repo",
+            neon_binpath=compatibility_neon_bin,
             pg_distrib_dir=compatibility_postgres_distrib_dir,
         )
+        neon_env_builder.start()
 
         check_neon_works(
-            test_output_dir / "compatibility_snapshot" / "repo",
-            compatibility_neon_bin,
-            neon_binpath,
-            compatibility_postgres_distrib_dir,
-            pg_version,
-            port_distributor,
-            test_output_dir,
-            PgBin(test_output_dir, compatibility_postgres_distrib_dir, pg_version),
-            request,
+            env,
+            test_output_dir=test_output_dir,
+            sql_dump_path=compatibility_snapshot_dir / "dump.sql",
+            repo_dir=env.repo_dir,
         )
     except Exception:
         if breaking_changes_allowed:
@@ -276,193 +252,45 @@ def test_forward_compatibility(
     ), "Breaking changes are allowed by ALLOW_FORWARD_COMPATIBILITY_BREAKAGE, but the test has passed without any breakage"
 
 
-def prepare_snapshot(
-    from_dir: Path,
-    to_dir: Path,
-    port_distributor: PortDistributor,
-    pg_distrib_dir: Optional[Path] = None,
-):
-    assert from_dir.exists(), f"Snapshot '{from_dir}' doesn't exist"
-    assert (from_dir / "repo").exists(), f"Snapshot '{from_dir}' doesn't contain a repo directory"
-    assert (from_dir / "dump.sql").exists(), f"Snapshot '{from_dir}' doesn't contain a dump.sql"
+def check_neon_works(env: NeonEnv, test_output_dir: Path, sql_dump_path: Path, repo_dir: Path):
+    ep = env.endpoints.create_start("main")
+    pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version)
 
-    log.info(f"Copying snapshot from {from_dir} to {to_dir}")
-    shutil.copytree(from_dir, to_dir)
-
-    repo_dir = to_dir / "repo"
-
-    snapshot_config_toml = repo_dir / "config"
-    snapshot_config = toml.load(snapshot_config_toml)
-
-    # Remove old logs to avoid confusion in test artifacts
-    for logfile in repo_dir.glob("**/*.log"):
-        logfile.unlink()
-
-    # Remove old computes in 'endpoints'. Old versions of the control plane used a directory
-    # called "pgdatadirs". Delete it, too.
-    if (repo_dir / "endpoints").exists():
-        shutil.rmtree(repo_dir / "endpoints")
-    if (repo_dir / "pgdatadirs").exists():
-        shutil.rmtree(repo_dir / "pgdatadirs")
-    os.mkdir(repo_dir / "endpoints")
-
-    # Update paths and ports in config files
-    legacy_pageserver_toml = repo_dir / "pageserver.toml"
-    legacy_bundle = os.path.exists(legacy_pageserver_toml)
-
-    path_to_config: dict[Path, dict[Any, Any]] = {}
-    if legacy_bundle:
-        os.mkdir(repo_dir / "pageserver_1")
-        path_to_config[repo_dir / "pageserver_1" / "pageserver.toml"] = toml.load(
-            legacy_pageserver_toml
-        )
-        os.remove(legacy_pageserver_toml)
-        os.rename(repo_dir / "tenants", repo_dir / "pageserver_1" / "tenants")
-    else:
-        for ps_conf in snapshot_config["pageservers"]:
-            config_path = repo_dir / f"pageserver_{ps_conf['id']}" / "pageserver.toml"
-            path_to_config[config_path] = toml.load(config_path)
-
-    # For each pageserver config, edit it and rewrite
-    for config_path, pageserver_config in path_to_config.items():
-        pageserver_config["remote_storage"]["local_path"] = str(
-            LocalFsStorage.component_path(repo_dir, RemoteStorageUser.PAGESERVER)
-        )
-
-        for param in ("listen_http_addr", "listen_pg_addr", "broker_endpoint"):
-            pageserver_config[param] = port_distributor.replace_with_new_port(
-                pageserver_config[param]
-            )
-
-        # We don't use authentication in compatibility tests
-        # so just remove authentication related settings.
-        pageserver_config.pop("pg_auth_type", None)
-        pageserver_config.pop("http_auth_type", None)
-
-        if pg_distrib_dir:
-            pageserver_config["pg_distrib_dir"] = str(pg_distrib_dir)
-
-        with config_path.open("w") as f:
-            toml.dump(pageserver_config, f)
-
-    # neon_local config doesn't have to be backward compatible.  If we're using a dump from before
-    # it supported multiple pageservers, fix it up.
-    if "pageservers" not in snapshot_config:
-        snapshot_config["pageservers"] = [snapshot_config["pageserver"]]
-        del snapshot_config["pageserver"]
-
-    for param in ("listen_http_addr", "listen_pg_addr"):
-        for pageserver in snapshot_config["pageservers"]:
-            pageserver[param] = port_distributor.replace_with_new_port(pageserver[param])
-    snapshot_config["broker"]["listen_addr"] = port_distributor.replace_with_new_port(
-        snapshot_config["broker"]["listen_addr"]
-    )
-    for sk in snapshot_config["safekeepers"]:
-        for param in ("http_port", "pg_port", "pg_tenant_only_port"):
-            sk[param] = port_distributor.replace_with_new_port(sk[param])
-
-    if pg_distrib_dir:
-        snapshot_config["pg_distrib_dir"] = str(pg_distrib_dir)
-
-    with snapshot_config_toml.open("w") as f:
-        toml.dump(snapshot_config, f)
-
-    # Ensure that snapshot doesn't contain references to the original path
-    rv = subprocess.run(
-        [
-            "grep",
-            "--recursive",
-            "--binary-file=without-match",
-            "--files-with-matches",
-            "test_create_snapshot/repo",
-            str(repo_dir),
-        ],
-        capture_output=True,
-        text=True,
-    )
-    assert (
-        rv.returncode != 0
-    ), f"there're files referencing `test_create_snapshot/repo`, this path should be replaced with {repo_dir}:\n{rv.stdout}"
-
-
-def check_neon_works(
-    repo_dir: Path,
-    neon_target_binpath: Path,
-    neon_current_binpath: Path,
-    pg_distrib_dir: Path,
-    pg_version: PgVersion,
-    port_distributor: PortDistributor,
-    test_output_dir: Path,
-    pg_bin: PgBin,
-    request: FixtureRequest,
-):
-    snapshot_config_toml = repo_dir / "config"
-    snapshot_config = toml.load(snapshot_config_toml)
-    snapshot_config["neon_distrib_dir"] = str(neon_target_binpath)
-    snapshot_config["postgres_distrib_dir"] = str(pg_distrib_dir)
-    with (snapshot_config_toml).open("w") as f:
-        toml.dump(snapshot_config, f)
-
-    # TODO: replace with NeonEnvBuilder / NeonEnv
-    config: Any = type("NeonEnvStub", (object,), {})
-    config.rust_log_override = None
-    config.repo_dir = repo_dir
-    config.pg_version = pg_version
-    config.initial_tenant = snapshot_config["default_tenant_id"]
-    config.pg_distrib_dir = pg_distrib_dir
-    config.remote_storage = None
-    config.sk_remote_storage = None
-
-    # Use the "target" binaries to launch the storage nodes
-    config_target = config
-    config_target.neon_binpath = neon_target_binpath
-    # We are using maybe-old binaries for neon services, but want to use current
-    # binaries for test utilities like neon_local
-    config_target.neon_local_binpath = neon_current_binpath
-    cli_target = NeonCli(config_target)
-
-    # And the current binaries to launch computes
-    snapshot_config["neon_distrib_dir"] = str(neon_current_binpath)
-    with (snapshot_config_toml).open("w") as f:
-        toml.dump(snapshot_config, f)
-    config_current = copy.copy(config)
-    config_current.neon_binpath = neon_current_binpath
-    cli_current = NeonCli(config_current)
-
-    cli_target.raw_cli(["start"])
-    request.addfinalizer(lambda: cli_target.raw_cli(["stop"]))
-
-    pg_port = port_distributor.get_port()
-    http_port = port_distributor.get_port()
-    cli_current.endpoint_create(
-        branch_name="main", pg_port=pg_port, http_port=http_port, endpoint_id="ep-main"
-    )
-    cli_current.endpoint_start("ep-main")
-    request.addfinalizer(lambda: cli_current.endpoint_stop("ep-main"))
-
-    connstr = f"host=127.0.0.1 port={pg_port} user=cloud_admin dbname=postgres"
+    connstr = ep.connstr()
     pg_bin.run_capture(
         ["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump.sql'}"]
     )
     initial_dump_differs = dump_differs(
-        repo_dir.parent / "dump.sql",
+        sql_dump_path,
         test_output_dir / "dump.sql",
         test_output_dir / "dump.filediff",
     )
 
     # Check that project can be recovered from WAL
     # loosely based on https://www.notion.so/neondatabase/Storage-Recovery-from-WAL-d92c0aac0ebf40df892b938045d7d720
-    tenant_id = snapshot_config["default_tenant_id"]
-    timeline_id = dict(snapshot_config["branch_name_mappings"]["main"])[tenant_id]
-    pageserver_port = snapshot_config["pageservers"][0]["listen_http_addr"].split(":")[-1]
-    pageserver_http = PageserverHttpClient(
-        port=pageserver_port,
-        is_testing_enabled_or_skip=lambda: True,  # TODO: check if testing really enabled
+    pageserver_http = env.pageserver.http_client()
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+    pg_version = env.pg_version
+
+    # Delete all files from local_fs_remote_storage except initdb.tar.zst,
+    # the file is required for `timeline_create` with `existing_initdb_timeline_id`.
+    #
+    # TODO: switch to Path.walk() in Python 3.12
+    # for dirpath, _dirnames, filenames in (repo_dir / "local_fs_remote_storage").walk():
+    for dirpath, _dirnames, filenames in os.walk(repo_dir / "local_fs_remote_storage"):
+        for filename in filenames:
+            if filename != "initdb.tar.zst":
+                (Path(dirpath) / filename).unlink()
+
+    timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
+    pageserver_http.timeline_create(
+        pg_version=pg_version,
+        tenant_id=tenant_id,
+        new_timeline_id=timeline_id,
+        existing_initdb_timeline_id=timeline_id,
     )
 
-    shutil.rmtree(repo_dir / "local_fs_remote_storage")
-    timeline_delete_wait_completed(pageserver_http, tenant_id, timeline_id)
-    pageserver_http.timeline_create(pg_version, tenant_id, timeline_id)
     pg_bin.run_capture(
         ["pg_dumpall", f"--dbname={connstr}", f"--file={test_output_dir / 'dump-from-wal.sql'}"]
     )
@@ -494,6 +322,11 @@ def dump_differs(
     Returns True if the dumps differ and produced diff is not allowed, False otherwise (in most cases we want it to return False).
     """
 
+    if not first.exists():
+        raise FileNotFoundError(f"{first} doesn't exist")
+    if not second.exists():
+        raise FileNotFoundError(f"{second} doesn't exist")
+
     with output.open("w") as stdout:
         res = subprocess.run(
             [
diff --git a/test_runner/regress/test_config.py b/test_runner/regress/test_config.py
old mode 100755
new mode 100644
diff --git a/test_runner/regress/test_import.py b/test_runner/regress/test_import.py
index 920e8d0b72..faedf5d944 100644
--- a/test_runner/regress/test_import.py
+++ b/test_runner/regress/test_import.py
@@ -99,12 +99,13 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build
         ]
     )
 
-    # FIXME: we should clean up pageserver to not print this
-    env.pageserver.allowed_errors.append(".*exited with error: unexpected message type: CopyData.*")
-
-    # FIXME: Is this expected?
-    env.pageserver.allowed_errors.append(
-        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            # FIXME: we should clean up pageserver to not print this
+            ".*exited with error: unexpected message type: CopyData.*",
+            # FIXME: Is this expected?
+            ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
+        ]
     )
 
     def import_tar(base, wal):
diff --git a/test_runner/regress/test_layers_from_future.py b/test_runner/regress/test_layers_from_future.py
index ef2b2185c3..340188c1ae 100644
--- a/test_runner/regress/test_layers_from_future.py
+++ b/test_runner/regress/test_layers_from_future.py
@@ -157,7 +157,6 @@ def test_issue_5878(neon_env_builder: NeonEnvBuilder):
     time.sleep(1.1)  # so that we can use change in pre_stat.st_mtime to detect overwrites
 
     def get_generation_number():
-        assert env.attachment_service is not None
         attachment = env.attachment_service.inspect(tenant_id)
         assert attachment is not None
         return attachment[0]
diff --git a/test_runner/regress/test_logical_replication.py b/test_runner/regress/test_logical_replication.py
index d2d8d71e3f..51e358e60d 100644
--- a/test_runner/regress/test_logical_replication.py
+++ b/test_runner/regress/test_logical_replication.py
@@ -236,3 +236,30 @@ def test_wal_page_boundary_start(neon_simple_env: NeonEnv, vanilla_pg):
     assert vanilla_pg.safe_psql(
         "select sum(somedata) from replication_example"
     ) == endpoint.safe_psql("select sum(somedata) from replication_example")
+
+
+#
+# Check that slots are not inherited in brnach
+#
+def test_slots_and_branching(neon_simple_env: NeonEnv):
+    env = neon_simple_env
+
+    tenant, timeline = env.neon_cli.create_tenant()
+    env.pageserver.http_client()
+
+    main_branch = env.endpoints.create_start("main", tenant_id=tenant)
+    main_cur = main_branch.connect().cursor()
+
+    # Create table and insert some data
+    main_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
+
+    wait_for_last_flush_lsn(env, main_branch, tenant, timeline)
+
+    # Create branch ws.
+    env.neon_cli.create_branch("ws", "main", tenant_id=tenant)
+    ws_branch = env.endpoints.create_start("ws", tenant_id=tenant)
+    log.info("postgres is running on 'ws' branch")
+
+    # Check that we can create slot with the same name
+    ws_cur = ws_branch.connect().cursor()
+    ws_cur.execute("select pg_create_logical_replication_slot('my_slot', 'pgoutput')")
diff --git a/test_runner/regress/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py
index f79c1c347c..65d6d7a9fd 100644
--- a/test_runner/regress/test_lsn_mapping.py
+++ b/test_runner/regress/test_lsn_mapping.py
@@ -8,71 +8,6 @@ from fixtures.types import Lsn
 from fixtures.utils import query_scalar
 
 
-#
-# Test pageserver get_lsn_by_timestamp API
-#
-def test_lsn_mapping_old(neon_env_builder: NeonEnvBuilder):
-    env = neon_env_builder.init_start()
-
-    new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
-    endpoint_main = env.endpoints.create_start("test_lsn_mapping")
-    log.info("postgres is running on 'test_lsn_mapping' branch")
-
-    cur = endpoint_main.connect().cursor()
-    # Create table, and insert rows, each in a separate transaction
-    # Disable synchronous_commit to make this initialization go faster.
-    #
-    # Each row contains current insert LSN and the current timestamp, when
-    # the row was inserted.
-    cur.execute("SET synchronous_commit=off")
-    cur.execute("CREATE TABLE foo (x integer)")
-    tbl = []
-    for i in range(1000):
-        cur.execute("INSERT INTO foo VALUES(%s)", (i,))
-        # Get the timestamp at UTC
-        after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None)
-        tbl.append([i, after_timestamp])
-
-    # Execute one more transaction with synchronous_commit enabled, to flush
-    # all the previous transactions
-    cur.execute("SET synchronous_commit=on")
-    cur.execute("INSERT INTO foo VALUES (-1)")
-
-    # Wait until WAL is received by pageserver
-    wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
-
-    with env.pageserver.http_client() as client:
-        # Check edge cases: timestamp in the future
-        probe_timestamp = tbl[-1][1] + timedelta(hours=1)
-        result = client.timeline_get_lsn_by_timestamp(
-            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
-        )
-        assert result == "future"
-
-        # timestamp too the far history
-        probe_timestamp = tbl[0][1] - timedelta(hours=10)
-        result = client.timeline_get_lsn_by_timestamp(
-            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
-        )
-        assert result == "past"
-
-        # Probe a bunch of timestamps in the valid range
-        for i in range(1, len(tbl), 100):
-            probe_timestamp = tbl[i][1]
-            lsn = client.timeline_get_lsn_by_timestamp(
-                env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
-            )
-            # Call get_lsn_by_timestamp to get the LSN
-            # Launch a new read-only node at that LSN, and check that only the rows
-            # that were supposed to be committed at that point in time are visible.
-            endpoint_here = env.endpoints.create_start(
-                branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
-            )
-            assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
-
-            endpoint_here.stop_and_destroy()
-
-
 #
 # Test pageserver get_lsn_by_timestamp API
 #
@@ -130,7 +65,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
         # Timestamp is in the future
         probe_timestamp = tbl[-1][1] + timedelta(hours=1)
         result = client.timeline_get_lsn_by_timestamp(
-            tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2
+            tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z"
         )
         assert result["kind"] == "future"
         # make sure that we return a well advanced lsn here
@@ -139,7 +74,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
         # Timestamp is in the unreachable past
         probe_timestamp = tbl[0][1] - timedelta(hours=10)
         result = client.timeline_get_lsn_by_timestamp(
-            tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2
+            tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z"
         )
         assert result["kind"] == "past"
         # make sure that we return the minimum lsn here at the start of the range
@@ -149,7 +84,7 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
         for i in range(1, len(tbl), 100):
             probe_timestamp = tbl[i][1]
             result = client.timeline_get_lsn_by_timestamp(
-                tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2
+                tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z"
             )
             assert result["kind"] not in ["past", "nodata"]
             lsn = result["lsn"]
diff --git a/test_runner/regress/test_pageserver_api.py b/test_runner/regress/test_pageserver_api.py
index 64e41a2dd5..573d2139ce 100644
--- a/test_runner/regress/test_pageserver_api.py
+++ b/test_runner/regress/test_pageserver_api.py
@@ -72,7 +72,9 @@ def check_client(env: NeonEnv, client: PageserverHttpClient):
 
     # create new tenant and check it is also there
     tenant_id = TenantId.generate()
-    client.tenant_create(tenant_id, generation=env.pageserver.maybe_get_generation(tenant_id))
+    client.tenant_create(
+        tenant_id, generation=env.attachment_service.attach_hook_issue(tenant_id, env.pageserver.id)
+    )
     assert tenant_id in {TenantId(t["id"]) for t in client.tenant_list()}
 
     timelines = client.timeline_list(tenant_id)
diff --git a/test_runner/regress/test_pageserver_generations.py b/test_runner/regress/test_pageserver_generations.py
index 66cc286aba..9c2f5786d4 100644
--- a/test_runner/regress/test_pageserver_generations.py
+++ b/test_runner/regress/test_pageserver_generations.py
@@ -23,14 +23,20 @@ from fixtures.neon_fixtures import (
     PgBin,
     S3Scrubber,
     last_flush_lsn_upload,
-    wait_for_last_flush_lsn,
 )
-from fixtures.pageserver.utils import list_prefix
+from fixtures.pageserver.http import PageserverApiException
+from fixtures.pageserver.utils import (
+    assert_tenant_state,
+    list_prefix,
+    wait_for_last_record_lsn,
+    wait_for_upload,
+)
 from fixtures.remote_storage import (
     RemoteStorageKind,
 )
 from fixtures.types import TenantId, TimelineId
 from fixtures.utils import print_gc_result, wait_until
+from fixtures.workload import Workload
 
 # A tenant configuration that is convenient for generating uploads and deletions
 # without a large amount of postgres traffic.
@@ -93,7 +99,10 @@ def generate_uploads_and_deletions(
             )
             assert tenant_id is not None
             assert timeline_id is not None
-            wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id)
+            # We are waiting for uploads as well as local flush, in order to avoid leaving the system
+            # in a state where there are "future layers" in remote storage that will generate deletions
+            # after a restart.
+            last_flush_lsn_upload(env, endpoint, tenant_id, timeline_id)
             ps_http.timeline_checkpoint(tenant_id, timeline_id)
 
         # Compaction should generate some GC-elegible layers
@@ -178,7 +187,6 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
     - After upgrade, the bucket should contain a mixture.
     - In both cases, postgres I/O should work.
     """
-    neon_env_builder.enable_generations = True
     neon_env_builder.enable_pageserver_remote_storage(
         RemoteStorageKind.MOCK_S3,
     )
@@ -187,7 +195,6 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
     env.broker.try_start()
     for sk in env.safekeepers:
         sk.start()
-    assert env.attachment_service is not None
     env.attachment_service.start()
 
     env.pageserver.start(overrides=('--pageserver-config-override=control_plane_api=""',))
@@ -253,12 +260,10 @@ def test_generations_upgrade(neon_env_builder: NeonEnvBuilder):
 
 
 def test_deferred_deletion(neon_env_builder: NeonEnvBuilder):
-    neon_env_builder.enable_generations = True
     neon_env_builder.enable_pageserver_remote_storage(
         RemoteStorageKind.MOCK_S3,
     )
     env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
-    assert env.attachment_service is not None
 
     some_other_pageserver = 1234
     ps_http = env.pageserver.http_client()
@@ -332,7 +337,6 @@ def test_deletion_queue_recovery(
     :param validate_before: whether to wait for deletions to be validated before restart.  This
     makes them elegible to be executed after restart, if the same node keeps the attachment.
     """
-    neon_env_builder.enable_generations = True
     neon_env_builder.enable_pageserver_remote_storage(
         RemoteStorageKind.MOCK_S3,
     )
@@ -396,7 +400,6 @@ def test_deletion_queue_recovery(
 
     if keep_attachment == KeepAttachment.LOSE:
         some_other_pageserver = 101010
-        assert env.attachment_service is not None
         env.attachment_service.attach_hook_issue(env.initial_tenant, some_other_pageserver)
 
     env.pageserver.start()
@@ -444,7 +447,6 @@ def test_deletion_queue_recovery(
 
 
 def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
-    neon_env_builder.enable_generations = True
     neon_env_builder.enable_pageserver_remote_storage(
         RemoteStorageKind.MOCK_S3,
     )
@@ -464,7 +466,6 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
     )
 
     # Simulate a major incident: the control plane goes offline
-    assert env.attachment_service is not None
     env.attachment_service.stop()
 
     # Remember how many validations had happened before the control plane went offline
@@ -536,7 +537,6 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
     and must be constructed using the proper generation for the layer, which may not be the same generation
     that the tenant is running in.
     """
-    neon_env_builder.enable_generations = True
     neon_env_builder.enable_pageserver_remote_storage(
         RemoteStorageKind.MOCK_S3,
     )
@@ -560,3 +560,90 @@ def test_eviction_across_generations(neon_env_builder: NeonEnvBuilder):
     read_all(env, tenant_id, timeline_id)
     evict_all_layers(env, tenant_id, timeline_id)
     read_all(env, tenant_id, timeline_id)
+
+
+def test_multi_attach(
+    neon_env_builder: NeonEnvBuilder,
+    pg_bin: PgBin,
+):
+    neon_env_builder.num_pageservers = 3
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+
+    pageservers = env.pageservers
+    http_clients = list([p.http_client() for p in pageservers])
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    # We will intentionally create situations where stale deletions happen from non-latest-generation
+    # nodes when the tenant is multiply-attached
+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [".*Dropped remote consistent LSN updates.*", ".*Dropping stale deletions.*"]
+        )
+
+    # Initially, the tenant will be attached to the first pageserver (first is default in our test harness)
+    wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[0], tenant_id, "Active"))
+    _detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
+    with pytest.raises(PageserverApiException):
+        http_clients[1].timeline_detail(tenant_id, timeline_id)
+    with pytest.raises(PageserverApiException):
+        http_clients[2].timeline_detail(tenant_id, timeline_id)
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(pageservers[0].id)
+    workload.write_rows(1000, pageservers[0].id)
+
+    # Attach the tenant to the other two pageservers
+    pageservers[1].tenant_attach(env.initial_tenant)
+    pageservers[2].tenant_attach(env.initial_tenant)
+
+    wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[1], tenant_id, "Active"))
+    wait_until(10, 0.2, lambda: assert_tenant_state(http_clients[2], tenant_id, "Active"))
+
+    # Now they all have it attached
+    _details = list([c.timeline_detail(tenant_id, timeline_id) for c in http_clients])
+    _detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
+    _detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
+
+    # The endpoint can use any pageserver to service its reads
+    for pageserver in pageservers:
+        workload.validate(pageserver.id)
+
+    # If we write some more data, all the nodes can see it, including stale ones
+    wrote_lsn = workload.write_rows(1000, pageservers[0].id)
+    for ps_http in http_clients:
+        wait_for_last_record_lsn(ps_http, tenant_id, timeline_id, wrote_lsn)
+
+    # ...and indeed endpoints can see it via any of the pageservers
+    for pageserver in pageservers:
+        workload.validate(pageserver.id)
+
+    # Prompt all the pageservers, including stale ones, to upload ingested layers to remote storage
+    for ps_http in http_clients:
+        ps_http.timeline_checkpoint(tenant_id, timeline_id)
+        wait_for_upload(ps_http, tenant_id, timeline_id, wrote_lsn)
+
+    # Now, the contents of remote storage will be a set of layers from each pageserver, but with unique
+    # generation numbers
+    # TODO: validate remote storage contents
+
+    # Stop all pageservers
+    for ps in pageservers:
+        ps.stop()
+
+    # Returning to a normal healthy state: all pageservers will start, but only the one most
+    # recently attached via the control plane will re-attach on startup
+    for ps in pageservers:
+        ps.start()
+
+    with pytest.raises(PageserverApiException):
+        _detail = http_clients[0].timeline_detail(tenant_id, timeline_id)
+    with pytest.raises(PageserverApiException):
+        _detail = http_clients[1].timeline_detail(tenant_id, timeline_id)
+    _detail = http_clients[2].timeline_detail(tenant_id, timeline_id)
+
+    # All data we wrote while multi-attached remains readable
+    workload.validate(pageservers[2].id)
diff --git a/test_runner/regress/test_pageserver_metric_collection.py b/test_runner/regress/test_pageserver_metric_collection.py
index b76dbbee03..042961baa5 100644
--- a/test_runner/regress/test_pageserver_metric_collection.py
+++ b/test_runner/regress/test_pageserver_metric_collection.py
@@ -64,13 +64,13 @@ def test_metric_collection(
     # spin up neon,  after http server is ready
     env = neon_env_builder.init_start(initial_tenant_conf={"pitr_interval": "0 sec"})
     # httpserver is shut down before pageserver during passing run
-    env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
-    # we have a fast rate of calculation, these can happen at shutdown
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*metrics endpoint refused the sent metrics*",
+            # we have a fast rate of calculation, these can happen at shutdown
+            ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
+            ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
+        ]
     )
 
     tenant_id = env.initial_tenant
@@ -212,13 +212,13 @@ def test_metric_collection_cleans_up_tempfile(
     pageserver_http = env.pageserver.http_client()
 
     # httpserver is shut down before pageserver during passing run
-    env.pageserver.allowed_errors.append(".*metrics endpoint refused the sent metrics*")
-    # we have a fast rate of calculation, these can happen at shutdown
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*"
-    )
-    env.pageserver.allowed_errors.append(
-        ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*metrics endpoint refused the sent metrics*",
+            # we have a fast rate of calculation, these can happen at shutdown
+            ".*synthetic_size_worker:calculate_synthetic_size.*:gather_size_inputs.*: failed to calculate logical size at .*: cancelled.*",
+            ".*synthetic_size_worker: failed to calculate synthetic size for tenant .*: failed to calculate some logical_sizes",
+        ]
     )
 
     tenant_id = env.initial_tenant
diff --git a/test_runner/regress/test_pageserver_restart.py b/test_runner/regress/test_pageserver_restart.py
index 3cac32b790..c4499196b5 100644
--- a/test_runner/regress/test_pageserver_restart.py
+++ b/test_runner/regress/test_pageserver_restart.py
@@ -9,9 +9,7 @@ from fixtures.utils import wait_until
 
 # Test restarting page server, while safekeeper and compute node keep
 # running.
-@pytest.mark.parametrize("generations", [True, False])
-def test_pageserver_restart(neon_env_builder: NeonEnvBuilder, generations: bool):
-    neon_env_builder.enable_generations = generations
+def test_pageserver_restart(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.enable_pageserver_remote_storage(s3_storage())
     neon_env_builder.enable_scrub_on_exit()
 
diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py
new file mode 100644
index 0000000000..8ae4297983
--- /dev/null
+++ b/test_runner/regress/test_pageserver_secondary.py
@@ -0,0 +1,371 @@
+import random
+from typing import Any, Dict, Optional
+
+import pytest
+from fixtures.log_helper import log
+from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
+from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
+from fixtures.types import TenantId, TimelineId
+from fixtures.utils import wait_until
+from fixtures.workload import Workload
+
+# A tenant configuration that is convenient for generating uploads and deletions
+# without a large amount of postgres traffic.
+TENANT_CONF = {
+    # small checkpointing and compaction targets to ensure we generate many upload operations
+    "checkpoint_distance": f"{128 * 1024}",
+    "compaction_target_size": f"{128 * 1024}",
+    "compaction_threshold": "1",
+    # no PITR horizon, we specify the horizon when we request on-demand GC
+    "pitr_interval": "0s",
+    # disable background compaction and GC. We invoke it manually when we want it to happen.
+    "gc_period": "0s",
+    "compaction_period": "0s",
+    # create image layers eagerly, so that GC can remove some layers
+    "image_creation_threshold": "1",
+}
+
+
+def evict_random_layers(
+    rng: random.Random, pageserver: NeonPageserver, tenant_id: TenantId, timeline_id: TimelineId
+):
+    """
+    Evict 50% of the layers on a pageserver
+    """
+    timeline_path = pageserver.timeline_dir(tenant_id, timeline_id)
+    initial_local_layers = sorted(
+        list(filter(lambda path: path.name != "metadata", timeline_path.glob("*")))
+    )
+    client = pageserver.http_client()
+    for layer in initial_local_layers:
+        if "ephemeral" in layer.name or "temp_download" in layer.name:
+            continue
+
+        if rng.choice([True, False]):
+            log.info(f"Evicting layer {tenant_id}/{timeline_id} {layer.name}")
+            client.evict_layer(tenant_id=tenant_id, timeline_id=timeline_id, layer_name=layer.name)
+
+
+@pytest.mark.parametrize("seed", [1, 2, 3])
+def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, seed: int):
+    """
+    Issue many location configuration changes, ensure that tenants
+    remain readable & we don't get any unexpected errors.  We should
+    have no ERROR in the log, and no 500s in the API.
+
+    The location_config API is intentionally designed so that all destination
+    states are valid, so that we may test it in this way: the API should always
+    work as long as the tenant exists.
+    """
+    neon_env_builder.num_pageservers = 3
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+
+    pageservers = env.pageservers
+    list([p.http_client() for p in pageservers])
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    # We will make no effort to avoid stale attachments
+    for ps in env.pageservers:
+        ps.allowed_errors.extend(
+            [
+                ".*Dropped remote consistent LSN updates.*",
+                ".*Dropping stale deletions.*",
+                # page_service_conn_main{peer_addr=[::1]:41176}: query handler for 'pagestream 3b19aec5038c796f64b430b30a555121 d07776761d44050b8aab511df1657d83' failed: Tenant 3b19aec5038c796f64b430b30a555121 not found
+                ".*query handler.*Tenant.*not found.*",
+                # page_service_conn_main{peer_addr=[::1]:45552}: query handler for 'pagestream 414ede7ad50f775a8e7d9ba0e43b9efc a43884be16f44b3626482b6981b2c745' failed: Tenant 414ede7ad50f775a8e7d9ba0e43b9efc is not active
+                ".*query handler.*Tenant.*not active.*",
+            ]
+        )
+
+        # these can happen, if we shutdown at a good time. to be fixed as part of #5172.
+        message = ".*duplicated L1 layer layer=.*"
+        ps.allowed_errors.append(message)
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(env.pageservers[0].id)
+    workload.write_rows(256, env.pageservers[0].id)
+
+    # We use a fixed seed to make the test reproducible: we want a randomly
+    # chosen order, but not to change the order every time we run the test.
+    rng = random.Random(seed)
+
+    initial_generation = 1
+    last_state = {
+        env.pageservers[0].id: ("AttachedSingle", initial_generation),
+        env.pageservers[1].id: ("Detached", None),
+        env.pageservers[2].id: ("Detached", None),
+    }
+
+    latest_attached = env.pageservers[0].id
+
+    for _i in range(0, 64):
+        # Pick a pageserver
+        pageserver = rng.choice(env.pageservers)
+
+        # Pick a pseudorandom state
+        modes = [
+            "AttachedSingle",
+            "AttachedMulti",
+            "AttachedStale",
+            "Secondary",
+            "Detached",
+            "_Evictions",
+            "_Restart",
+        ]
+
+        mode = rng.choice(modes)
+
+        last_state_ps = last_state[pageserver.id]
+        if mode == "_Evictions":
+            if last_state_ps[0].startswith("Attached"):
+                log.info(f"Action: evictions on pageserver {pageserver.id}")
+                evict_random_layers(rng, pageserver, tenant_id, timeline_id)
+            else:
+                log.info(
+                    f"Action: skipping evictions on pageserver {pageserver.id}, is not attached"
+                )
+        elif mode == "_Restart":
+            log.info(f"Action: restarting pageserver {pageserver.id}")
+            pageserver.stop()
+            pageserver.start()
+            if last_state_ps[0].startswith("Attached") and latest_attached == pageserver.id:
+                log.info("Entering postgres...")
+                workload.churn_rows(rng.randint(128, 256), pageserver.id)
+                workload.validate(pageserver.id)
+            elif last_state_ps[0].startswith("Attached"):
+                # The `attachment_service` will only re-attach on startup when a pageserver was the
+                # holder of the latest generation: otherwise the pageserver will revert to detached
+                # state if it was running attached with a stale generation
+                last_state[pageserver.id] = ("Detached", None)
+        else:
+            secondary_conf: Optional[Dict[str, Any]] = None
+            if mode == "Secondary":
+                secondary_conf = {"warm": rng.choice([True, False])}
+
+            location_conf: Dict[str, Any] = {
+                "mode": mode,
+                "secondary_conf": secondary_conf,
+                "tenant_conf": {},
+            }
+
+            log.info(f"Action: Configuring pageserver {pageserver.id} to {location_conf}")
+
+            # Select a generation number
+            if mode.startswith("Attached"):
+                if last_state_ps[1] is not None:
+                    if rng.choice([True, False]):
+                        # Move between attached states, staying in the same generation
+                        generation = last_state_ps[1]
+                    else:
+                        # Switch generations, while also jumping between attached states
+                        generation = env.attachment_service.attach_hook_issue(
+                            tenant_id, pageserver.id
+                        )
+                        latest_attached = pageserver.id
+                else:
+                    generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver.id)
+                    latest_attached = pageserver.id
+            else:
+                generation = None
+
+            location_conf["generation"] = generation
+
+            pageserver.tenant_location_configure(tenant_id, location_conf)
+            last_state[pageserver.id] = (mode, generation)
+
+            if mode.startswith("Attached"):
+                # This is a basic test: we are validating that he endpoint works properly _between_
+                # configuration changes.  A stronger test would be to validate that clients see
+                # no errors while we are making the changes.
+                workload.churn_rows(
+                    rng.randint(128, 256), pageserver.id, upload=mode != "AttachedStale"
+                )
+                workload.validate(pageserver.id)
+
+    # Attach all pageservers
+    for ps in env.pageservers:
+        location_conf = {"mode": "AttachedMulti", "secondary_conf": None, "tenant_conf": {}}
+        ps.tenant_location_configure(tenant_id, location_conf)
+
+    # Confirm that all are readable
+    for ps in env.pageservers:
+        workload.validate(ps.id)
+
+    # Detach all pageservers
+    for ps in env.pageservers:
+        location_conf = {"mode": "Detached", "secondary_conf": None, "tenant_conf": {}}
+        ps.tenant_location_configure(tenant_id, location_conf)
+
+    # Confirm that all local disk state was removed on detach
+    # TODO
+
+
+def test_live_migration(neon_env_builder: NeonEnvBuilder):
+    """
+    Test the sequence of location states that are used in a live migration.
+    """
+    neon_env_builder.num_pageservers = 2
+    neon_env_builder.enable_pageserver_remote_storage(
+        remote_storage_kind=RemoteStorageKind.MOCK_S3,
+    )
+    env = neon_env_builder.init_start(initial_tenant_conf=TENANT_CONF)
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    pageserver_a = env.pageservers[0]
+    pageserver_b = env.pageservers[1]
+
+    initial_generation = 1
+
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(env.pageservers[0].id)
+    workload.write_rows(256, env.pageservers[0].id)
+
+    # Make the destination a secondary location
+    pageserver_b.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "Secondary",
+            "secondary_conf": {"warm": True},
+            "tenant_conf": {},
+        },
+    )
+
+    workload.churn_rows(64, pageserver_a.id, upload=False)
+
+    # Set origin attachment to stale
+    log.info("Setting origin to AttachedStale")
+    pageserver_a.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "AttachedStale",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": initial_generation,
+        },
+        flush_ms=5000,
+    )
+
+    migrated_generation = env.attachment_service.attach_hook_issue(tenant_id, pageserver_b.id)
+    log.info(f"Acquired generation {migrated_generation} for destination pageserver")
+    assert migrated_generation == initial_generation + 1
+
+    # Writes and reads still work in AttachedStale.
+    workload.validate(pageserver_a.id)
+
+    # TODO: call into secondary mode API hooks to do an upload/download sync
+
+    # Generate some more dirty writes: we expect the origin to ingest WAL in
+    # in AttachedStale
+    workload.churn_rows(64, pageserver_a.id, upload=False)
+    workload.validate(pageserver_a.id)
+
+    # Attach the destination
+    log.info("Setting destination to AttachedMulti")
+    pageserver_b.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "AttachedMulti",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": migrated_generation,
+        },
+    )
+
+    # Wait for destination LSN to catch up with origin
+    origin_lsn = pageserver_a.http_client().timeline_detail(tenant_id, timeline_id)[
+        "last_record_lsn"
+    ]
+
+    def caught_up():
+        destination_lsn = pageserver_b.http_client().timeline_detail(tenant_id, timeline_id)[
+            "last_record_lsn"
+        ]
+        log.info(
+            f"Waiting for LSN to catch up: origin {origin_lsn} vs destination {destination_lsn}"
+        )
+        assert destination_lsn >= origin_lsn
+
+    wait_until(100, 0.1, caught_up)
+
+    # The destination should accept writes
+    workload.churn_rows(64, pageserver_b.id)
+
+    # Dual attached: both are readable.
+    workload.validate(pageserver_a.id)
+    workload.validate(pageserver_b.id)
+
+    # Revert the origin to secondary
+    log.info("Setting origin to Secondary")
+    pageserver_a.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "Secondary",
+            "secondary_conf": {"warm": True},
+            "tenant_conf": {},
+        },
+    )
+
+    workload.churn_rows(64, pageserver_b.id)
+
+    # Put the destination into final state
+    pageserver_b.tenant_location_configure(
+        tenant_id,
+        {
+            "mode": "AttachedSingle",
+            "secondary_conf": None,
+            "tenant_conf": {},
+            "generation": migrated_generation,
+        },
+    )
+
+    workload.churn_rows(64, pageserver_b.id)
+    workload.validate(pageserver_b.id)
+
+
+def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder):
+    """
+    Test the sequence of location states that are used in a live migration.
+    """
+    env = neon_env_builder.init_start()  # initial_tenant_conf=TENANT_CONF)
+    assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
+
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
+
+    # Write some data so that we have some layers
+    workload = Workload(env, tenant_id, timeline_id)
+    workload.init(env.pageservers[0].id)
+
+    # Write some layers and upload a heatmap
+    workload.write_rows(256, env.pageservers[0].id)
+    env.pageserver.http_client().tenant_heatmap_upload(tenant_id)
+
+    def validate_heatmap(heatmap):
+        assert len(heatmap["timelines"]) == 1
+        assert heatmap["timelines"][0]["timeline_id"] == str(timeline_id)
+        assert len(heatmap["timelines"][0]["layers"]) > 0
+        layers = heatmap["timelines"][0]["layers"]
+
+        # Each layer appears at most once
+        assert len(set(layer["name"] for layer in layers)) == len(layers)
+
+    # Download and inspect the heatmap that the pageserver uploaded
+    heatmap_first = env.pageserver_remote_storage.heatmap_content(tenant_id)
+    log.info(f"Read back heatmap: {heatmap_first}")
+    validate_heatmap(heatmap_first)
+
+    # Do some more I/O to generate more layers
+    workload.churn_rows(64, env.pageservers[0].id)
+    env.pageserver.http_client().tenant_heatmap_upload(tenant_id)
+
+    # Ensure that another heatmap upload includes the new layers
+    heatmap_second = env.pageserver_remote_storage.heatmap_content(tenant_id)
+    log.info(f"Read back heatmap: {heatmap_second}")
+    assert heatmap_second != heatmap_first
+    validate_heatmap(heatmap_second)
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index 0a5046e219..2fda56d0f4 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -60,8 +60,6 @@ def test_remote_storage_backup_and_restore(
 
     neon_env_builder.enable_pageserver_remote_storage(remote_storage_kind)
 
-    neon_env_builder.enable_generations = generations
-
     # Exercise retry code path by making all uploads and downloads fail for the
     # first time. The retries print INFO-messages to the log; we will check
     # that they are present after the test.
@@ -73,19 +71,20 @@ def test_remote_storage_backup_and_restore(
     ##### First start, insert data and upload it to the remote storage
     env = neon_env_builder.init_start()
 
-    # FIXME: Is this expected?
-    env.pageserver.allowed_errors.append(
-        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            # FIXME: Is this expected?
+            ".*marking .* as locally complete, while it doesnt exist in remote index.*",
+            ".*No timelines to attach received.*",
+            ".*Failed to get local tenant state.*",
+            # FIXME retry downloads without throwing errors
+            ".*failed to load remote timeline.*",
+            # we have a bunch of pytest.raises for these below
+            ".*tenant .*? already exists, state:.*",
+            ".*tenant directory already exists.*",
+            ".*simulated failure of remote operation.*",
+        ]
     )
-    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
-
-    env.pageserver.allowed_errors.append(".*Failed to get local tenant state.*")
-    # FIXME retry downloads without throwing errors
-    env.pageserver.allowed_errors.append(".*failed to load remote timeline.*")
-    # we have a bunch of pytest.raises for these below
-    env.pageserver.allowed_errors.append(".*tenant .*? already exists, state:.*")
-    env.pageserver.allowed_errors.append(".*tenant directory already exists.*")
-    env.pageserver.allowed_errors.append(".*simulated failure of remote operation.*")
 
     pageserver_http = env.pageserver.http_client()
     endpoint = env.endpoints.create_start("main")
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index 48f5682371..fece876459 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -395,13 +395,13 @@ def test_long_timeline_create_cancelled_by_tenant_delete(neon_env_builder: NeonE
     env.start()
     pageserver_http = env.pageserver.http_client()
 
-    # happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
-    env.pageserver.allowed_errors.append(
-        ".*Timeline got dropped without initializing, cleaning its files"
-    )
-    # the response hit_pausable_failpoint_and_later_fail
-    env.pageserver.allowed_errors.append(
-        f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn"
+    env.pageserver.allowed_errors.extend(
+        [
+            # happens with the cancellation bailing flushing loop earlier, leaving disk_consistent_lsn at zero
+            ".*Timeline got dropped without initializing, cleaning its files",
+            # the response hit_pausable_failpoint_and_later_fail
+            f".*Error processing HTTP request: InternalServerError\\(new timeline {env.initial_tenant}/{env.initial_timeline} has invalid disk_consistent_lsn",
+        ]
     )
 
     env.pageserver.tenant_create(env.initial_tenant)
diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py
index 5b63bd6161..0dcbb23ad4 100644
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -307,10 +307,14 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
         bogus_timeline_id = TimelineId.generate()
         pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
 
-    # the error will be printed to the log too
-    env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
-    # Timelines get stopped during detach, ignore the gc calls that error, witnessing that
-    env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*")
+    env.pageserver.allowed_errors.extend(
+        [
+            # the error will be printed to the log too
+            ".*gc target timeline does not exist.*",
+            # Timelines get stopped during detach, ignore the gc calls that error, witnessing that
+            ".*InternalServerError\\(timeline is Stopping.*",
+        ]
+    )
 
     # Detach while running manual GC.
     # It should wait for manual GC to finish because it runs in a task associated with the tenant.
diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py
index feacdcc802..dcd7232b1b 100644
--- a/test_runner/regress/test_tenant_relocation.py
+++ b/test_runner/regress/test_tenant_relocation.py
@@ -216,16 +216,17 @@ def test_tenant_relocation(
 
     tenant_id = TenantId("74ee8b079a0e437eb0afea7d26a07209")
 
-    # FIXME: Is this expected?
-    env.pageservers[0].allowed_errors.append(
-        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageservers[0].allowed_errors.extend(
+        [
+            # FIXME: Is this expected?
+            ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
+            # Needed for detach polling on the original pageserver
+            f".*NotFound: tenant {tenant_id}.*",
+            # We will dual-attach in this test, so stale generations are expected
+            ".*Dropped remote consistent LSN updates.*",
+        ]
     )
 
-    # Needed for detach polling on the original pageserver
-    env.pageservers[0].allowed_errors.append(f".*NotFound: tenant {tenant_id}.*")
-    # We will dual-attach in this test, so stale generations are expected
-    env.pageservers[0].allowed_errors.append(".*Dropped remote consistent LSN updates.*")
-
     assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
 
     # we use two branches to check that they are both relocated
diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py
index b7b4e2be0b..07fb6dc5ca 100644
--- a/test_runner/regress/test_tenants_with_remote_storage.py
+++ b/test_runner/regress/test_tenants_with_remote_storage.py
@@ -117,10 +117,12 @@ def test_tenants_attached_after_download(neon_env_builder: NeonEnvBuilder):
     ##### First start, insert secret data and upload it to the remote storage
     env = neon_env_builder.init_start()
 
-    # FIXME: Are these expected?
-    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
-    env.pageserver.allowed_errors.append(
-        ".*marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            # FIXME: Are these expected?
+            ".*No timelines to attach received.*",
+            ".*marking .* as locally complete, while it doesnt exist in remote index.*",
+        ]
     )
 
     pageserver_http = env.pageserver.http_client()
@@ -218,13 +220,14 @@ def test_tenant_redownloads_truncated_file_on_startup(
 
     assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
 
-    env.pageserver.allowed_errors.append(".*removing local file .* because .*")
-
-    # FIXME: Are these expected?
-    env.pageserver.allowed_errors.append(
-        ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*"
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*removing local file .* because .*",
+            # FIXME: Are these expected?
+            ".*init_tenant_mgr: marking .* as locally complete, while it doesnt exist in remote index.*",
+            ".*No timelines to attach received.*",
+        ]
     )
-    env.pageserver.allowed_errors.append(".*No timelines to attach received.*")
 
     pageserver_http = env.pageserver.http_client()
     endpoint = env.endpoints.create_start("main")
diff --git a/test_runner/regress/test_threshold_based_eviction.py b/test_runner/regress/test_threshold_based_eviction.py
index 27d5cce5f2..5f72cfd747 100644
--- a/test_runner/regress/test_threshold_based_eviction.py
+++ b/test_runner/regress/test_threshold_based_eviction.py
@@ -36,12 +36,13 @@ def test_threshold_based_eviction(
         ".*metrics_collection:.* upload consumption_metrics (still failed|failed, will retry).*"
     )
     env = neon_env_builder.init_start()
-    env.pageserver.allowed_errors.append(metrics_refused_log_line)
-
-    # these can happen whenever we run consumption metrics collection
-    env.pageserver.allowed_errors.append(r".*failed to calculate logical size at \S+: cancelled")
-    env.pageserver.allowed_errors.append(
-        r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes"
+    env.pageserver.allowed_errors.extend(
+        [
+            metrics_refused_log_line,
+            # these can happen whenever we run consumption metrics collection
+            r".*failed to calculate logical size at \S+: cancelled",
+            r".*failed to calculate synthetic size for tenant \S+: failed to calculate some logical_sizes",
+        ]
     )
 
     tenant_id, timeline_id = env.initial_tenant, env.initial_timeline
diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py
index 17113a6bc5..82ffcb1177 100644
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -39,10 +39,14 @@ from urllib3.util.retry import Retry
 def test_timeline_delete(neon_simple_env: NeonEnv):
     env = neon_simple_env
 
-    env.pageserver.allowed_errors.append(".*Timeline .* was not found.*")
-    env.pageserver.allowed_errors.append(".*timeline not found.*")
-    env.pageserver.allowed_errors.append(".*Cannot delete timeline which has child timelines.*")
-    env.pageserver.allowed_errors.append(".*Precondition failed: Requested tenant is missing.*")
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*Timeline .* was not found.*",
+            ".*timeline not found.*",
+            ".*Cannot delete timeline which has child timelines.*",
+            ".*Precondition failed: Requested tenant is missing.*",
+        ]
+    )
 
     ps_http = env.pageserver.http_client()
 
@@ -198,22 +202,22 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
             ),
         )
 
-    env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
-    # It appears when we stopped flush loop during deletion and then pageserver is stopped
-    env.pageserver.allowed_errors.append(
-        ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+    env.pageserver.allowed_errors.extend(
+        [
+            f".*{timeline_id}.*failpoint: {failpoint}",
+            # It appears when we stopped flush loop during deletion and then pageserver is stopped
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            # This happens when we fail before scheduling background operation.
+            # Timeline is left in stopping state and retry tries to stop it again.
+            ".*Ignoring new state, equal to the existing one: Stopping",
+            # This happens when we retry delete requests for broken timelines
+            ".*Ignoring state update Stopping for broken timeline",
+            # This happens when timeline remains are cleaned up during loading
+            ".*Timeline dir entry become invalid.*",
+            # In one of the branches we poll for tenant to become active. Polls can generate this log message:
+            f".*Tenant {env.initial_tenant} is not active*",
+        ]
     )
-    # This happens when we fail before scheduling background operation.
-    # Timeline is left in stopping state and retry tries to stop it again.
-    env.pageserver.allowed_errors.append(
-        ".*Ignoring new state, equal to the existing one: Stopping"
-    )
-    # This happens when we retry delete requests for broken timelines
-    env.pageserver.allowed_errors.append(".*Ignoring state update Stopping for broken timeline")
-    # This happens when timeline remains are cleaned up during loading
-    env.pageserver.allowed_errors.append(".*Timeline dir entry become invalid.*")
-    # In one of the branches we poll for tenant to become active. Polls can generate this log message:
-    env.pageserver.allowed_errors.append(f".*Tenant {env.initial_tenant} is not active*")
 
     ps_http.configure_failpoints((failpoint, "return"))
 
@@ -259,15 +263,6 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
                 ps_http, env.initial_tenant, timeline_id, iterations=iterations
             )
 
-            if failpoint == "timeline-delete-after-index-delete":
-                m = ps_http.get_metrics()
-                assert (
-                    m.query_one(
-                        "remote_storage_s3_request_seconds_count",
-                        filter={"request_type": "get_object", "result": "ok"},
-                    ).value
-                    == 1  # index part for initial timeline
-                )
     elif check is Check.RETRY_WITHOUT_RESTART:
         # this should succeed
         # this also checks that delete can be retried even when timeline is in Broken state
@@ -398,13 +393,13 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
 
     env = neon_env_builder.init_start()
 
-    env.pageserver.allowed_errors.append(".*failpoint: timeline-delete-before-rm")
-    env.pageserver.allowed_errors.append(
-        ".*Ignoring new state, equal to the existing one: Stopping"
-    )
-    # this happens, because the stuck timeline is visible to shutdown
-    env.pageserver.allowed_errors.append(
-        ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+    env.pageserver.allowed_errors.extend(
+        [
+            ".*failpoint: timeline-delete-before-rm",
+            ".*Ignoring new state, equal to the existing one: Stopping",
+            # this happens, because the stuck timeline is visible to shutdown
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+        ]
     )
 
     ps_http = env.pageserver.http_client()
@@ -551,10 +546,12 @@ def test_concurrent_timeline_delete_stuck_on(
         with pytest.raises(PageserverApiException, match=error_msg_re) as second_call_err:
             ps_http.timeline_delete(env.initial_tenant, child_timeline_id)
         assert second_call_err.value.status_code == 409
-        env.pageserver.allowed_errors.append(f".*{child_timeline_id}.*{error_msg_re}.*")
-        # the second call will try to transition the timeline into Stopping state as well
-        env.pageserver.allowed_errors.append(
-            f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping"
+        env.pageserver.allowed_errors.extend(
+            [
+                f".*{child_timeline_id}.*{error_msg_re}.*",
+                # the second call will try to transition the timeline into Stopping state as well
+                f".*{child_timeline_id}.*Ignoring new state, equal to the existing one: Stopping",
+            ]
         )
         log.info("second call failed as expected")
 
diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py
index 24cbe34457..6e510b2eba 100644
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -300,7 +300,8 @@ def test_timeline_initial_logical_size_calculation_cancellation(
     env = neon_env_builder.init_start()
     client = env.pageserver.http_client()
 
-    tenant_id, timeline_id = env.neon_cli.create_tenant()
+    tenant_id = env.initial_tenant
+    timeline_id = env.initial_timeline
 
     # load in some data
     endpoint = env.endpoints.create_start("main", tenant_id=tenant_id)
@@ -732,3 +733,142 @@ def wait_for_timeline_size_init(
     raise Exception(
         f"timed out while waiting for current_logical_size of a timeline to reach its non-incremental value, details: {timeline_details}"
     )
+
+
+def test_ondemand_activation(neon_env_builder: NeonEnvBuilder):
+    """
+    Tenants warmuping up opportunistically will wait for one another's logical size calculations to complete
+    before proceeding.  However, they skip this if a client is actively trying to access them.
+
+    This test is not purely about logical sizes, but logical size calculation is the phase that we
+    use as a proxy for "warming up" in this test: it happens within the semaphore guard used
+    to limit concurrent tenant warm-up.
+    """
+
+    # We will run with the limit set to 1, so that once we have one tenant stuck
+    # in a pausable failpoint, the rest are prevented from proceeding through warmup.
+    neon_env_builder.pageserver_config_override = "concurrent_tenant_warmup = '1'"
+
+    env = neon_env_builder.init_start()
+    pageserver_http = env.pageserver.http_client()
+
+    # Create some tenants
+    n_tenants = 10
+    tenant_ids = {env.initial_tenant}
+    for _i in range(0, n_tenants - 1):
+        tenant_id = TenantId.generate()
+        env.pageserver.tenant_create(tenant_id)
+
+        # Empty tenants are not subject to waiting for logical size calculations, because
+        # those hapen on timeline level
+        timeline_id = TimelineId.generate()
+        env.neon_cli.create_timeline(
+            new_branch_name="main", tenant_id=tenant_id, timeline_id=timeline_id
+        )
+
+        tenant_ids.add(tenant_id)
+
+    # Restart pageserver with logical size calculations paused
+    env.pageserver.stop()
+    env.pageserver.start(
+        extra_env_vars={"FAILPOINTS": "timeline-calculate-logical-size-pause=pause"}
+    )
+
+    def get_tenant_states():
+        states = {}
+        for tenant_id in tenant_ids:
+            tenant = pageserver_http.tenant_status(tenant_id=tenant_id)
+            states[tenant_id] = tenant["state"]["slug"]
+        log.info(f"Tenant states: {states}")
+        return states
+
+    def at_least_one_active():
+        assert "Active" in set(get_tenant_states().values())
+
+    # One tenant should activate, then get stuck in their logical size calculation
+    wait_until(10, 1, at_least_one_active)
+
+    # Wait some walltime to gain confidence that other tenants really are stuck and not proceeding to activate
+    time.sleep(5)
+
+    # We should see one tenant win the activation race, and enter logical size calculation.  The rest
+    # will stay in Attaching state, waiting for the "warmup_limit" semaphore
+    expect_activated = 1
+    states = get_tenant_states()
+    assert len([s for s in states.values() if s == "Active"]) == expect_activated
+    assert len([s for s in states.values() if s == "Attaching"]) == n_tenants - expect_activated
+
+    assert (
+        pageserver_http.get_metric_value("pageserver_tenant_startup_scheduled_total") == n_tenants
+    )
+
+    # This is zero, and subsequent checks are expect_activated - 1, because this counter does not
+    # count how may tenants are Active, it counts how many have finished warmup.  The first tenant
+    # that reached Active is still stuck in its local size calculation, and has therefore not finished warmup.
+    assert pageserver_http.get_metric_value("pageserver_tenant_startup_complete_total") == 0
+
+    # If a client accesses one of the blocked tenants, it should skip waiting for warmup and
+    # go active as fast as it can.
+    stuck_tenant_id = list(
+        [(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"]
+    )[0][0]
+
+    endpoint = env.endpoints.create_start(branch_name="main", tenant_id=stuck_tenant_id)
+    endpoint.safe_psql_many(
+        [
+            "CREATE TABLE foo (x INTEGER)",
+            "INSERT INTO foo SELECT g FROM generate_series(1, 10) g",
+        ]
+    )
+    endpoint.stop()
+
+    # That one that we successfully accessed is now Active
+    expect_activated += 1
+    assert pageserver_http.tenant_status(tenant_id=stuck_tenant_id)["state"]["slug"] == "Active"
+    assert (
+        pageserver_http.get_metric_value("pageserver_tenant_startup_complete_total")
+        == expect_activated - 1
+    )
+
+    # The ones we didn't touch are still in Attaching
+    assert (
+        len([s for s in get_tenant_states().values() if s == "Attaching"])
+        == n_tenants - expect_activated
+    )
+
+    # Timeline creation operations also wake up Attaching tenants
+    stuck_tenant_id = list(
+        [(tid, s) for (tid, s) in get_tenant_states().items() if s == "Attaching"]
+    )[0][0]
+    pageserver_http.timeline_create(env.pg_version, stuck_tenant_id, TimelineId.generate())
+    expect_activated += 1
+    assert pageserver_http.tenant_status(tenant_id=stuck_tenant_id)["state"]["slug"] == "Active"
+    assert (
+        len([s for s in get_tenant_states().values() if s == "Attaching"])
+        == n_tenants - expect_activated
+    )
+
+    assert (
+        pageserver_http.get_metric_value("pageserver_tenant_startup_complete_total")
+        == expect_activated - 1
+    )
+
+    # When we unblock logical size calculation, all tenants should proceed to active state via
+    # the warmup route.
+    pageserver_http.configure_failpoints(("timeline-calculate-logical-size-pause", "off"))
+
+    def all_active():
+        assert all(s == "Active" for s in get_tenant_states().values())
+
+    wait_until(10, 1, all_active)
+
+    # Final control check: restarting with no failpoints at all results in all tenants coming active
+    # without being prompted by client I/O
+    env.pageserver.stop()
+    env.pageserver.start()
+    wait_until(10, 1, all_active)
+
+    assert (
+        pageserver_http.get_metric_value("pageserver_tenant_startup_scheduled_total") == n_tenants
+    )
+    assert pageserver_http.get_metric_value("pageserver_tenant_startup_complete_total") == n_tenants
diff --git a/test_runner/regress/test_wal_restore.py b/test_runner/regress/test_wal_restore.py
index 4a9ffeee4b..7d03f644d1 100644
--- a/test_runner/regress/test_wal_restore.py
+++ b/test_runner/regress/test_wal_restore.py
@@ -1,7 +1,6 @@
 import sys
 import tarfile
 import tempfile
-import time
 from pathlib import Path
 
 import pytest
@@ -12,6 +11,7 @@ from fixtures.neon_fixtures import (
     PgBin,
     VanillaPostgres,
 )
+from fixtures.pageserver.utils import timeline_delete_wait_completed
 from fixtures.port_distributor import PortDistributor
 from fixtures.remote_storage import LocalFsStorage
 from fixtures.types import Lsn, TenantId, TimelineId
@@ -128,10 +128,7 @@ def test_wal_restore_initdb(
         assert restored.safe_psql("select count(*) from t", user="cloud_admin") == [(300000,)]
 
 
-def test_wal_restore_http(
-    neon_env_builder: NeonEnvBuilder,
-    test_output_dir: Path,
-):
+def test_wal_restore_http(neon_env_builder: NeonEnvBuilder):
     env = neon_env_builder.init_start()
     endpoint = env.endpoints.create_start("main")
     endpoint.safe_psql("create table t as select generate_series(1,300000)")
@@ -145,15 +142,7 @@ def test_wal_restore_http(
 
     assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
 
-    test_output_dir / "initdb.tar.zst"
-
-    (env.pageserver_remote_storage.timeline_path(tenant_id, timeline_id) / "initdb.tar.zst")
-
-    ps_client.timeline_delete(tenant_id, timeline_id)
-    time.sleep(2)
-
-    # verify that it is indeed deleted
-    # TODO
+    timeline_delete_wait_completed(ps_client, tenant_id, timeline_id)
 
     # issue the restoration command
     ps_client.timeline_create(
diff --git a/vendor/postgres-v14 b/vendor/postgres-v14
index dd067cf656..0bb356aa0c 160000
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
@@ -1 +1 @@
-Subproject commit dd067cf656f6810a25aca6025633d32d02c5085a
+Subproject commit 0bb356aa0cd1582112926fbcf0b5370222c2db6d
diff --git a/vendor/postgres-v15 b/vendor/postgres-v15
index bc88f53931..24333abb81 160000
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
@@ -1 +1 @@
-Subproject commit bc88f539312fcc4bb292ce94ae9db09ab6656e8a
+Subproject commit 24333abb81a9ecae4541019478f0bf7d0b289df7
diff --git a/vendor/postgres-v16 b/vendor/postgres-v16
index e3a22b7292..863b71572b 160000
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
@@ -1 +1 @@
-Subproject commit e3a22b72922055f9212eca12700190f118578362
+Subproject commit 863b71572bc441581efb3bbee2ad18af037be1bb
diff --git a/vendor/revisions.json b/vendor/revisions.json
index c4cea208ee..a9575a2cb7 100644
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,5 +1,5 @@
 {
-    "postgres-v16": "e3a22b72922055f9212eca12700190f118578362",
-    "postgres-v15": "bc88f539312fcc4bb292ce94ae9db09ab6656e8a",
-    "postgres-v14": "dd067cf656f6810a25aca6025633d32d02c5085a"
+    "postgres-v16": "863b71572bc441581efb3bbee2ad18af037be1bb",
+    "postgres-v15": "24333abb81a9ecae4541019478f0bf7d0b289df7",
+    "postgres-v14": "0bb356aa0cd1582112926fbcf0b5370222c2db6d"
 }
diff --git a/vm-image-spec.yaml b/vm-image-spec.yaml
index 6f0ebe5f66..804405293f 100644
--- a/vm-image-spec.yaml
+++ b/vm-image-spec.yaml
@@ -34,7 +34,7 @@ files:
       server_tls_sslmode=disable
       pool_mode=transaction
       max_client_conn=10000
-      default_pool_size=16
+      default_pool_size=64
       max_prepared_statements=0
   - filename: cgconfig.conf
     content: |
diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml
index 4621a75c0b..82bbedc4ae 100644
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -56,7 +56,7 @@ regex = { version = "1" }
 regex-automata = { version = "0.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] }
 regex-syntax = { version = "0.8" }
 reqwest = { version = "0.11", default-features = false, features = ["blocking", "default-tls", "json", "multipart", "rustls-tls", "stream"] }
-ring = { version = "0.16", features = ["std"] }
+ring = { version = "0.16" }
 rustls = { version = "0.21", features = ["dangerous_configuration"] }
 scopeguard = { version = "1" }
 serde = { version = "1", features = ["alloc", "derive"] }
@@ -75,8 +75,8 @@ tracing-core = { version = "0.1" }
 tungstenite = { version = "0.20" }
 url = { version = "2", features = ["serde"] }
 uuid = { version = "1", features = ["serde", "v4"] }
-zstd = { version = "0.12" }
-zstd-safe = { version = "6", default-features = false, features = ["arrays", "legacy", "std", "zdict_builder"] }
+zstd = { version = "0.13" }
+zstd-safe = { version = "7", default-features = false, features = ["arrays", "legacy", "std", "zdict_builder"] }
 zstd-sys = { version = "2", default-features = false, features = ["legacy", "std", "zdict_builder"] }
 
 [build-dependencies]