pageserver: drop layers after shard split in GC

2026-06-03 13:30:38 +00:00 · 2024-04-30 11:29:14 +01:00
128 changed files with 1348 additions and 4672 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -595,7 +595,7 @@ dependencies = [
 "http 0.2.9",
 "http-body 0.4.5",
 "hyper 0.14.26",
- "hyper-rustls 0.24.0",
+ "hyper-rustls",
 "once_cell",
 "pin-project-lite",
 "pin-utils",
@@ -684,7 +684,7 @@ dependencies = [
 "http-body 0.4.5",
 "hyper 0.14.26",
 "itoa",
- "matchit 0.7.0",
+ "matchit",
 "memchr",
 "mime",
 "percent-encoding",
@@ -740,7 +740,7 @@ dependencies = [
 "pin-project",
 "quick-xml",
 "rand 0.8.5",
- "reqwest 0.11.19",
+ "reqwest",
 "rustc_version",
 "serde",
 "serde_json",
@@ -865,12 +865,6 @@ version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105"

-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
 [[package]]
 name = "base64-simd"
 version = "0.8.0"
@@ -1216,7 +1210,7 @@ dependencies = [
 "postgres",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "rust-ini",
 "serde",
 "serde_json",
@@ -1325,7 +1319,6 @@ dependencies = [
 "git-version",
 "hex",
 "humantime",
- "humantime-serde",
 "hyper 0.14.26",
 "nix 0.27.1",
 "once_cell",
@@ -1335,7 +1328,7 @@ dependencies = [
 "postgres_backend",
 "postgres_connection",
 "regex",
- "reqwest 0.12.4",
+ "reqwest",
 "safekeeper_api",
 "scopeguard",
 "serde",
@@ -2369,17 +2362,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "hostname"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
-dependencies = [
- "cfg-if",
- "libc",
- "windows 0.52.0",
-]
-
 [[package]]
 name = "http"
 version = "0.2.9"
@@ -2526,7 +2508,6 @@ dependencies = [
 "pin-project-lite",
 "smallvec",
 "tokio",
- "want",
 ]

 [[package]]
@@ -2544,23 +2525,6 @@ dependencies = [
 "tokio-rustls 0.24.0",
 ]

-[[package]]
-name = "hyper-rustls"
-version = "0.26.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
-dependencies = [
- "futures-util",
- "http 1.1.0",
- "hyper 1.2.0",
- "hyper-util",
- "rustls 0.22.4",
- "rustls-pki-types",
- "tokio",
- "tokio-rustls 0.25.0",
- "tower-service",
-]
-
 [[package]]
 name = "hyper-timeout"
 version = "0.4.1"
@@ -2608,7 +2572,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa"
 dependencies = [
 "bytes",
- "futures-channel",
 "futures-util",
 "http 1.1.0",
 "http-body 1.0.0",
@@ -2616,9 +2579,6 @@ dependencies = [
 "pin-project-lite",
 "socket2 0.5.5",
 "tokio",
- "tower",
- "tower-service",
- "tracing",
 ]

 [[package]]
@@ -2632,7 +2592,7 @@ dependencies = [
 "iana-time-zone-haiku",
 "js-sys",
 "wasm-bindgen",
- "windows 0.48.0",
+ "windows",
 ]

 [[package]]
@@ -2955,12 +2915,6 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"

-[[package]]
-name = "matchit"
-version = "0.8.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "540f1c43aed89909c0cc0cc604e3bb2f7e7a341a3728a9e6cfe760e733cd11ed"
-
 [[package]]
 name = "md-5"
 version = "0.10.5"
@@ -3094,6 +3048,16 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"

+[[package]]
+name = "mime_guess"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -3220,16 +3184,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "nu-ansi-term"
-version = "0.46.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
-dependencies = [
- "overload",
- "winapi",
-]
-
 [[package]]
 name = "num"
 version = "0.4.1"
@@ -3437,7 +3391,7 @@ dependencies = [
 "bytes",
 "http 0.2.9",
 "opentelemetry_api",
- "reqwest 0.11.19",
+ "reqwest",
 ]

 [[package]]
@@ -3455,7 +3409,7 @@ dependencies = [
 "opentelemetry_api",
 "opentelemetry_sdk",
 "prost",
- "reqwest 0.11.19",
+ "reqwest",
 "thiserror",
 "tokio",
 "tonic",
@@ -3566,12 +3520,6 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"

-[[package]]
-name = "overload"
-version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
-
 [[package]]
 name = "p256"
 version = "0.11.1"
@@ -3684,7 +3632,7 @@ dependencies = [
 "rand 0.8.5",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "rpds",
 "scopeguard",
 "serde",
@@ -3754,7 +3702,7 @@ dependencies = [
 "futures",
 "pageserver_api",
 "postgres",
- "reqwest 0.12.4",
+ "reqwest",
 "serde",
 "thiserror",
 "tokio",
@@ -4363,7 +4311,7 @@ dependencies = [
 "hashlink",
 "hex",
 "hmac",
- "hostname 0.3.1",
+ "hostname",
 "http 1.1.0",
 "http-body-util",
 "humantime",
@@ -4396,7 +4344,7 @@ dependencies = [
 "redis",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "reqwest-middleware",
 "reqwest-retry",
 "reqwest-tracing",
@@ -4423,7 +4371,6 @@ dependencies = [
 "tokio-postgres-rustls",
 "tokio-rustls 0.25.0",
 "tokio-util",
- "tower-service",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
@@ -4714,7 +4661,6 @@ dependencies = [
 "scopeguard",
 "serde",
 "serde_json",
- "sync_wrapper",
 "test-context",
 "tokio",
 "tokio-stream",
@@ -4740,106 +4686,69 @@ dependencies = [
 "http 0.2.9",
 "http-body 0.4.5",
 "hyper 0.14.26",
+ "hyper-rustls",
 "hyper-tls",
 "ipnet",
 "js-sys",
 "log",
 "mime",
+ "mime_guess",
 "native-tls",
 "once_cell",
 "percent-encoding",
 "pin-project-lite",
+ "rustls 0.21.11",
+ "rustls-pemfile 1.0.2",
 "serde",
 "serde_json",
 "serde_urlencoded",
 "tokio",
 "tokio-native-tls",
+ "tokio-rustls 0.24.0",
 "tokio-util",
 "tower-service",
 "url",
 "wasm-bindgen",
 "wasm-bindgen-futures",
- "wasm-streams 0.3.0",
+ "wasm-streams",
 "web-sys",
- "winreg 0.50.0",
-]
-
-[[package]]
-name = "reqwest"
-version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10"
-dependencies = [
- "base64 0.22.1",
- "bytes",
- "futures-channel",
- "futures-core",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "hyper 1.2.0",
- "hyper-rustls 0.26.0",
- "hyper-util",
- "ipnet",
- "js-sys",
- "log",
- "mime",
- "once_cell",
- "percent-encoding",
- "pin-project-lite",
- "rustls 0.22.4",
- "rustls-pemfile 2.1.1",
- "rustls-pki-types",
- "serde",
- "serde_json",
- "serde_urlencoded",
- "sync_wrapper",
- "tokio",
- "tokio-rustls 0.25.0",
- "tokio-util",
- "tower-service",
- "url",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "wasm-streams 0.4.0",
- "web-sys",
- "webpki-roots 0.26.1",
- "winreg 0.52.0",
+ "webpki-roots 0.25.2",
+ "winreg",
 ]

 [[package]]
 name = "reqwest-middleware"
-version = "0.3.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0209efb52486ad88136190094ee214759ef7507068b27992256ed6610eb71a01"
+checksum = "4531c89d50effe1fac90d095c8b133c20c5c714204feee0bfc3fd158e784209d"
 dependencies = [
 "anyhow",
 "async-trait",
- "http 1.1.0",
- "reqwest 0.12.4",
+ "http 0.2.9",
+ "reqwest",
 "serde",
+ "task-local-extensions",
 "thiserror",
- "tower-service",
 ]

 [[package]]
 name = "reqwest-retry"
-version = "0.5.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40f342894422862af74c50e1e9601cf0931accc9c6981e5eb413c46603b616b5"
+checksum = "48d0fd6ef4c6d23790399fe15efc8d12cd9f3d4133958f9bd7801ee5cbaec6c4"
 dependencies = [
 "anyhow",
 "async-trait",
 "chrono",
 "futures",
 "getrandom 0.2.11",
- "http 1.1.0",
- "hyper 1.2.0",
+ "http 0.2.9",
+ "hyper 0.14.26",
 "parking_lot 0.11.2",
- "reqwest 0.12.4",
+ "reqwest",
 "reqwest-middleware",
 "retry-policies",
+ "task-local-extensions",
 "tokio",
 "tracing",
 "wasm-timer",
@@ -4847,27 +4756,27 @@ dependencies = [

 [[package]]
 name = "reqwest-tracing"
-version = "0.5.0"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b253954a1979e02eabccd7e9c3d61d8f86576108baa160775e7f160bb4e800a3"
+checksum = "5a0152176687dd5cfe7f507ac1cb1a491c679cfe483afd133a7db7aaea818bb3"
 dependencies = [
 "anyhow",
 "async-trait",
 "getrandom 0.2.11",
- "http 1.1.0",
- "matchit 0.8.2",
+ "matchit",
 "opentelemetry",
- "reqwest 0.12.4",
+ "reqwest",
 "reqwest-middleware",
+ "task-local-extensions",
 "tracing",
 "tracing-opentelemetry",
 ]

 [[package]]
 name = "retry-policies"
-version = "0.3.0"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "493b4243e32d6eedd29f9a398896e35c6943a123b55eec97dcaee98310d25810"
+checksum = "e09bbcb5003282bcb688f0bae741b278e9c7e8f378f561522c9806c58e075d9b"
 dependencies = [
 "anyhow",
 "chrono",
@@ -5186,23 +5095,18 @@ dependencies = [
 "hex",
 "histogram",
 "itertools",
- "native-tls",
 "pageserver",
 "pageserver_api",
- "postgres-native-tls",
- "postgres_ffi",
 "rand 0.8.5",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "serde",
 "serde_json",
 "serde_with",
 "thiserror",
 "tokio",
- "tokio-postgres",
 "tokio-rustls 0.25.0",
 "tokio-stream",
- "tokio-util",
 "tracing",
 "tracing-appender",
 "tracing-subscriber",
@@ -5244,7 +5148,7 @@ dependencies = [
 "rand 0.8.5",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "safekeeper_api",
 "scopeguard",
 "sd-notify",
@@ -5374,12 +5278,12 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"

 [[package]]
 name = "sentry"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00421ed8fa0c995f07cde48ba6c89e80f2b312f74ff637326f392fbfd23abe02"
+checksum = "2e95efd0cefa32028cdb9766c96de71d96671072f9fb494dc9fb84c0ef93e52b"
 dependencies = [
 "httpdate",
- "reqwest 0.12.4",
+ "reqwest",
 "rustls 0.21.11",
 "sentry-backtrace",
 "sentry-contexts",
@@ -5393,9 +5297,9 @@ dependencies = [

 [[package]]
 name = "sentry-backtrace"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a79194074f34b0cbe5dd33896e5928bbc6ab63a889bd9df2264af5acb186921e"
+checksum = "6ac2bac6f310c4c4c4bb094d1541d32ae497f8c5c23405e85492cefdfe0971a9"
 dependencies = [
 "backtrace",
 "once_cell",
@@ -5405,11 +5309,11 @@ dependencies = [

 [[package]]
 name = "sentry-contexts"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
+checksum = "6c3e17295cecdbacf66c5bd38d6e1147e09e1e9d824d2d5341f76638eda02a3a"
 dependencies = [
- "hostname 0.4.0",
+ "hostname",
 "libc",
 "os_info",
 "rustc_version",
@@ -5419,9 +5323,9 @@ dependencies = [

 [[package]]
 name = "sentry-core"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46a75011ea1c0d5c46e9e57df03ce81f5c7f0a9e199086334a1f9c0a541e0826"
+checksum = "8339474f587f36cb110fa1ed1b64229eea6d47b0b886375579297b7e47aeb055"
 dependencies = [
 "once_cell",
 "rand 0.8.5",
@@ -5432,9 +5336,9 @@ dependencies = [

 [[package]]
 name = "sentry-panic"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2eaa3ecfa3c8750c78dcfd4637cfa2598b95b52897ed184b4dc77fcf7d95060d"
+checksum = "875b69f506da75bd664029eafb05f8934297d2990192896d17325f066bd665b7"
 dependencies = [
 "sentry-backtrace",
 "sentry-core",
@@ -5442,9 +5346,9 @@ dependencies = [

 [[package]]
 name = "sentry-tracing"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f715932bf369a61b7256687c6f0554141b7ce097287e30e3f7ed6e9de82498fe"
+checksum = "89feead9bdd116f8035e89567651340fc382db29240b6c55ef412078b08d1aa3"
 dependencies = [
 "sentry-backtrace",
 "sentry-core",
@@ -5454,13 +5358,13 @@ dependencies = [

 [[package]]
 name = "sentry-types"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4519c900ce734f7a0eb7aba0869dfb225a7af8820634a7dd51449e3b093cfb7c"
+checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd"
 dependencies = [
 "debugid",
+ "getrandom 0.2.11",
 "hex",
- "rand 0.8.5",
 "serde",
 "serde_json",
 "thiserror",
@@ -5852,12 +5756,10 @@ dependencies = [
 "pageserver_client",
 "postgres_connection",
 "r2d2",
- "reqwest 0.12.4",
+ "reqwest",
 "routerify",
 "serde",
 "serde_json",
- "strum",
- "strum_macros",
 "thiserror",
 "tokio",
 "tokio-util",
@@ -5876,7 +5778,7 @@ dependencies = [
 "hyper 0.14.26",
 "pageserver_api",
 "pageserver_client",
- "reqwest 0.12.4",
+ "reqwest",
 "serde",
 "serde_json",
 "thiserror",
@@ -5930,7 +5832,7 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
 [[package]]
 name = "svg_fmt"
 version = "0.4.2"
-source = "git+https://github.com/neondatabase/fork--nical--rust_debug?branch=neon#c1820b28664b5df68de7f043fccf2ed5d67b6ae8"
+source = "git+https://github.com/neondatabase/fork--nical--rust_debug?branch=neon#b9501105e746629004bc6d0473639320939dbe10"

 [[package]]
 name = "syn"
@@ -5959,9 +5861,6 @@ name = "sync_wrapper"
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
-dependencies = [
- "futures-core",
-]

 [[package]]
 name = "synstructure"
@@ -6514,11 +6413,10 @@ dependencies = [

 [[package]]
 name = "tracing"
-version = "0.1.37"
+version = "0.1.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
+checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
 dependencies = [
- "cfg-if",
 "log",
 "pin-project-lite",
 "tracing-attributes",
@@ -6538,9 +6436,9 @@ dependencies = [

 [[package]]
 name = "tracing-attributes"
-version = "0.1.24"
+version = "0.1.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
+checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -6549,9 +6447,9 @@ dependencies = [

 [[package]]
 name = "tracing-core"
-version = "0.1.31"
+version = "0.1.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
+checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
 dependencies = [
 "once_cell",
 "valuable",
@@ -6580,14 +6478,12 @@ dependencies = [

 [[package]]
 name = "tracing-opentelemetry"
-version = "0.21.0"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75327c6b667828ddc28f5e3f169036cb793c3f588d83bf0f262a7f062ffed3c8"
+checksum = "fc09e402904a5261e42cf27aea09ccb7d5318c6717a9eec3d8e2e65c56b18f19"
 dependencies = [
 "once_cell",
 "opentelemetry",
- "opentelemetry_sdk",
- "smallvec",
 "tracing",
 "tracing-core",
 "tracing-log",
@@ -6611,7 +6507,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "30a651bc37f915e81f087d86e62a18eec5f79550c7faff886f7090b4ea757c77"
 dependencies = [
 "matchers",
- "nu-ansi-term",
 "once_cell",
 "regex",
 "serde",
@@ -6633,7 +6528,7 @@ dependencies = [
 "opentelemetry",
 "opentelemetry-otlp",
 "opentelemetry-semantic-conventions",
- "reqwest 0.12.4",
+ "reqwest",
 "tokio",
 "tracing",
 "tracing-opentelemetry",
@@ -6719,6 +6614,15 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "unicase"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "unicode-bidi"
 version = "0.3.13"
@@ -7077,19 +6981,6 @@ dependencies = [
 "web-sys",
 ]

-[[package]]
-name = "wasm-streams"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129"
-dependencies = [
- "futures-util",
- "js-sys",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "web-sys",
-]
-
 [[package]]
 name = "wasm-timer"
 version = "0.2.5"
@@ -7130,15 +7021,6 @@ version = "0.25.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"

-[[package]]
-name = "webpki-roots"
-version = "0.26.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009"
-dependencies = [
- "rustls-pki-types",
-]
-
 [[package]]
 name = "which"
 version = "4.4.0"
@@ -7190,25 +7072,6 @@ dependencies = [
 "windows-targets 0.48.0",
 ]

-[[package]]
-name = "windows"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
-dependencies = [
- "windows-core",
- "windows-targets 0.52.4",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
-dependencies = [
- "windows-targets 0.52.4",
-]
-
 [[package]]
 name = "windows-sys"
 version = "0.42.0"
@@ -7441,16 +7304,6 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

-[[package]]
-name = "winreg"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5"
-dependencies = [
- "cfg-if",
- "windows-sys 0.48.0",
-]
-
 [[package]]
 name = "workspace_hack"
 version = "0.1.0"
@@ -7500,8 +7353,7 @@ dependencies = [
 "regex",
 "regex-automata 0.4.3",
 "regex-syntax 0.8.2",
- "reqwest 0.11.19",
- "reqwest 0.12.4",
+ "reqwest",
 "rustls 0.21.11",
 "scopeguard",
 "serde",
@@ -7511,7 +7363,6 @@ dependencies = [
 "subtle",
 "syn 1.0.109",
 "syn 2.0.52",
- "sync_wrapper",
 "time",
 "time-macros",
 "tokio",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -130,10 +130,10 @@ prost = "0.11"
 rand = "0.8"
 redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
-reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_20"] }
-reqwest-middleware = "0.3.0"
-reqwest-retry = "0.5"
+reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
+reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
+reqwest-middleware = "0.2.0"
+reqwest-retry = "0.2.2"
 routerify = "3"
 rpds = "0.13"
 rustc-hash = "1.1.0"
@@ -143,7 +143,7 @@ rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
-sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
+sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_path_to_error = "0.1"
@@ -177,11 +177,10 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.7"
 toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
-tower-service = "0.3.2"
 tracing = "0.1"
 tracing-error = "0.2.0"
-tracing-opentelemetry = "0.21.0"
-tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json", "ansi"] }
+tracing-opentelemetry = "0.20.0"
+tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json"] }
 twox-hash = { version = "1.6.3", default-features = false }
 url = "2.2"
 urlencoding = "2.1"
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -65,7 +65,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
    && mv s5cmd /usr/local/bin/s5cmd

 # LLVM
-ENV LLVM_VERSION=18
+ENV LLVM_VERSION=17
 RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
    && echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
    && apt update \
@@ -141,7 +141,7 @@ WORKDIR /home/nonroot

 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.78.0
+ENV RUSTC_VERSION=1.77.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
--- a/18
+++ b/18
@@ -25,16 +25,14 @@ ifeq ($(UNAME_S),Linux)
 	# Seccomp BPF is only available for Linux
 	PG_CONFIGURE_OPTS += --with-libseccomp
 else ifeq ($(UNAME_S),Darwin)
-	ifndef DISABLE_HOMEBREW
-		# macOS with brew-installed openssl requires explicit paths
-		# It can be configured with OPENSSL_PREFIX variable
-		OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
-		PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
-		PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
-		# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
-		# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
-		EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
-	endif
+	# macOS with brew-installed openssl requires explicit paths
+	# It can be configured with OPENSSL_PREFIX variable
+	OPENSSL_PREFIX ?= $(shell brew --prefix openssl@3)
+	PG_CONFIGURE_OPTS += --with-includes=$(OPENSSL_PREFIX)/include --with-libraries=$(OPENSSL_PREFIX)/lib
+	PG_CONFIGURE_OPTS += PKG_CONFIG_PATH=$(shell brew --prefix icu4c)/lib/pkgconfig
+	# macOS already has bison and flex in the system, but they are old and result in postgres-v14 target failure
+	# brew formulae are keg-only and not symlinked into HOMEBREW_PREFIX, force their usage
+	EXTRA_PATH_OVERRIDES += $(shell brew --prefix bison)/bin/:$(shell brew --prefix flex)/bin/:
 endif

 # Use -C option so that when PostgreSQL "make install" installs the
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -47,7 +47,7 @@ use chrono::Utc;
 use clap::Arg;
 use signal_hook::consts::{SIGQUIT, SIGTERM};
 use signal_hook::{consts::SIGINT, iterator::Signals};
-use tracing::{error, info, warn};
+use tracing::{error, info};
 use url::Url;

 use compute_api::responses::ComputeStatus;
@@ -62,7 +62,6 @@ use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
-use compute_tools::swap::resize_swap;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
@@ -111,7 +110,6 @@ fn main() -> Result<()> {
        .expect("Postgres connection string is required");
    let spec_json = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");
-    let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");

    // Extract OpenTelemetry context for the startup actions from the
    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
@@ -228,14 +226,14 @@ fn main() -> Result<()> {

    // If this is a pooled VM, prewarm before starting HTTP server and becoming
    // available for binding. Prewarming helps Postgres start quicker later,
-    // because QEMU will already have its memory allocated from the host, and
+    // because QEMU will already have its memory allocated from the host, and
    // the necessary binaries will already be cached.
    if !spec_set {
        compute.prewarm_postgres()?;
    }

-    // Launch http service first, so that we can serve control-plane requests
-    // while configuration is still in progress.
+    // Launch http service first, so that we can serve control-plane
+    // requests while configuration is still in progress.
    let _http_handle =
        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");

@@ -255,22 +253,21 @@ fn main() -> Result<()> {
                break;
            }
        }
-
-        // Record for how long we slept waiting for the spec.
-        let now = Utc::now();
-        state.metrics.wait_for_spec_ms = now
-            .signed_duration_since(state.start_time)
-            .to_std()
-            .unwrap()
-            .as_millis() as u64;
-
-        // Reset start time, so that the total startup time that is calculated later will
-        // not include the time that we waited for the spec.
-        state.start_time = now;
    }

    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
+
+    // Record for how long we slept waiting for the spec.
+    state.metrics.wait_for_spec_ms = Utc::now()
+        .signed_duration_since(state.start_time)
+        .to_std()
+        .unwrap()
+        .as_millis() as u64;
+    // Reset start time to the actual start of the configuration, so that
+    // the total startup time is properly measured at the end.
+    state.start_time = Utc::now();
+
    state.status = ComputeStatus::Init;
    compute.state_changed.notify_all();

@@ -278,72 +275,33 @@ fn main() -> Result<()> {
        "running compute with features: {:?}",
        state.pspec.as_ref().unwrap().spec.features
    );
-    // before we release the mutex, fetch the swap size (if any) for later.
-    let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
    drop(state);

    // Launch remaining service threads
    let _monitor_handle = launch_monitor(&compute);
    let _configurator_handle = launch_configurator(&compute);

-    let mut prestartup_failed = false;
-    let mut delay_exit = false;
-
-    // Resize swap to the desired size if the compute spec says so
-    if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
-        // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
-        // *before* starting postgres.
-        //
-        // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
-        // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
-        // OOM-killed during startup because swap wasn't available yet.
-        match resize_swap(size_bytes) {
-            Ok(()) => {
-                let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%size_bytes, %size_gib, "resized swap");
-            }
-            Err(err) => {
-                let err = err.context("failed to resize swap");
-                error!("{err:#}");
-
-                // Mark compute startup as failed; don't try to start postgres, and report this
-                // error to the control plane when it next asks.
-                prestartup_failed = true;
-                let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{err:?}"));
-                state.status = ComputeStatus::Failed;
-                compute.state_changed.notify_all();
-                delay_exit = true;
-            }
-        }
-    }
-
    // Start Postgres
-    let mut pg = None;
+    let mut delay_exit = false;
    let mut exit_code = None;
-
-    if !prestartup_failed {
-        pg = match compute.start_compute(extension_server_port) {
-            Ok(pg) => Some(pg),
-            Err(err) => {
-                error!("could not start the compute node: {:#}", err);
-                let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{:?}", err));
-                state.status = ComputeStatus::Failed;
-                // Notify others that Postgres failed to start. In case of configuring the
-                // empty compute, it's likely that API handler is still waiting for compute
-                // state change. With this we will notify it that compute is in Failed state,
-                // so control plane will know about it earlier and record proper error instead
-                // of timeout.
-                compute.state_changed.notify_all();
-                drop(state); // unlock
-                delay_exit = true;
-                None
-            }
-        };
-    } else {
-        warn!("skipping postgres startup because pre-startup step failed");
-    }
+    let pg = match compute.start_compute(extension_server_port) {
+        Ok(pg) => Some(pg),
+        Err(err) => {
+            error!("could not start the compute node: {:#}", err);
+            let mut state = compute.state.lock().unwrap();
+            state.error = Some(format!("{:?}", err));
+            state.status = ComputeStatus::Failed;
+            // Notify others that Postgres failed to start. In case of configuring the
+            // empty compute, it's likely that API handler is still waiting for compute
+            // state change. With this we will notify it that compute is in Failed state,
+            // so control plane will know about it earlier and record proper error instead
+            // of timeout.
+            compute.state_changed.notify_all();
+            drop(state); // unlock
+            delay_exit = true;
+            None
+        }
+    };

    // Start the vm-monitor if directed to. The vm-monitor only runs on linux
    // because it requires cgroups.
@@ -568,11 +526,6 @@ fn cli() -> clap::Command {
                )
                .value_name("FILECACHE_CONNSTR"),
        )
-        .arg(
-            Arg::new("resize-swap-on-bind")
-                .long("resize-swap-on-bind")
-                .action(clap::ArgAction::SetTrue),
-        )
 }

 /// When compute_ctl is killed, send also termination signal to sync-safekeepers
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -14,5 +14,4 @@ pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
 pub mod spec;
-pub mod swap;
 pub mod sync_sk;
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -490,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                "rename_db" => {
                    let new_name = op.new_name.as_ref().unwrap();

-                    if existing_dbs.contains_key(&op.name) {
+                    if existing_dbs.get(&op.name).is_some() {
                        let query: String = format!(
                            "ALTER DATABASE {} RENAME TO {}",
                            op.name.pg_quote(),
--- a/compute_tools/src/swap.rs
+++ b/compute_tools/src/swap.rs
@@ -1,36 +0,0 @@
-use anyhow::{anyhow, Context};
-use tracing::warn;
-
-pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
-
-pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
-    // run `/neonvm/bin/resize-swap --once {size_bytes}`
-    //
-    // Passing '--once' causes resize-swap to delete itself after successful completion, which
-    // means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
-    // postgres is running.
-    //
-    // NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
-    let child_result = std::process::Command::new("/usr/bin/sudo")
-        .arg(RESIZE_SWAP_BIN)
-        .arg("--once")
-        .arg(size_bytes.to_string())
-        .spawn();
-
-    if matches!(&child_result, Err(e) if e.kind() == std::io::ErrorKind::NotFound) {
-        warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
-        return Ok(());
-    }
-
-    child_result
-        .context("spawn() failed")
-        .and_then(|mut child| child.wait().context("wait() failed"))
-        .and_then(|status| match status.success() {
-            true => Ok(()),
-            false => Err(anyhow!("process exited with {status}")),
-        })
-        // wrap any prior error with the overall context that we couldn't run the command
-        .with_context(|| {
-            format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
-        })
-}
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -17,7 +17,6 @@ nix.workspace = true
 once_cell.workspace = true
 postgres.workspace = true
 hex.workspace = true
-humantime-serde.workspace = true
 hyper.workspace = true
 regex.workspace = true
 reqwest = { workspace = true, features = ["blocking", "json"] }
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -14,15 +14,15 @@ use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
 use control_plane::safekeeper::SafekeeperNode;
 use control_plane::storage_controller::StorageController;
 use control_plane::{broker, local_env};
-use pageserver_api::config::{
-    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
-    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
-};
 use pageserver_api::controller_api::PlacementPolicy;
 use pageserver_api::models::{
    ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
 };
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
+use pageserver_api::{
+    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
+    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
+};
 use postgres_backend::AuthType;
 use postgres_connection::parse_host_port;
 use safekeeper_api::{
@@ -1554,8 +1554,8 @@ fn cli() -> Command {
            Command::new("storage_controller")
                .arg_required_else_help(true)
                .about("Manage storage_controller")
-                .subcommand(Command::new("start").about("Start storage controller"))
-                .subcommand(Command::new("stop").about("Stop storage controller")
+                .subcommand(Command::new("start").about("Start local pageserver").arg(pageserver_config_args.clone()))
+                .subcommand(Command::new("stop").about("Stop local pageserver")
                            .arg(stop_mode_arg.clone()))
        )
        .subcommand(
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -554,7 +554,6 @@ impl Endpoint {
            format_version: 1.0,
            operation_uuid: None,
            features: self.features.clone(),
-            swap_size_bytes: None,
            cluster: Cluster {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -17,7 +17,6 @@ use std::net::Ipv4Addr;
 use std::net::SocketAddr;
 use std::path::{Path, PathBuf};
 use std::process::{Command, Stdio};
-use std::time::Duration;
 use utils::{
    auth::{encode_from_key_file, Claims},
    id::{NodeId, TenantId, TenantTimelineId, TimelineId},
@@ -67,10 +66,6 @@ pub struct LocalEnv {

    pub broker: NeonBroker,

-    // Configuration for the storage controller (1 per neon_local environment)
-    #[serde(default)]
-    pub storage_controller: NeonStorageControllerConf,
-
    /// This Vec must always contain at least one pageserver
    pub pageservers: Vec<PageServerConf>,

@@ -103,29 +98,6 @@ pub struct NeonBroker {
    pub listen_addr: SocketAddr,
 }

-/// Broker config for cluster internal communication.
-#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)]
-#[serde(default)]
-pub struct NeonStorageControllerConf {
-    /// Heartbeat timeout before marking a node offline
-    #[serde(with = "humantime_serde")]
-    pub max_unavailable: Duration,
-}
-
-impl NeonStorageControllerConf {
-    // Use a shorter pageserver unavailability interval than the default to speed up tests.
-    const DEFAULT_MAX_UNAVAILABLE_INTERVAL: std::time::Duration =
-        std::time::Duration::from_secs(10);
-}
-
-impl Default for NeonStorageControllerConf {
-    fn default() -> Self {
-        Self {
-            max_unavailable: Self::DEFAULT_MAX_UNAVAILABLE_INTERVAL,
-        }
-    }
-}
-
 // Dummy Default impl to satisfy Deserialize derive.
 impl Default for NeonBroker {
    fn default() -> Self {
@@ -382,10 +354,7 @@ impl LocalEnv {

        // Find neon binaries.
        if env.neon_distrib_dir == Path::new("") {
-            env::current_exe()?
-                .parent()
-                .unwrap()
-                .clone_into(&mut env.neon_distrib_dir);
+            env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
        }

        if env.pageservers.is_empty() {
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -248,13 +248,12 @@ impl PageServerNode {
        // situation: the metadata is written by some other script.
        std::fs::write(
            metadata_path,
-            serde_json::to_vec(&pageserver_api::config::NodeMetadata {
-                postgres_host: "localhost".to_string(),
-                postgres_port: self.pg_connection_config.port(),
-                http_host: "localhost".to_string(),
-                http_port,
-                other: HashMap::new(),
-            })
+            serde_json::to_vec(&serde_json::json!({
+                "host": "localhost",
+                "port": self.pg_connection_config.port(),
+                "http_host": "localhost",
+                "http_port": http_port,
+            }))
            .unwrap(),
        )
        .expect("Failed to write metadata file");
--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -1,8 +1,6 @@
-use crate::{
-    background_process,
-    local_env::{LocalEnv, NeonStorageControllerConf},
-};
+use crate::{background_process, local_env::LocalEnv};
 use camino::{Utf8Path, Utf8PathBuf};
+use hyper::Method;
 use pageserver_api::{
    controller_api::{
        NodeConfigureRequest, NodeRegisterRequest, TenantCreateResponse, TenantLocateResponse,
@@ -16,7 +14,6 @@ use pageserver_api::{
 };
 use pageserver_client::mgmt_api::ResponseErrorMessageExt;
 use postgres_backend::AuthType;
-use reqwest::Method;
 use serde::{de::DeserializeOwned, Deserialize, Serialize};
 use std::{fs, str::FromStr};
 use tokio::process::Command;
@@ -35,13 +32,15 @@ pub struct StorageController {
    public_key: Option<String>,
    postgres_port: u16,
    client: reqwest::Client,
-    config: NeonStorageControllerConf,
 }

 const COMMAND: &str = "storage_controller";

 const STORAGE_CONTROLLER_POSTGRES_VERSION: u32 = 16;

+// Use a shorter pageserver unavailability interval than the default to speed up tests.
+const NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL: std::time::Duration = std::time::Duration::from_secs(10);
+
 #[derive(Serialize, Deserialize)]
 pub struct AttachHookRequest {
    pub tenant_shard_id: TenantShardId,
@@ -136,7 +135,6 @@ impl StorageController {
            client: reqwest::ClientBuilder::new()
                .build()
                .expect("Failed to construct http client"),
-            config: env.storage_controller.clone(),
        }
    }

@@ -274,6 +272,8 @@ impl StorageController {
        // Run migrations on every startup, in case something changed.
        let database_url = self.setup_database().await?;

+        let max_unavailable: humantime::Duration = NEON_LOCAL_MAX_UNAVAILABLE_INTERVAL.into();
+
        let mut args = vec![
            "-l",
            &self.listen,
@@ -283,7 +283,7 @@ impl StorageController {
            "--database-url",
            &database_url,
            "--max-unavailable-interval",
-            &humantime::Duration::from(self.config.max_unavailable).to_string(),
+            &max_unavailable.to_string(),
        ]
        .into_iter()
        .map(|s| s.to_string())
@@ -379,7 +379,7 @@ impl StorageController {
    /// Simple HTTP request wrapper for calling into storage controller
    async fn dispatch<RQ, RS>(
        &self,
-        method: reqwest::Method,
+        method: hyper::Method,
        path: String,
        body: Option<RQ>,
    ) -> anyhow::Result<RS>
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -1,6 +1,7 @@
 use std::{collections::HashMap, str::FromStr, time::Duration};

 use clap::{Parser, Subcommand};
+use hyper::{Method, StatusCode};
 use pageserver_api::{
    controller_api::{
        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy,
@@ -13,7 +14,7 @@ use pageserver_api::{
    shard::{ShardStripeSize, TenantShardId},
 };
 use pageserver_client::mgmt_api::{self, ResponseErrorMessageExt};
-use reqwest::{Method, StatusCode, Url};
+use reqwest::Url;
 use serde::{de::DeserializeOwned, Serialize};
 use utils::id::{NodeId, TenantId};

@@ -231,7 +232,7 @@ impl Client {
    /// Simple HTTP request wrapper for calling into storage controller
    async fn dispatch<RQ, RS>(
        &self,
-        method: Method,
+        method: hyper::Method,
        path: String,
        body: Option<RQ>,
    ) -> mgmt_api::Result<RS>
--- a/docs/storage_controller.md
+++ b/docs/storage_controller.md
@@ -30,7 +30,7 @@ The storage controller uses a postgres database to persist a subset of its state
 persist the _relationships_ between them: the attachment state of a tenant's shards to nodes is kept in memory and
 rebuilt on startup.

-The file `persistence.rs` contains all the code for accessing the database, and has a large doc comment that goes into more detail about exactly what we persist and why.
+The file `persistence.rs` contains all the code for accessing the database, and has a large doc comment that goes into more detail about exactly what we persist and why.

 The `diesel` crate is used for defining models & migrations.

--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -33,23 +33,6 @@ pub struct ComputeSpec {
    #[serde(default)]
    pub features: Vec<ComputeFeature>,

-    /// If compute_ctl was passed `--resize-swap-on-bind`, a value of `Some(_)` instructs
-    /// compute_ctl to `/neonvm/bin/resize-swap` with the given size, when the spec is first
-    /// received.
-    ///
-    /// Both this field and `--resize-swap-on-bind` are required, so that the control plane's
-    /// spec generation doesn't need to be aware of the actual compute it's running on, while
-    /// guaranteeing gradual rollout of swap. Otherwise, without `--resize-swap-on-bind`, we could
-    /// end up trying to resize swap in VMs without it -- or end up *not* resizing swap, thus
-    /// giving every VM much more swap than it should have (32GiB).
-    ///
-    /// Eventually we may remove `--resize-swap-on-bind` and exclusively use `swap_size_bytes` for
-    /// enabling the swap resizing behavior once rollout is complete.
-    ///
-    /// See neondatabase/cloud#12047 for more.
-    #[serde(default)]
-    pub swap_size_bytes: Option<u64>,
-
    /// Expected cluster state at the end of transition process.
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -1,31 +0,0 @@
-use std::collections::HashMap;
-
-use const_format::formatcp;
-
-#[cfg(test)]
-mod tests;
-
-pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
-pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
-pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
-pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
-
-// Certain metadata (e.g. externally-addressable name, AZ) is delivered
-// as a separate structure.  This information is not neeed by the pageserver
-// itself, it is only used for registering the pageserver with the control
-// plane and/or storage controller.
-//
-#[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
-pub struct NodeMetadata {
-    #[serde(rename = "host")]
-    pub postgres_host: String,
-    #[serde(rename = "port")]
-    pub postgres_port: u16,
-    pub http_host: String,
-    pub http_port: u16,
-
-    // Deployment tools may write fields to the metadata file beyond what we
-    // use in this type: this type intentionally only names fields that require.
-    #[serde(flatten)]
-    pub other: HashMap<String, serde_json::Value>,
-}
--- a/libs/pageserver_api/src/config/tests.rs
+++ b/libs/pageserver_api/src/config/tests.rs
@@ -1,22 +0,0 @@
-use super::*;
-
-#[test]
-fn test_node_metadata_v1_backward_compatibilty() {
-    let v1 = serde_json::to_vec(&serde_json::json!({
-        "host": "localhost",
-        "port": 23,
-        "http_host": "localhost",
-        "http_port": 42,
-    }));
-
-    assert_eq!(
-        serde_json::from_slice::<NodeMetadata>(&v1.unwrap()).unwrap(),
-        NodeMetadata {
-            postgres_host: "localhost".to_string(),
-            postgres_port: 23,
-            http_host: "localhost".to_string(),
-            http_port: 42,
-            other: HashMap::new(),
-        }
-    )
-}
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -80,7 +80,7 @@ impl Key {
    }

    /// Get the range of metadata keys.
-    pub const fn metadata_key_range() -> Range<Self> {
+    pub fn metadata_key_range() -> Range<Self> {
        Key {
            field1: METADATA_KEY_BEGIN_PREFIX,
            field2: 0,
@@ -572,17 +572,14 @@ pub const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.

-/// Non inherited range for vectored get.
 pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
-/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
-pub const NON_INHERITED_SPARSE_RANGE: Range<Key> = Key::metadata_key_range();

 // AUX_FILES currently stores only data for logical replication (slots etc), and
 // we don't preserve these on a branch because safekeepers can't follow timeline
 // switch (and generally it likely should be optional), so ignore these.
 #[inline(always)]
 pub fn is_inherited_key(key: Key) -> bool {
-    !NON_INHERITED_RANGE.contains(&key) && !NON_INHERITED_SPARSE_RANGE.contains(&key)
+    !NON_INHERITED_RANGE.contains(&key)
 }

 #[inline(always)]
--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -17,10 +17,6 @@ pub struct KeySpace {
    pub ranges: Vec<Range<Key>>,
 }

-/// A wrapper type for sparse keyspaces.
-#[derive(Clone, Debug, Default, PartialEq, Eq)]
-pub struct SparseKeySpace(pub KeySpace);
-
 /// Represents a contiguous half-open range of the keyspace, masked according to a particular
 /// ShardNumber's stripes: within this range of keys, only some "belong" to the current
 /// shard.
@@ -439,33 +435,10 @@ pub struct KeyPartitioning {
    pub parts: Vec<KeySpace>,
 }

-/// Represents a partitioning of the sparse key space.
-#[derive(Clone, Debug, Default)]
-pub struct SparseKeyPartitioning {
-    pub parts: Vec<SparseKeySpace>,
-}
-
 impl KeyPartitioning {
    pub fn new() -> Self {
        KeyPartitioning { parts: Vec::new() }
    }
-
-    /// Convert a key partitioning to a sparse partition.
-    pub fn into_sparse(self) -> SparseKeyPartitioning {
-        SparseKeyPartitioning {
-            parts: self.parts.into_iter().map(SparseKeySpace).collect(),
-        }
-    }
-}
-
-impl SparseKeyPartitioning {
-    /// Note: use this function with caution. Attempt to handle a sparse keyspace in the same way as a dense keyspace will
-    /// cause long/dead loops.
-    pub fn into_dense(self) -> KeyPartitioning {
-        KeyPartitioning {
-            parts: self.parts.into_iter().map(|x| x.0).collect(),
-        }
-    }
 }

 ///
--- a/libs/pageserver_api/src/lib.rs
+++ b/libs/pageserver_api/src/lib.rs
@@ -1,5 +1,6 @@
 #![deny(unsafe_code)]
 #![deny(clippy::undocumented_unsafe_blocks)]
+use const_format::formatcp;

 pub mod controller_api;
 pub mod key;
@@ -10,4 +11,7 @@ pub mod shard;
 /// Public API types
 pub mod upcall_api;

-pub mod config;
+pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
+pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
+pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
+pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -430,6 +430,8 @@ pub struct StatusResponse {
 #[derive(Serialize, Deserialize, Debug)]
 #[serde(deny_unknown_fields)]
 pub struct TenantLocationConfigRequest {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tenant_id: Option<TenantShardId>,
    #[serde(flatten)]
    pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
 }
--- a/libs/pageserver_api/src/models/partitioning.rs
+++ b/libs/pageserver_api/src/models/partitioning.rs
@@ -1,11 +1,9 @@
 use utils::lsn::Lsn;

-use crate::keyspace::SparseKeySpace;
-
 #[derive(Debug, PartialEq, Eq)]
 pub struct Partitioning {
    pub keys: crate::keyspace::KeySpace,
-    pub sparse_keys: crate::keyspace::SparseKeySpace,
+
    pub at_lsn: Lsn,
 }

@@ -34,8 +32,6 @@ impl serde::Serialize for Partitioning {
        let mut map = serializer.serialize_map(Some(2))?;
        map.serialize_key("keys")?;
        map.serialize_value(&KeySpace(&self.keys))?;
-        map.serialize_key("sparse_keys")?;
-        map.serialize_value(&KeySpace(&self.sparse_keys.0))?;
        map.serialize_key("at_lsn")?;
        map.serialize_value(&WithDisplay(&self.at_lsn))?;
        map.end()
@@ -103,7 +99,6 @@ impl<'a> serde::Deserialize<'a> for Partitioning {
        #[derive(serde::Deserialize)]
        struct De {
            keys: KeySpace,
-            sparse_keys: KeySpace,
            #[serde_as(as = "serde_with::DisplayFromStr")]
            at_lsn: Lsn,
        }
@@ -112,7 +107,6 @@ impl<'a> serde::Deserialize<'a> for Partitioning {
        Ok(Self {
            at_lsn: de.at_lsn,
            keys: de.keys.0,
-            sparse_keys: SparseKeySpace(de.sparse_keys.0),
        })
    }
 }
@@ -139,12 +133,6 @@ mod tests {
                "030000000000000000000000000000000003"
              ]
            ],
-            "sparse_keys": [
-              [
-                "620000000000000000000000000000000000",
-                "620000000000000000000000000000000003"
-              ]
-            ],
            "at_lsn": "0/2240160"
        }
        "#;
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -97,7 +97,7 @@ impl ShardCount {

    /// The internal value of a ShardCount may be zero, which means "1 shard, but use
    /// legacy format for TenantShardId that excludes the shard suffix", also known
-    /// as [`TenantShardId::unsharded`].
+    /// as `TenantShardId::unsharded`.
    ///
    /// This method returns the actual number of shards, i.e. if our internal value is
    /// zero, we return 1 (unsharded tenants have 1 shard).
@@ -116,9 +116,7 @@ impl ShardCount {
        self.0
    }

-    /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
-    /// uses the legacy format for `TenantShardId`. See also the documentation for
-    /// [`Self::count`].
+    /// Returns true when the internal value is zero, i.e. the tenant is unsharded.
    pub fn is_unsharded(&self) -> bool {
        self.0 == 0
    }
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -331,10 +331,7 @@ impl CheckPoint {
    /// Returns 'true' if the XID was updated.
    pub fn update_next_xid(&mut self, xid: u32) -> bool {
        // nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparround.
-        let mut new_xid = std::cmp::max(
-            xid.wrapping_add(1),
-            pg_constants::FIRST_NORMAL_TRANSACTION_ID,
-        );
+        let mut new_xid = std::cmp::max(xid.wrapping_add(1), pg_constants::FIRST_NORMAL_TRANSACTION_ID);
        // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
        // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
        new_xid =
@@ -370,16 +367,8 @@ pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result<Byte
    let seg_off = lsn.segment_offset(WAL_SEGMENT_SIZE);

    let first_page_only = seg_off < XLOG_BLCKSZ;
-    // If first records starts in the middle of the page, pretend in page header
-    // there is a fake record which ends where first real record starts. This
-    // makes pg_waldump etc happy.
-    let (shdr_rem_len, infoflags) = if first_page_only && seg_off > 0 {
-        assert!(seg_off >= XLOG_SIZE_OF_XLOG_LONG_PHD);
-        // xlp_rem_len doesn't include page header, hence the subtraction.
-        (
-            seg_off - XLOG_SIZE_OF_XLOG_LONG_PHD,
-            pg_constants::XLP_FIRST_IS_CONTRECORD,
-        )
+    let (shdr_rem_len, infoflags) = if first_page_only {
+        (seg_off, pg_constants::XLP_FIRST_IS_CONTRECORD)
    } else {
        (0, 0)
    };
@@ -408,22 +397,20 @@ pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result<Byte

    if !first_page_only {
        let block_offset = lsn.page_offset_in_segment(WAL_SEGMENT_SIZE) as usize;
-        // see comments above about XLP_FIRST_IS_CONTRECORD and xlp_rem_len.
-        let (xlp_rem_len, xlp_info) = if page_off > 0 {
-            assert!(page_off >= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64);
-            (
-                (page_off - XLOG_SIZE_OF_XLOG_SHORT_PHD as u64) as u32,
-                pg_constants::XLP_FIRST_IS_CONTRECORD,
-            )
-        } else {
-            (0, 0)
-        };
        let header = XLogPageHeaderData {
            xlp_magic: XLOG_PAGE_MAGIC as u16,
-            xlp_info,
+            xlp_info: if page_off >= pg_constants::SIZE_OF_PAGE_HEADER as u64 {
+                pg_constants::XLP_FIRST_IS_CONTRECORD
+            } else {
+                0
+            },
            xlp_tli: PG_TLI,
            xlp_pageaddr: lsn.page_lsn().0,
-            xlp_rem_len,
+            xlp_rem_len: if page_off >= pg_constants::SIZE_OF_PAGE_HEADER as u64 {
+                page_off as u32
+            } else {
+                0u32
+            },
            ..Default::default() // Put 0 in padding fields.
        };
        let hdr_bytes = header.encode()?;
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -38,7 +38,6 @@ azure_storage_blobs.workspace = true
 futures-util.workspace = true
 http-types.workspace = true
 itertools.workspace = true
-sync_wrapper = { workspace = true, features = ["futures"] }

 [dev-dependencies]
 camino-tempfile.workspace = true
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -3,7 +3,6 @@
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::env;
-use std::io;
 use std::num::NonZeroU32;
 use std::pin::Pin;
 use std::str::FromStr;
@@ -21,7 +20,6 @@ use azure_storage_blobs::blob::CopyStatus;
 use azure_storage_blobs::prelude::ClientBuilder;
 use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
 use bytes::Bytes;
-use futures::future::Either;
 use futures::stream::Stream;
 use futures_util::StreamExt;
 use futures_util::TryStreamExt;
@@ -130,12 +128,12 @@ impl AzureBlobStorage {
        let kind = RequestKind::Get;

        let _permit = self.permit(kind, cancel).await?;
-        let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
-        let cancel_or_timeout_ = crate::support::cancel_or_timeout(self.timeout, cancel.clone());

        let mut etag = None;
        let mut last_modified = None;
        let mut metadata = HashMap::new();
+        // TODO give proper streaming response instead of buffering into RAM
+        // https://github.com/neondatabase/neon/issues/5563

        let download = async {
            let response = builder
@@ -154,46 +152,39 @@ impl AzureBlobStorage {
                Err(_elapsed) => Err(DownloadError::Timeout),
            });

-            let mut response = Box::pin(response);
+            let mut response = std::pin::pin!(response);

-            let Some(part) = response.next().await else {
+            let mut bufs = Vec::new();
+            while let Some(part) = response.next().await {
+                let part = part?;
+                if etag.is_none() {
+                    etag = Some(part.blob.properties.etag);
+                }
+                if last_modified.is_none() {
+                    last_modified = Some(part.blob.properties.last_modified.into());
+                }
+                if let Some(blob_meta) = part.blob.metadata {
+                    metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
+                }
+                let data = part
+                    .data
+                    .collect()
+                    .await
+                    .map_err(|e| DownloadError::Other(e.into()))?;
+                bufs.push(data);
+            }
+
+            if bufs.is_empty() {
                return Err(DownloadError::Other(anyhow::anyhow!(
-                    "Azure GET response contained no response body"
+                    "Azure GET response contained no buffers"
                )));
-            };
-            let part = part?;
-            if etag.is_none() {
-                etag = Some(part.blob.properties.etag);
            }
-            if last_modified.is_none() {
-                last_modified = Some(part.blob.properties.last_modified.into());
-            }
-            if let Some(blob_meta) = part.blob.metadata {
-                metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
-            }
-
            // unwrap safety: if these were None, bufs would be empty and we would have returned an error already
            let etag = etag.unwrap();
            let last_modified = last_modified.unwrap();

-            let tail_stream = response
-                .map(|part| match part {
-                    Ok(part) => Either::Left(part.data.map(|r| r.map_err(io::Error::other))),
-                    Err(e) => {
-                        Either::Right(futures::stream::once(async { Err(io::Error::other(e)) }))
-                    }
-                })
-                .flatten();
-            let stream = part
-                .data
-                .map(|r| r.map_err(io::Error::other))
-                .chain(sync_wrapper::SyncStream::new(tail_stream));
-            //.chain(SyncStream::from_pin(Box::pin(tail_stream)));
-
-            let download_stream = crate::support::DownloadStream::new(cancel_or_timeout_, stream);
-
            Ok(Download {
-                download_stream: Box::pin(download_stream),
+                download_stream: Box::pin(futures::stream::iter(bufs.into_iter().map(Ok))),
                etag,
                last_modified,
                metadata: Some(StorageMetadata(metadata)),
@@ -202,10 +193,7 @@ impl AzureBlobStorage {

        tokio::select! {
            bufs = download => bufs,
-            cancel_or_timeout = cancel_or_timeout => match cancel_or_timeout {
-                TimeoutOrCancel::Timeout => Err(DownloadError::Timeout),
-                TimeoutOrCancel::Cancel => Err(DownloadError::Cancelled),
-            },
+            _ = cancel.cancelled() => Err(DownloadError::Cancelled),
        }
    }

--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -55,11 +55,11 @@ pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
 /// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
 /// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
 pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
-/// Set this limit analogously to the S3 limit
+/// We set this a little bit low as we currently buffer the entire file into RAM
 ///
 /// Here, a limit of max 20k concurrent connections was noted.
 /// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
-pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
+pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 30;
 /// No limits on the client side, which currenltly means 1000 for AWS S3.
 /// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
 pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
--- a/libs/utils/src/seqwait.rs
+++ b/libs/utils/src/seqwait.rs
@@ -2,10 +2,11 @@

 use std::cmp::{Eq, Ordering};
 use std::collections::BinaryHeap;
+use std::fmt::Debug;
 use std::mem;
 use std::sync::Mutex;
 use std::time::Duration;
-use tokio::sync::watch::{self, channel};
+use tokio::sync::watch::{channel, Receiver, Sender};
 use tokio::time::timeout;

 /// An error happened while waiting for a number
@@ -34,73 +35,23 @@ pub trait MonotonicCounter<V> {
    fn cnt_value(&self) -> V;
 }

-/// Heap of waiters, lowest numbers pop first.
-struct Waiters<V>
+/// Internal components of a `SeqWait`
+struct SeqWaitInt<S, V>
 where
+    S: MonotonicCounter<V>,
    V: Ord,
 {
-    heap: BinaryHeap<Waiter<V>>,
-    /// Number of the first waiter in the heap, or None if there are no waiters.
-    status_channel: watch::Sender<Option<V>>,
-}
-
-impl<V> Waiters<V>
-where
-    V: Ord + Copy,
-{
-    fn new() -> Self {
-        Waiters {
-            heap: BinaryHeap::new(),
-            status_channel: channel(None).0,
-        }
-    }
-
-    /// `status_channel` contains the number of the first waiter in the heap.
-    /// This function should be called whenever waiters heap changes.
-    fn update_status(&self) {
-        let first_waiter = self.heap.peek().map(|w| w.wake_num);
-        let _ = self.status_channel.send_replace(first_waiter);
-    }
-
-    /// Add new waiter to the heap, return a channel that will be notified when the number arrives.
-    fn add(&mut self, num: V) -> watch::Receiver<()> {
-        let (tx, rx) = channel(());
-        self.heap.push(Waiter {
-            wake_num: num,
-            wake_channel: tx,
-        });
-        self.update_status();
-        rx
-    }
-
-    /// Pop all waiters <= num from the heap. Collect channels in a vector,
-    /// so that caller can wake them up.
-    fn pop_leq(&mut self, num: V) -> Vec<watch::Sender<()>> {
-        let mut wake_these = Vec::new();
-        while let Some(n) = self.heap.peek() {
-            if n.wake_num > num {
-                break;
-            }
-            wake_these.push(self.heap.pop().unwrap().wake_channel);
-        }
-        self.update_status();
-        wake_these
-    }
-
-    /// Used on shutdown to efficiently drop all waiters.
-    fn take_all(&mut self) -> BinaryHeap<Waiter<V>> {
-        let heap = mem::take(&mut self.heap);
-        self.update_status();
-        heap
-    }
+    waiters: BinaryHeap<Waiter<V>>,
+    current: S,
+    shutdown: bool,
 }

 struct Waiter<T>
 where
    T: Ord,
 {
-    wake_num: T,                     // wake me when this number arrives ...
-    wake_channel: watch::Sender<()>, // ... by sending a message to this channel
+    wake_num: T,              // wake me when this number arrives ...
+    wake_channel: Sender<()>, // ... by sending a message to this channel
 }

 // BinaryHeap is a max-heap, and we want a min-heap. Reverse the ordering here
@@ -125,17 +76,6 @@ impl<T: Ord> PartialEq for Waiter<T> {

 impl<T: Ord> Eq for Waiter<T> {}

-/// Internal components of a `SeqWait`
-struct SeqWaitInt<S, V>
-where
-    S: MonotonicCounter<V>,
-    V: Ord,
-{
-    waiters: Waiters<V>,
-    current: S,
-    shutdown: bool,
-}
-
 /// A tool for waiting on a sequence number
 ///
 /// This provides a way to wait the arrival of a number.
@@ -168,7 +108,7 @@ where
    /// Create a new `SeqWait`, initialized to a particular number
    pub fn new(starting_num: S) -> Self {
        let internal = SeqWaitInt {
-            waiters: Waiters::new(),
+            waiters: BinaryHeap::new(),
            current: starting_num,
            shutdown: false,
        };
@@ -188,8 +128,9 @@ where
            // Block any future waiters from starting
            internal.shutdown = true;

-            // Take all waiters to drop them later.
-            internal.waiters.take_all()
+            // This will steal the entire waiters heap.
+            // When we drop it, all waiters will be woken.
+            mem::take(&mut internal.waiters)

            // Drop the lock as we exit this scope.
        };
@@ -255,7 +196,7 @@ where

    /// Register and return a channel that will be notified when a number arrives,
    /// or None, if it has already arrived.
-    fn queue_for_wait(&self, num: V) -> Result<Option<watch::Receiver<()>>, SeqWaitError> {
+    fn queue_for_wait(&self, num: V) -> Result<Option<Receiver<()>>, SeqWaitError> {
        let mut internal = self.internal.lock().unwrap();
        if internal.current.cnt_value() >= num {
            return Ok(None);
@@ -264,8 +205,12 @@ where
            return Err(SeqWaitError::Shutdown);
        }

-        // Add waiter channel to the queue.
-        let rx = internal.waiters.add(num);
+        // Create a new channel.
+        let (tx, rx) = channel(());
+        internal.waiters.push(Waiter {
+            wake_num: num,
+            wake_channel: tx,
+        });
        // Drop the lock as we exit this scope.
        Ok(Some(rx))
    }
@@ -286,8 +231,16 @@ where
            }
            internal.current.cnt_advance(num);

-            // Pop all waiters <= num from the heap.
-            internal.waiters.pop_leq(num)
+            // Pop all waiters <= num from the heap. Collect them in a vector, and
+            // wake them up after releasing the lock.
+            let mut wake_these = Vec::new();
+            while let Some(n) = internal.waiters.peek() {
+                if n.wake_num > num {
+                    break;
+                }
+                wake_these.push(internal.waiters.pop().unwrap().wake_channel);
+            }
+            wake_these
        };

        for tx in wake_these {
@@ -302,23 +255,6 @@ where
    pub fn load(&self) -> S {
        self.internal.lock().unwrap().current
    }
-
-    /// Get a Receiver for the current status.
-    ///
-    /// The current status is the number of the first waiter in the queue,
-    /// or None if there are no waiters.
-    ///
-    /// This receiver will be notified whenever the status changes.
-    /// It is useful for receiving notifications when the first waiter
-    /// starts waiting for a number, or when there are no more waiters left.
-    pub fn status_receiver(&self) -> watch::Receiver<Option<V>> {
-        self.internal
-            .lock()
-            .unwrap()
-            .waiters
-            .status_channel
-            .subscribe()
-    }
 }

 #[cfg(test)]
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -284,34 +284,6 @@ impl Client {
        Ok((status, progress))
    }

-    pub async fn tenant_secondary_status(
-        &self,
-        tenant_shard_id: TenantShardId,
-    ) -> Result<SecondaryProgress> {
-        let path = reqwest::Url::parse(&format!(
-            "{}/v1/tenant/{}/secondary/status",
-            self.mgmt_api_endpoint, tenant_shard_id
-        ))
-        .expect("Cannot build URL");
-
-        self.request(Method::GET, path, ())
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
-    pub async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
-        let path = reqwest::Url::parse(&format!(
-            "{}/v1/tenant/{}/heatmap_upload",
-            self.mgmt_api_endpoint, tenant_id
-        ))
-        .expect("Cannot build URL");
-
-        self.request(Method::POST, path, ()).await?;
-        Ok(())
-    }
-
    pub async fn location_config(
        &self,
        tenant_shard_id: TenantShardId,
@@ -319,7 +291,10 @@ impl Client {
        flush_ms: Option<std::time::Duration>,
        lazy: bool,
    ) -> Result<()> {
-        let req_body = TenantLocationConfigRequest { config };
+        let req_body = TenantLocationConfigRequest {
+            tenant_id: None,
+            config,
+        };

        let mut path = reqwest::Url::parse(&format!(
            "{}/v1/tenant/{}/location_config",
--- a/pageserver/src/basebackup.rs
+++ b/pageserver/src/basebackup.rs
@@ -10,7 +10,7 @@
 //! This module is responsible for creation of such tarball
 //! from data stored in object storage.
 //!
-use anyhow::{anyhow, Context};
+use anyhow::{anyhow, bail, ensure, Context};
 use bytes::{BufMut, Bytes, BytesMut};
 use fail::fail_point;
 use pageserver_api::key::{key_to_slru_block, Key};
@@ -38,14 +38,6 @@ use postgres_ffi::PG_TLI;
 use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE};
 use utils::lsn::Lsn;

-#[derive(Debug, thiserror::Error)]
-pub enum BasebackupError {
-    #[error("basebackup pageserver error {0:#}")]
-    Server(#[from] anyhow::Error),
-    #[error("basebackup client error {0:#}")]
-    Client(#[source] io::Error),
-}
-
 /// Create basebackup with non-rel data in it.
 /// Only include relational data if 'full_backup' is true.
 ///
@@ -61,7 +53,7 @@ pub async fn send_basebackup_tarball<'a, W>(
    prev_lsn: Option<Lsn>,
    full_backup: bool,
    ctx: &'a RequestContext,
-) -> Result<(), BasebackupError>
+) -> anyhow::Result<()>
 where
    W: AsyncWrite + Send + Sync + Unpin,
 {
@@ -100,10 +92,8 @@ where

    // Consolidate the derived and the provided prev_lsn values
    let prev_lsn = if let Some(provided_prev_lsn) = prev_lsn {
-        if backup_prev != Lsn(0) && backup_prev != provided_prev_lsn {
-            return Err(BasebackupError::Server(anyhow!(
-                "backup_prev {backup_prev} != provided_prev_lsn {provided_prev_lsn}"
-            )));
+        if backup_prev != Lsn(0) {
+            ensure!(backup_prev == provided_prev_lsn);
        }
        provided_prev_lsn
    } else {
@@ -169,26 +159,15 @@ where
        }
    }

-    async fn add_block(&mut self, key: &Key, block: Bytes) -> Result<(), BasebackupError> {
+    async fn add_block(&mut self, key: &Key, block: Bytes) -> anyhow::Result<()> {
        let (kind, segno, _) = key_to_slru_block(*key)?;

        match kind {
            SlruKind::Clog => {
-                if !(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8) {
-                    return Err(BasebackupError::Server(anyhow!(
-                        "invalid SlruKind::Clog record: block.len()={}",
-                        block.len()
-                    )));
-                }
+                ensure!(block.len() == BLCKSZ as usize || block.len() == BLCKSZ as usize + 8);
            }
            SlruKind::MultiXactMembers | SlruKind::MultiXactOffsets => {
-                if block.len() != BLCKSZ as usize {
-                    return Err(BasebackupError::Server(anyhow!(
-                        "invalid {:?} record: block.len()={}",
-                        kind,
-                        block.len()
-                    )));
-                }
+                ensure!(block.len() == BLCKSZ as usize);
            }
        }

@@ -215,15 +194,12 @@ where
        Ok(())
    }

-    async fn flush(&mut self) -> Result<(), BasebackupError> {
+    async fn flush(&mut self) -> anyhow::Result<()> {
        let nblocks = self.buf.len() / BLCKSZ as usize;
        let (kind, segno) = self.current_segment.take().unwrap();
        let segname = format!("{}/{:>04X}", kind.to_str(), segno);
        let header = new_tar_header(&segname, self.buf.len() as u64)?;
-        self.ar
-            .append(&header, self.buf.as_slice())
-            .await
-            .map_err(BasebackupError::Client)?;
+        self.ar.append(&header, self.buf.as_slice()).await?;

        self.total_blocks += nblocks;
        debug!("Added to basebackup slru {} relsize {}", segname, nblocks);
@@ -233,7 +209,7 @@ where
        Ok(())
    }

-    async fn finish(mut self) -> Result<(), BasebackupError> {
+    async fn finish(mut self) -> anyhow::Result<()> {
        let res = if self.current_segment.is_none() || self.buf.is_empty() {
            Ok(())
        } else {
@@ -250,7 +226,7 @@ impl<'a, W> Basebackup<'a, W>
 where
    W: AsyncWrite + Send + Sync + Unpin,
 {
-    async fn send_tarball(mut self) -> Result<(), BasebackupError> {
+    async fn send_tarball(mut self) -> anyhow::Result<()> {
        // TODO include checksum

        let lazy_slru_download = self.timeline.get_lazy_slru_download() && !self.full_backup;
@@ -286,8 +262,7 @@ where
            let slru_partitions = self
                .timeline
                .get_slru_keyspace(Version::Lsn(self.lsn), self.ctx)
-                .await
-                .map_err(|e| BasebackupError::Server(e.into()))?
+                .await?
                .partition(
                    self.timeline.get_shard_identity(),
                    Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64,
@@ -296,15 +271,10 @@ where
            let mut slru_builder = SlruSegmentsBuilder::new(&mut self.ar);

            for part in slru_partitions.parts {
-                let blocks = self
-                    .timeline
-                    .get_vectored(part, self.lsn, self.ctx)
-                    .await
-                    .map_err(|e| BasebackupError::Server(e.into()))?;
+                let blocks = self.timeline.get_vectored(part, self.lsn, self.ctx).await?;

                for (key, block) in blocks {
-                    let block = block.map_err(|e| BasebackupError::Server(e.into()))?;
-                    slru_builder.add_block(&key, block).await?;
+                    slru_builder.add_block(&key, block?).await?;
                }
            }
            slru_builder.finish().await?;
@@ -312,11 +282,8 @@ where

        let mut min_restart_lsn: Lsn = Lsn::MAX;
        // Create tablespace directories
-        for ((spcnode, dbnode), has_relmap_file) in self
-            .timeline
-            .list_dbdirs(self.lsn, self.ctx)
-            .await
-            .map_err(|e| BasebackupError::Server(e.into()))?
+        for ((spcnode, dbnode), has_relmap_file) in
+            self.timeline.list_dbdirs(self.lsn, self.ctx).await?
        {
            self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;

@@ -325,8 +292,7 @@ where
            let rels = self
                .timeline
                .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
-                .await
-                .map_err(|e| BasebackupError::Server(e.into()))?;
+                .await?;
            for &rel in rels.iter() {
                // Send init fork as main fork to provide well formed empty
                // contents of UNLOGGED relations. Postgres copies it in
@@ -349,12 +315,7 @@ where
                }
            }

-            for (path, content) in self
-                .timeline
-                .list_aux_files(self.lsn, self.ctx)
-                .await
-                .map_err(|e| BasebackupError::Server(e.into()))?
-            {
+            for (path, content) in self.timeline.list_aux_files(self.lsn, self.ctx).await? {
                if path.starts_with("pg_replslot") {
                    let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
                    let restart_lsn = Lsn(u64::from_le_bytes(
@@ -385,41 +346,34 @@ where
        for xid in self
            .timeline
            .list_twophase_files(self.lsn, self.ctx)
-            .await
-            .map_err(|e| BasebackupError::Server(e.into()))?
+            .await?
        {
            self.add_twophase_file(xid).await?;
        }

        fail_point!("basebackup-before-control-file", |_| {
-            Err(BasebackupError::Server(anyhow!(
-                "failpoint basebackup-before-control-file"
-            )))
+            bail!("failpoint basebackup-before-control-file")
        });

        // Generate pg_control and bootstrap WAL segment.
        self.add_pgcontrol_file().await?;
-        self.ar.finish().await.map_err(BasebackupError::Client)?;
+        self.ar.finish().await?;
        debug!("all tarred up!");
        Ok(())
    }

    /// Add contents of relfilenode `src`, naming it as `dst`.
-    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> Result<(), BasebackupError> {
+    async fn add_rel(&mut self, src: RelTag, dst: RelTag) -> anyhow::Result<()> {
        let nblocks = self
            .timeline
            .get_rel_size(src, Version::Lsn(self.lsn), self.ctx)
-            .await
-            .map_err(|e| BasebackupError::Server(e.into()))?;
+            .await?;

        // If the relation is empty, create an empty file
        if nblocks == 0 {
            let file_name = dst.to_segfile_name(0);
            let header = new_tar_header(&file_name, 0)?;
-            self.ar
-                .append(&header, &mut io::empty())
-                .await
-                .map_err(BasebackupError::Client)?;
+            self.ar.append(&header, &mut io::empty()).await?;
            return Ok(());
        }

@@ -434,17 +388,13 @@ where
                let img = self
                    .timeline
                    .get_rel_page_at_lsn(src, blknum, Version::Lsn(self.lsn), self.ctx)
-                    .await
-                    .map_err(|e| BasebackupError::Server(e.into()))?;
+                    .await?;
                segment_data.extend_from_slice(&img[..]);
            }

            let file_name = dst.to_segfile_name(seg as u32);
            let header = new_tar_header(&file_name, segment_data.len() as u64)?;
-            self.ar
-                .append(&header, segment_data.as_slice())
-                .await
-                .map_err(BasebackupError::Client)?;
+            self.ar.append(&header, segment_data.as_slice()).await?;

            seg += 1;
            startblk = endblk;
@@ -464,22 +414,20 @@ where
        spcnode: u32,
        dbnode: u32,
        has_relmap_file: bool,
-    ) -> Result<(), BasebackupError> {
+    ) -> anyhow::Result<()> {
        let relmap_img = if has_relmap_file {
            let img = self
                .timeline
                .get_relmap_file(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
-                .await
-                .map_err(|e| BasebackupError::Server(e.into()))?;
+                .await?;

-            if img.len()
-                != dispatch_pgversion!(self.timeline.pg_version, pgv::bindings::SIZEOF_RELMAPFILE)
-            {
-                return Err(BasebackupError::Server(anyhow!(
-                    "img.len() != SIZE_OF_RELMAPFILE, img.len()={}",
-                    img.len(),
-                )));
-            }
+            ensure!(
+                img.len()
+                    == dispatch_pgversion!(
+                        self.timeline.pg_version,
+                        pgv::bindings::SIZEOF_RELMAPFILE
+                    )
+            );

            Some(img)
        } else {
@@ -492,20 +440,14 @@ where
                ver => format!("{ver}\x0A"),
            };
            let header = new_tar_header("PG_VERSION", pg_version_str.len() as u64)?;
-            self.ar
-                .append(&header, pg_version_str.as_bytes())
-                .await
-                .map_err(BasebackupError::Client)?;
+            self.ar.append(&header, pg_version_str.as_bytes()).await?;

            info!("timeline.pg_version {}", self.timeline.pg_version);

            if let Some(img) = relmap_img {
                // filenode map for global tablespace
                let header = new_tar_header("global/pg_filenode.map", img.len() as u64)?;
-                self.ar
-                    .append(&header, &img[..])
-                    .await
-                    .map_err(BasebackupError::Client)?;
+                self.ar.append(&header, &img[..]).await?;
            } else {
                warn!("global/pg_filenode.map is missing");
            }
@@ -524,26 +466,18 @@ where
                && self
                    .timeline
                    .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx)
-                    .await
-                    .map_err(|e| BasebackupError::Server(e.into()))?
+                    .await?
                    .is_empty()
            {
                return Ok(());
            }
            // User defined tablespaces are not supported
-            if spcnode != DEFAULTTABLESPACE_OID {
-                return Err(BasebackupError::Server(anyhow!(
-                    "spcnode != DEFAULTTABLESPACE_OID, spcnode={spcnode}"
-                )));
-            }
+            ensure!(spcnode == DEFAULTTABLESPACE_OID);

            // Append dir path for each database
            let path = format!("base/{}", dbnode);
            let header = new_tar_header_dir(&path)?;
-            self.ar
-                .append(&header, &mut io::empty())
-                .await
-                .map_err(BasebackupError::Client)?;
+            self.ar.append(&header, &mut io::empty()).await?;

            if let Some(img) = relmap_img {
                let dst_path = format!("base/{}/PG_VERSION", dbnode);
@@ -553,17 +487,11 @@ where
                    ver => format!("{ver}\x0A"),
                };
                let header = new_tar_header(&dst_path, pg_version_str.len() as u64)?;
-                self.ar
-                    .append(&header, pg_version_str.as_bytes())
-                    .await
-                    .map_err(BasebackupError::Client)?;
+                self.ar.append(&header, pg_version_str.as_bytes()).await?;

                let relmap_path = format!("base/{}/pg_filenode.map", dbnode);
                let header = new_tar_header(&relmap_path, img.len() as u64)?;
-                self.ar
-                    .append(&header, &img[..])
-                    .await
-                    .map_err(BasebackupError::Client)?;
+                self.ar.append(&header, &img[..]).await?;
            }
        };
        Ok(())
@@ -572,12 +500,11 @@ where
    //
    // Extract twophase state files
    //
-    async fn add_twophase_file(&mut self, xid: TransactionId) -> Result<(), BasebackupError> {
+    async fn add_twophase_file(&mut self, xid: TransactionId) -> anyhow::Result<()> {
        let img = self
            .timeline
            .get_twophase_file(xid, self.lsn, self.ctx)
-            .await
-            .map_err(|e| BasebackupError::Server(e.into()))?;
+            .await?;

        let mut buf = BytesMut::new();
        buf.extend_from_slice(&img[..]);
@@ -585,10 +512,7 @@ where
        buf.put_u32_le(crc);
        let path = format!("pg_twophase/{:>08X}", xid);
        let header = new_tar_header(&path, buf.len() as u64)?;
-        self.ar
-            .append(&header, &buf[..])
-            .await
-            .map_err(BasebackupError::Client)?;
+        self.ar.append(&header, &buf[..]).await?;

        Ok(())
    }
@@ -597,28 +521,24 @@ where
    // Add generated pg_control file and bootstrap WAL segment.
    // Also send zenith.signal file with extra bootstrap data.
    //
-    async fn add_pgcontrol_file(&mut self) -> Result<(), BasebackupError> {
+    async fn add_pgcontrol_file(&mut self) -> anyhow::Result<()> {
        // add zenith.signal file
        let mut zenith_signal = String::new();
        if self.prev_record_lsn == Lsn(0) {
            if self.lsn == self.timeline.get_ancestor_lsn() {
-                write!(zenith_signal, "PREV LSN: none")
-                    .map_err(|e| BasebackupError::Server(e.into()))?;
+                write!(zenith_signal, "PREV LSN: none")?;
            } else {
-                write!(zenith_signal, "PREV LSN: invalid")
-                    .map_err(|e| BasebackupError::Server(e.into()))?;
+                write!(zenith_signal, "PREV LSN: invalid")?;
            }
        } else {
-            write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)
-                .map_err(|e| BasebackupError::Server(e.into()))?;
+            write!(zenith_signal, "PREV LSN: {}", self.prev_record_lsn)?;
        }
        self.ar
            .append(
                &new_tar_header("zenith.signal", zenith_signal.len() as u64)?,
                zenith_signal.as_bytes(),
            )
-            .await
-            .map_err(BasebackupError::Client)?;
+            .await?;

        let checkpoint_bytes = self
            .timeline
@@ -640,10 +560,7 @@ where

        //send pg_control
        let header = new_tar_header("global/pg_control", pg_control_bytes.len() as u64)?;
-        self.ar
-            .append(&header, &pg_control_bytes[..])
-            .await
-            .map_err(BasebackupError::Client)?;
+        self.ar.append(&header, &pg_control_bytes[..]).await?;

        //send wal segment
        let segno = self.lsn.segment_number(WAL_SEGMENT_SIZE);
@@ -658,16 +575,8 @@ where
            self.lsn,
        )
        .map_err(|e| anyhow!(e).context("Failed generating wal segment"))?;
-        if wal_seg.len() != WAL_SEGMENT_SIZE {
-            return Err(BasebackupError::Server(anyhow!(
-                "wal_seg.len() != WAL_SEGMENT_SIZE, wal_seg.len()={}",
-                wal_seg.len()
-            )));
-        }
-        self.ar
-            .append(&header, &wal_seg[..])
-            .await
-            .map_err(BasebackupError::Client)?;
+        ensure!(wal_seg.len() == WAL_SEGMENT_SIZE);
+        self.ar.append(&header, &wal_seg[..]).await?;
        Ok(())
    }
 }
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -9,7 +9,7 @@ use pageserver_api::shard::TenantShardId;
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use serde;
 use serde::de::IntoDeserializer;
-use std::env;
+use std::{collections::HashMap, env};
 use storage_broker::Uri;
 use utils::crashsafe::path_with_suffix_extension;
 use utils::id::ConnectionId;
@@ -51,7 +51,7 @@ pub mod defaults {
    use crate::tenant::config::defaults::*;
    use const_format::formatcp;

-    pub use pageserver_api::config::{
+    pub use pageserver_api::{
        DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
        DEFAULT_PG_LISTEN_PORT,
    };
@@ -335,6 +335,26 @@ impl<T: Clone> BuilderValue<T> {
    }
 }

+// Certain metadata (e.g. externally-addressable name, AZ) is delivered
+// as a separate structure.  This information is not needed by the pageserver
+// itself, it is only used for registering the pageserver with the control
+// plane and/or storage controller.
+//
+#[derive(serde::Deserialize)]
+pub(crate) struct NodeMetadata {
+    #[serde(rename = "host")]
+    pub(crate) postgres_host: String,
+    #[serde(rename = "port")]
+    pub(crate) postgres_port: u16,
+    pub(crate) http_host: String,
+    pub(crate) http_port: u16,
+
+    // Deployment tools may write fields to the metadata file beyond what we
+    // use in this type: this type intentionally only names the fields that we require.
+    #[serde(flatten)]
+    pub(crate) other: HashMap<String, serde_json::Value>,
+}
+
 // needed to simplify config construction
 #[derive(Default)]
 struct PageServerConfigBuilder {
--- a/pageserver/src/control_plane_client.rs
+++ b/pageserver/src/control_plane_client.rs
@@ -14,8 +14,10 @@ use tokio_util::sync::CancellationToken;
 use url::Url;
 use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};

-use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
-use pageserver_api::config::NodeMetadata;
+use crate::{
+    config::{NodeMetadata, PageServerConf},
+    virtual_file::on_fatal_io_error,
+};

 /// The Pageserver's client for using the control plane API: this is a small subset
 /// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
@@ -63,7 +65,7 @@ impl ControlPlaneClient {
        let mut client = reqwest::ClientBuilder::new();

        if let Some(jwt) = &conf.control_plane_api_token {
-            let mut headers = reqwest::header::HeaderMap::new();
+            let mut headers = hyper::HeaderMap::new();
            headers.insert(
                "Authorization",
                format!("Bearer {}", jwt.get_contents()).parse().unwrap(),
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -782,6 +782,9 @@ components:
      required:
        - mode
      properties:
+        tenant_id:
+          type: string
+          description: Not used, scheduled for removal.
        mode:
          type: string
          enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -1918,14 +1918,12 @@ async fn timeline_collect_keyspace(
        let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
        let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
        let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
-        let (dense_ks, sparse_ks) = timeline
+        let keys = timeline
            .collect_keyspace(at_lsn, &ctx)
            .await
            .map_err(|e| ApiError::InternalServerError(e.into()))?;

-        // This API is currently used by pagebench. Pagebench will iterate all keys within the keyspace.
-        // Therefore, we split dense/sparse keys in this API.
-        let res = pageserver_api::models::partitioning::Partitioning { keys: dense_ks, sparse_keys: sparse_ks, at_lsn };
+        let res = pageserver_api::models::partitioning::Partitioning { keys, at_lsn };

        json_response(StatusCode::OK, res)
    }
@@ -2160,27 +2158,6 @@ async fn secondary_download_handler(
    json_response(status, progress)
 }

-async fn secondary_status_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    let state = get_state(&request);
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-
-    let Some(secondary_tenant) = state
-        .tenant_manager
-        .get_secondary_tenant_shard(tenant_shard_id)
-    else {
-        return Err(ApiError::NotFound(
-            anyhow::anyhow!("Shard {} not found", tenant_shard_id).into(),
-        ));
-    };
-
-    let progress = secondary_tenant.progress.lock().unwrap().clone();
-
-    json_response(StatusCode::OK, progress)
-}
-
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(
        StatusCode::NOT_FOUND,
@@ -2542,9 +2519,6 @@ pub fn make_router(
        .put("/v1/deletion_queue/flush", |r| {
            api_handler(r, deletion_queue_flush)
        })
-        .get("/v1/tenant/:tenant_shard_id/secondary/status", |r| {
-            api_handler(r, secondary_status_handler)
-        })
        .post("/v1/tenant/:tenant_shard_id/secondary/download", |r| {
            api_handler(r, secondary_download_handler)
        })
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -51,8 +51,8 @@ pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "gc")]
    Gc,

-    #[strum(serialize = "find gc cutoffs")]
-    FindGcCutoffs,
+    #[strum(serialize = "update gc info")]
+    UpdateGcInfo,

    #[strum(serialize = "create tenant")]
    CreateTenant,
@@ -194,11 +194,6 @@ pub(crate) struct GetVectoredLatency {
    map: EnumMap<TaskKind, Option<Histogram>>,
 }

-#[allow(dead_code)]
-pub(crate) struct ScanLatency {
-    map: EnumMap<TaskKind, Option<Histogram>>,
-}
-
 impl GetVectoredLatency {
    // Only these task types perform vectored gets. Filter all other tasks out to reduce total
    // cardinality of the metric.
@@ -209,48 +204,6 @@ impl GetVectoredLatency {
    }
 }

-impl ScanLatency {
-    // Only these task types perform vectored gets. Filter all other tasks out to reduce total
-    // cardinality of the metric.
-    const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
-
-    pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
-        self.map[task_kind].as_ref()
-    }
-}
-
-pub(crate) struct ScanLatencyOngoingRecording<'a> {
-    parent: &'a Histogram,
-    start: std::time::Instant,
-}
-
-impl<'a> ScanLatencyOngoingRecording<'a> {
-    pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
-        let start = Instant::now();
-        ScanLatencyOngoingRecording { parent, start }
-    }
-
-    pub(crate) fn observe(self, throttled: Option<Duration>) {
-        let elapsed = self.start.elapsed();
-        let ex_throttled = if let Some(throttled) = throttled {
-            elapsed.checked_sub(throttled)
-        } else {
-            Some(elapsed)
-        };
-        if let Some(ex_throttled) = ex_throttled {
-            self.parent.observe(ex_throttled.as_secs_f64());
-        } else {
-            use utils::rate_limit::RateLimit;
-            static LOGGED: Lazy<Mutex<RateLimit>> =
-                Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
-            let mut rate_limit = LOGGED.lock().unwrap();
-            rate_limit.call(|| {
-                warn!("error deducting time spent throttled; this message is logged at a global rate limit");
-            });
-        }
-    }
-}
-
 pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
    let inner = register_histogram_vec!(
        "pageserver_get_vectored_seconds",
@@ -274,29 +227,6 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(||
    }
 });

-pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
-    let inner = register_histogram_vec!(
-        "pageserver_scan_seconds",
-        "Time spent in scan, excluding time spent in timeline_get_throttle.",
-        &["task_kind"],
-        CRITICAL_OP_BUCKETS.into(),
-    )
-    .expect("failed to define a metric");
-
-    ScanLatency {
-        map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
-            let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
-
-            if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
-                let task_kind = task_kind.into();
-                Some(inner.with_label_values(&[task_kind]))
-            } else {
-                None
-            }
-        })),
-    }
-});
-
 pub(crate) struct PageCacheMetricsForTaskKind {
    pub read_accesses_materialized_page: IntCounter,
    pub read_accesses_immutable: IntCounter,
@@ -2059,7 +1989,7 @@ pub(crate) struct TimelineMetrics {
    pub imitate_logical_size_histo: StorageTimeMetrics,
    pub load_layer_map_histo: StorageTimeMetrics,
    pub garbage_collect_histo: StorageTimeMetrics,
-    pub find_gc_cutoffs_histo: StorageTimeMetrics,
+    pub update_gc_info_histo: StorageTimeMetrics,
    pub last_record_gauge: IntGauge,
    resident_physical_size_gauge: UIntGauge,
    /// copy of LayeredTimeline.current_logical_size
@@ -2120,8 +2050,8 @@ impl TimelineMetrics {
            &shard_id,
            &timeline_id,
        );
-        let find_gc_cutoffs_histo = StorageTimeMetrics::new(
-            StorageTimeOperation::FindGcCutoffs,
+        let update_gc_info_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::UpdateGcInfo,
            &tenant_id,
            &shard_id,
            &timeline_id,
@@ -2168,7 +2098,7 @@ impl TimelineMetrics {
            logical_size_histo,
            imitate_logical_size_histo,
            garbage_collect_histo,
-            find_gc_cutoffs_histo,
+            update_gc_info_histo,
            load_layer_map_histo,
            last_record_gauge,
            resident_physical_size_gauge,
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -48,7 +48,6 @@ use utils::{

 use crate::auth::check_permission;
 use crate::basebackup;
-use crate::basebackup::BasebackupError;
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::import_datadir::import_wal_from_tar;
@@ -1237,13 +1236,6 @@ impl PageServerHandler {
    where
        IO: AsyncRead + AsyncWrite + Send + Sync + Unpin,
    {
-        fn map_basebackup_error(err: BasebackupError) -> QueryError {
-            match err {
-                BasebackupError::Client(e) => QueryError::Disconnected(ConnectionError::Io(e)),
-                BasebackupError::Server(e) => QueryError::Other(e),
-            }
-        }
-
        let started = std::time::Instant::now();

        // check that the timeline exists
@@ -1269,8 +1261,7 @@ impl PageServerHandler {
        let lsn_awaited_after = started.elapsed();

        // switch client to COPYOUT
-        pgb.write_message_noflush(&BeMessage::CopyOutResponse)
-            .map_err(QueryError::Disconnected)?;
+        pgb.write_message_noflush(&BeMessage::CopyOutResponse)?;
        self.flush_cancellable(pgb, &timeline.cancel).await?;

        // Send a tarball of the latest layer on the timeline. Compress if not
@@ -1285,8 +1276,7 @@ impl PageServerHandler {
                full_backup,
                ctx,
            )
-            .await
-            .map_err(map_basebackup_error)?;
+            .await?;
        } else {
            let mut writer = pgb.copyout_writer();
            if gzip {
@@ -1307,13 +1297,9 @@ impl PageServerHandler {
                    full_backup,
                    ctx,
                )
-                .await
-                .map_err(map_basebackup_error)?;
+                .await?;
                // shutdown the encoder to ensure the gzip footer is written
-                encoder
-                    .shutdown()
-                    .await
-                    .map_err(|e| QueryError::Disconnected(ConnectionError::Io(e)))?;
+                encoder.shutdown().await?;
            } else {
                basebackup::send_basebackup_tarball(
                    &mut writer,
@@ -1323,13 +1309,11 @@ impl PageServerHandler {
                    full_backup,
                    ctx,
                )
-                .await
-                .map_err(map_basebackup_error)?;
+                .await?;
            }
        }

-        pgb.write_message_noflush(&BeMessage::CopyDone)
-            .map_err(QueryError::Disconnected)?;
+        pgb.write_message_noflush(&BeMessage::CopyDone)?;
        self.flush_cancellable(pgb, &timeline.cancel).await?;

        let basebackup_after = started
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -23,7 +23,6 @@ use pageserver_api::key::{
    slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range,
    AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
 };
-use pageserver_api::keyspace::SparseKeySpace;
 use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::BLCKSZ;
@@ -279,7 +278,7 @@ impl Timeline {

        match RelDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
-                let exists = dir.rels.contains(&(tag.relnode, tag.forknum));
+                let exists = dir.rels.get(&(tag.relnode, tag.forknum)).is_some();
                Ok(exists)
            }
            Err(e) => Err(PageReconstructError::from(e)),
@@ -379,7 +378,7 @@ impl Timeline {

        match SlruSegmentDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
-                let exists = dir.segments.contains(&segno);
+                let exists = dir.segments.get(&segno).is_some();
                Ok(exists)
            }
            Err(e) => Err(PageReconstructError::from(e)),
@@ -731,13 +730,11 @@ impl Timeline {
    /// Get a KeySpace that covers all the Keys that are in use at the given LSN.
    /// Anything that's not listed maybe removed from the underlying storage (from
    /// that LSN forwards).
-    ///
-    /// The return value is (dense keyspace, sparse keyspace).
    pub(crate) async fn collect_keyspace(
        &self,
        lsn: Lsn,
        ctx: &RequestContext,
-    ) -> Result<(KeySpace, SparseKeySpace), CollectKeySpaceError> {
+    ) -> Result<KeySpace, CollectKeySpaceError> {
        // Iterate through key ranges, greedily packing them into partitions
        let mut result = KeySpaceAccum::new();

@@ -809,12 +806,7 @@ impl Timeline {
        if self.get(AUX_FILES_KEY, lsn, ctx).await.is_ok() {
            result.add_key(AUX_FILES_KEY);
        }
-
-        Ok((
-            result.to_keyspace(),
-            /* AUX sparse key space */
-            SparseKeySpace(KeySpace::single(Key::metadata_aux_key_range())),
-        ))
+        Ok(result.to_keyspace())
    }

    /// Get cached size of relation if it not updated after specified LSN
@@ -1143,22 +1135,21 @@ impl<'a> DatadirModification<'a> {
        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await.context("read db")?)
            .context("deserialize db")?;
        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
-        let mut rel_dir =
-            if let hash_map::Entry::Vacant(e) = dbdir.dbdirs.entry((rel.spcnode, rel.dbnode)) {
-                // Didn't exist. Update dbdir
-                e.insert(false);
-                let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
-                self.pending_directory_entries
-                    .push((DirectoryKind::Db, dbdir.dbdirs.len()));
-                self.put(DBDIR_KEY, Value::Image(buf.into()));
+        let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
+            // Didn't exist. Update dbdir
+            dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
+            let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
+            self.pending_directory_entries
+                .push((DirectoryKind::Db, dbdir.dbdirs.len()));
+            self.put(DBDIR_KEY, Value::Image(buf.into()));

-                // and create the RelDirectory
-                RelDirectory::default()
-            } else {
-                // reldir already exists, fetch it
-                RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
-                    .context("deserialize db")?
-            };
+            // and create the RelDirectory
+            RelDirectory::default()
+        } else {
+            // reldir already exists, fetch it
+            RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
+                .context("deserialize db")?
+        };

        // Add the new relation to the rel directory entry, and write it back
        if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -363,8 +363,6 @@ pub enum TaskKind {

    EphemeralFilePreWarmPageCache,

-    LayerDownload,
-
    #[cfg(test)]
    UnitTest,
 }
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -64,7 +64,6 @@ use self::timeline::uninit::UninitializedTimeline;
 use self::timeline::EvictionTaskTenantState;
 use self::timeline::TimelineResources;
 use self::timeline::WaitLsnError;
-use self::timeline::{GcCutoffs, GcInfo};
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::deletion_queue::DeletionQueueClient;
@@ -87,6 +86,7 @@ use crate::tenant::remote_timeline_client::INITDB_PATH;
 use crate::tenant::storage_layer::DeltaLayer;
 use crate::tenant::storage_layer::ImageLayer;
 use crate::InitializationOrder;
+use std::cmp::min;
 use std::collections::hash_map::Entry;
 use std::collections::BTreeSet;
 use std::collections::HashMap;
@@ -2812,48 +2812,7 @@ impl Tenant {
        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Vec<Arc<Timeline>>> {
-        // before taking the gc_cs lock, do the heavier weight finding of gc_cutoff points for
-        // currently visible timelines.
-        let timelines = self
-            .timelines
-            .lock()
-            .unwrap()
-            .values()
-            .filter(|tl| match target_timeline_id.as_ref() {
-                Some(target) => &tl.timeline_id == target,
-                None => true,
-            })
-            .cloned()
-            .collect::<Vec<_>>();
-
-        let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =
-            HashMap::with_capacity(timelines.len());
-
-        for timeline in timelines.iter() {
-            let cutoff = timeline
-                .get_last_record_lsn()
-                .checked_sub(horizon)
-                .unwrap_or(Lsn(0));
-
-            let res = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await;
-
-            match res {
-                Ok(cutoffs) => {
-                    let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);
-                    assert!(old.is_none());
-                }
-                Err(e) => {
-                    tracing::warn!(timeline_id = %timeline.timeline_id, "ignoring failure to find gc cutoffs: {e:#}");
-                }
-            }
-        }
-
-        if !self.is_active() {
-            anyhow::bail!("shutting down");
-        }
-
-        // grab mutex to prevent new timelines from being created here; avoid doing long operations
-        // because that will stall branch creation.
+        // grab mutex to prevent new timelines from being created here.
        let gc_cs = self.gc_cs.lock().await;

        // Scan all timelines. For each timeline, remember the timeline ID and
@@ -2915,6 +2874,11 @@ impl Tenant {
                }
            }

+            let cutoff = timeline
+                .get_last_record_lsn()
+                .checked_sub(horizon)
+                .unwrap_or(Lsn(0));
+
            let branchpoints: Vec<Lsn> = all_branchpoints
                .range((
                    Included((timeline_id, Lsn(0))),
@@ -2922,27 +2886,9 @@ impl Tenant {
                ))
                .map(|&x| x.1)
                .collect();
-
-            {
-                let mut target = timeline.gc_info.write().unwrap();
-
-                match gc_cutoffs.remove(&timeline_id) {
-                    Some(cutoffs) => {
-                        *target = GcInfo {
-                            retain_lsns: branchpoints,
-                            cutoffs,
-                        };
-                    }
-                    None => {
-                        // reasons for this being unavailable:
-                        // - this timeline was created while we were finding cutoffs
-                        // - lsn for timestamp search fails for this timeline repeatedly
-                        //
-                        // in both cases, refreshing the branchpoints is correct.
-                        target.retain_lsns = branchpoints;
-                    }
-                };
-            }
+            timeline
+                .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
+                .await?;

            gc_timelines.push(timeline);
        }
@@ -3031,7 +2977,7 @@ impl Tenant {
        // and then the planned GC cutoff
        {
            let gc_info = src_timeline.gc_info.read().unwrap();
-            let cutoff = gc_info.min_cutoff();
+            let cutoff = min(gc_info.pitr_cutoff, gc_info.horizon_cutoff);
            if start_lsn < cutoff {
                return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(
                    "invalid branch start lsn: less than planned GC cutoff {cutoff}"
@@ -3925,9 +3871,8 @@ mod tests {
    use crate::DEFAULT_PG_VERSION;
    use bytes::BytesMut;
    use hex_literal::hex;
-    use pageserver_api::key::{AUX_KEY_PREFIX, NON_INHERITED_RANGE};
+    use pageserver_api::key::NON_INHERITED_RANGE;
    use pageserver_api::keyspace::KeySpace;
-    use pageserver_api::models::CompactionAlgorithm;
    use rand::{thread_rng, Rng};
    use tests::storage_layer::ValuesReconstructState;
    use tests::timeline::{GetVectoredError, ShutdownMode};
@@ -4567,25 +4512,11 @@ mod tests {
    }

    async fn bulk_insert_compact_gc(
-        tenant: &Tenant,
-        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
-        lsn: Lsn,
-        repeat: usize,
-        key_count: usize,
-    ) -> anyhow::Result<()> {
-        let compact = true;
-        bulk_insert_maybe_compact_gc(tenant, timeline, ctx, lsn, repeat, key_count, compact).await
-    }
-
-    async fn bulk_insert_maybe_compact_gc(
-        tenant: &Tenant,
-        timeline: &Arc<Timeline>,
+        timeline: Arc<Timeline>,
        ctx: &RequestContext,
        mut lsn: Lsn,
        repeat: usize,
        key_count: usize,
-        compact: bool,
    ) -> anyhow::Result<()> {
        let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
        let mut blknum = 0;
@@ -4593,8 +4524,6 @@ mod tests {
        // Enforce that key range is monotonously increasing
        let mut keyspace = KeySpaceAccum::new();

-        let cancel = CancellationToken::new();
-
        for _ in 0..repeat {
            for _ in 0..key_count {
                test_key.field6 = blknum;
@@ -4616,19 +4545,22 @@ mod tests {
                blknum += 1;
            }

-            timeline.freeze_and_flush().await?;
-            if compact {
-                // this requires timeline to be &Arc<Timeline>
-                timeline.compact(&cancel, EnumSet::empty(), ctx).await?;
-            }
+            let cutoff = timeline.get_last_record_lsn();

-            // this doesn't really need to use the timeline_id target, but it is closer to what it
-            // originally was.
-            let res = tenant
-                .gc_iteration(Some(timeline.timeline_id), 0, Duration::ZERO, &cancel, ctx)
+            timeline
+                .update_gc_info(
+                    Vec::new(),
+                    cutoff,
+                    Duration::ZERO,
+                    &CancellationToken::new(),
+                    ctx,
+                )
                .await?;
-
-            assert_eq!(res.layers_removed, 0, "this never removes anything");
+            timeline.freeze_and_flush().await?;
+            timeline
+                .compact(&CancellationToken::new(), EnumSet::empty(), ctx)
+                .await?;
+            timeline.gc().await?;
        }

        Ok(())
@@ -4647,7 +4579,7 @@ mod tests {
            .await?;

        let lsn = Lsn(0x10);
-        bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
+        bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;

        Ok(())
    }
@@ -4678,7 +4610,7 @@ mod tests {
            .await?;

        let lsn = Lsn(0x10);
-        bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
+        bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;

        let guard = tline.layers.read().await;
        guard.layer_map().dump(true, &ctx).await?;
@@ -4791,7 +4723,15 @@ mod tests {
            .await;

        let images = vectored_res?;
-        assert!(images.is_empty());
+        let mut key = NON_INHERITED_RANGE.start;
+        while key < NON_INHERITED_RANGE.end {
+            assert!(matches!(
+                images[&key],
+                Err(PageReconstructError::MissingKey(_))
+            ));
+            key = key.next();
+        }
+
        Ok(())
    }

@@ -5102,29 +5042,13 @@ mod tests {

    #[tokio::test]
    async fn test_random_updates() -> anyhow::Result<()> {
-        let names_algorithms = [
-            ("test_random_updates_legacy", CompactionAlgorithm::Legacy),
-            ("test_random_updates_tiered", CompactionAlgorithm::Tiered),
-        ];
-        for (name, algorithm) in names_algorithms {
-            test_random_updates_algorithm(name, algorithm).await?;
-        }
-        Ok(())
-    }
-
-    async fn test_random_updates_algorithm(
-        name: &'static str,
-        compaction_algorithm: CompactionAlgorithm,
-    ) -> anyhow::Result<()> {
-        let mut harness = TenantHarness::create(name)?;
-        harness.tenant_conf.compaction_algorithm = compaction_algorithm;
+        let harness = TenantHarness::create("test_random_updates")?;
        let (tenant, ctx) = harness.load().await;
        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
            .await?;

        const NUM_KEYS: usize = 1000;
-        let cancel = CancellationToken::new();

        let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();

@@ -5183,11 +5107,22 @@ mod tests {
                );
            }

-            // Perform a cycle of flush, and GC
-            tline.freeze_and_flush().await?;
-            tenant
-                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
+            // Perform a cycle of flush, compact, and GC
+            let cutoff = tline.get_last_record_lsn();
+            tline
+                .update_gc_info(
+                    Vec::new(),
+                    cutoff,
+                    Duration::ZERO,
+                    &CancellationToken::new(),
+                    &ctx,
+                )
                .await?;
+            tline.freeze_and_flush().await?;
+            tline
+                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+                .await?;
+            tline.gc().await?;
        }

        Ok(())
@@ -5208,8 +5143,6 @@ mod tests {

        let mut keyspace = KeySpaceAccum::new();

-        let cancel = CancellationToken::new();
-
        // Track when each page was last modified. Used to assert that
        // a read sees the latest page version.
        let mut updated = [Lsn(0); NUM_KEYS];
@@ -5273,11 +5206,21 @@ mod tests {
            }

            // Perform a cycle of flush, compact, and GC
-            tline.freeze_and_flush().await?;
-            tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
-            tenant
-                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
+            let cutoff = tline.get_last_record_lsn();
+            tline
+                .update_gc_info(
+                    Vec::new(),
+                    cutoff,
+                    Duration::ZERO,
+                    &CancellationToken::new(),
+                    &ctx,
+                )
                .await?;
+            tline.freeze_and_flush().await?;
+            tline
+                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+                .await?;
+            tline.gc().await?;
        }

        Ok(())
@@ -5459,140 +5402,19 @@ mod tests {

    #[tokio::test]
    async fn test_read_at_max_lsn() -> anyhow::Result<()> {
-        let names_algorithms = [
-            ("test_read_at_max_lsn_legacy", CompactionAlgorithm::Legacy),
-            ("test_read_at_max_lsn_tiered", CompactionAlgorithm::Tiered),
-        ];
-        for (name, algorithm) in names_algorithms {
-            test_read_at_max_lsn_algorithm(name, algorithm).await?;
-        }
-        Ok(())
-    }
-
-    async fn test_read_at_max_lsn_algorithm(
-        name: &'static str,
-        compaction_algorithm: CompactionAlgorithm,
-    ) -> anyhow::Result<()> {
-        let mut harness = TenantHarness::create(name)?;
-        harness.tenant_conf.compaction_algorithm = compaction_algorithm;
+        let harness = TenantHarness::create("test_read_at_max_lsn")?;
        let (tenant, ctx) = harness.load().await;
        let tline = tenant
            .create_test_timeline(TIMELINE_ID, Lsn(0x08), DEFAULT_PG_VERSION, &ctx)
            .await?;

        let lsn = Lsn(0x10);
-        let compact = false;
-        bulk_insert_maybe_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000, compact).await?;
+        bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;

        let test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
        let read_lsn = Lsn(u64::MAX - 1);

-        let result = tline.get(test_key, read_lsn, &ctx).await;
-        assert!(result.is_ok(), "result is not Ok: {}", result.unwrap_err());
-
-        Ok(())
-    }
-
-    #[tokio::test]
-    async fn test_metadata_scan() -> anyhow::Result<()> {
-        let harness = TenantHarness::create("test_metadata_scan")?;
-        let (tenant, ctx) = harness.load().await;
-        let tline = tenant
-            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
-            .await?;
-
-        const NUM_KEYS: usize = 1000;
-        const STEP: usize = 100; // random update + scan base_key + idx * STEP
-
-        let cancel = CancellationToken::new();
-
-        let mut base_key = Key::from_hex("000000000033333333444444445500000000").unwrap();
-        base_key.field1 = AUX_KEY_PREFIX;
-        let mut test_key = base_key;
-
-        // Track when each page was last modified. Used to assert that
-        // a read sees the latest page version.
-        let mut updated = [Lsn(0); NUM_KEYS];
-
-        let mut lsn = Lsn(0x10);
-        #[allow(clippy::needless_range_loop)]
-        for blknum in 0..NUM_KEYS {
-            lsn = Lsn(lsn.0 + 0x10);
-            test_key.field6 = (blknum * STEP) as u32;
-            let mut writer = tline.writer().await;
-            writer
-                .put(
-                    test_key,
-                    lsn,
-                    &Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
-                    &ctx,
-                )
-                .await?;
-            writer.finish_write(lsn);
-            updated[blknum] = lsn;
-            drop(writer);
-        }
-
-        let keyspace = KeySpace::single(base_key..base_key.add((NUM_KEYS * STEP) as u32));
-
-        for _ in 0..10 {
-            // Read all the blocks
-            for (blknum, last_lsn) in updated.iter().enumerate() {
-                test_key.field6 = (blknum * STEP) as u32;
-                assert_eq!(
-                    tline.get(test_key, lsn, &ctx).await?,
-                    test_img(&format!("{} at {}", blknum, last_lsn))
-                );
-            }
-
-            let mut cnt = 0;
-            for (key, value) in tline
-                .get_vectored_impl(
-                    keyspace.clone(),
-                    lsn,
-                    ValuesReconstructState::default(),
-                    &ctx,
-                )
-                .await?
-            {
-                let blknum = key.field6 as usize;
-                let value = value?;
-                assert!(blknum % STEP == 0);
-                let blknum = blknum / STEP;
-                assert_eq!(
-                    value,
-                    test_img(&format!("{} at {}", blknum, updated[blknum]))
-                );
-                cnt += 1;
-            }
-
-            assert_eq!(cnt, NUM_KEYS);
-
-            for _ in 0..NUM_KEYS {
-                lsn = Lsn(lsn.0 + 0x10);
-                let blknum = thread_rng().gen_range(0..NUM_KEYS);
-                test_key.field6 = (blknum * STEP) as u32;
-                let mut writer = tline.writer().await;
-                writer
-                    .put(
-                        test_key,
-                        lsn,
-                        &Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
-                        &ctx,
-                    )
-                    .await?;
-                writer.finish_write(lsn);
-                drop(writer);
-                updated[blknum] = lsn;
-            }
-
-            // Perform a cycle of flush, compact, and GC
-            tline.freeze_and_flush().await?;
-            tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
-            tenant
-                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
-                .await?;
-        }
+        assert!(tline.get(test_key, read_lsn, &ctx).await.is_ok());

        Ok(())
    }
--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -130,9 +130,8 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
    async fn write_all_unbuffered<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        src_buf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<(), Error>) {
-        let (src_buf, res) = self.inner.write_all(src_buf, ctx).await;
+        let (src_buf, res) = self.inner.write_all(src_buf).await;
        let nbytes = match res {
            Ok(nbytes) => nbytes,
            Err(e) => return (src_buf, Err(e)),
@@ -143,9 +142,9 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {

    #[inline(always)]
    /// Flushes the internal buffer to the underlying `VirtualFile`.
-    pub async fn flush_buffer(&mut self, ctx: &RequestContext) -> Result<(), Error> {
+    pub async fn flush_buffer(&mut self) -> Result<(), Error> {
        let buf = std::mem::take(&mut self.buf);
-        let (mut buf, res) = self.inner.write_all(buf, ctx).await;
+        let (mut buf, res) = self.inner.write_all(buf).await;
        res?;
        buf.clear();
        self.buf = buf;
@@ -166,11 +165,10 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        src_buf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<(), Error>) {
        if !BUFFERED {
            assert!(self.buf.is_empty());
-            return self.write_all_unbuffered(src_buf, ctx).await;
+            return self.write_all_unbuffered(src_buf).await;
        }
        let remaining = Self::CAPACITY - self.buf.len();
        let src_buf_len = src_buf.bytes_init();
@@ -185,7 +183,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
        }
        // Then, if the buffer is full, flush it out
        if self.buf.len() == Self::CAPACITY {
-            if let Err(e) = self.flush_buffer(ctx).await {
+            if let Err(e) = self.flush_buffer().await {
                return (Slice::into_inner(src_buf), Err(e));
            }
        }
@@ -201,7 +199,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
                assert_eq!(copied, src_buf.len());
                Slice::into_inner(src_buf)
            } else {
-                let (src_buf, res) = self.write_all_unbuffered(src_buf, ctx).await;
+                let (src_buf, res) = self.write_all_unbuffered(src_buf).await;
                if let Err(e) = res {
                    return (src_buf, Err(e));
                }
@@ -218,7 +216,6 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
    pub async fn write_blob<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        srcbuf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<u64, Error>) {
        let offset = self.offset;

@@ -230,7 +227,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
            if len < 128 {
                // Short blob. Write a 1-byte length header
                io_buf.put_u8(len as u8);
-                self.write_all(io_buf, ctx).await
+                self.write_all(io_buf).await
            } else {
                // Write a 4-byte length header
                if len > 0x7fff_ffff {
@@ -245,7 +242,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
                let mut len_buf = (len as u32).to_be_bytes();
                len_buf[0] |= 0x80;
                io_buf.extend_from_slice(&len_buf[..]);
-                self.write_all(io_buf, ctx).await
+                self.write_all(io_buf).await
            }
        }
        .await;
@@ -254,7 +251,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
            Ok(_) => (),
            Err(e) => return (Slice::into_inner(srcbuf.slice(..)), Err(e)),
        }
-        let (srcbuf, res) = self.write_all(srcbuf, ctx).await;
+        let (srcbuf, res) = self.write_all(srcbuf).await;
        (srcbuf, res.map(|_| offset))
    }
 }
@@ -264,8 +261,8 @@ impl BlobWriter<true> {
    ///
    /// This function flushes the internal buffer before giving access
    /// to the underlying `VirtualFile`.
-    pub async fn into_inner(mut self, ctx: &RequestContext) -> Result<VirtualFile, Error> {
-        self.flush_buffer(ctx).await?;
+    pub async fn into_inner(mut self) -> Result<VirtualFile, Error> {
+        self.flush_buffer().await?;
        Ok(self.inner)
    }

@@ -302,16 +299,16 @@ mod tests {
            let file = VirtualFile::create(pathbuf.as_path()).await?;
            let mut wtr = BlobWriter::<BUFFERED>::new(file, 0);
            for blob in blobs.iter() {
-                let (_, res) = wtr.write_blob(blob.clone(), &ctx).await;
+                let (_, res) = wtr.write_blob(blob.clone()).await;
                let offs = res?;
                offsets.push(offs);
            }
            // Write out one page worth of zeros so that we can
            // read again with read_blk
-            let (_, res) = wtr.write_blob(vec![0; PAGE_SZ], &ctx).await;
+            let (_, res) = wtr.write_blob(vec![0; PAGE_SZ]).await;
            let offs = res?;
            println!("Writing final blob at offs={offs}");
-            wtr.flush_buffer(&ctx).await?;
+            wtr.flush_buffer().await?;
        }

        let file = VirtualFile::open(pathbuf.as_path()).await?;
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -74,7 +74,7 @@ impl EphemeralFile {
    pub(crate) async fn write_blob(
        &mut self,
        srcbuf: &[u8],
-        ctx: &RequestContext,
+        _ctx: &RequestContext,
    ) -> Result<u64, io::Error> {
        let pos = self.rw.bytes_written();

@@ -83,15 +83,15 @@ impl EphemeralFile {
            // short one-byte length header
            let len_buf = [srcbuf.len() as u8];

-            self.rw.write_all_borrowed(&len_buf, ctx).await?;
+            self.rw.write_all_borrowed(&len_buf).await?;
        } else {
            let mut len_buf = u32::to_be_bytes(srcbuf.len() as u32);
            len_buf[0] |= 0x80;
-            self.rw.write_all_borrowed(&len_buf, ctx).await?;
+            self.rw.write_all_borrowed(&len_buf).await?;
        }

        // Write the payload
-        self.rw.write_all_borrowed(srcbuf, ctx).await?;
+        self.rw.write_all_borrowed(srcbuf).await?;

        Ok(pos)
    }
--- a/pageserver/src/tenant/ephemeral_file/page_caching.rs
+++ b/pageserver/src/tenant/ephemeral_file/page_caching.rs
@@ -35,14 +35,10 @@ impl RW {
        self.page_cache_file_id
    }

-    pub(crate) async fn write_all_borrowed(
-        &mut self,
-        srcbuf: &[u8],
-        ctx: &RequestContext,
-    ) -> Result<usize, io::Error> {
+    pub(crate) async fn write_all_borrowed(&mut self, srcbuf: &[u8]) -> Result<usize, io::Error> {
        // It doesn't make sense to proactively fill the page cache on the Pageserver write path
        // because Compute is unlikely to access recently written data.
-        self.rw.write_all_borrowed(srcbuf, ctx).await
+        self.rw.write_all_borrowed(srcbuf).await
    }

    pub(crate) fn bytes_written(&self) -> u64 {
@@ -138,7 +134,6 @@ impl crate::virtual_file::owned_buffers_io::write::OwnedAsyncWriter for PreWarmi
    >(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
        let buf = buf.slice(..);
        let saved_bounds = buf.bounds(); // save for reconstructing the Slice from iobuf after the IO is done
@@ -155,7 +150,7 @@ impl crate::virtual_file::owned_buffers_io::write::OwnedAsyncWriter for PreWarmi
        );

        // Do the IO.
-        let iobuf = match self.file.write_all(buf, ctx).await {
+        let iobuf = match self.file.write_all(buf).await {
            (iobuf, Ok(nwritten)) => {
                assert_eq!(nwritten, buflen);
                iobuf
--- a/pageserver/src/tenant/ephemeral_file/zero_padded_read_write.rs
+++ b/pageserver/src/tenant/ephemeral_file/zero_padded_read_write.rs
@@ -20,7 +20,6 @@
 mod zero_padded;

 use crate::{
-    context::RequestContext,
    page_cache::PAGE_SZ,
    virtual_file::owned_buffers_io::{
        self,
@@ -61,12 +60,8 @@ where
        self.buffered_writer.as_inner().as_inner()
    }

-    pub async fn write_all_borrowed(
-        &mut self,
-        buf: &[u8],
-        ctx: &RequestContext,
-    ) -> std::io::Result<usize> {
-        self.buffered_writer.write_buffered_borrowed(buf, ctx).await
+    pub async fn write_all_borrowed(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        self.buffered_writer.write_buffered_borrowed(buf).await
    }

    pub fn bytes_written(&self) -> u64 {
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -588,7 +588,7 @@ impl LayerMap {
            let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
            coverage.push((kr, current_val.take()));
            current_key = change_key;
-            current_val.clone_from(&change_val);
+            current_val = change_val.clone();
        }

        // Add the final interval
@@ -672,12 +672,12 @@ impl LayerMap {
        // Loop through the delta coverage and recurse on each part
        for (change_key, change_val) in version.delta_coverage.range(start..end) {
            // If there's a relevant delta in this part, add 1 and recurse down
-            if let Some(val) = &current_val {
+            if let Some(val) = current_val {
                if val.get_lsn_range().end > lsn.start {
                    let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
                    let lr = lsn.start..val.get_lsn_range().start;
                    if !kr.is_empty() {
-                        let base_count = Self::is_reimage_worthy(val, key) as usize;
+                        let base_count = Self::is_reimage_worthy(&val, key) as usize;
                        let new_limit = limit.map(|l| l - base_count);
                        let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);
                        max_stacked_deltas = std::cmp::max(
@@ -689,17 +689,17 @@ impl LayerMap {
            }

            current_key = change_key;
-            current_val.clone_from(&change_val);
+            current_val = change_val.clone();
        }

        // Consider the last part
-        if let Some(val) = &current_val {
+        if let Some(val) = current_val {
            if val.get_lsn_range().end > lsn.start {
                let kr = Key::from_i128(current_key)..Key::from_i128(end);
                let lr = lsn.start..val.get_lsn_range().start;

                if !kr.is_empty() {
-                    let base_count = Self::is_reimage_worthy(val, key) as usize;
+                    let base_count = Self::is_reimage_worthy(&val, key) as usize;
                    let new_limit = limit.map(|l| l - base_count);
                    let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);
                    max_stacked_deltas = std::cmp::max(
@@ -916,7 +916,6 @@ mod tests {
        assert_eq!(lhs, rhs);
    }

-    #[cfg(test)]
    fn brute_force_range_search(
        layer_map: &LayerMap,
        key_range: Range<Key>,
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -2,7 +2,6 @@
 //! page server.

 use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
-use futures::StreamExt;
 use itertools::Itertools;
 use pageserver_api::key::Key;
 use pageserver_api::models::LocationConfigMode;
@@ -254,15 +253,17 @@ impl TenantsMap {
    }
 }

-/// Precursor to deletion of a tenant dir: we do a fast rename to a tmp path, and then
-/// the slower actual deletion in the background.
-///
 /// This is "safe" in that that it won't leave behind a partially deleted directory
 /// at the original path, because we rename with TEMP_FILE_SUFFIX before starting deleting
 /// the contents.
 ///
 /// This is pageserver-specific, as it relies on future processes after a crash to check
 /// for TEMP_FILE_SUFFIX when loading things.
+async fn safe_remove_tenant_dir_all(path: impl AsRef<Utf8Path>) -> std::io::Result<()> {
+    let tmp_path = safe_rename_tenant_dir(path).await?;
+    fs::remove_dir_all(tmp_path).await
+}
+
 async fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<Utf8PathBuf> {
    let parent = path
        .as_ref()
@@ -285,28 +286,6 @@ async fn safe_rename_tenant_dir(path: impl AsRef<Utf8Path>) -> std::io::Result<U
    Ok(tmp_path)
 }

-/// When we have moved a tenant's content to a temporary directory, we may delete it lazily in
-/// the background, and thereby avoid blocking any API requests on this deletion completing.
-fn spawn_background_purge(tmp_path: Utf8PathBuf) {
-    // Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory.
-    // After a tenant is detached, there are no more task_mgr tasks for that tenant_id.
-    let task_tenant_id = None;
-
-    task_mgr::spawn(
-        task_mgr::BACKGROUND_RUNTIME.handle(),
-        TaskKind::MgmtRequest,
-        task_tenant_id,
-        None,
-        "tenant_files_delete",
-        false,
-        async move {
-            fs::remove_dir_all(tmp_path.as_path())
-                .await
-                .with_context(|| format!("tenant directory {:?} deletion", tmp_path))
-        },
-    );
-}
-
 static TENANTS: Lazy<std::sync::RwLock<TenantsMap>> =
    Lazy::new(|| std::sync::RwLock::new(TenantsMap::Initializing));

@@ -591,11 +570,7 @@ pub async fn init_tenant_mgr(
    );
    TENANT.startup_scheduled.inc_by(tenant_configs.len() as u64);

-    // Accumulate futures for writing tenant configs, so that we can execute in parallel
-    let mut config_write_futs = Vec::new();
-
-    // Update the location configs according to the re-attach response and persist them to disk
-    tracing::info!("Updating {} location configs", tenant_configs.len());
+    // Construct `Tenant` objects and start them running
    for (tenant_shard_id, location_conf) in tenant_configs {
        let tenant_dir_path = conf.tenant_path(&tenant_shard_id);

@@ -622,22 +597,18 @@ pub async fn init_tenant_mgr(
        const DEFAULT_SECONDARY_CONF: SecondaryLocationConfig =
            SecondaryLocationConfig { warm: true };

+        // Update the location config according to the re-attach response
        if let Some(tenant_modes) = &tenant_modes {
            // We have a generation map: treat it as the authority for whether
            // this tenant is really attached.
            match tenant_modes.get(&tenant_shard_id) {
                None => {
                    info!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), "Detaching tenant, control plane omitted it in re-attach response");
-
-                    match safe_rename_tenant_dir(&tenant_dir_path).await {
-                        Ok(tmp_path) => {
-                            spawn_background_purge(tmp_path);
-                        }
-                        Err(e) => {
-                            error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
-                            "Failed to move detached tenant directory '{tenant_dir_path}': {e:?}");
-                        }
-                    };
+                    if let Err(e) = safe_remove_tenant_dir_all(&tenant_dir_path).await {
+                        error!(tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(),
+                            "Failed to remove detached tenant directory '{tenant_dir_path}': {e:?}",
+                        );
+                    }

                    // We deleted local content: move on to next tenant, don't try and spawn this one.
                    continue;
@@ -683,32 +654,8 @@ pub async fn init_tenant_mgr(

        // Presence of a generation number implies attachment: attach the tenant
        // if it wasn't already, and apply the generation number.
-        config_write_futs.push(async move {
-            let r = Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await;
-            (tenant_shard_id, location_conf, r)
-        });
-    }
+        Tenant::persist_tenant_config(conf, &tenant_shard_id, &location_conf).await?;

-    // Execute config writes with concurrency, to avoid bottlenecking on local FS write latency
-    tracing::info!(
-        "Writing {} location config files...",
-        config_write_futs.len()
-    );
-    let config_write_results = futures::stream::iter(config_write_futs)
-        .buffer_unordered(16)
-        .collect::<Vec<_>>()
-        .await;
-
-    tracing::info!(
-        "Spawning {} tenant shard locations...",
-        config_write_results.len()
-    );
-    // For those shards that have live configurations, construct `Tenant` or `SecondaryTenant` objects and start them running
-    for (tenant_shard_id, location_conf, config_write_result) in config_write_results {
-        // Errors writing configs are fatal
-        config_write_result?;
-
-        let tenant_dir_path = conf.tenant_path(&tenant_shard_id);
        let shard_identity = location_conf.shard;
        let slot = match location_conf.mode {
            LocationMode::Attached(attached_conf) => {
@@ -1752,7 +1699,7 @@ impl TenantManager {
        let tmp_path = safe_rename_tenant_dir(&local_tenant_directory)
            .await
            .with_context(|| format!("local tenant directory {local_tenant_directory:?} rename"))?;
-        spawn_background_purge(tmp_path);
+        self.spawn_background_purge(tmp_path);

        fail::fail_point!("shard-split-pre-finish", |_| Err(anyhow::anyhow!(
            "failpoint"
@@ -1907,6 +1854,28 @@ impl TenantManager {
        shutdown_all_tenants0(self.tenants).await
    }

+    /// When we have moved a tenant's content to a temporary directory, we may delete it lazily in
+    /// the background, and thereby avoid blocking any API requests on this deletion completing.
+    fn spawn_background_purge(&self, tmp_path: Utf8PathBuf) {
+        // Although we are cleaning up the tenant, this task is not meant to be bound by the lifetime of the tenant in memory.
+        // After a tenant is detached, there are no more task_mgr tasks for that tenant_id.
+        let task_tenant_id = None;
+
+        task_mgr::spawn(
+            task_mgr::BACKGROUND_RUNTIME.handle(),
+            TaskKind::MgmtRequest,
+            task_tenant_id,
+            None,
+            "tenant_files_delete",
+            false,
+            async move {
+                fs::remove_dir_all(tmp_path.as_path())
+                    .await
+                    .with_context(|| format!("tenant directory {:?} deletion", tmp_path))
+            },
+        );
+    }
+
    pub(crate) async fn detach_tenant(
        &self,
        conf: &'static PageServerConf,
@@ -1923,7 +1892,7 @@ impl TenantManager {
                deletion_queue_client,
            )
            .await?;
-        spawn_background_purge(tmp_path);
+        self.spawn_background_purge(tmp_path);

        Ok(())
    }
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -210,7 +210,6 @@ use tracing::{debug, error, info, instrument, warn};
 use tracing::{info_span, Instrument};
 use utils::lsn::Lsn;

-use crate::context::RequestContext;
 use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
 use crate::metrics::{
    MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
@@ -506,7 +505,6 @@ impl RemoteTimelineClient {
        layer_file_name: &LayerFileName,
        layer_metadata: &LayerFileMetadata,
        cancel: &CancellationToken,
-        ctx: &RequestContext,
    ) -> anyhow::Result<u64> {
        let downloaded_size = {
            let _unfinished_gauge_guard = self.metrics.call_begin(
@@ -524,7 +522,6 @@ impl RemoteTimelineClient {
                layer_file_name,
                layer_metadata,
                cancel,
-                ctx,
            )
            .measure_remote_op(
                RemoteOpFileKind::Layer,
--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -18,7 +18,6 @@ use tracing::warn;
 use utils::backoff;

 use crate::config::PageServerConf;
-use crate::context::RequestContext;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
 use crate::tenant::storage_layer::LayerFileName;
@@ -41,7 +40,6 @@ use super::{
 /// in the metadata. (In the future, we might do more cross-checks, like CRC validation)
 ///
 /// Returns the size of the downloaded file.
-#[allow(clippy::too_many_arguments)]
 pub async fn download_layer_file<'a>(
    conf: &'static PageServerConf,
    storage: &'a GenericRemoteStorage,
@@ -50,7 +48,6 @@ pub async fn download_layer_file<'a>(
    layer_file_name: &'a LayerFileName,
    layer_metadata: &'a LayerFileMetadata,
    cancel: &CancellationToken,
-    ctx: &RequestContext,
 ) -> Result<u64, DownloadError> {
    debug_assert_current_span_has_tenant_and_timeline_id();

@@ -78,7 +75,7 @@ pub async fn download_layer_file<'a>(
    let temp_file_path = path_with_suffix_extension(&local_path, TEMP_DOWNLOAD_EXTENSION);

    let bytes_amount = download_retry(
-        || async { download_object(storage, &remote_path, &temp_file_path, cancel, ctx).await },
+        || async { download_object(storage, &remote_path, &temp_file_path, cancel).await },
        &format!("download {remote_path:?}"),
        cancel,
    )
@@ -136,7 +133,6 @@ async fn download_object<'a>(
    src_path: &RemotePath,
    dst_path: &Utf8PathBuf,
    cancel: &CancellationToken,
-    #[cfg_attr(target_os = "macos", allow(unused_variables))] ctx: &RequestContext,
 ) -> Result<u64, DownloadError> {
    let res = match crate::virtual_file::io_engine::get() {
        crate::virtual_file::io_engine::IoEngine::NotSet => panic!("unset"),
@@ -212,10 +208,10 @@ async fn download_object<'a>(
                            Err(e) => return Err(e),
                        };
                        buffered
-                            .write_buffered(tokio_epoll_uring::BoundedBuf::slice_full(chunk), ctx)
+                            .write_buffered(tokio_epoll_uring::BoundedBuf::slice_full(chunk))
                            .await?;
                    }
-                    let size_tracking = buffered.flush_and_into_inner(ctx).await?;
+                    let size_tracking = buffered.flush_and_into_inner().await?;
                    Ok(size_tracking.into_inner())
                }
                .await?;
--- a/pageserver/src/tenant/secondary.rs
+++ b/pageserver/src/tenant/secondary.rs
@@ -7,7 +7,6 @@ use std::{sync::Arc, time::SystemTime};

 use crate::{
    config::PageServerConf,
-    context::RequestContext,
    disk_usage_eviction_task::DiskUsageEvictionInfo,
    task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
    virtual_file::MaybeFatalIo,
@@ -317,13 +316,9 @@ pub fn spawn_tasks(
    let (upload_req_tx, upload_req_rx) =
        tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);

-    let downloader_task_ctx = RequestContext::new(
-        TaskKind::SecondaryDownloads,
-        crate::context::DownloadBehavior::Download,
-    );
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
-        downloader_task_ctx.task_kind(),
+        TaskKind::SecondaryDownloads,
        None,
        None,
        "secondary tenant downloads",
@@ -335,7 +330,6 @@ pub fn spawn_tasks(
                download_req_rx,
                bg_jobs_clone,
                cancel_clone,
-                downloader_task_ctx,
            )
            .await;

--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -8,7 +8,6 @@ use std::{

 use crate::{
    config::PageServerConf,
-    context::RequestContext,
    disk_usage_eviction_task::{
        finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer,
    },
@@ -31,10 +30,7 @@ use crate::{

 use super::{
    heatmap::HeatMapLayer,
-    scheduler::{
-        self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult,
-        TenantBackgroundJobs,
-    },
+    scheduler::{self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs},
    SecondaryTenant,
 };

@@ -48,6 +44,7 @@ use chrono::format::{DelayedFormat, StrftimeItems};
 use futures::Future;
 use pageserver_api::models::SecondaryProgress;
 use pageserver_api::shard::TenantShardId;
+use rand::Rng;
 use remote_storage::{DownloadError, Etag, GenericRemoteStorage};

 use tokio_util::sync::CancellationToken;
@@ -77,14 +74,12 @@ pub(super) async fn downloader_task(
    command_queue: tokio::sync::mpsc::Receiver<CommandRequest<DownloadCommand>>,
    background_jobs_can_start: Barrier,
    cancel: CancellationToken,
-    root_ctx: RequestContext,
 ) {
    let concurrency = tenant_manager.get_conf().secondary_download_concurrency;

    let generator = SecondaryDownloader {
        tenant_manager,
        remote_storage,
-        root_ctx,
    };
    let mut scheduler = Scheduler::new(generator, concurrency);

@@ -97,7 +92,6 @@ pub(super) async fn downloader_task(
 struct SecondaryDownloader {
    tenant_manager: Arc<TenantManager>,
    remote_storage: GenericRemoteStorage,
-    root_ctx: RequestContext,
 }

 #[derive(Debug, Clone)]
@@ -276,7 +270,7 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
        // Update freshened_at even if there was an error: we don't want errored tenants to implicitly
        // take priority to run again.
        let mut detail = secondary_state.detail.lock().unwrap();
-        detail.next_download = Some(Instant::now() + period_jitter(DOWNLOAD_FRESHEN_INTERVAL, 5));
+        detail.next_download = Some(Instant::now() + DOWNLOAD_FRESHEN_INTERVAL);
    }

    async fn schedule(&mut self) -> SchedulingResult<PendingDownload> {
@@ -307,9 +301,11 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
                    }

                    if detail.next_download.is_none() {
-                        // Initialize randomly in the range from 0 to our interval: this uniformly spreads the start times.  Subsequent
-                        // rounds will use a smaller jitter to avoid accidentally synchronizing later.
-                        detail.next_download = Some(now.checked_add(period_warmup(DOWNLOAD_FRESHEN_INTERVAL)).expect(
+                        // Initialize with a jitter: this spreads initial downloads on startup
+                        // or mass-attach across our freshen interval.
+                        let jittered_period =
+                            rand::thread_rng().gen_range(Duration::ZERO..DOWNLOAD_FRESHEN_INTERVAL);
+                        detail.next_download = Some(now.checked_add(jittered_period).expect(
                        "Using our constant, which is known to be small compared with clock range",
                    ));
                    }
@@ -371,12 +367,11 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
        let remote_storage = self.remote_storage.clone();
        let conf = self.tenant_manager.get_conf();
        let tenant_shard_id = *secondary_state.get_tenant_shard_id();
-        let download_ctx = self.root_ctx.attached_child();
        (RunningDownload { barrier }, Box::pin(async move {
            let _completion = completion;

            match TenantDownloader::new(conf, &remote_storage, &secondary_state)
-                .download(&download_ctx)
+                .download()
                .await
            {
                Err(UpdateError::NoData) => {
@@ -490,7 +485,7 @@ impl<'a> TenantDownloader<'a> {
        }
    }

-    async fn download(&self, ctx: &RequestContext) -> Result<(), UpdateError> {
+    async fn download(&self) -> Result<(), UpdateError> {
        debug_assert_current_span_has_tenant_id();

        // For the duration of a download, we must hold the SecondaryTenant::gate, to ensure
@@ -565,7 +560,7 @@ impl<'a> TenantDownloader<'a> {
            }

            let timeline_id = timeline.timeline_id;
-            self.download_timeline(timeline, ctx)
+            self.download_timeline(timeline)
                .instrument(tracing::info_span!(
                    "secondary_download_timeline",
                    tenant_id=%tenant_shard_id.tenant_id,
@@ -747,11 +742,7 @@ impl<'a> TenantDownloader<'a> {
        .and_then(|x| x)
    }

-    async fn download_timeline(
-        &self,
-        timeline: HeatMapTimeline,
-        ctx: &RequestContext,
-    ) -> Result<(), UpdateError> {
+    async fn download_timeline(&self, timeline: HeatMapTimeline) -> Result<(), UpdateError> {
        debug_assert_current_span_has_tenant_and_timeline_id();
        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
        let timeline_path = self
@@ -884,7 +875,6 @@ impl<'a> TenantDownloader<'a> {
                &layer.name,
                &LayerFileMetadata::from(&layer.metadata),
                &self.secondary_state.cancel,
-                ctx,
            )
            .await
            {
--- a/pageserver/src/tenant/secondary/heatmap_uploader.rs
+++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs
@@ -20,14 +20,12 @@ use crate::{

 use futures::Future;
 use pageserver_api::shard::TenantShardId;
+use rand::Rng;
 use remote_storage::{GenericRemoteStorage, TimeoutOrCancel};

 use super::{
    heatmap::HeatMapTenant,
-    scheduler::{
-        self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult,
-        TenantBackgroundJobs,
-    },
+    scheduler::{self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs},
    CommandRequest, UploadCommand,
 };
 use tokio_util::sync::CancellationToken;
@@ -183,11 +181,15 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
            let state = self
                .tenants
                .entry(*tenant.get_tenant_shard_id())
-                .or_insert_with(|| UploaderTenantState {
-                    tenant: Arc::downgrade(&tenant),
-                    last_upload: None,
-                    next_upload: Some(now.checked_add(period_warmup(period)).unwrap_or(now)),
-                    last_digest: None,
+                .or_insert_with(|| {
+                    let jittered_period = rand::thread_rng().gen_range(Duration::ZERO..period);
+
+                    UploaderTenantState {
+                        tenant: Arc::downgrade(&tenant),
+                        last_upload: None,
+                        next_upload: Some(now.checked_add(jittered_period).unwrap_or(now)),
+                        last_digest: None,
+                    }
                });

            // Decline to do the upload if insufficient time has passed
@@ -272,7 +274,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>

            let next_upload = tenant
                .get_heatmap_period()
-                .and_then(|period| now.checked_add(period_jitter(period, 5)));
+                .and_then(|period| now.checked_add(period));

            WriteComplete {
                    tenant_shard_id: *tenant.get_tenant_shard_id(),
--- a/pageserver/src/tenant/secondary/scheduler.rs
+++ b/pageserver/src/tenant/secondary/scheduler.rs
@@ -1,5 +1,4 @@
 use futures::Future;
-use rand::Rng;
 use std::{
    collections::HashMap,
    marker::PhantomData,
@@ -20,26 +19,6 @@ use super::{CommandRequest, CommandResponse};
 const MAX_SCHEDULING_INTERVAL: Duration = Duration::from_secs(10);
 const MIN_SCHEDULING_INTERVAL: Duration = Duration::from_secs(1);

-/// Jitter a Duration by an integer percentage.  Returned values are uniform
-/// in the range 100-pct..100+pct (i.e. a 5% jitter is 5% either way: a ~10% range)
-pub(super) fn period_jitter(d: Duration, pct: u32) -> Duration {
-    if d == Duration::ZERO {
-        d
-    } else {
-        rand::thread_rng().gen_range((d * (100 - pct)) / 100..(d * (100 + pct)) / 100)
-    }
-}
-
-/// When a periodic task first starts, it should wait for some time in the range 0..period, so
-/// that starting many such tasks at the same time spreads them across the time range.
-pub(super) fn period_warmup(period: Duration) -> Duration {
-    if period == Duration::ZERO {
-        period
-    } else {
-        rand::thread_rng().gen_range(Duration::ZERO..period)
-    }
-}
-
 /// Scheduling helper for background work across many tenants.
 ///
 /// Systems that need to run background work across many tenants may use this type
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -118,6 +118,9 @@ pub(super) async fn gather_inputs(
    ctx: &RequestContext,
 ) -> anyhow::Result<ModelInputs> {
    // refresh is needed to update gc related pitr_cutoff and horizon_cutoff
+    //
+    // FIXME: if a single timeline is deleted while refresh gc info is ongoing, we will fail the
+    // whole computation. It does not make sense from the billing perspective.
    tenant
        .refresh_gc_info(cancel, ctx)
        .await
@@ -189,9 +192,7 @@ pub(super) async fn gather_inputs(
        // than a space bound (horizon cutoff).  This means that if someone drops a database and waits for their
        // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
        // horizon_cutoff.
-        let pitr_cutoff = gc_info.cutoffs.pitr;
-        let horizon_cutoff = gc_info.cutoffs.horizon;
-        let mut next_gc_cutoff = pitr_cutoff;
+        let mut next_gc_cutoff = gc_info.pitr_cutoff;

        // If the caller provided a shorter retention period, use that instead of the GC cutoff.
        let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
@@ -218,8 +219,6 @@ pub(super) async fn gather_inputs(
            .map(|lsn| (lsn, LsnKind::BranchPoint))
            .collect::<Vec<_>>();

-        drop(gc_info);
-
        // Add branch points we collected earlier, just in case there were any that were
        // not present in retain_lsns. We will remove any duplicates below later.
        if let Some(this_branchpoints) = branchpoints.get(&timeline_id) {
@@ -298,8 +297,8 @@ pub(super) async fn gather_inputs(
            last_record: last_record_lsn,
            // this is not used above, because it might not have updated recently enough
            latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
-            horizon_cutoff,
-            pitr_cutoff,
+            horizon_cutoff: gc_info.horizon_cutoff,
+            pitr_cutoff: gc_info.pitr_cutoff,
            next_gc_cutoff,
            retention_param_cutoff,
        });
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -428,15 +428,9 @@ impl DeltaLayerWriterInner {
    ///
    /// The values must be appended in key, lsn order.
    ///
-    async fn put_value(
-        &mut self,
-        key: Key,
-        lsn: Lsn,
-        val: Value,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    async fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> anyhow::Result<()> {
        let (_, res) = self
-            .put_value_bytes(key, lsn, Value::ser(&val)?, val.will_init(), ctx)
+            .put_value_bytes(key, lsn, Value::ser(&val)?, val.will_init())
            .await;
        res
    }
@@ -447,10 +441,9 @@ impl DeltaLayerWriterInner {
        lsn: Lsn,
        val: Vec<u8>,
        will_init: bool,
-        ctx: &RequestContext,
    ) -> (Vec<u8>, anyhow::Result<()>) {
        assert!(self.lsn_range.start <= lsn);
-        let (val, res) = self.blob_writer.write_blob(val, ctx).await;
+        let (val, res) = self.blob_writer.write_blob(val).await;
        let off = match res {
            Ok(off) => off,
            Err(e) => return (val, Err(anyhow::anyhow!(e))),
@@ -470,23 +463,18 @@ impl DeltaLayerWriterInner {
    ///
    /// Finish writing the delta layer.
    ///
-    async fn finish(
-        self,
-        key_end: Key,
-        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<ResidentLayer> {
+    async fn finish(self, key_end: Key, timeline: &Arc<Timeline>) -> anyhow::Result<ResidentLayer> {
        let index_start_blk =
            ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;

-        let mut file = self.blob_writer.into_inner(ctx).await?;
+        let mut file = self.blob_writer.into_inner().await?;

        // Write out the index
        let (index_root_blk, block_buf) = self.tree.finish()?;
        file.seek(SeekFrom::Start(index_start_blk as u64 * PAGE_SZ as u64))
            .await?;
        for buf in block_buf.blocks {
-            let (_buf, res) = file.write_all(buf, ctx).await;
+            let (_buf, res) = file.write_all(buf).await;
            res?;
        }
        assert!(self.lsn_range.start < self.lsn_range.end);
@@ -506,7 +494,7 @@ impl DeltaLayerWriterInner {
        // TODO: could use smallvec here but it's a pain with Slice<T>
        Summary::ser_into(&summary, &mut buf)?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;

        let metadata = file
@@ -604,18 +592,8 @@ impl DeltaLayerWriter {
    ///
    /// The values must be appended in key, lsn order.
    ///
-    pub async fn put_value(
-        &mut self,
-        key: Key,
-        lsn: Lsn,
-        val: Value,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        self.inner
-            .as_mut()
-            .unwrap()
-            .put_value(key, lsn, val, ctx)
-            .await
+    pub async fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> anyhow::Result<()> {
+        self.inner.as_mut().unwrap().put_value(key, lsn, val).await
    }

    pub async fn put_value_bytes(
@@ -624,12 +602,11 @@ impl DeltaLayerWriter {
        lsn: Lsn,
        val: Vec<u8>,
        will_init: bool,
-        ctx: &RequestContext,
    ) -> (Vec<u8>, anyhow::Result<()>) {
        self.inner
            .as_mut()
            .unwrap()
-            .put_value_bytes(key, lsn, val, will_init, ctx)
+            .put_value_bytes(key, lsn, val, will_init)
            .await
    }

@@ -644,11 +621,10 @@ impl DeltaLayerWriter {
        mut self,
        key_end: Key,
        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
    ) -> anyhow::Result<ResidentLayer> {
        let inner = self.inner.take().unwrap();
        let temp_path = inner.path.clone();
-        let result = inner.finish(key_end, timeline, ctx).await;
+        let result = inner.finish(key_end, timeline).await;
        // The delta layer files can sometimes be really large. Clean them up.
        if result.is_err() {
            tracing::warn!(
@@ -716,7 +692,7 @@ impl DeltaLayer {
        // TODO: could use smallvec here, but it's a pain with Slice<T>
        Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;
        Ok(())
    }
@@ -1305,13 +1281,7 @@ impl DeltaLayerInner {
                    per_blob_copy.extend_from_slice(data);

                    let (tmp, res) = writer
-                        .put_value_bytes(
-                            key,
-                            lsn,
-                            std::mem::take(&mut per_blob_copy),
-                            will_init,
-                            ctx,
-                        )
+                        .put_value_bytes(key, lsn, std::mem::take(&mut per_blob_copy), will_init)
                        .await;
                    per_blob_copy = tmp;
                    res?;
@@ -1790,14 +1760,12 @@ mod test {

        for entry in entries {
            let (_, res) = writer
-                .put_value_bytes(entry.key, entry.lsn, entry.value, false, &ctx)
+                .put_value_bytes(entry.key, entry.lsn, entry.value, false)
                .await;
            res?;
        }

-        let resident = writer
-            .finish(entries_meta.key_range.end, &timeline, &ctx)
-            .await?;
+        let resident = writer.finish(entries_meta.key_range.end, &timeline).await?;

        let inner = resident.as_delta(&ctx).await?;

@@ -1983,7 +1951,7 @@ mod test {
                .await
                .unwrap();

-            let copied_layer = writer.finish(Key::MAX, &branch, ctx).await.unwrap();
+            let copied_layer = writer.finish(Key::MAX, &branch).await.unwrap();

            copied_layer.as_delta(ctx).await.unwrap();

--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -357,7 +357,7 @@ impl ImageLayer {
        // TODO: could use smallvec here but it's a pain with Slice<T>
        Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;
        Ok(())
    }
@@ -677,14 +677,9 @@ impl ImageLayerWriterInner {
    ///
    /// The page versions must be appended in blknum order.
    ///
-    async fn put_image(
-        &mut self,
-        key: Key,
-        img: Bytes,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    async fn put_image(&mut self, key: Key, img: Bytes) -> anyhow::Result<()> {
        ensure!(self.key_range.contains(&key));
-        let (_img, res) = self.blob_writer.write_blob(img, ctx).await;
+        let (_img, res) = self.blob_writer.write_blob(img).await;
        // TODO: re-use the buffer for `img` further upstack
        let off = res?;

@@ -698,11 +693,7 @@ impl ImageLayerWriterInner {
    ///
    /// Finish writing the image layer.
    ///
-    async fn finish(
-        self,
-        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<ResidentLayer> {
+    async fn finish(self, timeline: &Arc<Timeline>) -> anyhow::Result<ResidentLayer> {
        let index_start_blk =
            ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;

@@ -713,7 +704,7 @@ impl ImageLayerWriterInner {
            .await?;
        let (index_root_blk, block_buf) = self.tree.finish()?;
        for buf in block_buf.blocks {
-            let (_buf, res) = file.write_all(buf, ctx).await;
+            let (_buf, res) = file.write_all(buf).await;
            res?;
        }

@@ -733,7 +724,7 @@ impl ImageLayerWriterInner {
        // TODO: could use smallvec here but it's a pain with Slice<T>
        Summary::ser_into(&summary, &mut buf)?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;

        let metadata = file
@@ -815,13 +806,8 @@ impl ImageLayerWriter {
    ///
    /// The page versions must be appended in blknum order.
    ///
-    pub async fn put_image(
-        &mut self,
-        key: Key,
-        img: Bytes,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        self.inner.as_mut().unwrap().put_image(key, img, ctx).await
+    pub async fn put_image(&mut self, key: Key, img: Bytes) -> anyhow::Result<()> {
+        self.inner.as_mut().unwrap().put_image(key, img).await
    }

    ///
@@ -830,9 +816,8 @@ impl ImageLayerWriter {
    pub(crate) async fn finish(
        mut self,
        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
    ) -> anyhow::Result<super::ResidentLayer> {
-        self.inner.take().unwrap().finish(timeline, ctx).await
+        self.inner.take().unwrap().finish(timeline).await
    }
 }

--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -597,17 +597,14 @@ impl InMemoryLayer {
        }
    }

-    /// Write this frozen in-memory layer to disk. If `key_range` is set, the delta
-    /// layer will only contain the key range the user specifies, and may return `None`
-    /// if there are no matching keys.
+    /// Write this frozen in-memory layer to disk.
    ///
    /// Returns a new delta layer with all the same data as this in-memory layer
    pub(crate) async fn write_to_disk(
        &self,
        timeline: &Arc<Timeline>,
        ctx: &RequestContext,
-        key_range: Option<Range<Key>>,
-    ) -> Result<Option<ResidentLayer>> {
+    ) -> Result<ResidentLayer> {
        // Grab the lock in read-mode. We hold it over the I/O, but because this
        // layer is not writeable anymore, no one should be trying to acquire the
        // write lock on it, so we shouldn't block anyone. There's one exception
@@ -621,21 +618,6 @@ impl InMemoryLayer {

        let end_lsn = *self.end_lsn.get().unwrap();

-        let keys: Vec<_> = if let Some(key_range) = key_range {
-            inner
-                .index
-                .iter()
-                .filter(|(k, _)| key_range.contains(k))
-                .map(|(k, m)| (k.to_i128(), m))
-                .collect()
-        } else {
-            inner.index.iter().map(|(k, m)| (k.to_i128(), m)).collect()
-        };
-
-        if keys.is_empty() {
-            return Ok(None);
-        }
-
        let mut delta_layer_writer = DeltaLayerWriter::new(
            self.conf,
            self.timeline_id,
@@ -659,14 +641,14 @@ impl InMemoryLayer {
                let will_init = Value::des(&buf)?.will_init();
                let res;
                (buf, res) = delta_layer_writer
-                    .put_value_bytes(*key, *lsn, buf, will_init, &ctx)
+                    .put_value_bytes(*key, *lsn, buf, will_init)
                    .await;
                res?;
            }
        }

        // MAX is used here because we identify L0 layers by full key range
-        let delta_layer = delta_layer_writer.finish(Key::MAX, timeline, &ctx).await?;
-        Ok(Some(delta_layer))
+        let delta_layer = delta_layer_writer.finish(Key::MAX, timeline).await?;
+        Ok(delta_layer)
    }
 }
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -14,10 +14,9 @@ use utils::lsn::Lsn;
 use utils::sync::heavier_once_cell;

 use crate::config::PageServerConf;
-use crate::context::{DownloadBehavior, RequestContext};
+use crate::context::RequestContext;
 use crate::repository::Key;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
-use crate::task_mgr::TaskKind;
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};

@@ -402,8 +401,8 @@ impl Layer {
        &self.0.path
    }

-    pub(crate) fn debug_str(&self) -> &Arc<str> {
-        &self.0.debug_str
+    pub(crate) fn local_path_str(&self) -> &Arc<str> {
+        &self.0.path_str
    }

    pub(crate) fn metadata(&self) -> LayerFileMetadata {
@@ -528,8 +527,8 @@ struct LayerInner {
    /// Full path to the file; unclear if this should exist anymore.
    path: Utf8PathBuf,

-    /// String representation of the layer, used for traversal id.
-    debug_str: Arc<str>,
+    /// String representation of the full path, used for traversal id.
+    path_str: Arc<str>,

    desc: PersistentLayerDesc,

@@ -736,7 +735,7 @@ impl LayerInner {

        LayerInner {
            conf,
-            debug_str: { format!("timelines/{}/{}", timeline.timeline_id, desc.filename()).into() },
+            path_str: path.to_string().into(),
            path,
            desc,
            timeline: Arc::downgrade(timeline),
@@ -940,20 +939,11 @@ impl LayerInner {
            return Err(DownloadError::DownloadRequired);
        }

-        let download_ctx = ctx
-            .map(|ctx| ctx.detached_child(TaskKind::LayerDownload, DownloadBehavior::Download))
-            .unwrap_or(RequestContext::new(
-                TaskKind::LayerDownload,
-                DownloadBehavior::Download,
-            ));
-
        async move {
            tracing::info!(%reason, "downloading on-demand");

            let init_cancelled = scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
-            let res = self
-                .download_init_and_wait(timeline, permit, download_ctx)
-                .await?;
+            let res = self.download_init_and_wait(timeline, permit).await?;
            scopeguard::ScopeGuard::into_inner(init_cancelled);
            Ok(res)
        }
@@ -992,7 +982,6 @@ impl LayerInner {
        self: &Arc<Self>,
        timeline: Arc<Timeline>,
        permit: heavier_once_cell::InitPermit,
-        ctx: RequestContext,
    ) -> Result<Arc<DownloadedLayer>, DownloadError> {
        debug_assert_current_span_has_tenant_and_timeline_id();

@@ -1022,7 +1011,7 @@ impl LayerInner {
                    .await
                    .unwrap();

-                let res = this.download_and_init(timeline, permit, &ctx).await;
+                let res = this.download_and_init(timeline, permit).await;

                if let Err(res) = tx.send(res) {
                    match res {
@@ -1065,7 +1054,6 @@ impl LayerInner {
        self: &Arc<LayerInner>,
        timeline: Arc<Timeline>,
        permit: heavier_once_cell::InitPermit,
-        ctx: &RequestContext,
    ) -> anyhow::Result<Arc<DownloadedLayer>> {
        let client = timeline
            .remote_client
@@ -1073,12 +1061,7 @@ impl LayerInner {
            .expect("checked before download_init_and_wait");

        let result = client
-            .download_layer_file(
-                &self.desc.filename(),
-                &self.metadata(),
-                &timeline.cancel,
-                ctx,
-            )
+            .download_layer_file(&self.desc.filename(), &self.metadata(), &timeline.cancel)
            .await;

        match result {
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -16,17 +16,14 @@ use enumset::EnumSet;
 use fail::fail_point;
 use once_cell::sync::Lazy;
 use pageserver_api::{
-    key::{
-        AUX_FILES_KEY, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,
-        NON_INHERITED_SPARSE_RANGE,
-    },
-    keyspace::{KeySpaceAccum, SparseKeyPartitioning},
+    key::{AUX_FILES_KEY, NON_INHERITED_RANGE},
+    keyspace::{KeySpaceAccum, ShardedRange},
    models::{
        CompactionAlgorithm, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
        EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, TimelineState,
    },
    reltag::BlockNumber,
-    shard::{ShardIdentity, ShardNumber, TenantShardId},
+    shard::{ShardCount, ShardIdentity, ShardNumber, TenantShardId},
 };
 use rand::Rng;
 use serde_with::serde_as;
@@ -58,6 +55,8 @@ use std::{
    ops::ControlFlow,
 };

+use crate::deletion_queue::DeletionQueueClient;
+use crate::tenant::timeline::logical_size::CurrentLogicalSize;
 use crate::tenant::{
    layer_map::{LayerMap, SearchResult},
    metadata::TimelineMetadata,
@@ -67,7 +66,6 @@ use crate::{
    disk_usage_eviction_task::DiskUsageEvictionInfo,
    pgdatadir_mapping::CollectKeySpaceError,
 };
-use crate::{deletion_queue::DeletionQueueClient, metrics::GetKind};
 use crate::{
    disk_usage_eviction_task::finite_f32,
    tenant::storage_layer::{
@@ -79,9 +77,6 @@ use crate::{
 use crate::{
    disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry,
 };
-use crate::{
-    metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize,
-};
 use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
 use crate::{
    pgdatadir_mapping::{AuxFilesDirectory, DirectoryKind},
@@ -91,7 +86,7 @@ use crate::{
 use crate::config::PageServerConf;
 use crate::keyspace::{KeyPartitioning, KeySpace};
 use crate::metrics::{
-    TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
+    GetKind, TimelineMetrics, MATERIALIZED_PAGE_CACHE_HIT, MATERIALIZED_PAGE_CACHE_HIT_DIRECT,
 };
 use crate::pgdatadir_mapping::CalculateLogicalSizeError;
 use crate::tenant::config::TenantConfOpt;
@@ -142,25 +137,6 @@ pub(super) enum FlushLoopState {
    Exited,
 }

-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum ImageLayerCreationMode {
-    /// Try to create image layers based on `time_for_new_image_layer`. Used in compaction code path.
-    Try,
-    /// Force creating the image layers if possible. For now, no image layers will be created
-    /// for metadata keys. Used in compaction code path with force flag enabled.
-    Force,
-    /// Initial ingestion of the data, and no data should be dropped in this function. This
-    /// means that no metadata keys should be included in the partitions. Used in flush frozen layer
-    /// code path.
-    Initial,
-}
-
-impl std::fmt::Display for ImageLayerCreationMode {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self)
-    }
-}
-
 /// Wrapper for key range to provide reverse ordering by range length for BinaryHeap
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub(crate) struct Hole {
@@ -330,7 +306,7 @@ pub struct Timeline {

    // List of child timelines and their branch points. This is needed to avoid
    // garbage collecting data that is still needed by the child timelines.
-    pub(crate) gc_info: std::sync::RwLock<GcInfo>,
+    pub gc_info: std::sync::RwLock<GcInfo>,

    // It may change across major versions so for simplicity
    // keep it after running initdb for a timeline.
@@ -341,7 +317,7 @@ pub struct Timeline {
    pub initdb_lsn: Lsn,

    /// When did we last calculate the partitioning?
-    partitioning: tokio::sync::Mutex<((KeyPartitioning, SparseKeyPartitioning), Lsn)>,
+    partitioning: tokio::sync::Mutex<(KeyPartitioning, Lsn)>,

    /// Configuration: how often should the partitioning be recalculated.
    repartition_threshold: u64,
@@ -414,59 +390,33 @@ pub struct WalReceiverInfo {
    pub last_received_msg_ts: u128,
 }

+///
 /// Information about how much history needs to be retained, needed by
 /// Garbage Collection.
-#[derive(Default)]
-pub(crate) struct GcInfo {
+///
+pub struct GcInfo {
    /// Specific LSNs that are needed.
    ///
    /// Currently, this includes all points where child branches have
    /// been forked off from. In the future, could also include
    /// explicit user-defined snapshot points.
-    pub(crate) retain_lsns: Vec<Lsn>,
+    pub retain_lsns: Vec<Lsn>,

-    /// The cutoff coordinates, which are combined by selecting the minimum.
-    pub(crate) cutoffs: GcCutoffs,
-}
-
-impl GcInfo {
-    pub(crate) fn min_cutoff(&self) -> Lsn {
-        self.cutoffs.select_min()
-    }
-}
-
-/// The `GcInfo` component describing which Lsns need to be retained.
-#[derive(Debug)]
-pub(crate) struct GcCutoffs {
-    /// Keep everything newer than this point.
+    /// In addition to 'retain_lsns', keep everything newer than this
+    /// point.
    ///
    /// This is calculated by subtracting 'gc_horizon' setting from
    /// last-record LSN
    ///
    /// FIXME: is this inclusive or exclusive?
-    pub(crate) horizon: Lsn,
+    pub horizon_cutoff: Lsn,

    /// In addition to 'retain_lsns' and 'horizon_cutoff', keep everything newer than this
    /// point.
    ///
    /// This is calculated by finding a number such that a record is needed for PITR
    /// if only if its LSN is larger than 'pitr_cutoff'.
-    pub(crate) pitr: Lsn,
-}
-
-impl Default for GcCutoffs {
-    fn default() -> Self {
-        Self {
-            horizon: Lsn::INVALID,
-            pitr: Lsn::INVALID,
-        }
-    }
-}
-
-impl GcCutoffs {
-    fn select_min(&self) -> Lsn {
-        std::cmp::min(self.horizon, self.pitr)
-    }
+    pub pitr_cutoff: Lsn,
 }

 /// An error happened in a get() operation.
@@ -495,6 +445,7 @@ pub(crate) enum PageReconstructError {

 #[derive(Debug)]
 pub struct MissingKeyError {
+    stuck_at_lsn: bool,
    key: Key,
    shard: ShardNumber,
    cont_lsn: Lsn,
@@ -506,13 +457,23 @@ pub struct MissingKeyError {

 impl std::fmt::Display for MissingKeyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "could not find data for key {} (shard {:?}) at LSN {}, request LSN {}",
-            self.key, self.shard, self.cont_lsn, self.request_lsn
-        )?;
-        if let Some(ref ancestor_lsn) = self.ancestor_lsn {
-            write!(f, ", ancestor {}", ancestor_lsn)?;
+        if self.stuck_at_lsn {
+            // Records are found in this timeline but no image layer or initial delta record was found.
+            write!(
+                f,
+                "could not find layer with more data for key {} (shard {:?}) at LSN {}, request LSN {}",
+                self.key, self.shard, self.cont_lsn, self.request_lsn
+            )?;
+            if let Some(ref ancestor_lsn) = self.ancestor_lsn {
+                write!(f, ", ancestor {}", ancestor_lsn)?;
+            }
+        } else {
+            // No records in this timeline.
+            write!(
+                f,
+                "could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}",
+                self.key, self.shard, self.cont_lsn, self.request_lsn
+            )?;
        }

        if !self.traversal_path.is_empty() {
@@ -588,8 +549,8 @@ pub(crate) enum GetVectoredError {
    #[error("Requested at invalid LSN: {0}")]
    InvalidLsn(Lsn),

-    #[error("Requested key not found: {0}")]
-    MissingKey(MissingKeyError),
+    #[error("Requested key {0} not found")]
+    MissingKey(Key),

    #[error(transparent)]
    GetReadyAncestorError(GetReadyAncestorError),
@@ -698,7 +659,7 @@ impl From<GetVectoredError> for PageReconstructError {
            GetVectoredError::Cancelled => PageReconstructError::Cancelled,
            GetVectoredError::InvalidLsn(_) => PageReconstructError::Other(anyhow!("Invalid LSN")),
            err @ GetVectoredError::Oversized(_) => PageReconstructError::Other(err.into()),
-            GetVectoredError::MissingKey(err) => PageReconstructError::MissingKey(err),
+            err @ GetVectoredError::MissingKey(_) => PageReconstructError::Other(err.into()),
            GetVectoredError::GetReadyAncestorError(err) => PageReconstructError::from(err),
            GetVectoredError::Other(err) => PageReconstructError::Other(err),
        }
@@ -890,15 +851,16 @@ impl Timeline {
                            value
                        }
                    }
-                    None => Err(PageReconstructError::MissingKey(MissingKeyError {
-                        key,
-                        shard: self.shard_identity.get_shard_number(&key),
-                        cont_lsn: Lsn(0),
-                        request_lsn: lsn,
-                        ancestor_lsn: None,
-                        traversal_path: Vec::new(),
-                        backtrace: None,
-                    })),
+                    None => {
+                        error!(
+                            "Expected {}, but singular vectored get returned nothing",
+                            key
+                        );
+                        Err(PageReconstructError::Other(anyhow!(
+                            "Singular vectored get did not return a value for {}",
+                            key
+                        )))
+                    }
                }
            }
        }
@@ -1048,70 +1010,6 @@ impl Timeline {
        res
    }

-    /// Scan the keyspace and return all existing key-values in the keyspace. This currently uses vectored
-    /// get underlying. Normal vectored get would throw an error when a key in the keyspace is not found
-    /// during the search, but for the scan interface, it returns all existing key-value pairs, and does
-    /// not expect each single key in the key space will be found. The semantics is closer to the RocksDB
-    /// scan iterator interface. We could optimize this interface later to avoid some checks in the vectored
-    /// get path to maintain and split the probing and to-be-probe keyspace. We also need to ensure that
-    /// the scan operation will not cause OOM in the future.
-    #[allow(dead_code)]
-    pub(crate) async fn scan(
-        &self,
-        keyspace: KeySpace,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
-        if !lsn.is_valid() {
-            return Err(GetVectoredError::InvalidLsn(lsn));
-        }
-
-        trace!(
-            "key-value scan request for {:?}@{} from task kind {:?}",
-            keyspace,
-            lsn,
-            ctx.task_kind()
-        );
-
-        // We should generalize this into Keyspace::contains in the future.
-        for range in &keyspace.ranges {
-            if range.start.field1 < METADATA_KEY_BEGIN_PREFIX
-                || range.end.field1 >= METADATA_KEY_END_PREFIX
-            {
-                return Err(GetVectoredError::Other(anyhow::anyhow!(
-                    "only metadata keyspace can be scanned"
-                )));
-            }
-        }
-
-        let start = crate::metrics::SCAN_LATENCY
-            .for_task_kind(ctx.task_kind())
-            .map(ScanLatencyOngoingRecording::start_recording);
-
-        // start counting after throttle so that throttle time
-        // is always less than observation time
-        let throttled = self
-            .timeline_get_throttle
-            // assume scan = 1 quota for now until we find a better way to process this
-            .throttle(ctx, 1)
-            .await;
-
-        let vectored_res = self
-            .get_vectored_impl(
-                keyspace.clone(),
-                lsn,
-                ValuesReconstructState::default(),
-                ctx,
-            )
-            .await;
-
-        if let Some(recording) = start {
-            recording.observe(throttled);
-        }
-
-        vectored_res
-    }
-
    /// Not subject to [`Self::timeline_get_throttle`].
    pub(super) async fn get_vectored_sequential_impl(
        &self,
@@ -1120,7 +1018,6 @@ impl Timeline {
        ctx: &RequestContext,
    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
        let mut values = BTreeMap::new();
-
        for range in keyspace.ranges {
            let mut key = range.start;
            while key != range.end {
@@ -1133,17 +1030,16 @@ impl Timeline {
                    Err(Cancelled | AncestorStopping(_)) => {
                        return Err(GetVectoredError::Cancelled)
                    }
-                    Err(MissingKey(_))
-                        if NON_INHERITED_RANGE.contains(&key)
-                            || NON_INHERITED_SPARSE_RANGE.contains(&key) =>
-                    {
-                        // Ignore missing key error for aux key range. TODO: currently, we assume non_inherited_range == aux_key_range.
-                        // When we add more types of keys into the page server, we should revisit this part of code and throw errors
-                        // accordingly.
-                        key = key.next();
-                    }
-                    Err(MissingKey(err)) => {
-                        return Err(GetVectoredError::MissingKey(err));
+                    // we only capture stuck_at_lsn=false now until we figure out https://github.com/neondatabase/neon/issues/7380
+                    Err(MissingKey(MissingKeyError {
+                        stuck_at_lsn: false,
+                        ..
+                    })) if !NON_INHERITED_RANGE.contains(&key) => {
+                        // The vectored read path handles non inherited keys specially.
+                        // If such a key cannot be reconstructed from the current timeline,
+                        // the vectored read path returns a key level error as opposed to a top
+                        // level error.
+                        return Err(GetVectoredError::MissingKey(key));
                    }
                    Err(Other(err))
                        if err
@@ -1230,11 +1126,6 @@ impl Timeline {
        lsn: Lsn,
        ctx: &RequestContext,
    ) {
-        if keyspace.overlaps(&Key::metadata_key_range()) {
-            // skip validation for metadata key range
-            return;
-        }
-
        let sequential_res = self
            .get_vectored_sequential_impl(keyspace.clone(), lsn, ctx)
            .await;
@@ -1244,7 +1135,7 @@ impl Timeline {
            match (lhs, rhs) {
                (Oversized(l), Oversized(r)) => l == r,
                (InvalidLsn(l), InvalidLsn(r)) => l == r,
-                (MissingKey(l), MissingKey(r)) => l.key == r.key,
+                (MissingKey(l), MissingKey(r)) => l == r,
                (GetReadyAncestorError(_), GetReadyAncestorError(_)) => true,
                (Other(_), Other(_)) => true,
                _ => false,
@@ -1259,7 +1150,7 @@ impl Timeline {
                               " - keyspace={:?} lsn={}"),
                       seq_err, keyspace, lsn) },
            (Ok(_), Err(GetVectoredError::GetReadyAncestorError(GetReadyAncestorError::AncestorLsnTimeout(_)))) => {
-                // Sequential get runs after vectored get, so it is possible for the later
+                // Sequential get runs after vectored get, so it is possible for the latter
                // to time out while waiting for its ancestor's Lsn to become ready and for the
                // former to succeed (it essentially has a doubled wait time).
            },
@@ -1343,12 +1234,6 @@ impl Timeline {
        self.last_record_lsn.load()
    }

-    /// Subscribe to callers of wait_lsn(). The value of the channel is None if there are no
-    /// wait_lsn() calls in progress, and Some(Lsn) if there is an active waiter for wait_lsn().
-    pub(crate) fn subscribe_for_wait_lsn_updates(&self) -> watch::Receiver<Option<Lsn>> {
-        self.last_record_lsn.status_receiver()
-    }
-
    pub(crate) fn get_disk_consistent_lsn(&self) -> Lsn {
        self.disk_consistent_lsn.load()
    }
@@ -2201,7 +2086,11 @@ impl Timeline {

                write_lock: tokio::sync::Mutex::new(None),

-                gc_info: std::sync::RwLock::new(GcInfo::default()),
+                gc_info: std::sync::RwLock::new(GcInfo {
+                    retain_lsns: Vec::new(),
+                    horizon_cutoff: Lsn(0),
+                    pitr_cutoff: Lsn(0),
+                }),

                latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
                initdb_lsn: metadata.initdb_lsn(),
@@ -2215,10 +2104,7 @@ impl Timeline {
                    // initial logical size is 0.
                    LogicalSize::empty_initial()
                },
-                partitioning: tokio::sync::Mutex::new((
-                    (KeyPartitioning::new(), KeyPartitioning::new().into_sparse()),
-                    Lsn(0),
-                )),
+                partitioning: tokio::sync::Mutex::new((KeyPartitioning::new(), Lsn(0))),
                repartition_threshold: 0,
                last_image_layer_creation_check_at: AtomicLsn::new(0),

@@ -3034,7 +2920,7 @@ trait TraversalLayerExt {

 impl TraversalLayerExt for Layer {
    fn traversal_id(&self) -> TraversalId {
-        Arc::clone(self.debug_str())
+        Arc::clone(self.local_path_str())
    }
 }

@@ -3110,6 +2996,7 @@ impl Timeline {
                            // Didn't make any progress in last iteration. Error out to avoid
                            // getting stuck in the loop.
                            return Err(PageReconstructError::MissingKey(MissingKeyError {
+                                stuck_at_lsn: true,
                                key,
                                shard: self.shard_identity.get_shard_number(&key),
                                cont_lsn: Lsn(cont_lsn.0 - 1),
@@ -3124,6 +3011,7 @@ impl Timeline {
                }
                ValueReconstructResult::Missing => {
                    return Err(PageReconstructError::MissingKey(MissingKeyError {
+                        stuck_at_lsn: false,
                        key,
                        shard: self.shard_identity.get_shard_number(&key),
                        cont_lsn,
@@ -3218,6 +3106,7 @@ impl Timeline {
            if let Some(SearchResult { lsn_floor, layer }) = layers.search(key, cont_lsn) {
                let layer = guard.get_from_desc(&layer);
                drop(guard);
+
                // Get all the data needed to reconstruct the page version from this layer.
                // But if we have an older cached page image, no need to go past that.
                let lsn_floor = max(cached_lsn + 1, lsn_floor);
@@ -3287,12 +3176,37 @@ impl Timeline {
            // Do not descend into the ancestor timeline for aux files.
            // We don't return a blanket [`GetVectoredError::MissingKey`] to avoid
            // stalling compaction.
-            keyspace.remove_overlapping_with(&KeySpace {
-                ranges: vec![NON_INHERITED_RANGE, NON_INHERITED_SPARSE_RANGE],
-            });
+            // TODO(chi): this will need to be updated for aux files v2 storage
+            if keyspace.overlaps(&NON_INHERITED_RANGE) {
+                let removed = keyspace.remove_overlapping_with(&KeySpace {
+                    ranges: vec![NON_INHERITED_RANGE],
+                });
+
+                for range in removed.ranges {
+                    let mut key = range.start;
+                    while key < range.end {
+                        reconstruct_state.on_key_error(
+                            key,
+                            PageReconstructError::MissingKey(MissingKeyError {
+                                stuck_at_lsn: false,
+                                key,
+                                shard: self.shard_identity.get_shard_number(&key),
+                                cont_lsn,
+                                request_lsn,
+                                ancestor_lsn: None,
+                                traversal_path: Vec::default(),
+                                backtrace: if cfg!(test) {
+                                    Some(std::backtrace::Backtrace::force_capture())
+                                } else {
+                                    None
+                                },
+                            }),
+                        );
+                        key = key.next();
+                    }
+                }
+            }

-            // Keyspace is fully retrieved, no ancestor timeline, or metadata scan (where we do not look
-            // into ancestor timelines). TODO: is there any other metadata which we want to inherit?
            if keyspace.total_raw_size() == 0 || timeline.ancestor_timeline.is_none() {
                break;
            }
@@ -3307,23 +3221,13 @@ impl Timeline {
        }

        if keyspace.total_raw_size() != 0 {
-            return Err(GetVectoredError::MissingKey(MissingKeyError {
-                key: keyspace.start().unwrap(), /* better if we can store the full keyspace */
-                shard: self
-                    .shard_identity
-                    .get_shard_number(&keyspace.start().unwrap()),
-                cont_lsn,
-                request_lsn,
-                ancestor_lsn: Some(timeline.ancestor_lsn),
-                traversal_path: vec![],
-                backtrace: None,
-            }));
+            return Err(GetVectoredError::MissingKey(keyspace.start().unwrap()));
        }

        Ok(())
    }

-    /// Collect the reconstruct data for a keyspace from the specified timeline.
+    /// Collect the reconstruct data for a keyspace from the specified timeline.
    ///
    /// Maintain a fringe [`LayerFringe`] which tracks all the layers that intersect
    /// the current keyspace. The current keyspace of the search at any given timeline
@@ -3752,103 +3656,66 @@ impl Timeline {
        // files instead. This is possible as long as *all* the data imported into the
        // repository have the same LSN.
        let lsn_range = frozen_layer.get_lsn_range();
-
-        // Whether to directly create image layers for this flush, or flush them as delta layers
-        let create_image_layer =
-            lsn_range.start == self.initdb_lsn && lsn_range.end == Lsn(self.initdb_lsn.0 + 1);
-
-        #[cfg(test)]
-        {
-            match &mut *self.flush_loop_state.lock().unwrap() {
-                FlushLoopState::NotStarted | FlushLoopState::Exited => {
-                    panic!("flush loop not running")
-                }
-                FlushLoopState::Running {
-                    expect_initdb_optimization,
-                    initdb_optimization_count,
-                    ..
-                } => {
-                    if create_image_layer {
+        let (layers_to_upload, delta_layer_to_add) =
+            if lsn_range.start == self.initdb_lsn && lsn_range.end == Lsn(self.initdb_lsn.0 + 1) {
+                #[cfg(test)]
+                match &mut *self.flush_loop_state.lock().unwrap() {
+                    FlushLoopState::NotStarted | FlushLoopState::Exited => {
+                        panic!("flush loop not running")
+                    }
+                    FlushLoopState::Running {
+                        initdb_optimization_count,
+                        ..
+                    } => {
                        *initdb_optimization_count += 1;
-                    } else {
+                    }
+                }
+                // Note: The 'ctx' in use here has DownloadBehavior::Error. We should not
+                // require downloading anything during initial import.
+                let (partitioning, _lsn) = self
+                    .repartition(
+                        self.initdb_lsn,
+                        self.get_compaction_target_size(),
+                        EnumSet::empty(),
+                        ctx,
+                    )
+                    .await?;
+
+                if self.cancel.is_cancelled() {
+                    return Err(FlushLayerError::Cancelled);
+                }
+
+                // For image layers, we add them immediately into the layer map.
+                (
+                    self.create_image_layers(&partitioning, self.initdb_lsn, true, ctx)
+                        .await?,
+                    None,
+                )
+            } else {
+                #[cfg(test)]
+                match &mut *self.flush_loop_state.lock().unwrap() {
+                    FlushLoopState::NotStarted | FlushLoopState::Exited => {
+                        panic!("flush loop not running")
+                    }
+                    FlushLoopState::Running {
+                        expect_initdb_optimization,
+                        ..
+                    } => {
                        assert!(!*expect_initdb_optimization, "expected initdb optimization");
                    }
                }
-            }
-        }
-
-        let (layers_to_upload, delta_layer_to_add) = if create_image_layer {
-            // Note: The 'ctx' in use here has DownloadBehavior::Error. We should not
-            // require downloading anything during initial import.
-            let ((rel_partition, metadata_partition), _lsn) = self
-                .repartition(
-                    self.initdb_lsn,
-                    self.get_compaction_target_size(),
-                    EnumSet::empty(),
-                    ctx,
+                // Normal case, write out a L0 delta layer file.
+                // `create_delta_layer` will not modify the layer map.
+                // We will remove frozen layer and add delta layer in one atomic operation later.
+                let layer = self.create_delta_layer(&frozen_layer, ctx).await?;
+                (
+                    // FIXME: even though we have a single image and single delta layer assumption
+                    // we push them to vec
+                    vec![layer.clone()],
+                    Some(layer),
                )
-                .await?;
-
-            if self.cancel.is_cancelled() {
-                return Err(FlushLayerError::Cancelled);
-            }
-
-            // For metadata, always create delta layers.
-            let delta_layer = if !metadata_partition.parts.is_empty() {
-                assert_eq!(
-                    metadata_partition.parts.len(),
-                    1,
-                    "currently sparse keyspace should only contain a single aux file keyspace"
-                );
-                let metadata_keyspace = &metadata_partition.parts[0];
-                assert_eq!(
-                    metadata_keyspace.0.ranges.len(),
-                    1,
-                    "aux file keyspace should be a single range"
-                );
-                self.create_delta_layer(
-                    &frozen_layer,
-                    ctx,
-                    Some(metadata_keyspace.0.ranges[0].clone()),
-                )
-                .await?
-            } else {
-                None
            };

-            // For image layers, we add them immediately into the layer map.
-            let mut layers_to_upload = Vec::new();
-            layers_to_upload.extend(
-                self.create_image_layers(
-                    &rel_partition,
-                    self.initdb_lsn,
-                    ImageLayerCreationMode::Initial,
-                    ctx,
-                )
-                .await?,
-            );
-
-            if let Some(delta_layer) = delta_layer {
-                layers_to_upload.push(delta_layer.clone());
-                (layers_to_upload, Some(delta_layer))
-            } else {
-                (layers_to_upload, None)
-            }
-        } else {
-            // Normal case, write out a L0 delta layer file.
-            // `create_delta_layer` will not modify the layer map.
-            // We will remove frozen layer and add delta layer in one atomic operation later.
-            let Some(layer) = self.create_delta_layer(&frozen_layer, ctx, None).await? else {
-                panic!("delta layer cannot be empty if no filter is applied");
-            };
-            (
-                // FIXME: even though we have a single image and single delta layer assumption
-                // we push them to vec
-                vec![layer.clone()],
-                Some(layer),
-            )
-        };
-
        pausable_failpoint!("flush-layer-cancel-after-writing-layer-out-pausable");

        if self.cancel.is_cancelled() {
@@ -3968,18 +3835,12 @@ impl Timeline {
        self: &Arc<Self>,
        frozen_layer: &Arc<InMemoryLayer>,
        ctx: &RequestContext,
-        key_range: Option<Range<Key>>,
-    ) -> anyhow::Result<Option<ResidentLayer>> {
+    ) -> anyhow::Result<ResidentLayer> {
        let self_clone = Arc::clone(self);
        let frozen_layer = Arc::clone(frozen_layer);
        let ctx = ctx.attached_child();
        let work = async move {
-            let Some(new_delta) = frozen_layer
-                .write_to_disk(&self_clone, &ctx, key_range)
-                .await?
-            else {
-                return Ok(None);
-            };
+            let new_delta = frozen_layer.write_to_disk(&self_clone, &ctx).await?;
            // The write_to_disk() above calls writer.finish() which already did the fsync of the inodes.
            // We just need to fsync the directory in which these inodes are linked,
            // which we know to be the timeline directory.
@@ -3998,7 +3859,7 @@ impl Timeline {
                .sync_all()
                .await
                .fatal_err("VirtualFile::sync_all timeline dir");
-            anyhow::Ok(Some(new_delta))
+            anyhow::Ok(new_delta)
        };
        // Before tokio-epoll-uring, we ran write_to_disk & the sync_all inside spawn_blocking.
        // Preserve that behavior to maintain the same behavior for `virtual_file_io_engine=std-fs`.
@@ -4025,20 +3886,19 @@ impl Timeline {
        partition_size: u64,
        flags: EnumSet<CompactFlags>,
        ctx: &RequestContext,
-    ) -> anyhow::Result<((KeyPartitioning, SparseKeyPartitioning), Lsn)> {
+    ) -> anyhow::Result<(KeyPartitioning, Lsn)> {
        let Ok(mut partitioning_guard) = self.partitioning.try_lock() else {
            // NB: there are two callers, one is the compaction task, of which there is only one per struct Tenant and hence Timeline.
            // The other is the initdb optimization in flush_frozen_layer, used by `boostrap_timeline`, which runs before `.activate()`
            // and hence before the compaction task starts.
            anyhow::bail!("repartition() called concurrently, this should not happen");
        };
-        let ((dense_partition, sparse_partition), partition_lsn) = &*partitioning_guard;
-        if lsn < *partition_lsn {
+        if lsn < partitioning_guard.1 {
            anyhow::bail!("repartition() called with LSN going backwards, this should not happen");
        }

-        let distance = lsn.0 - partition_lsn.0;
-        if *partition_lsn != Lsn(0)
+        let distance = lsn.0 - partitioning_guard.1 .0;
+        if partitioning_guard.1 != Lsn(0)
            && distance <= self.repartition_threshold
            && !flags.contains(CompactFlags::ForceRepartition)
        {
@@ -4047,18 +3907,13 @@ impl Timeline {
                threshold = self.repartition_threshold,
                "no repartitioning needed"
            );
-            return Ok((
-                (dense_partition.clone(), sparse_partition.clone()),
-                *partition_lsn,
-            ));
+            return Ok((partitioning_guard.0.clone(), partitioning_guard.1));
        }

-        let (dense_ks, sparse_ks) = self.collect_keyspace(lsn, ctx).await?;
-        let dense_partitioning = dense_ks.partition(&self.shard_identity, partition_size);
-        let sparse_partitioning = SparseKeyPartitioning {
-            parts: vec![sparse_ks],
-        }; // no partitioning for metadata keys for now
-        *partitioning_guard = ((dense_partitioning, sparse_partitioning), lsn);
+        let keyspace = self.collect_keyspace(lsn, ctx).await?;
+        let partitioning = keyspace.partition(&self.shard_identity, partition_size);
+
+        *partitioning_guard = (partitioning, lsn);

        Ok((partitioning_guard.0.clone(), partitioning_guard.1))
    }
@@ -4114,12 +3969,12 @@ impl Timeline {
        false
    }

-    #[tracing::instrument(skip_all, fields(%lsn, %mode))]
+    #[tracing::instrument(skip_all, fields(%lsn, %force))]
    async fn create_image_layers(
        self: &Arc<Timeline>,
        partitioning: &KeyPartitioning,
        lsn: Lsn,
-        mode: ImageLayerCreationMode,
+        force: bool,
        ctx: &RequestContext,
    ) -> Result<Vec<ResidentLayer>, CreateImageLayersError> {
        let timer = self.metrics.create_images_time_histo.start_timer();
@@ -4156,26 +4011,19 @@ impl Timeline {
        for partition in partitioning.parts.iter() {
            let img_range = start..partition.ranges.last().unwrap().end;

-            if partition.overlaps(&Key::metadata_key_range()) {
-                // TODO(chi): The next patch will correctly create image layers for metadata keys, and it would be a
-                // rather big change. Keep this patch small for now.
-                match mode {
-                    ImageLayerCreationMode::Force | ImageLayerCreationMode::Try => {
-                        // skip image layer creation anyways for metadata keys.
-                        start = img_range.end;
-                        continue;
-                    }
-                    ImageLayerCreationMode::Initial => {
-                        return Err(CreateImageLayersError::Other(anyhow::anyhow!("no image layer should be created for metadata keys when flushing frozen layers")));
-                    }
-                }
-            } else if let ImageLayerCreationMode::Try = mode {
-                // check_for_image_layers = false -> skip
-                // check_for_image_layers = true -> check time_for_new_image_layer -> skip/generate
-                if !check_for_image_layers || !self.time_for_new_image_layer(partition, lsn).await {
-                    start = img_range.end;
-                    continue;
-                }
+            let do_it = if force {
+                true
+            } else if check_for_image_layers {
+                // [`Self::time_for_new_image_layer`] is CPU expensive,
+                // so skip if we've not collected enough WAL since the last time
+                self.time_for_new_image_layer(partition, lsn).await
+            } else {
+                false
+            };
+
+            if !do_it {
+                start = img_range.end;
+                continue;
            }

            let mut image_layer_writer = ImageLayerWriter::new(
@@ -4255,7 +4103,7 @@ impl Timeline {
                            };

                            // Write all the keys we just read into our new image layer.
-                            image_layer_writer.put_image(img_key, img, ctx).await?;
+                            image_layer_writer.put_image(img_key, img).await?;
                            wrote_keys = true;
                        }
                    }
@@ -4266,7 +4114,7 @@ impl Timeline {
                // Normal path: we have written some data into the new image layer for this
                // partition, so flush it to disk.
                start = img_range.end;
-                let image_layer = image_layer_writer.finish(self, ctx).await?;
+                let image_layer = image_layer_writer.finish(self).await?;
                image_layers.push(image_layer);
            } else {
                // Special case: the image layer may be empty if this is a sharded tenant and the
@@ -4459,7 +4307,7 @@ impl Timeline {
        Ok(())
    }

-    /// Find the Lsns above which layer files need to be retained on
+    /// Update information about which layer files need to be retained on
    /// garbage collection. This is separate from actually performing the GC,
    /// and is updated more frequently, so that compaction can remove obsolete
    /// page versions more aggressively.
@@ -4467,6 +4315,17 @@ impl Timeline {
    /// TODO: that's wishful thinking, compaction doesn't actually do that
    /// currently.
    ///
+    /// The caller specifies how much history is needed with the 3 arguments:
+    ///
+    /// retain_lsns: keep a version of each page at these LSNs
+    /// cutoff_horizon: also keep everything newer than this LSN
+    /// pitr: the time duration required to keep data for PITR
+    ///
+    /// The 'retain_lsns' list is currently used to prevent removing files that
+    /// are needed by child timelines. In the future, the user might be able to
+    /// name additional points in time to retain. The caller is responsible for
+    /// collecting that information.
+    ///
    /// The 'cutoff_horizon' point is used to retain recent versions that might still be
    /// needed by read-only nodes. (As of this writing, the caller just passes
    /// the latest LSN subtracted by a constant, and doesn't do anything smart
@@ -4474,22 +4333,29 @@ impl Timeline {
    ///
    /// The 'pitr' duration is used to calculate a 'pitr_cutoff', which can be used to determine
    /// whether a record is needed for PITR.
+    ///
+    /// NOTE: This function holds a short-lived lock to protect the 'gc_info'
+    /// field, so that the three values passed as argument are stored
+    /// atomically. But the caller is responsible for ensuring that no new
+    /// branches are created that would need to be included in 'retain_lsns',
+    /// for example. The caller should hold `Tenant::gc_cs` lock to ensure
+    /// that.
+    ///
    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
-    pub(super) async fn find_gc_cutoffs(
+    pub(super) async fn update_gc_info(
        &self,
+        retain_lsns: Vec<Lsn>,
        cutoff_horizon: Lsn,
        pitr: Duration,
        cancel: &CancellationToken,
        ctx: &RequestContext,
-    ) -> anyhow::Result<GcCutoffs> {
+    ) -> anyhow::Result<()> {
        let _timer = self
            .metrics
-            .find_gc_cutoffs_histo
+            .update_gc_info_histo
            .start_timer()
            .record_on_drop();

-        pausable_failpoint!("Timeline::find_gc_cutoffs-pausable");
-
        // First, calculate pitr_cutoff_timestamp and then convert it to LSN.
        //
        // Some unit tests depend on garbage-collection working even when
@@ -4539,10 +4405,14 @@ impl Timeline {
            self.get_last_record_lsn()
        };

-        Ok(GcCutoffs {
-            horizon: cutoff_horizon,
-            pitr: pitr_cutoff,
-        })
+        // Grab the lock and update the values
+        *self.gc_info.write().unwrap() = GcInfo {
+            retain_lsns,
+            horizon_cutoff: cutoff_horizon,
+            pitr_cutoff,
+        };
+
+        Ok(())
    }

    /// Garbage collect layer files on a timeline that are no longer needed.
@@ -4571,8 +4441,8 @@ impl Timeline {
        let (horizon_cutoff, pitr_cutoff, retain_lsns) = {
            let gc_info = self.gc_info.read().unwrap();

-            let horizon_cutoff = min(gc_info.cutoffs.horizon, self.get_disk_consistent_lsn());
-            let pitr_cutoff = gc_info.cutoffs.pitr;
+            let horizon_cutoff = min(gc_info.horizon_cutoff, self.get_disk_consistent_lsn());
+            let pitr_cutoff = gc_info.pitr_cutoff;
            let retain_lsns = gc_info.retain_lsns.clone();
            (horizon_cutoff, pitr_cutoff, retain_lsns)
        };
@@ -4648,6 +4518,28 @@ impl Timeline {
        'outer: for l in layers.iter_historic_layers() {
            result.layers_total += 1;

+            // 0. Is this layer a relic from a shard split?
+            //    (Do this check first because irrespective of later logic regarding LSNs, this
+            //    layer should be dropped.)
+            if self.shard_identity.count >= ShardCount::new(2) {
+                // We are a sharded tenant
+                let layer = guard.get_from_desc(&l);
+                if layer.metadata().shard != self.tenant_shard_id.to_index() {
+                    // This is an ancestral layer
+                    let sharded_range = ShardedRange::new(l.get_key_range(), &self.shard_identity);
+                    if sharded_range.page_count() == 0 {
+                        // This ancestral layer only covers keys that belong to other shards
+                        info!(
+                            "garbate collecting layer {} ({:?}) after shard split",
+                            l.filename(),
+                            l.get_key_range()
+                        );
+                        layers_to_remove.push(l);
+                        continue;
+                    }
+                }
+            }
+
            // 1. Is it newer than GC horizon cutoff point?
            if l.get_lsn_range().end > horizon_cutoff {
                debug!(
--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -9,7 +9,7 @@ use std::ops::{Deref, Range};
 use std::sync::Arc;

 use super::layer_manager::LayerManager;
-use super::{CompactFlags, DurationRecorder, ImageLayerCreationMode, RecordedDuration, Timeline};
+use super::{CompactFlags, DurationRecorder, RecordedDuration, Timeline};

 use anyhow::{anyhow, Context};
 use enumset::EnumSet;
@@ -102,7 +102,7 @@ impl Timeline {
            )
            .await
        {
-            Ok(((dense_partitioning, sparse_partitioning), lsn)) => {
+            Ok((partitioning, lsn)) => {
                // Disables access_stats updates, so that the files we read remain candidates for eviction after we're done with them
                let image_ctx = RequestContextBuilder::extend(ctx)
                    .access_stats_behavior(AccessStatsBehavior::Skip)
@@ -115,37 +115,17 @@ impl Timeline {

                // 3. Create new image layers for partitions that have been modified
                // "enough".
-                let dense_layers = self
+                let layers = self
                    .create_image_layers(
-                        &dense_partitioning,
+                        &partitioning,
                        lsn,
-                        if flags.contains(CompactFlags::ForceImageLayerCreation) {
-                            ImageLayerCreationMode::Force
-                        } else {
-                            ImageLayerCreationMode::Try
-                        },
+                        flags.contains(CompactFlags::ForceImageLayerCreation),
                        &image_ctx,
                    )
                    .await
                    .map_err(anyhow::Error::from)?;

-                // For now, nothing will be produced...
-                let sparse_layers = self
-                    .create_image_layers(
-                        &sparse_partitioning.clone().into_dense(),
-                        lsn,
-                        if flags.contains(CompactFlags::ForceImageLayerCreation) {
-                            ImageLayerCreationMode::Force
-                        } else {
-                            ImageLayerCreationMode::Try
-                        },
-                        &image_ctx,
-                    )
-                    .await
-                    .map_err(anyhow::Error::from)?;
-                assert!(sparse_layers.is_empty());
-
-                self.upload_new_image_layers(dense_layers)?;
+                self.upload_new_image_layers(layers)?;
            }
            Err(err) => {
                // no partitioning? This is normal, if the timeline was just created
@@ -520,7 +500,7 @@ impl Timeline {
                            writer
                                .take()
                                .unwrap()
-                                .finish(prev_key.unwrap().next(), self, ctx)
+                                .finish(prev_key.unwrap().next(), self)
                                .await?,
                        );
                        writer = None;
@@ -562,11 +542,7 @@ impl Timeline {
                    );
                }

-                writer
-                    .as_mut()
-                    .unwrap()
-                    .put_value(key, lsn, value, ctx)
-                    .await?;
+                writer.as_mut().unwrap().put_value(key, lsn, value).await?;
            } else {
                debug!(
                    "Dropping key {} during compaction (it belongs on shard {:?})",
@@ -582,7 +558,7 @@ impl Timeline {
            prev_key = Some(key);
        }
        if let Some(writer) = writer {
-            new_layers.push(writer.finish(prev_key.unwrap().next(), self, ctx).await?);
+            new_layers.push(writer.finish(prev_key.unwrap().next(), self).await?);
        }

        // Sync layers
@@ -782,9 +758,8 @@ impl Timeline {
            return Err(CompactionError::ShuttingDown);
        }

-        let (dense_ks, _sparse_ks) = self.collect_keyspace(end_lsn, ctx).await?;
-        // TODO(chi): ignore sparse_keyspace for now, compact it in the future.
-        let mut adaptor = TimelineAdaptor::new(self, (end_lsn, dense_ks));
+        let keyspace = self.collect_keyspace(end_lsn, ctx).await?;
+        let mut adaptor = TimelineAdaptor::new(self, (end_lsn, keyspace));

        pageserver_compaction::compact_tiered::compact_tiered(
            &mut adaptor,
@@ -976,7 +951,7 @@ impl CompactionJobExecutor for TimelineAdaptor {

            let value = val.load(ctx).await?;

-            writer.put_value(key, lsn, value, ctx).await?;
+            writer.put_value(key, lsn, value).await?;

            prev = Some((key, lsn));
        }
@@ -992,7 +967,7 @@ impl CompactionJobExecutor for TimelineAdaptor {
        });

        let new_delta_layer = writer
-            .finish(prev.unwrap().0.next(), &self.timeline, ctx)
+            .finish(prev.unwrap().0.next(), &self.timeline)
            .await?;

        self.new_deltas.push(new_delta_layer);
@@ -1062,11 +1037,11 @@ impl TimelineAdaptor {
                        }
                    }
                };
-                image_layer_writer.put_image(key, img, ctx).await?;
+                image_layer_writer.put_image(key, img).await?;
                key = key.next();
            }
        }
-        let image_layer = image_layer_writer.finish(&self.timeline, ctx).await?;
+        let image_layer = image_layer_writer.finish(&self.timeline).await?;

        self.new_images.push(image_layer);

--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -22,12 +22,10 @@ use crate::tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeli
 use anyhow::Context;
 use chrono::{NaiveDateTime, Utc};
 use pageserver_api::models::TimelineState;
-
+use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey;
+use storage_broker::proto::SafekeeperTimelineInfo;
+use storage_broker::proto::SubscribeSafekeeperInfoRequest;
 use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId;
-use storage_broker::proto::{
-    FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse,
-    SubscribeByFilterRequest, TypeSubscription, TypedMessage,
-};
 use storage_broker::{BrokerClientChannel, Code, Streaming};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
@@ -91,14 +89,6 @@ pub(super) async fn connection_manager_loop_step(
        .timeline
        .subscribe_for_state_updates();

-    let mut wait_lsn_status = connection_manager_state
-        .timeline
-        .subscribe_for_wait_lsn_updates();
-
-    // TODO: create a separate config option for discovery request interval
-    let discovery_request_interval = connection_manager_state.conf.lagging_wal_timeout;
-    let mut last_discovery_ts: Option<std::time::Instant> = None;
-
    // Subscribe to the broker updates. Stream shares underlying TCP connection
    // with other streams on this client (other connection managers). When
    // object goes out of scope, stream finishes in drop() automatically.
@@ -107,12 +97,10 @@ pub(super) async fn connection_manager_loop_step(

    loop {
        let time_until_next_retry = connection_manager_state.time_until_next_retry();
-        let any_activity = connection_manager_state.wal_connection.is_some()
-            || !connection_manager_state.wal_stream_candidates.is_empty();

        // These things are happening concurrently:
        //
-        //  - cancellation request
+        //  - cancellation request
        //  - keep receiving WAL on the current connection
        //      - if the shared state says we need to change connection, disconnect and return
        //      - this runs in a separate task and we receive updates via a watch channel
@@ -120,7 +108,6 @@ pub(super) async fn connection_manager_loop_step(
        //  - receive updates from broker
        //      - this might change the current desired connection
        //  - timeline state changes to something that does not allow walreceiver to run concurrently
-        //  - if there's no connection and no candidates, try to send a discovery request

        // NB: make sure each of the select expressions are cancellation-safe
        // (no need for arms to be cancellation-safe).
@@ -227,65 +214,6 @@ pub(super) async fn connection_manager_loop_step(
                    }
                }
            } => debug!("Waking up for the next retry after waiting for {time_until_next_retry:?}"),
-
-            Some(()) = async {
-                // Reminder: this match arm needs to be cancellation-safe.
-                // Calculating time needed to wait until sending the next discovery request.
-                // Current implementation is conservative and sends discovery requests only when there are no candidates.
-
-                if any_activity {
-                    // No need to send discovery requests if there is an active connection or candidates.
-                    return None;
-                }
-
-                // Waiting for an active wait_lsn request.
-                while wait_lsn_status.borrow().is_none() {
-                    if wait_lsn_status.changed().await.is_err() {
-                        // wait_lsn_status channel was closed, exiting
-                        warn!("wait_lsn_status channel was closed in connection_manager_loop_step");
-                        return None;
-                    }
-                }
-
-                // All preconditions met, preparing to send a discovery request.
-                let now = std::time::Instant::now();
-                let next_discovery_ts = last_discovery_ts
-                    .map(|ts| ts + discovery_request_interval)
-                    .unwrap_or_else(|| now);
-
-                if next_discovery_ts > now {
-                    // Prevent sending discovery requests too frequently.
-                    tokio::time::sleep(next_discovery_ts - now).await;
-                }
-
-                let tenant_timeline_id = Some(ProtoTenantTimelineId {
-                    tenant_id: id.tenant_id.as_ref().to_owned(),
-                    timeline_id: id.timeline_id.as_ref().to_owned(),
-                });
-                let request = SafekeeperDiscoveryRequest { tenant_timeline_id };
-                let msg = TypedMessage {
-                    r#type: MessageType::SafekeeperDiscoveryRequest as i32,
-                    safekeeper_timeline_info: None,
-                    safekeeper_discovery_request: Some(request),
-                    safekeeper_discovery_response: None,
-                    };
-
-                last_discovery_ts = Some(std::time::Instant::now());
-                debug!("No active connection and no candidates, sending discovery request to the broker");
-
-                // Cancellation safety: we want to send a message to the broker, but publish_one()
-                // function can get cancelled by the other select! arm. This is absolutely fine, because
-                // we just want to receive broker updates and discovery is not important if we already
-                // receive updates.
-                //
-                // It is possible that `last_discovery_ts` will be updated, but the message will not be sent.
-                // This is totally fine because of the reason above.
-
-                // This is a fire-and-forget request, we don't care about the response
-                let _ = broker_client.publish_one(msg).await;
-                debug!("Discovery request sent to the broker");
-                None
-            } => {}
        }

        if let Some(new_candidate) = connection_manager_state.next_connection_candidate() {
@@ -303,7 +231,7 @@ async fn subscribe_for_timeline_updates(
    broker_client: &mut BrokerClientChannel,
    id: TenantTimelineId,
    cancel: &CancellationToken,
-) -> Result<Streaming<TypedMessage>, Cancelled> {
+) -> Result<Streaming<SafekeeperTimelineInfo>, Cancelled> {
    let mut attempt = 0;
    loop {
        exponential_backoff(
@@ -316,27 +244,17 @@ async fn subscribe_for_timeline_updates(
        attempt += 1;

        // subscribe to the specific timeline
-        let request = SubscribeByFilterRequest {
-            types: vec![
-                TypeSubscription {
-                    r#type: MessageType::SafekeeperTimelineInfo as i32,
-                },
-                TypeSubscription {
-                    r#type: MessageType::SafekeeperDiscoveryResponse as i32,
-                },
-            ],
-            tenant_timeline_id: Some(FilterTenantTimelineId {
-                enabled: true,
-                tenant_timeline_id: Some(ProtoTenantTimelineId {
-                    tenant_id: id.tenant_id.as_ref().to_owned(),
-                    timeline_id: id.timeline_id.as_ref().to_owned(),
-                }),
-            }),
+        let key = SubscriptionKey::TenantTimelineId(ProtoTenantTimelineId {
+            tenant_id: id.tenant_id.as_ref().to_owned(),
+            timeline_id: id.timeline_id.as_ref().to_owned(),
+        });
+        let request = SubscribeSafekeeperInfoRequest {
+            subscription_key: Some(key),
        };

        match {
            tokio::select! {
-                r = broker_client.subscribe_by_filter(request) => { r }
+                r = broker_client.subscribe_safekeeper_info(request) => { r }
                _ = cancel.cancelled() => { return Err(Cancelled); }
            }
        } {
@@ -480,7 +398,7 @@ struct RetryInfo {
 /// Data about the timeline to connect to, received from the broker.
 #[derive(Debug, Clone)]
 struct BrokerSkTimeline {
-    timeline: SafekeeperDiscoveryResponse,
+    timeline: SafekeeperTimelineInfo,
    /// Time at which the data was fetched from the broker last time, to track the stale data.
    latest_update: NaiveDateTime,
 }
@@ -688,41 +606,7 @@ impl ConnectionManagerState {
    }

    /// Adds another broker timeline into the state, if its more recent than the one already added there for the same key.
-    fn register_timeline_update(&mut self, typed_msg: TypedMessage) {
-        let mut is_discovery = false;
-        let timeline_update = match typed_msg.r#type() {
-            MessageType::SafekeeperTimelineInfo => {
-                let info = match typed_msg.safekeeper_timeline_info {
-                    Some(info) => info,
-                    None => {
-                        warn!("bad proto message from broker: no safekeeper_timeline_info");
-                        return;
-                    }
-                };
-                SafekeeperDiscoveryResponse {
-                    safekeeper_id: info.safekeeper_id,
-                    tenant_timeline_id: info.tenant_timeline_id,
-                    commit_lsn: info.commit_lsn,
-                    safekeeper_connstr: info.safekeeper_connstr,
-                    availability_zone: info.availability_zone,
-                }
-            }
-            MessageType::SafekeeperDiscoveryResponse => {
-                is_discovery = true;
-                match typed_msg.safekeeper_discovery_response {
-                    Some(response) => response,
-                    None => {
-                        warn!("bad proto message from broker: no safekeeper_discovery_response");
-                        return;
-                    }
-                }
-            }
-            _ => {
-                // unexpected message
-                return;
-            }
-        };
-
+    fn register_timeline_update(&mut self, timeline_update: SafekeeperTimelineInfo) {
        WALRECEIVER_BROKER_UPDATES.inc();

        let new_safekeeper_id = NodeId(timeline_update.safekeeper_id);
@@ -735,11 +619,7 @@ impl ConnectionManagerState {
        );

        if old_entry.is_none() {
-            info!(
-                ?is_discovery,
-                %new_safekeeper_id,
-                "New SK node was added",
-            );
+            info!("New SK node was added: {new_safekeeper_id}");
            WALRECEIVER_CANDIDATES_ADDED.inc();
        }
    }
@@ -938,7 +818,7 @@ impl ConnectionManagerState {
    fn select_connection_candidate(
        &self,
        node_to_omit: Option<NodeId>,
-    ) -> Option<(NodeId, &SafekeeperDiscoveryResponse, PgConnectionConfig)> {
+    ) -> Option<(NodeId, &SafekeeperTimelineInfo, PgConnectionConfig)> {
        self.applicable_connection_candidates()
            .filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)
            .max_by_key(|(_, info, _)| info.commit_lsn)
@@ -948,7 +828,7 @@ impl ConnectionManagerState {
    /// Some safekeepers are filtered by the retry cooldown.
    fn applicable_connection_candidates(
        &self,
-    ) -> impl Iterator<Item = (NodeId, &SafekeeperDiscoveryResponse, PgConnectionConfig)> {
+    ) -> impl Iterator<Item = (NodeId, &SafekeeperTimelineInfo, PgConnectionConfig)> {
        let now = Utc::now().naive_utc();

        self.wal_stream_candidates
@@ -1088,11 +968,19 @@ mod tests {
        latest_update: NaiveDateTime,
    ) -> BrokerSkTimeline {
        BrokerSkTimeline {
-            timeline: SafekeeperDiscoveryResponse {
+            timeline: SafekeeperTimelineInfo {
                safekeeper_id: 0,
                tenant_timeline_id: None,
+                term: 0,
+                last_log_term: 0,
+                flush_lsn: 0,
                commit_lsn,
+                backup_lsn: 0,
+                remote_consistent_lsn: 0,
+                peer_horizon_lsn: 0,
+                local_start_lsn: 0,
                safekeeper_connstr: safekeeper_connstr.to_owned(),
+                http_connstr: safekeeper_connstr.to_owned(),
                availability_zone: None,
            },
            latest_update,
@@ -1535,7 +1423,7 @@ mod tests {

        let harness = TenantHarness::create("switch_to_same_availability_zone")?;
        let mut state = dummy_state(&harness).await;
-        state.conf.availability_zone.clone_from(&test_az);
+        state.conf.availability_zone = test_az.clone();
        let current_lsn = Lsn(100_000).align();
        let now = Utc::now().naive_utc();

@@ -1568,7 +1456,7 @@ mod tests {
        // We have another safekeeper with the same commit_lsn, and it have the same availability zone as
        // the current pageserver.
        let mut same_az_sk = dummy_broker_sk_timeline(current_lsn.0, "same_az", now);
-        same_az_sk.timeline.availability_zone.clone_from(&test_az);
+        same_az_sk.timeline.availability_zone = test_az.clone();

        state.wal_stream_candidates = HashMap::from([
            (
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -10,7 +10,6 @@
 //! This is similar to PostgreSQL's virtual file descriptor facility in
 //! src/backend/storage/file/fd.c
 //!
-use crate::context::RequestContext;
 use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC};

 use crate::page_cache::PageWriteGuard;
@@ -616,7 +615,6 @@ impl VirtualFile {
        &self,
        buf: B,
        mut offset: u64,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<(), Error>) {
        let buf_len = buf.bytes_init();
        if buf_len == 0 {
@@ -625,7 +623,7 @@ impl VirtualFile {
        let mut buf = buf.slice(0..buf_len);
        while !buf.is_empty() {
            let res;
-            (buf, res) = self.write_at(buf, offset, ctx).await;
+            (buf, res) = self.write_at(buf, offset).await;
            match res {
                Ok(0) => {
                    return (
@@ -654,7 +652,6 @@ impl VirtualFile {
    pub async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<usize, Error>) {
        let nbytes = buf.bytes_init();
        if nbytes == 0 {
@@ -663,7 +660,7 @@ impl VirtualFile {
        let mut buf = buf.slice(0..nbytes);
        while !buf.is_empty() {
            let res;
-            (buf, res) = self.write(buf, ctx).await;
+            (buf, res) = self.write(buf).await;
            match res {
                Ok(0) => {
                    return (
@@ -687,10 +684,9 @@ impl VirtualFile {
    async fn write<B: IoBuf + Send>(
        &mut self,
        buf: Slice<B>,
-        ctx: &RequestContext,
    ) -> (Slice<B>, Result<usize, std::io::Error>) {
        let pos = self.pos;
-        let (buf, res) = self.write_at(buf, pos, ctx).await;
+        let (buf, res) = self.write_at(buf, pos).await;
        let n = match res {
            Ok(n) => n,
            Err(e) => return (buf, Err(e)),
@@ -728,7 +724,6 @@ impl VirtualFile {
        &self,
        buf: Slice<B>,
        offset: u64,
-        _ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */
    ) -> (Slice<B>, Result<usize, Error>) {
        let file_guard = match self.lock_file().await {
            Ok(file_guard) => file_guard,
@@ -1093,9 +1088,8 @@ impl OwnedAsyncWriter for VirtualFile {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
-        let (buf, res) = VirtualFile::write_all(self, buf, ctx).await;
+        let (buf, res) = VirtualFile::write_all(self, buf).await;
        res.map(move |v| (v, buf))
    }
 }
@@ -1152,9 +1146,6 @@ fn get_open_files() -> &'static OpenFiles {

 #[cfg(test)]
 mod tests {
-    use crate::context::DownloadBehavior;
-    use crate::task_mgr::TaskKind;
-
    use super::*;
    use rand::seq::SliceRandom;
    use rand::thread_rng;
@@ -1186,11 +1177,10 @@ mod tests {
            &self,
            buf: B,
            offset: u64,
-            ctx: &RequestContext,
        ) -> Result<(), Error> {
            match self {
                MaybeVirtualFile::VirtualFile(file) => {
-                    let (_buf, res) = file.write_all_at(buf, offset, ctx).await;
+                    let (_buf, res) = file.write_all_at(buf, offset).await;
                    res
                }
                MaybeVirtualFile::File(file) => {
@@ -1211,11 +1201,10 @@ mod tests {
        async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
            &mut self,
            buf: B,
-            ctx: &RequestContext,
        ) -> Result<(), Error> {
            match self {
                MaybeVirtualFile::VirtualFile(file) => {
-                    let (_buf, res) = file.write_all(buf, ctx).await;
+                    let (_buf, res) = file.write_all(buf).await;
                    res.map(|_| ())
                }
                MaybeVirtualFile::File(file) => {
@@ -1286,7 +1275,6 @@ mod tests {
        OF: Fn(Utf8PathBuf, OpenOptions) -> FT,
        FT: Future<Output = Result<MaybeVirtualFile, std::io::Error>>,
    {
-        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
        let testdir = crate::config::PageServerConf::test_repo_dir(testname);
        std::fs::create_dir_all(&testdir)?;

@@ -1300,7 +1288,7 @@ mod tests {
                .to_owned(),
        )
        .await?;
-        file_a.write_all(b"foobar".to_vec(), &ctx).await?;
+        file_a.write_all(b"foobar".to_vec()).await?;

        // cannot read from a file opened in write-only mode
        let _ = file_a.read_string().await.unwrap_err();
@@ -1309,7 +1297,7 @@ mod tests {
        let mut file_a = openfunc(path_a, OpenOptions::new().read(true).to_owned()).await?;

        // cannot write to a file opened in read-only mode
-        let _ = file_a.write_all(b"bar".to_vec(), &ctx).await.unwrap_err();
+        let _ = file_a.write_all(b"bar".to_vec()).await.unwrap_err();

        // Try simple read
        assert_eq!("foobar", file_a.read_string().await?);
@@ -1351,8 +1339,8 @@ mod tests {
                .to_owned(),
        )
        .await?;
-        file_b.write_all_at(b"BAR".to_vec(), 3, &ctx).await?;
-        file_b.write_all_at(b"FOO".to_vec(), 0, &ctx).await?;
+        file_b.write_all_at(b"BAR".to_vec(), 3).await?;
+        file_b.write_all_at(b"FOO".to_vec(), 0).await?;

        assert_eq!(file_b.read_string_at(2, 3).await?, "OBA");

--- a/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs
+++ b/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs
@@ -1,4 +1,4 @@
-use crate::{context::RequestContext, virtual_file::owned_buffers_io::write::OwnedAsyncWriter};
+use crate::virtual_file::owned_buffers_io::write::OwnedAsyncWriter;
 use tokio_epoll_uring::{BoundedBuf, IoBuf};

 pub struct Writer<W> {
@@ -38,9 +38,8 @@ where
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
-        let (nwritten, buf) = self.dst.write_all(buf, ctx).await?;
+        let (nwritten, buf) = self.dst.write_all(buf).await?;
        self.bytes_amount += u64::try_from(nwritten).unwrap();
        Ok((nwritten, buf))
    }
--- a/pageserver/src/virtual_file/owned_buffers_io/write.rs
+++ b/pageserver/src/virtual_file/owned_buffers_io/write.rs
@@ -1,15 +1,12 @@
 use bytes::BytesMut;
 use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};

-use crate::context::RequestContext;
-
 /// A trait for doing owned-buffer write IO.
 /// Think [`tokio::io::AsyncWrite`] but with owned buffers.
 pub trait OwnedAsyncWriter {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)>;
 }

@@ -60,9 +57,8 @@ where
    }

    #[cfg_attr(target_os = "macos", allow(dead_code))]
-    pub async fn flush_and_into_inner(mut self, ctx: &RequestContext) -> std::io::Result<W> {
-        self.flush(ctx).await?;
-
+    pub async fn flush_and_into_inner(mut self) -> std::io::Result<W> {
+        self.flush().await?;
        let Self { buf, writer } = self;
        assert!(buf.is_some());
        Ok(writer)
@@ -76,15 +72,14 @@ where
    }

    #[cfg_attr(target_os = "macos", allow(dead_code))]
-    pub async fn write_buffered<S: IoBuf + Send>(
-        &mut self,
-        chunk: Slice<S>,
-        ctx: &RequestContext,
-    ) -> std::io::Result<(usize, S)> {
+    pub async fn write_buffered<S: IoBuf>(&mut self, chunk: Slice<S>) -> std::io::Result<(usize, S)>
+    where
+        S: IoBuf + Send,
+    {
        let chunk_len = chunk.len();
        // avoid memcpy for the middle of the chunk
        if chunk.len() >= self.buf().cap() {
-            self.flush(ctx).await?;
+            self.flush().await?;
            // do a big write, bypassing `buf`
            assert_eq!(
                self.buf
@@ -93,7 +88,7 @@ where
                    .pending(),
                0
            );
-            let (nwritten, chunk) = self.writer.write_all(chunk, ctx).await?;
+            let (nwritten, chunk) = self.writer.write_all(chunk).await?;
            assert_eq!(nwritten, chunk_len);
            return Ok((nwritten, chunk));
        }
@@ -109,7 +104,7 @@ where
            slice = &slice[n..];
            if buf.pending() >= buf.cap() {
                assert_eq!(buf.pending(), buf.cap());
-                self.flush(ctx).await?;
+                self.flush().await?;
            }
        }
        assert!(slice.is_empty(), "by now we should have drained the chunk");
@@ -121,11 +116,7 @@ where
    /// It is less performant because we always have to copy the borrowed data into the internal buffer
    /// before we can do the IO. The [`Self::write_buffered`] can avoid this, which is more performant
    /// for large writes.
-    pub async fn write_buffered_borrowed(
-        &mut self,
-        mut chunk: &[u8],
-        ctx: &RequestContext,
-    ) -> std::io::Result<usize> {
+    pub async fn write_buffered_borrowed(&mut self, mut chunk: &[u8]) -> std::io::Result<usize> {
        let chunk_len = chunk.len();
        while !chunk.is_empty() {
            let buf = self.buf.as_mut().expect("must not use after an error");
@@ -136,20 +127,20 @@ where
            chunk = &chunk[n..];
            if buf.pending() >= buf.cap() {
                assert_eq!(buf.pending(), buf.cap());
-                self.flush(ctx).await?;
+                self.flush().await?;
            }
        }
        Ok(chunk_len)
    }

-    async fn flush(&mut self, ctx: &RequestContext) -> std::io::Result<()> {
+    async fn flush(&mut self) -> std::io::Result<()> {
        let buf = self.buf.take().expect("must not use after an error");
        let buf_len = buf.pending();
        if buf_len == 0 {
            self.buf = Some(buf);
            return Ok(());
        }
-        let (nwritten, io_buf) = self.writer.write_all(buf.flush(), ctx).await?;
+        let (nwritten, io_buf) = self.writer.write_all(buf.flush()).await?;
        assert_eq!(nwritten, buf_len);
        self.buf = Some(Buffer::reuse_after_flush(io_buf));
        Ok(())
@@ -215,7 +206,6 @@ impl OwnedAsyncWriter for Vec<u8> {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        _: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
        let nbytes = buf.bytes_init();
        if nbytes == 0 {
@@ -232,8 +222,6 @@ mod tests {
    use bytes::BytesMut;

    use super::*;
-    use crate::context::{DownloadBehavior, RequestContext};
-    use crate::task_mgr::TaskKind;

    #[derive(Default)]
    struct RecorderWriter {
@@ -243,7 +231,6 @@ mod tests {
        async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
            &mut self,
            buf: B,
-            _: &RequestContext,
        ) -> std::io::Result<(usize, B::Buf)> {
            let nbytes = buf.bytes_init();
            if nbytes == 0 {
@@ -256,14 +243,10 @@ mod tests {
        }
    }

-    fn test_ctx() -> RequestContext {
-        RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
-    }
-
    macro_rules! write {
        ($writer:ident, $data:literal) => {{
            $writer
-                .write_buffered(::bytes::Bytes::from_static($data).slice_full(), &test_ctx())
+                .write_buffered(::bytes::Bytes::from_static($data).slice_full())
                .await?;
        }};
    }
@@ -277,7 +260,7 @@ mod tests {
        write!(writer, b"c");
        write!(writer, b"d");
        write!(writer, b"e");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            vec![Vec::from(b"ab"), Vec::from(b"cd"), Vec::from(b"e")]
@@ -293,7 +276,7 @@ mod tests {
        write!(writer, b"de");
        write!(writer, b"");
        write!(writer, b"fghijk");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            vec![Vec::from(b"abc"), Vec::from(b"de"), Vec::from(b"fghijk")]
@@ -309,7 +292,7 @@ mod tests {
        write!(writer, b"bc");
        write!(writer, b"d");
        write!(writer, b"e");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            vec![Vec::from(b"a"), Vec::from(b"bc"), Vec::from(b"de")]
@@ -319,20 +302,18 @@ mod tests {

    #[tokio::test]
    async fn test_write_all_borrowed_always_goes_through_buffer() -> std::io::Result<()> {
-        let ctx = test_ctx();
-        let ctx = &ctx;
        let recorder = RecorderWriter::default();
        let mut writer = BufferedWriter::new(recorder, BytesMut::with_capacity(2));

-        writer.write_buffered_borrowed(b"abc", ctx).await?;
-        writer.write_buffered_borrowed(b"d", ctx).await?;
-        writer.write_buffered_borrowed(b"e", ctx).await?;
-        writer.write_buffered_borrowed(b"fg", ctx).await?;
-        writer.write_buffered_borrowed(b"hi", ctx).await?;
-        writer.write_buffered_borrowed(b"j", ctx).await?;
-        writer.write_buffered_borrowed(b"klmno", ctx).await?;
+        writer.write_buffered_borrowed(b"abc").await?;
+        writer.write_buffered_borrowed(b"d").await?;
+        writer.write_buffered_borrowed(b"e").await?;
+        writer.write_buffered_borrowed(b"fg").await?;
+        writer.write_buffered_borrowed(b"hi").await?;
+        writer.write_buffered_borrowed(b"j").await?;
+        writer.write_buffered_borrowed(b"klmno").await?;

-        let recorder = writer.flush_and_into_inner(ctx).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            {
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -59,8 +59,8 @@ prometheus.workspace = true
 rand.workspace = true
 regex.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
-reqwest.workspace = true
-reqwest-middleware = { workspace = true, features = ["json"] }
+reqwest = { workspace = true, features = ["json"] }
+reqwest-middleware.workspace = true
 reqwest-retry.workspace = true
 reqwest-tracing.workspace = true
 routerify.workspace = true
@@ -84,7 +84,6 @@ tokio-postgres.workspace = true
 tokio-rustls.workspace = true
 tokio-util.workspace = true
 tokio = { workspace = true, features = ["signal"] }
-tower-service.workspace = true
 tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -118,11 +118,8 @@ struct ProxyCliArgs {
    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
    wake_compute_cache: String,
    /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
+    #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
    wake_compute_lock: String,
-    /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
-    connect_compute_lock: String,
    /// Allow self-signed certificates for compute nodes (for testing)
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    allow_self_signed_compute: bool,
@@ -532,21 +529,24 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
                endpoint_cache_config,
            )));

-            let config::ConcurrencyLockOptions {
+            let config::WakeComputeLockOptions {
                shards,
                permits,
                epoch,
                timeout,
            } = args.wake_compute_lock.parse()?;
            info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
-            let locks = Box::leak(Box::new(console::locks::ApiLocks::new(
-                "wake_compute_lock",
-                permits,
-                shards,
-                timeout,
-                epoch,
-                &Metrics::get().wake_compute_lock,
-            )?));
+            let locks = Box::leak(Box::new(
+                console::locks::ApiLocks::new(
+                    "wake_compute_lock",
+                    permits,
+                    shards,
+                    timeout,
+                    epoch,
+                    &Metrics::get().wake_compute_lock,
+                )
+                .unwrap(),
+            ));
            tokio::spawn(locks.garbage_collect_worker());

            let url = args.auth_endpoint.parse()?;
@@ -572,23 +572,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
            auth::BackendType::Link(MaybeOwned::Owned(url), ())
        }
    };
-
-    let config::ConcurrencyLockOptions {
-        shards,
-        permits,
-        epoch,
-        timeout,
-    } = args.connect_compute_lock.parse()?;
-    info!(permits, shards, ?epoch, "Using NodeLocks (connect_compute)");
-    let connect_compute_locks = console::locks::ApiLocks::new(
-        "connect_compute_lock",
-        permits,
-        shards,
-        timeout,
-        epoch,
-        &Metrics::get().proxy.connect_compute_lock,
-    )?;
-
    let http_config = HttpConfig {
        request_timeout: args.sql_over_http.sql_over_http_timeout,
        pool_options: GlobalConnPoolOptions {
@@ -624,14 +607,11 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        region: args.region.clone(),
        aws_region: args.aws_region.clone(),
        wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
-        connect_compute_locks,
        connect_to_compute_retry_config: config::RetryConfig::parse(
            &args.connect_to_compute_retry,
        )?,
    }));

-    tokio::spawn(config.connect_compute_locks.garbage_collect_worker());
-
    Ok(config)
 }

--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -21,7 +21,7 @@ use crate::{
    config::EndpointCacheConfig,
    context::RequestMonitoring,
    intern::{BranchIdInt, EndpointIdInt, ProjectIdInt},
-    metrics::{Metrics, RedisErrors, RedisEventsCount},
+    metrics::{Metrics, RedisErrors},
    rate_limiter::GlobalRateLimiter,
    redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider,
    EndpointId,
@@ -100,26 +100,14 @@ impl EndpointsCache {
        if let Some(endpoint_created) = key.endpoint_created {
            self.endpoints
                .insert(EndpointIdInt::from(&endpoint_created.endpoint_id.into()));
-            Metrics::get()
-                .proxy
-                .redis_events_count
-                .inc(RedisEventsCount::EndpointCreated);
        }
        if let Some(branch_created) = key.branch_created {
            self.branches
                .insert(BranchIdInt::from(&branch_created.branch_id.into()));
-            Metrics::get()
-                .proxy
-                .redis_events_count
-                .inc(RedisEventsCount::BranchCreated);
        }
        if let Some(project_created) = key.project_created {
            self.projects
                .insert(ProjectIdInt::from(&project_created.project_id.into()));
-            Metrics::get()
-                .proxy
-                .redis_events_count
-                .inc(RedisEventsCount::ProjectCreated);
        }
    }
    pub async fn do_read(
--- a/proxy/src/cache/project_info.rs
+++ b/proxy/src/cache/project_info.rs
@@ -5,11 +5,9 @@ use std::{
    time::Duration,
 };

-use async_trait::async_trait;
 use dashmap::DashMap;
 use rand::{thread_rng, Rng};
 use smol_str::SmolStr;
-use tokio::sync::Mutex;
 use tokio::time::Instant;
 use tracing::{debug, info};

@@ -23,12 +21,11 @@ use crate::{

 use super::{Cache, Cached};

-#[async_trait]
 pub trait ProjectInfoCache {
    fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
    fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
-    async fn decrement_active_listeners(&self);
-    async fn increment_active_listeners(&self);
+    fn enable_ttl(&self);
+    fn disable_ttl(&self);
 }

 struct Entry<T> {
@@ -119,10 +116,8 @@ pub struct ProjectInfoCacheImpl {

    start_time: Instant,
    ttl_disabled_since_us: AtomicU64,
-    active_listeners_lock: Mutex<usize>,
 }

-#[async_trait]
 impl ProjectInfoCache for ProjectInfoCacheImpl {
    fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
        info!("invalidating allowed ips for project `{}`", project_id);
@@ -153,27 +148,15 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
            }
        }
    }
-    async fn decrement_active_listeners(&self) {
-        let mut listeners_guard = self.active_listeners_lock.lock().await;
-        if *listeners_guard == 0 {
-            tracing::error!("active_listeners count is already 0, something is broken");
-            return;
-        }
-        *listeners_guard -= 1;
-        if *listeners_guard == 0 {
-            self.ttl_disabled_since_us
-                .store(u64::MAX, std::sync::atomic::Ordering::SeqCst);
-        }
+    fn enable_ttl(&self) {
+        self.ttl_disabled_since_us
+            .store(u64::MAX, std::sync::atomic::Ordering::Relaxed);
    }

-    async fn increment_active_listeners(&self) {
-        let mut listeners_guard = self.active_listeners_lock.lock().await;
-        *listeners_guard += 1;
-        if *listeners_guard == 1 {
-            let new_ttl = (self.start_time.elapsed() + self.config.ttl).as_micros() as u64;
-            self.ttl_disabled_since_us
-                .store(new_ttl, std::sync::atomic::Ordering::SeqCst);
-        }
+    fn disable_ttl(&self) {
+        let new_ttl = (self.start_time.elapsed() + self.config.ttl).as_micros() as u64;
+        self.ttl_disabled_since_us
+            .store(new_ttl, std::sync::atomic::Ordering::Relaxed);
    }
 }

@@ -185,7 +168,6 @@ impl ProjectInfoCacheImpl {
            config,
            ttl_disabled_since_us: AtomicU64::new(u64::MAX),
            start_time: Instant::now(),
-            active_listeners_lock: Mutex::new(0),
        }
    }

@@ -450,7 +432,7 @@ mod tests {
            ttl: Duration::from_secs(1),
            gc_interval: Duration::from_secs(600),
        }));
-        cache.clone().increment_active_listeners().await;
+        cache.clone().disable_ttl();
        tokio::time::advance(Duration::from_secs(2)).await;

        let project_id: ProjectId = "project".into();
@@ -507,7 +489,7 @@ mod tests {
    }

    #[tokio::test]
-    async fn test_increment_active_listeners_invalidate_added_before() {
+    async fn test_disable_ttl_invalidate_added_before() {
        tokio::time::pause();
        let cache = Arc::new(ProjectInfoCacheImpl::new(ProjectInfoCacheOptions {
            size: 2,
@@ -532,7 +514,7 @@ mod tests {
            (&user1).into(),
            secret1.clone(),
        );
-        cache.clone().increment_active_listeners().await;
+        cache.clone().disable_ttl();
        tokio::time::advance(Duration::from_millis(100)).await;
        cache.insert_role_secret(
            (&project_id).into(),
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -6,7 +6,6 @@ use crate::{
    error::{ReportableError, UserFacingError},
    metrics::{Metrics, NumDbConnectionsGuard},
    proxy::neon_option,
-    Host,
 };
 use futures::{FutureExt, TryFutureExt};
 use itertools::Itertools;
@@ -102,16 +101,6 @@ impl ConnCfg {
        }
    }

-    pub fn get_host(&self) -> Result<Host, WakeComputeError> {
-        match self.0.get_hosts() {
-            [tokio_postgres::config::Host::Tcp(s)] => Ok(s.into()),
-            // we should not have multiple address or unix addresses.
-            _ => Err(WakeComputeError::BadComputeAddress(
-                "invalid compute address".into(),
-            )),
-        }
-    }
-
    /// Apply startup message params to the connection config.
    pub fn set_startup_params(&mut self, params: &StartupMessageParams) {
        // Only set `user` if it's not present in the config.
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,9 +1,7 @@
 use crate::{
    auth::{self, backend::AuthRateLimiter},
-    console::locks::ApiLocks,
    rate_limiter::RateBucketInfo,
    serverless::GlobalConnPoolOptions,
-    Host,
 };
 use anyhow::{bail, ensure, Context, Ok};
 use itertools::Itertools;
@@ -36,7 +34,6 @@ pub struct ProxyConfig {
    pub handshake_timeout: Duration,
    pub aws_region: String,
    pub wake_compute_retry_config: RetryConfig,
-    pub connect_compute_locks: ApiLocks<Host>,
    pub connect_to_compute_retry_config: RetryConfig,
 }

@@ -576,7 +573,7 @@ impl RetryConfig {
 }

 /// Helper for cmdline cache options parsing.
-pub struct ConcurrencyLockOptions {
+pub struct WakeComputeLockOptions {
    /// The number of shards the lock map should have
    pub shards: usize,
    /// The number of allowed concurrent requests for each endpoitn
@@ -587,12 +584,9 @@ pub struct ConcurrencyLockOptions {
    pub timeout: Duration,
 }

-impl ConcurrencyLockOptions {
+impl WakeComputeLockOptions {
    /// Default options for [`crate::console::provider::ApiLocks`].
    pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
-    /// Default options for [`crate::console::provider::ApiLocks`].
-    pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
-        "shards=64,permits=50,epoch=10m,timeout=500ms";

    // pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "shards=32,permits=4,epoch=10m,timeout=1s";

@@ -642,7 +636,7 @@ impl ConcurrencyLockOptions {
    }
 }

-impl FromStr for ConcurrencyLockOptions {
+impl FromStr for WakeComputeLockOptions {
    type Err = anyhow::Error;

    fn from_str(options: &str) -> Result<Self, Self::Err> {
@@ -678,7 +672,7 @@ mod tests {

    #[test]
    fn test_parse_lock_options() -> anyhow::Result<()> {
-        let ConcurrencyLockOptions {
+        let WakeComputeLockOptions {
            epoch,
            permits,
            shards,
@@ -689,7 +683,7 @@ mod tests {
        assert_eq!(shards, 32);
        assert_eq!(permits, 4);

-        let ConcurrencyLockOptions {
+        let WakeComputeLockOptions {
            epoch,
            permits,
            shards,
@@ -700,7 +694,7 @@ mod tests {
        assert_eq!(shards, 16);
        assert_eq!(permits, 8);

-        let ConcurrencyLockOptions {
+        let WakeComputeLockOptions {
            epoch,
            permits,
            shards,
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -17,7 +17,7 @@ use crate::{
    scram, EndpointCacheKey,
 };
 use dashmap::DashMap;
-use std::{hash::Hash, sync::Arc, time::Duration};
+use std::{sync::Arc, time::Duration};
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tokio::time::Instant;
 use tracing::info;
@@ -447,16 +447,16 @@ impl ApiCaches {
 }

 /// Various caches for [`console`](super).
-pub struct ApiLocks<K> {
+pub struct ApiLocks {
    name: &'static str,
-    node_locks: DashMap<K, Arc<Semaphore>>,
+    node_locks: DashMap<EndpointCacheKey, Arc<Semaphore>>,
    permits: usize,
    timeout: Duration,
    epoch: std::time::Duration,
    metrics: &'static ApiLockMetrics,
 }

-impl<K: Hash + Eq + Clone> ApiLocks<K> {
+impl ApiLocks {
    pub fn new(
        name: &'static str,
        permits: usize,
@@ -475,7 +475,10 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
        })
    }

-    pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, errors::WakeComputeError> {
+    pub async fn get_wake_compute_permit(
+        &self,
+        key: &EndpointCacheKey,
+    ) -> Result<WakeComputePermit, errors::WakeComputeError> {
        if self.permits == 0 {
            return Ok(WakeComputePermit { permit: None });
        }
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -13,7 +13,7 @@ use crate::{
    http,
    metrics::{CacheOutcome, Metrics},
    rate_limiter::EndpointRateLimiter,
-    scram, EndpointCacheKey, Normalize,
+    scram, Normalize,
 };
 use crate::{cache::Cached, context::RequestMonitoring};
 use futures::TryFutureExt;
@@ -25,7 +25,7 @@ use tracing::{error, info, info_span, warn, Instrument};
 pub struct Api {
    endpoint: http::Endpoint,
    pub caches: &'static ApiCaches,
-    pub locks: &'static ApiLocks<EndpointCacheKey>,
+    pub locks: &'static ApiLocks,
    pub endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    jwt: String,
 }
@@ -35,7 +35,7 @@ impl Api {
    pub fn new(
        endpoint: http::Endpoint,
        caches: &'static ApiCaches,
-        locks: &'static ApiLocks<EndpointCacheKey>,
+        locks: &'static ApiLocks,
        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    ) -> Self {
        let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
@@ -289,7 +289,7 @@ impl super::Api for Api {
            return Err(WakeComputeError::TooManyConnections);
        }

-        let permit = self.locks.get_permit(&key).await?;
+        let permit = self.locks.get_wake_compute_permit(&key).await?;

        // after getting back a permit - it's possible the cache was filled
        // double check
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -4,7 +4,7 @@

 pub mod health_server;

-use std::{str::FromStr, sync::Arc, time::Duration};
+use std::{sync::Arc, time::Duration};

 use futures::FutureExt;
 pub use reqwest::{Request, Response, StatusCode};
@@ -103,12 +103,12 @@ impl Endpoint {
    }
 }

-use hyper_util::client::legacy::connect::dns::{
-    GaiResolver as HyperGaiResolver, Name as HyperName,
-};
-use reqwest::dns::{Addrs, Name, Resolve, Resolving};
 /// https://docs.rs/reqwest/0.11.18/src/reqwest/dns/gai.rs.html
-use tower_service::Service;
+use hyper::{
+    client::connect::dns::{GaiResolver as HyperGaiResolver, Name},
+    service::Service,
+};
+use reqwest::dns::{Addrs, Resolve, Resolving};
 #[derive(Debug)]
 pub struct GaiResolver(HyperGaiResolver);

@@ -121,12 +121,11 @@ impl Default for GaiResolver {
 impl Resolve for GaiResolver {
    fn resolve(&self, name: Name) -> Resolving {
        let this = &mut self.0.clone();
-        let hyper_name = HyperName::from_str(name.as_str()).expect("name should be valid");
        let start = Instant::now();
        Box::pin(
-            Service::<HyperName>::call(this, hyper_name).map(move |result| {
+            Service::<Name>::call(this, name.clone()).map(move |result| {
                let resolve_duration = start.elapsed();
-                trace!(duration = ?resolve_duration, addr = %name.as_str(), "resolve host complete");
+                trace!(duration = ?resolve_duration, addr = %name, "resolve host complete");
                result
                    .map(|addrs| -> Addrs { Box::new(addrs) })
                    .map_err(|err| -> Box<dyn std::error::Error + Send + Sync> { Box::new(err) })
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -159,9 +159,6 @@ smol_str_wrapper!(EndpointCacheKey);

 smol_str_wrapper!(DbName);

-// postgres hostname, will likely be a port:ip addr
-smol_str_wrapper!(Host);
-
 // Endpoints are a bit tricky. Rare they might be branches or projects.
 impl EndpointId {
    pub fn is_endpoint(&self) -> bool {
--- a/proxy/src/metrics.rs
+++ b/proxy/src/metrics.rs
@@ -123,12 +123,6 @@ pub struct ProxyMetrics {
    /// Number of retries (per outcome, per retry_type).
    #[metric(metadata = Thresholds::with_buckets([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]))]
    pub retries_metric: HistogramVec<RetriesMetricSet, 9>,
-
-    /// Number of events consumed from redis (per event type).
-    pub redis_events_count: CounterVec<StaticLabelSet<RedisEventsCount>>,
-
-    #[metric(namespace = "connect_compute_lock")]
-    pub connect_compute_lock: ApiLockMetrics,
 }

 #[derive(MetricGroup)]
@@ -152,12 +146,6 @@ impl Default for ProxyMetrics {
    }
 }

-impl Default for ApiLockMetrics {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 #[derive(FixedCardinalityLabel, Copy, Clone)]
 #[label(singleton = "direction")]
 pub enum HttpDirection {
@@ -542,14 +530,3 @@ pub enum RetryType {
    WakeCompute,
    ConnectToCompute,
 }
-
-#[derive(FixedCardinalityLabel, Clone, Copy, Debug)]
-#[label(singleton = "event")]
-pub enum RedisEventsCount {
-    EndpointCreated,
-    BranchCreated,
-    ProjectCreated,
-    CancelSession,
-    PasswordUpdate,
-    AllowedIpsUpdate,
-}
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -301,10 +301,7 @@ pub async fn handle_client<S: AsyncRead + AsyncWrite + Unpin>(

    let mut node = connect_to_compute(
        ctx,
-        &TcpMechanism {
-            params: &params,
-            locks: &config.connect_compute_locks,
-        },
+        &TcpMechanism { params: &params },
        &user_info,
        mode.allow_self_signed_compute(config),
        config.wake_compute_retry_config,
--- a/proxy/src/proxy/connect_compute.rs
+++ b/proxy/src/proxy/connect_compute.rs
@@ -2,7 +2,7 @@ use crate::{
    auth::backend::ComputeCredentialKeys,
    compute::{self, PostgresConnection},
    config::RetryConfig,
-    console::{self, errors::WakeComputeError, locks::ApiLocks, CachedNodeInfo, NodeInfo},
+    console::{self, errors::WakeComputeError, CachedNodeInfo, NodeInfo},
    context::RequestMonitoring,
    error::ReportableError,
    metrics::{ConnectOutcome, ConnectionFailureKind, Metrics, RetriesMetricGroup, RetryType},
@@ -10,7 +10,6 @@ use crate::{
        retry::{retry_after, ShouldRetry},
        wake_compute::wake_compute,
    },
-    Host,
 };
 use async_trait::async_trait;
 use pq_proto::StartupMessageParams;
@@ -65,9 +64,6 @@ pub trait ComputeConnectBackend {
 pub struct TcpMechanism<'a> {
    /// KV-dictionary with PostgreSQL connection params.
    pub params: &'a StartupMessageParams,
-
-    /// connect_to_compute concurrency lock
-    pub locks: &'static ApiLocks<Host>,
 }

 #[async_trait]
@@ -83,8 +79,6 @@ impl ConnectMechanism for TcpMechanism<'_> {
        node_info: &console::CachedNodeInfo,
        timeout: time::Duration,
    ) -> Result<PostgresConnection, Self::Error> {
-        let host = node_info.config.get_host()?;
-        let _permit = self.locks.get_permit(&host).await?;
        node_info.connect(ctx, timeout).await
    }

--- a/proxy/src/proxy/wake_compute.rs
+++ b/proxy/src/proxy/wake_compute.rs
@@ -6,7 +6,7 @@ use crate::metrics::{
    WakeupFailureKind,
 };
 use crate::proxy::retry::retry_after;
-use hyper1::StatusCode;
+use hyper::StatusCode;
 use std::ops::ControlFlow;
 use tracing::{error, info, warn};

--- a/proxy/src/redis/notifications.rs
+++ b/proxy/src/redis/notifications.rs
@@ -4,7 +4,6 @@ use futures::StreamExt;
 use pq_proto::CancelKeyData;
 use redis::aio::PubSub;
 use serde::{Deserialize, Serialize};
-use tokio_util::sync::CancellationToken;
 use uuid::Uuid;

 use super::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
@@ -12,7 +11,7 @@ use crate::{
    cache::project_info::ProjectInfoCache,
    cancellation::{CancelMap, CancellationHandler},
    intern::{ProjectIdInt, RoleNameInt},
-    metrics::{Metrics, RedisErrors, RedisEventsCount},
+    metrics::{Metrics, RedisErrors},
 };

 const CPLANE_CHANNEL_NAME: &str = "neondb-proxy-ws-updates";
@@ -78,16 +77,6 @@ struct MessageHandler<C: ProjectInfoCache + Send + Sync + 'static> {
    region_id: String,
 }

-impl<C: ProjectInfoCache + Send + Sync + 'static> Clone for MessageHandler<C> {
-    fn clone(&self) -> Self {
-        Self {
-            cache: self.cache.clone(),
-            cancellation_handler: self.cancellation_handler.clone(),
-            region_id: self.region_id.clone(),
-        }
-    }
-}
-
 impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
    pub fn new(
        cache: Arc<C>,
@@ -100,11 +89,11 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
            region_id,
        }
    }
-    pub async fn increment_active_listeners(&self) {
-        self.cache.increment_active_listeners().await;
+    pub fn disable_ttl(&self) {
+        self.cache.disable_ttl();
    }
-    pub async fn decrement_active_listeners(&self) {
-        self.cache.decrement_active_listeners().await;
+    pub fn enable_ttl(&self) {
+        self.cache.enable_ttl();
    }
    #[tracing::instrument(skip(self, msg), fields(session_id = tracing::field::Empty))]
    async fn handle_message(&self, msg: redis::Msg) -> anyhow::Result<()> {
@@ -129,10 +118,6 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
                    "session_id",
                    &tracing::field::display(cancel_session.session_id),
                );
-                Metrics::get()
-                    .proxy
-                    .redis_events_count
-                    .inc(RedisEventsCount::CancelSession);
                if let Some(cancel_region) = cancel_session.region_id {
                    // If the message is not for this region, ignore it.
                    if cancel_region != self.region_id {
@@ -153,17 +138,6 @@ impl<C: ProjectInfoCache + Send + Sync + 'static> MessageHandler<C> {
            }
            _ => {
                invalidate_cache(self.cache.clone(), msg.clone());
-                if matches!(msg, AllowedIpsUpdate { .. }) {
-                    Metrics::get()
-                        .proxy
-                        .redis_events_count
-                        .inc(RedisEventsCount::AllowedIpsUpdate);
-                } else if matches!(msg, PasswordUpdate { .. }) {
-                    Metrics::get()
-                        .proxy
-                        .redis_events_count
-                        .inc(RedisEventsCount::PasswordUpdate);
-                }
                // It might happen that the invalid entry is on the way to be cached.
                // To make sure that the entry is invalidated, let's repeat the invalidation in INVALIDATION_LAG seconds.
                // TODO: include the version (or the timestamp) in the message and invalidate only if the entry is cached before the message.
@@ -193,24 +167,37 @@ fn invalidate_cache<C: ProjectInfoCache>(cache: Arc<C>, msg: Notification) {
    }
 }

-async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
-    handler: MessageHandler<C>,
+/// Handle console's invalidation messages.
+#[tracing::instrument(name = "console_notifications", skip_all)]
+pub async fn task_main<C>(
    redis: ConnectionWithCredentialsProvider,
-    cancellation_token: CancellationToken,
-) -> anyhow::Result<()> {
+    cache: Arc<C>,
+    cancel_map: CancelMap,
+    region_id: String,
+) -> anyhow::Result<Infallible>
+where
+    C: ProjectInfoCache + Send + Sync + 'static,
+{
+    cache.enable_ttl();
+    let handler = MessageHandler::new(
+        cache,
+        Arc::new(CancellationHandler::<()>::new(
+            cancel_map,
+            crate::metrics::CancellationSource::FromRedis,
+        )),
+        region_id,
+    );
+
    loop {
-        if cancellation_token.is_cancelled() {
-            return Ok(());
-        }
        let mut conn = match try_connect(&redis).await {
            Ok(conn) => {
-                handler.increment_active_listeners().await;
+                handler.disable_ttl();
                conn
            }
            Err(e) => {
                tracing::error!(
-            "failed to connect to redis: {e}, will try to reconnect in {RECONNECT_TIMEOUT:#?}"
-        );
+                    "failed to connect to redis: {e}, will try to reconnect in {RECONNECT_TIMEOUT:#?}"
+                );
                tokio::time::sleep(RECONNECT_TIMEOUT).await;
                continue;
            }
@@ -224,47 +211,8 @@ async fn handle_messages<C: ProjectInfoCache + Send + Sync + 'static>(
                    break;
                }
            }
-            if cancellation_token.is_cancelled() {
-                handler.decrement_active_listeners().await;
-                return Ok(());
-            }
        }
-        handler.decrement_active_listeners().await;
-    }
-}
-
-/// Handle console's invalidation messages.
-#[tracing::instrument(name = "redis_notifications", skip_all)]
-pub async fn task_main<C>(
-    redis: ConnectionWithCredentialsProvider,
-    cache: Arc<C>,
-    cancel_map: CancelMap,
-    region_id: String,
-) -> anyhow::Result<Infallible>
-where
-    C: ProjectInfoCache + Send + Sync + 'static,
-{
-    let cancellation_handler = Arc::new(CancellationHandler::<()>::new(
-        cancel_map,
-        crate::metrics::CancellationSource::FromRedis,
-    ));
-    let handler = MessageHandler::new(cache, cancellation_handler, region_id);
-    // 6h - 1m.
-    // There will be 1 minute overlap between two tasks. But at least we can be sure that no message is lost.
-    let mut interval = tokio::time::interval(std::time::Duration::from_secs(6 * 60 * 60 - 60));
-    loop {
-        let cancellation_token = CancellationToken::new();
-        interval.tick().await;
-
-        tokio::spawn(handle_messages(
-            handler.clone(),
-            redis.clone(),
-            cancellation_token.clone(),
-        ));
-        tokio::spawn(async move {
-            tokio::time::sleep(std::time::Duration::from_secs(6 * 60 * 60)).await; // 6h.
-            cancellation_token.cancel();
-        });
+        handler.enable_ttl();
    }
 }

--- a/proxy/src/serverless/backend.rs
+++ b/proxy/src/serverless/backend.rs
@@ -9,13 +9,11 @@ use crate::{
    config::{AuthenticationConfig, ProxyConfig},
    console::{
        errors::{GetAuthInfoError, WakeComputeError},
-        locks::ApiLocks,
        CachedNodeInfo,
    },
    context::RequestMonitoring,
    error::{ErrorKind, ReportableError, UserFacingError},
-    proxy::{connect_compute::ConnectMechanism, retry::ShouldRetry},
-    Host,
+    proxy::connect_compute::ConnectMechanism,
 };

 use super::conn_pool::{poll_client, Client, ConnInfo, GlobalConnPool};
@@ -107,7 +105,6 @@ impl PoolingBackend {
                conn_id,
                conn_info,
                pool: self.pool.clone(),
-                locks: &self.config.connect_compute_locks,
            },
            &backend,
            false, // do not allow self signed compute for http flow
@@ -157,31 +154,16 @@ impl UserFacingError for HttpConnError {
    }
 }

-impl ShouldRetry for HttpConnError {
-    fn could_retry(&self) -> bool {
-        match self {
-            HttpConnError::ConnectionError(e) => e.could_retry(),
-            HttpConnError::ConnectionClosedAbruptly(_) => false,
-            HttpConnError::GetAuthInfo(_) => false,
-            HttpConnError::AuthError(_) => false,
-            HttpConnError::WakeCompute(_) => false,
-        }
-    }
-}
-
 struct TokioMechanism {
    pool: Arc<GlobalConnPool<tokio_postgres::Client>>,
    conn_info: ConnInfo,
    conn_id: uuid::Uuid,
-
-    /// connect_to_compute concurrency lock
-    locks: &'static ApiLocks<Host>,
 }

 #[async_trait]
 impl ConnectMechanism for TokioMechanism {
    type Connection = Client<tokio_postgres::Client>;
-    type ConnectError = HttpConnError;
+    type ConnectError = tokio_postgres::Error;
    type Error = HttpConnError;

    async fn connect_once(
@@ -190,9 +172,6 @@ impl ConnectMechanism for TokioMechanism {
        node_info: &CachedNodeInfo,
        timeout: Duration,
    ) -> Result<Self::Connection, Self::ConnectError> {
-        let host = node_info.config.get_host()?;
-        let permit = self.locks.get_permit(&host).await?;
-
        let mut config = (*node_info.config).clone();
        let config = config
            .user(&self.conn_info.user_info.user)
@@ -203,7 +182,6 @@ impl ConnectMechanism for TokioMechanism {
        let pause = ctx.latency_timer.pause(crate::metrics::Waiting::Compute);
        let (client, connection) = config.connect(tokio_postgres::NoTls).await?;
        drop(pause);
-        drop(permit);

        tracing::Span::current().record("pid", &tracing::field::display(client.get_process_id()));
        Ok(poll_client(
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,5 +1,5 @@
 [toolchain]
-channel = "1.78.0"
+channel = "1.77.0"
 profile = "default"
 # The default profile includes rustc, rust-std, cargo, rust-docs, rustfmt and clippy.
 # https://rust-lang.github.io/rustup/concepts/profiles.html
--- a/s3_scrubber/Cargo.toml
+++ b/s3_scrubber/Cargo.toml
@@ -22,12 +22,7 @@ serde_with.workspace = true
 workspace_hack.workspace = true
 utils.workspace = true
 async-stream.workspace = true
-native-tls.workspace = true
-postgres-native-tls.workspace = true
-postgres_ffi.workspace = true
 tokio-stream.workspace = true
-tokio-postgres.workspace = true
-tokio-util = { workspace = true }
 futures-util.workspace = true
 itertools.workspace = true
 camino.workspace = true
--- a/s3_scrubber/README.md
+++ b/s3_scrubber/README.md
@@ -67,12 +67,10 @@ the purge command will log all the keys that it would have deleted.

 #### `scan-metadata`

-Walk objects in a pageserver or safekeeper S3 bucket, and report statistics on the contents and checking consistency.
-Errors are logged to stderr and summary to stdout.
+Walk objects in a pageserver S3 bucket, and report statistics on the contents.

-For pageserver:
 ```
-env SSO_ACCOUNT_ID=123456 REGION=eu-west-1 BUCKET=my-dev-bucket CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- scan-metadata --node-kind pageserver
+env SSO_ACCOUNT_ID=123456 REGION=eu-west-1 BUCKET=my-dev-bucket CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- scan-metadata

 Timelines: 31106
 With errors: 3
@@ -84,10 +82,6 @@ Layer size bytes: min 24576, 1% 36879, 10% 36879, 50% 61471, 90% 44695551, 99% 2
 Timeline layer count: min 1, 1% 3, 10% 6, 50% 16, 90% 25, 99% 39, max 1053
 ```

-For safekeepers, dump_db_connstr and dump_db_table must be
-specified; they should point to table with debug dump which will be used
-to list timelines and find their backup and start LSNs.
-
 ## Cleaning up running pageservers

 If S3 state is altered first manually, pageserver in-memory state will contain wrong data about S3 state, and tenants/timelines may get recreated on S3 (due to any layer upload due to compaction, pageserver restart, etc.). So before proceeding, for tenants/timelines which are already deleted in the console, we must remove these from pageservers.
--- a/s3_scrubber/src/cloud_admin_api.rs
+++ b/s3_scrubber/src/cloud_admin_api.rs
@@ -1,13 +1,11 @@
-use chrono::{DateTime, Utc};
-use futures::Future;
-use hex::FromHex;
+use std::time::Duration;

+use chrono::{DateTime, Utc};
+use hex::FromHex;
 use reqwest::{header, Client, StatusCode, Url};
 use serde::Deserialize;
 use tokio::sync::Semaphore;

-use tokio_util::sync::CancellationToken;
-use utils::backoff;
 use utils::id::{TenantId, TimelineId};
 use utils::lsn::Lsn;

@@ -139,7 +137,7 @@ pub struct ProjectData {
    pub region_id: String,
    pub platform_id: String,
    pub user_id: String,
-    pub pageserver_id: Option<u64>,
+    pub pageserver_id: u64,
    #[serde(deserialize_with = "from_nullable_id")]
    pub tenant: TenantId,
    pub safekeepers: Vec<SafekeeperData>,
@@ -157,7 +155,7 @@ pub struct ProjectData {
    pub maintenance_set: Option<String>,
 }

-#[derive(Debug, Clone, serde::Deserialize)]
+#[derive(Debug, serde::Deserialize)]
 pub struct BranchData {
    pub id: BranchId,
    pub created_at: DateTime<Utc>,
@@ -212,39 +210,30 @@ impl CloudAdminApiClient {
            .await
            .expect("Semaphore is not closed");

-        let response = CloudAdminApiClient::with_retries(
-            || async {
-                let response = self
-                    .http_client
-                    .get(self.append_url("/projects"))
-                    .query(&[
-                        ("tenant_id", tenant_id.to_string()),
-                        ("show_deleted", "true".to_string()),
-                    ])
-                    .header(header::ACCEPT, "application/json")
-                    .bearer_auth(&self.token)
-                    .send()
-                    .await
-                    .map_err(|e| {
-                        Error::new(
-                            "Find project for tenant".to_string(),
-                            ErrorKind::RequestSend(e),
-                        )
-                    })?;
-
-                let response: AdminApiResponse<Vec<ProjectData>> =
-                    response.json().await.map_err(|e| {
-                        Error::new(
-                            "Find project for tenant".to_string(),
-                            ErrorKind::BodyRead(e),
-                        )
-                    })?;
-                Ok(response)
-            },
-            "find_tenant_project",
-        )
-        .await?;
+        let response = self
+            .http_client
+            .get(self.append_url("/projects"))
+            .query(&[
+                ("tenant_id", tenant_id.to_string()),
+                ("show_deleted", "true".to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| {
+                Error::new(
+                    "Find project for tenant".to_string(),
+                    ErrorKind::RequestSend(e),
+                )
+            })?;

+        let response: AdminApiResponse<Vec<ProjectData>> = response.json().await.map_err(|e| {
+            Error::new(
+                "Find project for tenant".to_string(),
+                ErrorKind::BodyRead(e),
+            )
+        })?;
        match response.data.len() {
            0 => Ok(None),
            1 => Ok(Some(
@@ -272,34 +261,42 @@ impl CloudAdminApiClient {
        const PAGINATION_LIMIT: usize = 512;
        let mut result: Vec<ProjectData> = Vec::with_capacity(PAGINATION_LIMIT);
        loop {
-            let response_bytes = CloudAdminApiClient::with_retries(
-                || async {
-                    let response = self
-                        .http_client
-                        .get(self.append_url("/projects"))
-                        .query(&[
-                            ("show_deleted", "false".to_string()),
-                            ("limit", format!("{PAGINATION_LIMIT}")),
-                            ("offset", format!("{pagination_offset}")),
-                        ])
-                        .header(header::ACCEPT, "application/json")
-                        .bearer_auth(&self.token)
-                        .send()
-                        .await
-                        .map_err(|e| {
-                            Error::new(
-                                "List active projects".to_string(),
-                                ErrorKind::RequestSend(e),
-                            )
-                        })?;
+            let response = self
+                .http_client
+                .get(self.append_url("/projects"))
+                .query(&[
+                    ("show_deleted", "false".to_string()),
+                    ("limit", format!("{PAGINATION_LIMIT}")),
+                    ("offset", format!("{pagination_offset}")),
+                ])
+                .header(header::ACCEPT, "application/json")
+                .bearer_auth(&self.token)
+                .send()
+                .await
+                .map_err(|e| {
+                    Error::new(
+                        "List active projects".to_string(),
+                        ErrorKind::RequestSend(e),
+                    )
+                })?;

-                    response.bytes().await.map_err(|e| {
-                        Error::new("List active projects".to_string(), ErrorKind::BodyRead(e))
-                    })
-                },
-                "list_projects",
-            )
-            .await?;
+            match response.status() {
+                StatusCode::OK => {}
+                StatusCode::SERVICE_UNAVAILABLE | StatusCode::TOO_MANY_REQUESTS => {
+                    tokio::time::sleep(Duration::from_millis(500)).await;
+                    continue;
+                }
+                _status => {
+                    return Err(Error::new(
+                        "List active projects".to_string(),
+                        ErrorKind::ResponseStatus(response.status()),
+                    ))
+                }
+            }
+
+            let response_bytes = response.bytes().await.map_err(|e| {
+                Error::new("List active projects".to_string(), ErrorKind::BodyRead(e))
+            })?;

            let decode_result =
                serde_json::from_slice::<AdminApiResponse<Vec<ProjectData>>>(&response_bytes);
@@ -330,7 +327,6 @@ impl CloudAdminApiClient {

    pub async fn find_timeline_branch(
        &self,
-        tenant_id: TenantId,
        timeline_id: TimelineId,
    ) -> Result<Option<BranchData>, Error> {
        let _permit = self
@@ -339,61 +335,43 @@ impl CloudAdminApiClient {
            .await
            .expect("Semaphore is not closed");

-        let response = CloudAdminApiClient::with_retries(
-            || async {
-                let response = self
-                    .http_client
-                    .get(self.append_url("/branches"))
-                    .query(&[
-                        ("timeline_id", timeline_id.to_string()),
-                        ("show_deleted", "true".to_string()),
-                    ])
-                    .header(header::ACCEPT, "application/json")
-                    .bearer_auth(&self.token)
-                    .send()
-                    .await
-                    .map_err(|e| {
-                        Error::new(
-                            "Find branch for timeline".to_string(),
-                            ErrorKind::RequestSend(e),
-                        )
-                    })?;
+        let response = self
+            .http_client
+            .get(self.append_url("/branches"))
+            .query(&[
+                ("timeline_id", timeline_id.to_string()),
+                ("show_deleted", "true".to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| {
+                Error::new(
+                    "Find branch for timeline".to_string(),
+                    ErrorKind::RequestSend(e),
+                )
+            })?;

-                let response: AdminApiResponse<Vec<BranchData>> =
-                    response.json().await.map_err(|e| {
-                        Error::new(
-                            "Find branch for timeline".to_string(),
-                            ErrorKind::BodyRead(e),
-                        )
-                    })?;
-                Ok(response)
-            },
-            "find_timeline_branch",
-        )
-        .await?;
-
-        let mut branches: Vec<BranchData> = response.data.into_iter().collect();
-        // Normally timeline_id is unique. However, we do have at least one case
-        // of the same timeline_id in two different projects, apparently after
-        // manual recovery. So always recheck project_id (discovered through
-        // tenant_id).
-        let project_data = match self.find_tenant_project(tenant_id).await? {
-            Some(pd) => pd,
-            None => return Ok(None),
-        };
-        branches.retain(|b| b.project_id == project_data.id);
-        if branches.len() < 2 {
-            Ok(branches.first().cloned())
-        } else {
-            Err(Error::new(
-                format!(
-                    "Find branch for timeline {}/{} returned {} branches instead of 0 or 1",
-                    tenant_id,
-                    timeline_id,
-                    branches.len()
-                ),
+        let response: AdminApiResponse<Vec<BranchData>> = response.json().await.map_err(|e| {
+            Error::new(
+                "Find branch for timeline".to_string(),
+                ErrorKind::BodyRead(e),
+            )
+        })?;
+        match response.data.len() {
+            0 => Ok(None),
+            1 => Ok(Some(
+                response
+                    .data
+                    .into_iter()
+                    .next()
+                    .expect("Should have exactly one element"),
+            )),
+            too_many => Err(Error::new(
+                format!("Find branch for timeline returned {too_many} branches instead of 0 or 1"),
                ErrorKind::UnexpectedState,
-            ))
+            )),
        }
    }

@@ -554,15 +532,4 @@ impl CloudAdminApiClient {
            .parse()
            .unwrap_or_else(|e| panic!("Could not append {subpath} to base url: {e}"))
    }
-
-    async fn with_retries<T, O, F>(op: O, description: &str) -> Result<T, Error>
-    where
-        O: FnMut() -> F,
-        F: Future<Output = Result<T, Error>>,
-    {
-        let cancel = CancellationToken::new(); // not really used
-        backoff::retry(op, |_| false, 1, 20, description, &cancel)
-            .await
-            .expect("cancellations are disabled")
-    }
 }
--- a/s3_scrubber/src/garbage.rs
+++ b/s3_scrubber/src/garbage.rs
@@ -60,7 +60,6 @@ pub struct GarbageList {
    /// see garbage, we saw some active tenants too.  This protects against classes of bugs
    /// in the scrubber that might otherwise generate a "deleted all" result.
    active_tenant_count: usize,
-    active_timeline_count: usize,
 }

 impl GarbageList {
@@ -68,7 +67,6 @@ impl GarbageList {
        Self {
            items: Vec::new(),
            active_tenant_count: 0,
-            active_timeline_count: 0,
            node_kind,
            bucket_config,
        }
@@ -121,10 +119,7 @@ pub async fn find_garbage(
 const S3_CONCURRENCY: usize = 32;

 // How many concurrent API requests to make to the console API.
-//
-// Be careful increasing this; roughly we shouldn't have more than ~100 rps. It
-// would be better to implement real rsp limiter.
-const CONSOLE_CONCURRENCY: usize = 16;
+const CONSOLE_CONCURRENCY: usize = 128;

 struct ConsoleCache {
    /// Set of tenants found in the control plane API
@@ -226,7 +221,6 @@ async fn find_garbage_inner(
        } else {
            tracing::debug!("Tenant {tenant_shard_id} is active");
            active_tenants.push(tenant_shard_id);
-            garbage.active_tenant_count = active_tenants.len();
        }

        counter += 1;
@@ -267,7 +261,7 @@ async fn find_garbage_inner(
        let api_client = cloud_admin_api_client.clone();
        async move {
            api_client
-                .find_timeline_branch(ttid.tenant_shard_id.tenant_id, ttid.timeline_id)
+                .find_timeline_branch(ttid.timeline_id)
                .await
                .map_err(|e| anyhow::anyhow!(e))
                .map(|r| (ttid, r))
@@ -277,29 +271,15 @@ async fn find_garbage_inner(
        std::pin::pin!(timelines_checked.try_buffer_unordered(CONSOLE_CONCURRENCY));

    // Update the GarbageList with any timelines which appear not to exist.
-    let mut active_timelines: Vec<TenantShardTimelineId> = vec![];
    while let Some(result) = timelines_checked.next().await {
        let (ttid, console_result) = result?;
        if garbage.maybe_append(GarbageEntity::Timeline(ttid), console_result) {
            tracing::debug!("Timeline {ttid} is garbage");
        } else {
            tracing::debug!("Timeline {ttid} is active");
-            active_timelines.push(ttid);
-            garbage.active_timeline_count = active_timelines.len();
        }
    }

-    let num_garbage_timelines = garbage
-        .items
-        .iter()
-        .filter(|g| matches!(g.entity, GarbageEntity::Timeline(_)))
-        .count();
-    tracing::info!(
-        "Found {}/{} garbage timelines in active tenants",
-        num_garbage_timelines,
-        active_timelines.len(),
-    );
-
    Ok(garbage)
 }

@@ -364,22 +344,16 @@ pub async fn get_timeline_objects(
 const MAX_KEYS_PER_DELETE: usize = 1000;

 /// Drain a buffer of keys into DeleteObjects requests
-///
-/// If `drain` is true, drains keys completely; otherwise stops when <
-/// MAX_KEYS_PER_DELETE keys are left.
-/// `num_deleted` returns number of deleted keys.
 async fn do_delete(
    s3_client: &Arc<Client>,
    bucket_name: &str,
    keys: &mut Vec<ObjectIdentifier>,
    dry_run: bool,
    drain: bool,
-    progress_tracker: &mut DeletionProgressTracker,
 ) -> anyhow::Result<()> {
    while (!keys.is_empty() && drain) || (keys.len() >= MAX_KEYS_PER_DELETE) {
        let request_keys =
            keys.split_off(keys.len() - (std::cmp::min(MAX_KEYS_PER_DELETE, keys.len())));
-        let num_deleted = request_keys.len();
        if dry_run {
            tracing::info!("Dry-run deletion of objects: ");
            for k in request_keys {
@@ -394,30 +368,12 @@ async fn do_delete(
                .send()
                .await
                .context("DeleteObjects request")?;
-            progress_tracker.register(num_deleted);
        }
    }

    Ok(())
 }

-/// Simple tracker reporting each 10k deleted keys.
-#[derive(Default)]
-struct DeletionProgressTracker {
-    num_deleted: usize,
-    last_reported_num_deleted: usize,
-}
-
-impl DeletionProgressTracker {
-    fn register(&mut self, n: usize) {
-        self.num_deleted += n;
-        if self.num_deleted - self.last_reported_num_deleted > 10000 {
-            tracing::info!("progress: deleted {} keys", self.num_deleted);
-            self.last_reported_num_deleted = self.num_deleted;
-        }
-    }
-}
-
 pub async fn purge_garbage(
    input_path: String,
    mode: PurgeMode,
@@ -438,14 +394,6 @@ pub async fn purge_garbage(
    if garbage_list.active_tenant_count == 0 {
        anyhow::bail!("Refusing to purge a garbage list that reports 0 active tenants");
    }
-    if garbage_list
-        .items
-        .iter()
-        .any(|g| matches!(g.entity, GarbageEntity::Timeline(_)))
-        && garbage_list.active_timeline_count == 0
-    {
-        anyhow::bail!("Refusing to purge a garbage list containing garbage timelines that reports 0 active timelines");
-    }

    let filtered_items = garbage_list
        .items
@@ -481,7 +429,6 @@ pub async fn purge_garbage(
        std::pin::pin!(get_objects_results.try_buffer_unordered(S3_CONCURRENCY));

    let mut objects_to_delete = Vec::new();
-    let mut progress_tracker = DeletionProgressTracker::default();
    while let Some(result) = get_objects_results.next().await {
        let mut object_list = result?;
        objects_to_delete.append(&mut object_list);
@@ -492,7 +439,6 @@ pub async fn purge_garbage(
                &mut objects_to_delete,
                dry_run,
                false,
-                &mut progress_tracker,
            )
            .await?;
        }
@@ -504,11 +450,10 @@ pub async fn purge_garbage(
        &mut objects_to_delete,
        dry_run,
        true,
-        &mut progress_tracker,
    )
    .await?;

-    tracing::info!("{} keys deleted in total", progress_tracker.num_deleted);
+    tracing::info!("Fell through");

    Ok(())
 }
--- a/s3_scrubber/src/lib.rs
+++ b/s3_scrubber/src/lib.rs
@@ -4,8 +4,7 @@ pub mod checks;
 pub mod cloud_admin_api;
 pub mod garbage;
 pub mod metadata_stream;
-pub mod scan_pageserver_metadata;
-pub mod scan_safekeeper_metadata;
+pub mod scan_metadata;
 pub mod tenant_snapshot;

 use std::env;
@@ -142,17 +141,12 @@ impl RootTarget {
    pub fn tenants_root(&self) -> S3Target {
        match self {
            Self::Pageserver(root) => root.with_sub_segment(TENANTS_SEGMENT_NAME),
-            Self::Safekeeper(root) => root.clone(),
+            Self::Safekeeper(root) => root.with_sub_segment("wal"),
        }
    }

    pub fn tenant_root(&self, tenant_id: &TenantShardId) -> S3Target {
-        match self {
-            Self::Pageserver(_) => self.tenants_root().with_sub_segment(&tenant_id.to_string()),
-            Self::Safekeeper(_) => self
-                .tenants_root()
-                .with_sub_segment(&tenant_id.tenant_id.to_string()),
-        }
+        self.tenants_root().with_sub_segment(&tenant_id.to_string())
    }

    pub(crate) fn tenant_shards_prefix(&self, tenant_id: &TenantId) -> S3Target {
@@ -343,7 +337,9 @@ fn init_remote(
        }),
        NodeKind::Safekeeper => RootTarget::Safekeeper(S3Target {
            bucket_name: bucket_config.bucket,
-            prefix_in_bucket: bucket_config.prefix_in_bucket.unwrap_or("wal/".to_string()),
+            prefix_in_bucket: bucket_config
+                .prefix_in_bucket
+                .unwrap_or("safekeeper/v1".to_string()),
            delimiter,
        }),
    };
@@ -368,10 +364,7 @@ async fn list_objects_with_retries(
        {
            Ok(response) => return Ok(response),
            Err(e) => {
-                error!(
-                    "list_objects_v2 query failed: {e}, bucket_name={}, prefix={}, delimiter={}",
-                    s3_target.bucket_name, s3_target.prefix_in_bucket, s3_target.delimiter
-                );
+                error!("list_objects_v2 query failed: {e}");
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        }
--- a/s3_scrubber/src/main.rs
+++ b/s3_scrubber/src/main.rs
@@ -1,13 +1,9 @@
-use anyhow::bail;
 use camino::Utf8PathBuf;
 use pageserver_api::shard::TenantShardId;
 use s3_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode};
-use s3_scrubber::scan_pageserver_metadata::scan_metadata;
+use s3_scrubber::scan_metadata::scan_metadata;
 use s3_scrubber::tenant_snapshot::SnapshotDownloader;
-use s3_scrubber::{
-    init_logging, scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig,
-    NodeKind, TraversingDepth,
-};
+use s3_scrubber::{init_logging, BucketConfig, ConsoleConfig, NodeKind, TraversingDepth};

 use clap::{Parser, Subcommand};
 use utils::id::TenantId;
@@ -39,20 +35,11 @@ enum Command {
        #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)]
        mode: PurgeMode,
    },
-    #[command(verbatim_doc_comment)]
    ScanMetadata {
-        #[arg(short, long)]
-        node_kind: NodeKind,
        #[arg(short, long, default_value_t = false)]
        json: bool,
        #[arg(long = "tenant-id", num_args = 0..)]
        tenant_ids: Vec<TenantShardId>,
-        #[arg(long, default_value = None)]
-        /// For safekeeper node_kind only, points to db with debug dump
-        dump_db_connstr: Option<String>,
-        /// For safekeeper node_kind only, table in the db with debug dump
-        #[arg(long, default_value = None)]
-        dump_db_table: Option<String>,
    },
    TenantSnapshot {
        #[arg(long = "tenant-id")]
@@ -85,75 +72,33 @@ async fn main() -> anyhow::Result<()> {
    ));

    match cli.command {
-        Command::ScanMetadata {
-            json,
-            tenant_ids,
-            node_kind,
-            dump_db_connstr,
-            dump_db_table,
-        } => {
-            if let NodeKind::Safekeeper = node_kind {
-                let dump_db_connstr =
-                    dump_db_connstr.ok_or(anyhow::anyhow!("dump_db_connstr not specified"))?;
-                let dump_db_table =
-                    dump_db_table.ok_or(anyhow::anyhow!("dump_db_table not specified"))?;
-
-                let summary = scan_safekeeper_metadata(
-                    bucket_config.clone(),
-                    tenant_ids.iter().map(|tshid| tshid.tenant_id).collect(),
-                    dump_db_connstr,
-                    dump_db_table,
-                )
-                .await?;
-                if json {
-                    println!("{}", serde_json::to_string(&summary).unwrap())
-                } else {
-                    println!("{}", summary.summary_string());
+        Command::ScanMetadata { json, tenant_ids } => {
+            match scan_metadata(bucket_config.clone(), tenant_ids).await {
+                Err(e) => {
+                    tracing::error!("Failed: {e}");
+                    Err(e)
                }
-                if summary.is_fatal() {
-                    bail!("Fatal scrub errors detected");
-                }
-                if summary.is_empty() {
-                    // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
-                    // scrubber they were likely expecting to scan something, and if we see no timelines
-                    // at all then it's likely due to some configuration issues like a bad prefix
-                    bail!(
-                        "No timelines found in bucket {} prefix {}",
-                        bucket_config.bucket,
-                        bucket_config
-                            .prefix_in_bucket
-                            .unwrap_or("<none>".to_string())
-                    );
-                }
-                Ok(())
-            } else {
-                match scan_metadata(bucket_config.clone(), tenant_ids).await {
-                    Err(e) => {
-                        tracing::error!("Failed: {e}");
-                        Err(e)
+                Ok(summary) => {
+                    if json {
+                        println!("{}", serde_json::to_string(&summary).unwrap())
+                    } else {
+                        println!("{}", summary.summary_string());
                    }
-                    Ok(summary) => {
-                        if json {
-                            println!("{}", serde_json::to_string(&summary).unwrap())
-                        } else {
-                            println!("{}", summary.summary_string());
-                        }
-                        if summary.is_fatal() {
-                            Err(anyhow::anyhow!("Fatal scrub errors detected"))
-                        } else if summary.is_empty() {
-                            // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
-                            // scrubber they were likely expecting to scan something, and if we see no timelines
-                            // at all then it's likely due to some configuration issues like a bad prefix
-                            Err(anyhow::anyhow!(
-                                "No timelines found in bucket {} prefix {}",
-                                bucket_config.bucket,
-                                bucket_config
-                                    .prefix_in_bucket
-                                    .unwrap_or("<none>".to_string())
-                            ))
-                        } else {
-                            Ok(())
-                        }
+                    if summary.is_fatal() {
+                        Err(anyhow::anyhow!("Fatal scrub errors detected"))
+                    } else if summary.is_empty() {
+                        // Strictly speaking an empty bucket is a valid bucket, but if someone ran the
+                        // scrubber they were likely expecting to scan something, and if we see no timelines
+                        // at all then it's likely due to some configuration issues like a bad prefix
+                        Err(anyhow::anyhow!(
+                            "No timelines found in bucket {} prefix {}",
+                            bucket_config.bucket,
+                            bucket_config
+                                .prefix_in_bucket
+                                .unwrap_or("<none>".to_string())
+                        ))
+                    } else {
+                        Ok(())
                    }
                }
            }
--- a/s3_scrubber/src/metadata_stream.rs
+++ b/s3_scrubber/src/metadata_stream.rs
@@ -114,7 +114,7 @@ pub async fn stream_tenant_timelines<'a>(
    let timelines_target = target.timelines_root(&tenant);

    loop {
-        tracing::debug!("Listing in {}", tenant);
+        tracing::info!("Listing in {}", tenant);
        let fetch_response =
            list_objects_with_retries(s3_client, &timelines_target, continuation_token.clone())
                .await;
@@ -151,7 +151,7 @@ pub async fn stream_tenant_timelines<'a>(
        }
    }

-    tracing::debug!("Yielding for {}", tenant);
+    tracing::info!("Yielding for {}", tenant);
    Ok(stream! {
        for i in timeline_ids {
            let id = i?;
--- a/s3_scrubber/src/scan_pageserver_metadata.rs
+++ b/s3_scrubber/src/scan_pageserver_metadata.rs
--- a/s3_scrubber/src/scan_safekeeper_metadata.rs
+++ b/s3_scrubber/src/scan_safekeeper_metadata.rs
@@ -1,236 +0,0 @@
-use std::{collections::HashSet, str::FromStr};
-
-use aws_sdk_s3::Client;
-use futures::stream::{StreamExt, TryStreamExt};
-use pageserver_api::shard::TenantShardId;
-use postgres_ffi::{XLogFileName, PG_TLI};
-use serde::Serialize;
-use tokio_postgres::types::PgLsn;
-use tracing::{error, info, trace};
-use utils::{
-    id::{TenantId, TenantTimelineId, TimelineId},
-    lsn::Lsn,
-};
-
-use crate::{
-    cloud_admin_api::CloudAdminApiClient, init_remote, metadata_stream::stream_listing,
-    BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId,
-};
-
-/// Generally we should ask safekeepers, but so far we use everywhere default 16MB.
-const WAL_SEGSIZE: usize = 16 * 1024 * 1024;
-
-#[derive(Serialize)]
-pub struct MetadataSummary {
-    timeline_count: usize,
-    with_errors: HashSet<TenantTimelineId>,
-    deleted_count: usize,
-}
-
-impl MetadataSummary {
-    fn new() -> Self {
-        Self {
-            timeline_count: 0,
-            with_errors: HashSet::new(),
-            deleted_count: 0,
-        }
-    }
-
-    pub fn summary_string(&self) -> String {
-        format!(
-            "timeline_count: {}, with_errors: {}",
-            self.timeline_count,
-            self.with_errors.len()
-        )
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.timeline_count == 0
-    }
-
-    pub fn is_fatal(&self) -> bool {
-        !self.with_errors.is_empty()
-    }
-}
-
-/// Scan the safekeeper metadata in an S3 bucket, reporting errors and
-/// statistics.
-///
-/// It works by listing timelines along with timeline_start_lsn and backup_lsn
-/// in debug dump in dump_db_table and verifying its s3 contents. If some WAL
-/// segments are missing, before complaining control plane is queried to check if
-/// the project wasn't deleted in the meanwhile.
-pub async fn scan_safekeeper_metadata(
-    bucket_config: BucketConfig,
-    tenant_ids: Vec<TenantId>,
-    dump_db_connstr: String,
-    dump_db_table: String,
-) -> anyhow::Result<MetadataSummary> {
-    info!(
-        "checking bucket {}, region {}, dump_db_table {}",
-        bucket_config.bucket, bucket_config.region, dump_db_table
-    );
-    // Use the native TLS implementation (Neon requires TLS)
-    let tls_connector =
-        postgres_native_tls::MakeTlsConnector::new(native_tls::TlsConnector::new().unwrap());
-    let (client, connection) = tokio_postgres::connect(&dump_db_connstr, tls_connector).await?;
-    // The connection object performs the actual communication with the database,
-    // so spawn it off to run on its own.
-    tokio::spawn(async move {
-        if let Err(e) = connection.await {
-            eprintln!("connection error: {}", e);
-        }
-    });
-
-    let tenant_filter_clause = if !tenant_ids.is_empty() {
-        format!(
-            "and tenant_id in ({})",
-            tenant_ids
-                .iter()
-                .map(|t| format!("'{}'", t))
-                .collect::<Vec<_>>()
-                .join(", ")
-        )
-    } else {
-        "".to_owned()
-    };
-    let query = format!(
-        "select tenant_id, timeline_id, min(timeline_start_lsn), max(backup_lsn) from \"{}\" where not is_cancelled {} group by tenant_id, timeline_id;",
-        dump_db_table, tenant_filter_clause,
-    );
-    info!("query is {}", query);
-    let timelines = client.query(&query, &[]).await?;
-    info!("loaded {} timelines", timelines.len());
-
-    let (s3_client, target) = init_remote(bucket_config, NodeKind::Safekeeper)?;
-    let console_config = ConsoleConfig::from_env()?;
-    let cloud_admin_api_client = CloudAdminApiClient::new(console_config);
-
-    let checks = futures::stream::iter(timelines.iter().map(Ok)).map_ok(|row| {
-        let tenant_id = TenantId::from_str(row.get(0)).expect("failed to parse tenant_id");
-        let timeline_id = TimelineId::from_str(row.get(1)).expect("failed to parse tenant_id");
-        let timeline_start_lsn_pg: PgLsn = row.get(2);
-        let timeline_start_lsn: Lsn = Lsn(u64::from(timeline_start_lsn_pg));
-        let backup_lsn_pg: PgLsn = row.get(3);
-        let backup_lsn: Lsn = Lsn(u64::from(backup_lsn_pg));
-        let ttid = TenantTimelineId::new(tenant_id, timeline_id);
-        check_timeline(
-            &s3_client,
-            &target,
-            &cloud_admin_api_client,
-            ttid,
-            timeline_start_lsn,
-            backup_lsn,
-        )
-    });
-    // Run multiple check_timeline's concurrently.
-    const CONCURRENCY: usize = 32;
-    let mut timelines = checks.try_buffered(CONCURRENCY);
-
-    let mut summary = MetadataSummary::new();
-    while let Some(r) = timelines.next().await {
-        let res = r?;
-        summary.timeline_count += 1;
-        if !res.is_ok {
-            summary.with_errors.insert(res.ttid);
-        }
-        if res.is_deleted {
-            summary.deleted_count += 1;
-        }
-    }
-
-    Ok(summary)
-}
-
-struct TimelineCheckResult {
-    ttid: TenantTimelineId,
-    is_ok: bool,
-    is_deleted: bool, // timeline is deleted in cplane
-}
-
-/// List s3 and check that is has all expected WAL for the ttid. Consistency
-/// errors are logged to stderr; returns Ok(true) if timeline is consistent,
-/// Ok(false) if not, Err if failed to check.
-async fn check_timeline(
-    s3_client: &Client,
-    root: &RootTarget,
-    api_client: &CloudAdminApiClient,
-    ttid: TenantTimelineId,
-    timeline_start_lsn: Lsn,
-    backup_lsn: Lsn,
-) -> anyhow::Result<TimelineCheckResult> {
-    trace!(
-        "checking ttid {}, should contain WAL [{}-{}]",
-        ttid,
-        timeline_start_lsn,
-        backup_lsn
-    );
-    // calculate expected segfiles
-    let expected_first_segno = timeline_start_lsn.segment_number(WAL_SEGSIZE);
-    let expected_last_segno = backup_lsn.segment_number(WAL_SEGSIZE);
-    let mut expected_segfiles: HashSet<String> = HashSet::from_iter(
-        (expected_first_segno..expected_last_segno)
-            .map(|segno| XLogFileName(PG_TLI, segno, WAL_SEGSIZE)),
-    );
-    let expected_files_num = expected_segfiles.len();
-    trace!("expecting {} files", expected_segfiles.len(),);
-
-    // now list s3 and check if it misses something
-    let ttshid =
-        TenantShardTimelineId::new(TenantShardId::unsharded(ttid.tenant_id), ttid.timeline_id);
-    let mut timeline_dir_target = root.timeline_root(&ttshid);
-    // stream_listing yields only common_prefixes if delimiter is not empty, but
-    // we need files, so unset it.
-    timeline_dir_target.delimiter = String::new();
-
-    let mut stream = std::pin::pin!(stream_listing(s3_client, &timeline_dir_target));
-    while let Some(obj) = stream.next().await {
-        let obj = obj?;
-        let key = obj.key();
-
-        let seg_name = key
-            .strip_prefix(&timeline_dir_target.prefix_in_bucket)
-            .expect("failed to extract segment name");
-        expected_segfiles.remove(seg_name);
-    }
-    if !expected_segfiles.is_empty() {
-        // Before complaining check cplane, probably timeline is already deleted.
-        let bdata = api_client
-            .find_timeline_branch(ttid.tenant_id, ttid.timeline_id)
-            .await?;
-        let deleted = match bdata {
-            Some(bdata) => bdata.deleted,
-            None => {
-                // note: should be careful with selecting proper cplane address
-                info!("ttid {} not found, assuming it is deleted", ttid);
-                true
-            }
-        };
-        if deleted {
-            // ok, branch is deleted
-            return Ok(TimelineCheckResult {
-                ttid,
-                is_ok: true,
-                is_deleted: true,
-            });
-        }
-        error!(
-            "ttid {}: missing {} files out of {}, timeline_start_lsn {}, wal_backup_lsn {}",
-            ttid,
-            expected_segfiles.len(),
-            expected_files_num,
-            timeline_start_lsn,
-            backup_lsn,
-        );
-        return Ok(TimelineCheckResult {
-            ttid,
-            is_ok: false,
-            is_deleted: false,
-        });
-    }
-    Ok(TimelineCheckResult {
-        ttid,
-        is_ok: true,
-        is_deleted: false,
-    })
-}
--- a/safekeeper/src/bin/safekeeper.rs
+++ b/safekeeper/src/bin/safekeeper.rs
@@ -177,10 +177,6 @@ struct Args {
    /// Controls how long backup will wait until uploading the partial segment.
    #[arg(long, value_parser = humantime::parse_duration, default_value = DEFAULT_PARTIAL_BACKUP_TIMEOUT, verbatim_doc_comment)]
    partial_backup_timeout: Duration,
-    /// Disable task to push messages to broker every second. Supposed to
-    /// be used in tests.
-    #[arg(long)]
-    disable_periodic_broker_push: bool,
 }

 // Like PathBufValueParser, but allows empty string.
@@ -313,7 +309,6 @@ async fn main() -> anyhow::Result<()> {
        walsenders_keep_horizon: args.walsenders_keep_horizon,
        partial_backup_enabled: args.partial_backup_enabled,
        partial_backup_timeout: args.partial_backup_timeout,
-        disable_periodic_broker_push: args.disable_periodic_broker_push,
    };

    // initialize sentry if SENTRY_DSN is provided
--- a/safekeeper/src/broker.rs
+++ b/safekeeper/src/broker.rs
@@ -10,20 +10,11 @@ use anyhow::Result;
 use storage_broker::parse_proto_ttid;

 use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey;
-use storage_broker::proto::FilterTenantTimelineId;
-use storage_broker::proto::MessageType;
-use storage_broker::proto::SafekeeperDiscoveryResponse;
-use storage_broker::proto::SubscribeByFilterRequest;
 use storage_broker::proto::SubscribeSafekeeperInfoRequest;
-use storage_broker::proto::TypeSubscription;
-use storage_broker::proto::TypedMessage;
 use storage_broker::Request;

-use std::sync::atomic::AtomicU64;
-use std::sync::Arc;
 use std::time::Duration;
 use std::time::Instant;
-use std::time::UNIX_EPOCH;
 use tokio::task::JoinHandle;
 use tokio::time::sleep;
 use tracing::*;
@@ -40,12 +31,6 @@ const PUSH_INTERVAL_MSEC: u64 = 1000;

 /// Push once in a while data about all active timelines to the broker.
 async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
-    if conf.disable_periodic_broker_push {
-        info!("broker push_loop is disabled, doing nothing...");
-        futures::future::pending::<()>().await; // sleep forever
-        return Ok(());
-    }
-
    let mut client =
        storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)?;
    let push_interval = Duration::from_millis(PUSH_INTERVAL_MSEC);
@@ -90,7 +75,7 @@ async fn push_loop(conf: SafeKeeperConf) -> anyhow::Result<()> {
 }

 /// Subscribe and fetch all the interesting data from the broker.
-async fn pull_loop(conf: SafeKeeperConf, stats: Arc<BrokerStats>) -> Result<()> {
+async fn pull_loop(conf: SafeKeeperConf) -> Result<()> {
    let mut client = storage_broker::connect(conf.broker_endpoint, conf.broker_keepalive_interval)?;

    // TODO: subscribe only to local timelines instead of all
@@ -109,8 +94,6 @@ async fn pull_loop(conf: SafeKeeperConf, stats: Arc<BrokerStats>) -> Result<()>
    let err_counter = BROKER_PULLED_UPDATES.with_label_values(&["error"]);

    while let Some(msg) = stream.message().await? {
-        stats.update_pulled();
-
        let proto_ttid = msg
            .tenant_timeline_id
            .as_ref()
@@ -136,93 +119,12 @@ async fn pull_loop(conf: SafeKeeperConf, stats: Arc<BrokerStats>) -> Result<()>
    bail!("end of stream");
 }

-/// Process incoming discover requests. This is done in a separate task to avoid
-/// interfering with the normal pull/push loops.
-async fn discover_loop(conf: SafeKeeperConf, stats: Arc<BrokerStats>) -> Result<()> {
-    let mut client =
-        storage_broker::connect(conf.broker_endpoint.clone(), conf.broker_keepalive_interval)?;
-
-    let request = SubscribeByFilterRequest {
-        types: vec![TypeSubscription {
-            r#type: MessageType::SafekeeperDiscoveryRequest as i32,
-        }],
-        tenant_timeline_id: Some(FilterTenantTimelineId {
-            enabled: false,
-            tenant_timeline_id: None,
-        }),
-    };
-
-    let mut stream = client
-        .subscribe_by_filter(request)
-        .await
-        .context("subscribe_by_filter request failed")?
-        .into_inner();
-
-    let discover_counter = BROKER_PULLED_UPDATES.with_label_values(&["discover"]);
-
-    while let Some(typed_msg) = stream.message().await? {
-        stats.update_pulled();
-
-        match typed_msg.r#type() {
-            MessageType::SafekeeperDiscoveryRequest => {
-                let msg = typed_msg
-                    .safekeeper_discovery_request
-                    .expect("proto type mismatch from broker message");
-
-                let proto_ttid = msg
-                    .tenant_timeline_id
-                    .as_ref()
-                    .ok_or_else(|| anyhow!("missing tenant_timeline_id"))?;
-                let ttid = parse_proto_ttid(proto_ttid)?;
-                if let Ok(tli) = GlobalTimelines::get(ttid) {
-                    // we received a discovery request for a timeline we know about
-                    discover_counter.inc();
-
-                    // create and reply with discovery response
-                    let sk_info = tli.get_safekeeper_info(&conf).await;
-                    let response = SafekeeperDiscoveryResponse {
-                        safekeeper_id: sk_info.safekeeper_id,
-                        tenant_timeline_id: sk_info.tenant_timeline_id,
-                        commit_lsn: sk_info.commit_lsn,
-                        safekeeper_connstr: sk_info.safekeeper_connstr,
-                        availability_zone: sk_info.availability_zone,
-                    };
-
-                    // note this is a blocking call
-                    client
-                        .publish_one(TypedMessage {
-                            r#type: MessageType::SafekeeperDiscoveryResponse as i32,
-                            safekeeper_timeline_info: None,
-                            safekeeper_discovery_request: None,
-                            safekeeper_discovery_response: Some(response),
-                        })
-                        .await?;
-                }
-            }
-
-            _ => {
-                warn!(
-                    "unexpected message type i32 {}, {:?}",
-                    typed_msg.r#type,
-                    typed_msg.r#type()
-                );
-            }
-        }
-    }
-    bail!("end of stream");
-}
-
 pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
    info!("started, broker endpoint {:?}", conf.broker_endpoint);

    let mut ticker = tokio::time::interval(Duration::from_millis(RETRY_INTERVAL_MSEC));
    let mut push_handle: Option<JoinHandle<Result<(), Error>>> = None;
    let mut pull_handle: Option<JoinHandle<Result<(), Error>>> = None;
-    let mut discover_handle: Option<JoinHandle<Result<(), Error>>> = None;
-
-    let stats = Arc::new(BrokerStats::new());
-    let stats_task = task_stats(stats.clone());
-    tokio::pin!(stats_task);

    // Selecting on JoinHandles requires some squats; is there a better way to
    // reap tasks individually?
@@ -251,77 +153,13 @@ pub async fn task_main(conf: SafeKeeperConf) -> anyhow::Result<()> {
                    };
                    pull_handle = None;
                },
-                res = async { discover_handle.as_mut().unwrap().await }, if discover_handle.is_some() => {
-                    // was it panic or normal error?
-                    match res {
-                        Ok(res_internal) => if let Err(err_inner) = res_internal {
-                            warn!("discover task failed: {:?}", err_inner);
-                        }
-                        Err(err_outer) => { warn!("discover task panicked: {:?}", err_outer) }
-                    };
-                    discover_handle = None;
-                },
                _ = ticker.tick() => {
                    if push_handle.is_none() {
                        push_handle = Some(tokio::spawn(push_loop(conf.clone())));
                    }
                    if pull_handle.is_none() {
-                        pull_handle = Some(tokio::spawn(pull_loop(conf.clone(), stats.clone())));
+                        pull_handle = Some(tokio::spawn(pull_loop(conf.clone())));
                    }
-                    if discover_handle.is_none() {
-                        discover_handle = Some(tokio::spawn(discover_loop(conf.clone(), stats.clone())));
-                    }
-                },
-                _ = &mut stats_task => {}
-        }
-    }
-}
-
-struct BrokerStats {
-    /// Timestamp of the last received message from the broker.
-    last_pulled_ts: AtomicU64,
-}
-
-impl BrokerStats {
-    fn new() -> Self {
-        BrokerStats {
-            last_pulled_ts: AtomicU64::new(0),
-        }
-    }
-
-    fn now_millis() -> u64 {
-        std::time::SystemTime::now()
-            .duration_since(UNIX_EPOCH)
-            .expect("time is before epoch")
-            .as_millis() as u64
-    }
-
-    /// Update last_pulled timestamp to current time.
-    fn update_pulled(&self) {
-        self.last_pulled_ts
-            .store(Self::now_millis(), std::sync::atomic::Ordering::Relaxed);
-    }
-}
-
-/// Periodically write to logs if there are issues with receiving data from the broker.
-async fn task_stats(stats: Arc<BrokerStats>) {
-    let warn_duration = Duration::from_secs(10);
-    let mut ticker = tokio::time::interval(warn_duration);
-
-    loop {
-        tokio::select! {
-            _ = ticker.tick() => {
-                let last_pulled = stats.last_pulled_ts.load(std::sync::atomic::Ordering::SeqCst);
-                if last_pulled == 0 {
-                    // no broker updates yet
-                    continue;
-                }
-
-                let now = BrokerStats::now_millis();
-                if now > last_pulled && now - last_pulled > warn_duration.as_millis() as u64 {
-                    let ts = chrono::NaiveDateTime::from_timestamp_millis(last_pulled as i64).expect("invalid timestamp");
-                    info!("no broker updates for some time, last update: {:?}", ts);
-                }
            }
        }
    }
--- a/safekeeper/src/lib.rs
+++ b/safekeeper/src/lib.rs
@@ -83,7 +83,6 @@ pub struct SafeKeeperConf {
    pub walsenders_keep_horizon: bool,
    pub partial_backup_enabled: bool,
    pub partial_backup_timeout: Duration,
-    pub disable_periodic_broker_push: bool,
 }

 impl SafeKeeperConf {
@@ -130,7 +129,6 @@ impl SafeKeeperConf {
            walsenders_keep_horizon: false,
            partial_backup_enabled: false,
            partial_backup_timeout: Duration::from_secs(0),
-            disable_periodic_broker_push: false,
        }
    }
 }
--- a/safekeeper/src/safekeeper.rs
+++ b/safekeeper/src/safekeeper.rs
@@ -725,18 +725,6 @@ where
            self.state.inmem.commit_lsn
        );

-        // Before first WAL write initialize its segment. It makes first segment
-        // pg_waldump'able because stream from compute doesn't include its
-        // segment and page headers.
-        //
-        // If we fail before first WAL write flush this action would be
-        // repeated, that's ok because it is idempotent.
-        if self.wal_store.flush_lsn() == Lsn::INVALID {
-            self.wal_store
-                .initialize_first_segment(msg.start_streaming_at)
-                .await?;
-        }
-
        // TODO: cross check divergence point, check if msg.start_streaming_at corresponds to
        // intersection of our history and history from msg

@@ -1019,10 +1007,6 @@ mod tests {
            self.lsn
        }

-        async fn initialize_first_segment(&mut self, _init_lsn: Lsn) -> Result<()> {
-            Ok(())
-        }
-
        async fn write_wal(&mut self, startpos: Lsn, buf: &[u8]) -> Result<()> {
            self.lsn = startpos + buf.len() as u64;
            Ok(())
--- a/safekeeper/src/wal_storage.rs
+++ b/safekeeper/src/wal_storage.rs
@@ -38,12 +38,6 @@ pub trait Storage {
    /// LSN of last durably stored WAL record.
    fn flush_lsn(&self) -> Lsn;

-    /// Initialize segment by creating proper long header at the beginning of
-    /// the segment and short header at the page of given LSN. This is only used
-    /// for timeline initialization because compute will stream data only since
-    /// init_lsn. Other segment headers are included in compute stream.
-    async fn initialize_first_segment(&mut self, init_lsn: Lsn) -> Result<()>;
-
    /// Write piece of WAL from buf to disk, but not necessarily sync it.
    async fn write_wal(&mut self, startpos: Lsn, buf: &[u8]) -> Result<()>;

@@ -84,8 +78,6 @@ pub struct PhysicalStorage {

    /// Size of WAL segment in bytes.
    wal_seg_size: usize,
-    pg_version: u32,
-    system_id: u64,

    /// Written to disk, but possibly still in the cache and not fully persisted.
    /// Also can be ahead of record_lsn, if happen to be in the middle of a WAL record.
@@ -177,8 +169,6 @@ impl PhysicalStorage {
            timeline_dir,
            conf: conf.clone(),
            wal_seg_size,
-            pg_version: state.server.pg_version,
-            system_id: state.server.system_id,
            write_lsn,
            write_record_lsn: write_lsn,
            flush_record_lsn: flush_lsn,
@@ -334,20 +324,6 @@ impl Storage for PhysicalStorage {
        self.flush_record_lsn
    }

-    async fn initialize_first_segment(&mut self, init_lsn: Lsn) -> Result<()> {
-        let segno = init_lsn.segment_number(self.wal_seg_size);
-        let (mut file, _) = self.open_or_create(segno).await?;
-        let major_pg_version = self.pg_version / 10000;
-        let wal_seg =
-            postgres_ffi::generate_wal_segment(segno, self.system_id, major_pg_version, init_lsn)?;
-        file.seek(SeekFrom::Start(0)).await?;
-        file.write_all(&wal_seg).await?;
-        file.flush().await?;
-        info!("initialized segno {} at lsn {}", segno, init_lsn);
-        // note: file is *not* fsynced
-        Ok(())
-    }
-
    /// Write WAL to disk.
    async fn write_wal(&mut self, startpos: Lsn, buf: &[u8]) -> Result<()> {
        // Disallow any non-sequential writes, which can result in gaps or overwrites.
--- a/safekeeper/tests/walproposer_sim/safekeeper.rs
+++ b/safekeeper/tests/walproposer_sim/safekeeper.rs
@@ -178,7 +178,6 @@ pub fn run_server(os: NodeOs, disk: Arc<SafekeeperDisk>) -> Result<()> {
        walsenders_keep_horizon: false,
        partial_backup_enabled: false,
        partial_backup_timeout: Duration::from_secs(0),
-        disable_periodic_broker_push: false,
    };

    let mut global = GlobalMap::new(disk, conf.clone())?;
--- a/safekeeper/tests/walproposer_sim/safekeeper_disk.rs
+++ b/safekeeper/tests/walproposer_sim/safekeeper_disk.rs
@@ -182,10 +182,6 @@ impl wal_storage::Storage for DiskWALStorage {
        self.flush_record_lsn
    }

-    async fn initialize_first_segment(&mut self, _init_lsn: Lsn) -> Result<()> {
-        Ok(())
-    }
-
    /// Write piece of WAL from buf to disk, but not necessarily sync it.
    async fn write_wal(&mut self, startpos: Lsn, buf: &[u8]) -> Result<()> {
        if self.write_lsn != startpos {
--- a/storage_broker/src/bin/storage_broker.rs
+++ b/storage_broker/src/bin/storage_broker.rs
@@ -196,13 +196,8 @@ impl SubscriptionKey {

    /// Parse from FilterTenantTimelineId
    pub fn from_proto_filter_tenant_timeline_id(
-        opt: Option<&FilterTenantTimelineId>,
+        f: &FilterTenantTimelineId,
    ) -> Result<Self, Status> {
-        if opt.is_none() {
-            return Ok(SubscriptionKey::All);
-        }
-
-        let f = opt.unwrap();
        if !f.enabled {
            return Ok(SubscriptionKey::All);
        }
@@ -539,7 +534,10 @@ impl BrokerService for Broker {
            .remote_addr()
            .expect("TCPConnectInfo inserted by handler");
        let proto_filter = request.into_inner();
-        let ttid_filter = proto_filter.tenant_timeline_id.as_ref();
+        let ttid_filter = proto_filter
+            .tenant_timeline_id
+            .as_ref()
+            .ok_or_else(|| Status::new(Code::InvalidArgument, "missing tenant_timeline_id"))?;

        let sub_key = SubscriptionKey::from_proto_filter_tenant_timeline_id(ttid_filter)?;
        let types_set = proto_filter
--- a/storage_controller/Cargo.toml
+++ b/storage_controller/Cargo.toml
@@ -31,7 +31,7 @@ once_cell.workspace = true
 pageserver_api.workspace = true
 pageserver_client.workspace = true
 postgres_connection.workspace = true
-reqwest = { workspace = true, features = ["stream"] }
+reqwest.workspace = true
 routerify.workspace = true
 serde.workspace = true
 serde_json.workspace = true
@@ -40,8 +40,6 @@ tokio.workspace = true
 tokio-util.workspace = true
 tracing.workspace = true
 measured.workspace = true
-strum.workspace = true
-strum_macros.workspace = true

 diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] }
 diesel_migrations = { version = "2.1.0" }
--- a/storage_controller/src/compute_hook.rs
+++ b/storage_controller/src/compute_hook.rs
@@ -4,7 +4,7 @@ use std::{collections::HashMap, time::Duration};
 use control_plane::endpoint::{ComputeControlPlane, EndpointStatus};
 use control_plane::local_env::LocalEnv;
 use futures::StreamExt;
-use hyper::StatusCode;
+use hyper::{Method, StatusCode};
 use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShardId};
 use postgres_connection::parse_host_port;
 use serde::{Deserialize, Serialize};
@@ -328,7 +328,7 @@ impl ComputeHook {
        reconfigure_request: &ComputeHookNotifyRequest,
        cancel: &CancellationToken,
    ) -> Result<(), NotifyError> {
-        let req = self.client.request(reqwest::Method::PUT, url);
+        let req = self.client.request(Method::PUT, url);
        let req = if let Some(value) = &self.authorization_header {
            req.header(reqwest::header::AUTHORIZATION, value)
        } else {
@@ -347,10 +347,8 @@ impl ComputeHook {
        };

        // Treat all 2xx responses as success
-        if response.status() >= reqwest::StatusCode::OK
-            && response.status() < reqwest::StatusCode::MULTIPLE_CHOICES
-        {
-            if response.status() != reqwest::StatusCode::OK {
+        if response.status() >= StatusCode::OK && response.status() < StatusCode::MULTIPLE_CHOICES {
+            if response.status() != StatusCode::OK {
                // Non-200 2xx response: it doesn't make sense to retry, but this is unexpected, so
                // log a warning.
                tracing::warn!(
@@ -364,7 +362,7 @@ impl ComputeHook {

        // Error response codes
        match response.status() {
-            reqwest::StatusCode::TOO_MANY_REQUESTS => {
+            StatusCode::TOO_MANY_REQUESTS => {
                // TODO: 429 handling should be global: set some state visible to other requests
                // so that they will delay before starting, rather than all notifications trying
                // once before backing off.
@@ -373,30 +371,20 @@ impl ComputeHook {
                    .ok();
                Err(NotifyError::SlowDown)
            }
-            reqwest::StatusCode::LOCKED => {
+            StatusCode::LOCKED => {
                // We consider this fatal, because it's possible that the operation blocking the control one is
                // also the one that is waiting for this reconcile.  We should let the reconciler calling
                // this hook fail, to give control plane a chance to un-lock.
                tracing::info!("Control plane reports tenant is locked, dropping out of notify");
                Err(NotifyError::Busy)
            }
-            reqwest::StatusCode::SERVICE_UNAVAILABLE => {
-                Err(NotifyError::Unavailable(StatusCode::SERVICE_UNAVAILABLE))
+            StatusCode::SERVICE_UNAVAILABLE
+            | StatusCode::GATEWAY_TIMEOUT
+            | StatusCode::BAD_GATEWAY => Err(NotifyError::Unavailable(response.status())),
+            StatusCode::BAD_REQUEST | StatusCode::UNAUTHORIZED | StatusCode::FORBIDDEN => {
+                Err(NotifyError::Fatal(response.status()))
            }
-            reqwest::StatusCode::GATEWAY_TIMEOUT => {
-                Err(NotifyError::Unavailable(StatusCode::GATEWAY_TIMEOUT))
-            }
-            reqwest::StatusCode::BAD_GATEWAY => {
-                Err(NotifyError::Unavailable(StatusCode::BAD_GATEWAY))
-            }
-
-            reqwest::StatusCode::BAD_REQUEST => Err(NotifyError::Fatal(StatusCode::BAD_REQUEST)),
-            reqwest::StatusCode::UNAUTHORIZED => Err(NotifyError::Fatal(StatusCode::UNAUTHORIZED)),
-            reqwest::StatusCode::FORBIDDEN => Err(NotifyError::Fatal(StatusCode::FORBIDDEN)),
-            status => Err(NotifyError::Unexpected(
-                hyper::StatusCode::from_u16(status.as_u16())
-                    .unwrap_or(StatusCode::INTERNAL_SERVER_ERROR),
-            )),
+            _ => Err(NotifyError::Unexpected(response.status())),
        }
    }

--- a/Show More
+++ b/Show More