fix vendor/revisions.json

Bump vendor/postgres
Add test_pg_waldump.py
2026-01-27 15:20:38 +00:00 · 2024-05-01 19:50:14 +01:00 · 2024-05-01 19:50:14 +01:00 · 2024-05-01 19:50:14 +01:00
168 changed files with 1952 additions and 6828 deletions
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -236,6 +236,27 @@ jobs:
          submodules: true
          fetch-depth: 1

+      - name: Check Postgres submodules revision
+        shell: bash -euo pipefail {0}
+        run: |
+          # This is a temporary solution to ensure that the Postgres submodules revision is correct (i.e. that it was updated intentionally).
+          # Eventually it will be replaced by a regression test https://github.com/neondatabase/neon/pull/4603
+
+          FAILED=false
+          for postgres in postgres-v14 postgres-v15 postgres-v16; do
+            expected=$(cat vendor/revisions.json | jq --raw-output '."'"${postgres}"'"')
+            actual=$(git rev-parse "HEAD:vendor/${postgres}")
+            if [ "${expected}" != "${actual}" ]; then
+              echo >&2 "Expected ${postgres} rev to be at '${expected}', but it is at '${actual}'"
+              FAILED=true
+            fi
+          done
+
+          if [ "${FAILED}" = "true" ]; then
+            echo >&2 "Please update vendor/revisions.json if these changes are intentional"
+            exit 1
+          fi
+
      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -595,7 +595,7 @@ dependencies = [
 "http 0.2.9",
 "http-body 0.4.5",
 "hyper 0.14.26",
- "hyper-rustls 0.24.0",
+ "hyper-rustls",
 "once_cell",
 "pin-project-lite",
 "pin-utils",
@@ -684,7 +684,7 @@ dependencies = [
 "http-body 0.4.5",
 "hyper 0.14.26",
 "itoa",
- "matchit 0.7.0",
+ "matchit",
 "memchr",
 "mime",
 "percent-encoding",
@@ -740,7 +740,7 @@ dependencies = [
 "pin-project",
 "quick-xml",
 "rand 0.8.5",
- "reqwest 0.11.19",
+ "reqwest",
 "rustc_version",
 "serde",
 "serde_json",
@@ -865,12 +865,6 @@ version = "0.21.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105"

-[[package]]
-name = "base64"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
-
 [[package]]
 name = "base64-simd"
 version = "0.8.0"
@@ -1216,7 +1210,7 @@ dependencies = [
 "postgres",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "rust-ini",
 "serde",
 "serde_json",
@@ -1335,7 +1329,7 @@ dependencies = [
 "postgres_backend",
 "postgres_connection",
 "regex",
- "reqwest 0.12.4",
+ "reqwest",
 "safekeeper_api",
 "scopeguard",
 "serde",
@@ -1348,7 +1342,6 @@ dependencies = [
 "tokio-postgres",
 "tokio-util",
 "toml",
- "toml_edit",
 "tracing",
 "url",
 "utils",
@@ -2370,17 +2363,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "hostname"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba"
-dependencies = [
- "cfg-if",
- "libc",
- "windows 0.52.0",
-]
-
 [[package]]
 name = "http"
 version = "0.2.9"
@@ -2527,7 +2509,6 @@ dependencies = [
 "pin-project-lite",
 "smallvec",
 "tokio",
- "want",
 ]

 [[package]]
@@ -2545,23 +2526,6 @@ dependencies = [
 "tokio-rustls 0.24.0",
 ]

-[[package]]
-name = "hyper-rustls"
-version = "0.26.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
-dependencies = [
- "futures-util",
- "http 1.1.0",
- "hyper 1.2.0",
- "hyper-util",
- "rustls 0.22.4",
- "rustls-pki-types",
- "tokio",
- "tokio-rustls 0.25.0",
- "tower-service",
-]
-
 [[package]]
 name = "hyper-timeout"
 version = "0.4.1"
@@ -2609,7 +2573,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa"
 dependencies = [
 "bytes",
- "futures-channel",
 "futures-util",
 "http 1.1.0",
 "http-body 1.0.0",
@@ -2617,9 +2580,6 @@ dependencies = [
 "pin-project-lite",
 "socket2 0.5.5",
 "tokio",
- "tower",
- "tower-service",
- "tracing",
 ]

 [[package]]
@@ -2633,7 +2593,7 @@ dependencies = [
 "iana-time-zone-haiku",
 "js-sys",
 "wasm-bindgen",
- "windows 0.48.0",
+ "windows",
 ]

 [[package]]
@@ -2956,12 +2916,6 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b87248edafb776e59e6ee64a79086f65890d3510f2c656c000bf2a7e8a0aea40"

-[[package]]
-name = "matchit"
-version = "0.8.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "540f1c43aed89909c0cc0cc604e3bb2f7e7a341a3728a9e6cfe760e733cd11ed"
-
 [[package]]
 name = "md-5"
 version = "0.10.5"
@@ -3095,6 +3049,16 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"

+[[package]]
+name = "mime_guess"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef"
+dependencies = [
+ "mime",
+ "unicase",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -3438,7 +3402,7 @@ dependencies = [
 "bytes",
 "http 0.2.9",
 "opentelemetry_api",
- "reqwest 0.11.19",
+ "reqwest",
 ]

 [[package]]
@@ -3456,7 +3420,7 @@ dependencies = [
 "opentelemetry_api",
 "opentelemetry_sdk",
 "prost",
- "reqwest 0.11.19",
+ "reqwest",
 "thiserror",
 "tokio",
 "tonic",
@@ -3685,7 +3649,7 @@ dependencies = [
 "rand 0.8.5",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "rpds",
 "scopeguard",
 "serde",
@@ -3755,7 +3719,7 @@ dependencies = [
 "futures",
 "pageserver_api",
 "postgres",
- "reqwest 0.12.4",
+ "reqwest",
 "serde",
 "thiserror",
 "tokio",
@@ -4364,7 +4328,7 @@ dependencies = [
 "hashlink",
 "hex",
 "hmac",
- "hostname 0.3.1",
+ "hostname",
 "http 1.1.0",
 "http-body-util",
 "humantime",
@@ -4372,7 +4336,6 @@ dependencies = [
 "hyper 1.2.0",
 "hyper-tungstenite",
 "hyper-util",
- "indexmap 2.0.1",
 "ipnet",
 "itertools",
 "lasso",
@@ -4398,7 +4361,7 @@ dependencies = [
 "redis",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "reqwest-middleware",
 "reqwest-retry",
 "reqwest-tracing",
@@ -4425,7 +4388,6 @@ dependencies = [
 "tokio-postgres-rustls",
 "tokio-rustls 0.25.0",
 "tokio-util",
- "tower-service",
 "tracing",
 "tracing-opentelemetry",
 "tracing-subscriber",
@@ -4716,7 +4678,6 @@ dependencies = [
 "scopeguard",
 "serde",
 "serde_json",
- "sync_wrapper",
 "test-context",
 "tokio",
 "tokio-stream",
@@ -4742,106 +4703,69 @@ dependencies = [
 "http 0.2.9",
 "http-body 0.4.5",
 "hyper 0.14.26",
+ "hyper-rustls",
 "hyper-tls",
 "ipnet",
 "js-sys",
 "log",
 "mime",
+ "mime_guess",
 "native-tls",
 "once_cell",
 "percent-encoding",
 "pin-project-lite",
+ "rustls 0.21.11",
+ "rustls-pemfile 1.0.2",
 "serde",
 "serde_json",
 "serde_urlencoded",
 "tokio",
 "tokio-native-tls",
+ "tokio-rustls 0.24.0",
 "tokio-util",
 "tower-service",
 "url",
 "wasm-bindgen",
 "wasm-bindgen-futures",
- "wasm-streams 0.3.0",
+ "wasm-streams",
 "web-sys",
- "winreg 0.50.0",
-]
-
-[[package]]
-name = "reqwest"
-version = "0.12.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10"
-dependencies = [
- "base64 0.22.1",
- "bytes",
- "futures-channel",
- "futures-core",
- "futures-util",
- "http 1.1.0",
- "http-body 1.0.0",
- "http-body-util",
- "hyper 1.2.0",
- "hyper-rustls 0.26.0",
- "hyper-util",
- "ipnet",
- "js-sys",
- "log",
- "mime",
- "once_cell",
- "percent-encoding",
- "pin-project-lite",
- "rustls 0.22.4",
- "rustls-pemfile 2.1.1",
- "rustls-pki-types",
- "serde",
- "serde_json",
- "serde_urlencoded",
- "sync_wrapper",
- "tokio",
- "tokio-rustls 0.25.0",
- "tokio-util",
- "tower-service",
- "url",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "wasm-streams 0.4.0",
- "web-sys",
- "webpki-roots 0.26.1",
- "winreg 0.52.0",
+ "webpki-roots 0.25.2",
+ "winreg",
 ]

 [[package]]
 name = "reqwest-middleware"
-version = "0.3.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0209efb52486ad88136190094ee214759ef7507068b27992256ed6610eb71a01"
+checksum = "4531c89d50effe1fac90d095c8b133c20c5c714204feee0bfc3fd158e784209d"
 dependencies = [
 "anyhow",
 "async-trait",
- "http 1.1.0",
- "reqwest 0.12.4",
+ "http 0.2.9",
+ "reqwest",
 "serde",
+ "task-local-extensions",
 "thiserror",
- "tower-service",
 ]

 [[package]]
 name = "reqwest-retry"
-version = "0.5.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "40f342894422862af74c50e1e9601cf0931accc9c6981e5eb413c46603b616b5"
+checksum = "48d0fd6ef4c6d23790399fe15efc8d12cd9f3d4133958f9bd7801ee5cbaec6c4"
 dependencies = [
 "anyhow",
 "async-trait",
 "chrono",
 "futures",
 "getrandom 0.2.11",
- "http 1.1.0",
- "hyper 1.2.0",
+ "http 0.2.9",
+ "hyper 0.14.26",
 "parking_lot 0.11.2",
- "reqwest 0.12.4",
+ "reqwest",
 "reqwest-middleware",
 "retry-policies",
+ "task-local-extensions",
 "tokio",
 "tracing",
 "wasm-timer",
@@ -4849,27 +4773,27 @@ dependencies = [

 [[package]]
 name = "reqwest-tracing"
-version = "0.5.0"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b253954a1979e02eabccd7e9c3d61d8f86576108baa160775e7f160bb4e800a3"
+checksum = "5a0152176687dd5cfe7f507ac1cb1a491c679cfe483afd133a7db7aaea818bb3"
 dependencies = [
 "anyhow",
 "async-trait",
 "getrandom 0.2.11",
- "http 1.1.0",
- "matchit 0.8.2",
+ "matchit",
 "opentelemetry",
- "reqwest 0.12.4",
+ "reqwest",
 "reqwest-middleware",
+ "task-local-extensions",
 "tracing",
 "tracing-opentelemetry",
 ]

 [[package]]
 name = "retry-policies"
-version = "0.3.0"
+version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "493b4243e32d6eedd29f9a398896e35c6943a123b55eec97dcaee98310d25810"
+checksum = "e09bbcb5003282bcb688f0bae741b278e9c7e8f378f561522c9806c58e075d9b"
 dependencies = [
 "anyhow",
 "chrono",
@@ -5195,7 +5119,7 @@ dependencies = [
 "postgres_ffi",
 "rand 0.8.5",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "serde",
 "serde_json",
 "serde_with",
@@ -5246,7 +5170,7 @@ dependencies = [
 "rand 0.8.5",
 "regex",
 "remote_storage",
- "reqwest 0.12.4",
+ "reqwest",
 "safekeeper_api",
 "scopeguard",
 "sd-notify",
@@ -5376,12 +5300,12 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"

 [[package]]
 name = "sentry"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00421ed8fa0c995f07cde48ba6c89e80f2b312f74ff637326f392fbfd23abe02"
+checksum = "2e95efd0cefa32028cdb9766c96de71d96671072f9fb494dc9fb84c0ef93e52b"
 dependencies = [
 "httpdate",
- "reqwest 0.12.4",
+ "reqwest",
 "rustls 0.21.11",
 "sentry-backtrace",
 "sentry-contexts",
@@ -5395,9 +5319,9 @@ dependencies = [

 [[package]]
 name = "sentry-backtrace"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a79194074f34b0cbe5dd33896e5928bbc6ab63a889bd9df2264af5acb186921e"
+checksum = "6ac2bac6f310c4c4c4bb094d1541d32ae497f8c5c23405e85492cefdfe0971a9"
 dependencies = [
 "backtrace",
 "once_cell",
@@ -5407,11 +5331,11 @@ dependencies = [

 [[package]]
 name = "sentry-contexts"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eba8870c5dba2bfd9db25c75574a11429f6b95957b0a78ac02e2970dd7a5249a"
+checksum = "6c3e17295cecdbacf66c5bd38d6e1147e09e1e9d824d2d5341f76638eda02a3a"
 dependencies = [
- "hostname 0.4.0",
+ "hostname",
 "libc",
 "os_info",
 "rustc_version",
@@ -5421,9 +5345,9 @@ dependencies = [

 [[package]]
 name = "sentry-core"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46a75011ea1c0d5c46e9e57df03ce81f5c7f0a9e199086334a1f9c0a541e0826"
+checksum = "8339474f587f36cb110fa1ed1b64229eea6d47b0b886375579297b7e47aeb055"
 dependencies = [
 "once_cell",
 "rand 0.8.5",
@@ -5434,9 +5358,9 @@ dependencies = [

 [[package]]
 name = "sentry-panic"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2eaa3ecfa3c8750c78dcfd4637cfa2598b95b52897ed184b4dc77fcf7d95060d"
+checksum = "875b69f506da75bd664029eafb05f8934297d2990192896d17325f066bd665b7"
 dependencies = [
 "sentry-backtrace",
 "sentry-core",
@@ -5444,9 +5368,9 @@ dependencies = [

 [[package]]
 name = "sentry-tracing"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f715932bf369a61b7256687c6f0554141b7ce097287e30e3f7ed6e9de82498fe"
+checksum = "89feead9bdd116f8035e89567651340fc382db29240b6c55ef412078b08d1aa3"
 dependencies = [
 "sentry-backtrace",
 "sentry-core",
@@ -5456,13 +5380,13 @@ dependencies = [

 [[package]]
 name = "sentry-types"
-version = "0.32.3"
+version = "0.31.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4519c900ce734f7a0eb7aba0869dfb225a7af8820634a7dd51449e3b093cfb7c"
+checksum = "99dc599bd6646884fc403d593cdcb9816dd67c50cff3271c01ff123617908dcd"
 dependencies = [
 "debugid",
+ "getrandom 0.2.11",
 "hex",
- "rand 0.8.5",
 "serde",
 "serde_json",
 "thiserror",
@@ -5854,12 +5778,10 @@ dependencies = [
 "pageserver_client",
 "postgres_connection",
 "r2d2",
- "reqwest 0.12.4",
+ "reqwest",
 "routerify",
 "serde",
 "serde_json",
- "strum",
- "strum_macros",
 "thiserror",
 "tokio",
 "tokio-util",
@@ -5878,7 +5800,7 @@ dependencies = [
 "hyper 0.14.26",
 "pageserver_api",
 "pageserver_client",
- "reqwest 0.12.4",
+ "reqwest",
 "serde",
 "serde_json",
 "thiserror",
@@ -5932,7 +5854,7 @@ checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc"
 [[package]]
 name = "svg_fmt"
 version = "0.4.2"
-source = "git+https://github.com/neondatabase/fork--nical--rust_debug?branch=neon#c1820b28664b5df68de7f043fccf2ed5d67b6ae8"
+source = "git+https://github.com/neondatabase/fork--nical--rust_debug?branch=neon#b9501105e746629004bc6d0473639320939dbe10"

 [[package]]
 name = "syn"
@@ -5961,9 +5883,6 @@ name = "sync_wrapper"
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
-dependencies = [
- "futures-core",
-]

 [[package]]
 name = "synstructure"
@@ -6516,11 +6435,10 @@ dependencies = [

 [[package]]
 name = "tracing"
-version = "0.1.37"
+version = "0.1.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8"
+checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef"
 dependencies = [
- "cfg-if",
 "log",
 "pin-project-lite",
 "tracing-attributes",
@@ -6540,9 +6458,9 @@ dependencies = [

 [[package]]
 name = "tracing-attributes"
-version = "0.1.24"
+version = "0.1.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f57e3ca2a01450b1a921183a9c9cbfda207fd822cef4ccb00a65402cbba7a74"
+checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
 "proc-macro2",
 "quote",
@@ -6551,9 +6469,9 @@ dependencies = [

 [[package]]
 name = "tracing-core"
-version = "0.1.31"
+version = "0.1.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a"
+checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
 dependencies = [
 "once_cell",
 "valuable",
@@ -6582,14 +6500,12 @@ dependencies = [

 [[package]]
 name = "tracing-opentelemetry"
-version = "0.21.0"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "75327c6b667828ddc28f5e3f169036cb793c3f588d83bf0f262a7f062ffed3c8"
+checksum = "fc09e402904a5261e42cf27aea09ccb7d5318c6717a9eec3d8e2e65c56b18f19"
 dependencies = [
 "once_cell",
 "opentelemetry",
- "opentelemetry_sdk",
- "smallvec",
 "tracing",
 "tracing-core",
 "tracing-log",
@@ -6635,7 +6551,7 @@ dependencies = [
 "opentelemetry",
 "opentelemetry-otlp",
 "opentelemetry-semantic-conventions",
- "reqwest 0.12.4",
+ "reqwest",
 "tokio",
 "tracing",
 "tracing-opentelemetry",
@@ -6721,6 +6637,15 @@ dependencies = [
 "libc",
 ]

+[[package]]
+name = "unicase"
+version = "2.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
+dependencies = [
+ "version_check",
+]
+
 [[package]]
 name = "unicode-bidi"
 version = "0.3.13"
@@ -7079,19 +7004,6 @@ dependencies = [
 "web-sys",
 ]

-[[package]]
-name = "wasm-streams"
-version = "0.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129"
-dependencies = [
- "futures-util",
- "js-sys",
- "wasm-bindgen",
- "wasm-bindgen-futures",
- "web-sys",
-]
-
 [[package]]
 name = "wasm-timer"
 version = "0.2.5"
@@ -7132,15 +7044,6 @@ version = "0.25.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"

-[[package]]
-name = "webpki-roots"
-version = "0.26.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009"
-dependencies = [
- "rustls-pki-types",
-]
-
 [[package]]
 name = "which"
 version = "4.4.0"
@@ -7192,25 +7095,6 @@ dependencies = [
 "windows-targets 0.48.0",
 ]

-[[package]]
-name = "windows"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
-dependencies = [
- "windows-core",
- "windows-targets 0.52.4",
-]
-
-[[package]]
-name = "windows-core"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
-dependencies = [
- "windows-targets 0.52.4",
-]
-
 [[package]]
 name = "windows-sys"
 version = "0.42.0"
@@ -7443,16 +7327,6 @@ dependencies = [
 "windows-sys 0.48.0",
 ]

-[[package]]
-name = "winreg"
-version = "0.52.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5"
-dependencies = [
- "cfg-if",
- "windows-sys 0.48.0",
-]
-
 [[package]]
 name = "workspace_hack"
 version = "0.1.0"
@@ -7502,8 +7376,7 @@ dependencies = [
 "regex",
 "regex-automata 0.4.3",
 "regex-syntax 0.8.2",
- "reqwest 0.11.19",
- "reqwest 0.12.4",
+ "reqwest",
 "rustls 0.21.11",
 "scopeguard",
 "serde",
@@ -7513,7 +7386,6 @@ dependencies = [
 "subtle",
 "syn 1.0.109",
 "syn 2.0.52",
- "sync_wrapper",
 "time",
 "time-macros",
 "tokio",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -99,7 +99,6 @@ humantime = "2.1"
 humantime-serde = "1.1.1"
 hyper = "0.14"
 hyper-tungstenite = "0.13.0"
-indexmap = "2"
 inotify = "0.10.2"
 ipnet = "2.9.0"
 itertools = "0.10"
@@ -131,10 +130,10 @@ prost = "0.11"
 rand = "0.8"
 redis = { version = "0.25.2", features = ["tokio-rustls-comp", "keep-alive"] }
 regex = "1.10.2"
-reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
-reqwest-tracing = { version = "0.5", features = ["opentelemetry_0_20"] }
-reqwest-middleware = "0.3.0"
-reqwest-retry = "0.5"
+reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
+reqwest-tracing = { version = "0.4.7", features = ["opentelemetry_0_20"] }
+reqwest-middleware = "0.2.0"
+reqwest-retry = "0.2.2"
 routerify = "3"
 rpds = "0.13"
 rustc-hash = "1.1.0"
@@ -144,7 +143,7 @@ rustls-split = "0.3"
 scopeguard = "1.1"
 sysinfo = "0.29.2"
 sd-notify = "0.4.1"
-sentry = { version = "0.32", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
+sentry = { version = "0.31", default-features = false, features = ["backtrace", "contexts", "panic", "rustls", "reqwest" ] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1"
 serde_path_to_error = "0.1"
@@ -178,10 +177,9 @@ tokio-util = { version = "0.7.10", features = ["io", "rt"] }
 toml = "0.7"
 toml_edit = "0.19"
 tonic = {version = "0.9", features = ["tls", "tls-roots"]}
-tower-service = "0.3.2"
 tracing = "0.1"
 tracing-error = "0.2.0"
-tracing-opentelemetry = "0.21.0"
+tracing-opentelemetry = "0.20.0"
 tracing-subscriber = { version = "0.3", default_features = false, features = ["smallvec", "fmt", "tracing-log", "std", "env-filter", "json", "ansi"] }
 twox-hash = { version = "1.6.3", default-features = false }
 url = "2.2"
--- a/Dockerfile.build-tools
+++ b/Dockerfile.build-tools
@@ -65,7 +65,7 @@ RUN curl -sL "https://github.com/peak/s5cmd/releases/download/v${S5CMD_VERSION}/
    && mv s5cmd /usr/local/bin/s5cmd

 # LLVM
-ENV LLVM_VERSION=18
+ENV LLVM_VERSION=17
 RUN curl -fsSL 'https://apt.llvm.org/llvm-snapshot.gpg.key' | apt-key add - \
    && echo "deb http://apt.llvm.org/bullseye/ llvm-toolchain-bullseye-${LLVM_VERSION} main" > /etc/apt/sources.list.d/llvm.stable.list \
    && apt update \
@@ -141,7 +141,7 @@ WORKDIR /home/nonroot

 # Rust
 # Please keep the version of llvm (installed above) in sync with rust llvm (`rustc --version --verbose | grep LLVM`)
-ENV RUSTC_VERSION=1.78.0
+ENV RUSTC_VERSION=1.77.0
 ENV RUSTUP_HOME="/home/nonroot/.rustup"
 ENV PATH="/home/nonroot/.cargo/bin:${PATH}"
 RUN curl -sSO https://static.rust-lang.org/rustup/dist/$(uname -m)-unknown-linux-gnu/rustup-init && whoami && \
--- a/11
+++ b/11
@@ -81,14 +81,11 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status:
 		echo "'git submodule update --init --recursive --depth 2 --progress .' in project root.\n"; \
 		exit 1; }
 	mkdir -p $(POSTGRES_INSTALL_DIR)/build/$*
-
-	VERSION=$*; \
-	EXTRA_VERSION=$$(cd $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION && git rev-parse HEAD); \
-	(cd $(POSTGRES_INSTALL_DIR)/build/$$VERSION && \
-	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$$VERSION/configure \
+	(cd $(POSTGRES_INSTALL_DIR)/build/$* && \
+	env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \
 		CFLAGS='$(PG_CFLAGS)' \
-		$(PG_CONFIGURE_OPTS) --with-extra-version=" ($$EXTRA_VERSION)" \
-		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$$VERSION > configure.log)
+		$(PG_CONFIGURE_OPTS) \
+		--prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log)

 # nicer alias to run 'configure'
 # Note: I've been unable to use templates for this part of our configuration.
--- a/compute_tools/src/bin/compute_ctl.rs
+++ b/compute_tools/src/bin/compute_ctl.rs
@@ -47,11 +47,10 @@ use chrono::Utc;
 use clap::Arg;
 use signal_hook::consts::{SIGQUIT, SIGTERM};
 use signal_hook::{consts::SIGINT, iterator::Signals};
-use tracing::{error, info, warn};
+use tracing::{error, info};
 use url::Url;

 use compute_api::responses::ComputeStatus;
-use compute_api::spec::ComputeSpec;

 use compute_tools::compute::{
    forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID,
@@ -63,41 +62,12 @@ use compute_tools::logger::*;
 use compute_tools::monitor::launch_monitor;
 use compute_tools::params::*;
 use compute_tools::spec::*;
-use compute_tools::swap::resize_swap;

 // this is an arbitrary build tag. Fine as a default / for testing purposes
 // in-case of not-set environment var
 const BUILD_TAG_DEFAULT: &str = "latest";

 fn main() -> Result<()> {
-    let (build_tag, clap_args) = init()?;
-
-    let (pg_handle, start_pg_result) = {
-        // Enter startup tracing context
-        let _startup_context_guard = startup_context_from_env();
-
-        let cli_args = process_cli(&clap_args)?;
-
-        let cli_spec = try_spec_from_cli(&clap_args, &cli_args)?;
-
-        let wait_spec_result = wait_spec(build_tag, cli_args, cli_spec)?;
-
-        start_postgres(&clap_args, wait_spec_result)?
-
-        // Startup is finished, exit the startup tracing span
-    };
-
-    // PostgreSQL is now running, if startup was successful. Wait until it exits.
-    let wait_pg_result = wait_postgres(pg_handle)?;
-
-    let delay_exit = cleanup_after_postgres_exit(start_pg_result)?;
-
-    maybe_delay_exit(delay_exit);
-
-    deinit_and_exit(wait_pg_result);
-}
-
-fn init() -> Result<(String, clap::ArgMatches)> {
    init_tracing_and_logging(DEFAULT_LOG_LEVEL)?;

    let mut signals = Signals::new([SIGINT, SIGTERM, SIGQUIT])?;
@@ -112,15 +82,9 @@ fn init() -> Result<(String, clap::ArgMatches)> {
        .to_string();
    info!("build_tag: {build_tag}");

-    Ok((build_tag, cli().get_matches()))
-}
-
-fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
-    let pgbin_default = "postgres";
-    let pgbin = matches
-        .get_one::<String>("pgbin")
-        .map(|s| s.as_str())
-        .unwrap_or(pgbin_default);
+    let matches = cli().get_matches();
+    let pgbin_default = String::from("postgres");
+    let pgbin = matches.get_one::<String>("pgbin").unwrap_or(&pgbin_default);

    let ext_remote_storage = matches
        .get_one::<String>("remote-ext-config")
@@ -146,32 +110,7 @@ fn process_cli(matches: &clap::ArgMatches) -> Result<ProcessCliResult> {
        .expect("Postgres connection string is required");
    let spec_json = matches.get_one::<String>("spec");
    let spec_path = matches.get_one::<String>("spec-path");
-    let resize_swap_on_bind = matches.get_flag("resize-swap-on-bind");

-    Ok(ProcessCliResult {
-        connstr,
-        pgdata,
-        pgbin,
-        ext_remote_storage,
-        http_port,
-        spec_json,
-        spec_path,
-        resize_swap_on_bind,
-    })
-}
-
-struct ProcessCliResult<'clap> {
-    connstr: &'clap str,
-    pgdata: &'clap str,
-    pgbin: &'clap str,
-    ext_remote_storage: Option<&'clap str>,
-    http_port: u16,
-    spec_json: Option<&'clap String>,
-    spec_path: Option<&'clap String>,
-    resize_swap_on_bind: bool,
-}
-
-fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
    // Extract OpenTelemetry context for the startup actions from the
    // TRACEPARENT and TRACESTATE env variables, and attach it to the current
    // tracing context.
@@ -208,7 +147,7 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
    if let Ok(val) = std::env::var("TRACESTATE") {
        startup_tracing_carrier.insert("tracestate".to_string(), val);
    }
-    if !startup_tracing_carrier.is_empty() {
+    let startup_context_guard = if !startup_tracing_carrier.is_empty() {
        use opentelemetry::propagation::TextMapPropagator;
        use opentelemetry::sdk::propagation::TraceContextPropagator;
        let guard = TraceContextPropagator::new()
@@ -218,17 +157,8 @@ fn startup_context_from_env() -> Option<opentelemetry::ContextGuard> {
        Some(guard)
    } else {
        None
-    }
-}
+    };

-fn try_spec_from_cli(
-    matches: &clap::ArgMatches,
-    ProcessCliResult {
-        spec_json,
-        spec_path,
-        ..
-    }: &ProcessCliResult,
-) -> Result<CliSpecParams> {
    let compute_id = matches.get_one::<String>("compute-id");
    let control_plane_uri = matches.get_one::<String>("control-plane-uri");

@@ -269,34 +199,6 @@ fn try_spec_from_cli(
        }
    };

-    Ok(CliSpecParams {
-        spec,
-        live_config_allowed,
-    })
-}
-
-struct CliSpecParams {
-    /// If a spec was provided via CLI or file, the [`ComputeSpec`]
-    spec: Option<ComputeSpec>,
-    live_config_allowed: bool,
-}
-
-fn wait_spec(
-    build_tag: String,
-    ProcessCliResult {
-        connstr,
-        pgdata,
-        pgbin,
-        ext_remote_storage,
-        resize_swap_on_bind,
-        http_port,
-        ..
-    }: ProcessCliResult,
-    CliSpecParams {
-        spec,
-        live_config_allowed,
-    }: CliSpecParams,
-) -> Result<WaitSpecResult> {
    let mut new_state = ComputeState::new();
    let spec_set;

@@ -324,17 +226,19 @@ fn wait_spec(

    // If this is a pooled VM, prewarm before starting HTTP server and becoming
    // available for binding. Prewarming helps Postgres start quicker later,
-    // because QEMU will already have its memory allocated from the host, and
+    // because QEMU will already have its memory allocated from the host, and
    // the necessary binaries will already be cached.
    if !spec_set {
        compute.prewarm_postgres()?;
    }

-    // Launch http service first, so that we can serve control-plane requests
-    // while configuration is still in progress.
+    // Launch http service first, so that we can serve control-plane
+    // requests while configuration is still in progress.
    let _http_handle =
        launch_http_server(http_port, &compute).expect("cannot launch http endpoint thread");

+    let extension_server_port: u16 = http_port;
+
    if !spec_set {
        // No spec provided, hang waiting for it.
        info!("no compute spec provided, waiting");
@@ -349,45 +253,21 @@ fn wait_spec(
                break;
            }
        }
-
-        // Record for how long we slept waiting for the spec.
-        let now = Utc::now();
-        state.metrics.wait_for_spec_ms = now
-            .signed_duration_since(state.start_time)
-            .to_std()
-            .unwrap()
-            .as_millis() as u64;
-
-        // Reset start time, so that the total startup time that is calculated later will
-        // not include the time that we waited for the spec.
-        state.start_time = now;
    }

-    Ok(WaitSpecResult {
-        compute,
-        http_port,
-        resize_swap_on_bind,
-    })
-}
-
-struct WaitSpecResult {
-    compute: Arc<ComputeNode>,
-    // passed through from ProcessCliResult
-    http_port: u16,
-    resize_swap_on_bind: bool,
-}
-
-fn start_postgres(
-    // need to allow unused because `matches` is only used if target_os = "linux"
-    #[allow(unused_variables)] matches: &clap::ArgMatches,
-    WaitSpecResult {
-        compute,
-        http_port,
-        resize_swap_on_bind,
-    }: WaitSpecResult,
-) -> Result<(Option<PostgresHandle>, StartPostgresResult)> {
    // We got all we need, update the state.
    let mut state = compute.state.lock().unwrap();
+
+    // Record for how long we slept waiting for the spec.
+    state.metrics.wait_for_spec_ms = Utc::now()
+        .signed_duration_since(state.start_time)
+        .to_std()
+        .unwrap()
+        .as_millis() as u64;
+    // Reset start time to the actual start of the configuration, so that
+    // total startup time is properly measured at the end.
+    state.start_time = Utc::now();
+
    state.status = ComputeStatus::Init;
    compute.state_changed.notify_all();

@@ -395,72 +275,33 @@ fn start_postgres(
        "running compute with features: {:?}",
        state.pspec.as_ref().unwrap().spec.features
    );
-    // before we release the mutex, fetch the swap size (if any) for later.
-    let swap_size_bytes = state.pspec.as_ref().unwrap().spec.swap_size_bytes;
    drop(state);

    // Launch remaining service threads
    let _monitor_handle = launch_monitor(&compute);
    let _configurator_handle = launch_configurator(&compute);

-    let mut prestartup_failed = false;
-    let mut delay_exit = false;
-
-    // Resize swap to the desired size if the compute spec says so
-    if let (Some(size_bytes), true) = (swap_size_bytes, resize_swap_on_bind) {
-        // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion
-        // *before* starting postgres.
-        //
-        // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this
-        // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets
-        // OOM-killed during startup because swap wasn't available yet.
-        match resize_swap(size_bytes) {
-            Ok(()) => {
-                let size_gib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display.
-                info!(%size_bytes, %size_gib, "resized swap");
-            }
-            Err(err) => {
-                let err = err.context("failed to resize swap");
-                error!("{err:#}");
-
-                // Mark compute startup as failed; don't try to start postgres, and report this
-                // error to the control plane when it next asks.
-                prestartup_failed = true;
-                let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{err:?}"));
-                state.status = ComputeStatus::Failed;
-                compute.state_changed.notify_all();
-                delay_exit = true;
-            }
-        }
-    }
-
-    let extension_server_port: u16 = http_port;
-
    // Start Postgres
-    let mut pg = None;
-    if !prestartup_failed {
-        pg = match compute.start_compute(extension_server_port) {
-            Ok(pg) => Some(pg),
-            Err(err) => {
-                error!("could not start the compute node: {:#}", err);
-                let mut state = compute.state.lock().unwrap();
-                state.error = Some(format!("{:?}", err));
-                state.status = ComputeStatus::Failed;
-                // Notify others that Postgres failed to start. In case of configuring the
-                // empty compute, it's likely that API handler is still waiting for compute
-                // state change. With this we will notify it that compute is in Failed state,
-                // so control plane will know about it earlier and record proper error instead
-                // of timeout.
-                compute.state_changed.notify_all();
-                drop(state); // unlock
-                delay_exit = true;
-                None
-            }
-        };
-    } else {
-        warn!("skipping postgres startup because pre-startup step failed");
-    }
+    let mut delay_exit = false;
+    let mut exit_code = None;
+    let pg = match compute.start_compute(extension_server_port) {
+        Ok(pg) => Some(pg),
+        Err(err) => {
+            error!("could not start the compute node: {:#}", err);
+            let mut state = compute.state.lock().unwrap();
+            state.error = Some(format!("{:?}", err));
+            state.status = ComputeStatus::Failed;
+            // Notify others that Postgres failed to start. In case of configuring the
+            // empty compute, it's likely that API handler is still waiting for compute
+            // state change. With this we will notify it that compute is in Failed state,
+            // so control plane will know about it earlier and record proper error instead
+            // of timeout.
+            compute.state_changed.notify_all();
+            drop(state); // unlock
+            delay_exit = true;
+            None
+        }
+    };

    // Start the vm-monitor if directed to. The vm-monitor only runs on linux
    // because it requires cgroups.
@@ -493,7 +334,7 @@ fn start_postgres(
            // This token is used internally by the monitor to clean up all threads
            let token = CancellationToken::new();

-            let vm_monitor = rt.as_ref().map(|rt| {
+            let vm_monitor = &rt.as_ref().map(|rt| {
                rt.spawn(vm_monitor::start(
                    Box::leak(Box::new(vm_monitor::Args {
                        cgroup: cgroup.cloned(),
@@ -506,41 +347,12 @@ fn start_postgres(
        }
    }

-    Ok((
-        pg,
-        StartPostgresResult {
-            delay_exit,
-            compute,
-            #[cfg(target_os = "linux")]
-            rt,
-            #[cfg(target_os = "linux")]
-            token,
-            #[cfg(target_os = "linux")]
-            vm_monitor,
-        },
-    ))
-}
-
-type PostgresHandle = (std::process::Child, std::thread::JoinHandle<()>);
-
-struct StartPostgresResult {
-    delay_exit: bool,
-    // passed through from WaitSpecResult
-    compute: Arc<ComputeNode>,
-
-    #[cfg(target_os = "linux")]
-    rt: Option<tokio::runtime::Runtime>,
-    #[cfg(target_os = "linux")]
-    token: tokio_util::sync::CancellationToken,
-    #[cfg(target_os = "linux")]
-    vm_monitor: Option<tokio::task::JoinHandle<Result<()>>>,
-}
-
-fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
    // Wait for the child Postgres process forever. In this state Ctrl+C will
    // propagate to Postgres and it will be shut down as well.
-    let mut exit_code = None;
    if let Some((mut pg, logs_handle)) = pg {
+        // Startup is finished, exit the startup tracing span
+        drop(startup_context_guard);
+
        let ecode = pg
            .wait()
            .expect("failed to start waiting on Postgres process");
@@ -555,25 +367,6 @@ fn wait_postgres(pg: Option<PostgresHandle>) -> Result<WaitPostgresResult> {
        exit_code = ecode.code()
    }

-    Ok(WaitPostgresResult { exit_code })
-}
-
-struct WaitPostgresResult {
-    exit_code: Option<i32>,
-}
-
-fn cleanup_after_postgres_exit(
-    StartPostgresResult {
-        mut delay_exit,
-        compute,
-        #[cfg(target_os = "linux")]
-        vm_monitor,
-        #[cfg(target_os = "linux")]
-        token,
-        #[cfg(target_os = "linux")]
-        rt,
-    }: StartPostgresResult,
-) -> Result<bool> {
    // Terminate the vm_monitor so it releases the file watcher on
    // /sys/fs/cgroup/neon-postgres.
    // Note: the vm-monitor only runs on linux because it requires cgroups.
@@ -615,19 +408,13 @@ fn cleanup_after_postgres_exit(
        error!("error while checking for core dumps: {err:?}");
    }

-    Ok(delay_exit)
-}
-
-fn maybe_delay_exit(delay_exit: bool) {
    // If launch failed, keep serving HTTP requests for a while, so the cloud
    // control plane can get the actual error.
    if delay_exit {
        info!("giving control plane 30s to collect the error before shutdown");
        thread::sleep(Duration::from_secs(30));
    }
-}

-fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! {
    // Shutdown trace pipeline gracefully, so that it has a chance to send any
    // pending traces before we exit. Shutting down OTEL tracing provider may
    // hang for quite some time, see, for example:
@@ -739,11 +526,6 @@ fn cli() -> clap::Command {
                )
                .value_name("FILECACHE_CONNSTR"),
        )
-        .arg(
-            Arg::new("resize-swap-on-bind")
-                .long("resize-swap-on-bind")
-                .action(clap::ArgAction::SetTrue),
-        )
 }

 /// When compute_ctl is killed, send also termination signal to sync-safekeepers
--- a/compute_tools/src/lib.rs
+++ b/compute_tools/src/lib.rs
@@ -14,5 +14,4 @@ pub mod monitor;
 pub mod params;
 pub mod pg_helpers;
 pub mod spec;
-pub mod swap;
 pub mod sync_sk;
--- a/compute_tools/src/spec.rs
+++ b/compute_tools/src/spec.rs
@@ -490,7 +490,7 @@ pub fn handle_databases(spec: &ComputeSpec, client: &mut Client) -> Result<()> {
                "rename_db" => {
                    let new_name = op.new_name.as_ref().unwrap();

-                    if existing_dbs.contains_key(&op.name) {
+                    if existing_dbs.get(&op.name).is_some() {
                        let query: String = format!(
                            "ALTER DATABASE {} RENAME TO {}",
                            op.name.pg_quote(),
--- a/compute_tools/src/swap.rs
+++ b/compute_tools/src/swap.rs
@@ -1,36 +0,0 @@
-use anyhow::{anyhow, Context};
-use tracing::warn;
-
-pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap";
-
-pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> {
-    // run `/neonvm/bin/resize-swap --once {size_bytes}`
-    //
-    // Passing '--once' causes resize-swap to delete itself after successful completion, which
-    // means that if compute_ctl restarts later, we won't end up calling 'swapoff' while
-    // postgres is running.
-    //
-    // NOTE: resize-swap is not very clever. If present, --once MUST be the first arg.
-    let child_result = std::process::Command::new("/usr/bin/sudo")
-        .arg(RESIZE_SWAP_BIN)
-        .arg("--once")
-        .arg(size_bytes.to_string())
-        .spawn();
-
-    if matches!(&child_result, Err(e) if e.kind() == std::io::ErrorKind::NotFound) {
-        warn!("ignoring \"not found\" error from resize-swap to avoid swapoff while compute is running");
-        return Ok(());
-    }
-
-    child_result
-        .context("spawn() failed")
-        .and_then(|mut child| child.wait().context("wait() failed"))
-        .and_then(|status| match status.success() {
-            true => Ok(()),
-            false => Err(anyhow!("process exited with {status}")),
-        })
-        // wrap any prior error with the overall context that we couldn't run the command
-        .with_context(|| {
-            format!("could not run `/usr/bin/sudo {RESIZE_SWAP_BIN} --once {size_bytes}`")
-        })
-}
--- a/control_plane/Cargo.toml
+++ b/control_plane/Cargo.toml
@@ -28,7 +28,6 @@ serde_with.workspace = true
 tar.workspace = true
 thiserror.workspace = true
 toml.workspace = true
-toml_edit.workspace = true
 tokio.workspace = true
 tokio-postgres.workspace = true
 tokio-util.workspace = true
--- a/control_plane/src/bin/neon_local.rs
+++ b/control_plane/src/bin/neon_local.rs
@@ -14,15 +14,15 @@ use control_plane::pageserver::{PageServerNode, PAGESERVER_REMOTE_STORAGE_DIR};
 use control_plane::safekeeper::SafekeeperNode;
 use control_plane::storage_controller::StorageController;
 use control_plane::{broker, local_env};
-use pageserver_api::config::{
-    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
-    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
-};
 use pageserver_api::controller_api::PlacementPolicy;
 use pageserver_api::models::{
    ShardParameters, TenantCreateRequest, TimelineCreateRequest, TimelineInfo,
 };
 use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId};
+use pageserver_api::{
+    DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT,
+    DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT,
+};
 use postgres_backend::AuthType;
 use postgres_connection::parse_host_port;
 use safekeeper_api::{
@@ -133,7 +133,7 @@ fn main() -> Result<()> {
        let subcommand_result = match sub_name {
            "tenant" => rt.block_on(handle_tenant(sub_args, &mut env)),
            "timeline" => rt.block_on(handle_timeline(sub_args, &mut env)),
-            "start" => rt.block_on(handle_start_all(&env)),
+            "start" => rt.block_on(handle_start_all(sub_args, &env)),
            "stop" => rt.block_on(handle_stop_all(sub_args, &env)),
            "pageserver" => rt.block_on(handle_pageserver(sub_args, &env)),
            "storage_controller" => rt.block_on(handle_storage_controller(sub_args, &env)),
@@ -358,13 +358,6 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
        default_conf(*num_pageservers)
    };

-    let pageserver_config: toml_edit::Document =
-        if let Some(path) = init_match.get_one::<PathBuf>("pageserver-config") {
-            std::fs::read_to_string(path)?.parse()?
-        } else {
-            toml_edit::Document::new()
-        };
-
    let pg_version = init_match
        .get_one::<u32>("pg-version")
        .copied()
@@ -382,7 +375,7 @@ fn handle_init(init_match: &ArgMatches) -> anyhow::Result<LocalEnv> {
    // Initialize pageserver, create initial tenant and timeline.
    for ps_conf in &env.pageservers {
        PageServerNode::from_env(&env, ps_conf)
-            .initialize(&pageserver_config)
+            .initialize(&pageserver_config_overrides(init_match))
            .unwrap_or_else(|e| {
                eprintln!("pageserver init failed: {e:?}");
                exit(1);
@@ -404,6 +397,15 @@ fn get_default_pageserver(env: &local_env::LocalEnv) -> PageServerNode {
    PageServerNode::from_env(env, ps_conf)
 }

+/// Collects every `--pageserver-config-override` value given on the command line.
+fn pageserver_config_overrides(init_match: &ArgMatches) -> Vec<&str> {
+    match init_match.get_many::<String>("pageserver-config-override") {
+        Some(values) => values.map(String::as_str).collect(),
+        // The flag is optional; when it is absent there is nothing to pass on.
+        None => Vec::new(),
+    }
+}
+
 async fn handle_tenant(
    tenant_match: &ArgMatches,
    env: &mut local_env::LocalEnv,
@@ -835,8 +837,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                .copied()
                .unwrap_or(false);

-            let allow_multiple = sub_args.get_flag("allow-multiple");
-
            let mode = match (lsn, hot_standby) {
                (Some(lsn), false) => ComputeMode::Static(lsn),
                (None, true) => ComputeMode::Replica,
@@ -854,9 +854,7 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                _ => {}
            }

-            if !allow_multiple {
-                cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;
-            }
+            cplane.check_conflicting_endpoints(mode, tenant_id, timeline_id)?;

            cplane.new_endpoint(
                &endpoint_id,
@@ -885,8 +883,6 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re

            let remote_ext_config = sub_args.get_one::<String>("remote-ext-config");

-            let allow_multiple = sub_args.get_flag("allow-multiple");
-
            // If --safekeepers argument is given, use only the listed safekeeper nodes.
            let safekeepers =
                if let Some(safekeepers_str) = sub_args.get_one::<String>("safekeepers") {
@@ -912,13 +908,11 @@ async fn handle_endpoint(ep_match: &ArgMatches, env: &local_env::LocalEnv) -> Re
                .cloned()
                .unwrap_or_default();

-            if !allow_multiple {
-                cplane.check_conflicting_endpoints(
-                    endpoint.mode,
-                    endpoint.tenant_id,
-                    endpoint.timeline_id,
-                )?;
-            }
+            cplane.check_conflicting_endpoints(
+                endpoint.mode,
+                endpoint.tenant_id,
+                endpoint.timeline_id,
+            )?;

            let (pageservers, stripe_size) = if let Some(pageserver_id) = pageserver_id {
                let conf = env.get_pageserver_conf(pageserver_id).unwrap();
@@ -1074,7 +1068,10 @@ fn get_pageserver(env: &local_env::LocalEnv, args: &ArgMatches) -> Result<PageSe
 async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> Result<()> {
    match sub_match.subcommand() {
        Some(("start", subcommand_args)) => {
-            if let Err(e) = get_pageserver(env, subcommand_args)?.start().await {
+            if let Err(e) = get_pageserver(env, subcommand_args)?
+                .start(&pageserver_config_overrides(subcommand_args))
+                .await
+            {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
@@ -1100,7 +1097,10 @@ async fn handle_pageserver(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
                exit(1);
            }

-            if let Err(e) = pageserver.start().await {
+            if let Err(e) = pageserver
+                .start(&pageserver_config_overrides(subcommand_args))
+                .await
+            {
                eprintln!("pageserver start failed: {e}");
                exit(1);
            }
@@ -1227,7 +1227,7 @@ async fn handle_safekeeper(sub_match: &ArgMatches, env: &local_env::LocalEnv) ->
    Ok(())
 }

-async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {
+async fn handle_start_all(sub_match: &ArgMatches, env: &local_env::LocalEnv) -> anyhow::Result<()> {
    // Endpoints are not started automatically

    broker::start_broker_process(env).await?;
@@ -1244,7 +1244,10 @@ async fn handle_start_all(env: &local_env::LocalEnv) -> anyhow::Result<()> {

    for ps_conf in &env.pageservers {
        let pageserver = PageServerNode::from_env(env, ps_conf);
-        if let Err(e) = pageserver.start().await {
+        if let Err(e) = pageserver
+            .start(&pageserver_config_overrides(sub_match))
+            .await
+        {
            eprintln!("pageserver {} start failed: {:#}", ps_conf.id, e);
            try_stop_all(env, true).await;
            exit(1);
@@ -1385,6 +1388,13 @@ fn cli() -> Command {
        .required(false)
        .value_name("stop-mode");

+    let pageserver_config_args = Arg::new("pageserver-config-override")
+        .long("pageserver-config-override")
+        .num_args(1)
+        .action(ArgAction::Append)
+        .help("Additional pageserver's configuration options or overrides, refer to pageserver's 'config-override' CLI parameter docs for more")
+        .required(false);
+
    let remote_ext_config_args = Arg::new("remote-ext-config")
        .long("remote-ext-config")
        .num_args(1)
@@ -1434,33 +1444,20 @@ fn cli() -> Command {
        .help("If set, will create test user `user` and `neondb` database. Requires `update-catalog = true`")
        .required(false);

-    let allow_multiple = Arg::new("allow-multiple")
-        .help("Allow multiple primary endpoints running on the same branch. Shouldn't be used normally, but useful for tests.")
-        .long("allow-multiple")
-        .action(ArgAction::SetTrue)
-        .required(false);
-
    Command::new("Neon CLI")
        .arg_required_else_help(true)
        .version(GIT_VERSION)
        .subcommand(
            Command::new("init")
                .about("Initialize a new Neon repository, preparing configs for services to start with")
+                .arg(pageserver_config_args.clone())
                .arg(num_pageservers_arg.clone())
                .arg(
                    Arg::new("config")
                        .long("config")
                        .required(false)
                        .value_parser(value_parser!(PathBuf))
-                        .value_name("config")
-                )
-                .arg(
-                    Arg::new("pageserver-config")
-                        .long("pageserver-config")
-                        .required(false)
-                        .value_parser(value_parser!(PathBuf))
-                        .value_name("pageserver-config")
-                        .help("Merge the provided pageserver config into the one generated by neon_local."),
+                        .value_name("config"),
                )
                .arg(pg_version_arg.clone())
                .arg(force_arg)
@@ -1542,6 +1539,7 @@ fn cli() -> Command {
                .subcommand(Command::new("status"))
                .subcommand(Command::new("start")
                    .about("Start local pageserver")
+                    .arg(pageserver_config_args.clone())
                )
                .subcommand(Command::new("stop")
                    .about("Stop local pageserver")
@@ -1549,6 +1547,7 @@ fn cli() -> Command {
                )
                .subcommand(Command::new("restart")
                    .about("Restart local pageserver")
+                    .arg(pageserver_config_args.clone())
                )
        )
        .subcommand(
@@ -1602,7 +1601,6 @@ fn cli() -> Command {
                    .arg(pg_version_arg.clone())
                    .arg(hot_standby_arg.clone())
                    .arg(update_catalog)
-                    .arg(allow_multiple.clone())
                )
                .subcommand(Command::new("start")
                    .about("Start postgres.\n If the endpoint doesn't exist yet, it is created.")
@@ -1611,7 +1609,6 @@ fn cli() -> Command {
                    .arg(safekeepers_arg)
                    .arg(remote_ext_config_args)
                    .arg(create_test_user)
-                    .arg(allow_multiple.clone())
                )
                .subcommand(Command::new("reconfigure")
                            .about("Reconfigure the endpoint")
@@ -1663,6 +1660,7 @@ fn cli() -> Command {
        .subcommand(
            Command::new("start")
                .about("Start page server and safekeepers")
+                .arg(pageserver_config_args)
        )
        .subcommand(
            Command::new("stop")
--- a/control_plane/src/endpoint.rs
+++ b/control_plane/src/endpoint.rs
@@ -554,7 +554,6 @@ impl Endpoint {
            format_version: 1.0,
            operation_uuid: None,
            features: self.features.clone(),
-            swap_size_bytes: None,
            cluster: Cluster {
                cluster_id: None, // project ID: not used
                name: None,       // project name: not used
--- a/control_plane/src/local_env.rs
+++ b/control_plane/src/local_env.rs
@@ -382,10 +382,7 @@ impl LocalEnv {

        // Find neon binaries.
        if env.neon_distrib_dir == Path::new("") {
-            env::current_exe()?
-                .parent()
-                .unwrap()
-                .clone_into(&mut env.neon_distrib_dir);
+            env.neon_distrib_dir = env::current_exe()?.parent().unwrap().to_owned();
        }

        if env.pageservers.is_empty() {
--- a/control_plane/src/pageserver.rs
+++ b/control_plane/src/pageserver.rs
@@ -4,6 +4,7 @@
 //!
 //!   .neon/
 //!
+use std::borrow::Cow;
 use std::collections::HashMap;

 use std::io;
@@ -17,8 +18,7 @@ use anyhow::{bail, Context};
 use camino::Utf8PathBuf;
 use futures::SinkExt;
 use pageserver_api::models::{
-    self, AuxFilePolicy, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo,
-    TimelineInfo,
+    self, LocationConfig, ShardParameters, TenantHistorySize, TenantInfo, TimelineInfo,
 };
 use pageserver_api::shard::TenantShardId;
 use pageserver_client::mgmt_api;
@@ -77,7 +77,7 @@ impl PageServerNode {
    /// Merge overrides provided by the user on the command line with our default overides derived from neon_local configuration.
    ///
    /// These all end up on the command line of the `pageserver` binary.
-    fn neon_local_overrides(&self, cli_overrides: &toml_edit::Document) -> Vec<String> {
+    fn neon_local_overrides(&self, cli_overrides: &[&str]) -> Vec<String> {
        // FIXME: the paths should be shell-escaped to handle paths with spaces, quotas etc.
        let pg_distrib_dir_param = format!(
            "pg_distrib_dir='{}'",
@@ -157,7 +157,10 @@ impl PageServerNode {
            }
        }

-        if !cli_overrides.contains_key("remote_storage") {
+        if !cli_overrides
+            .iter()
+            .any(|c| c.starts_with("remote_storage"))
+        {
            overrides.push(format!(
                "remote_storage={{local_path='../{PAGESERVER_REMOTE_STORAGE_DIR}'}}"
            ));
@@ -170,13 +173,13 @@ impl PageServerNode {
        }

        // Apply the user-provided overrides
-        overrides.push(cli_overrides.to_string());
+        overrides.extend(cli_overrides.iter().map(|&c| c.to_owned()));

        overrides
    }

    /// Initializes a pageserver node by creating its config with the overrides provided.
-    pub fn initialize(&self, config_overrides: &toml_edit::Document) -> anyhow::Result<()> {
+    pub fn initialize(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
        // First, run `pageserver --init` and wait for it to write a config into FS and exit.
        self.pageserver_init(config_overrides)
            .with_context(|| format!("Failed to run init for pageserver node {}", self.conf.id))
@@ -194,11 +197,11 @@ impl PageServerNode {
            .expect("non-Unicode path")
    }

-    pub async fn start(&self) -> anyhow::Result<()> {
-        self.start_node().await
+    pub async fn start(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
+        self.start_node(config_overrides, false).await
    }

-    fn pageserver_init(&self, config_overrides: &toml_edit::Document) -> anyhow::Result<()> {
+    fn pageserver_init(&self, config_overrides: &[&str]) -> anyhow::Result<()> {
        let datadir = self.repo_path();
        let node_id = self.conf.id;
        println!(
@@ -216,18 +219,11 @@ impl PageServerNode {
        let datadir_path_str = datadir.to_str().with_context(|| {
            format!("Cannot start pageserver node {node_id} in path that has no string representation: {datadir:?}")
        })?;
+        let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
+        args.push(Cow::Borrowed("--init"));

-        // `pageserver --init` merges the `--config-override`s into a built-in default config,
-        // then writes out the merged product to `pageserver.toml`.
-        // TODO: just write the full `pageserver.toml` and get rid of `--config-override`.
-        let mut args = vec!["--init", "--workdir", datadir_path_str];
-        let overrides = self.neon_local_overrides(config_overrides);
-        for piece in &overrides {
-            args.push("--config-override");
-            args.push(piece);
-        }
        let init_output = Command::new(self.env.pageserver_bin())
-            .args(args)
+            .args(args.iter().map(Cow::as_ref))
            .envs(self.pageserver_env_variables()?)
            .output()
            .with_context(|| format!("Failed to run pageserver init for node {node_id}"))?;
@@ -252,13 +248,12 @@ impl PageServerNode {
        // situation: the metadata is written by some other script.
        std::fs::write(
            metadata_path,
-            serde_json::to_vec(&pageserver_api::config::NodeMetadata {
-                postgres_host: "localhost".to_string(),
-                postgres_port: self.pg_connection_config.port(),
-                http_host: "localhost".to_string(),
-                http_port,
-                other: HashMap::new(),
-            })
+            serde_json::to_vec(&serde_json::json!({
+                "host": "localhost",
+                "port": self.pg_connection_config.port(),
+                "http_host": "localhost",
+                "http_port": http_port,
+            }))
            .unwrap(),
        )
        .expect("Failed to write metadata file");
@@ -266,7 +261,11 @@ impl PageServerNode {
        Ok(())
    }

-    async fn start_node(&self) -> anyhow::Result<()> {
+    async fn start_node(
+        &self,
+        config_overrides: &[&str],
+        update_config: bool,
+    ) -> anyhow::Result<()> {
        // TODO: using a thread here because start_process() is not async but we need to call check_status()
        let datadir = self.repo_path();
        print!(
@@ -283,12 +282,15 @@ impl PageServerNode {
                self.conf.id, datadir,
            )
        })?;
-        let args = vec!["-D", datadir_path_str];
+        let mut args = self.pageserver_basic_args(config_overrides, datadir_path_str);
+        if update_config {
+            args.push(Cow::Borrowed("--update-config"));
+        }
        background_process::start_process(
            "pageserver",
            &datadir,
            &self.env.pageserver_bin(),
-            args,
+            args.iter().map(Cow::as_ref),
            self.pageserver_env_variables()?,
            background_process::InitialPidFile::Expect(self.pid_file()),
            || async {
@@ -305,6 +307,22 @@ impl PageServerNode {
        Ok(())
    }

+    /// Builds the common `pageserver` argv: `-D <datadir>` plus one `-c <override>` per setting.
+    fn pageserver_basic_args<'a>(
+        &self,
+        config_overrides: &'a [&'a str],
+        datadir_path_str: &'a str,
+    ) -> Vec<Cow<'a, str>> {
+        let workdir_args = [Cow::Borrowed("-D"), Cow::Borrowed(datadir_path_str)];
+        // Every entry returned by `neon_local_overrides` is passed behind its own `-c` flag.
+        let override_args = self
+            .neon_local_overrides(config_overrides)
+            .into_iter()
+            .flat_map(|setting| [Cow::Borrowed("-c"), Cow::Owned(setting)]);
+
+        workdir_args.into_iter().chain(override_args).collect()
+    }
+
    fn pageserver_env_variables(&self) -> anyhow::Result<Vec<(String, String)>> {
        // FIXME: why is this tied to pageserver's auth type? Whether or not the safekeeper
        // needs a token, and how to generate that token, seems independent to whether
@@ -430,11 +448,11 @@ impl PageServerNode {
                .map(serde_json::from_str)
                .transpose()
                .context("parse `timeline_get_throttle` from json")?,
-            switch_aux_file_policy: settings
-                .remove("switch_aux_file_policy")
-                .map(|x| x.parse::<AuxFilePolicy>())
+            switch_to_aux_file_v2: settings
+                .remove("switch_to_aux_file_v2")
+                .map(|x| x.parse::<bool>())
                .transpose()
-                .context("Failed to parse 'switch_aux_file_policy'")?,
+                .context("Failed to parse 'switch_to_aux_file_v2' as bool")?,
        };
        if !settings.is_empty() {
            bail!("Unrecognized tenant settings: {settings:?}")
@@ -553,11 +571,11 @@ impl PageServerNode {
                    .map(serde_json::from_str)
                    .transpose()
                    .context("parse `timeline_get_throttle` from json")?,
-                switch_aux_file_policy: settings
-                    .remove("switch_aux_file_policy")
-                    .map(|x| x.parse::<AuxFilePolicy>())
+                switch_to_aux_file_v2: settings
+                    .remove("switch_to_aux_file_v2")
+                    .map(|x| x.parse::<bool>())
                    .transpose()
-                    .context("Failed to parse 'switch_aux_file_policy'")?,
+                    .context("Failed to parse 'switch_to_aux_file_v2' as bool")?,
            }
        };

--- a/control_plane/src/storage_controller.rs
+++ b/control_plane/src/storage_controller.rs
@@ -3,6 +3,7 @@ use crate::{
    local_env::{LocalEnv, NeonStorageControllerConf},
 };
 use camino::{Utf8Path, Utf8PathBuf};
+use hyper::Method;
 use pageserver_api::{
    controller_api::{
        NodeConfigureRequest, NodeRegisterRequest, TenantCreateResponse, TenantLocateResponse,
@@ -16,7 +17,6 @@ use pageserver_api::{
 };
 use pageserver_client::mgmt_api::ResponseErrorMessageExt;
 use postgres_backend::AuthType;
-use reqwest::Method;
 use serde::{de::DeserializeOwned, Deserialize, Serialize};
 use std::{fs, str::FromStr};
 use tokio::process::Command;
@@ -379,7 +379,7 @@ impl StorageController {
    /// Simple HTTP request wrapper for calling into storage controller
    async fn dispatch<RQ, RS>(
        &self,
-        method: reqwest::Method,
+        method: hyper::Method,
        path: String,
        body: Option<RQ>,
    ) -> anyhow::Result<RS>
--- a/control_plane/storcon_cli/src/main.rs
+++ b/control_plane/storcon_cli/src/main.rs
@@ -1,6 +1,7 @@
 use std::{collections::HashMap, str::FromStr, time::Duration};

 use clap::{Parser, Subcommand};
+use hyper::{Method, StatusCode};
 use pageserver_api::{
    controller_api::{
        NodeAvailabilityWrapper, NodeDescribeResponse, ShardSchedulingPolicy,
@@ -13,7 +14,7 @@ use pageserver_api::{
    shard::{ShardStripeSize, TenantShardId},
 };
 use pageserver_client::mgmt_api::{self, ResponseErrorMessageExt};
-use reqwest::{Method, StatusCode, Url};
+use reqwest::Url;
 use serde::{de::DeserializeOwned, Serialize};
 use utils::id::{NodeId, TenantId};

@@ -231,7 +232,7 @@ impl Client {
    /// Simple HTTP request wrapper for calling into storage controller
    async fn dispatch<RQ, RS>(
        &self,
-        method: Method,
+        method: hyper::Method,
        path: String,
        body: Option<RQ>,
    ) -> mgmt_api::Result<RS>
--- a/libs/compute_api/src/spec.rs
+++ b/libs/compute_api/src/spec.rs
@@ -33,23 +33,6 @@ pub struct ComputeSpec {
    #[serde(default)]
    pub features: Vec<ComputeFeature>,

-    /// If compute_ctl was passed `--resize-swap-on-bind`, a value of `Some(_)` instructs
-    /// compute_ctl to `/neonvm/bin/resize-swap` with the given size, when the spec is first
-    /// received.
-    ///
-    /// Both this field and `--resize-swap-on-bind` are required, so that the control plane's
-    /// spec generation doesn't need to be aware of the actual compute it's running on, while
-    /// guaranteeing gradual rollout of swap. Otherwise, without `--resize-swap-on-bind`, we could
-    /// end up trying to resize swap in VMs without it -- or end up *not* resizing swap, thus
-    /// giving every VM much more swap than it should have (32GiB).
-    ///
-    /// Eventually we may remove `--resize-swap-on-bind` and exclusively use `swap_size_bytes` for
-    /// enabling the swap resizing behavior once rollout is complete.
-    ///
-    /// See neondatabase/cloud#12047 for more.
-    #[serde(default)]
-    pub swap_size_bytes: Option<u64>,
-
    /// Expected cluster state at the end of transition process.
    pub cluster: Cluster,
    pub delta_operations: Option<Vec<DeltaOp>>,
--- a/libs/metrics/src/lib.rs
+++ b/libs/metrics/src/lib.rs
@@ -480,15 +480,6 @@ impl<A: CounterPairAssoc> CounterPairVec<A> {
        let id = self.vec.with_labels(labels);
        self.vec.remove_metric(id)
    }
-
-    pub fn sample(&self, labels: <A::LabelGroupSet as LabelGroupSet>::Group<'_>) -> u64 {
-        let id = self.vec.with_labels(labels);
-        let metric = self.vec.get_metric(id);
-
-        let inc = metric.inc.count.load(std::sync::atomic::Ordering::Relaxed);
-        let dec = metric.dec.count.load(std::sync::atomic::Ordering::Relaxed);
-        inc.saturating_sub(dec)
-    }
 }

 impl<T, A> ::measured::metric::group::MetricGroup<T> for CounterPairVec<A>
--- a/libs/pageserver_api/src/config.rs
+++ b/libs/pageserver_api/src/config.rs
@@ -1,31 +0,0 @@
-use std::collections::HashMap;
-
-use const_format::formatcp;
-
-#[cfg(test)]
-mod tests;
-
-pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
-pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
-pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
-pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
-
-// Certain metadata (e.g. externally-addressable name, AZ) is delivered
-// as a separate structure.  This information is not neeed by the pageserver
-// itself, it is only used for registering the pageserver with the control
-// plane and/or storage controller.
-//
-#[derive(PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
-pub struct NodeMetadata {
-    #[serde(rename = "host")]
-    pub postgres_host: String,
-    #[serde(rename = "port")]
-    pub postgres_port: u16,
-    pub http_host: String,
-    pub http_port: u16,
-
-    // Deployment tools may write fields to the metadata file beyond what we
-    // use in this type: this type intentionally only names fields that require.
-    #[serde(flatten)]
-    pub other: HashMap<String, serde_json::Value>,
-}
--- a/libs/pageserver_api/src/config/tests.rs
+++ b/libs/pageserver_api/src/config/tests.rs
@@ -1,22 +0,0 @@
-use super::*;
-
-#[test]
-fn test_node_metadata_v1_backward_compatibilty() {
-    let v1 = serde_json::to_vec(&serde_json::json!({
-        "host": "localhost",
-        "port": 23,
-        "http_host": "localhost",
-        "http_port": 42,
-    }));
-
-    assert_eq!(
-        serde_json::from_slice::<NodeMetadata>(&v1.unwrap()).unwrap(),
-        NodeMetadata {
-            postgres_host: "localhost".to_string(),
-            postgres_port: 23,
-            http_host: "localhost".to_string(),
-            http_port: 42,
-            other: HashMap::new(),
-        }
-    )
-}
--- a/libs/pageserver_api/src/key.rs
+++ b/libs/pageserver_api/src/key.rs
@@ -80,7 +80,7 @@ impl Key {
    }

    /// Get the range of metadata keys.
-    pub const fn metadata_key_range() -> Range<Self> {
+    pub fn metadata_key_range() -> Range<Self> {
        Key {
            field1: METADATA_KEY_BEGIN_PREFIX,
            field2: 0,
@@ -572,17 +572,14 @@ pub const AUX_FILES_KEY: Key = Key {
 // Reverse mappings for a few Keys.
 // These are needed by WAL redo manager.

-/// Non inherited range for vectored get.
 pub const NON_INHERITED_RANGE: Range<Key> = AUX_FILES_KEY..AUX_FILES_KEY.next();
-/// Sparse keyspace range for vectored get. Missing key error will be ignored for this range.
-pub const NON_INHERITED_SPARSE_RANGE: Range<Key> = Key::metadata_key_range();

 // AUX_FILES currently stores only data for logical replication (slots etc), and
 // we don't preserve these on a branch because safekeepers can't follow timeline
 // switch (and generally it likely should be optional), so ignore these.
 #[inline(always)]
 pub fn is_inherited_key(key: Key) -> bool {
-    !NON_INHERITED_RANGE.contains(&key) && !NON_INHERITED_SPARSE_RANGE.contains(&key)
+    !NON_INHERITED_RANGE.contains(&key)
 }

 #[inline(always)]
--- a/libs/pageserver_api/src/keyspace.rs
+++ b/libs/pageserver_api/src/keyspace.rs
@@ -240,7 +240,7 @@ impl<'a> ShardedRange<'a> {
    /// pages that would not actually be stored on this node.
    ///
    /// Don't use this function in code that works with physical entities like layer files.
-    pub fn raw_size(range: &Range<Key>) -> u32 {
+    fn raw_size(range: &Range<Key>) -> u32 {
        if is_contiguous_range(range) {
            contiguous_range_len(range)
        } else {
--- a/libs/pageserver_api/src/lib.rs
+++ b/libs/pageserver_api/src/lib.rs
@@ -1,5 +1,6 @@
 #![deny(unsafe_code)]
 #![deny(clippy::undocumented_unsafe_blocks)]
+use const_format::formatcp;

 pub mod controller_api;
 pub mod key;
@@ -10,4 +11,7 @@ pub mod shard;
 /// Public API types
 pub mod upcall_api;

-pub mod config;
+pub const DEFAULT_PG_LISTEN_PORT: u16 = 64000;
+pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN_PORT}");
+pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898;
+pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}");
--- a/libs/pageserver_api/src/models.rs
+++ b/libs/pageserver_api/src/models.rs
@@ -1,4 +1,3 @@
-pub mod detach_ancestor;
 pub mod partitioning;
 pub mod utilization;

@@ -9,7 +8,6 @@ use std::{
    collections::HashMap,
    io::{BufRead, Read},
    num::{NonZeroU64, NonZeroUsize},
-    str::FromStr,
    time::{Duration, SystemTime},
 };

@@ -305,31 +303,7 @@ pub struct TenantConfig {
    pub lazy_slru_download: Option<bool>,
    pub timeline_get_throttle: Option<ThrottleConfig>,
    pub image_layer_creation_check_threshold: Option<u8>,
-    pub switch_aux_file_policy: Option<AuxFilePolicy>,
-}
-
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub enum AuxFilePolicy {
-    V1,
-    V2,
-    CrossValidation,
-}
-
-impl FromStr for AuxFilePolicy {
-    type Err = anyhow::Error;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let s = s.to_lowercase();
-        if s == "v1" {
-            Ok(Self::V1)
-        } else if s == "v2" {
-            Ok(Self::V2)
-        } else if s == "crossvalidation" || s == "cross_validation" {
-            Ok(Self::CrossValidation)
-        } else {
-            anyhow::bail!("cannot parse {} to aux file policy", s)
-        }
-    }
+    pub switch_to_aux_file_v2: Option<bool>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
@@ -456,6 +430,8 @@ pub struct StatusResponse {
 #[derive(Serialize, Deserialize, Debug)]
 #[serde(deny_unknown_fields)]
 pub struct TenantLocationConfigRequest {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tenant_id: Option<TenantShardId>,
    #[serde(flatten)]
    pub config: LocationConfig, // as we have a flattened field, we should reject all unknown fields in it
 }
--- a/libs/pageserver_api/src/models/detach_ancestor.rs
+++ b/libs/pageserver_api/src/models/detach_ancestor.rs
@@ -1,6 +0,0 @@
-use utils::id::TimelineId;
-
-#[derive(Default, serde::Serialize)]
-pub struct AncestorDetached {
-    pub reparented_timelines: Vec<TimelineId>,
-}
--- a/libs/pageserver_api/src/shard.rs
+++ b/libs/pageserver_api/src/shard.rs
@@ -97,7 +97,7 @@ impl ShardCount {

    /// The internal value of a ShardCount may be zero, which means "1 shard, but use
    /// legacy format for TenantShardId that excludes the shard suffix", also known
-    /// as [`TenantShardId::unsharded`].
+    /// as `TenantShardId::unsharded`.
    ///
    /// This method returns the actual number of shards, i.e. if our internal value is
    /// zero, we return 1 (unsharded tenants have 1 shard).
@@ -116,9 +116,7 @@ impl ShardCount {
        self.0
    }

-    /// Whether the `ShardCount` is for an unsharded tenant, so uses one shard but
-    /// uses the legacy format for `TenantShardId`. See also the documentation for
-    /// [`Self::count`].
+    /// Whether this `ShardCount` denotes an unsharded tenant, i.e. the internal value is zero.
    pub fn is_unsharded(&self) -> bool {
        self.0 == 0
    }
--- a/libs/postgres_ffi/src/xlog_utils.rs
+++ b/libs/postgres_ffi/src/xlog_utils.rs
@@ -331,10 +331,7 @@ impl CheckPoint {
    /// Returns 'true' if the XID was updated.
    pub fn update_next_xid(&mut self, xid: u32) -> bool {
        // nextXid should be greater than any XID in WAL, so increment provided XID and check for wraparround.
-        let mut new_xid = std::cmp::max(
-            xid.wrapping_add(1),
-            pg_constants::FIRST_NORMAL_TRANSACTION_ID,
-        );
+        let mut new_xid = std::cmp::max(xid.wrapping_add(1), pg_constants::FIRST_NORMAL_TRANSACTION_ID);
        // To reduce number of metadata checkpoints, we forward align XID on XID_CHECKPOINT_INTERVAL.
        // XID_CHECKPOINT_INTERVAL should not be larger than BLCKSZ*CLOG_XACTS_PER_BYTE
        new_xid =
@@ -370,16 +367,8 @@ pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result<Byte
    let seg_off = lsn.segment_offset(WAL_SEGMENT_SIZE);

    let first_page_only = seg_off < XLOG_BLCKSZ;
-    // If first records starts in the middle of the page, pretend in page header
-    // there is a fake record which ends where first real record starts. This
-    // makes pg_waldump etc happy.
-    let (shdr_rem_len, infoflags) = if first_page_only && seg_off > 0 {
-        assert!(seg_off >= XLOG_SIZE_OF_XLOG_LONG_PHD);
-        // xlp_rem_len doesn't include page header, hence the subtraction.
-        (
-            seg_off - XLOG_SIZE_OF_XLOG_LONG_PHD,
-            pg_constants::XLP_FIRST_IS_CONTRECORD,
-        )
+    let (shdr_rem_len, infoflags) = if first_page_only {
+        (seg_off, pg_constants::XLP_FIRST_IS_CONTRECORD)
    } else {
        (0, 0)
    };
@@ -408,22 +397,20 @@ pub fn generate_wal_segment(segno: u64, system_id: u64, lsn: Lsn) -> Result<Byte

    if !first_page_only {
        let block_offset = lsn.page_offset_in_segment(WAL_SEGMENT_SIZE) as usize;
-        // see comments above about XLP_FIRST_IS_CONTRECORD and xlp_rem_len.
-        let (xlp_rem_len, xlp_info) = if page_off > 0 {
-            assert!(page_off >= XLOG_SIZE_OF_XLOG_SHORT_PHD as u64);
-            (
-                (page_off - XLOG_SIZE_OF_XLOG_SHORT_PHD as u64) as u32,
-                pg_constants::XLP_FIRST_IS_CONTRECORD,
-            )
-        } else {
-            (0, 0)
-        };
        let header = XLogPageHeaderData {
            xlp_magic: XLOG_PAGE_MAGIC as u16,
-            xlp_info,
+            xlp_info: if page_off >= pg_constants::SIZE_OF_PAGE_HEADER as u64 {
+                pg_constants::XLP_FIRST_IS_CONTRECORD
+            } else {
+                0
+            },
            xlp_tli: PG_TLI,
            xlp_pageaddr: lsn.page_lsn().0,
-            xlp_rem_len,
+            xlp_rem_len: if page_off >= pg_constants::SIZE_OF_PAGE_HEADER as u64 {
+                page_off as u32
+            } else {
+                0u32
+            },
            ..Default::default() // Put 0 in padding fields.
        };
        let hdr_bytes = header.encode()?;
--- a/libs/remote_storage/Cargo.toml
+++ b/libs/remote_storage/Cargo.toml
@@ -38,7 +38,6 @@ azure_storage_blobs.workspace = true
 futures-util.workspace = true
 http-types.workspace = true
 itertools.workspace = true
-sync_wrapper = { workspace = true, features = ["futures"] }

 [dev-dependencies]
 camino-tempfile.workspace = true
--- a/libs/remote_storage/src/azure_blob.rs
+++ b/libs/remote_storage/src/azure_blob.rs
@@ -3,7 +3,6 @@
 use std::borrow::Cow;
 use std::collections::HashMap;
 use std::env;
-use std::io;
 use std::num::NonZeroU32;
 use std::pin::Pin;
 use std::str::FromStr;
@@ -21,7 +20,6 @@ use azure_storage_blobs::blob::CopyStatus;
 use azure_storage_blobs::prelude::ClientBuilder;
 use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient};
 use bytes::Bytes;
-use futures::future::Either;
 use futures::stream::Stream;
 use futures_util::StreamExt;
 use futures_util::TryStreamExt;
@@ -130,12 +128,12 @@ impl AzureBlobStorage {
        let kind = RequestKind::Get;

        let _permit = self.permit(kind, cancel).await?;
-        let cancel_or_timeout = crate::support::cancel_or_timeout(self.timeout, cancel.clone());
-        let cancel_or_timeout_ = crate::support::cancel_or_timeout(self.timeout, cancel.clone());

        let mut etag = None;
        let mut last_modified = None;
        let mut metadata = HashMap::new();
+        // TODO give proper streaming response instead of buffering into RAM
+        // https://github.com/neondatabase/neon/issues/5563

        let download = async {
            let response = builder
@@ -154,46 +152,39 @@ impl AzureBlobStorage {
                Err(_elapsed) => Err(DownloadError::Timeout),
            });

-            let mut response = Box::pin(response);
+            let mut response = std::pin::pin!(response);

-            let Some(part) = response.next().await else {
+            let mut bufs = Vec::new();
+            while let Some(part) = response.next().await {
+                let part = part?;
+                if etag.is_none() {
+                    etag = Some(part.blob.properties.etag);
+                }
+                if last_modified.is_none() {
+                    last_modified = Some(part.blob.properties.last_modified.into());
+                }
+                if let Some(blob_meta) = part.blob.metadata {
+                    metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
+                }
+                let data = part
+                    .data
+                    .collect()
+                    .await
+                    .map_err(|e| DownloadError::Other(e.into()))?;
+                bufs.push(data);
+            }
+
+            if bufs.is_empty() {
                return Err(DownloadError::Other(anyhow::anyhow!(
-                    "Azure GET response contained no response body"
+                    "Azure GET response contained no buffers"
                )));
-            };
-            let part = part?;
-            if etag.is_none() {
-                etag = Some(part.blob.properties.etag);
            }
-            if last_modified.is_none() {
-                last_modified = Some(part.blob.properties.last_modified.into());
-            }
-            if let Some(blob_meta) = part.blob.metadata {
-                metadata.extend(blob_meta.iter().map(|(k, v)| (k.to_owned(), v.to_owned())));
-            }
-
            // unwrap safety: if these were None, bufs would be empty and we would have returned an error already
            let etag = etag.unwrap();
            let last_modified = last_modified.unwrap();

-            let tail_stream = response
-                .map(|part| match part {
-                    Ok(part) => Either::Left(part.data.map(|r| r.map_err(io::Error::other))),
-                    Err(e) => {
-                        Either::Right(futures::stream::once(async { Err(io::Error::other(e)) }))
-                    }
-                })
-                .flatten();
-            let stream = part
-                .data
-                .map(|r| r.map_err(io::Error::other))
-                .chain(sync_wrapper::SyncStream::new(tail_stream));
-            //.chain(SyncStream::from_pin(Box::pin(tail_stream)));
-
-            let download_stream = crate::support::DownloadStream::new(cancel_or_timeout_, stream);
-
            Ok(Download {
-                download_stream: Box::pin(download_stream),
+                download_stream: Box::pin(futures::stream::iter(bufs.into_iter().map(Ok))),
                etag,
                last_modified,
                metadata: Some(StorageMetadata(metadata)),
@@ -202,10 +193,7 @@ impl AzureBlobStorage {

        tokio::select! {
            bufs = download => bufs,
-            cancel_or_timeout = cancel_or_timeout => match cancel_or_timeout {
-                TimeoutOrCancel::Timeout => Err(DownloadError::Timeout),
-                TimeoutOrCancel::Cancel => Err(DownloadError::Cancelled),
-            },
+            _ = cancel.cancelled() => Err(DownloadError::Cancelled),
        }
    }

--- a/libs/remote_storage/src/lib.rs
+++ b/libs/remote_storage/src/lib.rs
@@ -55,11 +55,11 @@ pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel};
 /// ~3500 PUT/COPY/POST/DELETE or 5500 GET/HEAD S3 requests
 /// <https://aws.amazon.com/premiumsupport/knowledge-center/s3-request-limit-avoid-throttling/>
 pub const DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT: usize = 100;
-/// Set this limit analogously to the S3 limit
+/// Kept lower than the S3 limit because Azure downloads are currently buffered entirely into RAM.
 ///
 /// Here, a limit of max 20k concurrent connections was noted.
 /// <https://learn.microsoft.com/en-us/answers/questions/1301863/is-there-any-limitation-to-concurrent-connections>
-pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 100;
+pub const DEFAULT_REMOTE_STORAGE_AZURE_CONCURRENCY_LIMIT: usize = 30;
 /// No limits on the client side, which currenltly means 1000 for AWS S3.
 /// <https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_RequestSyntax>
 pub const DEFAULT_MAX_KEYS_PER_LIST_RESPONSE: Option<i32> = None;
--- a/libs/walproposer/src/api_bindings.rs
+++ b/libs/walproposer/src/api_bindings.rs
@@ -50,14 +50,6 @@ extern "C" fn get_flush_rec_ptr(wp: *mut WalProposer) -> XLogRecPtr {
    }
 }

-extern "C" fn update_donor(wp: *mut WalProposer, donor: *mut Safekeeper, donor_lsn: XLogRecPtr) {
-    unsafe {
-        let callback_data = (*(*wp).config).callback_data;
-        let api = callback_data as *mut Box<dyn ApiImpl>;
-        (*api).update_donor(&mut (*donor), donor_lsn)
-    }
-}
-
 extern "C" fn get_current_timestamp(wp: *mut WalProposer) -> TimestampTz {
    unsafe {
        let callback_data = (*(*wp).config).callback_data;
@@ -399,7 +391,6 @@ pub(crate) fn create_api() -> walproposer_api {
        get_shmem_state: Some(get_shmem_state),
        start_streaming: Some(start_streaming),
        get_flush_rec_ptr: Some(get_flush_rec_ptr),
-        update_donor: Some(update_donor),
        get_current_timestamp: Some(get_current_timestamp),
        conn_error_message: Some(conn_error_message),
        conn_status: Some(conn_status),
@@ -430,32 +421,6 @@ pub(crate) fn create_api() -> walproposer_api {
    }
 }

-pub fn empty_shmem() -> crate::bindings::WalproposerShmemState {
-    let empty_feedback = crate::bindings::PageserverFeedback {
-        present: false,
-        currentClusterSize: 0,
-        last_received_lsn: 0,
-        disk_consistent_lsn: 0,
-        remote_consistent_lsn: 0,
-        replytime: 0,
-        shard_number: 0,
-    };
-
-    crate::bindings::WalproposerShmemState {
-        propEpochStartLsn: crate::bindings::pg_atomic_uint64 { value: 0 },
-        donor_name: [0; 64],
-        donor_conninfo: [0; 1024],
-        donor_lsn: 0,
-        mutex: 0,
-        mineLastElectedTerm: crate::bindings::pg_atomic_uint64 { value: 0 },
-        backpressureThrottlingTime: crate::bindings::pg_atomic_uint64 { value: 0 },
-        currentClusterSize: crate::bindings::pg_atomic_uint64 { value: 0 },
-        shard_ps_feedback: [empty_feedback; 128],
-        num_shards: 0,
-        min_ps_feedback: empty_feedback,
-    }
-}
-
 impl std::fmt::Display for Level {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "{:?}", self)
--- a/libs/walproposer/src/walproposer.rs
+++ b/libs/walproposer/src/walproposer.rs
@@ -1,5 +1,8 @@
 use std::ffi::CString;

+use postgres_ffi::WAL_SEGMENT_SIZE;
+use utils::{id::TenantTimelineId, lsn::Lsn};
+
 use crate::{
    api_bindings::{create_api, take_vec_u8, Level},
    bindings::{
@@ -7,8 +10,6 @@ use crate::{
        WalProposerCreate, WalProposerFree, WalProposerPoll, WalProposerStart,
    },
 };
-use postgres_ffi::WAL_SEGMENT_SIZE;
-use utils::{id::TenantTimelineId, lsn::Lsn};

 /// Rust high-level wrapper for C walproposer API. Many methods are not required
 /// for simple cases, hence todo!() in default implementations.
@@ -27,10 +28,6 @@ pub trait ApiImpl {
        todo!()
    }

-    fn update_donor(&self, _donor: &mut Safekeeper, _donor_lsn: u64) {
-        todo!()
-    }
-
    fn get_current_timestamp(&self) -> i64 {
        todo!()
    }
@@ -277,7 +274,6 @@ mod tests {
        sync::{atomic::AtomicUsize, mpsc::sync_channel},
    };

-    use std::cell::UnsafeCell;
    use utils::id::TenantTimelineId;

    use crate::{api_bindings::Level, bindings::NeonWALReadResult, walproposer::Wrapper};
@@ -301,8 +297,6 @@ mod tests {
        replies_ptr: AtomicUsize,
        // channel to send LSN to the main thread
        sync_channel: std::sync::mpsc::SyncSender<u64>,
-        // Shmem state, used for storing donor info
-        shmem: UnsafeCell<crate::bindings::WalproposerShmemState>,
    }

    impl MockImpl {
@@ -333,22 +327,11 @@ mod tests {
    }

    impl ApiImpl for MockImpl {
-        fn get_shmem_state(&self) -> *mut crate::bindings::WalproposerShmemState {
-            self.shmem.get()
-        }
-
        fn get_current_timestamp(&self) -> i64 {
            println!("get_current_timestamp");
            0
        }

-        fn update_donor(&self, donor: &mut crate::bindings::Safekeeper, donor_lsn: u64) {
-            let mut shmem = unsafe { *self.get_shmem_state() };
-            shmem.propEpochStartLsn.value = donor_lsn;
-            shmem.donor_conninfo = donor.conninfo;
-            shmem.donor_lsn = donor_lsn;
-        }
-
        fn conn_status(
            &self,
            _: &mut crate::bindings::Safekeeper,
@@ -524,7 +507,6 @@ mod tests {
            ],
            replies_ptr: AtomicUsize::new(0),
            sync_channel: sender,
-            shmem: UnsafeCell::new(crate::api_bindings::empty_shmem()),
        });
        let config = crate::walproposer::Config {
            ttid,
--- a/pageserver/client/src/mgmt_api.rs
+++ b/pageserver/client/src/mgmt_api.rs
@@ -284,34 +284,6 @@ impl Client {
        Ok((status, progress))
    }

-    pub async fn tenant_secondary_status(
-        &self,
-        tenant_shard_id: TenantShardId,
-    ) -> Result<SecondaryProgress> {
-        let path = reqwest::Url::parse(&format!(
-            "{}/v1/tenant/{}/secondary/status",
-            self.mgmt_api_endpoint, tenant_shard_id
-        ))
-        .expect("Cannot build URL");
-
-        self.request(Method::GET, path, ())
-            .await?
-            .json()
-            .await
-            .map_err(Error::ReceiveBody)
-    }
-
-    pub async fn tenant_heatmap_upload(&self, tenant_id: TenantShardId) -> Result<()> {
-        let path = reqwest::Url::parse(&format!(
-            "{}/v1/tenant/{}/heatmap_upload",
-            self.mgmt_api_endpoint, tenant_id
-        ))
-        .expect("Cannot build URL");
-
-        self.request(Method::POST, path, ()).await?;
-        Ok(())
-    }
-
    pub async fn location_config(
        &self,
        tenant_shard_id: TenantShardId,
@@ -319,7 +291,10 @@ impl Client {
        flush_ms: Option<std::time::Duration>,
        lazy: bool,
    ) -> Result<()> {
-        let req_body = TenantLocationConfigRequest { config };
+        let req_body = TenantLocationConfigRequest {
+            tenant_id: None,
+            config,
+        };

        let mut path = reqwest::Url::parse(&format!(
            "{}/v1/tenant/{}/location_config",
--- a/pageserver/src/aux_file.rs
+++ b/pageserver/src/aux_file.rs
@@ -1,4 +1,3 @@
-use bytes::{Buf, BufMut, Bytes};
 use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE};
 use tracing::warn;

@@ -62,84 +61,6 @@ pub fn encode_aux_file_key(path: &str) -> Key {
    }
 }

-const AUX_FILE_ENCODING_VERSION: u8 = 0x01;
-
-pub fn decode_file_value(val: &[u8]) -> anyhow::Result<Vec<(&str, &[u8])>> {
-    let mut ptr = val;
-    if ptr.is_empty() {
-        // empty value = no files
-        return Ok(Vec::new());
-    }
-    assert_eq!(
-        ptr.get_u8(),
-        AUX_FILE_ENCODING_VERSION,
-        "unsupported aux file value"
-    );
-    let mut files = vec![];
-    while ptr.has_remaining() {
-        let key_len = ptr.get_u32() as usize;
-        let key = &ptr[..key_len];
-        ptr.advance(key_len);
-        let val_len = ptr.get_u32() as usize;
-        let content = &ptr[..val_len];
-        ptr.advance(val_len);
-
-        let path = std::str::from_utf8(key)?;
-        files.push((path, content));
-    }
-    Ok(files)
-}
-
-/// Decode an aux file key-value pair into a list of files. The returned `Bytes` contains reference
-/// to the original value slice. Be cautious about memory consumption.
-pub fn decode_file_value_bytes(val: &Bytes) -> anyhow::Result<Vec<(String, Bytes)>> {
-    let mut ptr = val.clone();
-    if ptr.is_empty() {
-        // empty value = no files
-        return Ok(Vec::new());
-    }
-    assert_eq!(
-        ptr.get_u8(),
-        AUX_FILE_ENCODING_VERSION,
-        "unsupported aux file value"
-    );
-    let mut files = vec![];
-    while ptr.has_remaining() {
-        let key_len = ptr.get_u32() as usize;
-        let key = ptr.slice(..key_len);
-        ptr.advance(key_len);
-        let val_len = ptr.get_u32() as usize;
-        let content = ptr.slice(..val_len);
-        ptr.advance(val_len);
-
-        let path = std::str::from_utf8(&key)?.to_string();
-        files.push((path, content));
-    }
-    Ok(files)
-}
-
-pub fn encode_file_value(files: &[(&str, &[u8])]) -> anyhow::Result<Vec<u8>> {
-    if files.is_empty() {
-        // no files = empty value
-        return Ok(Vec::new());
-    }
-    let mut encoded = vec![];
-    encoded.put_u8(AUX_FILE_ENCODING_VERSION);
-    for (path, content) in files {
-        if path.len() > u32::MAX as usize {
-            anyhow::bail!("{} exceeds path size limit", path);
-        }
-        encoded.put_u32(path.len() as u32);
-        encoded.put_slice(path.as_bytes());
-        if content.len() > u32::MAX as usize {
-            anyhow::bail!("{} exceeds content size limit", path);
-        }
-        encoded.put_u32(content.len() as u32);
-        encoded.put_slice(content);
-    }
-    Ok(encoded)
-}
-
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -188,21 +109,4 @@ mod tests {
            encode_aux_file_key("other_file_not_supported").to_string()
        );
    }
-
-    #[test]
-    fn test_value_encoding() {
-        let files = vec![
-            ("pg_logical/1.file", "1111".as_bytes()),
-            ("pg_logical/2.file", "2222".as_bytes()),
-        ];
-        assert_eq!(
-            files,
-            decode_file_value(&encode_file_value(&files).unwrap()).unwrap()
-        );
-        let files = vec![];
-        assert_eq!(
-            files,
-            decode_file_value(&encode_file_value(&files).unwrap()).unwrap()
-        );
-    }
 }
--- a/pageserver/src/bin/pageserver.rs
+++ b/pageserver/src/bin/pageserver.rs
@@ -3,7 +3,6 @@
 //! Main entry point for the Page Server executable.

 use std::env::{var, VarError};
-use std::io::Read;
 use std::sync::Arc;
 use std::time::Duration;
 use std::{env, ops::ControlFlow, str::FromStr};
@@ -152,34 +151,37 @@ fn initialize_config(
    workdir: &Utf8Path,
 ) -> anyhow::Result<ControlFlow<(), &'static PageServerConf>> {
    let init = arg_matches.get_flag("init");
+    let update_config = init || arg_matches.get_flag("update-config");

-    let file_contents: Option<toml_edit::Document> = match std::fs::File::open(cfg_file_path) {
-        Ok(mut f) => {
-            if init {
-                anyhow::bail!("config file already exists: {cfg_file_path}");
-            }
-            let md = f.metadata().context("stat config file")?;
-            if md.is_file() {
-                let mut s = String::new();
-                f.read_to_string(&mut s).context("read config file")?;
-                Some(s.parse().context("parse config file toml")?)
-            } else {
-                anyhow::bail!("directory entry exists but is not a file: {cfg_file_path}");
-            }
-        }
-        Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
-        Err(e) => {
-            anyhow::bail!("open pageserver config: {e}: {cfg_file_path}");
+    let (mut toml, config_file_exists) = if cfg_file_path.is_file() {
+        if init {
+            anyhow::bail!(
+                "Config file '{cfg_file_path}' already exists, cannot init it, use --update-config to update it",
+            );
        }
+        // Supplement the CLI arguments with the config file
+        let cfg_file_contents = std::fs::read_to_string(cfg_file_path)
+            .with_context(|| format!("Failed to read pageserver config at '{cfg_file_path}'"))?;
+        (
+            cfg_file_contents
+                .parse::<toml_edit::Document>()
+                .with_context(|| {
+                    format!("Failed to parse '{cfg_file_path}' as pageserver config")
+                })?,
+            true,
+        )
+    } else if cfg_file_path.exists() {
+        anyhow::bail!("Config file '{cfg_file_path}' exists but is not a regular file");
+    } else {
+        // No config file exists yet (e.g. first-time init), so start from the built-in defaults
+        (
+            DEFAULT_CONFIG_FILE
+                .parse::<toml_edit::Document>()
+                .context("could not parse built-in config file")?,
+            false,
+        )
    };

-    let mut effective_config = file_contents.unwrap_or_else(|| {
-        DEFAULT_CONFIG_FILE
-            .parse()
-            .expect("unit tests ensure this works")
-    });
-
-    // Patch with overrides from the command line
    if let Some(values) = arg_matches.get_many::<String>("config-override") {
        for option_line in values {
            let doc = toml_edit::Document::from_str(option_line).with_context(|| {
@@ -187,21 +189,22 @@ fn initialize_config(
            })?;

            for (key, item) in doc.iter() {
-                effective_config.insert(key, item.clone());
+                if config_file_exists && update_config && key == "id" && toml.contains_key(key) {
+                    anyhow::bail!("Pageserver config file exists at '{cfg_file_path}' and has node id already, it cannot be overridden");
+                }
+                toml.insert(key, item.clone());
            }
        }
    }

-    debug!("Resulting toml: {effective_config}");
-
-    // Construct the runtime representation
-    let conf = PageServerConf::parse_and_validate(&effective_config, workdir)
+    debug!("Resulting toml: {toml}");
+    let conf = PageServerConf::parse_and_validate(&toml, workdir)
        .context("Failed to parse pageserver configuration")?;

-    if init {
+    if update_config {
        info!("Writing pageserver config to '{cfg_file_path}'");

-        std::fs::write(cfg_file_path, effective_config.to_string())
+        std::fs::write(cfg_file_path, toml.to_string())
            .with_context(|| format!("Failed to write pageserver config to '{cfg_file_path}'"))?;
        info!("Config successfully written to '{cfg_file_path}'")
    }
@@ -755,13 +758,18 @@ fn cli() -> Command {
        // See `settings.md` for more details on the extra configuration patameters pageserver can process
        .arg(
            Arg::new("config-override")
-                .long("config-override")
                .short('c')
                .num_args(1)
                .action(ArgAction::Append)
                .help("Additional configuration overrides of the ones from the toml config file (or new ones to add there). \
                Any option has to be a valid toml document, example: `-c=\"foo='hey'\"` `-c=\"foo={value=1}\"`"),
        )
+        .arg(
+            Arg::new("update-config")
+                .long("update-config")
+                .action(ArgAction::SetTrue)
+                .help("Update the config file when started"),
+        )
        .arg(
            Arg::new("enabled-features")
                .long("enabled-features")
--- a/pageserver/src/config.rs
+++ b/pageserver/src/config.rs
@@ -9,7 +9,7 @@ use pageserver_api::shard::TenantShardId;
 use remote_storage::{RemotePath, RemoteStorageConfig};
 use serde;
 use serde::de::IntoDeserializer;
-use std::env;
+use std::{collections::HashMap, env};
 use storage_broker::Uri;
 use utils::crashsafe::path_with_suffix_extension;
 use utils::id::ConnectionId;
@@ -51,7 +51,7 @@ pub mod defaults {
    use crate::tenant::config::defaults::*;
    use const_format::formatcp;

-    pub use pageserver_api::config::{
+    pub use pageserver_api::{
        DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_HTTP_LISTEN_PORT, DEFAULT_PG_LISTEN_ADDR,
        DEFAULT_PG_LISTEN_PORT,
    };
@@ -335,6 +335,26 @@ impl<T: Clone> BuilderValue<T> {
    }
 }

+// Certain metadata (e.g. externally-addressable name, AZ) is delivered
+// as a separate structure.  This information is not needed by the pageserver
+// itself, it is only used for registering the pageserver with the control
+// plane and/or storage controller.
+//
+#[derive(serde::Deserialize)]
+pub(crate) struct NodeMetadata {
+    #[serde(rename = "host")]
+    pub(crate) postgres_host: String,
+    #[serde(rename = "port")]
+    pub(crate) postgres_port: u16,
+    pub(crate) http_host: String,
+    pub(crate) http_port: u16,
+
+    // Deployment tools may write fields to the metadata file beyond what we
+    // use in this type: this type intentionally only names the fields that we require.
+    #[serde(flatten)]
+    pub(crate) other: HashMap<String, serde_json::Value>,
+}
+
 // needed to simplify config construction
 #[derive(Default)]
 struct PageServerConfigBuilder {
--- a/pageserver/src/control_plane_client.rs
+++ b/pageserver/src/control_plane_client.rs
@@ -14,8 +14,10 @@ use tokio_util::sync::CancellationToken;
 use url::Url;
 use utils::{backoff, failpoint_support, generation::Generation, id::NodeId};

-use crate::{config::PageServerConf, virtual_file::on_fatal_io_error};
-use pageserver_api::config::NodeMetadata;
+use crate::{
+    config::{NodeMetadata, PageServerConf},
+    virtual_file::on_fatal_io_error,
+};

 /// The Pageserver's client for using the control plane API: this is a small subset
 /// of the overall control plane API, for dealing with generations (see docs/rfcs/025-generation-numbers.md)
@@ -63,7 +65,7 @@ impl ControlPlaneClient {
        let mut client = reqwest::ClientBuilder::new();

        if let Some(jwt) = &conf.control_plane_api_token {
-            let mut headers = reqwest::header::HeaderMap::new();
+            let mut headers = hyper::HeaderMap::new();
            headers.insert(
                "Authorization",
                format!("Bearer {}", jwt.get_contents()).parse().unwrap(),
--- a/pageserver/src/disk_usage_eviction_task.rs
+++ b/pageserver/src/disk_usage_eviction_task.rs
@@ -540,12 +540,7 @@ pub(crate) async fn disk_usage_eviction_task_iteration_impl<U: Usage>(
                    js.spawn(async move {
                        layer
                            .secondary_tenant
-                            .evict_layer(
-                                tenant_manager.get_conf(),
-                                layer.timeline_id,
-                                layer.name,
-                                layer.metadata,
-                            )
+                            .evict_layer(tenant_manager.get_conf(), layer.timeline_id, layer.name)
                            .await;
                        Ok(file_size)
                    });
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -782,6 +782,9 @@ components:
      required:
        - mode
      properties:
+        tenant_id:
+          type: string
+          description: Not used, scheduled for removal.
        mode:
          type: string
          enum: ["AttachedSingle", "AttachedMulti", "AttachedStale", "Secondary", "Detached"]
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -63,7 +63,6 @@ use crate::tenant::remote_timeline_client::list_remote_timelines;
 use crate::tenant::secondary::SecondaryController;
 use crate::tenant::size::ModelInputs;
 use crate::tenant::storage_layer::LayerAccessStatsReset;
-use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::timeline::CompactFlags;
 use crate::tenant::timeline::Timeline;
 use crate::tenant::SpawnMode;
@@ -1229,15 +1228,13 @@ async fn layer_download_handler(
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    let layer_file_name = get_request_param(&request, "layer_file_name")?;
    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-    let layer_name = LayerFileName::from_str(layer_file_name)
-        .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;
    let state = get_state(&request);

    let timeline =
        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
            .await?;
    let downloaded = timeline
-        .download_layer(&layer_name)
+        .download_layer(layer_file_name)
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -1261,14 +1258,11 @@ async fn evict_timeline_layer_handler(
    let layer_file_name = get_request_param(&request, "layer_file_name")?;
    let state = get_state(&request);

-    let layer_name = LayerFileName::from_str(layer_file_name)
-        .map_err(|s| ApiError::BadRequest(anyhow::anyhow!(s)))?;
-
    let timeline =
        active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id)
            .await?;
    let evicted = timeline
-        .evict_layer(&layer_name)
+        .evict_layer(layer_file_name)
        .await
        .map_err(ApiError::InternalServerError)?;

@@ -1833,75 +1827,6 @@ async fn timeline_download_remote_layers_handler_get(
    json_response(StatusCode::OK, info)
 }

-async fn timeline_detach_ancestor_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    use crate::tenant::timeline::detach_ancestor::Options;
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-    check_permission(&request, Some(tenant_shard_id.tenant_id))?;
-    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
-
-    let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id);
-
-    async move {
-        let mut options = Options::default();
-
-        let rewrite_concurrency =
-            parse_query_param::<_, std::num::NonZeroUsize>(&request, "rewrite_concurrency")?;
-        let copy_concurrency =
-            parse_query_param::<_, std::num::NonZeroUsize>(&request, "copy_concurrency")?;
-
-        [
-            (&mut options.rewrite_concurrency, rewrite_concurrency),
-            (&mut options.copy_concurrency, copy_concurrency),
-        ]
-        .into_iter()
-        .filter_map(|(target, val)| val.map(|val| (target, val)))
-        .for_each(|(target, val)| *target = val);
-
-        let state = get_state(&request);
-
-        let tenant = state
-            .tenant_manager
-            .get_attached_tenant_shard(tenant_shard_id)?;
-
-        tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?;
-
-        let ctx = RequestContext::new(TaskKind::DetachAncestor, DownloadBehavior::Download);
-        let ctx = &ctx;
-
-        let timeline = tenant
-            .get_timeline(timeline_id, true)
-            .map_err(|e| ApiError::NotFound(e.into()))?;
-
-        let (_guard, prepared) = timeline
-            .prepare_to_detach_from_ancestor(&tenant, options, ctx)
-            .await
-            .map_err(|e| ApiError::InternalServerError(e.into()))?;
-
-        let res = state
-            .tenant_manager
-            .complete_detaching_timeline_ancestor(tenant_shard_id, timeline_id, prepared, ctx)
-            .await;
-
-        match res {
-            Ok(reparented_timelines) => {
-                let resp = pageserver_api::models::detach_ancestor::AncestorDetached {
-                    reparented_timelines,
-                };
-
-                json_response(StatusCode::OK, resp)
-            }
-            Err(e) => Err(ApiError::InternalServerError(
-                e.context("timeline detach completion"),
-            )),
-        }
-    }
-    .instrument(span)
-    .await
-}
-
 async fn deletion_queue_flush(
    r: Request<Body>,
    cancel: CancellationToken,
@@ -2235,27 +2160,6 @@ async fn secondary_download_handler(
    json_response(status, progress)
 }

-async fn secondary_status_handler(
-    request: Request<Body>,
-    _cancel: CancellationToken,
-) -> Result<Response<Body>, ApiError> {
-    let state = get_state(&request);
-    let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
-
-    let Some(secondary_tenant) = state
-        .tenant_manager
-        .get_secondary_tenant_shard(tenant_shard_id)
-    else {
-        return Err(ApiError::NotFound(
-            anyhow::anyhow!("Shard {} not found", tenant_shard_id).into(),
-        ));
-    };
-
-    let progress = secondary_tenant.progress.lock().unwrap().clone();
-
-    json_response(StatusCode::OK, progress)
-}
-
 async fn handler_404(_: Request<Body>) -> Result<Response<Body>, ApiError> {
    json_response(
        StatusCode::NOT_FOUND,
@@ -2590,10 +2494,6 @@ pub fn make_router(
            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_remote_layers",
            |r| api_handler(r, timeline_download_remote_layers_handler_get),
        )
-        .put(
-            "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/detach_ancestor",
-            |r| api_handler(r, timeline_detach_ancestor_handler),
-        )
        .delete("/v1/tenant/:tenant_shard_id/timeline/:timeline_id", |r| {
            api_handler(r, timeline_delete_handler)
        })
@@ -2621,9 +2521,6 @@ pub fn make_router(
        .put("/v1/deletion_queue/flush", |r| {
            api_handler(r, deletion_queue_flush)
        })
-        .get("/v1/tenant/:tenant_shard_id/secondary/status", |r| {
-            api_handler(r, secondary_status_handler)
-        })
        .post("/v1/tenant/:tenant_shard_id/secondary/download", |r| {
            api_handler(r, secondary_download_handler)
        })
--- a/pageserver/src/metrics.rs
+++ b/pageserver/src/metrics.rs
@@ -51,8 +51,8 @@ pub(crate) enum StorageTimeOperation {
    #[strum(serialize = "gc")]
    Gc,

-    #[strum(serialize = "find gc cutoffs")]
-    FindGcCutoffs,
+    #[strum(serialize = "update gc info")]
+    UpdateGcInfo,

    #[strum(serialize = "create tenant")]
    CreateTenant,
@@ -194,11 +194,6 @@ pub(crate) struct GetVectoredLatency {
    map: EnumMap<TaskKind, Option<Histogram>>,
 }

-#[allow(dead_code)]
-pub(crate) struct ScanLatency {
-    map: EnumMap<TaskKind, Option<Histogram>>,
-}
-
 impl GetVectoredLatency {
    // Only these task types perform vectored gets. Filter all other tasks out to reduce total
    // cardinality of the metric.
@@ -209,48 +204,6 @@ impl GetVectoredLatency {
    }
 }

-impl ScanLatency {
-    // Only these task types perform vectored gets. Filter all other tasks out to reduce total
-    // cardinality of the metric.
-    const TRACKED_TASK_KINDS: [TaskKind; 1] = [TaskKind::PageRequestHandler];
-
-    pub(crate) fn for_task_kind(&self, task_kind: TaskKind) -> Option<&Histogram> {
-        self.map[task_kind].as_ref()
-    }
-}
-
-pub(crate) struct ScanLatencyOngoingRecording<'a> {
-    parent: &'a Histogram,
-    start: std::time::Instant,
-}
-
-impl<'a> ScanLatencyOngoingRecording<'a> {
-    pub(crate) fn start_recording(parent: &'a Histogram) -> ScanLatencyOngoingRecording<'a> {
-        let start = Instant::now();
-        ScanLatencyOngoingRecording { parent, start }
-    }
-
-    pub(crate) fn observe(self, throttled: Option<Duration>) {
-        let elapsed = self.start.elapsed();
-        let ex_throttled = if let Some(throttled) = throttled {
-            elapsed.checked_sub(throttled)
-        } else {
-            Some(elapsed)
-        };
-        if let Some(ex_throttled) = ex_throttled {
-            self.parent.observe(ex_throttled.as_secs_f64());
-        } else {
-            use utils::rate_limit::RateLimit;
-            static LOGGED: Lazy<Mutex<RateLimit>> =
-                Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10))));
-            let mut rate_limit = LOGGED.lock().unwrap();
-            rate_limit.call(|| {
-                warn!("error deducting time spent throttled; this message is logged at a global rate limit");
-            });
-        }
-    }
-}
-
 pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(|| {
    let inner = register_histogram_vec!(
        "pageserver_get_vectored_seconds",
@@ -274,29 +227,6 @@ pub(crate) static GET_VECTORED_LATENCY: Lazy<GetVectoredLatency> = Lazy::new(||
    }
 });

-pub(crate) static SCAN_LATENCY: Lazy<ScanLatency> = Lazy::new(|| {
-    let inner = register_histogram_vec!(
-        "pageserver_scan_seconds",
-        "Time spent in scan, excluding time spent in timeline_get_throttle.",
-        &["task_kind"],
-        CRITICAL_OP_BUCKETS.into(),
-    )
-    .expect("failed to define a metric");
-
-    ScanLatency {
-        map: EnumMap::from_array(std::array::from_fn(|task_kind_idx| {
-            let task_kind = <TaskKind as enum_map::Enum>::from_usize(task_kind_idx);
-
-            if ScanLatency::TRACKED_TASK_KINDS.contains(&task_kind) {
-                let task_kind = task_kind.into();
-                Some(inner.with_label_values(&[task_kind]))
-            } else {
-                None
-            }
-        })),
-    }
-});
-
 pub(crate) struct PageCacheMetricsForTaskKind {
    pub read_accesses_materialized_page: IntCounter,
    pub read_accesses_immutable: IntCounter,
@@ -1512,80 +1442,29 @@ static REMOTE_TIMELINE_CLIENT_BYTES_FINISHED_COUNTER: Lazy<IntCounterVec> = Lazy
 });

 pub(crate) struct TenantManagerMetrics {
-    tenant_slots_attached: UIntGauge,
-    tenant_slots_secondary: UIntGauge,
-    tenant_slots_inprogress: UIntGauge,
+    pub(crate) tenant_slots: UIntGauge,
    pub(crate) tenant_slot_writes: IntCounter,
    pub(crate) unexpected_errors: IntCounter,
 }

-impl TenantManagerMetrics {
-    /// Helpers for tracking slots.  Note that these do not track the lifetime of TenantSlot objects
-    /// exactly: they track the lifetime of the slots _in the tenant map_.
-    pub(crate) fn slot_inserted(&self, slot: &TenantSlot) {
-        match slot {
-            TenantSlot::Attached(_) => {
-                self.tenant_slots_attached.inc();
-            }
-            TenantSlot::Secondary(_) => {
-                self.tenant_slots_secondary.inc();
-            }
-            TenantSlot::InProgress(_) => {
-                self.tenant_slots_inprogress.inc();
-            }
-        }
-    }
-
-    pub(crate) fn slot_removed(&self, slot: &TenantSlot) {
-        match slot {
-            TenantSlot::Attached(_) => {
-                self.tenant_slots_attached.dec();
-            }
-            TenantSlot::Secondary(_) => {
-                self.tenant_slots_secondary.dec();
-            }
-            TenantSlot::InProgress(_) => {
-                self.tenant_slots_inprogress.dec();
-            }
-        }
-    }
-
-    #[cfg(all(debug_assertions, not(test)))]
-    pub(crate) fn slots_total(&self) -> u64 {
-        self.tenant_slots_attached.get()
-            + self.tenant_slots_secondary.get()
-            + self.tenant_slots_inprogress.get()
-    }
-}
-
 pub(crate) static TENANT_MANAGER: Lazy<TenantManagerMetrics> = Lazy::new(|| {
-    let tenant_slots = register_uint_gauge_vec!(
+    TenantManagerMetrics {
+    tenant_slots: register_uint_gauge!(
        "pageserver_tenant_manager_slots",
        "How many slots currently exist, including all attached, secondary and in-progress operations",
-        &["mode"]
    )
-    .expect("failed to define a metric");
-    TenantManagerMetrics {
-        tenant_slots_attached: tenant_slots
-            .get_metric_with_label_values(&["attached"])
-            .unwrap(),
-        tenant_slots_secondary: tenant_slots
-            .get_metric_with_label_values(&["secondary"])
-            .unwrap(),
-        tenant_slots_inprogress: tenant_slots
-            .get_metric_with_label_values(&["inprogress"])
-            .unwrap(),
-        tenant_slot_writes: register_int_counter!(
-            "pageserver_tenant_manager_slot_writes",
-            "Writes to a tenant slot, including all of create/attach/detach/delete"
-        )
-        .expect("failed to define a metric"),
-        unexpected_errors: register_int_counter!(
-            "pageserver_tenant_manager_unexpected_errors_total",
-            "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
-        )
-        .expect("failed to define a metric"),
-    }
+    .expect("failed to define a metric"),
+    tenant_slot_writes: register_int_counter!(
+        "pageserver_tenant_manager_slot_writes",
+        "Writes to a tenant slot, including all of create/attach/detach/delete"
+    )
+    .expect("failed to define a metric"),
+    unexpected_errors: register_int_counter!(
+        "pageserver_tenant_manager_unexpected_errors_total",
+        "Number of unexpected conditions encountered: nonzero value indicates a non-fatal bug."
+    )
+    .expect("failed to define a metric"),
+}
 });

 pub(crate) struct DeletionQueueMetrics {
@@ -2110,7 +1989,7 @@ pub(crate) struct TimelineMetrics {
    pub imitate_logical_size_histo: StorageTimeMetrics,
    pub load_layer_map_histo: StorageTimeMetrics,
    pub garbage_collect_histo: StorageTimeMetrics,
-    pub find_gc_cutoffs_histo: StorageTimeMetrics,
+    pub update_gc_info_histo: StorageTimeMetrics,
    pub last_record_gauge: IntGauge,
    resident_physical_size_gauge: UIntGauge,
    /// copy of LayeredTimeline.current_logical_size
@@ -2171,8 +2050,8 @@ impl TimelineMetrics {
            &shard_id,
            &timeline_id,
        );
-        let find_gc_cutoffs_histo = StorageTimeMetrics::new(
-            StorageTimeOperation::FindGcCutoffs,
+        let update_gc_info_histo = StorageTimeMetrics::new(
+            StorageTimeOperation::UpdateGcInfo,
            &tenant_id,
            &shard_id,
            &timeline_id,
@@ -2219,7 +2098,7 @@ impl TimelineMetrics {
            logical_size_histo,
            imitate_logical_size_histo,
            garbage_collect_histo,
-            find_gc_cutoffs_histo,
+            update_gc_info_histo,
            load_layer_map_histo,
            last_record_gauge,
            resident_physical_size_gauge,
@@ -2326,7 +2205,6 @@ use std::time::{Duration, Instant};

 use crate::context::{PageContentKind, RequestContext};
 use crate::task_mgr::TaskKind;
-use crate::tenant::mgr::TenantSlot;

 /// Maintain a per timeline gauge in addition to the global gauge.
 struct PerTimelineRemotePhysicalSizeGauge {
@@ -2929,8 +2807,6 @@ pub fn preinitialize_metrics() {
        &WALRECEIVER_CANDIDATES_REMOVED,
        &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_FAILURES,
        &tokio_epoll_uring::THREAD_LOCAL_LAUNCH_SUCCESSES,
-        &REMOTE_ONDEMAND_DOWNLOADED_LAYERS,
-        &REMOTE_ONDEMAND_DOWNLOADED_BYTES,
    ]
    .into_iter()
    .for_each(|c| {
--- a/pageserver/src/pgdatadir_mapping.rs
+++ b/pageserver/src/pgdatadir_mapping.rs
@@ -10,9 +10,9 @@ use super::tenant::{PageReconstructError, Timeline};
 use crate::context::RequestContext;
 use crate::keyspace::{KeySpace, KeySpaceAccum};
 use crate::metrics::WAL_INGEST;
+use crate::repository::*;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id;
 use crate::walrecord::NeonWalRecord;
-use crate::{aux_file, repository::*};
 use anyhow::{ensure, Context};
 use bytes::{Buf, Bytes, BytesMut};
 use enum_map::Enum;
@@ -24,7 +24,6 @@ use pageserver_api::key::{
    AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY,
 };
 use pageserver_api::keyspace::SparseKeySpace;
-use pageserver_api::models::AuxFilePolicy;
 use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind};
 use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM};
 use postgres_ffi::BLCKSZ;
@@ -280,7 +279,7 @@ impl Timeline {

        match RelDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
-                let exists = dir.rels.contains(&(tag.relnode, tag.forknum));
+                let exists = dir.rels.get(&(tag.relnode, tag.forknum)).is_some();
                Ok(exists)
            }
            Err(e) => Err(PageReconstructError::from(e)),
@@ -380,7 +379,7 @@ impl Timeline {

        match SlruSegmentDirectory::des(&buf).context("deserialization failure") {
            Ok(dir) => {
-                let exists = dir.segments.contains(&segno);
+                let exists = dir.segments.get(&segno).is_some();
                Ok(exists)
            }
            Err(e) => Err(PageReconstructError::from(e)),
@@ -671,7 +670,7 @@ impl Timeline {
        self.get(CHECKPOINT_KEY, lsn, ctx).await
    }

-    async fn list_aux_files_v1(
+    pub(crate) async fn list_aux_files(
        &self,
        lsn: Lsn,
        ctx: &RequestContext,
@@ -689,63 +688,6 @@ impl Timeline {
        }
    }

-    async fn list_aux_files_v2(
-        &self,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> Result<HashMap<String, Bytes>, PageReconstructError> {
-        let kv = self
-            .scan(KeySpace::single(Key::metadata_aux_key_range()), lsn, ctx)
-            .await
-            .context("scan")?;
-        let mut result = HashMap::new();
-        for (_, v) in kv {
-            let v = v.context("get value")?;
-            let v = aux_file::decode_file_value_bytes(&v).context("value decode")?;
-            for (fname, content) in v {
-                result.insert(fname, content);
-            }
-        }
-        Ok(result)
-    }
-
-    pub(crate) async fn list_aux_files(
-        &self,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> Result<HashMap<String, Bytes>, PageReconstructError> {
-        match self.get_switch_aux_file_policy() {
-            AuxFilePolicy::V1 => self.list_aux_files_v1(lsn, ctx).await,
-            AuxFilePolicy::V2 => self.list_aux_files_v2(lsn, ctx).await,
-            AuxFilePolicy::CrossValidation => {
-                let v1_result = self.list_aux_files_v1(lsn, ctx).await;
-                let v2_result = self.list_aux_files_v2(lsn, ctx).await;
-                match (v1_result, v2_result) {
-                    (Ok(v1), Ok(v2)) => {
-                        if v1 != v2 {
-                            tracing::error!(
-                                "unmatched aux file v1 v2 result:\nv1 {v1:?}\nv2 {v2:?}"
-                            );
-                            return Err(PageReconstructError::Other(anyhow::anyhow!(
-                                "unmatched aux file v1 v2 result"
-                            )));
-                        }
-                        Ok(v1)
-                    }
-                    (Ok(_), Err(v2)) => {
-                        tracing::error!("aux file v1 returns Ok while aux file v2 returns an err");
-                        Err(v2)
-                    }
-                    (Err(v1), Ok(_)) => {
-                        tracing::error!("aux file v2 returns Ok while aux file v1 returns an err");
-                        Err(v1)
-                    }
-                    (Err(_), Err(v2)) => Err(v2),
-                }
-            }
-        }
-    }
-
    /// Does the same as get_current_logical_size but counted on demand.
    /// Used to initialize the logical size tracking on startup.
    ///
@@ -1201,22 +1143,21 @@ impl<'a> DatadirModification<'a> {
        let mut dbdir = DbDirectory::des(&self.get(DBDIR_KEY, ctx).await.context("read db")?)
            .context("deserialize db")?;
        let rel_dir_key = rel_dir_to_key(rel.spcnode, rel.dbnode);
-        let mut rel_dir =
-            if let hash_map::Entry::Vacant(e) = dbdir.dbdirs.entry((rel.spcnode, rel.dbnode)) {
-                // Didn't exist. Update dbdir
-                e.insert(false);
-                let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
-                self.pending_directory_entries
-                    .push((DirectoryKind::Db, dbdir.dbdirs.len()));
-                self.put(DBDIR_KEY, Value::Image(buf.into()));
+        let mut rel_dir = if dbdir.dbdirs.get(&(rel.spcnode, rel.dbnode)).is_none() {
+            // Didn't exist. Update dbdir
+            dbdir.dbdirs.insert((rel.spcnode, rel.dbnode), false);
+            let buf = DbDirectory::ser(&dbdir).context("serialize db")?;
+            self.pending_directory_entries
+                .push((DirectoryKind::Db, dbdir.dbdirs.len()));
+            self.put(DBDIR_KEY, Value::Image(buf.into()));

-                // and create the RelDirectory
-                RelDirectory::default()
-            } else {
-                // reldir already exists, fetch it
-                RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
-                    .context("deserialize db")?
-            };
+            // and create the RelDirectory
+            RelDirectory::default()
+        } else {
+            // reldir already exists, fetch it
+            RelDirectory::des(&self.get(rel_dir_key, ctx).await.context("read db")?)
+                .context("deserialize db")?
+        };

        // Add the new relation to the rel directory entry, and write it back
        if !rel_dir.rels.insert((rel.relnode, rel.forknum)) {
@@ -1447,9 +1388,6 @@ impl<'a> DatadirModification<'a> {
    }

    pub fn init_aux_dir(&mut self) -> anyhow::Result<()> {
-        if let AuxFilePolicy::V2 = self.tline.get_switch_aux_file_policy() {
-            return Ok(());
-        }
        let buf = AuxFilesDirectory::ser(&AuxFilesDirectory {
            files: HashMap::new(),
        })?;
@@ -1465,122 +1403,90 @@ impl<'a> DatadirModification<'a> {
        content: &[u8],
        ctx: &RequestContext,
    ) -> anyhow::Result<()> {
-        let policy = self.tline.get_switch_aux_file_policy();
-        if let AuxFilePolicy::V2 | AuxFilePolicy::CrossValidation = policy {
-            let key = aux_file::encode_aux_file_key(path);
-            // retrieve the key from the engine
-            let old_val = match self.get(key, ctx).await {
-                Ok(val) => Some(val),
-                Err(PageReconstructError::MissingKey(_)) => None,
-                Err(e) => return Err(e.into()),
-            };
-            let files = if let Some(ref old_val) = old_val {
-                aux_file::decode_file_value(old_val)?
-            } else {
-                Vec::new()
-            };
-            let new_files = if content.is_empty() {
-                files
-                    .into_iter()
-                    .filter(|(p, _)| &path != p)
-                    .collect::<Vec<_>>()
-            } else {
-                files
-                    .into_iter()
-                    .filter(|(p, _)| &path != p)
-                    .chain(std::iter::once((path, content)))
-                    .collect::<Vec<_>>()
-            };
-            let new_val = aux_file::encode_file_value(&new_files)?;
-            self.put(key, Value::Image(new_val.into()));
-        }
+        let file_path = path.to_string();
+        let content = if content.is_empty() {
+            None
+        } else {
+            Some(Bytes::copy_from_slice(content))
+        };

-        if let AuxFilePolicy::V1 | AuxFilePolicy::CrossValidation = policy {
-            let file_path = path.to_string();
-            let content = if content.is_empty() {
-                None
+        let n_files;
+        let mut aux_files = self.tline.aux_files.lock().await;
+        if let Some(mut dir) = aux_files.dir.take() {
+            // We already updated aux files in `self`: emit a delta and update our latest value.
+            dir.upsert(file_path.clone(), content.clone());
+            n_files = dir.files.len();
+            if aux_files.n_deltas == MAX_AUX_FILE_DELTAS {
+                self.put(
+                    AUX_FILES_KEY,
+                    Value::Image(Bytes::from(
+                        AuxFilesDirectory::ser(&dir).context("serialize")?,
+                    )),
+                );
+                aux_files.n_deltas = 0;
            } else {
-                Some(Bytes::copy_from_slice(content))
-            };
+                self.put(
+                    AUX_FILES_KEY,
+                    Value::WalRecord(NeonWalRecord::AuxFile { file_path, content }),
+                );
+                aux_files.n_deltas += 1;
+            }
+            aux_files.dir = Some(dir);
+        } else {
+            // Check if the AUX_FILES_KEY is initialized
+            match self.get(AUX_FILES_KEY, ctx).await {
+                Ok(dir_bytes) => {
+                    let mut dir = AuxFilesDirectory::des(&dir_bytes)?;
+                    // Key is already set, we may append a delta
+                    self.put(
+                        AUX_FILES_KEY,
+                        Value::WalRecord(NeonWalRecord::AuxFile {
+                            file_path: file_path.clone(),
+                            content: content.clone(),
+                        }),
+                    );
+                    dir.upsert(file_path, content);
+                    n_files = dir.files.len();
+                    aux_files.dir = Some(dir);
+                }
+                Err(
+                    e @ (PageReconstructError::AncestorStopping(_)
+                    | PageReconstructError::Cancelled
+                    | PageReconstructError::AncestorLsnTimeout(_)),
+                ) => {
+                    // It is important that we do not interpret a shutdown error as "not found" and thereby
+                    // reset the map.
+                    return Err(e.into());
+                }
+                // Note: a dedicated `MissingKey` error variant was added in https://github.com/neondatabase/neon/pull/7393, but
+                // the original code treated every other error as a missing key. We therefore keep that code path
+                // unchanged for now, although in theory only the `MissingKey` variant should be matched here.
+                Err(
+                    PageReconstructError::Other(_)
+                    | PageReconstructError::WalRedo(_)
+                    | PageReconstructError::MissingKey { .. },
+                ) => {
+                    // Key is missing, we must insert an image as the basis for subsequent deltas.

-            let n_files;
-            let mut aux_files = self.tline.aux_files.lock().await;
-            if let Some(mut dir) = aux_files.dir.take() {
-                // We already updated aux files in `self`: emit a delta and update our latest value.
-                dir.upsert(file_path.clone(), content.clone());
-                n_files = dir.files.len();
-                if aux_files.n_deltas == MAX_AUX_FILE_DELTAS {
+                    let mut dir = AuxFilesDirectory {
+                        files: HashMap::new(),
+                    };
+                    dir.upsert(file_path, content);
                    self.put(
                        AUX_FILES_KEY,
                        Value::Image(Bytes::from(
                            AuxFilesDirectory::ser(&dir).context("serialize")?,
                        )),
                    );
-                    aux_files.n_deltas = 0;
-                } else {
-                    self.put(
-                        AUX_FILES_KEY,
-                        Value::WalRecord(NeonWalRecord::AuxFile { file_path, content }),
-                    );
-                    aux_files.n_deltas += 1;
-                }
-                aux_files.dir = Some(dir);
-            } else {
-                // Check if the AUX_FILES_KEY is initialized
-                match self.get(AUX_FILES_KEY, ctx).await {
-                    Ok(dir_bytes) => {
-                        let mut dir = AuxFilesDirectory::des(&dir_bytes)?;
-                        // Key is already set, we may append a delta
-                        self.put(
-                            AUX_FILES_KEY,
-                            Value::WalRecord(NeonWalRecord::AuxFile {
-                                file_path: file_path.clone(),
-                                content: content.clone(),
-                            }),
-                        );
-                        dir.upsert(file_path, content);
-                        n_files = dir.files.len();
-                        aux_files.dir = Some(dir);
-                    }
-                    Err(
-                        e @ (PageReconstructError::AncestorStopping(_)
-                        | PageReconstructError::Cancelled
-                        | PageReconstructError::AncestorLsnTimeout(_)),
-                    ) => {
-                        // Important that we do not interpret a shutdown error as "not found" and thereby
-                        // reset the map.
-                        return Err(e.into());
-                    }
-                    // Note: we added missing key error variant in https://github.com/neondatabase/neon/pull/7393 but
-                    // the original code assumes all other errors are missing keys. Therefore, we keep the code path
-                    // the same for now, though in theory, we should only match the `MissingKey` variant.
-                    Err(
-                        PageReconstructError::Other(_)
-                        | PageReconstructError::WalRedo(_)
-                        | PageReconstructError::MissingKey { .. },
-                    ) => {
-                        // Key is missing, we must insert an image as the basis for subsequent deltas.
-
-                        let mut dir = AuxFilesDirectory {
-                            files: HashMap::new(),
-                        };
-                        dir.upsert(file_path, content);
-                        self.put(
-                            AUX_FILES_KEY,
-                            Value::Image(Bytes::from(
-                                AuxFilesDirectory::ser(&dir).context("serialize")?,
-                            )),
-                        );
-                        n_files = 1;
-                        aux_files.dir = Some(dir);
-                    }
+                    n_files = 1;
+                    aux_files.dir = Some(dir);
                }
            }
-
-            self.pending_directory_entries
-                .push((DirectoryKind::AuxFiles, n_files));
        }

+        self.pending_directory_entries
+            .push((DirectoryKind::AuxFiles, n_files));
+
        Ok(())
    }

--- a/pageserver/src/repository.rs
+++ b/pageserver/src/repository.rs
@@ -33,6 +33,7 @@ impl Value {
    }
 }

+#[cfg(test)]
 #[derive(Debug, PartialEq)]
 pub(crate) enum InvalidInput {
    TooShortValue,
@@ -41,8 +42,10 @@ pub(crate) enum InvalidInput {

 /// We could have a ValueRef where everything is `serde(borrow)`. Before implementing that, lets
 /// use this type for querying if a slice looks some particular way.
+#[cfg(test)]
 pub(crate) struct ValueBytes;

+#[cfg(test)]
 impl ValueBytes {
    pub(crate) fn will_init(raw: &[u8]) -> Result<bool, InvalidInput> {
        if raw.len() < 12 {
--- a/pageserver/src/task_mgr.rs
+++ b/pageserver/src/task_mgr.rs
@@ -319,9 +319,6 @@ pub enum TaskKind {
    // Eviction. One per timeline.
    Eviction,

-    // Ingest housekeeping (flushing ephemeral layers on time threshold or disk pressure)
-    IngestHousekeeping,
-
    /// See [`crate::disk_usage_eviction_task`].
    DiskUsageEviction,

@@ -366,12 +363,8 @@ pub enum TaskKind {

    EphemeralFilePreWarmPageCache,

-    LayerDownload,
-
    #[cfg(test)]
    UnitTest,
-
-    DetachAncestor,
 }

 #[derive(Default)]
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -64,7 +64,6 @@ use self::timeline::uninit::UninitializedTimeline;
 use self::timeline::EvictionTaskTenantState;
 use self::timeline::TimelineResources;
 use self::timeline::WaitLsnError;
-use self::timeline::{GcCutoffs, GcInfo};
 use crate::config::PageServerConf;
 use crate::context::{DownloadBehavior, RequestContext};
 use crate::deletion_queue::DeletionQueueClient;
@@ -87,6 +86,7 @@ use crate::tenant::remote_timeline_client::INITDB_PATH;
 use crate::tenant::storage_layer::DeltaLayer;
 use crate::tenant::storage_layer::ImageLayer;
 use crate::InitializationOrder;
+use std::cmp::min;
 use std::collections::hash_map::Entry;
 use std::collections::BTreeSet;
 use std::collections::HashMap;
@@ -322,9 +322,6 @@ pub struct Tenant {
    /// All [`Tenant::timelines`] of a given [`Tenant`] instance share the same [`throttle::Throttle`] instance.
    pub(crate) timeline_get_throttle:
        Arc<throttle::Throttle<&'static crate::metrics::tenant_throttling::TimelineGet>>,
-
-    /// An ongoing timeline detach must be checked during attempts to GC or compact a timeline.
-    ongoing_timeline_detach: std::sync::Mutex<Option<(TimelineId, utils::completion::Barrier)>>,
 }

 impl std::fmt::Debug for Tenant {
@@ -1679,34 +1676,6 @@ impl Tenant {
        Ok(())
    }

-    // Call through to all timelines to freeze ephemeral layers if needed.  Usually
-    // this happens during ingest: this background housekeeping is for freezing layers
-    // that are open but haven't been written to for some time.
-    async fn ingest_housekeeping(&self) {
-        // Scan through the hashmap and collect a list of all the timelines,
-        // while holding the lock. Then drop the lock and actually perform the
-        // compactions.  We don't want to block everything else while the
-        // compaction runs.
-        let timelines = {
-            self.timelines
-                .lock()
-                .unwrap()
-                .values()
-                .filter_map(|timeline| {
-                    if timeline.is_active() {
-                        Some(timeline.clone())
-                    } else {
-                        None
-                    }
-                })
-                .collect::<Vec<_>>()
-        };
-
-        for timeline in &timelines {
-            timeline.maybe_freeze_ephemeral_layer().await;
-        }
-    }
-
    pub fn current_state(&self) -> TenantState {
        self.state.borrow().clone()
    }
@@ -2560,7 +2529,6 @@ impl Tenant {
                &crate::metrics::tenant_throttling::TIMELINE_GET,
            )),
            tenant_conf: Arc::new(ArcSwap::from_pointee(attached_conf)),
-            ongoing_timeline_detach: std::sync::Mutex::default(),
        }
    }

@@ -2844,48 +2812,7 @@ impl Tenant {
        cancel: &CancellationToken,
        ctx: &RequestContext,
    ) -> anyhow::Result<Vec<Arc<Timeline>>> {
-        // before taking the gc_cs lock, do the heavier weight finding of gc_cutoff points for
-        // currently visible timelines.
-        let timelines = self
-            .timelines
-            .lock()
-            .unwrap()
-            .values()
-            .filter(|tl| match target_timeline_id.as_ref() {
-                Some(target) => &tl.timeline_id == target,
-                None => true,
-            })
-            .cloned()
-            .collect::<Vec<_>>();
-
-        let mut gc_cutoffs: HashMap<TimelineId, GcCutoffs> =
-            HashMap::with_capacity(timelines.len());
-
-        for timeline in timelines.iter() {
-            let cutoff = timeline
-                .get_last_record_lsn()
-                .checked_sub(horizon)
-                .unwrap_or(Lsn(0));
-
-            let res = timeline.find_gc_cutoffs(cutoff, pitr, cancel, ctx).await;
-
-            match res {
-                Ok(cutoffs) => {
-                    let old = gc_cutoffs.insert(timeline.timeline_id, cutoffs);
-                    assert!(old.is_none());
-                }
-                Err(e) => {
-                    tracing::warn!(timeline_id = %timeline.timeline_id, "ignoring failure to find gc cutoffs: {e:#}");
-                }
-            }
-        }
-
-        if !self.is_active() {
-            anyhow::bail!("shutting down");
-        }
-
-        // grab mutex to prevent new timelines from being created here; avoid doing long operations
-        // because that will stall branch creation.
+        // grab mutex to prevent new timelines from being created here.
        let gc_cs = self.gc_cs.lock().await;

        // Scan all timelines. For each timeline, remember the timeline ID and
@@ -2947,6 +2874,11 @@ impl Tenant {
                }
            }

+            let cutoff = timeline
+                .get_last_record_lsn()
+                .checked_sub(horizon)
+                .unwrap_or(Lsn(0));
+
            let branchpoints: Vec<Lsn> = all_branchpoints
                .range((
                    Included((timeline_id, Lsn(0))),
@@ -2954,27 +2886,9 @@ impl Tenant {
                ))
                .map(|&x| x.1)
                .collect();
-
-            {
-                let mut target = timeline.gc_info.write().unwrap();
-
-                match gc_cutoffs.remove(&timeline_id) {
-                    Some(cutoffs) => {
-                        *target = GcInfo {
-                            retain_lsns: branchpoints,
-                            cutoffs,
-                        };
-                    }
-                    None => {
-                        // reasons for this being unavailable:
-                        // - this timeline was created while we were finding cutoffs
-                        // - lsn for timestamp search fails for this timeline repeatedly
-                        //
-                        // in both cases, refreshing the branchpoints is correct.
-                        target.retain_lsns = branchpoints;
-                    }
-                };
-            }
+            timeline
+                .update_gc_info(branchpoints, cutoff, pitr, cancel, ctx)
+                .await?;

            gc_timelines.push(timeline);
        }
@@ -3063,7 +2977,7 @@ impl Tenant {
        // and then the planned GC cutoff
        {
            let gc_info = src_timeline.gc_info.read().unwrap();
-            let cutoff = gc_info.min_cutoff();
+            let cutoff = min(gc_info.pitr_cutoff, gc_info.horizon_cutoff);
            if start_lsn < cutoff {
                return Err(CreateTimelineError::AncestorLsn(anyhow::anyhow!(
                    "invalid branch start lsn: less than planned GC cutoff {cutoff}"
@@ -3758,7 +3672,7 @@ pub(crate) mod harness {
                image_layer_creation_check_threshold: Some(
                    tenant_conf.image_layer_creation_check_threshold,
                ),
-                switch_aux_file_policy: Some(tenant_conf.switch_aux_file_policy),
+                switch_to_aux_file_v2: Some(tenant_conf.switch_to_aux_file_v2),
            }
        }
    }
@@ -3957,7 +3871,7 @@ mod tests {
    use crate::DEFAULT_PG_VERSION;
    use bytes::BytesMut;
    use hex_literal::hex;
-    use pageserver_api::key::{AUX_KEY_PREFIX, NON_INHERITED_RANGE};
+    use pageserver_api::key::NON_INHERITED_RANGE;
    use pageserver_api::keyspace::KeySpace;
    use pageserver_api::models::CompactionAlgorithm;
    use rand::{thread_rng, Rng};
@@ -4599,20 +4513,18 @@ mod tests {
    }

    async fn bulk_insert_compact_gc(
-        tenant: &Tenant,
-        timeline: &Arc<Timeline>,
+        timeline: Arc<Timeline>,
        ctx: &RequestContext,
        lsn: Lsn,
        repeat: usize,
        key_count: usize,
    ) -> anyhow::Result<()> {
        let compact = true;
-        bulk_insert_maybe_compact_gc(tenant, timeline, ctx, lsn, repeat, key_count, compact).await
+        bulk_insert_maybe_compact_gc(timeline, ctx, lsn, repeat, key_count, compact).await
    }

    async fn bulk_insert_maybe_compact_gc(
-        tenant: &Tenant,
-        timeline: &Arc<Timeline>,
+        timeline: Arc<Timeline>,
        ctx: &RequestContext,
        mut lsn: Lsn,
        repeat: usize,
@@ -4625,8 +4537,6 @@ mod tests {
        // Enforce that key range is monotonously increasing
        let mut keyspace = KeySpaceAccum::new();

-        let cancel = CancellationToken::new();
-
        for _ in 0..repeat {
            for _ in 0..key_count {
                test_key.field6 = blknum;
@@ -4648,19 +4558,24 @@ mod tests {
                blknum += 1;
            }

+            let cutoff = timeline.get_last_record_lsn();
+
+            timeline
+                .update_gc_info(
+                    Vec::new(),
+                    cutoff,
+                    Duration::ZERO,
+                    &CancellationToken::new(),
+                    ctx,
+                )
+                .await?;
            timeline.freeze_and_flush().await?;
            if compact {
-                // this requires timeline to be &Arc<Timeline>
-                timeline.compact(&cancel, EnumSet::empty(), ctx).await?;
+                timeline
+                    .compact(&CancellationToken::new(), EnumSet::empty(), ctx)
+                    .await?;
            }
-
-            // this doesn't really need to use the timeline_id target, but it is closer to what it
-            // originally was.
-            let res = tenant
-                .gc_iteration(Some(timeline.timeline_id), 0, Duration::ZERO, &cancel, ctx)
-                .await?;
-
-            assert_eq!(res.layers_removed, 0, "this never removes anything");
+            timeline.gc().await?;
        }

        Ok(())
@@ -4679,7 +4594,7 @@ mod tests {
            .await?;

        let lsn = Lsn(0x10);
-        bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
+        bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;

        Ok(())
    }
@@ -4710,7 +4625,7 @@ mod tests {
            .await?;

        let lsn = Lsn(0x10);
-        bulk_insert_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000).await?;
+        bulk_insert_compact_gc(tline.clone(), &ctx, lsn, 50, 10000).await?;

        let guard = tline.layers.read().await;
        guard.layer_map().dump(true, &ctx).await?;
@@ -4823,7 +4738,15 @@ mod tests {
            .await;

        let images = vectored_res?;
-        assert!(images.is_empty());
+        let mut key = NON_INHERITED_RANGE.start;
+        while key < NON_INHERITED_RANGE.end {
+            assert!(matches!(
+                images[&key],
+                Err(PageReconstructError::MissingKey(_))
+            ));
+            key = key.next();
+        }
+
        Ok(())
    }

@@ -5156,7 +5079,6 @@ mod tests {
            .await?;

        const NUM_KEYS: usize = 1000;
-        let cancel = CancellationToken::new();

        let mut test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();

@@ -5216,10 +5138,18 @@ mod tests {
            }

            // Perform a cycle of flush, and GC
-            tline.freeze_and_flush().await?;
-            tenant
-                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
+            let cutoff = tline.get_last_record_lsn();
+            tline
+                .update_gc_info(
+                    Vec::new(),
+                    cutoff,
+                    Duration::ZERO,
+                    &CancellationToken::new(),
+                    &ctx,
+                )
                .await?;
+            tline.freeze_and_flush().await?;
+            tline.gc().await?;
        }

        Ok(())
@@ -5240,8 +5170,6 @@ mod tests {

        let mut keyspace = KeySpaceAccum::new();

-        let cancel = CancellationToken::new();
-
        // Track when each page was last modified. Used to assert that
        // a read sees the latest page version.
        let mut updated = [Lsn(0); NUM_KEYS];
@@ -5305,11 +5233,21 @@ mod tests {
            }

            // Perform a cycle of flush, compact, and GC
-            tline.freeze_and_flush().await?;
-            tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
-            tenant
-                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
+            let cutoff = tline.get_last_record_lsn();
+            tline
+                .update_gc_info(
+                    Vec::new(),
+                    cutoff,
+                    Duration::ZERO,
+                    &CancellationToken::new(),
+                    &ctx,
+                )
                .await?;
+            tline.freeze_and_flush().await?;
+            tline
+                .compact(&CancellationToken::new(), EnumSet::empty(), &ctx)
+                .await?;
+            tline.gc().await?;
        }

        Ok(())
@@ -5514,7 +5452,7 @@ mod tests {

        let lsn = Lsn(0x10);
        let compact = false;
-        bulk_insert_maybe_compact_gc(&tenant, &tline, &ctx, lsn, 50, 10000, compact).await?;
+        bulk_insert_maybe_compact_gc(tline.clone(), &ctx, lsn, 50, 10000, compact).await?;

        let test_key = Key::from_hex("010000000033333333444444445500000000").unwrap();
        let read_lsn = Lsn(u64::MAX - 1);
@@ -5524,108 +5462,4 @@ mod tests {

        Ok(())
    }
-
-    #[tokio::test]
-    async fn test_metadata_scan() -> anyhow::Result<()> {
-        let harness = TenantHarness::create("test_metadata_scan")?;
-        let (tenant, ctx) = harness.load().await;
-        let tline = tenant
-            .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
-            .await?;
-
-        const NUM_KEYS: usize = 1000;
-        const STEP: usize = 100; // random update + scan base_key + idx * STEP
-
-        let cancel = CancellationToken::new();
-
-        let mut base_key = Key::from_hex("000000000033333333444444445500000000").unwrap();
-        base_key.field1 = AUX_KEY_PREFIX;
-        let mut test_key = base_key;
-
-        // Track when each page was last modified. Used to assert that
-        // a read sees the latest page version.
-        let mut updated = [Lsn(0); NUM_KEYS];
-
-        let mut lsn = Lsn(0x10);
-        #[allow(clippy::needless_range_loop)]
-        for blknum in 0..NUM_KEYS {
-            lsn = Lsn(lsn.0 + 0x10);
-            test_key.field6 = (blknum * STEP) as u32;
-            let mut writer = tline.writer().await;
-            writer
-                .put(
-                    test_key,
-                    lsn,
-                    &Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
-                    &ctx,
-                )
-                .await?;
-            writer.finish_write(lsn);
-            updated[blknum] = lsn;
-            drop(writer);
-        }
-
-        let keyspace = KeySpace::single(base_key..base_key.add((NUM_KEYS * STEP) as u32));
-
-        for _ in 0..10 {
-            // Read all the blocks
-            for (blknum, last_lsn) in updated.iter().enumerate() {
-                test_key.field6 = (blknum * STEP) as u32;
-                assert_eq!(
-                    tline.get(test_key, lsn, &ctx).await?,
-                    test_img(&format!("{} at {}", blknum, last_lsn))
-                );
-            }
-
-            let mut cnt = 0;
-            for (key, value) in tline
-                .get_vectored_impl(
-                    keyspace.clone(),
-                    lsn,
-                    ValuesReconstructState::default(),
-                    &ctx,
-                )
-                .await?
-            {
-                let blknum = key.field6 as usize;
-                let value = value?;
-                assert!(blknum % STEP == 0);
-                let blknum = blknum / STEP;
-                assert_eq!(
-                    value,
-                    test_img(&format!("{} at {}", blknum, updated[blknum]))
-                );
-                cnt += 1;
-            }
-
-            assert_eq!(cnt, NUM_KEYS);
-
-            for _ in 0..NUM_KEYS {
-                lsn = Lsn(lsn.0 + 0x10);
-                let blknum = thread_rng().gen_range(0..NUM_KEYS);
-                test_key.field6 = (blknum * STEP) as u32;
-                let mut writer = tline.writer().await;
-                writer
-                    .put(
-                        test_key,
-                        lsn,
-                        &Value::Image(test_img(&format!("{} at {}", blknum, lsn))),
-                        &ctx,
-                    )
-                    .await?;
-                writer.finish_write(lsn);
-                drop(writer);
-                updated[blknum] = lsn;
-            }
-
-            // Perform a cycle of flush, compact, and GC
-            tline.freeze_and_flush().await?;
-            tline.compact(&cancel, EnumSet::empty(), &ctx).await?;
-            tenant
-                .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, &ctx)
-                .await?;
-        }
-
-        Ok(())
-    }
 }
--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -130,9 +130,8 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
    async fn write_all_unbuffered<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        src_buf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<(), Error>) {
-        let (src_buf, res) = self.inner.write_all(src_buf, ctx).await;
+        let (src_buf, res) = self.inner.write_all(src_buf).await;
        let nbytes = match res {
            Ok(nbytes) => nbytes,
            Err(e) => return (src_buf, Err(e)),
@@ -143,9 +142,9 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {

    #[inline(always)]
    /// Flushes the internal buffer to the underlying `VirtualFile`.
-    pub async fn flush_buffer(&mut self, ctx: &RequestContext) -> Result<(), Error> {
+    pub async fn flush_buffer(&mut self) -> Result<(), Error> {
        let buf = std::mem::take(&mut self.buf);
-        let (mut buf, res) = self.inner.write_all(buf, ctx).await;
+        let (mut buf, res) = self.inner.write_all(buf).await;
        res?;
        buf.clear();
        self.buf = buf;
@@ -166,11 +165,10 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        src_buf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<(), Error>) {
        if !BUFFERED {
            assert!(self.buf.is_empty());
-            return self.write_all_unbuffered(src_buf, ctx).await;
+            return self.write_all_unbuffered(src_buf).await;
        }
        let remaining = Self::CAPACITY - self.buf.len();
        let src_buf_len = src_buf.bytes_init();
@@ -185,7 +183,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
        }
        // Then, if the buffer is full, flush it out
        if self.buf.len() == Self::CAPACITY {
-            if let Err(e) = self.flush_buffer(ctx).await {
+            if let Err(e) = self.flush_buffer().await {
                return (Slice::into_inner(src_buf), Err(e));
            }
        }
@@ -201,7 +199,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
                assert_eq!(copied, src_buf.len());
                Slice::into_inner(src_buf)
            } else {
-                let (src_buf, res) = self.write_all_unbuffered(src_buf, ctx).await;
+                let (src_buf, res) = self.write_all_unbuffered(src_buf).await;
                if let Err(e) = res {
                    return (src_buf, Err(e));
                }
@@ -218,7 +216,6 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
    pub async fn write_blob<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        srcbuf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<u64, Error>) {
        let offset = self.offset;

@@ -230,7 +227,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
            if len < 128 {
                // Short blob. Write a 1-byte length header
                io_buf.put_u8(len as u8);
-                self.write_all(io_buf, ctx).await
+                self.write_all(io_buf).await
            } else {
                // Write a 4-byte length header
                if len > 0x7fff_ffff {
@@ -245,7 +242,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
                let mut len_buf = (len as u32).to_be_bytes();
                len_buf[0] |= 0x80;
                io_buf.extend_from_slice(&len_buf[..]);
-                self.write_all(io_buf, ctx).await
+                self.write_all(io_buf).await
            }
        }
        .await;
@@ -254,7 +251,7 @@ impl<const BUFFERED: bool> BlobWriter<BUFFERED> {
            Ok(_) => (),
            Err(e) => return (Slice::into_inner(srcbuf.slice(..)), Err(e)),
        }
-        let (srcbuf, res) = self.write_all(srcbuf, ctx).await;
+        let (srcbuf, res) = self.write_all(srcbuf).await;
        (srcbuf, res.map(|_| offset))
    }
 }
@@ -264,8 +261,8 @@ impl BlobWriter<true> {
    ///
    /// This function flushes the internal buffer before giving access
    /// to the underlying `VirtualFile`.
-    pub async fn into_inner(mut self, ctx: &RequestContext) -> Result<VirtualFile, Error> {
-        self.flush_buffer(ctx).await?;
+    pub async fn into_inner(mut self) -> Result<VirtualFile, Error> {
+        self.flush_buffer().await?;
        Ok(self.inner)
    }

@@ -302,16 +299,16 @@ mod tests {
            let file = VirtualFile::create(pathbuf.as_path()).await?;
            let mut wtr = BlobWriter::<BUFFERED>::new(file, 0);
            for blob in blobs.iter() {
-                let (_, res) = wtr.write_blob(blob.clone(), &ctx).await;
+                let (_, res) = wtr.write_blob(blob.clone()).await;
                let offs = res?;
                offsets.push(offs);
            }
            // Write out one page worth of zeros so that we can
            // read again with read_blk
-            let (_, res) = wtr.write_blob(vec![0; PAGE_SZ], &ctx).await;
+            let (_, res) = wtr.write_blob(vec![0; PAGE_SZ]).await;
            let offs = res?;
            println!("Writing final blob at offs={offs}");
-            wtr.flush_buffer(&ctx).await?;
+            wtr.flush_buffer().await?;
        }

        let file = VirtualFile::open(pathbuf.as_path()).await?;
--- a/pageserver/src/tenant/config.rs
+++ b/pageserver/src/tenant/config.rs
@@ -9,7 +9,6 @@
 //! may lead to a data loss.
 //!
 use anyhow::bail;
-use pageserver_api::models::AuxFilePolicy;
 use pageserver_api::models::CompactionAlgorithm;
 use pageserver_api::models::EvictionPolicy;
 use pageserver_api::models::{self, ThrottleConfig};
@@ -371,9 +370,9 @@ pub struct TenantConf {
    // Expresed in multiples of checkpoint distance.
    pub image_layer_creation_check_threshold: u8,

-    /// Switch to a new aux file policy. Switching this flag requires the user has not written any aux file into
+    /// Switch to aux file v2. Switching this flag requires that the user has not written any aux file into
    /// the storage before, and this flag cannot be switched back. Otherwise there will be data corruptions.
-    pub switch_aux_file_policy: AuxFilePolicy,
+    pub switch_to_aux_file_v2: bool,
 }

 /// Same as TenantConf, but this struct preserves the information about
@@ -472,7 +471,7 @@ pub struct TenantConfOpt {

    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default)]
-    pub switch_aux_file_policy: Option<AuxFilePolicy>,
+    pub switch_to_aux_file_v2: Option<bool>,
 }

 impl TenantConfOpt {
@@ -530,9 +529,9 @@ impl TenantConfOpt {
            image_layer_creation_check_threshold: self
                .image_layer_creation_check_threshold
                .unwrap_or(global_conf.image_layer_creation_check_threshold),
-            switch_aux_file_policy: self
-                .switch_aux_file_policy
-                .unwrap_or(global_conf.switch_aux_file_policy),
+            switch_to_aux_file_v2: self
+                .switch_to_aux_file_v2
+                .unwrap_or(global_conf.switch_to_aux_file_v2),
        }
    }
 }
@@ -574,7 +573,7 @@ impl Default for TenantConf {
            lazy_slru_download: false,
            timeline_get_throttle: crate::tenant::throttle::Config::disabled(),
            image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
-            switch_aux_file_policy: AuxFilePolicy::V1,
+            switch_to_aux_file_v2: false,
        }
    }
 }
@@ -649,7 +648,7 @@ impl From<TenantConfOpt> for models::TenantConfig {
            lazy_slru_download: value.lazy_slru_download,
            timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from),
            image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
-            switch_aux_file_policy: value.switch_aux_file_policy,
+            switch_to_aux_file_v2: value.switch_to_aux_file_v2,
        }
    }
 }
--- a/pageserver/src/tenant/delete.rs
+++ b/pageserver/src/tenant/delete.rs
@@ -585,20 +585,9 @@ impl DeleteTenantFlow {

                    // FIXME: we should not be modifying this from outside of mgr.rs.
                    // This will go away when we simplify deletion (https://github.com/neondatabase/neon/issues/5080)
-
-                    // Update stats
-                    match &removed {
-                        TenantsMapRemoveResult::Occupied(slot) => {
-                            crate::metrics::TENANT_MANAGER.slot_removed(slot);
-                        }
-                        TenantsMapRemoveResult::InProgress(barrier) => {
-                            crate::metrics::TENANT_MANAGER
-                                .slot_removed(&TenantSlot::InProgress(barrier.clone()));
-                        }
-                        TenantsMapRemoveResult::Vacant => {
-                            // Nothing changed in map, no metric update
-                        }
-                    }
+                    crate::metrics::TENANT_MANAGER
+                        .tenant_slots
+                        .set(locked.len() as u64);

                    match removed {
                        TenantsMapRemoveResult::Occupied(TenantSlot::Attached(tenant)) => {
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -74,7 +74,7 @@ impl EphemeralFile {
    pub(crate) async fn write_blob(
        &mut self,
        srcbuf: &[u8],
-        ctx: &RequestContext,
+        _ctx: &RequestContext,
    ) -> Result<u64, io::Error> {
        let pos = self.rw.bytes_written();

@@ -83,15 +83,15 @@ impl EphemeralFile {
            // short one-byte length header
            let len_buf = [srcbuf.len() as u8];

-            self.rw.write_all_borrowed(&len_buf, ctx).await?;
+            self.rw.write_all_borrowed(&len_buf).await?;
        } else {
            let mut len_buf = u32::to_be_bytes(srcbuf.len() as u32);
            len_buf[0] |= 0x80;
-            self.rw.write_all_borrowed(&len_buf, ctx).await?;
+            self.rw.write_all_borrowed(&len_buf).await?;
        }

        // Write the payload
-        self.rw.write_all_borrowed(srcbuf, ctx).await?;
+        self.rw.write_all_borrowed(srcbuf).await?;

        Ok(pos)
    }
--- a/pageserver/src/tenant/ephemeral_file/page_caching.rs
+++ b/pageserver/src/tenant/ephemeral_file/page_caching.rs
@@ -35,14 +35,10 @@ impl RW {
        self.page_cache_file_id
    }

-    pub(crate) async fn write_all_borrowed(
-        &mut self,
-        srcbuf: &[u8],
-        ctx: &RequestContext,
-    ) -> Result<usize, io::Error> {
+    pub(crate) async fn write_all_borrowed(&mut self, srcbuf: &[u8]) -> Result<usize, io::Error> {
        // It doesn't make sense to proactively fill the page cache on the Pageserver write path
        // because Compute is unlikely to access recently written data.
-        self.rw.write_all_borrowed(srcbuf, ctx).await
+        self.rw.write_all_borrowed(srcbuf).await
    }

    pub(crate) fn bytes_written(&self) -> u64 {
@@ -138,7 +134,6 @@ impl crate::virtual_file::owned_buffers_io::write::OwnedAsyncWriter for PreWarmi
    >(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
        let buf = buf.slice(..);
        let saved_bounds = buf.bounds(); // save for reconstructing the Slice from iobuf after the IO is done
@@ -155,7 +150,7 @@ impl crate::virtual_file::owned_buffers_io::write::OwnedAsyncWriter for PreWarmi
        );

        // Do the IO.
-        let iobuf = match self.file.write_all(buf, ctx).await {
+        let iobuf = match self.file.write_all(buf).await {
            (iobuf, Ok(nwritten)) => {
                assert_eq!(nwritten, buflen);
                iobuf
--- a/pageserver/src/tenant/ephemeral_file/zero_padded_read_write.rs
+++ b/pageserver/src/tenant/ephemeral_file/zero_padded_read_write.rs
@@ -20,7 +20,6 @@
 mod zero_padded;

 use crate::{
-    context::RequestContext,
    page_cache::PAGE_SZ,
    virtual_file::owned_buffers_io::{
        self,
@@ -61,12 +60,8 @@ where
        self.buffered_writer.as_inner().as_inner()
    }

-    pub async fn write_all_borrowed(
-        &mut self,
-        buf: &[u8],
-        ctx: &RequestContext,
-    ) -> std::io::Result<usize> {
-        self.buffered_writer.write_buffered_borrowed(buf, ctx).await
+    pub async fn write_all_borrowed(&mut self, buf: &[u8]) -> std::io::Result<usize> {
+        self.buffered_writer.write_buffered_borrowed(buf).await
    }

    pub fn bytes_written(&self) -> u64 {
--- a/pageserver/src/tenant/layer_map.rs
+++ b/pageserver/src/tenant/layer_map.rs
@@ -588,7 +588,7 @@ impl LayerMap {
            let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
            coverage.push((kr, current_val.take()));
            current_key = change_key;
-            current_val.clone_from(&change_val);
+            current_val = change_val.clone();
        }

        // Add the final interval
@@ -672,12 +672,12 @@ impl LayerMap {
        // Loop through the delta coverage and recurse on each part
        for (change_key, change_val) in version.delta_coverage.range(start..end) {
            // If there's a relevant delta in this part, add 1 and recurse down
-            if let Some(val) = &current_val {
+            if let Some(val) = current_val {
                if val.get_lsn_range().end > lsn.start {
                    let kr = Key::from_i128(current_key)..Key::from_i128(change_key);
                    let lr = lsn.start..val.get_lsn_range().start;
                    if !kr.is_empty() {
-                        let base_count = Self::is_reimage_worthy(val, key) as usize;
+                        let base_count = Self::is_reimage_worthy(&val, key) as usize;
                        let new_limit = limit.map(|l| l - base_count);
                        let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);
                        max_stacked_deltas = std::cmp::max(
@@ -689,17 +689,17 @@ impl LayerMap {
            }

            current_key = change_key;
-            current_val.clone_from(&change_val);
+            current_val = change_val.clone();
        }

        // Consider the last part
-        if let Some(val) = &current_val {
+        if let Some(val) = current_val {
            if val.get_lsn_range().end > lsn.start {
                let kr = Key::from_i128(current_key)..Key::from_i128(end);
                let lr = lsn.start..val.get_lsn_range().start;

                if !kr.is_empty() {
-                    let base_count = Self::is_reimage_worthy(val, key) as usize;
+                    let base_count = Self::is_reimage_worthy(&val, key) as usize;
                    let new_limit = limit.map(|l| l - base_count);
                    let max_stacked_deltas_underneath = self.count_deltas(&kr, &lr, new_limit);
                    max_stacked_deltas = std::cmp::max(
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -207,24 +207,6 @@ impl TimelineMetadata {
        self.body.ancestor_lsn
    }

-    /// When reparenting, the `ancestor_lsn` does not change.
-    pub fn reparent(&mut self, timeline: &TimelineId) {
-        assert!(self.body.ancestor_timeline.is_some());
-        // no assertion for redoing this: it's fine, we may have to repeat this multiple times over
-        self.body.ancestor_timeline = Some(*timeline);
-    }
-
-    pub fn detach_from_ancestor(&mut self, timeline: &TimelineId, ancestor_lsn: &Lsn) {
-        if let Some(ancestor) = self.body.ancestor_timeline {
-            assert_eq!(ancestor, *timeline);
-        }
-        if self.body.ancestor_lsn != Lsn(0) {
-            assert_eq!(self.body.ancestor_lsn, *ancestor_lsn);
-        }
-        self.body.ancestor_timeline = None;
-        self.body.ancestor_lsn = Lsn(0);
-    }
-
    pub fn latest_gc_cutoff_lsn(&self) -> Lsn {
        self.body.latest_gc_cutoff_lsn
    }
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -56,7 +56,6 @@ use utils::id::{TenantId, TimelineId};

 use super::delete::DeleteTenantError;
 use super::secondary::SecondaryTenant;
-use super::timeline::detach_ancestor::PreparedTimelineDetach;
 use super::TenantSharedResources;

 /// For a tenant that appears in TenantsMap, it may either be
@@ -247,7 +246,6 @@ impl TenantsMap {
        }
    }

-    #[cfg(all(debug_assertions, not(test)))]
    pub(crate) fn len(&self) -> usize {
        match self {
            TenantsMap::Initializing => 0,
@@ -748,7 +746,6 @@ pub async fn init_tenant_mgr(
            }
        };

-        METRICS.slot_inserted(&slot);
        tenants.insert(tenant_shard_id, slot);
    }

@@ -756,7 +753,7 @@ pub async fn init_tenant_mgr(

    let mut tenants_map = TENANTS.write().unwrap();
    assert!(matches!(&*tenants_map, &TenantsMap::Initializing));
-
+    METRICS.tenant_slots.set(tenants.len() as u64);
    *tenants_map = TenantsMap::Open(tenants);

    Ok(TenantManager {
@@ -827,14 +824,6 @@ fn tenant_spawn(
 async fn shutdown_all_tenants0(tenants: &std::sync::RwLock<TenantsMap>) {
    let mut join_set = JoinSet::new();

-    #[cfg(all(debug_assertions, not(test)))]
-    {
-        // Check that our metrics properly tracked the size of the tenants map.  This is a convenient location to check,
-        // as it happens implicitly at the end of tests etc.
-        let m = tenants.read().unwrap();
-        debug_assert_eq!(METRICS.slots_total(), m.len() as u64);
-    }
-
    // Atomically, 1. create the shutdown tasks and 2. prevent creation of new tenants.
    let (total_in_progress, total_attached) = {
        let mut m = tenants.write().unwrap();
@@ -2008,101 +1997,6 @@ impl TenantManager {
            })
            .collect())
    }
-
-    /// Completes an earlier prepared timeline detach ancestor.
-    pub(crate) async fn complete_detaching_timeline_ancestor(
-        &self,
-        tenant_shard_id: TenantShardId,
-        timeline_id: TimelineId,
-        prepared: PreparedTimelineDetach,
-        ctx: &RequestContext,
-    ) -> Result<Vec<TimelineId>, anyhow::Error> {
-        struct RevertOnDropSlot(Option<SlotGuard>);
-
-        impl Drop for RevertOnDropSlot {
-            fn drop(&mut self) {
-                if let Some(taken) = self.0.take() {
-                    taken.revert();
-                }
-            }
-        }
-
-        impl RevertOnDropSlot {
-            fn into_inner(mut self) -> SlotGuard {
-                self.0.take().unwrap()
-            }
-        }
-
-        impl std::ops::Deref for RevertOnDropSlot {
-            type Target = SlotGuard;
-
-            fn deref(&self) -> &Self::Target {
-                self.0.as_ref().unwrap()
-            }
-        }
-
-        let slot_guard = tenant_map_acquire_slot(&tenant_shard_id, TenantSlotAcquireMode::Any)?;
-        let slot_guard = RevertOnDropSlot(Some(slot_guard));
-
-        let tenant = {
-            let Some(old_slot) = slot_guard.get_old_value() else {
-                anyhow::bail!(
-                    "Tenant not found when trying to complete detaching timeline ancestor"
-                );
-            };
-
-            let Some(tenant) = old_slot.get_attached() else {
-                anyhow::bail!("Tenant is not in attached state");
-            };
-
-            if !tenant.is_active() {
-                anyhow::bail!("Tenant is not active");
-            }
-
-            tenant.clone()
-        };
-
-        let timeline = tenant.get_timeline(timeline_id, true)?;
-
-        let reparented = timeline
-            .complete_detaching_timeline_ancestor(&tenant, prepared, ctx)
-            .await?;
-
-        let mut slot_guard = slot_guard.into_inner();
-
-        let (_guard, progress) = utils::completion::channel();
-        match tenant.shutdown(progress, ShutdownMode::Hard).await {
-            Ok(()) => {
-                slot_guard.drop_old_value()?;
-            }
-            Err(_barrier) => {
-                slot_guard.revert();
-                // this really should not happen, at all, unless shutdown was already going?
-                anyhow::bail!("Cannot restart Tenant, already shutting down");
-            }
-        }
-
-        let tenant_path = self.conf.tenant_path(&tenant_shard_id);
-        let config = Tenant::load_tenant_config(self.conf, &tenant_shard_id)?;
-
-        let shard_identity = config.shard;
-        let tenant = tenant_spawn(
-            self.conf,
-            tenant_shard_id,
-            &tenant_path,
-            self.resources.clone(),
-            AttachedTenantConf::try_from(config)?,
-            shard_identity,
-            None,
-            self.tenants,
-            SpawnMode::Eager,
-            ctx,
-        )?;
-
-        slot_guard.upsert(TenantSlot::Attached(tenant))?;
-
-        Ok(reparented)
-    }
 }

 #[derive(Debug, thiserror::Error)]
@@ -2534,13 +2428,10 @@ impl SlotGuard {
                TenantsMap::Open(m) => m,
            };

-            METRICS.slot_inserted(&new_value);
-
            let replaced = m.insert(self.tenant_shard_id, new_value);
            self.upserted = true;
-            if let Some(replaced) = replaced.as_ref() {
-                METRICS.slot_removed(replaced);
-            }
+
+            METRICS.tenant_slots.set(m.len() as u64);

            replaced
        };
@@ -2650,13 +2541,9 @@ impl Drop for SlotGuard {
                }

                if self.old_value_is_shutdown() {
-                    METRICS.slot_removed(entry.get());
                    entry.remove();
                } else {
-                    let inserting = self.old_value.take().unwrap();
-                    METRICS.slot_inserted(&inserting);
-                    let replaced = entry.insert(inserting);
-                    METRICS.slot_removed(&replaced);
+                    entry.insert(self.old_value.take().unwrap());
                }
            }
            Entry::Vacant(_) => {
@@ -2667,6 +2554,8 @@ impl Drop for SlotGuard {
                );
            }
        }
+
+        METRICS.tenant_slots.set(m.len() as u64);
    }
 }

@@ -2746,9 +2635,7 @@ fn tenant_map_acquire_slot_impl(
            }
            _ => {
                let (completion, barrier) = utils::completion::channel();
-                let inserting = TenantSlot::InProgress(barrier);
-                METRICS.slot_inserted(&inserting);
-                v.insert(inserting);
+                v.insert(TenantSlot::InProgress(barrier));
                tracing::debug!("Vacant, inserted InProgress");
                Ok(SlotGuard::new(*tenant_shard_id, None, completion))
            }
@@ -2784,10 +2671,7 @@ fn tenant_map_acquire_slot_impl(
                _ => {
                    // Happy case: the slot was not in any state that violated our mode
                    let (completion, barrier) = utils::completion::channel();
-                    let in_progress = TenantSlot::InProgress(barrier);
-                    METRICS.slot_inserted(&in_progress);
-                    let old_value = o.insert(in_progress);
-                    METRICS.slot_removed(&old_value);
+                    let old_value = o.insert(TenantSlot::InProgress(barrier));
                    tracing::debug!("Occupied, replaced with InProgress");
                    Ok(SlotGuard::new(
                        *tenant_shard_id,
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -210,7 +210,6 @@ use tracing::{debug, error, info, instrument, warn};
 use tracing::{info_span, Instrument};
 use utils::lsn::Lsn;

-use crate::context::RequestContext;
 use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError};
 use crate::metrics::{
    MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics,
@@ -506,7 +505,6 @@ impl RemoteTimelineClient {
        layer_file_name: &LayerFileName,
        layer_metadata: &LayerFileMetadata,
        cancel: &CancellationToken,
-        ctx: &RequestContext,
    ) -> anyhow::Result<u64> {
        let downloaded_size = {
            let _unfinished_gauge_guard = self.metrics.call_begin(
@@ -524,7 +522,6 @@ impl RemoteTimelineClient {
                layer_file_name,
                layer_metadata,
                cancel,
-                ctx,
            )
            .measure_remote_op(
                RemoteOpFileKind::Layer,
@@ -570,7 +567,7 @@ impl RemoteTimelineClient {
        // ahead of what's _actually_ on the remote during index upload.
        upload_queue.latest_metadata = metadata.clone();

-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue, upload_queue.latest_metadata.clone());

        Ok(())
    }
@@ -591,7 +588,7 @@ impl RemoteTimelineClient {

        upload_queue.latest_metadata.apply(update);

-        self.schedule_index_upload(upload_queue);
+        self.schedule_index_upload(upload_queue, upload_queue.latest_metadata.clone());

        Ok(())
    }
@@ -611,14 +608,18 @@ impl RemoteTimelineClient {
        let upload_queue = guard.initialized_mut()?;

        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue, upload_queue.latest_metadata.clone());
        }

        Ok(())
    }

    /// Launch an index-file upload operation in the background (internal function)
-    fn schedule_index_upload(self: &Arc<Self>, upload_queue: &mut UploadQueueInitialized) {
+    fn schedule_index_upload(
+        self: &Arc<Self>,
+        upload_queue: &mut UploadQueueInitialized,
+        metadata: TimelineMetadata,
+    ) {
        let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn();

        info!(
@@ -627,7 +628,11 @@ impl RemoteTimelineClient {
            upload_queue.latest_files_changes_since_metadata_upload_scheduled,
        );

-        let index_part = IndexPart::from(&*upload_queue);
+        let index_part = IndexPart::new(
+            upload_queue.latest_files.clone(),
+            disk_consistent_lsn,
+            metadata,
+        );
        let op = UploadOp::UploadMetadata(index_part, disk_consistent_lsn);
        self.metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
@@ -637,61 +642,9 @@ impl RemoteTimelineClient {
        self.launch_queued_tasks(upload_queue);
    }

-    pub(crate) async fn schedule_reparenting_and_wait(
-        self: &Arc<Self>,
-        new_parent: &TimelineId,
-    ) -> anyhow::Result<()> {
-        // FIXME: because of how Timeline::schedule_uploads works when called from layer flushing
-        // and reads the in-memory part we cannot do the detaching like this
-        let receiver = {
-            let mut guard = self.upload_queue.lock().unwrap();
-            let upload_queue = guard.initialized_mut()?;
-
-            upload_queue.latest_metadata.reparent(new_parent);
-
-            self.schedule_index_upload(upload_queue);
-
-            self.schedule_barrier0(upload_queue)
-        };
-
-        Self::wait_completion0(receiver).await
-    }
-
-    /// Schedules uploading a new version of `index_part.json` with the given layers added,
-    /// detaching from ancestor and waits for it to complete.
    ///
-    /// This is used with `Timeline::detach_ancestor` functionality.
-    pub(crate) async fn schedule_adding_existing_layers_to_index_detach_and_wait(
-        self: &Arc<Self>,
-        layers: &[Layer],
-        adopted: (TimelineId, Lsn),
-    ) -> anyhow::Result<()> {
-        let barrier = {
-            let mut guard = self.upload_queue.lock().unwrap();
-            let upload_queue = guard.initialized_mut()?;
-
-            upload_queue
-                .latest_metadata
-                .detach_from_ancestor(&adopted.0, &adopted.1);
-
-            for layer in layers {
-                upload_queue
-                    .latest_files
-                    .insert(layer.layer_desc().filename(), layer.metadata());
-            }
-
-            self.schedule_index_upload(upload_queue);
-
-            let barrier = self.schedule_barrier0(upload_queue);
-            self.launch_queued_tasks(upload_queue);
-            barrier
-        };
-
-        Self::wait_completion0(barrier).await
-    }
-
-    /// Launch an upload operation in the background; the file is added to be included in next
-    /// `index_part.json` upload.
+    /// Launch an upload operation in the background.
+    ///
    pub(crate) fn schedule_layer_file_upload(
        self: &Arc<Self>,
        layer: ResidentLayer,
@@ -717,11 +670,9 @@ impl RemoteTimelineClient {
        upload_queue.latest_files_changes_since_metadata_upload_scheduled += 1;

        info!(
-            gen=?metadata.generation,
-            shard=?metadata.shard,
-            "scheduled layer file upload {layer}",
+            "scheduled layer file upload {layer} gen={:?} shard={:?}",
+            metadata.generation, metadata.shard
        );
-
        let op = UploadOp::UploadLayer(layer, metadata);
        self.metric_begin(&op);
        upload_queue.queued_operations.push_back(op);
@@ -784,6 +735,10 @@ impl RemoteTimelineClient {
    where
        I: IntoIterator<Item = LayerFileName>,
    {
+        // Deleting layers doesn't affect the values stored in TimelineMetadata,
+        // so we don't need to update it. Just serialize it.
+        let metadata = upload_queue.latest_metadata.clone();
+
        // Decorate our list of names with each name's metadata, dropping
        // names that are unexpectedly missing from our metadata.  This metadata
        // is later used when physically deleting layers, to construct key paths.
@@ -822,7 +777,7 @@ impl RemoteTimelineClient {
        // index_part update, because that needs to be uploaded before we can actually delete the
        // files.
        if upload_queue.latest_files_changes_since_metadata_upload_scheduled > 0 {
-            self.schedule_index_upload(upload_queue);
+            self.schedule_index_upload(upload_queue, metadata);
        }

        with_metadata
@@ -924,18 +879,12 @@ impl RemoteTimelineClient {

    /// Wait for all previously scheduled uploads/deletions to complete
    pub(crate) async fn wait_completion(self: &Arc<Self>) -> anyhow::Result<()> {
-        let receiver = {
+        let mut receiver = {
            let mut guard = self.upload_queue.lock().unwrap();
            let upload_queue = guard.initialized_mut()?;
            self.schedule_barrier0(upload_queue)
        };

-        Self::wait_completion0(receiver).await
-    }
-
-    async fn wait_completion0(
-        mut receiver: tokio::sync::watch::Receiver<()>,
-    ) -> anyhow::Result<()> {
        if receiver.changed().await.is_err() {
            anyhow::bail!("wait_completion aborted because upload queue was stopped");
        }
@@ -1051,7 +1000,8 @@ impl RemoteTimelineClient {
            let deleted_at = Utc::now().naive_utc();
            stopped.deleted_at = SetDeletedFlagProgress::InProgress(deleted_at);

-            let mut index_part = IndexPart::from(&stopped.upload_queue_for_deletion);
+            let mut index_part = IndexPart::try_from(&stopped.upload_queue_for_deletion)
+                .context("IndexPart serialize")?;
            index_part.deleted_at = Some(deleted_at);
            index_part
        };
@@ -1132,93 +1082,6 @@ impl RemoteTimelineClient {
        Ok(())
    }

-    /// Uploads the given layer **without** adding it to be part of a future `index_part.json` upload.
-    ///
-    /// This is not normally needed.
-    pub(crate) async fn upload_layer_file(
-        self: &Arc<Self>,
-        uploaded: &ResidentLayer,
-        cancel: &CancellationToken,
-    ) -> anyhow::Result<()> {
-        let remote_path = remote_layer_path(
-            &self.tenant_shard_id.tenant_id,
-            &self.timeline_id,
-            self.tenant_shard_id.to_index(),
-            &uploaded.layer_desc().filename(),
-            uploaded.metadata().generation,
-        );
-
-        backoff::retry(
-            || async {
-                upload::upload_timeline_layer(
-                    &self.storage_impl,
-                    uploaded.local_path(),
-                    &remote_path,
-                    uploaded.metadata().file_size(),
-                    cancel,
-                )
-                .await
-            },
-            TimeoutOrCancel::caused_by_cancel,
-            FAILED_UPLOAD_WARN_THRESHOLD,
-            FAILED_REMOTE_OP_RETRIES,
-            "upload a layer without adding it to latest files",
-            cancel,
-        )
-        .await
-        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
-        .and_then(|x| x)
-        .context("upload a layer without adding it to latest files")
-    }
-
-    /// Copies the `adopted` remote existing layer to the remote path of `adopted_as`. The layer is
-    /// not added to be part of a future `index_part.json` upload.
-    pub(crate) async fn copy_timeline_layer(
-        self: &Arc<Self>,
-        adopted: &Layer,
-        adopted_as: &Layer,
-        cancel: &CancellationToken,
-    ) -> anyhow::Result<()> {
-        let source_remote_path = remote_layer_path(
-            &self.tenant_shard_id.tenant_id,
-            &adopted
-                .get_timeline_id()
-                .expect("Source timeline should be alive"),
-            self.tenant_shard_id.to_index(),
-            &adopted.layer_desc().filename(),
-            adopted.metadata().generation,
-        );
-
-        let target_remote_path = remote_layer_path(
-            &self.tenant_shard_id.tenant_id,
-            &self.timeline_id,
-            self.tenant_shard_id.to_index(),
-            &adopted_as.layer_desc().filename(),
-            adopted_as.metadata().generation,
-        );
-
-        backoff::retry(
-            || async {
-                upload::copy_timeline_layer(
-                    &self.storage_impl,
-                    &source_remote_path,
-                    &target_remote_path,
-                    cancel,
-                )
-                .await
-            },
-            TimeoutOrCancel::caused_by_cancel,
-            FAILED_UPLOAD_WARN_THRESHOLD,
-            FAILED_REMOTE_OP_RETRIES,
-            "copy timeline layer",
-            cancel,
-        )
-        .await
-        .ok_or_else(|| anyhow::Error::new(TimeoutOrCancel::Cancel))
-        .and_then(|x| x)
-        .context("remote copy timeline layer")
-    }
-
    async fn flush_deletion_queue(&self) -> Result<(), DeletionQueueError> {
        match tokio::time::timeout(
            DELETION_QUEUE_FLUSH_TIMEOUT,
@@ -1390,7 +1253,7 @@ impl RemoteTimelineClient {
        while let Some(next_op) = upload_queue.queued_operations.front() {
            // Can we run this task now?
            let can_run_now = match next_op {
-                UploadOp::UploadLayer(..) => {
+                UploadOp::UploadLayer(_, _) => {
                    // Can always be scheduled.
                    true
                }
@@ -1517,25 +1380,13 @@ impl RemoteTimelineClient {

            let upload_result: anyhow::Result<()> = match &task.op {
                UploadOp::UploadLayer(ref layer, ref layer_metadata) => {
-                    let local_path = layer.local_path();
-
-                    // We should only be uploading layers created by this `Tenant`'s lifetime, so
-                    // the metadata in the upload should always match our current generation.
-                    assert_eq!(layer_metadata.generation, self.generation);
-
-                    let remote_path = remote_layer_path(
-                        &self.tenant_shard_id.tenant_id,
-                        &self.timeline_id,
-                        layer_metadata.shard,
-                        &layer.layer_desc().filename(),
-                        layer_metadata.generation,
-                    );
-
+                    let path = layer.local_path();
                    upload::upload_timeline_layer(
+                        self.conf,
                        &self.storage_impl,
-                        local_path,
-                        &remote_path,
-                        layer_metadata.file_size(),
+                        path,
+                        layer_metadata,
+                        self.generation,
                        &self.cancel,
                    )
                    .measure_remote_op(
@@ -1964,6 +1815,29 @@ pub fn parse_remote_index_path(path: RemotePath) -> Option<Generation> {
    }
 }

+/// Files on the remote storage are stored under paths relative to the workdir.
+/// Such a path contains both the tenant and timeline ids, which makes the remote storage path unique.
+///
+/// Errors if the provided path does not start with the pageserver's workdir.
+pub fn remote_path(
+    conf: &PageServerConf,
+    local_path: &Utf8Path,
+    generation: Generation,
+) -> anyhow::Result<RemotePath> {
+    let stripped = local_path
+        .strip_prefix(&conf.workdir)
+        .context("Failed to strip workdir prefix")?;
+
+    let suffixed = format!("{0}{1}", stripped, generation.get_suffix());
+
+    RemotePath::new(Utf8Path::new(&suffixed)).with_context(|| {
+        format!(
+            "to resolve remote part of path {:?} for base {:?}",
+            local_path, conf.workdir
+        )
+    })
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -1971,7 +1845,6 @@ mod tests {
        context::RequestContext,
        tenant::{
            harness::{TenantHarness, TIMELINE_ID},
-            storage_layer::layer::local_layer_path,
            Tenant, Timeline,
        },
        DEFAULT_PG_VERSION,
@@ -2154,20 +2027,11 @@ mod tests {
        ]
        .into_iter()
        .map(|(name, contents): (LayerFileName, Vec<u8>)| {
-
-            let local_path = local_layer_path(
-                harness.conf,
-                &timeline.tenant_shard_id,
-                &timeline.timeline_id,
-                &name,
-                &generation,
-            );
-            std::fs::write(&local_path, &contents).unwrap();
+            std::fs::write(timeline_path.join(name.file_name()), &contents).unwrap();

            Layer::for_resident(
                harness.conf,
                &timeline,
-                local_path,
                name,
                LayerFileMetadata::new(contents.len() as u64, generation, shard),
            )
@@ -2304,22 +2168,19 @@ mod tests {
            ..
        } = TestSetup::new("metrics").await.unwrap();
        let client = timeline.remote_client.as_ref().unwrap();
+        let timeline_path = harness.timeline_path(&TIMELINE_ID);

        let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
-        let local_path = local_layer_path(
-            harness.conf,
-            &timeline.tenant_shard_id,
-            &timeline.timeline_id,
-            &layer_file_name_1,
-            &harness.generation,
-        );
        let content_1 = dummy_contents("foo");
-        std::fs::write(&local_path, &content_1).unwrap();
+        std::fs::write(
+            timeline_path.join(layer_file_name_1.file_name()),
+            &content_1,
+        )
+        .unwrap();

        let layer_file_1 = Layer::for_resident(
            harness.conf,
            &timeline,
-            local_path,
            layer_file_name_1.clone(),
            LayerFileMetadata::new(content_1.len() as u64, harness.generation, harness.shard),
        );
@@ -2388,7 +2249,12 @@ mod tests {

    async fn inject_index_part(test_state: &TestSetup, generation: Generation) -> IndexPart {
        // An empty IndexPart, just sufficient to ensure deserialization will succeed
-        let example_index_part = IndexPart::example();
+        let example_metadata = TimelineMetadata::example();
+        let example_index_part = IndexPart::new(
+            HashMap::new(),
+            example_metadata.disk_consistent_lsn(),
+            example_metadata,
+        );

        let index_part_bytes = serde_json::to_vec(&example_index_part).unwrap();

--- a/pageserver/src/tenant/remote_timeline_client/download.rs
+++ b/pageserver/src/tenant/remote_timeline_client/download.rs
@@ -18,10 +18,8 @@ use tracing::warn;
 use utils::backoff;

 use crate::config::PageServerConf;
-use crate::context::RequestContext;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
 use crate::tenant::remote_timeline_client::{remote_layer_path, remote_timelines_path};
-use crate::tenant::storage_layer::layer::local_layer_path;
 use crate::tenant::storage_layer::LayerFileName;
 use crate::tenant::Generation;
 use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile};
@@ -42,7 +40,6 @@ use super::{
 /// in the metadata. (In the future, we might do more cross-checks, like CRC validation)
 ///
 /// Returns the size of the downloaded file.
-#[allow(clippy::too_many_arguments)]
 pub async fn download_layer_file<'a>(
    conf: &'static PageServerConf,
    storage: &'a GenericRemoteStorage,
@@ -51,18 +48,11 @@ pub async fn download_layer_file<'a>(
    layer_file_name: &'a LayerFileName,
    layer_metadata: &'a LayerFileMetadata,
    cancel: &CancellationToken,
-    ctx: &RequestContext,
 ) -> Result<u64, DownloadError> {
    debug_assert_current_span_has_tenant_and_timeline_id();

    let timeline_path = conf.timeline_path(&tenant_shard_id, &timeline_id);
-    let local_path = local_layer_path(
-        conf,
-        &tenant_shard_id,
-        &timeline_id,
-        layer_file_name,
-        &layer_metadata.generation,
-    );
+    let local_path = timeline_path.join(layer_file_name.file_name());

    let remote_path = remote_layer_path(
        &tenant_shard_id.tenant_id,
@@ -85,7 +75,7 @@ pub async fn download_layer_file<'a>(
    let temp_file_path = path_with_suffix_extension(&local_path, TEMP_DOWNLOAD_EXTENSION);

    let bytes_amount = download_retry(
-        || async { download_object(storage, &remote_path, &temp_file_path, cancel, ctx).await },
+        || async { download_object(storage, &remote_path, &temp_file_path, cancel).await },
        &format!("download {remote_path:?}"),
        cancel,
    )
@@ -143,7 +133,6 @@ async fn download_object<'a>(
    src_path: &RemotePath,
    dst_path: &Utf8PathBuf,
    cancel: &CancellationToken,
-    #[cfg_attr(target_os = "macos", allow(unused_variables))] ctx: &RequestContext,
 ) -> Result<u64, DownloadError> {
    let res = match crate::virtual_file::io_engine::get() {
        crate::virtual_file::io_engine::IoEngine::NotSet => panic!("unset"),
@@ -219,10 +208,10 @@ async fn download_object<'a>(
                            Err(e) => return Err(e),
                        };
                        buffered
-                            .write_buffered(tokio_epoll_uring::BoundedBuf::slice_full(chunk), ctx)
+                            .write_buffered(tokio_epoll_uring::BoundedBuf::slice_full(chunk))
                            .await?;
                    }
-                    let size_tracking = buffered.flush_and_into_inner(ctx).await?;
+                    let size_tracking = buffered.flush_and_into_inner().await?;
                    Ok(size_tracking.into_inner())
                }
                .await?;
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -6,6 +6,7 @@ use std::collections::HashMap;

 use chrono::NaiveDateTime;
 use serde::{Deserialize, Serialize};
+use utils::bin_ser::SerializeError;

 use crate::tenant::metadata::TimelineMetadata;
 use crate::tenant::storage_layer::LayerFileName;
@@ -103,14 +104,15 @@ impl IndexPart {

    pub const FILE_NAME: &'static str = "index_part.json";

-    fn new(
-        layers_and_metadata: &HashMap<LayerFileName, LayerFileMetadata>,
+    pub fn new(
+        layers_and_metadata: HashMap<LayerFileName, LayerFileMetadata>,
        disk_consistent_lsn: Lsn,
        metadata: TimelineMetadata,
    ) -> Self {
+        // Transform LayerFileMetadata into IndexLayerMetadata
        let layer_metadata = layers_and_metadata
-            .iter()
-            .map(|(k, v)| (k.to_owned(), IndexLayerMetadata::from(v)))
+            .into_iter()
+            .map(|(k, v)| (k, IndexLayerMetadata::from(v)))
            .collect();

        Self {
@@ -139,24 +141,20 @@ impl IndexPart {
    pub fn to_s3_bytes(&self) -> serde_json::Result<Vec<u8>> {
        serde_json::to_vec(self)
    }
-
-    #[cfg(test)]
-    pub(crate) fn example() -> Self {
-        let example_metadata = TimelineMetadata::example();
-        Self::new(
-            &HashMap::new(),
-            example_metadata.disk_consistent_lsn(),
-            example_metadata,
-        )
-    }
 }

-impl From<&UploadQueueInitialized> for IndexPart {
-    fn from(uq: &UploadQueueInitialized) -> Self {
-        let disk_consistent_lsn = uq.latest_metadata.disk_consistent_lsn();
-        let metadata = uq.latest_metadata.clone();
+impl TryFrom<&UploadQueueInitialized> for IndexPart {
+    type Error = SerializeError;

-        Self::new(&uq.latest_files, disk_consistent_lsn, metadata)
+    fn try_from(upload_queue: &UploadQueueInitialized) -> Result<Self, Self::Error> {
+        let disk_consistent_lsn = upload_queue.latest_metadata.disk_consistent_lsn();
+        let metadata = upload_queue.latest_metadata.clone();
+
+        Ok(Self::new(
+            upload_queue.latest_files.clone(),
+            disk_consistent_lsn,
+            metadata,
+        ))
    }
 }

@@ -174,8 +172,8 @@ pub struct IndexLayerMetadata {
    pub shard: ShardIndex,
 }

-impl From<&LayerFileMetadata> for IndexLayerMetadata {
-    fn from(other: &LayerFileMetadata) -> Self {
+impl From<LayerFileMetadata> for IndexLayerMetadata {
+    fn from(other: LayerFileMetadata) -> Self {
        IndexLayerMetadata {
            file_size: other.file_size,
            generation: other.generation,
--- a/pageserver/src/tenant/remote_timeline_client/upload.rs
+++ b/pageserver/src/tenant/remote_timeline_client/upload.rs
@@ -12,13 +12,18 @@ use tokio_util::sync::CancellationToken;
 use utils::backoff;

 use super::Generation;
-use crate::tenant::remote_timeline_client::{
-    index::IndexPart, remote_index_path, remote_initdb_archive_path,
-    remote_initdb_preserved_archive_path,
+use crate::{
+    config::PageServerConf,
+    tenant::remote_timeline_client::{
+        index::IndexPart, remote_index_path, remote_initdb_archive_path,
+        remote_initdb_preserved_archive_path, remote_path,
+    },
 };
-use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError};
+use remote_storage::{GenericRemoteStorage, TimeTravelError};
 use utils::id::{TenantId, TimelineId};

+use super::index::LayerFileMetadata;
+
 use tracing::info;

 /// Serializes and uploads the given index part data to the remote storage.
@@ -60,10 +65,11 @@ pub(crate) async fn upload_index_part<'a>(
 ///
 /// On an error, bumps the retries count and reschedules the entire task.
 pub(super) async fn upload_timeline_layer<'a>(
+    conf: &'static PageServerConf,
    storage: &'a GenericRemoteStorage,
-    local_path: &'a Utf8Path,
-    remote_path: &'a RemotePath,
-    metadata_size: u64,
+    source_path: &'a Utf8Path,
+    known_metadata: &'a LayerFileMetadata,
+    generation: Generation,
    cancel: &CancellationToken,
 ) -> anyhow::Result<()> {
    fail_point!("before-upload-layer", |_| {
@@ -72,7 +78,8 @@ pub(super) async fn upload_timeline_layer<'a>(

    pausable_failpoint!("before-upload-layer-pausable");

-    let source_file_res = fs::File::open(&local_path).await;
+    let storage_path = remote_path(conf, source_path, generation)?;
+    let source_file_res = fs::File::open(&source_path).await;
    let source_file = match source_file_res {
        Ok(source_file) => source_file,
        Err(e) if e.kind() == ErrorKind::NotFound => {
@@ -83,49 +90,34 @@ pub(super) async fn upload_timeline_layer<'a>(
            // it has been written to disk yet.
            //
            // This is tested against `test_compaction_delete_before_upload`
-            info!(path = %local_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more.");
+            info!(path = %source_path, "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more.");
            return Ok(());
        }
-        Err(e) => Err(e).with_context(|| format!("open a source file for layer {local_path:?}"))?,
+        Err(e) => {
+            Err(e).with_context(|| format!("open a source file for layer {source_path:?}"))?
+        }
    };

    let fs_size = source_file
        .metadata()
        .await
-        .with_context(|| format!("get the source file metadata for layer {local_path:?}"))?
+        .with_context(|| format!("get the source file metadata for layer {source_path:?}"))?
        .len();

+    let metadata_size = known_metadata.file_size();
    if metadata_size != fs_size {
-        bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}");
+        bail!("File {source_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}");
    }

    let fs_size = usize::try_from(fs_size)
-        .with_context(|| format!("convert {local_path:?} size {fs_size} usize"))?;
+        .with_context(|| format!("convert {source_path:?} size {fs_size} usize"))?;

    let reader = tokio_util::io::ReaderStream::with_capacity(source_file, super::BUFFER_SIZE);

    storage
-        .upload(reader, fs_size, remote_path, None, cancel)
+        .upload(reader, fs_size, &storage_path, None, cancel)
        .await
-        .with_context(|| format!("upload layer from local path '{local_path}'"))
-}
-
-pub(super) async fn copy_timeline_layer(
-    storage: &GenericRemoteStorage,
-    source_path: &RemotePath,
-    target_path: &RemotePath,
-    cancel: &CancellationToken,
-) -> anyhow::Result<()> {
-    fail_point!("before-copy-layer", |_| {
-        bail!("failpoint before-copy-layer")
-    });
-
-    pausable_failpoint!("before-copy-layer-pausable");
-
-    storage
-        .copy_object(source_path, target_path, cancel)
-        .await
-        .with_context(|| format!("copy layer {source_path} to {target_path}"))
+        .with_context(|| format!("upload layer from local path '{source_path}'"))
 }

 /// Uploads the given `initdb` data to the remote storage.
--- a/pageserver/src/tenant/secondary.rs
+++ b/pageserver/src/tenant/secondary.rs
@@ -7,7 +7,6 @@ use std::{sync::Arc, time::SystemTime};

 use crate::{
    config::PageServerConf,
-    context::RequestContext,
    disk_usage_eviction_task::DiskUsageEvictionInfo,
    task_mgr::{self, TaskKind, BACKGROUND_RUNTIME},
    virtual_file::MaybeFatalIo,
@@ -21,9 +20,8 @@ use self::{
 use super::{
    config::{SecondaryLocationConfig, TenantConfOpt},
    mgr::TenantManager,
-    remote_timeline_client::LayerFileMetadata,
    span::debug_assert_current_span_has_tenant_id,
-    storage_layer::{layer::local_layer_path, LayerFileName},
+    storage_layer::LayerFileName,
 };

 use pageserver_api::{
@@ -183,7 +181,6 @@ impl SecondaryTenant {
        conf: &PageServerConf,
        timeline_id: TimelineId,
        name: LayerFileName,
-        metadata: LayerFileMetadata,
    ) {
        debug_assert_current_span_has_tenant_id();

@@ -197,13 +194,9 @@ impl SecondaryTenant {

        let now = SystemTime::now();

-        let local_path = local_layer_path(
-            conf,
-            &self.tenant_shard_id,
-            &timeline_id,
-            &name,
-            &metadata.generation,
-        );
+        let path = conf
+            .timeline_path(&self.tenant_shard_id, &timeline_id)
+            .join(name.file_name());

        let this = self.clone();

@@ -214,7 +207,7 @@ impl SecondaryTenant {
            // it, the secondary downloader could have seen an updated heatmap that
            // resulted in a layer being deleted.
            // Other local I/O errors are process-fatal: these should never happen.
-            let deleted = std::fs::remove_file(local_path);
+            let deleted = std::fs::remove_file(path);

            let not_found = deleted
                .as_ref()
@@ -323,13 +316,9 @@ pub fn spawn_tasks(
    let (upload_req_tx, upload_req_rx) =
        tokio::sync::mpsc::channel::<CommandRequest<UploadCommand>>(16);

-    let downloader_task_ctx = RequestContext::new(
-        TaskKind::SecondaryDownloads,
-        crate::context::DownloadBehavior::Download,
-    );
    task_mgr::spawn(
        BACKGROUND_RUNTIME.handle(),
-        downloader_task_ctx.task_kind(),
+        TaskKind::SecondaryDownloads,
        None,
        None,
        "secondary tenant downloads",
@@ -341,7 +330,6 @@ pub fn spawn_tasks(
                download_req_rx,
                bg_jobs_clone,
                cancel_clone,
-                downloader_task_ctx,
            )
            .await;

--- a/pageserver/src/tenant/secondary/downloader.rs
+++ b/pageserver/src/tenant/secondary/downloader.rs
@@ -8,7 +8,6 @@ use std::{

 use crate::{
    config::PageServerConf,
-    context::RequestContext,
    disk_usage_eviction_task::{
        finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer,
    },
@@ -22,7 +21,7 @@ use crate::{
            FAILED_REMOTE_OP_RETRIES,
        },
        span::debug_assert_current_span_has_tenant_id,
-        storage_layer::{layer::local_layer_path, LayerFileName},
+        storage_layer::LayerFileName,
        tasks::{warn_when_period_overrun, BackgroundLoopKind},
    },
    virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile},
@@ -31,10 +30,7 @@ use crate::{

 use super::{
    heatmap::HeatMapLayer,
-    scheduler::{
-        self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult,
-        TenantBackgroundJobs,
-    },
+    scheduler::{self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs},
    SecondaryTenant,
 };

@@ -48,6 +44,7 @@ use chrono::format::{DelayedFormat, StrftimeItems};
 use futures::Future;
 use pageserver_api::models::SecondaryProgress;
 use pageserver_api::shard::TenantShardId;
+use rand::Rng;
 use remote_storage::{DownloadError, Etag, GenericRemoteStorage};

 use tokio_util::sync::CancellationToken;
@@ -77,14 +74,12 @@ pub(super) async fn downloader_task(
    command_queue: tokio::sync::mpsc::Receiver<CommandRequest<DownloadCommand>>,
    background_jobs_can_start: Barrier,
    cancel: CancellationToken,
-    root_ctx: RequestContext,
 ) {
    let concurrency = tenant_manager.get_conf().secondary_download_concurrency;

    let generator = SecondaryDownloader {
        tenant_manager,
        remote_storage,
-        root_ctx,
    };
    let mut scheduler = Scheduler::new(generator, concurrency);

@@ -97,7 +92,6 @@ pub(super) async fn downloader_task(
 struct SecondaryDownloader {
    tenant_manager: Arc<TenantManager>,
    remote_storage: GenericRemoteStorage,
-    root_ctx: RequestContext,
 }

 #[derive(Debug, Clone)]
@@ -276,7 +270,7 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
        // Update freshened_at even if there was an error: we don't want errored tenants to implicitly
        // take priority to run again.
        let mut detail = secondary_state.detail.lock().unwrap();
-        detail.next_download = Some(Instant::now() + period_jitter(DOWNLOAD_FRESHEN_INTERVAL, 5));
+        detail.next_download = Some(Instant::now() + DOWNLOAD_FRESHEN_INTERVAL);
    }

    async fn schedule(&mut self) -> SchedulingResult<PendingDownload> {
@@ -307,9 +301,11 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
                    }

                    if detail.next_download.is_none() {
-                        // Initialize randomly in the range from 0 to our interval: this uniformly spreads the start times.  Subsequent
-                        // rounds will use a smaller jitter to avoid accidentally synchronizing later.
-                        detail.next_download = Some(now.checked_add(period_warmup(DOWNLOAD_FRESHEN_INTERVAL)).expect(
+                        // Initialize with a jitter: this spreads initial downloads on startup
+                        // or mass-attach across our freshen interval.
+                        let jittered_period =
+                            rand::thread_rng().gen_range(Duration::ZERO..DOWNLOAD_FRESHEN_INTERVAL);
+                        detail.next_download = Some(now.checked_add(jittered_period).expect(
                        "Using our constant, which is known to be small compared with clock range",
                    ));
                    }
@@ -371,12 +367,11 @@ impl JobGenerator<PendingDownload, RunningDownload, CompleteDownload, DownloadCo
        let remote_storage = self.remote_storage.clone();
        let conf = self.tenant_manager.get_conf();
        let tenant_shard_id = *secondary_state.get_tenant_shard_id();
-        let download_ctx = self.root_ctx.attached_child();
        (RunningDownload { barrier }, Box::pin(async move {
            let _completion = completion;

            match TenantDownloader::new(conf, &remote_storage, &secondary_state)
-                .download(&download_ctx)
+                .download()
                .await
            {
                Err(UpdateError::NoData) => {
@@ -490,7 +485,7 @@ impl<'a> TenantDownloader<'a> {
        }
    }

-    async fn download(&self, ctx: &RequestContext) -> Result<(), UpdateError> {
+    async fn download(&self) -> Result<(), UpdateError> {
        debug_assert_current_span_has_tenant_id();

        // For the duration of a download, we must hold the SecondaryTenant::gate, to ensure
@@ -565,7 +560,7 @@ impl<'a> TenantDownloader<'a> {
            }

            let timeline_id = timeline.timeline_id;
-            self.download_timeline(timeline, ctx)
+            self.download_timeline(timeline)
                .instrument(tracing::info_span!(
                    "secondary_download_timeline",
                    tenant_id=%tenant_shard_id.tenant_id,
@@ -621,12 +616,12 @@ impl<'a> TenantDownloader<'a> {
                let layers_in_heatmap = heatmap_timeline
                    .layers
                    .iter()
-                    .map(|l| (&l.name, l.metadata.generation))
+                    .map(|l| &l.name)
                    .collect::<HashSet<_>>();
                let layers_on_disk = timeline_state
                    .on_disk_layers
                    .iter()
-                    .map(|l| (l.0, l.1.metadata.generation))
+                    .map(|l| l.0)
                    .collect::<HashSet<_>>();

                let mut layer_count = layers_on_disk.len();
@@ -637,24 +632,16 @@ impl<'a> TenantDownloader<'a> {
                    .sum();

                // Remove on-disk layers that are no longer present in heatmap
-                for (layer_file_name, generation) in layers_on_disk.difference(&layers_in_heatmap) {
+                for layer in layers_on_disk.difference(&layers_in_heatmap) {
                    layer_count -= 1;
                    layer_byte_count -= timeline_state
                        .on_disk_layers
-                        .get(layer_file_name)
+                        .get(layer)
                        .unwrap()
                        .metadata
                        .file_size();

-                    let local_path = local_layer_path(
-                        self.conf,
-                        self.secondary_state.get_tenant_shard_id(),
-                        timeline_id,
-                        layer_file_name,
-                        generation,
-                    );
-
-                    delete_layers.push((*timeline_id, (*layer_file_name).clone(), local_path));
+                    delete_layers.push((*timeline_id, (*layer).clone()));
                }

                progress.bytes_downloaded += layer_byte_count;
@@ -669,7 +656,11 @@ impl<'a> TenantDownloader<'a> {
        }

        // Execute accumulated deletions
-        for (timeline_id, layer_name, local_path) in delete_layers {
+        for (timeline_id, layer_name) in delete_layers {
+            let timeline_path = self
+                .conf
+                .timeline_path(self.secondary_state.get_tenant_shard_id(), &timeline_id);
+            let local_path = timeline_path.join(layer_name.to_string());
            tracing::info!(timeline_id=%timeline_id, "Removing secondary local layer {layer_name} because it's absent in heatmap",);

            tokio::fs::remove_file(&local_path)
@@ -751,13 +742,12 @@ impl<'a> TenantDownloader<'a> {
        .and_then(|x| x)
    }

-    async fn download_timeline(
-        &self,
-        timeline: HeatMapTimeline,
-        ctx: &RequestContext,
-    ) -> Result<(), UpdateError> {
+    async fn download_timeline(&self, timeline: HeatMapTimeline) -> Result<(), UpdateError> {
        debug_assert_current_span_has_tenant_and_timeline_id();
        let tenant_shard_id = self.secondary_state.get_tenant_shard_id();
+        let timeline_path = self
+            .conf
+            .timeline_path(tenant_shard_id, &timeline.timeline_id);

        // Accumulate updates to the state
        let mut touched = Vec::new();
@@ -807,14 +797,10 @@ impl<'a> TenantDownloader<'a> {
                if cfg!(debug_assertions) {
                    // Debug for https://github.com/neondatabase/neon/issues/6966: check that the files we think
                    // are already present on disk are really there.
-                    let local_path = local_layer_path(
-                        self.conf,
-                        tenant_shard_id,
-                        &timeline.timeline_id,
-                        &layer.name,
-                        &layer.metadata.generation,
-                    );
-
+                    let local_path = self
+                        .conf
+                        .timeline_path(tenant_shard_id, &timeline.timeline_id)
+                        .join(layer.name.file_name());
                    match tokio::fs::metadata(&local_path).await {
                        Ok(meta) => {
                            tracing::debug!(
@@ -889,7 +875,6 @@ impl<'a> TenantDownloader<'a> {
                &layer.name,
                &LayerFileMetadata::from(&layer.metadata),
                &self.secondary_state.cancel,
-                ctx,
            )
            .await
            {
@@ -908,13 +893,7 @@ impl<'a> TenantDownloader<'a> {
            };

            if downloaded_bytes != layer.metadata.file_size {
-                let local_path = local_layer_path(
-                    self.conf,
-                    tenant_shard_id,
-                    &timeline.timeline_id,
-                    &layer.name,
-                    &layer.metadata.generation,
-                );
+                let local_path = timeline_path.join(layer.name.to_string());

                tracing::warn!(
                    "Downloaded layer {} with unexpected size {} != {}.  Removing download.",
--- a/pageserver/src/tenant/secondary/heatmap_uploader.rs
+++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs
@@ -20,14 +20,12 @@ use crate::{

 use futures::Future;
 use pageserver_api::shard::TenantShardId;
+use rand::Rng;
 use remote_storage::{GenericRemoteStorage, TimeoutOrCancel};

 use super::{
    heatmap::HeatMapTenant,
-    scheduler::{
-        self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult,
-        TenantBackgroundJobs,
-    },
+    scheduler::{self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs},
    CommandRequest, UploadCommand,
 };
 use tokio_util::sync::CancellationToken;
@@ -183,11 +181,15 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>
            let state = self
                .tenants
                .entry(*tenant.get_tenant_shard_id())
-                .or_insert_with(|| UploaderTenantState {
-                    tenant: Arc::downgrade(&tenant),
-                    last_upload: None,
-                    next_upload: Some(now.checked_add(period_warmup(period)).unwrap_or(now)),
-                    last_digest: None,
+                .or_insert_with(|| {
+                    let jittered_period = rand::thread_rng().gen_range(Duration::ZERO..period);
+
+                    UploaderTenantState {
+                        tenant: Arc::downgrade(&tenant),
+                        last_upload: None,
+                        next_upload: Some(now.checked_add(jittered_period).unwrap_or(now)),
+                        last_digest: None,
+                    }
                });

            // Decline to do the upload if insufficient time has passed
@@ -272,7 +274,7 @@ impl JobGenerator<UploadPending, WriteInProgress, WriteComplete, UploadCommand>

            let next_upload = tenant
                .get_heatmap_period()
-                .and_then(|period| now.checked_add(period_jitter(period, 5)));
+                .and_then(|period| now.checked_add(period));

            WriteComplete {
                    tenant_shard_id: *tenant.get_tenant_shard_id(),
--- a/pageserver/src/tenant/secondary/scheduler.rs
+++ b/pageserver/src/tenant/secondary/scheduler.rs
@@ -1,5 +1,4 @@
 use futures::Future;
-use rand::Rng;
 use std::{
    collections::HashMap,
    marker::PhantomData,
@@ -20,26 +19,6 @@ use super::{CommandRequest, CommandResponse};
 const MAX_SCHEDULING_INTERVAL: Duration = Duration::from_secs(10);
 const MIN_SCHEDULING_INTERVAL: Duration = Duration::from_secs(1);

-/// Jitter a Duration by an integer percentage.  Returned values are uniform
-/// in the range 100-pct..100+pct (i.e. a 5% jitter is 5% either way: a ~10% range)
-pub(super) fn period_jitter(d: Duration, pct: u32) -> Duration {
-    if d == Duration::ZERO {
-        d
-    } else {
-        rand::thread_rng().gen_range((d * (100 - pct)) / 100..(d * (100 + pct)) / 100)
-    }
-}
-
-/// When a periodic task first starts, it should wait for some time in the range 0..period, so
-/// that starting many such tasks at the same time spreads them across the time range.
-pub(super) fn period_warmup(period: Duration) -> Duration {
-    if period == Duration::ZERO {
-        period
-    } else {
-        rand::thread_rng().gen_range(Duration::ZERO..period)
-    }
-}
-
 /// Scheduling helper for background work across many tenants.
 ///
 /// Systems that need to run background work across many tenants may use this type
--- a/pageserver/src/tenant/size.rs
+++ b/pageserver/src/tenant/size.rs
@@ -118,6 +118,9 @@ pub(super) async fn gather_inputs(
    ctx: &RequestContext,
 ) -> anyhow::Result<ModelInputs> {
    // refresh is needed to update gc related pitr_cutoff and horizon_cutoff
+    //
+    // FIXME: if a single timeline is deleted while `refresh_gc_info` is running, the whole
+    // computation fails, which does not make sense from the billing perspective.
    tenant
        .refresh_gc_info(cancel, ctx)
        .await
@@ -189,9 +192,7 @@ pub(super) async fn gather_inputs(
        // than a space bound (horizon cutoff).  This means that if someone drops a database and waits for their
        // PITR interval, they will see synthetic size decrease, even if we are still storing data inside
        // horizon_cutoff.
-        let pitr_cutoff = gc_info.cutoffs.pitr;
-        let horizon_cutoff = gc_info.cutoffs.horizon;
-        let mut next_gc_cutoff = pitr_cutoff;
+        let mut next_gc_cutoff = gc_info.pitr_cutoff;

        // If the caller provided a shorter retention period, use that instead of the GC cutoff.
        let retention_param_cutoff = if let Some(max_retention_period) = max_retention_period {
@@ -218,8 +219,6 @@ pub(super) async fn gather_inputs(
            .map(|lsn| (lsn, LsnKind::BranchPoint))
            .collect::<Vec<_>>();

-        drop(gc_info);
-
        // Add branch points we collected earlier, just in case there were any that were
        // not present in retain_lsns. We will remove any duplicates below later.
        if let Some(this_branchpoints) = branchpoints.get(&timeline_id) {
@@ -298,8 +297,8 @@ pub(super) async fn gather_inputs(
            last_record: last_record_lsn,
            // this is not used above, because it might not have updated recently enough
            latest_gc_cutoff: *timeline.get_latest_gc_cutoff_lsn(),
-            horizon_cutoff,
-            pitr_cutoff,
+            horizon_cutoff: gc_info.horizon_cutoff,
+            pitr_cutoff: gc_info.pitr_cutoff,
            next_gc_cutoff,
            retention_param_cutoff,
        });
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -428,15 +428,9 @@ impl DeltaLayerWriterInner {
    ///
    /// The values must be appended in key, lsn order.
    ///
-    async fn put_value(
-        &mut self,
-        key: Key,
-        lsn: Lsn,
-        val: Value,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    async fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> anyhow::Result<()> {
        let (_, res) = self
-            .put_value_bytes(key, lsn, Value::ser(&val)?, val.will_init(), ctx)
+            .put_value_bytes(key, lsn, Value::ser(&val)?, val.will_init())
            .await;
        res
    }
@@ -447,10 +441,9 @@ impl DeltaLayerWriterInner {
        lsn: Lsn,
        val: Vec<u8>,
        will_init: bool,
-        ctx: &RequestContext,
    ) -> (Vec<u8>, anyhow::Result<()>) {
        assert!(self.lsn_range.start <= lsn);
-        let (val, res) = self.blob_writer.write_blob(val, ctx).await;
+        let (val, res) = self.blob_writer.write_blob(val).await;
        let off = match res {
            Ok(off) => off,
            Err(e) => return (val, Err(anyhow::anyhow!(e))),
@@ -470,23 +463,18 @@ impl DeltaLayerWriterInner {
    ///
    /// Finish writing the delta layer.
    ///
-    async fn finish(
-        self,
-        key_end: Key,
-        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<ResidentLayer> {
+    async fn finish(self, key_end: Key, timeline: &Arc<Timeline>) -> anyhow::Result<ResidentLayer> {
        let index_start_blk =
            ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;

-        let mut file = self.blob_writer.into_inner(ctx).await?;
+        let mut file = self.blob_writer.into_inner().await?;

        // Write out the index
        let (index_root_blk, block_buf) = self.tree.finish()?;
        file.seek(SeekFrom::Start(index_start_blk as u64 * PAGE_SZ as u64))
            .await?;
        for buf in block_buf.blocks {
-            let (_buf, res) = file.write_all(buf, ctx).await;
+            let (_buf, res) = file.write_all(buf).await;
            res?;
        }
        assert!(self.lsn_range.start < self.lsn_range.end);
@@ -506,7 +494,7 @@ impl DeltaLayerWriterInner {
        // TODO: could use smallvec here but it's a pain with Slice<T>
        Summary::ser_into(&summary, &mut buf)?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;

        let metadata = file
@@ -604,18 +592,8 @@ impl DeltaLayerWriter {
    ///
    /// The values must be appended in key, lsn order.
    ///
-    pub async fn put_value(
-        &mut self,
-        key: Key,
-        lsn: Lsn,
-        val: Value,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        self.inner
-            .as_mut()
-            .unwrap()
-            .put_value(key, lsn, val, ctx)
-            .await
+    pub async fn put_value(&mut self, key: Key, lsn: Lsn, val: Value) -> anyhow::Result<()> {
+        self.inner.as_mut().unwrap().put_value(key, lsn, val).await
    }

    pub async fn put_value_bytes(
@@ -624,12 +602,11 @@ impl DeltaLayerWriter {
        lsn: Lsn,
        val: Vec<u8>,
        will_init: bool,
-        ctx: &RequestContext,
    ) -> (Vec<u8>, anyhow::Result<()>) {
        self.inner
            .as_mut()
            .unwrap()
-            .put_value_bytes(key, lsn, val, will_init, ctx)
+            .put_value_bytes(key, lsn, val, will_init)
            .await
    }

@@ -644,11 +621,10 @@ impl DeltaLayerWriter {
        mut self,
        key_end: Key,
        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
    ) -> anyhow::Result<ResidentLayer> {
        let inner = self.inner.take().unwrap();
        let temp_path = inner.path.clone();
-        let result = inner.finish(key_end, timeline, ctx).await;
+        let result = inner.finish(key_end, timeline).await;
        // The delta layer files can sometimes be really large. Clean them up.
        if result.is_err() {
            tracing::warn!(
@@ -716,7 +692,7 @@ impl DeltaLayer {
        // TODO: could use smallvec here, but it's a pain with Slice<T>
        Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;
        Ok(())
    }
@@ -1139,15 +1115,15 @@ impl DeltaLayerInner {
        Ok(all_keys)
    }

-    /// Using the given writer, write out a version which has the earlier Lsns than `until`.
-    ///
-    /// Return the amount of key value records pushed to the writer.
+    /// Using the given writer, write out a truncated copy of this layer: only records with
+    /// LSNs strictly below `truncate_at` are copied; everything at or above it is left out.
+    #[cfg(test)]
    pub(super) async fn copy_prefix(
        &self,
        writer: &mut DeltaLayerWriter,
-        until: Lsn,
+        truncate_at: Lsn,
        ctx: &RequestContext,
-    ) -> anyhow::Result<usize> {
+    ) -> anyhow::Result<()> {
        use crate::tenant::vectored_blob_io::{
            BlobMeta, VectoredReadBuilder, VectoredReadExtended,
        };
@@ -1211,8 +1187,6 @@ impl DeltaLayerInner {
        // FIXME: buffering of DeltaLayerWriter
        let mut per_blob_copy = Vec::new();

-        let mut records = 0;
-
        while let Some(item) = stream.try_next().await? {
            tracing::debug!(?item, "popped");
            let offset = item
@@ -1231,7 +1205,7 @@ impl DeltaLayerInner {

            prev = Option::from(item);

-            let actionable = actionable.filter(|x| x.0.lsn < until);
+            let actionable = actionable.filter(|x| x.0.lsn < truncate_at);

            let builder = if let Some((meta, offsets)) = actionable {
                // extend or create a new builder
@@ -1299,7 +1273,7 @@ impl DeltaLayerInner {
                    let will_init = crate::repository::ValueBytes::will_init(data)
                        .inspect_err(|_e| {
                            #[cfg(feature = "testing")]
-                            tracing::error!(data=?utils::Hex(data), err=?_e, %key, %lsn, "failed to parse will_init out of serialized value");
+                            tracing::error!(data=?utils::Hex(data), err=?_e, "failed to parse will_init out of serialized value");
                        })
                        .unwrap_or(false);

@@ -1307,19 +1281,10 @@ impl DeltaLayerInner {
                    per_blob_copy.extend_from_slice(data);

                    let (tmp, res) = writer
-                        .put_value_bytes(
-                            key,
-                            lsn,
-                            std::mem::take(&mut per_blob_copy),
-                            will_init,
-                            ctx,
-                        )
+                        .put_value_bytes(key, lsn, std::mem::take(&mut per_blob_copy), will_init)
                        .await;
                    per_blob_copy = tmp;
-
                    res?;
-
-                    records += 1;
                }

                buffer = Some(res.buf);
@@ -1331,7 +1296,7 @@ impl DeltaLayerInner {
            "with the sentinel above loop should had handled all"
        );

-        Ok(records)
+        Ok(())
    }

    pub(super) async fn dump(&self, ctx: &RequestContext) -> anyhow::Result<()> {
@@ -1404,6 +1369,7 @@ impl DeltaLayerInner {
        Ok(())
    }

+    #[cfg(test)]
    fn stream_index_forwards<'a, R>(
        &'a self,
        reader: &'a DiskBtreeReader<R, DELTA_KEY_SIZE>,
@@ -1794,14 +1760,12 @@ mod test {

        for entry in entries {
            let (_, res) = writer
-                .put_value_bytes(entry.key, entry.lsn, entry.value, false, &ctx)
+                .put_value_bytes(entry.key, entry.lsn, entry.value, false)
                .await;
            res?;
        }

-        let resident = writer
-            .finish(entries_meta.key_range.end, &timeline, &ctx)
-            .await?;
+        let resident = writer.finish(entries_meta.key_range.end, &timeline).await?;

        let inner = resident.as_delta(&ctx).await?;

@@ -1987,7 +1951,7 @@ mod test {
                .await
                .unwrap();

-            let copied_layer = writer.finish(Key::MAX, &branch, ctx).await.unwrap();
+            let copied_layer = writer.finish(Key::MAX, &branch).await.unwrap();

            copied_layer.as_delta(ctx).await.unwrap();

--- a/pageserver/src/tenant/storage_layer/filename.rs
+++ b/pageserver/src/tenant/storage_layer/filename.rs
@@ -2,13 +2,11 @@
 //! Helper functions for dealing with filenames of the image and delta layer files.
 //!
 use crate::repository::Key;
-use std::borrow::Cow;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops::Range;
 use std::str::FromStr;

-use regex::Regex;
 use utils::lsn::Lsn;

 use super::PersistentLayerDesc;
@@ -76,19 +74,10 @@ impl DeltaFileName {
        let key_end_str = key_parts.next()?;
        let lsn_start_str = lsn_parts.next()?;
        let lsn_end_str = lsn_parts.next()?;
-
        if parts.next().is_some() || key_parts.next().is_some() || key_parts.next().is_some() {
            return None;
        }

-        if key_start_str.len() != 36
-            || key_end_str.len() != 36
-            || lsn_start_str.len() != 16
-            || lsn_end_str.len() != 16
-        {
-            return None;
-        }
-
        let key_start = Key::from_hex(key_start_str).ok()?;
        let key_end = Key::from_hex(key_end_str).ok()?;

@@ -193,10 +182,6 @@ impl ImageFileName {
            return None;
        }

-        if key_start_str.len() != 36 || key_end_str.len() != 36 || lsn_str.len() != 16 {
-            return None;
-        }
-
        let key_start = Key::from_hex(key_start_str).ok()?;
        let key_end = Key::from_hex(key_end_str).ok()?;

@@ -274,22 +259,9 @@ impl From<DeltaFileName> for LayerFileName {
 impl FromStr for LayerFileName {
    type Err = String;

-    /// Conversion from either a physical layer filename, or the string-ization of
-    /// Self. When loading a physical layer filename, we drop any extra information
-    /// not needed to build Self.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
-        let gen_suffix_regex = Regex::new("^(?<base>.+)-(?<gen>[0-9a-f]{8})$").unwrap();
-        let file_name: Cow<str> = match gen_suffix_regex.captures(value) {
-            Some(captures) => captures
-                .name("base")
-                .expect("Non-optional group")
-                .as_str()
-                .into(),
-            None => value.into(),
-        };
-
-        let delta = DeltaFileName::parse_str(&file_name);
-        let image = ImageFileName::parse_str(&file_name);
+        let delta = DeltaFileName::parse_str(value);
+        let image = ImageFileName::parse_str(value);
        let ok = match (delta, image) {
            (None, None) => {
                return Err(format!(
@@ -343,42 +315,3 @@ impl<'de> serde::de::Visitor<'de> for LayerFileNameVisitor {
        v.parse().map_err(|e| E::custom(e))
    }
 }
-
-#[cfg(test)]
-mod test {
-    use super::*;
-    #[test]
-    fn image_layer_parse() -> anyhow::Result<()> {
-        let expected = LayerFileName::Image(ImageFileName {
-            key_range: Key::from_i128(0)
-                ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(),
-            lsn: Lsn::from_hex("00000000014FED58").unwrap(),
-        });
-        let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-00000001").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected,);
-
-        // Omitting generation suffix is valid
-        let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected,);
-
-        Ok(())
-    }
-
-    #[test]
-    fn delta_layer_parse() -> anyhow::Result<()> {
-        let expected = LayerFileName::Delta(DeltaFileName {
-            key_range: Key::from_i128(0)
-                ..Key::from_hex("000000067F00000001000004DF0000000006").unwrap(),
-            lsn_range: Lsn::from_hex("00000000014FED58").unwrap()
-                ..Lsn::from_hex("000000000154C481").unwrap(),
-        });
-        let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481-00000001").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected);
-
-        // Omitting generation suffix is valid
-        let parsed = LayerFileName::from_str("000000000000000000000000000000000000-000000067F00000001000004DF0000000006__00000000014FED58-000000000154C481").map_err(|s| anyhow::anyhow!(s))?;
-        assert_eq!(parsed, expected);
-
-        Ok(())
-    }
-}
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -357,7 +357,7 @@ impl ImageLayer {
        // TODO: could use smallvec here but it's a pain with Slice<T>
        Summary::ser_into(&new_summary, &mut buf).context("serialize")?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;
        Ok(())
    }
@@ -677,14 +677,9 @@ impl ImageLayerWriterInner {
    ///
    /// The page versions must be appended in blknum order.
    ///
-    async fn put_image(
-        &mut self,
-        key: Key,
-        img: Bytes,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
+    async fn put_image(&mut self, key: Key, img: Bytes) -> anyhow::Result<()> {
        ensure!(self.key_range.contains(&key));
-        let (_img, res) = self.blob_writer.write_blob(img, ctx).await;
+        let (_img, res) = self.blob_writer.write_blob(img).await;
        // TODO: re-use the buffer for `img` further upstack
        let off = res?;

@@ -698,11 +693,7 @@ impl ImageLayerWriterInner {
    ///
    /// Finish writing the image layer.
    ///
-    async fn finish(
-        self,
-        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<ResidentLayer> {
+    async fn finish(self, timeline: &Arc<Timeline>) -> anyhow::Result<ResidentLayer> {
        let index_start_blk =
            ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32;

@@ -713,7 +704,7 @@ impl ImageLayerWriterInner {
            .await?;
        let (index_root_blk, block_buf) = self.tree.finish()?;
        for buf in block_buf.blocks {
-            let (_buf, res) = file.write_all(buf, ctx).await;
+            let (_buf, res) = file.write_all(buf).await;
            res?;
        }

@@ -733,7 +724,7 @@ impl ImageLayerWriterInner {
        // TODO: could use smallvec here but it's a pain with Slice<T>
        Summary::ser_into(&summary, &mut buf)?;
        file.seek(SeekFrom::Start(0)).await?;
-        let (_buf, res) = file.write_all(buf, ctx).await;
+        let (_buf, res) = file.write_all(buf).await;
        res?;

        let metadata = file
@@ -815,13 +806,8 @@ impl ImageLayerWriter {
    ///
    /// The page versions must be appended in blknum order.
    ///
-    pub async fn put_image(
-        &mut self,
-        key: Key,
-        img: Bytes,
-        ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        self.inner.as_mut().unwrap().put_image(key, img, ctx).await
+    pub async fn put_image(&mut self, key: Key, img: Bytes) -> anyhow::Result<()> {
+        self.inner.as_mut().unwrap().put_image(key, img).await
    }

    ///
@@ -830,9 +816,8 @@ impl ImageLayerWriter {
    pub(crate) async fn finish(
        mut self,
        timeline: &Arc<Timeline>,
-        ctx: &RequestContext,
    ) -> anyhow::Result<super::ResidentLayer> {
-        self.inner.take().unwrap().finish(timeline, ctx).await
+        self.inner.take().unwrap().finish(timeline).await
    }
 }

--- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs
+++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs
@@ -659,14 +659,14 @@ impl InMemoryLayer {
                let will_init = Value::des(&buf)?.will_init();
                let res;
                (buf, res) = delta_layer_writer
-                    .put_value_bytes(*key, *lsn, buf, will_init, &ctx)
+                    .put_value_bytes(*key, *lsn, buf, will_init)
                    .await;
                res?;
            }
        }

        // MAX is used here because we identify L0 layers by full key range
-        let delta_layer = delta_layer_writer.finish(Key::MAX, timeline, &ctx).await?;
+        let delta_layer = delta_layer_writer.finish(Key::MAX, timeline).await?;
        Ok(Some(delta_layer))
    }
 }
--- a/pageserver/src/tenant/storage_layer/layer.rs
+++ b/pageserver/src/tenant/storage_layer/layer.rs
@@ -4,21 +4,19 @@ use pageserver_api::keyspace::KeySpace;
 use pageserver_api::models::{
    HistoricLayerInfo, LayerAccessKind, LayerResidenceEventReason, LayerResidenceStatus,
 };
-use pageserver_api::shard::{ShardIndex, TenantShardId};
+use pageserver_api::shard::ShardIndex;
 use std::ops::Range;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::{Arc, Weak};
 use std::time::{Duration, SystemTime};
 use tracing::Instrument;
-use utils::id::TimelineId;
 use utils::lsn::Lsn;
 use utils::sync::heavier_once_cell;

 use crate::config::PageServerConf;
-use crate::context::{DownloadBehavior, RequestContext};
+use crate::context::RequestContext;
 use crate::repository::Key;
 use crate::span::debug_assert_current_span_has_tenant_and_timeline_id;
-use crate::task_mgr::TaskKind;
 use crate::tenant::timeline::GetVectoredError;
 use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline};

@@ -124,25 +122,6 @@ impl PartialEq for Layer {
    }
 }

-pub(crate) fn local_layer_path(
-    conf: &PageServerConf,
-    tenant_shard_id: &TenantShardId,
-    timeline_id: &TimelineId,
-    layer_file_name: &LayerFileName,
-    _generation: &Generation,
-) -> Utf8PathBuf {
-    let timeline_path = conf.timeline_path(tenant_shard_id, timeline_id);
-
-    timeline_path.join(layer_file_name.file_name())
-
-    // TOOD: include generation in the name in now+1 releases.
-    // timeline_path.join(format!(
-    //     "{}{}",
-    //     layer_file_name.file_name(),
-    //     generation.get_suffix()
-    // ))
-}
-
 impl Layer {
    /// Creates a layer value for a file we know to not be resident.
    pub(crate) fn for_evicted(
@@ -151,14 +130,6 @@ impl Layer {
        file_name: LayerFileName,
        metadata: LayerFileMetadata,
    ) -> Self {
-        let local_path = local_layer_path(
-            conf,
-            &timeline.tenant_shard_id,
-            &timeline.timeline_id,
-            &file_name,
-            &metadata.generation,
-        );
-
        let desc = PersistentLayerDesc::from_filename(
            timeline.tenant_shard_id,
            timeline.timeline_id,
@@ -171,7 +142,6 @@ impl Layer {
        let owner = Layer(Arc::new(LayerInner::new(
            conf,
            timeline,
-            local_path,
            access_stats,
            desc,
            None,
@@ -188,7 +158,6 @@ impl Layer {
    pub(crate) fn for_resident(
        conf: &'static PageServerConf,
        timeline: &Arc<Timeline>,
-        local_path: Utf8PathBuf,
        file_name: LayerFileName,
        metadata: LayerFileMetadata,
    ) -> ResidentLayer {
@@ -214,7 +183,6 @@ impl Layer {
            LayerInner::new(
                conf,
                timeline,
-                local_path,
                access_stats,
                desc,
                Some(inner),
@@ -256,19 +224,9 @@ impl Layer {
                LayerResidenceStatus::Resident,
                LayerResidenceEventReason::LayerCreate,
            );
-
-            let local_path = local_layer_path(
-                conf,
-                &timeline.tenant_shard_id,
-                &timeline.timeline_id,
-                &desc.filename(),
-                &timeline.generation,
-            );
-
            LayerInner::new(
                conf,
                timeline,
-                local_path,
                access_stats,
                desc,
                Some(inner),
@@ -451,13 +409,6 @@ impl Layer {
        self.0.metadata()
    }

-    pub(crate) fn get_timeline_id(&self) -> Option<TimelineId> {
-        self.0
-            .timeline
-            .upgrade()
-            .map(|timeline| timeline.timeline_id)
-    }
-
    /// Traditional debug dumping facility
    #[allow(unused)]
    pub(crate) async fn dump(&self, verbose: bool, ctx: &RequestContext) -> anyhow::Result<()> {
@@ -757,17 +708,19 @@ impl Drop for LayerInner {
 }

 impl LayerInner {
-    #[allow(clippy::too_many_arguments)]
    fn new(
        conf: &'static PageServerConf,
        timeline: &Arc<Timeline>,
-        local_path: Utf8PathBuf,
        access_stats: LayerAccessStats,
        desc: PersistentLayerDesc,
        downloaded: Option<Arc<DownloadedLayer>>,
        generation: Generation,
        shard: ShardIndex,
    ) -> Self {
+        let path = conf
+            .timeline_path(&timeline.tenant_shard_id, &timeline.timeline_id)
+            .join(desc.filename().to_string());
+
        let (inner, version, init_status) = if let Some(inner) = downloaded {
            let version = inner.version;
            let resident = ResidentOrWantedEvicted::Resident(inner);
@@ -783,7 +736,7 @@ impl LayerInner {
        LayerInner {
            conf,
            debug_str: { format!("timelines/{}/{}", timeline.timeline_id, desc.filename()).into() },
-            path: local_path,
+            path,
            desc,
            timeline: Arc::downgrade(timeline),
            have_remote_client: timeline.remote_client.is_some(),
@@ -986,20 +939,11 @@ impl LayerInner {
            return Err(DownloadError::DownloadRequired);
        }

-        let download_ctx = ctx
-            .map(|ctx| ctx.detached_child(TaskKind::LayerDownload, DownloadBehavior::Download))
-            .unwrap_or(RequestContext::new(
-                TaskKind::LayerDownload,
-                DownloadBehavior::Download,
-            ));
-
        async move {
            tracing::info!(%reason, "downloading on-demand");

            let init_cancelled = scopeguard::guard((), |_| LAYER_IMPL_METRICS.inc_init_cancelled());
-            let res = self
-                .download_init_and_wait(timeline, permit, download_ctx)
-                .await?;
+            let res = self.download_init_and_wait(timeline, permit).await?;
            scopeguard::ScopeGuard::into_inner(init_cancelled);
            Ok(res)
        }
@@ -1038,7 +982,6 @@ impl LayerInner {
        self: &Arc<Self>,
        timeline: Arc<Timeline>,
        permit: heavier_once_cell::InitPermit,
-        ctx: RequestContext,
    ) -> Result<Arc<DownloadedLayer>, DownloadError> {
        debug_assert_current_span_has_tenant_and_timeline_id();

@@ -1068,7 +1011,7 @@ impl LayerInner {
                    .await
                    .unwrap();

-                let res = this.download_and_init(timeline, permit, &ctx).await;
+                let res = this.download_and_init(timeline, permit).await;

                if let Err(res) = tx.send(res) {
                    match res {
@@ -1111,7 +1054,6 @@ impl LayerInner {
        self: &Arc<LayerInner>,
        timeline: Arc<Timeline>,
        permit: heavier_once_cell::InitPermit,
-        ctx: &RequestContext,
    ) -> anyhow::Result<Arc<DownloadedLayer>> {
        let client = timeline
            .remote_client
@@ -1119,12 +1061,7 @@ impl LayerInner {
            .expect("checked before download_init_and_wait");

        let result = client
-            .download_layer_file(
-                &self.desc.filename(),
-                &self.metadata(),
-                &timeline.cancel,
-                ctx,
-            )
+            .download_layer_file(&self.desc.filename(), &self.metadata(), &timeline.cancel)
            .await;

        match result {
@@ -1843,23 +1780,25 @@ impl ResidentLayer {
        }
    }

-    /// Returns the amount of keys and values written to the writer.
-    pub(crate) async fn copy_delta_prefix(
+    /// FIXME: "truncate" is a bad name because we are not truncating anything, but copying
+    /// the filtered parts.
+    #[cfg(test)]
+    pub(super) async fn copy_delta_prefix(
        &self,
        writer: &mut super::delta_layer::DeltaLayerWriter,
-        until: Lsn,
+        truncate_at: Lsn,
        ctx: &RequestContext,
-    ) -> anyhow::Result<usize> {
+    ) -> anyhow::Result<()> {
        use LayerKind::*;

        let owner = &self.owner.0;

        match self.downloaded.get(owner, ctx).await? {
            Delta(ref d) => d
-                .copy_prefix(writer, until, ctx)
+                .copy_prefix(writer, truncate_at, ctx)
                .await
-                .with_context(|| format!("copy_delta_prefix until {until} of {self}")),
-            Image(_) => anyhow::bail!(format!("cannot copy_lsn_prefix of image layer {self}")),
+                .with_context(|| format!("truncate {self}")),
+            Image(_) => anyhow::bail!(format!("cannot truncate image layer {self}")),
        }
    }

--- a/pageserver/src/tenant/tasks.rs
+++ b/pageserver/src/tenant/tasks.rs
@@ -2,7 +2,6 @@
 //! such as compaction and GC

 use std::ops::ControlFlow;
-use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant};

@@ -10,11 +9,9 @@ use crate::context::{DownloadBehavior, RequestContext};
 use crate::metrics::TENANT_TASK_EVENTS;
 use crate::task_mgr;
 use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME};
-use crate::tenant::config::defaults::DEFAULT_COMPACTION_PERIOD;
 use crate::tenant::throttle::Stats;
 use crate::tenant::timeline::CompactionError;
 use crate::tenant::{Tenant, TenantState};
-use rand::Rng;
 use tokio_util::sync::CancellationToken;
 use tracing::*;
 use utils::{backoff, completion};
@@ -47,7 +44,6 @@ pub(crate) enum BackgroundLoopKind {
    Compaction,
    Gc,
    Eviction,
-    IngestHouseKeeping,
    ConsumptionMetricsCollectMetrics,
    ConsumptionMetricsSyntheticSizeWorker,
    InitialLogicalSizeCalculation,
@@ -136,30 +132,6 @@ pub fn start_background_loops(
            }
        },
    );
-
-    task_mgr::spawn(
-        BACKGROUND_RUNTIME.handle(),
-        TaskKind::IngestHousekeeping,
-        Some(tenant_shard_id),
-        None,
-        &format!("ingest housekeeping for tenant {tenant_shard_id}"),
-        false,
-        {
-            let tenant = Arc::clone(tenant);
-            let background_jobs_can_start = background_jobs_can_start.cloned();
-            async move {
-                let cancel = task_mgr::shutdown_token();
-                tokio::select! {
-                    _ = cancel.cancelled() => { return Ok(()) },
-                    _ = completion::Barrier::maybe_wait(background_jobs_can_start) => {}
-                };
-                ingest_housekeeping_loop(tenant, cancel)
-                    .instrument(info_span!("ingest_housekeeping_loop", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug()))
-                    .await;
-                Ok(())
-            }
-        },
-    );
 }

 ///
@@ -407,61 +379,6 @@ async fn gc_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
    TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
 }

-async fn ingest_housekeeping_loop(tenant: Arc<Tenant>, cancel: CancellationToken) {
-    TENANT_TASK_EVENTS.with_label_values(&["start"]).inc();
-    async {
-        loop {
-            tokio::select! {
-                _ = cancel.cancelled() => {
-                    return;
-                },
-                tenant_wait_result = wait_for_active_tenant(&tenant) => match tenant_wait_result {
-                    ControlFlow::Break(()) => return,
-                    ControlFlow::Continue(()) => (),
-                },
-            }
-
-            // We run ingest housekeeping with the same frequency as compaction: it is not worth
-            // having a distinct setting.  But we don't run it in the same task, because compaction
-            // blocks on acquiring the background job semaphore.
-            let period = tenant.get_compaction_period();
-
-            // If compaction period is set to zero (to disable it), then we will use a reasonable default
-            let period = if period == Duration::ZERO {
-                humantime::Duration::from_str(DEFAULT_COMPACTION_PERIOD)
-                    .unwrap()
-                    .into()
-            } else {
-                period
-            };
-
-            // Jitter the period by +/- 5%
-            let period =
-                rand::thread_rng().gen_range((period * (95)) / 100..(period * (105)) / 100);
-
-            // Always sleep first: we do not need to do ingest housekeeping early in the lifetime of
-            // a tenant, since it won't have started writing any ephemeral files yet.
-            if tokio::time::timeout(period, cancel.cancelled())
-                .await
-                .is_ok()
-            {
-                break;
-            }
-
-            let started_at = Instant::now();
-            tenant.ingest_housekeeping().await;
-
-            warn_when_period_overrun(
-                started_at.elapsed(),
-                period,
-                BackgroundLoopKind::IngestHouseKeeping,
-            );
-        }
-    }
-    .await;
-    TENANT_TASK_EVENTS.with_label_values(&["stop"]).inc();
-}
-
 async fn wait_for_active_tenant(tenant: &Arc<Tenant>) -> ControlFlow<()> {
    // if the tenant has a proper status already, no need to wait for anything
    if tenant.current_state() == TenantState::Active {
@@ -503,6 +420,8 @@ pub(crate) async fn random_init_delay(
    period: Duration,
    cancel: &CancellationToken,
 ) -> Result<(), Cancelled> {
+    use rand::Rng;
+
    if period == Duration::ZERO {
        return Ok(());
    }
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -1,6 +1,5 @@
 mod compaction;
 pub mod delete;
-pub(crate) mod detach_ancestor;
 mod eviction_task;
 mod init;
 pub mod layer_manager;
@@ -17,15 +16,11 @@ use enumset::EnumSet;
 use fail::fail_point;
 use once_cell::sync::Lazy;
 use pageserver_api::{
-    key::{
-        AUX_FILES_KEY, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE,
-        NON_INHERITED_SPARSE_RANGE,
-    },
+    key::{AUX_FILES_KEY, NON_INHERITED_RANGE},
    keyspace::{KeySpaceAccum, SparseKeyPartitioning},
    models::{
-        AuxFilePolicy, CompactionAlgorithm, DownloadRemoteLayersTaskInfo,
-        DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, InMemoryLayerInfo, LayerMapInfo,
-        TimelineState,
+        CompactionAlgorithm, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest,
+        EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, TimelineState,
    },
    reltag::BlockNumber,
    shard::{ShardIdentity, ShardNumber, TenantShardId},
@@ -60,7 +55,7 @@ use std::{
    ops::ControlFlow,
 };

-use crate::tenant::storage_layer::layer::local_layer_path;
+use crate::tenant::timeline::logical_size::CurrentLogicalSize;
 use crate::tenant::{
    layer_map::{LayerMap, SearchResult},
    metadata::TimelineMetadata,
@@ -82,9 +77,6 @@ use crate::{
 use crate::{
    disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry,
 };
-use crate::{
-    metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize,
-};
 use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind};
 use crate::{
    pgdatadir_mapping::{AuxFilesDirectory, DirectoryKind},
@@ -333,7 +325,7 @@ pub struct Timeline {

    // List of child timelines and their branch points. This is needed to avoid
    // garbage collecting data that is still needed by the child timelines.
-    pub(crate) gc_info: std::sync::RwLock<GcInfo>,
+    pub gc_info: std::sync::RwLock<GcInfo>,

    // It may change across major versions so for simplicity
    // keep it after running initdb for a timeline.
@@ -417,59 +409,33 @@ pub struct WalReceiverInfo {
    pub last_received_msg_ts: u128,
 }

+///
 /// Information about how much history needs to be retained, needed by
 /// Garbage Collection.
-#[derive(Default)]
-pub(crate) struct GcInfo {
+///
+pub struct GcInfo {
    /// Specific LSNs that are needed.
    ///
    /// Currently, this includes all points where child branches have
    /// been forked off from. In the future, could also include
    /// explicit user-defined snapshot points.
-    pub(crate) retain_lsns: Vec<Lsn>,
+    pub retain_lsns: Vec<Lsn>,

-    /// The cutoff coordinates, which are combined by selecting the minimum.
-    pub(crate) cutoffs: GcCutoffs,
-}
-
-impl GcInfo {
-    pub(crate) fn min_cutoff(&self) -> Lsn {
-        self.cutoffs.select_min()
-    }
-}
-
-/// The `GcInfo` component describing which Lsns need to be retained.
-#[derive(Debug)]
-pub(crate) struct GcCutoffs {
-    /// Keep everything newer than this point.
+    /// In addition to 'retain_lsns', keep everything newer than this
+    /// point.
    ///
    /// This is calculated by subtracting 'gc_horizon' setting from
    /// last-record LSN
    ///
    /// FIXME: is this inclusive or exclusive?
-    pub(crate) horizon: Lsn,
+    pub horizon_cutoff: Lsn,

    /// In addition to 'retain_lsns' and 'horizon_cutoff', keep everything newer than this
    /// point.
    ///
    /// This is calculated by finding a number such that a record is needed for PITR
    /// if only if its LSN is larger than 'pitr_cutoff'.
-    pub(crate) pitr: Lsn,
-}
-
-impl Default for GcCutoffs {
-    fn default() -> Self {
-        Self {
-            horizon: Lsn::INVALID,
-            pitr: Lsn::INVALID,
-        }
-    }
-}
-
-impl GcCutoffs {
-    fn select_min(&self) -> Lsn {
-        std::cmp::min(self.horizon, self.pitr)
-    }
+    pub pitr_cutoff: Lsn,
 }

 /// An error happened in a get() operation.
@@ -498,6 +464,7 @@ pub(crate) enum PageReconstructError {

 #[derive(Debug)]
 pub struct MissingKeyError {
+    stuck_at_lsn: bool,
    key: Key,
    shard: ShardNumber,
    cont_lsn: Lsn,
@@ -509,13 +476,23 @@ pub struct MissingKeyError {

 impl std::fmt::Display for MissingKeyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "could not find data for key {} (shard {:?}) at LSN {}, request LSN {}",
-            self.key, self.shard, self.cont_lsn, self.request_lsn
-        )?;
-        if let Some(ref ancestor_lsn) = self.ancestor_lsn {
-            write!(f, ", ancestor {}", ancestor_lsn)?;
+        if self.stuck_at_lsn {
+            // Records are found in this timeline but no image layer or initial delta record was found.
+            write!(
+                f,
+                "could not find layer with more data for key {} (shard {:?}) at LSN {}, request LSN {}",
+                self.key, self.shard, self.cont_lsn, self.request_lsn
+            )?;
+            if let Some(ref ancestor_lsn) = self.ancestor_lsn {
+                write!(f, ", ancestor {}", ancestor_lsn)?;
+            }
+        } else {
+            // No records in this timeline.
+            write!(
+                f,
+                "could not find data for key {} (shard {:?}) at LSN {}, for request at LSN {}",
+                self.key, self.shard, self.cont_lsn, self.request_lsn
+            )?;
        }

        if !self.traversal_path.is_empty() {
@@ -591,8 +568,8 @@ pub(crate) enum GetVectoredError {
    #[error("Requested at invalid LSN: {0}")]
    InvalidLsn(Lsn),

-    #[error("Requested key not found: {0}")]
-    MissingKey(MissingKeyError),
+    #[error("Requested key {0} not found")]
+    MissingKey(Key),

    #[error(transparent)]
    GetReadyAncestorError(GetReadyAncestorError),
@@ -701,7 +678,7 @@ impl From<GetVectoredError> for PageReconstructError {
            GetVectoredError::Cancelled => PageReconstructError::Cancelled,
            GetVectoredError::InvalidLsn(_) => PageReconstructError::Other(anyhow!("Invalid LSN")),
            err @ GetVectoredError::Oversized(_) => PageReconstructError::Other(err.into()),
-            GetVectoredError::MissingKey(err) => PageReconstructError::MissingKey(err),
+            err @ GetVectoredError::MissingKey(_) => PageReconstructError::Other(err.into()),
            GetVectoredError::GetReadyAncestorError(err) => PageReconstructError::from(err),
            GetVectoredError::Other(err) => PageReconstructError::Other(err),
        }
@@ -865,13 +842,9 @@ impl Timeline {
                // Initialise the reconstruct state for the key with the cache
                // entry returned above.
                let mut reconstruct_state = ValuesReconstructState::new();
-
-                // Only add the cached image to the reconstruct state when it exists.
-                if cached_page_img.is_some() {
-                    let mut key_state = VectoredValueReconstructState::default();
-                    key_state.img = cached_page_img;
-                    reconstruct_state.keys.insert(key, Ok(key_state));
-                }
+                let mut key_state = VectoredValueReconstructState::default();
+                key_state.img = cached_page_img;
+                reconstruct_state.keys.insert(key, Ok(key_state));

                let vectored_res = self
                    .get_vectored_impl(keyspace.clone(), lsn, reconstruct_state, ctx)
@@ -897,15 +870,16 @@ impl Timeline {
                            value
                        }
                    }
-                    None => Err(PageReconstructError::MissingKey(MissingKeyError {
-                        key,
-                        shard: self.shard_identity.get_shard_number(&key),
-                        cont_lsn: Lsn(0),
-                        request_lsn: lsn,
-                        ancestor_lsn: None,
-                        traversal_path: Vec::new(),
-                        backtrace: None,
-                    })),
+                    None => {
+                        error!(
+                            "Expected {}, but singular vectored get returned nothing",
+                            key
+                        );
+                        Err(PageReconstructError::Other(anyhow!(
+                            "Singular vectored get did not return a value for {}",
+                            key
+                        )))
+                    }
                }
            }
        }
@@ -1055,70 +1029,6 @@ impl Timeline {
        res
    }

-    /// Scan the keyspace and return all existing key-values in the keyspace. This currently uses vectored
-    /// get underlying. Normal vectored get would throw an error when a key in the keyspace is not found
-    /// during the search, but for the scan interface, it returns all existing key-value pairs, and does
-    /// not expect each single key in the key space will be found. The semantics is closer to the RocksDB
-    /// scan iterator interface. We could optimize this interface later to avoid some checks in the vectored
-    /// get path to maintain and split the probing and to-be-probe keyspace. We also need to ensure that
-    /// the scan operation will not cause OOM in the future.
-    #[allow(dead_code)]
-    pub(crate) async fn scan(
-        &self,
-        keyspace: KeySpace,
-        lsn: Lsn,
-        ctx: &RequestContext,
-    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
-        if !lsn.is_valid() {
-            return Err(GetVectoredError::InvalidLsn(lsn));
-        }
-
-        trace!(
-            "key-value scan request for {:?}@{} from task kind {:?}",
-            keyspace,
-            lsn,
-            ctx.task_kind()
-        );
-
-        // We should generalize this into Keyspace::contains in the future.
-        for range in &keyspace.ranges {
-            if range.start.field1 < METADATA_KEY_BEGIN_PREFIX
-                || range.end.field1 > METADATA_KEY_END_PREFIX
-            {
-                return Err(GetVectoredError::Other(anyhow::anyhow!(
-                    "only metadata keyspace can be scanned"
-                )));
-            }
-        }
-
-        let start = crate::metrics::SCAN_LATENCY
-            .for_task_kind(ctx.task_kind())
-            .map(ScanLatencyOngoingRecording::start_recording);
-
-        // start counting after throttle so that throttle time
-        // is always less than observation time
-        let throttled = self
-            .timeline_get_throttle
-            // assume scan = 1 quota for now until we find a better way to process this
-            .throttle(ctx, 1)
-            .await;
-
-        let vectored_res = self
-            .get_vectored_impl(
-                keyspace.clone(),
-                lsn,
-                ValuesReconstructState::default(),
-                ctx,
-            )
-            .await;
-
-        if let Some(recording) = start {
-            recording.observe(throttled);
-        }
-
-        vectored_res
-    }
-
    /// Not subject to [`Self::timeline_get_throttle`].
    pub(super) async fn get_vectored_sequential_impl(
        &self,
@@ -1127,7 +1037,6 @@ impl Timeline {
        ctx: &RequestContext,
    ) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
        let mut values = BTreeMap::new();
-
        for range in keyspace.ranges {
            let mut key = range.start;
            while key != range.end {
@@ -1140,17 +1049,16 @@ impl Timeline {
                    Err(Cancelled | AncestorStopping(_)) => {
                        return Err(GetVectoredError::Cancelled)
                    }
-                    Err(MissingKey(_))
-                        if NON_INHERITED_RANGE.contains(&key)
-                            || NON_INHERITED_SPARSE_RANGE.contains(&key) =>
-                    {
-                        // Ignore missing key error for aux key range. TODO: currently, we assume non_inherited_range == aux_key_range.
-                        // When we add more types of keys into the page server, we should revisit this part of code and throw errors
-                        // accordingly.
-                        key = key.next();
-                    }
-                    Err(MissingKey(err)) => {
-                        return Err(GetVectoredError::MissingKey(err));
+                    // we only capture stuck_at_lsn=false now until we figure out https://github.com/neondatabase/neon/issues/7380
+                    Err(MissingKey(MissingKeyError {
+                        stuck_at_lsn: false,
+                        ..
+                    })) if !NON_INHERITED_RANGE.contains(&key) => {
+                        // The vectored read path handles non inherited keys specially.
+                        // If such a key cannot be reconstructed from the current timeline,
+                        // the vectored read path returns a key level error as opposed to a top
+                        // level error.
+                        return Err(GetVectoredError::MissingKey(key));
                    }
                    Err(Other(err))
                        if err
@@ -1237,11 +1145,6 @@ impl Timeline {
        lsn: Lsn,
        ctx: &RequestContext,
    ) {
-        if keyspace.overlaps(&Key::metadata_key_range()) {
-            // skip validation for metadata key range
-            return;
-        }
-
        let sequential_res = self
            .get_vectored_sequential_impl(keyspace.clone(), lsn, ctx)
            .await;
@@ -1251,7 +1154,7 @@ impl Timeline {
            match (lhs, rhs) {
                (Oversized(l), Oversized(r)) => l == r,
                (InvalidLsn(l), InvalidLsn(r)) => l == r,
-                (MissingKey(l), MissingKey(r)) => l.key == r.key,
+                (MissingKey(l), MissingKey(r)) => l == r,
                (GetReadyAncestorError(_), GetReadyAncestorError(_)) => true,
                (Other(_), Other(_)) => true,
                _ => false,
@@ -1266,7 +1169,7 @@ impl Timeline {
                               " - keyspace={:?} lsn={}"),
                       seq_err, keyspace, lsn) },
            (Ok(_), Err(GetVectoredError::GetReadyAncestorError(GetReadyAncestorError::AncestorLsnTimeout(_)))) => {
-                // Sequential get runs after vectored get, so it is possible for the later
+                // Sequential get runs after vectored get, so it is possible for the latter
                // to time out while waiting for its ancestor's Lsn to become ready and for the
                // former to succeed (it essentially has a doubled wait time).
            },
@@ -1501,21 +1404,15 @@ impl Timeline {
    /// Flush to disk all data that was written with the put_* functions
    #[instrument(skip(self), fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug(), timeline_id=%self.timeline_id))]
    pub(crate) async fn freeze_and_flush(&self) -> anyhow::Result<()> {
-        self.freeze_and_flush0().await
-    }
-
-    // This exists to provide a non-span creating version of `freeze_and_flush` we can call without
-    // polluting the span hierarchy.
-    pub(crate) async fn freeze_and_flush0(&self) -> anyhow::Result<()> {
        let to_lsn = self.freeze_inmem_layer(false).await;
        self.flush_frozen_layers_and_wait(to_lsn).await
    }

-    // Check if an open ephemeral layer should be closed: this provides
-    // background enforcement of checkpoint interval if there is no active WAL receiver, to avoid keeping
-    // an ephemeral layer open forever when idle.  It also freezes layers if the global limit on
-    // ephemeral layer bytes has been breached.
-    pub(super) async fn maybe_freeze_ephemeral_layer(&self) {
+    /// If there is no writer, and conditions for rolling the latest layer are met, then freeze it.
+    ///
+    /// This is for use in background housekeeping, to provide guarantees of layers closing eventually
+    /// even if there are no ongoing writes to drive that.
+    async fn maybe_freeze_ephemeral_layer(&self) {
        let Ok(_write_guard) = self.write_lock.try_lock() else {
            // If the write lock is held, there is an active wal receiver: rolling open layers
            // is their responsibility while they hold this lock.
@@ -1542,11 +1439,13 @@ impl Timeline {
                // we are a sharded tenant and have skipped some WAL
                let last_freeze_ts = *self.last_freeze_ts.read().unwrap();
                if last_freeze_ts.elapsed() >= self.get_checkpoint_timeout() {
-                    // Only do this if have been layer-less longer than get_checkpoint_timeout, so that a shard
-                    // without any data ingested (yet) doesn't write a remote index as soon as it
+                    // This should be somewhat rare, so we log it at INFO level.
+                    //
+                    // We checked for checkpoint timeout so that a shard without any
+                    // data ingested (yet) doesn't write a remote index as soon as it
                    // sees its LSN advance: we only do this if we've been layer-less
                    // for some time.
-                    tracing::debug!(
+                    tracing::info!(
                        "Advancing disk_consistent_lsn past WAL ingest gap {} -> {}",
                        disk_consistent_lsn,
                        last_record_lsn
@@ -1636,6 +1535,11 @@ impl Timeline {
            (guard, permit)
        };

+        // Prior to compaction, check if an open ephemeral layer should be closed: this provides
+        // background enforcement of checkpoint interval if there is no active WAL receiver, to avoid keeping
+        // an ephemeral layer open forever when idle.
+        self.maybe_freeze_ephemeral_layer().await;
+
        // this wait probably never needs any "long time spent" logging, because we already nag if
        // compaction task goes over it's period (20s) which is quite often in production.
        let (_guard, _permit) = tokio::select! {
@@ -1905,7 +1809,7 @@ impl Timeline {
    #[instrument(skip_all, fields(tenant_id = %self.tenant_shard_id.tenant_id, shard_id = %self.tenant_shard_id.shard_slug(), timeline_id = %self.timeline_id))]
    pub(crate) async fn download_layer(
        &self,
-        layer_file_name: &LayerFileName,
+        layer_file_name: &str,
    ) -> anyhow::Result<Option<bool>> {
        let Some(layer) = self.find_layer(layer_file_name).await else {
            return Ok(None);
@@ -1923,10 +1827,7 @@ impl Timeline {
    /// Evict just one layer.
    ///
    /// Returns `Ok(None)` in the case where the layer could not be found by its `layer_file_name`.
-    pub(crate) async fn evict_layer(
-        &self,
-        layer_file_name: &LayerFileName,
-    ) -> anyhow::Result<Option<bool>> {
+    pub(crate) async fn evict_layer(&self, layer_file_name: &str) -> anyhow::Result<Option<bool>> {
        let _gate = self
            .gate
            .enter()
@@ -2000,12 +1901,13 @@ const REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE: u64 = 10;

 // Private functions
 impl Timeline {
-    pub(crate) fn get_switch_aux_file_policy(&self) -> AuxFilePolicy {
+    #[allow(dead_code)]
+    pub(crate) fn get_switch_to_aux_file_v2(&self) -> bool {
        let tenant_conf = self.tenant_conf.load();
        tenant_conf
            .tenant_conf
-            .switch_aux_file_policy
-            .unwrap_or(self.conf.default_tenant_conf.switch_aux_file_policy)
+            .switch_to_aux_file_v2
+            .unwrap_or(self.conf.default_tenant_conf.switch_to_aux_file_v2)
    }

    pub(crate) fn get_lazy_slru_download(&self) -> bool {
@@ -2209,7 +2111,11 @@ impl Timeline {

                write_lock: tokio::sync::Mutex::new(None),

-                gc_info: std::sync::RwLock::new(GcInfo::default()),
+                gc_info: std::sync::RwLock::new(GcInfo {
+                    retain_lsns: Vec::new(),
+                    horizon_cutoff: Lsn(0),
+                    pitr_cutoff: Lsn(0),
+                }),

                latest_gc_cutoff_lsn: Rcu::new(metadata.latest_gc_cutoff_lsn()),
                initdb_lsn: metadata.initdb_lsn(),
@@ -2417,8 +2323,8 @@ impl Timeline {

                for discovered in discovered {
                    let (name, kind) = match discovered {
-                        Discovered::Layer(layer_file_name, local_path, file_size) => {
-                            discovered_layers.push((layer_file_name, local_path, file_size));
+                        Discovered::Layer(file_name, file_size) => {
+                            discovered_layers.push((file_name, file_size));
                            continue;
                        }
                        Discovered::Metadata => {
@@ -2463,7 +2369,7 @@ impl Timeline {
                let mut needs_cleanup = Vec::new();
                let mut total_physical_size = 0;

-                for (name, local_path, decision) in decided {
+                for (name, decision) in decided {
                    let decision = match decision {
                        Ok(UseRemote { local, remote }) => {
                            // Remote is authoritative, but we may still choose to retain
@@ -2473,23 +2379,26 @@ impl Timeline {
                                // the correct generation.
                                UseLocal(remote)
                            } else {
-                                let local_path = local_path.as_ref().expect("Locally found layer must have path");
-                                init::cleanup_local_file_for_remote(local_path, &local, &remote)?;
+                                path.push(name.file_name());
+                                init::cleanup_local_file_for_remote(&path, &local, &remote)?;
+                                path.pop();
                                UseRemote { local, remote }
                            }
                        }
                        Ok(decision) => decision,
                        Err(DismissedLayer::Future { local }) => {
                            if local.is_some() {
-                                let local_path = local_path.expect("Locally found layer must have path");
-                                init::cleanup_future_layer(&local_path, &name, disk_consistent_lsn)?;
+                                path.push(name.file_name());
+                                init::cleanup_future_layer(&path, &name, disk_consistent_lsn)?;
+                                path.pop();
                            }
                            needs_cleanup.push(name);
                            continue;
                        }
                        Err(DismissedLayer::LocalOnly(local)) => {
-                            let local_path = local_path.expect("Locally found layer must have path");
-                            init::cleanup_local_only_file(&local_path, &name, &local)?;
+                            path.push(name.file_name());
+                            init::cleanup_local_only_file(&path, &name, &local)?;
+                            path.pop();
                            // this file never existed remotely, we will have to do rework
                            continue;
                        }
@@ -2505,18 +2414,7 @@ impl Timeline {
                    let layer = match decision {
                        UseLocal(m) => {
                            total_physical_size += m.file_size();
-
-                            let local_path = local_path.unwrap_or_else(|| {
-                                local_layer_path(
-                                    conf,
-                                    &this.tenant_shard_id,
-                                    &this.timeline_id,
-                                    &name,
-                                    &m.generation,
-                                )
-                            });
-
-                            Layer::for_resident(conf, &this, local_path, name, m).drop_eviction_guard()
+                            Layer::for_resident(conf, &this, name, m).drop_eviction_guard()
                        }
                        Evicted(remote) | UseRemote { remote, .. } => {
                            Layer::for_evicted(conf, &this, name, remote)
@@ -2997,11 +2895,11 @@ impl Timeline {
        }
    }

-    async fn find_layer(&self, layer_name: &LayerFileName) -> Option<Layer> {
+    async fn find_layer(&self, layer_file_name: &str) -> Option<Layer> {
        let guard = self.layers.read().await;
        for historic_layer in guard.layer_map().iter_historic_layers() {
-            let historic_layer_name = historic_layer.filename();
-            if layer_name == &historic_layer_name {
+            let historic_layer_name = historic_layer.filename().file_name();
+            if layer_file_name == historic_layer_name {
                return Some(guard.get_from_desc(&historic_layer));
            }
        }
@@ -3031,7 +2929,7 @@ impl Timeline {

            HeatMapLayer::new(
                layer.layer_desc().filename(),
-                (&layer.metadata()).into(),
+                layer.metadata().into(),
                last_activity_ts,
            )
        });
@@ -3126,6 +3024,7 @@ impl Timeline {
                            // Didn't make any progress in last iteration. Error out to avoid
                            // getting stuck in the loop.
                            return Err(PageReconstructError::MissingKey(MissingKeyError {
+                                stuck_at_lsn: true,
                                key,
                                shard: self.shard_identity.get_shard_number(&key),
                                cont_lsn: Lsn(cont_lsn.0 - 1),
@@ -3140,6 +3039,7 @@ impl Timeline {
                }
                ValueReconstructResult::Missing => {
                    return Err(PageReconstructError::MissingKey(MissingKeyError {
+                        stuck_at_lsn: false,
                        key,
                        shard: self.shard_identity.get_shard_number(&key),
                        cont_lsn,
@@ -3303,12 +3203,37 @@ impl Timeline {
            // Do not descend into the ancestor timeline for aux files.
            // We don't return a blanket [`GetVectoredError::MissingKey`] to avoid
            // stalling compaction.
-            keyspace.remove_overlapping_with(&KeySpace {
-                ranges: vec![NON_INHERITED_RANGE, NON_INHERITED_SPARSE_RANGE],
-            });
+            // TODO(chi): this will need to be updated for aux files v2 storage
+            if keyspace.overlaps(&NON_INHERITED_RANGE) {
+                let removed = keyspace.remove_overlapping_with(&KeySpace {
+                    ranges: vec![NON_INHERITED_RANGE],
+                });
+
+                for range in removed.ranges {
+                    let mut key = range.start;
+                    while key < range.end {
+                        reconstruct_state.on_key_error(
+                            key,
+                            PageReconstructError::MissingKey(MissingKeyError {
+                                stuck_at_lsn: false,
+                                key,
+                                shard: self.shard_identity.get_shard_number(&key),
+                                cont_lsn,
+                                request_lsn,
+                                ancestor_lsn: None,
+                                traversal_path: Vec::default(),
+                                backtrace: if cfg!(test) {
+                                    Some(std::backtrace::Backtrace::force_capture())
+                                } else {
+                                    None
+                                },
+                            }),
+                        );
+                        key = key.next();
+                    }
+                }
+            }

-            // Keyspace is fully retrieved, no ancestor timeline, or metadata scan (where we do not look
-            // into ancestor timelines). TODO: is there any other metadata which we want to inherit?
            if keyspace.total_raw_size() == 0 || timeline.ancestor_timeline.is_none() {
                break;
            }
@@ -3323,17 +3248,7 @@ impl Timeline {
        }

        if keyspace.total_raw_size() != 0 {
-            return Err(GetVectoredError::MissingKey(MissingKeyError {
-                key: keyspace.start().unwrap(), /* better if we can store the full keyspace */
-                shard: self
-                    .shard_identity
-                    .get_shard_number(&keyspace.start().unwrap()),
-                cont_lsn,
-                request_lsn,
-                ancestor_lsn: Some(timeline.ancestor_lsn),
-                traversal_path: vec![],
-                backtrace: None,
-            }));
+            return Err(GetVectoredError::MissingKey(keyspace.start().unwrap()));
        }

        Ok(())
@@ -3533,7 +3448,7 @@ impl Timeline {
        Ok(ancestor)
    }

-    pub(crate) fn get_ancestor_timeline(&self) -> anyhow::Result<Arc<Timeline>> {
+    fn get_ancestor_timeline(&self) -> anyhow::Result<Arc<Timeline>> {
        let ancestor = self.ancestor_timeline.as_ref().with_context(|| {
            format!(
                "Ancestor is missing. Timeline id: {} Ancestor id {:?}",
@@ -4271,7 +4186,7 @@ impl Timeline {
                            };

                            // Write all the keys we just read into our new image layer.
-                            image_layer_writer.put_image(img_key, img, ctx).await?;
+                            image_layer_writer.put_image(img_key, img).await?;
                            wrote_keys = true;
                        }
                    }
@@ -4282,7 +4197,7 @@ impl Timeline {
                // Normal path: we have written some data into the new image layer for this
                // partition, so flush it to disk.
                start = img_range.end;
-                let image_layer = image_layer_writer.finish(self, ctx).await?;
+                let image_layer = image_layer_writer.finish(self).await?;
                image_layers.push(image_layer);
            } else {
                // Special case: the image layer may be empty if this is a sharded tenant and the
@@ -4349,49 +4264,6 @@ impl Timeline {
            _ = self.cancel.cancelled() => {}
        )
    }
-
-    /// Detach this timeline from its ancestor by copying all of ancestors layers as this
-    /// Timelines layers up to the ancestor_lsn.
-    ///
-    /// Requires a timeline that:
-    /// - has an ancestor to detach from
-    /// - the ancestor does not have an ancestor -- follows from the original RFC limitations, not
-    /// a technical requirement
-    /// - has prev_lsn in remote storage (temporary restriction)
-    ///
-    /// After the operation has been started, it cannot be canceled. Upon restart it needs to be
-    /// polled again until completion.
-    ///
-    /// During the operation all timelines sharing the data with this timeline will be reparented
-    /// from our ancestor to be branches of this timeline.
-    pub(crate) async fn prepare_to_detach_from_ancestor(
-        self: &Arc<Timeline>,
-        tenant: &crate::tenant::Tenant,
-        options: detach_ancestor::Options,
-        ctx: &RequestContext,
-    ) -> Result<
-        (
-            completion::Completion,
-            detach_ancestor::PreparedTimelineDetach,
-        ),
-        detach_ancestor::Error,
-    > {
-        detach_ancestor::prepare(self, tenant, options, ctx).await
-    }
-
-    /// Completes the ancestor detach. This method is to be called while holding the
-    /// TenantManager's tenant slot, so during this method we cannot be deleted nor can any
-    /// timeline be deleted. After this method returns successfully, tenant must be reloaded.
-    ///
-    /// Pageserver receiving a SIGKILL during this operation is not supported (yet).
-    pub(crate) async fn complete_detaching_timeline_ancestor(
-        self: &Arc<Timeline>,
-        tenant: &crate::tenant::Tenant,
-        prepared: detach_ancestor::PreparedTimelineDetach,
-        ctx: &RequestContext,
-    ) -> Result<Vec<TimelineId>, anyhow::Error> {
-        detach_ancestor::complete(self, tenant, prepared, ctx).await
-    }
 }

 /// Top-level failure to compact.
@@ -4500,24 +4372,6 @@ impl Timeline {
        Ok(())
    }

-    async fn rewrite_layers(
-        self: &Arc<Self>,
-        replace_layers: Vec<(Layer, ResidentLayer)>,
-        drop_layers: Vec<Layer>,
-    ) -> anyhow::Result<()> {
-        let mut guard = self.layers.write().await;
-
-        guard.rewrite_layers(&replace_layers, &drop_layers, &self.metrics);
-
-        let upload_layers: Vec<_> = replace_layers.into_iter().map(|r| r.1).collect();
-
-        if let Some(remote_client) = self.remote_client.as_ref() {
-            remote_client.schedule_compaction_update(&drop_layers, &upload_layers)?;
-        }
-
-        Ok(())
-    }
-
    /// Schedules the uploads of the given image layers
    fn upload_new_image_layers(
        self: &Arc<Self>,
@@ -4536,7 +4390,7 @@ impl Timeline {
        Ok(())
    }

-    /// Find the Lsns above which layer files need to be retained on
+    /// Update information about which layer files need to be retained on
    /// garbage collection. This is separate from actually performing the GC,
    /// and is updated more frequently, so that compaction can remove obsolete
    /// page versions more aggressively.
@@ -4544,6 +4398,17 @@ impl Timeline {
    /// TODO: that's wishful thinking, compaction doesn't actually do that
    /// currently.
    ///
+    /// The caller specifies how much history is needed with the 3 arguments:
+    ///
+    /// retain_lsns: keep a version of each page at these LSNs
+    /// cutoff_horizon: also keep everything newer than this LSN
+    /// pitr: the time duration required to keep data for PITR
+    ///
+    /// The 'retain_lsns' list is currently used to prevent removing files that
+    /// are needed by child timelines. In the future, the user might be able to
+    /// name additional points in time to retain. The caller is responsible for
+    /// collecting that information.
+    ///
    /// The 'cutoff_horizon' point is used to retain recent versions that might still be
    /// needed by read-only nodes. (As of this writing, the caller just passes
    /// the latest LSN subtracted by a constant, and doesn't do anything smart
@@ -4551,22 +4416,29 @@ impl Timeline {
    ///
    /// The 'pitr' duration is used to calculate a 'pitr_cutoff', which can be used to determine
    /// whether a record is needed for PITR.
+    ///
+    /// NOTE: This function holds a short-lived lock to protect the 'gc_info'
+    /// field, so that the three values passed as arguments are stored
+    /// atomically. But the caller is responsible for ensuring that no new
+    /// branches are created that would need to be included in 'retain_lsns',
+    /// for example. The caller should hold the `Tenant::gc_cs` lock to ensure
+    /// that.
+    ///
    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
-    pub(super) async fn find_gc_cutoffs(
+    pub(super) async fn update_gc_info(
        &self,
+        retain_lsns: Vec<Lsn>,
        cutoff_horizon: Lsn,
        pitr: Duration,
        cancel: &CancellationToken,
        ctx: &RequestContext,
-    ) -> anyhow::Result<GcCutoffs> {
+    ) -> anyhow::Result<()> {
        let _timer = self
            .metrics
-            .find_gc_cutoffs_histo
+            .update_gc_info_histo
            .start_timer()
            .record_on_drop();

-        pausable_failpoint!("Timeline::find_gc_cutoffs-pausable");
-
        // First, calculate pitr_cutoff_timestamp and then convert it to LSN.
        //
        // Some unit tests depend on garbage-collection working even when
@@ -4616,10 +4488,14 @@ impl Timeline {
            self.get_last_record_lsn()
        };

-        Ok(GcCutoffs {
-            horizon: cutoff_horizon,
-            pitr: pitr_cutoff,
-        })
+        // Grab the lock and update the values
+        *self.gc_info.write().unwrap() = GcInfo {
+            retain_lsns,
+            horizon_cutoff: cutoff_horizon,
+            pitr_cutoff,
+        };
+
+        Ok(())
    }

    /// Garbage collect layer files on a timeline that are no longer needed.
@@ -4648,8 +4524,8 @@ impl Timeline {
        let (horizon_cutoff, pitr_cutoff, retain_lsns) = {
            let gc_info = self.gc_info.read().unwrap();

-            let horizon_cutoff = min(gc_info.cutoffs.horizon, self.get_disk_consistent_lsn());
-            let pitr_cutoff = gc_info.cutoffs.pitr;
+            let horizon_cutoff = min(gc_info.horizon_cutoff, self.get_disk_consistent_lsn());
+            let pitr_cutoff = gc_info.pitr_cutoff;
            let retain_lsns = gc_info.retain_lsns.clone();
            (horizon_cutoff, pitr_cutoff, retain_lsns)
        };
@@ -4676,8 +4552,6 @@ impl Timeline {
        retain_lsns: Vec<Lsn>,
        new_gc_cutoff: Lsn,
    ) -> anyhow::Result<GcResult> {
-        // FIXME: if there is an ongoing detach_from_ancestor, we should just skip gc
-
        let now = SystemTime::now();
        let mut result: GcResult = GcResult::default();

--- a/pageserver/src/tenant/timeline/compaction.rs
+++ b/pageserver/src/tenant/timeline/compaction.rs
@@ -15,8 +15,7 @@ use anyhow::{anyhow, Context};
 use enumset::EnumSet;
 use fail::fail_point;
 use itertools::Itertools;
-use pageserver_api::keyspace::ShardedRange;
-use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId};
+use pageserver_api::shard::{ShardIdentity, TenantShardId};
 use tokio_util::sync::CancellationToken;
 use tracing::{debug, info, info_span, trace, warn, Instrument};
 use utils::id::TimelineId;
@@ -94,7 +93,7 @@ impl Timeline {
        // Define partitioning schema if needed

        // FIXME: the match should only cover repartitioning, not the next steps
-        let partition_count = match self
+        match self
            .repartition(
                self.get_last_record_lsn(),
                self.get_compaction_target_size(),
@@ -147,7 +146,6 @@ impl Timeline {
                assert!(sparse_layers.is_empty());

                self.upload_new_image_layers(dense_layers)?;
-                dense_partitioning.parts.len()
            }
            Err(err) => {
                // no partitioning? This is normal, if the timeline was just created
@@ -159,150 +157,9 @@ impl Timeline {
                if !self.cancel.is_cancelled() {
                    tracing::error!("could not compact, repartitioning keyspace failed: {err:?}");
                }
-                1
            }
        };

-        if self.shard_identity.count >= ShardCount::new(2) {
-            // Limit the number of layer rewrites to the number of partitions: this means its
-            // runtime should be comparable to a full round of image layer creations, rather than
-            // being potentially much longer.
-            let rewrite_max = partition_count;
-
-            self.compact_shard_ancestors(rewrite_max, ctx).await?;
-        }
-
-        Ok(())
-    }
-
-    /// Check for layers that are elegible to be rewritten:
-    /// - Shard splitting: After a shard split, ancestor layers beyond pitr_interval, so that
-    ///   we don't indefinitely retain keys in this shard that aren't needed.
-    /// - For future use: layers beyond pitr_interval that are in formats we would
-    ///   rather not maintain compatibility with indefinitely.
-    ///
-    /// Note: this phase may read and write many gigabytes of data: use rewrite_max to bound
-    /// how much work it will try to do in each compaction pass.
-    async fn compact_shard_ancestors(
-        self: &Arc<Self>,
-        rewrite_max: usize,
-        _ctx: &RequestContext,
-    ) -> anyhow::Result<()> {
-        let mut drop_layers = Vec::new();
-        let layers_to_rewrite: Vec<Layer> = Vec::new();
-
-        // We will use the PITR cutoff as a condition for rewriting layers.
-        let pitr_cutoff = self.gc_info.read().unwrap().cutoffs.pitr;
-
-        let layers = self.layers.read().await;
-        for layer_desc in layers.layer_map().iter_historic_layers() {
-            let layer = layers.get_from_desc(&layer_desc);
-            if layer.metadata().shard.shard_count == self.shard_identity.count {
-                // This layer does not belong to a historic ancestor, no need to re-image it.
-                continue;
-            }
-
-            // This layer was created on an ancestor shard: check if it contains any data for this shard.
-            let sharded_range = ShardedRange::new(layer_desc.get_key_range(), &self.shard_identity);
-            let layer_local_page_count = sharded_range.page_count();
-            let layer_raw_page_count = ShardedRange::raw_size(&layer_desc.get_key_range());
-            if layer_local_page_count == 0 {
-                // This ancestral layer only covers keys that belong to other shards.
-                // We include the full metadata in the log: if we had some critical bug that caused
-                // us to incorrectly drop layers, this would simplify manually debugging + reinstating those layers.
-                info!(%layer, old_metadata=?layer.metadata(),
-                    "dropping layer after shard split, contains no keys for this shard.",
-                );
-
-                if cfg!(debug_assertions) {
-                    // Expensive, exhaustive check of keys in this layer: this guards against ShardedRange's calculations being
-                    // wrong.  If ShardedRange claims the local page count is zero, then no keys in this layer
-                    // should be !is_key_disposable()
-                    let range = layer_desc.get_key_range();
-                    let mut key = range.start;
-                    while key < range.end {
-                        debug_assert!(self.shard_identity.is_key_disposable(&key));
-                        key = key.next();
-                    }
-                }
-
-                drop_layers.push(layer);
-                continue;
-            } else if layer_local_page_count != u32::MAX
-                && layer_local_page_count == layer_raw_page_count
-            {
-                debug!(%layer,
-                    "layer is entirely shard local ({} keys), no need to filter it",
-                    layer_local_page_count
-                );
-                continue;
-            }
-
-            // Don't bother re-writing a layer unless it will at least halve its size
-            if layer_local_page_count != u32::MAX
-                && layer_local_page_count > layer_raw_page_count / 2
-            {
-                debug!(%layer,
-                    "layer is already mostly local ({}/{}), not rewriting",
-                    layer_local_page_count,
-                    layer_raw_page_count
-                );
-            }
-
-            // Don't bother re-writing a layer if it is within the PITR window: it will age-out eventually
-            // without incurring the I/O cost of a rewrite.
-            if layer_desc.get_lsn_range().end >= pitr_cutoff {
-                debug!(%layer, "Skipping rewrite of layer still in PITR window ({} >= {})",
-                    layer_desc.get_lsn_range().end, pitr_cutoff);
-                continue;
-            }
-
-            if layer_desc.is_delta() {
-                // We do not yet implement rewrite of delta layers
-                debug!(%layer, "Skipping rewrite of delta layer");
-                continue;
-            }
-
-            // Only rewrite layers if they would have different remote paths: either they belong to this
-            // shard but an old generation, or they belonged to another shard.  This also implicitly
-            // guarantees that the layer is persistent in remote storage (as only remote persistent
-            // layers are carried across shard splits, any local-only layer would be in the current generation)
-            if layer.metadata().generation == self.generation
-                && layer.metadata().shard.shard_count == self.shard_identity.count
-            {
-                debug!(%layer, "Skipping rewrite, is not from old generation");
-                continue;
-            }
-
-            if layers_to_rewrite.len() >= rewrite_max {
-                tracing::info!(%layer, "Will rewrite layer on a future compaction, already rewrote {}",
-                    layers_to_rewrite.len()
-                );
-                continue;
-            }
-
-            // Fall through: all our conditions for doing a rewrite passed.
-            // TODO: implement rewriting
-            tracing::debug!(%layer, "Would rewrite layer");
-        }
-
-        // Drop the layers read lock: we will acquire it for write in [`Self::rewrite_layers`]
-        drop(layers);
-
-        // TODO: collect layers to rewrite
-        let replace_layers = Vec::new();
-
-        // Update the LayerMap so that readers will use the new layers, and enqueue it for writing to remote storage
-        self.rewrite_layers(replace_layers, drop_layers).await?;
-
-        if let Some(remote_client) = self.remote_client.as_ref() {
-            // We wait for all uploads to complete before finishing this compaction stage.  This is not
-            // necessary for correctness, but it simplifies testing, and avoids proceeding with another
-            // Timeline's compaction while this timeline's uploads may be generating lots of disk I/O
-            // load.
-            remote_client.wait_completion().await?;
-        }
-
        Ok(())
    }

@@ -663,7 +520,7 @@ impl Timeline {
                            writer
                                .take()
                                .unwrap()
-                                .finish(prev_key.unwrap().next(), self, ctx)
+                                .finish(prev_key.unwrap().next(), self)
                                .await?,
                        );
                        writer = None;
@@ -705,11 +562,7 @@ impl Timeline {
                    );
                }

-                writer
-                    .as_mut()
-                    .unwrap()
-                    .put_value(key, lsn, value, ctx)
-                    .await?;
+                writer.as_mut().unwrap().put_value(key, lsn, value).await?;
            } else {
                debug!(
                    "Dropping key {} during compaction (it belongs on shard {:?})",
@@ -725,7 +578,7 @@ impl Timeline {
            prev_key = Some(key);
        }
        if let Some(writer) = writer {
-            new_layers.push(writer.finish(prev_key.unwrap().next(), self, ctx).await?);
+            new_layers.push(writer.finish(prev_key.unwrap().next(), self).await?);
        }

        // Sync layers
@@ -1119,7 +972,7 @@ impl CompactionJobExecutor for TimelineAdaptor {

            let value = val.load(ctx).await?;

-            writer.put_value(key, lsn, value, ctx).await?;
+            writer.put_value(key, lsn, value).await?;

            prev = Some((key, lsn));
        }
@@ -1135,7 +988,7 @@ impl CompactionJobExecutor for TimelineAdaptor {
        });

        let new_delta_layer = writer
-            .finish(prev.unwrap().0.next(), &self.timeline, ctx)
+            .finish(prev.unwrap().0.next(), &self.timeline)
            .await?;

        self.new_deltas.push(new_delta_layer);
@@ -1205,11 +1058,11 @@ impl TimelineAdaptor {
                        }
                    }
                };
-                image_layer_writer.put_image(key, img, ctx).await?;
+                image_layer_writer.put_image(key, img).await?;
                key = key.next();
            }
        }
-        let image_layer = image_layer_writer.finish(&self.timeline, ctx).await?;
+        let image_layer = image_layer_writer.finish(&self.timeline).await?;

        self.new_images.push(image_layer);

--- a/pageserver/src/tenant/timeline/delete.rs
+++ b/pageserver/src/tenant/timeline/delete.rs
@@ -422,10 +422,6 @@ impl DeleteTimelineFlow {
    pub(crate) fn is_finished(&self) -> bool {
        matches!(self, Self::Finished)
    }
-
-    pub(crate) fn is_not_started(&self) -> bool {
-        matches!(self, Self::NotStarted)
-    }
 }

 struct DeletionGuard(OwnedMutexGuard<DeleteTimelineFlow>);
--- a/pageserver/src/tenant/timeline/detach_ancestor.rs
+++ b/pageserver/src/tenant/timeline/detach_ancestor.rs
@@ -1,550 +0,0 @@
-use std::sync::Arc;
-
-use super::{layer_manager::LayerManager, Timeline};
-use crate::{
-    context::{DownloadBehavior, RequestContext},
-    task_mgr::TaskKind,
-    tenant::{
-        storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer},
-        Tenant,
-    },
-    virtual_file::{MaybeFatalIo, VirtualFile},
-};
-use tokio_util::sync::CancellationToken;
-use tracing::Instrument;
-use utils::{completion, generation::Generation, id::TimelineId, lsn::Lsn};
-
-#[derive(Debug, thiserror::Error)]
-pub(crate) enum Error {
-    #[error("no ancestors")]
-    NoAncestor,
-    #[error("too many ancestors")]
-    TooManyAncestors,
-    #[error("shutting down, please retry later")]
-    ShuttingDown,
-    #[error("detached timeline must receive writes before the operation")]
-    DetachedTimelineNeedsWrites,
-    #[error("flushing failed")]
-    FlushAncestor(#[source] anyhow::Error),
-    #[error("layer download failed")]
-    RewrittenDeltaDownloadFailed(#[source] anyhow::Error),
-    #[error("copying LSN prefix locally failed")]
-    CopyDeltaPrefix(#[source] anyhow::Error),
-    #[error("upload rewritten layer")]
-    UploadRewritten(#[source] anyhow::Error),
-
-    #[error("ancestor is already being detached by: {}", .0)]
-    OtherTimelineDetachOngoing(TimelineId),
-
-    #[error("remote copying layer failed")]
-    CopyFailed(#[source] anyhow::Error),
-
-    #[error("unexpected error")]
-    Unexpected(#[source] anyhow::Error),
-}
-
-pub(crate) struct PreparedTimelineDetach {
-    layers: Vec<Layer>,
-}
-
-/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments.
-#[derive(Debug)]
-pub(crate) struct Options {
-    pub(crate) rewrite_concurrency: std::num::NonZeroUsize,
-    pub(crate) copy_concurrency: std::num::NonZeroUsize,
-}
-
-impl Default for Options {
-    fn default() -> Self {
-        Self {
-            rewrite_concurrency: std::num::NonZeroUsize::new(2).unwrap(),
-            copy_concurrency: std::num::NonZeroUsize::new(10).unwrap(),
-        }
-    }
-}
-
-/// See [`Timeline::prepare_to_detach_from_ancestor`]
-pub(super) async fn prepare(
-    detached: &Arc<Timeline>,
-    tenant: &Tenant,
-    options: Options,
-    ctx: &RequestContext,
-) -> Result<(completion::Completion, PreparedTimelineDetach), Error> {
-    use Error::*;
-
-    if detached.remote_client.as_ref().is_none() {
-        unimplemented!("no new code for running without remote storage");
-    }
-
-    let Some((ancestor, ancestor_lsn)) = detached
-        .ancestor_timeline
-        .as_ref()
-        .map(|tl| (tl.clone(), detached.ancestor_lsn))
-    else {
-        return Err(NoAncestor);
-    };
-
-    if !ancestor_lsn.is_valid() {
-        return Err(NoAncestor);
-    }
-
-    if ancestor.ancestor_timeline.is_some() {
-        // non-technical requirement; we could flatten N ancestors just as easily but we chose
-        // not to
-        return Err(TooManyAncestors);
-    }
-
-    if detached.get_prev_record_lsn() == Lsn::INVALID
-        || detached.disk_consistent_lsn.load() == ancestor_lsn
-    {
-        // this is to avoid a problem that after detaching we would be unable to start up the
-        // compute because of "PREV_LSN: invalid".
-        return Err(DetachedTimelineNeedsWrites);
-    }
-
-    // before we acquire the gate, we must mark the ancestor as having a detach operation
-    // ongoing which will block other concurrent detach operations so we don't get to ackward
-    // situations where there would be two branches trying to reparent earlier branches.
-    let (guard, barrier) = completion::channel();
-
-    {
-        let mut guard = tenant.ongoing_timeline_detach.lock().unwrap();
-        if let Some((tl, other)) = guard.as_ref() {
-            if !other.is_ready() {
-                return Err(OtherTimelineDetachOngoing(*tl));
-            }
-        }
-        *guard = Some((detached.timeline_id, barrier));
-    }
-
-    let _gate_entered = detached.gate.enter().map_err(|_| ShuttingDown)?;
-
-    if ancestor_lsn >= ancestor.get_disk_consistent_lsn() {
-        let span =
-            tracing::info_span!("freeze_and_flush", ancestor_timeline_id=%ancestor.timeline_id);
-        async {
-            let started_at = std::time::Instant::now();
-            let freeze_and_flush = ancestor.freeze_and_flush0();
-            let mut freeze_and_flush = std::pin::pin!(freeze_and_flush);
-
-            let res =
-                tokio::time::timeout(std::time::Duration::from_secs(1), &mut freeze_and_flush)
-                    .await;
-
-            let res = match res {
-                Ok(res) => res,
-                Err(_elapsed) => {
-                    tracing::info!("freezing and flushing ancestor is still ongoing");
-                    freeze_and_flush.await
-                }
-            };
-
-            res.map_err(FlushAncestor)?;
-
-            // we do not need to wait for uploads to complete but we do need `struct Layer`,
-            // copying delta prefix is unsupported currently for `InMemoryLayer`.
-            tracing::info!(
-                elapsed_ms = started_at.elapsed().as_millis(),
-                "froze and flushed the ancestor"
-            );
-            Ok(())
-        }
-        .instrument(span)
-        .await?;
-    }
-
-    let end_lsn = ancestor_lsn + 1;
-
-    let (filtered_layers, straddling_branchpoint, rest_of_historic) = {
-        // we do not need to start from our layers, because they can only be layers that come
-        // *after* ancestor_lsn
-        let layers = tokio::select! {
-            guard = ancestor.layers.read() => guard,
-            _ = detached.cancel.cancelled() => {
-                return Err(ShuttingDown);
-            }
-            _ = ancestor.cancel.cancelled() => {
-                return Err(ShuttingDown);
-            }
-        };
-
-        // between retries, these can change if compaction or gc ran in between. this will mean
-        // we have to redo work.
-        partition_work(ancestor_lsn, &layers)
-    };
-
-    // TODO: layers are already sorted by something: use that to determine how much of remote
-    // copies are already done.
-    tracing::info!(filtered=%filtered_layers, to_rewrite = straddling_branchpoint.len(), historic=%rest_of_historic.len(), "collected layers");
-
-    // TODO: copying and lsn prefix copying could be done at the same time with a single fsync after
-    let mut new_layers: Vec<Layer> =
-        Vec::with_capacity(straddling_branchpoint.len() + rest_of_historic.len());
-
-    {
-        tracing::debug!(to_rewrite = %straddling_branchpoint.len(), "copying prefix of delta layers");
-
-        let mut tasks = tokio::task::JoinSet::new();
-
-        let mut wrote_any = false;
-
-        let limiter = Arc::new(tokio::sync::Semaphore::new(
-            options.rewrite_concurrency.get(),
-        ));
-
-        for layer in straddling_branchpoint {
-            let limiter = limiter.clone();
-            let timeline = detached.clone();
-            let ctx = ctx.detached_child(TaskKind::DetachAncestor, DownloadBehavior::Download);
-
-            tasks.spawn(async move {
-                let _permit = limiter.acquire().await;
-                let copied =
-                    upload_rewritten_layer(end_lsn, &layer, &timeline, &timeline.cancel, &ctx)
-                        .await?;
-                Ok(copied)
-            });
-        }
-
-        while let Some(res) = tasks.join_next().await {
-            match res {
-                Ok(Ok(Some(copied))) => {
-                    wrote_any = true;
-                    tracing::info!(layer=%copied, "rewrote and uploaded");
-                    new_layers.push(copied);
-                }
-                Ok(Ok(None)) => {}
-                Ok(Err(e)) => return Err(e),
-                Err(je) => return Err(Unexpected(je.into())),
-            }
-        }
-
-        // FIXME: the fsync should be mandatory, after both rewrites and copies
-        if wrote_any {
-            let timeline_dir = VirtualFile::open(
-                &detached
-                    .conf
-                    .timeline_path(&detached.tenant_shard_id, &detached.timeline_id),
-            )
-            .await
-            .fatal_err("VirtualFile::open for timeline dir fsync");
-            timeline_dir
-                .sync_all()
-                .await
-                .fatal_err("VirtualFile::sync_all timeline dir");
-        }
-    }
-
-    let mut tasks = tokio::task::JoinSet::new();
-    let limiter = Arc::new(tokio::sync::Semaphore::new(options.copy_concurrency.get()));
-
-    for adopted in rest_of_historic {
-        let limiter = limiter.clone();
-        let timeline = detached.clone();
-
-        tasks.spawn(
-            async move {
-                let _permit = limiter.acquire().await;
-                let owned =
-                    remote_copy(&adopted, &timeline, timeline.generation, &timeline.cancel).await?;
-                tracing::info!(layer=%owned, "remote copied");
-                Ok(owned)
-            }
-            .in_current_span(),
-        );
-    }
-
-    while let Some(res) = tasks.join_next().await {
-        match res {
-            Ok(Ok(owned)) => {
-                new_layers.push(owned);
-            }
-            Ok(Err(failed)) => {
-                return Err(failed);
-            }
-            Err(je) => return Err(Unexpected(je.into())),
-        }
-    }
-
-    // TODO: fsync directory again if we hardlinked something
-
-    let prepared = PreparedTimelineDetach { layers: new_layers };
-
-    Ok((guard, prepared))
-}
-
-fn partition_work(
-    ancestor_lsn: Lsn,
-    source_layermap: &LayerManager,
-) -> (usize, Vec<Layer>, Vec<Layer>) {
-    let mut straddling_branchpoint = vec![];
-    let mut rest_of_historic = vec![];
-
-    let mut later_by_lsn = 0;
-
-    for desc in source_layermap.layer_map().iter_historic_layers() {
-        // off by one chances here:
-        // - start is inclusive
-        // - end is exclusive
-        if desc.lsn_range.start > ancestor_lsn {
-            later_by_lsn += 1;
-            continue;
-        }
-
-        let target = if desc.lsn_range.start <= ancestor_lsn
-            && desc.lsn_range.end > ancestor_lsn
-            && desc.is_delta
-        {
-            // TODO: image layer at Lsn optimization
-            &mut straddling_branchpoint
-        } else {
-            &mut rest_of_historic
-        };
-
-        target.push(source_layermap.get_from_desc(&desc));
-    }
-
-    (later_by_lsn, straddling_branchpoint, rest_of_historic)
-}
-
-async fn upload_rewritten_layer(
-    end_lsn: Lsn,
-    layer: &Layer,
-    target: &Arc<Timeline>,
-    cancel: &CancellationToken,
-    ctx: &RequestContext,
-) -> Result<Option<Layer>, Error> {
-    use Error::UploadRewritten;
-    let copied = copy_lsn_prefix(end_lsn, layer, target, ctx).await?;
-
-    let Some(copied) = copied else {
-        return Ok(None);
-    };
-
-    // FIXME: better shuttingdown error
-    target
-        .remote_client
-        .as_ref()
-        .unwrap()
-        .upload_layer_file(&copied, cancel)
-        .await
-        .map_err(UploadRewritten)?;
-
-    Ok(Some(copied.into()))
-}
-
-async fn copy_lsn_prefix(
-    end_lsn: Lsn,
-    layer: &Layer,
-    target_timeline: &Arc<Timeline>,
-    ctx: &RequestContext,
-) -> Result<Option<ResidentLayer>, Error> {
-    use Error::{CopyDeltaPrefix, RewrittenDeltaDownloadFailed};
-
-    tracing::debug!(%layer, %end_lsn, "copying lsn prefix");
-
-    let mut writer = DeltaLayerWriter::new(
-        target_timeline.conf,
-        target_timeline.timeline_id,
-        target_timeline.tenant_shard_id,
-        layer.layer_desc().key_range.start,
-        layer.layer_desc().lsn_range.start..end_lsn,
-    )
-    .await
-    .map_err(CopyDeltaPrefix)?;
-
-    let resident = layer
-        .download_and_keep_resident()
-        .await
-        // likely shutdown
-        .map_err(RewrittenDeltaDownloadFailed)?;
-
-    let records = resident
-        .copy_delta_prefix(&mut writer, end_lsn, ctx)
-        .await
-        .map_err(CopyDeltaPrefix)?;
-
-    drop(resident);
-
-    tracing::debug!(%layer, records, "copied records");
-
-    if records == 0 {
-        drop(writer);
-        // TODO: we might want to store an empty marker in remote storage for this
-        // layer so that we will not needlessly walk `layer` on repeated attempts.
-        Ok(None)
-    } else {
-        // reuse the key instead of adding more holes between layers by using the real
-        // highest key in the layer.
-        let reused_highest_key = layer.layer_desc().key_range.end;
-        let copied = writer
-            .finish(reused_highest_key, target_timeline, ctx)
-            .await
-            .map_err(CopyDeltaPrefix)?;
-
-        tracing::debug!(%layer, %copied, "new layer produced");
-
-        Ok(Some(copied))
-    }
-}
-
-/// Creates a new Layer instance for the adopted layer, and ensures it is found from the remote
-/// storage on successful return without the adopted layer being added to `index_part.json`.
-async fn remote_copy(
-    adopted: &Layer,
-    adoptee: &Arc<Timeline>,
-    generation: Generation,
-    cancel: &CancellationToken,
-) -> Result<Layer, Error> {
-    use Error::CopyFailed;
-
-    // depending if Layer::keep_resident we could hardlink
-
-    let mut metadata = adopted.metadata();
-    debug_assert!(metadata.generation <= generation);
-    metadata.generation = generation;
-
-    let owned = crate::tenant::storage_layer::Layer::for_evicted(
-        adoptee.conf,
-        adoptee,
-        adopted.layer_desc().filename(),
-        metadata,
-    );
-
-    // FIXME: better shuttingdown error
-    adoptee
-        .remote_client
-        .as_ref()
-        .unwrap()
-        .copy_timeline_layer(adopted, &owned, cancel)
-        .await
-        .map(move |()| owned)
-        .map_err(CopyFailed)
-}
-
-/// See [`Timeline::complete_detaching_timeline_ancestor`].
-pub(super) async fn complete(
-    detached: &Arc<Timeline>,
-    tenant: &Tenant,
-    prepared: PreparedTimelineDetach,
-    _ctx: &RequestContext,
-) -> Result<Vec<TimelineId>, anyhow::Error> {
-    let rtc = detached
-        .remote_client
-        .as_ref()
-        .expect("has to have a remote timeline client for timeline ancestor detach");
-
-    let PreparedTimelineDetach { layers } = prepared;
-
-    let ancestor = detached
-        .get_ancestor_timeline()
-        .expect("must still have a ancestor");
-    let ancestor_lsn = detached.get_ancestor_lsn();
-
-    // publish the prepared layers before we reparent any of the timelines, so that on restart
-    // reparented timelines find layers. also do the actual detaching.
-    //
-    // if we crash after this operation, we will at least come up having detached a timeline, but
-    // we cannot go back and reparent the timelines which would had been reparented in normal
-    // execution.
-    //
-    // this is not perfect, but it avoids us a retry happening after a compaction or gc on restart
-    // which could give us a completely wrong layer combination.
-    rtc.schedule_adding_existing_layers_to_index_detach_and_wait(
-        &layers,
-        (ancestor.timeline_id, ancestor_lsn),
-    )
-    .await?;
-
-    let mut tasks = tokio::task::JoinSet::new();
-
-    // because we are now keeping the slot in progress, it is unlikely that there will be any
-    // timeline deletions during this time. if we raced one, then we'll just ignore it.
-    tenant
-        .timelines
-        .lock()
-        .unwrap()
-        .values()
-        .filter_map(|tl| {
-            if Arc::ptr_eq(tl, detached) {
-                return None;
-            }
-
-            if !tl.is_active() {
-                return None;
-            }
-
-            let tl_ancestor = tl.ancestor_timeline.as_ref()?;
-            let is_same = Arc::ptr_eq(&ancestor, tl_ancestor);
-            let is_earlier = tl.get_ancestor_lsn() <= ancestor_lsn;
-
-            let is_deleting = tl
-                .delete_progress
-                .try_lock()
-                .map(|flow| !flow.is_not_started())
-                .unwrap_or(true);
-
-            if is_same && is_earlier && !is_deleting {
-                Some(tl.clone())
-            } else {
-                None
-            }
-        })
-        .for_each(|timeline| {
-            // important in this scope: we are holding the Tenant::timelines lock
-            let span = tracing::info_span!("reparent", reparented=%timeline.timeline_id);
-            let new_parent = detached.timeline_id;
-
-            tasks.spawn(
-                async move {
-                    let res = timeline
-                        .remote_client
-                        .as_ref()
-                        .expect("reparented has to have remote client because detached has one")
-                        .schedule_reparenting_and_wait(&new_parent)
-                        .await;
-
-                    match res {
-                        Ok(()) => Some(timeline),
-                        Err(e) => {
-                            // with the use of tenant slot, we no longer expect these.
-                            tracing::warn!("reparenting failed: {e:#}");
-                            None
-                        }
-                    }
-                }
-                .instrument(span),
-            );
-        });
-
-    let reparenting_candidates = tasks.len();
-    let mut reparented = Vec::with_capacity(tasks.len());
-
-    while let Some(res) = tasks.join_next().await {
-        match res {
-            Ok(Some(timeline)) => {
-                tracing::info!(reparented=%timeline.timeline_id, "reparenting done");
-                reparented.push(timeline.timeline_id);
-            }
-            Ok(None) => {
-                // lets just ignore this for now. one or all reparented timelines could had
-                // started deletion, and that is fine.
-            }
-            Err(je) if je.is_cancelled() => unreachable!("not used"),
-            Err(je) if je.is_panic() => {
-                // ignore; it's better to continue with a single reparenting failing (or even
-                // all of them) in order to get to the goal state.
-                //
-                // these timelines will never be reparentable, but they can be always detached as
-                // separate tree roots.
-            }
-            Err(je) => tracing::error!("unexpected join error: {je:?}"),
-        }
-    }
-
-    if reparenting_candidates != reparented.len() {
-        tracing::info!("failed to reparent some candidates");
-    }
-
-    Ok(reparented)
-}
--- a/pageserver/src/tenant/timeline/init.rs
+++ b/pageserver/src/tenant/timeline/init.rs
@@ -12,7 +12,7 @@ use crate::{
    METADATA_FILE_NAME,
 };
 use anyhow::Context;
-use camino::{Utf8Path, Utf8PathBuf};
+use camino::Utf8Path;
 use pageserver_api::shard::ShardIndex;
 use std::{collections::HashMap, str::FromStr};
 use utils::lsn::Lsn;
@@ -20,7 +20,7 @@ use utils::lsn::Lsn;
 /// Identified files in the timeline directory.
 pub(super) enum Discovered {
    /// The only one we care about
-    Layer(LayerFileName, Utf8PathBuf, u64),
+    Layer(LayerFileName, u64),
    /// Old ephmeral files from previous launches, should be removed
    Ephemeral(String),
    /// Old temporary timeline files, unsure what these really are, should be removed
@@ -46,7 +46,7 @@ pub(super) fn scan_timeline_dir(path: &Utf8Path) -> anyhow::Result<Vec<Discovere
        let discovered = match LayerFileName::from_str(&file_name) {
            Ok(file_name) => {
                let file_size = direntry.metadata()?.len();
-                Discovered::Layer(file_name, direntry.path().to_owned(), file_size)
+                Discovered::Layer(file_name, file_size)
            }
            Err(_) => {
                if file_name == METADATA_FILE_NAME {
@@ -104,38 +104,26 @@ pub(super) enum DismissedLayer {

 /// Merges local discoveries and remote [`IndexPart`] to a collection of decisions.
 pub(super) fn reconcile(
-    discovered: Vec<(LayerFileName, Utf8PathBuf, u64)>,
+    discovered: Vec<(LayerFileName, u64)>,
    index_part: Option<&IndexPart>,
    disk_consistent_lsn: Lsn,
    generation: Generation,
    shard: ShardIndex,
-) -> Vec<(
-    LayerFileName,
-    Option<Utf8PathBuf>,
-    Result<Decision, DismissedLayer>,
-)> {
+) -> Vec<(LayerFileName, Result<Decision, DismissedLayer>)> {
    use Decision::*;

-    // name => (local_path, local_metadata, remote_metadata)
-    type Collected = HashMap<
-        LayerFileName,
-        (
-            Option<Utf8PathBuf>,
-            Option<LayerFileMetadata>,
-            Option<LayerFileMetadata>,
-        ),
-    >;
+    // name => (local, remote)
+    type Collected = HashMap<LayerFileName, (Option<LayerFileMetadata>, Option<LayerFileMetadata>)>;

    let mut discovered = discovered
        .into_iter()
-        .map(|(layer_name, local_path, file_size)| {
+        .map(|(name, file_size)| {
            (
-                layer_name,
+                name,
                // The generation and shard here will be corrected to match IndexPart in the merge below, unless
                // it is not in IndexPart, in which case using our current generation makes sense
                // because it will be uploaded in this generation.
                (
-                    Some(local_path),
                    Some(LayerFileMetadata::new(file_size, generation, shard)),
                    None,
                ),
@@ -152,15 +140,15 @@ pub(super) fn reconcile(
        .map(|(name, metadata)| (name, LayerFileMetadata::from(metadata)))
        .for_each(|(name, metadata)| {
            if let Some(existing) = discovered.get_mut(name) {
-                existing.2 = Some(metadata);
+                existing.1 = Some(metadata);
            } else {
-                discovered.insert(name.to_owned(), (None, None, Some(metadata)));
+                discovered.insert(name.to_owned(), (None, Some(metadata)));
            }
        });

    discovered
        .into_iter()
-        .map(|(name, (local_path, local, remote))| {
+        .map(|(name, (local, remote))| {
            let decision = if name.is_in_future(disk_consistent_lsn) {
                Err(DismissedLayer::Future { local })
            } else {
@@ -177,7 +165,7 @@ pub(super) fn reconcile(
                }
            };

-            (name, local_path, decision)
+            (name, decision)
        })
        .collect::<Vec<_>>()
 }
--- a/pageserver/src/tenant/timeline/layer_manager.rs
+++ b/pageserver/src/tenant/timeline/layer_manager.rs
@@ -205,24 +205,6 @@ impl LayerManager {
        updates.flush();
    }

-    /// Called when compaction is completed.
-    pub(crate) fn rewrite_layers(
-        &mut self,
-        rewrite_layers: &[(Layer, ResidentLayer)],
-        drop_layers: &[Layer],
-        _metrics: &TimelineMetrics,
-    ) {
-        let mut updates = self.layer_map.batch_update();
-
-        // TODO: implement rewrites (currently this code path only used for drops)
-        assert!(rewrite_layers.is_empty());
-
-        for l in drop_layers {
-            Self::delete_historic_layer(l, &mut updates, &mut self.layer_fmgr);
-        }
-        updates.flush();
-    }
-
    /// Called when garbage collect has selected the layers to be removed.
    pub(crate) fn finish_gc_timeline(&mut self, gc_layers: &[Layer]) {
        let mut updates = self.layer_map.batch_update();
--- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
+++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs
@@ -1535,7 +1535,7 @@ mod tests {

        let harness = TenantHarness::create("switch_to_same_availability_zone")?;
        let mut state = dummy_state(&harness).await;
-        state.conf.availability_zone.clone_from(&test_az);
+        state.conf.availability_zone = test_az.clone();
        let current_lsn = Lsn(100_000).align();
        let now = Utc::now().naive_utc();

@@ -1568,7 +1568,7 @@ mod tests {
        // We have another safekeeper with the same commit_lsn, and it have the same availability zone as
        // the current pageserver.
        let mut same_az_sk = dummy_broker_sk_timeline(current_lsn.0, "same_az", now);
-        same_az_sk.timeline.availability_zone.clone_from(&test_az);
+        same_az_sk.timeline.availability_zone = test_az.clone();

        state.wal_stream_candidates = HashMap::from([
            (
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -10,7 +10,6 @@
 //! This is similar to PostgreSQL's virtual file descriptor facility in
 //! src/backend/storage/file/fd.c
 //!
-use crate::context::RequestContext;
 use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC};

 use crate::page_cache::PageWriteGuard;
@@ -616,7 +615,6 @@ impl VirtualFile {
        &self,
        buf: B,
        mut offset: u64,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<(), Error>) {
        let buf_len = buf.bytes_init();
        if buf_len == 0 {
@@ -625,7 +623,7 @@ impl VirtualFile {
        let mut buf = buf.slice(0..buf_len);
        while !buf.is_empty() {
            let res;
-            (buf, res) = self.write_at(buf, offset, ctx).await;
+            (buf, res) = self.write_at(buf, offset).await;
            match res {
                Ok(0) => {
                    return (
@@ -654,7 +652,6 @@ impl VirtualFile {
    pub async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> (B::Buf, Result<usize, Error>) {
        let nbytes = buf.bytes_init();
        if nbytes == 0 {
@@ -663,7 +660,7 @@ impl VirtualFile {
        let mut buf = buf.slice(0..nbytes);
        while !buf.is_empty() {
            let res;
-            (buf, res) = self.write(buf, ctx).await;
+            (buf, res) = self.write(buf).await;
            match res {
                Ok(0) => {
                    return (
@@ -687,10 +684,9 @@ impl VirtualFile {
    async fn write<B: IoBuf + Send>(
        &mut self,
        buf: Slice<B>,
-        ctx: &RequestContext,
    ) -> (Slice<B>, Result<usize, std::io::Error>) {
        let pos = self.pos;
-        let (buf, res) = self.write_at(buf, pos, ctx).await;
+        let (buf, res) = self.write_at(buf, pos).await;
        let n = match res {
            Ok(n) => n,
            Err(e) => return (buf, Err(e)),
@@ -728,7 +724,6 @@ impl VirtualFile {
        &self,
        buf: Slice<B>,
        offset: u64,
-        _ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */
    ) -> (Slice<B>, Result<usize, Error>) {
        let file_guard = match self.lock_file().await {
            Ok(file_guard) => file_guard,
@@ -1093,9 +1088,8 @@ impl OwnedAsyncWriter for VirtualFile {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
-        let (buf, res) = VirtualFile::write_all(self, buf, ctx).await;
+        let (buf, res) = VirtualFile::write_all(self, buf).await;
        res.map(move |v| (v, buf))
    }
 }
@@ -1152,9 +1146,6 @@ fn get_open_files() -> &'static OpenFiles {

 #[cfg(test)]
 mod tests {
-    use crate::context::DownloadBehavior;
-    use crate::task_mgr::TaskKind;
-
    use super::*;
    use rand::seq::SliceRandom;
    use rand::thread_rng;
@@ -1186,11 +1177,10 @@ mod tests {
            &self,
            buf: B,
            offset: u64,
-            ctx: &RequestContext,
        ) -> Result<(), Error> {
            match self {
                MaybeVirtualFile::VirtualFile(file) => {
-                    let (_buf, res) = file.write_all_at(buf, offset, ctx).await;
+                    let (_buf, res) = file.write_all_at(buf, offset).await;
                    res
                }
                MaybeVirtualFile::File(file) => {
@@ -1211,11 +1201,10 @@ mod tests {
        async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
            &mut self,
            buf: B,
-            ctx: &RequestContext,
        ) -> Result<(), Error> {
            match self {
                MaybeVirtualFile::VirtualFile(file) => {
-                    let (_buf, res) = file.write_all(buf, ctx).await;
+                    let (_buf, res) = file.write_all(buf).await;
                    res.map(|_| ())
                }
                MaybeVirtualFile::File(file) => {
@@ -1286,7 +1275,6 @@ mod tests {
        OF: Fn(Utf8PathBuf, OpenOptions) -> FT,
        FT: Future<Output = Result<MaybeVirtualFile, std::io::Error>>,
    {
-        let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
        let testdir = crate::config::PageServerConf::test_repo_dir(testname);
        std::fs::create_dir_all(&testdir)?;

@@ -1300,7 +1288,7 @@ mod tests {
                .to_owned(),
        )
        .await?;
-        file_a.write_all(b"foobar".to_vec(), &ctx).await?;
+        file_a.write_all(b"foobar".to_vec()).await?;

        // cannot read from a file opened in write-only mode
        let _ = file_a.read_string().await.unwrap_err();
@@ -1309,7 +1297,7 @@ mod tests {
        let mut file_a = openfunc(path_a, OpenOptions::new().read(true).to_owned()).await?;

        // cannot write to a file opened in read-only mode
-        let _ = file_a.write_all(b"bar".to_vec(), &ctx).await.unwrap_err();
+        let _ = file_a.write_all(b"bar".to_vec()).await.unwrap_err();

        // Try simple read
        assert_eq!("foobar", file_a.read_string().await?);
@@ -1351,8 +1339,8 @@ mod tests {
                .to_owned(),
        )
        .await?;
-        file_b.write_all_at(b"BAR".to_vec(), 3, &ctx).await?;
-        file_b.write_all_at(b"FOO".to_vec(), 0, &ctx).await?;
+        file_b.write_all_at(b"BAR".to_vec(), 3).await?;
+        file_b.write_all_at(b"FOO".to_vec(), 0).await?;

        assert_eq!(file_b.read_string_at(2, 3).await?, "OBA");

--- a/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs
+++ b/pageserver/src/virtual_file/owned_buffers_io/util/size_tracking_writer.rs
@@ -1,4 +1,4 @@
-use crate::{context::RequestContext, virtual_file::owned_buffers_io::write::OwnedAsyncWriter};
+use crate::virtual_file::owned_buffers_io::write::OwnedAsyncWriter;
 use tokio_epoll_uring::{BoundedBuf, IoBuf};

 pub struct Writer<W> {
@@ -38,9 +38,8 @@ where
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
-        let (nwritten, buf) = self.dst.write_all(buf, ctx).await?;
+        let (nwritten, buf) = self.dst.write_all(buf).await?;
        self.bytes_amount += u64::try_from(nwritten).unwrap();
        Ok((nwritten, buf))
    }
--- a/pageserver/src/virtual_file/owned_buffers_io/write.rs
+++ b/pageserver/src/virtual_file/owned_buffers_io/write.rs
@@ -1,15 +1,12 @@
 use bytes::BytesMut;
 use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice};

-use crate::context::RequestContext;
-
 /// A trait for doing owned-buffer write IO.
 /// Think [`tokio::io::AsyncWrite`] but with owned buffers.
 pub trait OwnedAsyncWriter {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        ctx: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)>;
 }

@@ -60,9 +57,8 @@ where
    }

    #[cfg_attr(target_os = "macos", allow(dead_code))]
-    pub async fn flush_and_into_inner(mut self, ctx: &RequestContext) -> std::io::Result<W> {
-        self.flush(ctx).await?;
-
+    pub async fn flush_and_into_inner(mut self) -> std::io::Result<W> {
+        self.flush().await?;
        let Self { buf, writer } = self;
        assert!(buf.is_some());
        Ok(writer)
@@ -76,15 +72,14 @@ where
    }

    #[cfg_attr(target_os = "macos", allow(dead_code))]
-    pub async fn write_buffered<S: IoBuf + Send>(
-        &mut self,
-        chunk: Slice<S>,
-        ctx: &RequestContext,
-    ) -> std::io::Result<(usize, S)> {
+    pub async fn write_buffered<S: IoBuf>(&mut self, chunk: Slice<S>) -> std::io::Result<(usize, S)>
+    where
+        S: IoBuf + Send,
+    {
        let chunk_len = chunk.len();
        // avoid memcpy for the middle of the chunk
        if chunk.len() >= self.buf().cap() {
-            self.flush(ctx).await?;
+            self.flush().await?;
            // do a big write, bypassing `buf`
            assert_eq!(
                self.buf
@@ -93,7 +88,7 @@ where
                    .pending(),
                0
            );
-            let (nwritten, chunk) = self.writer.write_all(chunk, ctx).await?;
+            let (nwritten, chunk) = self.writer.write_all(chunk).await?;
            assert_eq!(nwritten, chunk_len);
            return Ok((nwritten, chunk));
        }
@@ -109,7 +104,7 @@ where
            slice = &slice[n..];
            if buf.pending() >= buf.cap() {
                assert_eq!(buf.pending(), buf.cap());
-                self.flush(ctx).await?;
+                self.flush().await?;
            }
        }
        assert!(slice.is_empty(), "by now we should have drained the chunk");
@@ -121,11 +116,7 @@ where
    /// It is less performant because we always have to copy the borrowed data into the internal buffer
    /// before we can do the IO. The [`Self::write_buffered`] can avoid this, which is more performant
    /// for large writes.
-    pub async fn write_buffered_borrowed(
-        &mut self,
-        mut chunk: &[u8],
-        ctx: &RequestContext,
-    ) -> std::io::Result<usize> {
+    pub async fn write_buffered_borrowed(&mut self, mut chunk: &[u8]) -> std::io::Result<usize> {
        let chunk_len = chunk.len();
        while !chunk.is_empty() {
            let buf = self.buf.as_mut().expect("must not use after an error");
@@ -136,20 +127,20 @@ where
            chunk = &chunk[n..];
            if buf.pending() >= buf.cap() {
                assert_eq!(buf.pending(), buf.cap());
-                self.flush(ctx).await?;
+                self.flush().await?;
            }
        }
        Ok(chunk_len)
    }

-    async fn flush(&mut self, ctx: &RequestContext) -> std::io::Result<()> {
+    async fn flush(&mut self) -> std::io::Result<()> {
        let buf = self.buf.take().expect("must not use after an error");
        let buf_len = buf.pending();
        if buf_len == 0 {
            self.buf = Some(buf);
            return Ok(());
        }
-        let (nwritten, io_buf) = self.writer.write_all(buf.flush(), ctx).await?;
+        let (nwritten, io_buf) = self.writer.write_all(buf.flush()).await?;
        assert_eq!(nwritten, buf_len);
        self.buf = Some(Buffer::reuse_after_flush(io_buf));
        Ok(())
@@ -215,7 +206,6 @@ impl OwnedAsyncWriter for Vec<u8> {
    async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
        &mut self,
        buf: B,
-        _: &RequestContext,
    ) -> std::io::Result<(usize, B::Buf)> {
        let nbytes = buf.bytes_init();
        if nbytes == 0 {
@@ -232,8 +222,6 @@ mod tests {
    use bytes::BytesMut;

    use super::*;
-    use crate::context::{DownloadBehavior, RequestContext};
-    use crate::task_mgr::TaskKind;

    #[derive(Default)]
    struct RecorderWriter {
@@ -243,7 +231,6 @@ mod tests {
        async fn write_all<B: BoundedBuf<Buf = Buf>, Buf: IoBuf + Send>(
            &mut self,
            buf: B,
-            _: &RequestContext,
        ) -> std::io::Result<(usize, B::Buf)> {
            let nbytes = buf.bytes_init();
            if nbytes == 0 {
@@ -256,14 +243,10 @@ mod tests {
        }
    }

-    fn test_ctx() -> RequestContext {
-        RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error)
-    }
-
    macro_rules! write {
        ($writer:ident, $data:literal) => {{
            $writer
-                .write_buffered(::bytes::Bytes::from_static($data).slice_full(), &test_ctx())
+                .write_buffered(::bytes::Bytes::from_static($data).slice_full())
                .await?;
        }};
    }
@@ -277,7 +260,7 @@ mod tests {
        write!(writer, b"c");
        write!(writer, b"d");
        write!(writer, b"e");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            vec![Vec::from(b"ab"), Vec::from(b"cd"), Vec::from(b"e")]
@@ -293,7 +276,7 @@ mod tests {
        write!(writer, b"de");
        write!(writer, b"");
        write!(writer, b"fghijk");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            vec![Vec::from(b"abc"), Vec::from(b"de"), Vec::from(b"fghijk")]
@@ -309,7 +292,7 @@ mod tests {
        write!(writer, b"bc");
        write!(writer, b"d");
        write!(writer, b"e");
-        let recorder = writer.flush_and_into_inner(&test_ctx()).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            vec![Vec::from(b"a"), Vec::from(b"bc"), Vec::from(b"de")]
@@ -319,20 +302,18 @@ mod tests {

    #[tokio::test]
    async fn test_write_all_borrowed_always_goes_through_buffer() -> std::io::Result<()> {
-        let ctx = test_ctx();
-        let ctx = &ctx;
        let recorder = RecorderWriter::default();
        let mut writer = BufferedWriter::new(recorder, BytesMut::with_capacity(2));

-        writer.write_buffered_borrowed(b"abc", ctx).await?;
-        writer.write_buffered_borrowed(b"d", ctx).await?;
-        writer.write_buffered_borrowed(b"e", ctx).await?;
-        writer.write_buffered_borrowed(b"fg", ctx).await?;
-        writer.write_buffered_borrowed(b"hi", ctx).await?;
-        writer.write_buffered_borrowed(b"j", ctx).await?;
-        writer.write_buffered_borrowed(b"klmno", ctx).await?;
+        writer.write_buffered_borrowed(b"abc").await?;
+        writer.write_buffered_borrowed(b"d").await?;
+        writer.write_buffered_borrowed(b"e").await?;
+        writer.write_buffered_borrowed(b"fg").await?;
+        writer.write_buffered_borrowed(b"hi").await?;
+        writer.write_buffered_borrowed(b"j").await?;
+        writer.write_buffered_borrowed(b"klmno").await?;

-        let recorder = writer.flush_and_into_inner(ctx).await?;
+        let recorder = writer.flush_and_into_inner().await?;
        assert_eq!(
            recorder.writes,
            {
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -14,8 +14,7 @@ OBJS = \
 	relsize_cache.o \
 	walproposer.o \
 	walproposer_pg.o \
-	control_plane_connector.o \
-	walsender_hooks.o
+	control_plane_connector.o

 PG_CPPFLAGS = -I$(libpq_srcdir)
 SHLIB_LINK_INTERNAL = $(libpq)
--- a/pgxn/neon/libpagestore.c
+++ b/pgxn/neon/libpagestore.c
@@ -49,7 +49,7 @@ char	   *neon_auth_token;
 int			readahead_buffer_size = 128;
 int			flush_every_n_requests = 8;

-int         neon_protocol_version = 2;
+int         neon_protocol_version = 1;

 static int	n_reconnect_attempts = 0;
 static int	max_reconnect_attempts = 60;
@@ -860,7 +860,7 @@ pg_init_libpagestore(void)
 							"Version of compute<->page server protocol",
 							NULL,
 							&neon_protocol_version,
-							2, /* use protocol version 2 */
+							1, /* default to old protocol for now */
 							1, /* min */
 							2, /* max */
 							PGC_SU_BACKEND,
--- a/pgxn/neon/neon.c
+++ b/pgxn/neon/neon.c
@@ -34,7 +34,6 @@
 #include "walproposer.h"
 #include "pagestore_client.h"
 #include "control_plane_connector.h"
-#include "walsender_hooks.h"

 PG_MODULE_MAGIC;
 void		_PG_init(void);
@@ -266,6 +265,7 @@ LogicalSlotsMonitorMain(Datum main_arg)
 	}
 }

+
 void
 _PG_init(void)
 {
@@ -279,7 +279,6 @@ _PG_init(void)

 	pg_init_libpagestore();
 	pg_init_walproposer();
-        WalSender_Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines;

 	InitLogicalReplicationMonitor();

--- a/pgxn/neon/neon_walreader.c
+++ b/pgxn/neon/neon_walreader.c
@@ -36,7 +36,10 @@

 static NeonWALReadResult NeonWALReadRemote(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli);
 static NeonWALReadResult NeonWALReaderReadMsg(NeonWALReader *state);
+static void NeonWALReaderResetRemote(NeonWALReader *state);
 static bool NeonWALReadLocal(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli);
+static bool neon_wal_segment_open(NeonWALReader *state, XLogSegNo nextSegNo, TimeLineID *tli_p);
+static void neon_wal_segment_close(NeonWALReader *state);
 static bool is_wal_segment_exists(XLogSegNo segno, int segsize,
 								  TimeLineID tli);

@@ -79,9 +82,8 @@ struct NeonWALReader
 	XLogRecPtr	req_lsn;
 	Size		req_len;
 	Size		req_progress;
-	char		donor_conninfo[MAXCONNINFO];
+	WalProposer *wp;			/* we learn donor through walproposer */
 	char		donor_name[64]; /* saved donor safekeeper name for logging */
-	XLogRecPtr	donor_lsn;
 	/* state of connection to safekeeper */
 	NeonWALReaderRemoteState rem_state;
 	WalProposerConn *wp_conn;
@@ -105,7 +107,7 @@ struct NeonWALReader

 /* palloc and initialize NeonWALReader */
 NeonWALReader *
-NeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, char *log_prefix)
+NeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, WalProposer *wp, char *log_prefix)
 {
 	NeonWALReader *reader;

@@ -121,6 +123,8 @@ NeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, char *log_
 	reader->seg.ws_tli = 0;
 	reader->segcxt.ws_segsize = wal_segment_size;

+	reader->wp = wp;
+
 	reader->rem_state = RS_NONE;

 	if (log_prefix)
@@ -200,16 +204,21 @@ NeonWALReadRemote(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size cou
 {
 	if (state->rem_state == RS_NONE)
 	{
-		if (!NeonWALReaderUpdateDonor(state))
+		XLogRecPtr	donor_lsn;
+
+		/* no connection yet; start one */
+		Safekeeper *donor = GetDonor(state->wp, &donor_lsn);
+
+		if (donor == NULL)
 		{
 			snprintf(state->err_msg, sizeof(state->err_msg),
 					 "failed to establish remote connection to fetch WAL: no donor available");
 			return NEON_WALREAD_ERROR;
-
 		}
-		/* no connection yet; start one */
-		nwr_log(LOG, "establishing connection to %s, lsn=%X/%X to fetch WAL", state->donor_name, LSN_FORMAT_ARGS(state->donor_lsn));
-		state->wp_conn = libpqwp_connect_start(state->donor_conninfo);
+		snprintf(state->donor_name, sizeof(state->donor_name), "%s:%s", donor->host, donor->port);
+		nwr_log(LOG, "establishing connection to %s, flush_lsn %X/%X to fetch WAL",
+				state->donor_name, LSN_FORMAT_ARGS(donor_lsn));
+		state->wp_conn = libpqwp_connect_start(donor->conninfo);
 		if (PQstatus(state->wp_conn->pg_conn) == CONNECTION_BAD)
 		{
 			snprintf(state->err_msg, sizeof(state->err_msg),
@@ -242,22 +251,10 @@ NeonWALReadRemote(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size cou
 				{
 					/* connection successfully established */
 					char		start_repl_query[128];
-					term_t		term = pg_atomic_read_u64(&GetWalpropShmemState()->mineLastElectedTerm);

-					/*
-					 * Set elected walproposer's term to pull only data from
-					 * its history. Note: for logical walsender it means we
-					 * might stream WAL not yet committed by safekeepers. It
-					 * would be cleaner to fix this.
-					 *
-					 * mineLastElectedTerm shouldn't be 0 at this point
-					 * because we checked above that donor exists and it
-					 * appears only after successfull election.
-					 */
-					Assert(term > 0);
 					snprintf(start_repl_query, sizeof(start_repl_query),
 							 "START_REPLICATION PHYSICAL %X/%X (term='" UINT64_FORMAT "')",
-							 LSN_FORMAT_ARGS(startptr), term);
+							 LSN_FORMAT_ARGS(startptr), state->wp->propTerm);
 					nwr_log(LOG, "connection to %s to fetch WAL succeeded, running %s",
 							state->donor_name, start_repl_query);
 					if (!libpqwp_send_query(state->wp_conn, start_repl_query))
@@ -407,10 +404,6 @@ NeonWALReadRemote(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size cou
 			state->req_lsn = InvalidXLogRecPtr;
 			state->req_len = 0;
 			state->req_progress = 0;
-
-			/* Update the current segment info. */
-			state->seg.ws_tli = tli;
-
 			return NEON_WALREAD_SUCCESS;
 		}
 	}
@@ -533,7 +526,7 @@ err:
 }

 /* reset remote connection and request in progress */
-void
+static void
 NeonWALReaderResetRemote(NeonWALReader *state)
 {
 	state->req_lsn = InvalidXLogRecPtr;
@@ -698,25 +691,13 @@ NeonWALReadLocal(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size coun
 	return true;
 }

-XLogRecPtr
-NeonWALReaderGetRemLsn(NeonWALReader *state)
-{
-	return state->rem_lsn;
-}
-
-const WALOpenSegment *
-NeonWALReaderGetSegment(NeonWALReader *state)
-{
-	return &state->seg;
-}
-
 /*
 * Copy of vanilla wal_segment_open, but returns false in case of error instead
 * of ERROR, with errno set.
 *
 * XLogReaderRoutine->segment_open callback for local pg_wal files
 */
-bool
+static bool
 neon_wal_segment_open(NeonWALReader *state, XLogSegNo nextSegNo,
 					  TimeLineID *tli_p)
 {
@@ -743,7 +724,7 @@ is_wal_segment_exists(XLogSegNo segno, int segsize, TimeLineID tli)
 }

 /* copy of vanilla wal_segment_close with NeonWALReader */
-void
+static void
 neon_wal_segment_close(NeonWALReader *state)
 {
 	if (state->seg.ws_file >= 0)
@@ -759,19 +740,3 @@ NeonWALReaderErrMsg(NeonWALReader *state)
 {
 	return state->err_msg;
 }
-
-/*
- * Returns true if there is a donor, and false otherwise
- */
-bool
-NeonWALReaderUpdateDonor(NeonWALReader *state)
-{
-	WalproposerShmemState *wps = GetWalpropShmemState();
-
-	SpinLockAcquire(&wps->mutex);
-	memcpy(state->donor_name, wps->donor_name, sizeof(state->donor_name));
-	memcpy(state->donor_conninfo, wps->donor_conninfo, sizeof(state->donor_conninfo));
-	state->donor_lsn = wps->donor_lsn;
-	SpinLockRelease(&wps->mutex);
-	return state->donor_name[0] != '\0';
-}
--- a/pgxn/neon/neon_walreader.h
+++ b/pgxn/neon/neon_walreader.h
@@ -19,19 +19,12 @@ typedef enum
 	NEON_WALREAD_ERROR,
 } NeonWALReadResult;

-extern NeonWALReader *NeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, char *log_prefix);
+extern NeonWALReader *NeonWALReaderAllocate(int wal_segment_size, XLogRecPtr available_lsn, WalProposer *wp, char *log_prefix);
 extern void NeonWALReaderFree(NeonWALReader *state);
-extern void NeonWALReaderResetRemote(NeonWALReader *state);
 extern NeonWALReadResult NeonWALRead(NeonWALReader *state, char *buf, XLogRecPtr startptr, Size count, TimeLineID tli);
 extern pgsocket NeonWALReaderSocket(NeonWALReader *state);
 extern uint32 NeonWALReaderEvents(NeonWALReader *state);
 extern bool NeonWALReaderIsRemConnEstablished(NeonWALReader *state);
 extern char *NeonWALReaderErrMsg(NeonWALReader *state);
-extern XLogRecPtr NeonWALReaderGetRemLsn(NeonWALReader *state);
-extern const WALOpenSegment *NeonWALReaderGetSegment(NeonWALReader *state);
-extern bool neon_wal_segment_open(NeonWALReader *state, XLogSegNo nextSegNo, TimeLineID *tli_p);
-extern void neon_wal_segment_close(NeonWALReader *state);
-extern bool NeonWALReaderUpdateDonor(NeonWALReader *state);
-

 #endif							/* __NEON_WALREADER_H__ */
--- a/pgxn/neon/walproposer.c
+++ b/pgxn/neon/walproposer.c
@@ -80,7 +80,7 @@ static int	CompareLsn(const void *a, const void *b);
 static char *FormatSafekeeperState(Safekeeper *sk);
 static void AssertEventsOkForState(uint32 events, Safekeeper *sk);
 static char *FormatEvents(WalProposer *wp, uint32 events);
-static void UpdateDonorShmem(WalProposer *wp);
+

 WalProposer *
 WalProposerCreate(WalProposerConfig *config, walproposer_api api)
@@ -922,8 +922,7 @@ static void
 DetermineEpochStartLsn(WalProposer *wp)
 {
 	TermHistory *dth;
-	int			n_ready = 0;
-	WalproposerShmemState *walprop_shared;
+	int          n_ready = 0;

 	wp->propEpochStartLsn = InvalidXLogRecPtr;
 	wp->donorEpoch = 0;
@@ -965,18 +964,16 @@ DetermineEpochStartLsn(WalProposer *wp)
 	if (n_ready < wp->quorum)
 	{
 		/*
-		 * This is a rare case that can be triggered if safekeeper has voted
-		 * and disconnected. In this case, its state will not be SS_IDLE and
-		 * its vote cannot be used, because we clean up `voteResponse` in
-		 * `ShutdownConnection`.
+		 * This is a rare case that can be triggered if safekeeper has voted and disconnected.
+		 * In this case, its state will not be SS_IDLE and its vote cannot be used, because
+		 * we clean up `voteResponse` in `ShutdownConnection`.
 		 */
 		wp_log(FATAL, "missing majority of votes, collected %d, expected %d, got %d", wp->n_votes, wp->quorum, n_ready);
 	}

 	/*
-	 * If propEpochStartLsn is 0, it means flushLsn is 0 everywhere, we are
-	 * bootstrapping and nothing was committed yet. Start streaming then from
-	 * the basebackup LSN.
+	 * If propEpochStartLsn is 0, it means flushLsn is 0 everywhere, we are bootstrapping
+	 * and nothing was committed yet. Start streaming then from the basebackup LSN.
 	 */
 	if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers)
 	{
@@ -987,12 +984,11 @@ DetermineEpochStartLsn(WalProposer *wp)
 		}
 		wp_log(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(wp->propEpochStartLsn));
 	}
-	pg_atomic_write_u64(&wp->api.get_shmem_state(wp)->propEpochStartLsn, wp->propEpochStartLsn);

 	/*
-	 * Safekeepers are setting truncateLsn after timelineStartLsn is known, so
-	 * it should never be zero at this point, if we know timelineStartLsn.
-	 *
+	 * Safekeepers are setting truncateLsn after timelineStartLsn is known, so it
+	 * should never be zero at this point, if we know timelineStartLsn.
+	 * 
 	 * timelineStartLsn can be zero only on the first syncSafekeepers run.
 	 */
 	Assert((wp->truncateLsn != InvalidXLogRecPtr) ||
@@ -1026,9 +1022,10 @@ DetermineEpochStartLsn(WalProposer *wp)
 	 * since which we are going to write according to the consensus. If not,
 	 * we must bail out, as clog and other non rel data is inconsistent.
 	 */
-	walprop_shared = wp->api.get_shmem_state(wp);
 	if (!wp->config->syncSafekeepers)
 	{
+		WalproposerShmemState *walprop_shared = wp->api.get_shmem_state(wp);
+
 		/*
 		 * Basebackup LSN always points to the beginning of the record (not
 		 * the page), as StartupXLOG most probably wants it this way.
@@ -1043,7 +1040,7 @@ DetermineEpochStartLsn(WalProposer *wp)
 			 * compute (who could generate WAL) is ok.
 			 */
 			if (!((dth->n_entries >= 1) && (dth->entries[dth->n_entries - 1].term ==
-											pg_atomic_read_u64(&walprop_shared->mineLastElectedTerm))))
+											walprop_shared->mineLastElectedTerm)))
 			{
 				/*
 				 * Panic to restart PG as we need to retake basebackup.
@@ -1057,8 +1054,8 @@ DetermineEpochStartLsn(WalProposer *wp)
 					   LSN_FORMAT_ARGS(wp->api.get_redo_start_lsn(wp)));
 			}
 		}
+		walprop_shared->mineLastElectedTerm = wp->propTerm;
 	}
-	pg_atomic_write_u64(&walprop_shared->mineLastElectedTerm, wp->propTerm);
 }

 /*
@@ -1108,13 +1105,9 @@ SendProposerElected(Safekeeper *sk)
 	{
 		/* safekeeper is empty or no common point, start from the beginning */
 		sk->startStreamingAt = wp->propTermHistory.entries[0].lsn;
-		wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, timelineStartLsn=%X/%X, termHistory.n_entries=%u",
-			   sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), LSN_FORMAT_ARGS(wp->timelineStartLsn), wp->propTermHistory.n_entries);
-
-		/*
-		 * wp->timelineStartLsn == InvalidXLogRecPtr can be only when timeline
-		 * is created manually (test_s3_wal_replay)
-		 */
+		wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, timelineStartLsn=%X/%X, termHistory.n_entries=%u" ,
+		 	 sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), LSN_FORMAT_ARGS(wp->timelineStartLsn), wp->propTermHistory.n_entries);
+		/* wp->timelineStartLsn == InvalidXLogRecPtr can happen only when the timeline is created manually (test_s3_wal_replay) */
 		Assert(sk->startStreamingAt == wp->timelineStartLsn || wp->timelineStartLsn == InvalidXLogRecPtr);
 	}
 	else
@@ -1184,12 +1177,6 @@ StartStreaming(Safekeeper *sk)
 	sk->active_state = SS_ACTIVE_SEND;
 	sk->streamingAt = sk->startStreamingAt;

-	/*
-	 * Donors can only be in SS_ACTIVE state, so we potentially update the
-	 * donor when we switch one to SS_ACTIVE.
-	 */
-	UpdateDonorShmem(sk->wp);
-
 	/* event set will be updated inside SendMessageToNode */
 	SendMessageToNode(sk);
 }
@@ -1581,17 +1568,17 @@ GetAcknowledgedByQuorumWALPosition(WalProposer *wp)
 * none if it doesn't exist. donor_lsn is set to end position of the donor to
 * the best of our knowledge.
 */
-static void
-UpdateDonorShmem(WalProposer *wp)
+Safekeeper *
+GetDonor(WalProposer *wp, XLogRecPtr *donor_lsn)
 {
 	Safekeeper *donor = NULL;
 	int			i;
-	XLogRecPtr	donor_lsn = InvalidXLogRecPtr;
+	*donor_lsn = InvalidXLogRecPtr;

 	if (wp->n_votes < wp->quorum)
 	{
-		wp_log(WARNING, "UpdateDonorShmem called before elections are won");
-		return;
+		wp_log(WARNING, "GetDonor called before elections are won");
+		return NULL;
 	}

 	/*
@@ -1602,7 +1589,7 @@ UpdateDonorShmem(WalProposer *wp)
 	if (wp->safekeeper[wp->donor].state >= SS_IDLE)
 	{
 		donor = &wp->safekeeper[wp->donor];
-		donor_lsn = wp->propEpochStartLsn;
+		*donor_lsn = wp->propEpochStartLsn;
 	}

 	/*
@@ -1614,19 +1601,13 @@ UpdateDonorShmem(WalProposer *wp)
 	{
 		Safekeeper *sk = &wp->safekeeper[i];

-		if (sk->state == SS_ACTIVE && sk->appendResponse.flushLsn > donor_lsn)
+		if (sk->state == SS_ACTIVE && sk->appendResponse.flushLsn > *donor_lsn)
 		{
 			donor = sk;
-			donor_lsn = sk->appendResponse.flushLsn;
+			*donor_lsn = sk->appendResponse.flushLsn;
 		}
 	}
-
-	if (donor == NULL)
-	{
-		wp_log(WARNING, "UpdateDonorShmem didn't find a suitable donor, skipping");
-		return;
-	}
-	wp->api.update_donor(wp, donor, donor_lsn);
+	return donor;
 }

 /*
@@ -1636,7 +1617,7 @@ static void
 HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
 {
 	XLogRecPtr	candidateTruncateLsn;
-	XLogRecPtr	newCommitLsn;
+	XLogRecPtr  newCommitLsn;

 	newCommitLsn = GetAcknowledgedByQuorumWALPosition(wp);
 	if (newCommitLsn > wp->commitLsn)
@@ -1646,7 +1627,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk)
 		BroadcastAppendRequest(wp);
 	}

-	/*
+	/* 
 	 * Unlock syncrep waiters, update ps_feedback, CheckGracefulShutdown().
 	 * The last one will terminate the process if the shutdown is requested
 	 * and WAL is committed by the quorum. BroadcastAppendRequest() should be
--- a/pgxn/neon/walproposer.h
+++ b/pgxn/neon/walproposer.h
@@ -284,19 +284,14 @@ typedef struct PageserverFeedback

 typedef struct WalproposerShmemState
 {
-	pg_atomic_uint64 propEpochStartLsn;
-	char		donor_name[64];
-	char		donor_conninfo[MAXCONNINFO];
-	XLogRecPtr	donor_lsn;
-
 	slock_t		mutex;
-	pg_atomic_uint64 mineLastElectedTerm;
+	term_t		mineLastElectedTerm;
 	pg_atomic_uint64 backpressureThrottlingTime;
 	pg_atomic_uint64 currentClusterSize;

 	/* last feedback from each shard */
 	PageserverFeedback shard_ps_feedback[MAX_SHARDS];
-	int			num_shards;
+	int num_shards;

 	/* aggregated feedback with min LSNs across shards */
 	PageserverFeedback min_ps_feedback;
@@ -470,9 +465,6 @@ typedef struct walproposer_api
 	/* Get pointer to the latest available WAL. */
 	XLogRecPtr	(*get_flush_rec_ptr) (WalProposer *wp);

-	/* Update current donor info in WalProposer Shmem */
-	void		(*update_donor) (WalProposer *wp, Safekeeper *donor, XLogRecPtr donor_lsn);
-
 	/* Get current time. */
 	TimestampTz (*get_current_timestamp) (WalProposer *wp);

@@ -505,7 +497,7 @@ typedef struct walproposer_api
 	 *
 	 * On success, the data is placed in *buf. It is valid until the next call
 	 * to this function.
-	 *
+	 * 
 	 * Returns PG_ASYNC_READ_FAIL on closed connection.
 	 */
 	PGAsyncReadResult (*conn_async_read) (Safekeeper *sk, char **buf, int *amount);
@@ -553,14 +545,13 @@ typedef struct walproposer_api
 	 * Returns 0 if timeout is reached, 1 if some event happened. Updates
 	 * events mask to indicate events and sets sk to the safekeeper which has
 	 * an event.
-	 *
+	 * 
 	 * On timeout, events is set to WL_NO_EVENTS. On socket event, events is
 	 * set to WL_SOCKET_READABLE and/or WL_SOCKET_WRITEABLE. When socket is
 	 * closed, events is set to WL_SOCKET_READABLE.
-	 *
-	 * WL_SOCKET_WRITEABLE is usually set only when we need to flush the
-	 * buffer. It can be returned only if caller asked for this event in the
-	 * last *_event_set call.
+	 * 
+	 * WL_SOCKET_WRITEABLE is usually set only when we need to flush the buffer.
+	 * It can be returned only if caller asked for this event in the last *_event_set call.
 	 */
 	int			(*wait_event_set) (WalProposer *wp, long timeout, Safekeeper **sk, uint32 *events);

@@ -580,9 +571,9 @@ typedef struct walproposer_api
 	void		(*finish_sync_safekeepers) (WalProposer *wp, XLogRecPtr lsn);

 	/*
-	 * Called after every AppendResponse from the safekeeper. Used to
-	 * propagate backpressure feedback and to confirm WAL persistence (has
-	 * been commited on the quorum of safekeepers).
+	 * Called after every AppendResponse from the safekeeper. Used to propagate
+	 * backpressure feedback and to confirm WAL persistence (has been committed
+	 * on the quorum of safekeepers).
 	 */
 	void		(*process_safekeeper_feedback) (WalProposer *wp, Safekeeper *sk);

@@ -725,14 +716,12 @@ extern void WalProposerBroadcast(WalProposer *wp, XLogRecPtr startpos, XLogRecPt
 extern void WalProposerPoll(WalProposer *wp);
 extern void WalProposerFree(WalProposer *wp);

-extern WalproposerShmemState *GetWalpropShmemState();
-
 /*
 * WaitEventSet API doesn't allow to remove socket, so walproposer_pg uses it to
 * recreate set from scratch, hence the export.
 */
 extern void SafekeeperStateDesiredEvents(Safekeeper *sk, uint32 *sk_events, uint32 *nwr_events);
-extern TimeLineID walprop_pg_get_timeline_id(void);
+extern Safekeeper *GetDonor(WalProposer *wp, XLogRecPtr *donor_lsn);


 #define WPEVENT		1337		/* special log level for walproposer internal
--- a/pgxn/neon/walproposer_pg.c
+++ b/pgxn/neon/walproposer_pg.c
@@ -85,6 +85,7 @@ static void walprop_pg_init_standalone_sync_safekeepers(void);
 static void walprop_pg_init_walsender(void);
 static void walprop_pg_init_bgworker(void);
 static TimestampTz walprop_pg_get_current_timestamp(WalProposer *wp);
+static TimeLineID walprop_pg_get_timeline_id(void);
 static void walprop_pg_load_libpqwalreceiver(void);

 static process_interrupts_callback_t PrevProcessInterruptsCallback;
@@ -93,8 +94,6 @@ static shmem_startup_hook_type prev_shmem_startup_hook_type;
 static shmem_request_hook_type prev_shmem_request_hook = NULL;
 static void walproposer_shmem_request(void);
 #endif
-static void WalproposerShmemInit_SyncSafekeeper(void);
-

 static void StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd);
 static void WalSndLoop(WalProposer *wp);
@@ -137,7 +136,6 @@ WalProposerSync(int argc, char *argv[])
 	WalProposer *wp;

 	init_walprop_config(true);
-	WalproposerShmemInit_SyncSafekeeper();
 	walprop_pg_init_standalone_sync_safekeepers();
 	walprop_pg_load_libpqwalreceiver();

@@ -283,8 +281,6 @@ WalproposerShmemInit(void)
 	{
 		memset(walprop_shared, 0, WalproposerShmemSize());
 		SpinLockInit(&walprop_shared->mutex);
-		pg_atomic_init_u64(&walprop_shared->propEpochStartLsn, 0);
-		pg_atomic_init_u64(&walprop_shared->mineLastElectedTerm, 0);
 		pg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);
 		pg_atomic_init_u64(&walprop_shared->currentClusterSize, 0);
 	}
@@ -293,17 +289,6 @@ WalproposerShmemInit(void)
 	return found;
 }

-static void
-WalproposerShmemInit_SyncSafekeeper(void)
-{
-	walprop_shared = palloc(WalproposerShmemSize());
-	memset(walprop_shared, 0, WalproposerShmemSize());
-	SpinLockInit(&walprop_shared->mutex);
-	pg_atomic_init_u64(&walprop_shared->propEpochStartLsn, 0);
-	pg_atomic_init_u64(&walprop_shared->mineLastElectedTerm, 0);
-	pg_atomic_init_u64(&walprop_shared->backpressureThrottlingTime, 0);
-}
-
 #define BACK_PRESSURE_DELAY 10000L // 0.01 sec

 static bool
@@ -414,13 +399,6 @@ nwp_shmem_startup_hook(void)
 	WalproposerShmemInit();
 }

-WalproposerShmemState *
-GetWalpropShmemState()
-{
-	Assert(walprop_shared != NULL);
-	return walprop_shared;
-}
-
 static WalproposerShmemState *
 walprop_pg_get_shmem_state(WalProposer *wp)
 {
@@ -453,15 +431,14 @@ record_pageserver_feedback(PageserverFeedback *ps_feedback)
 	for (int i = 0; i < walprop_shared->num_shards; i++)
 	{
 		PageserverFeedback *feedback = &walprop_shared->shard_ps_feedback[i];
-
 		if (feedback->present)
 		{
 			if (min_feedback.last_received_lsn == InvalidXLogRecPtr || feedback->last_received_lsn < min_feedback.last_received_lsn)
 				min_feedback.last_received_lsn = feedback->last_received_lsn;
-
+			
 			if (min_feedback.disk_consistent_lsn == InvalidXLogRecPtr || feedback->disk_consistent_lsn < min_feedback.disk_consistent_lsn)
 				min_feedback.disk_consistent_lsn = feedback->disk_consistent_lsn;
-
+			
 			if (min_feedback.remote_consistent_lsn == InvalidXLogRecPtr || feedback->remote_consistent_lsn < min_feedback.remote_consistent_lsn)
 				min_feedback.remote_consistent_lsn = feedback->remote_consistent_lsn;
 		}
@@ -574,7 +551,6 @@ static void
 walprop_sigusr2(SIGNAL_ARGS)
 {
 	int			save_errno = errno;
-
 	got_SIGUSR2 = true;
 	SetLatch(MyLatch);
 	errno = save_errno;
@@ -622,7 +598,7 @@ walprop_pg_get_current_timestamp(WalProposer *wp)
 	return GetCurrentTimestamp();
 }

-TimeLineID
+static TimeLineID
 walprop_pg_get_timeline_id(void)
 {
 #if PG_VERSION_NUM >= 150000
@@ -641,20 +617,6 @@ walprop_pg_load_libpqwalreceiver(void)
 		wpg_log(ERROR, "libpqwalreceiver didn't initialize correctly");
 }

-static void
-walprop_pg_update_donor(WalProposer *wp, Safekeeper *donor, XLogRecPtr donor_lsn)
-{
-	WalproposerShmemState *wps = wp->api.get_shmem_state(wp);
-	char		donor_name[64];
-
-	pg_snprintf(donor_name, sizeof(donor_name), "%s:%s", donor->host, donor->port);
-	SpinLockAcquire(&wps->mutex);
-	memcpy(wps->donor_name, donor_name, sizeof(donor_name));
-	memcpy(wps->donor_conninfo, donor->conninfo, sizeof(donor->conninfo));
-	wps->donor_lsn = donor_lsn;
-	SpinLockRelease(&wps->mutex);
-}
-
 /* Helper function */
 static bool
 ensure_nonblocking_status(WalProposerConn *conn, bool is_nonblocking)
@@ -755,6 +717,7 @@ walprop_connect_start(Safekeeper *sk)
 {
 	Assert(sk->conn == NULL);
 	sk->conn = libpqwp_connect_start(sk->conninfo);
+
 }

 static WalProposerConnectPollStatusType
@@ -1128,7 +1091,7 @@ static void
 StartProposerReplication(WalProposer *wp, StartReplicationCmd *cmd)
 {
 	XLogRecPtr	FlushPtr;
-	__attribute__((unused)) TimeLineID currTLI;
+	 __attribute__((unused)) TimeLineID	currTLI;

 #if PG_VERSION_NUM < 150000
 	if (ThisTimeLineID == 0)
@@ -1332,13 +1295,116 @@ XLogBroadcastWalProposer(WalProposer *wp)
 	}
 }

-/*
-  Used to download WAL before basebackup for logical walsenders from sk, no longer
-  needed because walsender always uses neon_walreader.
- */
+/* Download WAL before basebackup for logical walsenders from sk, if needed */
 static bool
 WalProposerRecovery(WalProposer *wp, Safekeeper *sk)
 {
+	char	   *err;
+	WalReceiverConn *wrconn;
+	WalRcvStreamOptions options;
+	char		conninfo[MAXCONNINFO];
+	TimeLineID	timeline;
+	XLogRecPtr	startpos;
+	XLogRecPtr	endpos;
+
+	startpos = GetLogRepRestartLSN(wp);
+	if (startpos == InvalidXLogRecPtr)
+		return true;			/* recovery not needed */
+	endpos = wp->propEpochStartLsn;
+
+	timeline = wp->greetRequest.timeline;
+
+	if (!neon_auth_token)
+	{
+		memcpy(conninfo, sk->conninfo, MAXCONNINFO);
+	}
+	else
+	{
+		int			written = 0;
+
+		written = snprintf((char *) conninfo, MAXCONNINFO, "password=%s %s", neon_auth_token, sk->conninfo);
+		if (written > MAXCONNINFO || written < 0)
+			wpg_log(FATAL, "could not append password to the safekeeper connection string");
+	}
+
+#if PG_MAJORVERSION_NUM < 16
+	wrconn = walrcv_connect(conninfo, false, "wal_proposer_recovery", &err);
+#else
+	wrconn = walrcv_connect(conninfo, false, false, "wal_proposer_recovery", &err);
+#endif
+
+	if (!wrconn)
+	{
+		ereport(WARNING,
+				(errmsg("could not connect to WAL acceptor %s:%s: %s",
+						sk->host, sk->port,
+						err)));
+		return false;
+	}
+	wpg_log(LOG,
+			"start recovery for logical replication from %s:%s starting from %X/%08X till %X/%08X timeline "
+			"%d",
+			sk->host, sk->port, (uint32) (startpos >> 32),
+			(uint32) startpos, (uint32) (endpos >> 32), (uint32) endpos, timeline);
+
+	options.logical = false;
+	options.startpoint = startpos;
+	options.slotname = NULL;
+	options.proto.physical.startpointTLI = timeline;
+
+	if (walrcv_startstreaming(wrconn, &options))
+	{
+		XLogRecPtr	rec_start_lsn;
+		XLogRecPtr	rec_end_lsn = 0;
+		int			len;
+		char	   *buf;
+		pgsocket	wait_fd = PGINVALID_SOCKET;
+
+		while ((len = walrcv_receive(wrconn, &buf, &wait_fd)) >= 0)
+		{
+			if (len == 0)
+			{
+				(void) WaitLatchOrSocket(
+										 MyLatch, WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE, wait_fd,
+										 -1, WAIT_EVENT_WAL_RECEIVER_MAIN);
+			}
+			else
+			{
+				Assert(buf[0] == 'w' || buf[0] == 'k');
+				if (buf[0] == 'k')
+					continue;	/* keepalive */
+				memcpy(&rec_start_lsn, &buf[XLOG_HDR_START_POS],
+					   sizeof rec_start_lsn);
+				rec_start_lsn = pg_ntoh64(rec_start_lsn);
+				rec_end_lsn = rec_start_lsn + len - XLOG_HDR_SIZE;
+
+				/* write WAL to disk */
+				XLogWalPropWrite(sk->wp, &buf[XLOG_HDR_SIZE], len - XLOG_HDR_SIZE, rec_start_lsn);
+
+				ereport(DEBUG1,
+						(errmsg("Recover message %X/%X length %d",
+								LSN_FORMAT_ARGS(rec_start_lsn), len)));
+				if (rec_end_lsn >= endpos)
+					break;
+			}
+		}
+		ereport(LOG,
+				(errmsg("end of replication stream at %X/%X: %m",
+						LSN_FORMAT_ARGS(rec_end_lsn))));
+		walrcv_disconnect(wrconn);
+
+		/* failed to receive all WAL till endpos */
+		if (rec_end_lsn < endpos)
+			return false;
+	}
+	else
+	{
+		ereport(LOG,
+				(errmsg("primary server contains no more WAL on requested timeline %u LSN %X/%08X",
+						timeline, (uint32) (startpos >> 32), (uint32) startpos)));
+		return false;
+	}
+
 	return true;
 }

@@ -1479,7 +1545,7 @@ walprop_pg_wal_reader_allocate(Safekeeper *sk)

 	snprintf(log_prefix, sizeof(log_prefix), WP_LOG_PREFIX "sk %s:%s nwr: ", sk->host, sk->port);
 	Assert(!sk->xlogreader);
-	sk->xlogreader = NeonWALReaderAllocate(wal_segment_size, sk->wp->propEpochStartLsn, log_prefix);
+	sk->xlogreader = NeonWALReaderAllocate(wal_segment_size, sk->wp->propEpochStartLsn, sk->wp, log_prefix);
 	if (sk->xlogreader == NULL)
 		wpg_log(FATAL, "failed to allocate xlog reader");
 }
@@ -1894,8 +1960,8 @@ CombineHotStanbyFeedbacks(HotStandbyFeedback *hs, WalProposer *wp)
 static void
 walprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk)
 {
-	HotStandbyFeedback hsFeedback;
-	bool		needToAdvanceSlot = false;
+	HotStandbyFeedback	hsFeedback;
+	bool				needToAdvanceSlot = false;

 	if (wp->config->syncSafekeepers)
 		return;
@@ -2029,25 +2095,22 @@ GetLogRepRestartLSN(WalProposer *wp)
 	return lrRestartLsn;
 }

-void
-SetNeonCurrentClusterSize(uint64 size)
+void SetNeonCurrentClusterSize(uint64 size)
 {
 	pg_atomic_write_u64(&walprop_shared->currentClusterSize, size);
 }

-uint64
-GetNeonCurrentClusterSize(void)
+uint64 GetNeonCurrentClusterSize(void)
 {
 	return pg_atomic_read_u64(&walprop_shared->currentClusterSize);
 }
-uint64		GetNeonCurrentClusterSize(void);
+uint64 GetNeonCurrentClusterSize(void);


 static const walproposer_api walprop_pg = {
 	.get_shmem_state = walprop_pg_get_shmem_state,
 	.start_streaming = walprop_pg_start_streaming,
 	.get_flush_rec_ptr = walprop_pg_get_flush_rec_ptr,
-	.update_donor = walprop_pg_update_donor,
 	.get_current_timestamp = walprop_pg_get_current_timestamp,
 	.conn_error_message = walprop_error_message,
 	.conn_status = walprop_status,
--- a/pgxn/neon/walsender_hooks.c
+++ b/pgxn/neon/walsender_hooks.c
@@ -1,172 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * walsender_hooks.c
- *
- * Implements XLogReaderRoutine in terms of NeonWALReader. Allows for
- * fetching WAL from safekeepers, which normal xlogreader can't do.
- *
- *-------------------------------------------------------------------------
- */
-#include "walsender_hooks.h"
-#include "postgres.h"
-#include "fmgr.h"
-#include "access/xlogdefs.h"
-#include "replication/walsender.h"
-#include "access/xlog.h"
-#include "access/xlog_internal.h"
-#include "access/xlogreader.h"
-#include "miscadmin.h"
-#include "utils/wait_event.h"
-#include "utils/guc.h"
-#include "postmaster/interrupt.h"
-
-#include "neon_walreader.h"
-#include "walproposer.h"
-
-static NeonWALReader *wal_reader = NULL;
-extern XLogRecPtr WalSndWaitForWal(XLogRecPtr loc);
-extern bool GetDonorShmem(XLogRecPtr *donor_lsn);
-
-static XLogRecPtr
-NeonWALReadWaitForWAL(XLogRecPtr loc)
-{
-	while (!NeonWALReaderUpdateDonor(wal_reader))
-	{
-		pg_usleep(1000);
-		CHECK_FOR_INTERRUPTS();
-	}
-
-	return WalSndWaitForWal(loc);
-}
-
-static int
-NeonWALPageRead(
-				XLogReaderState *xlogreader,
-				XLogRecPtr targetPagePtr,
-				int reqLen,
-				XLogRecPtr targetRecPtr,
-				char *readBuf)
-{
-	XLogRecPtr	rem_lsn;
-
-	/* Wait for flush pointer to advance past our request */
-	XLogRecPtr	flushptr = NeonWALReadWaitForWAL(targetPagePtr + reqLen);
-	int			count;
-
-	if (flushptr < targetPagePtr + reqLen)
-		return -1;
-
-	/* Read at most XLOG_BLCKSZ bytes */
-	if (targetPagePtr + XLOG_BLCKSZ <= flushptr)
-		count = XLOG_BLCKSZ;
-	else
-		count = flushptr - targetPagePtr;
-
-	/*
-	 * Sometimes walsender requests non-monotonic sequences of WAL. If that's
-	 * the case, we have to reset streaming from remote at the correct
-	 * position. For example, walsender may try to verify the segment header
-	 * when trying to read in the middle of it.
-	 */
-	rem_lsn = NeonWALReaderGetRemLsn(wal_reader);
-	if (rem_lsn != InvalidXLogRecPtr && targetPagePtr != rem_lsn)
-	{
-		NeonWALReaderResetRemote(wal_reader);
-	}
-
-	for (;;)
-	{
-		NeonWALReadResult res = NeonWALRead(
-											wal_reader,
-											readBuf,
-											targetPagePtr,
-											count,
-											walprop_pg_get_timeline_id());
-
-		if (res == NEON_WALREAD_SUCCESS)
-		{
-			/*
-			 * Setting ws_tli is required by the XLogReaderRoutine, it is used
-			 * for segment name generation in error reports.
-			 *
-			 * ReadPageInternal updates ws_segno after calling cb on its own
-			 * and XLogReaderRoutine description doesn't require it, but
-			 * WALRead sets, let's follow it.
-			 */
-			xlogreader->seg.ws_tli = NeonWALReaderGetSegment(wal_reader)->ws_tli;
-			xlogreader->seg.ws_segno = NeonWALReaderGetSegment(wal_reader)->ws_segno;
-
-			/*
-			 * ws_file doesn't exist in case of remote read, and isn't used by
-			 * xlogreader except by WALRead on which we don't rely anyway.
-			 */
-			return count;
-		}
-		if (res == NEON_WALREAD_ERROR)
-		{
-			elog(ERROR, "[walsender] Failed to read WAL (req_lsn=%X/%X, len=%d): %s",
-				 LSN_FORMAT_ARGS(targetPagePtr),
-				 reqLen,
-				 NeonWALReaderErrMsg(wal_reader));
-			return -1;
-		}
-
-		/*
-		 * Res is WOULDBLOCK, so we wait on the socket, recreating event set
-		 * if necessary
-		 */
-		{
-
-			pgsocket	sock = NeonWALReaderSocket(wal_reader);
-			uint32_t	reader_events = NeonWALReaderEvents(wal_reader);
-			long		timeout_ms = 1000;
-
-			ResetLatch(MyLatch);
-			CHECK_FOR_INTERRUPTS();
-			if (ConfigReloadPending)
-			{
-				ConfigReloadPending = false;
-				ProcessConfigFile(PGC_SIGHUP);
-			}
-
-			WaitLatchOrSocket(
-							  MyLatch,
-							  WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | reader_events,
-							  sock,
-							  timeout_ms,
-							  WAIT_EVENT_WAL_SENDER_MAIN);
-		}
-	}
-}
-
-static void
-NeonWALReadSegmentOpen(XLogReaderState *xlogreader, XLogSegNo nextSegNo, TimeLineID *tli_p)
-{
-	neon_wal_segment_open(wal_reader, nextSegNo, tli_p);
-	xlogreader->seg.ws_file = NeonWALReaderGetSegment(wal_reader)->ws_file;
-}
-
-static void
-NeonWALReadSegmentClose(XLogReaderState *xlogreader)
-{
-	neon_wal_segment_close(wal_reader);
-	xlogreader->seg.ws_file = NeonWALReaderGetSegment(wal_reader)->ws_file;
-}
-
-void
-NeonOnDemandXLogReaderRoutines(XLogReaderRoutine *xlr)
-{
-	if (!wal_reader)
-	{
-		XLogRecPtr	epochStartLsn = pg_atomic_read_u64(&GetWalpropShmemState()->propEpochStartLsn);
-
-		if (epochStartLsn == 0)
-		{
-			elog(ERROR, "Unable to start walsender when propEpochStartLsn is 0!");
-		}
-		wal_reader = NeonWALReaderAllocate(wal_segment_size, epochStartLsn, "[walsender] ");
-	}
-	xlr->page_read = NeonWALPageRead;
-	xlr->segment_open = NeonWALReadSegmentOpen;
-	xlr->segment_close = NeonWALReadSegmentClose;
-}
--- a/pgxn/neon/walsender_hooks.h
+++ b/pgxn/neon/walsender_hooks.h
@@ -1,7 +0,0 @@
-#ifndef __WALSENDER_HOOKS_H__
-#define __WALSENDER_HOOKS_H__
-
-struct XLogReaderRoutine;
-void		NeonOnDemandXLogReaderRoutines(struct XLogReaderRoutine *xlr);
-
-#endif
--- a/poetry.lock
+++ b/poetry.lock
@@ -1001,17 +1001,18 @@ dotenv = ["python-dotenv"]

 [[package]]
 name = "flask-cors"
-version = "4.0.1"
+version = "3.0.10"
 description = "A Flask extension adding a decorator for CORS support"
 optional = false
 python-versions = "*"
 files = [
-    {file = "Flask_Cors-4.0.1-py2.py3-none-any.whl", hash = "sha256:f2a704e4458665580c074b714c4627dd5a306b333deb9074d0b1794dfa2fb677"},
-    {file = "flask_cors-4.0.1.tar.gz", hash = "sha256:eeb69b342142fdbf4766ad99357a7f3876a2ceb77689dc10ff912aac06c389e4"},
+    {file = "Flask-Cors-3.0.10.tar.gz", hash = "sha256:b60839393f3b84a0f3746f6cdca56c1ad7426aa738b70d6c61375857823181de"},
+    {file = "Flask_Cors-3.0.10-py2.py3-none-any.whl", hash = "sha256:74efc975af1194fc7891ff5cd85b0f7478be4f7f59fe158102e91abb72bb4438"},
 ]

 [package.dependencies]
 Flask = ">=0.9"
+Six = "*"

 [[package]]
 name = "frozenlist"
@@ -1242,13 +1243,13 @@ files = [

 [[package]]
 name = "jinja2"
-version = "3.1.4"
+version = "3.1.3"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"},
-    {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"},
+    {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+    {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
 ]

 [package.dependencies]
@@ -2611,13 +2612,13 @@ files = [

 [[package]]
 name = "werkzeug"
-version = "3.0.3"
+version = "3.0.1"
 description = "The comprehensive WSGI web application library."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "werkzeug-3.0.3-py3-none-any.whl", hash = "sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8"},
-    {file = "werkzeug-3.0.3.tar.gz", hash = "sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18"},
+    {file = "werkzeug-3.0.1-py3-none-any.whl", hash = "sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10"},
+    {file = "werkzeug-3.0.1.tar.gz", hash = "sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc"},
 ]

 [package.dependencies]
@@ -2899,4 +2900,4 @@ cffi = ["cffi (>=1.11)"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "496d6d9f722983bda4d1265370bc8ba75560da74ab5d6b68c94a03290815e1eb"
+content-hash = "b3452b50901123fd5f2c385ce8a0c1c492296393b8a7926a322b6df0ea3ac572"
--- a/proxy/Cargo.toml
+++ b/proxy/Cargo.toml
@@ -40,7 +40,6 @@ hyper.workspace = true
 hyper1 = { package = "hyper", version = "1.2", features = ["server"] }
 hyper-util = { version = "0.1", features = ["server", "http1", "http2", "tokio"] }
 http-body-util = { version = "0.1" }
-indexmap.workspace = true
 ipnet.workspace = true
 itertools.workspace = true
 lasso = { workspace = true, features = ["multi-threaded"] }
@@ -60,8 +59,8 @@ prometheus.workspace = true
 rand.workspace = true
 regex.workspace = true
 remote_storage = { version = "0.1", path = "../libs/remote_storage/" }
-reqwest.workspace = true
-reqwest-middleware = { workspace = true, features = ["json"] }
+reqwest = { workspace = true, features = ["json"] }
+reqwest-middleware.workspace = true
 reqwest-retry.workspace = true
 reqwest-tracing.workspace = true
 routerify.workspace = true
@@ -85,7 +84,6 @@ tokio-postgres.workspace = true
 tokio-rustls.workspace = true
 tokio-util.workspace = true
 tokio = { workspace = true, features = ["signal"] }
-tower-service.workspace = true
 tracing-opentelemetry.workspace = true
 tracing-subscriber.workspace = true
 tracing-utils.workspace = true
--- a/proxy/src/bin/proxy.rs
+++ b/proxy/src/bin/proxy.rs
@@ -27,7 +27,6 @@ use proxy::redis::cancellation_publisher::RedisPublisherClient;
 use proxy::redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider;
 use proxy::redis::elasticache;
 use proxy::redis::notifications;
-use proxy::serverless::cancel_set::CancelSet;
 use proxy::serverless::GlobalConnPoolOptions;
 use proxy::usage_metrics;

@@ -119,11 +118,8 @@ struct ProxyCliArgs {
    #[clap(long, default_value = config::CacheOptions::CACHE_DEFAULT_OPTIONS)]
    wake_compute_cache: String,
    /// lock for `wake_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
+    #[clap(long, default_value = config::WakeComputeLockOptions::DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK)]
    wake_compute_lock: String,
-    /// lock for `connect_compute` api method. example: "shards=32,permits=4,epoch=10m,timeout=1s". (use `permits=0` to disable).
-    #[clap(long, default_value = config::ConcurrencyLockOptions::DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK)]
-    connect_compute_lock: String,
    /// Allow self-signed certificates for compute nodes (for testing)
    #[clap(long, default_value_t = false, value_parser = clap::builder::BoolishValueParser::new(), action = clap::ArgAction::Set)]
    allow_self_signed_compute: bool,
@@ -244,12 +240,6 @@ struct SqlOverHttpArgs {
    /// increase memory used by the pool
    #[clap(long, default_value_t = 128)]
    sql_over_http_pool_shards: usize,
-
-    #[clap(long, default_value_t = 10000)]
-    sql_over_http_client_conn_threshold: u64,
-
-    #[clap(long, default_value_t = 64)]
-    sql_over_http_cancel_set_shards: usize,
 }

 #[tokio::main]
@@ -349,7 +339,7 @@ async fn main() -> anyhow::Result<()> {

    let cancel_map = CancelMap::default();

-    let redis_publisher = match &redis_notifications_client {
+    let redis_publisher = match &regional_redis_client {
        Some(redis_publisher) => Some(Arc::new(Mutex::new(RedisPublisherClient::new(
            redis_publisher.clone(),
            args.region.clone(),
@@ -539,21 +529,24 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
                endpoint_cache_config,
            )));

-            let config::ConcurrencyLockOptions {
+            let config::WakeComputeLockOptions {
                shards,
                permits,
                epoch,
                timeout,
            } = args.wake_compute_lock.parse()?;
            info!(permits, shards, ?epoch, "Using NodeLocks (wake_compute)");
-            let locks = Box::leak(Box::new(console::locks::ApiLocks::new(
-                "wake_compute_lock",
-                permits,
-                shards,
-                timeout,
-                epoch,
-                &Metrics::get().wake_compute_lock,
-            )?));
+            let locks = Box::leak(Box::new(
+                console::locks::ApiLocks::new(
+                    "wake_compute_lock",
+                    permits,
+                    shards,
+                    timeout,
+                    epoch,
+                    &Metrics::get().wake_compute_lock,
+                )
+                .unwrap(),
+            ));
            tokio::spawn(locks.garbage_collect_worker());

            let url = args.auth_endpoint.parse()?;
@@ -579,23 +572,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
            auth::BackendType::Link(MaybeOwned::Owned(url), ())
        }
    };
-
-    let config::ConcurrencyLockOptions {
-        shards,
-        permits,
-        epoch,
-        timeout,
-    } = args.connect_compute_lock.parse()?;
-    info!(permits, shards, ?epoch, "Using NodeLocks (connect_compute)");
-    let connect_compute_locks = console::locks::ApiLocks::new(
-        "connect_compute_lock",
-        permits,
-        shards,
-        timeout,
-        epoch,
-        &Metrics::get().proxy.connect_compute_lock,
-    )?;
-
    let http_config = HttpConfig {
        request_timeout: args.sql_over_http.sql_over_http_timeout,
        pool_options: GlobalConnPoolOptions {
@@ -606,8 +582,6 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
            opt_in: args.sql_over_http.sql_over_http_pool_opt_in,
            max_total_conns: args.sql_over_http.sql_over_http_pool_max_total_conns,
        },
-        cancel_set: CancelSet::new(args.sql_over_http.sql_over_http_cancel_set_shards),
-        client_conn_threshold: args.sql_over_http.sql_over_http_client_conn_threshold,
    };
    let authentication_config = AuthenticationConfig {
        scram_protocol_timeout: args.scram_protocol_timeout,
@@ -633,14 +607,11 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> {
        region: args.region.clone(),
        aws_region: args.aws_region.clone(),
        wake_compute_retry_config: config::RetryConfig::parse(&args.wake_compute_retry)?,
-        connect_compute_locks,
        connect_to_compute_retry_config: config::RetryConfig::parse(
            &args.connect_to_compute_retry,
        )?,
    }));

-    tokio::spawn(config.connect_compute_locks.garbage_collect_worker());
-
    Ok(config)
 }

--- a/proxy/src/cache/endpoints.rs
+++ b/proxy/src/cache/endpoints.rs
@@ -21,7 +21,7 @@ use crate::{
    config::EndpointCacheConfig,
    context::RequestMonitoring,
    intern::{BranchIdInt, EndpointIdInt, ProjectIdInt},
-    metrics::{Metrics, RedisErrors, RedisEventsCount},
+    metrics::{Metrics, RedisErrors},
    rate_limiter::GlobalRateLimiter,
    redis::connection_with_credentials_provider::ConnectionWithCredentialsProvider,
    EndpointId,
@@ -100,26 +100,14 @@ impl EndpointsCache {
        if let Some(endpoint_created) = key.endpoint_created {
            self.endpoints
                .insert(EndpointIdInt::from(&endpoint_created.endpoint_id.into()));
-            Metrics::get()
-                .proxy
-                .redis_events_count
-                .inc(RedisEventsCount::EndpointCreated);
        }
        if let Some(branch_created) = key.branch_created {
            self.branches
                .insert(BranchIdInt::from(&branch_created.branch_id.into()));
-            Metrics::get()
-                .proxy
-                .redis_events_count
-                .inc(RedisEventsCount::BranchCreated);
        }
        if let Some(project_created) = key.project_created {
            self.projects
                .insert(ProjectIdInt::from(&project_created.project_id.into()));
-            Metrics::get()
-                .proxy
-                .redis_events_count
-                .inc(RedisEventsCount::ProjectCreated);
        }
    }
    pub async fn do_read(
--- a/proxy/src/cache/project_info.rs
+++ b/proxy/src/cache/project_info.rs
@@ -5,11 +5,9 @@ use std::{
    time::Duration,
 };

-use async_trait::async_trait;
 use dashmap::DashMap;
 use rand::{thread_rng, Rng};
 use smol_str::SmolStr;
-use tokio::sync::Mutex;
 use tokio::time::Instant;
 use tracing::{debug, info};

@@ -23,12 +21,11 @@ use crate::{

 use super::{Cache, Cached};

-#[async_trait]
 pub trait ProjectInfoCache {
    fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt);
    fn invalidate_role_secret_for_project(&self, project_id: ProjectIdInt, role_name: RoleNameInt);
-    async fn decrement_active_listeners(&self);
-    async fn increment_active_listeners(&self);
+    fn enable_ttl(&self);
+    fn disable_ttl(&self);
 }

 struct Entry<T> {
@@ -119,10 +116,8 @@ pub struct ProjectInfoCacheImpl {

    start_time: Instant,
    ttl_disabled_since_us: AtomicU64,
-    active_listeners_lock: Mutex<usize>,
 }

-#[async_trait]
 impl ProjectInfoCache for ProjectInfoCacheImpl {
    fn invalidate_allowed_ips_for_project(&self, project_id: ProjectIdInt) {
        info!("invalidating allowed ips for project `{}`", project_id);
@@ -153,27 +148,15 @@ impl ProjectInfoCache for ProjectInfoCacheImpl {
            }
        }
    }
-    async fn decrement_active_listeners(&self) {
-        let mut listeners_guard = self.active_listeners_lock.lock().await;
-        if *listeners_guard == 0 {
-            tracing::error!("active_listeners count is already 0, something is broken");
-            return;
-        }
-        *listeners_guard -= 1;
-        if *listeners_guard == 0 {
-            self.ttl_disabled_since_us
-                .store(u64::MAX, std::sync::atomic::Ordering::SeqCst);
-        }
+    fn enable_ttl(&self) {
+        self.ttl_disabled_since_us
+            .store(u64::MAX, std::sync::atomic::Ordering::Relaxed);
    }

-    async fn increment_active_listeners(&self) {
-        let mut listeners_guard = self.active_listeners_lock.lock().await;
-        *listeners_guard += 1;
-        if *listeners_guard == 1 {
-            let new_ttl = (self.start_time.elapsed() + self.config.ttl).as_micros() as u64;
-            self.ttl_disabled_since_us
-                .store(new_ttl, std::sync::atomic::Ordering::SeqCst);
-        }
+    fn disable_ttl(&self) {
+        let new_ttl = (self.start_time.elapsed() + self.config.ttl).as_micros() as u64;
+        self.ttl_disabled_since_us
+            .store(new_ttl, std::sync::atomic::Ordering::Relaxed);
    }
 }

@@ -185,7 +168,6 @@ impl ProjectInfoCacheImpl {
            config,
            ttl_disabled_since_us: AtomicU64::new(u64::MAX),
            start_time: Instant::now(),
-            active_listeners_lock: Mutex::new(0),
        }
    }

@@ -450,7 +432,7 @@ mod tests {
            ttl: Duration::from_secs(1),
            gc_interval: Duration::from_secs(600),
        }));
-        cache.clone().increment_active_listeners().await;
+        cache.clone().disable_ttl();
        tokio::time::advance(Duration::from_secs(2)).await;

        let project_id: ProjectId = "project".into();
@@ -507,7 +489,7 @@ mod tests {
    }

    #[tokio::test]
-    async fn test_increment_active_listeners_invalidate_added_before() {
+    async fn test_disable_ttl_invalidate_added_before() {
        tokio::time::pause();
        let cache = Arc::new(ProjectInfoCacheImpl::new(ProjectInfoCacheOptions {
            size: 2,
@@ -532,7 +514,7 @@ mod tests {
            (&user1).into(),
            secret1.clone(),
        );
-        cache.clone().increment_active_listeners().await;
+        cache.clone().disable_ttl();
        tokio::time::advance(Duration::from_millis(100)).await;
        cache.insert_role_secret(
            (&project_id).into(),
--- a/proxy/src/compute.rs
+++ b/proxy/src/compute.rs
@@ -6,7 +6,6 @@ use crate::{
    error::{ReportableError, UserFacingError},
    metrics::{Metrics, NumDbConnectionsGuard},
    proxy::neon_option,
-    Host,
 };
 use futures::{FutureExt, TryFutureExt};
 use itertools::Itertools;
@@ -102,16 +101,6 @@ impl ConnCfg {
        }
    }

-    pub fn get_host(&self) -> Result<Host, WakeComputeError> {
-        match self.0.get_hosts() {
-            [tokio_postgres::config::Host::Tcp(s)] => Ok(s.into()),
-            // we should not have multiple address or unix addresses.
-            _ => Err(WakeComputeError::BadComputeAddress(
-                "invalid compute address".into(),
-            )),
-        }
-    }
-
    /// Apply startup message params to the connection config.
    pub fn set_startup_params(&mut self, params: &StartupMessageParams) {
        // Only set `user` if it's not present in the config.
--- a/proxy/src/config.rs
+++ b/proxy/src/config.rs
@@ -1,9 +1,7 @@
 use crate::{
    auth::{self, backend::AuthRateLimiter},
-    console::locks::ApiLocks,
    rate_limiter::RateBucketInfo,
-    serverless::{cancel_set::CancelSet, GlobalConnPoolOptions},
-    Host,
+    serverless::GlobalConnPoolOptions,
 };
 use anyhow::{bail, ensure, Context, Ok};
 use itertools::Itertools;
@@ -36,7 +34,6 @@ pub struct ProxyConfig {
    pub handshake_timeout: Duration,
    pub aws_region: String,
    pub wake_compute_retry_config: RetryConfig,
-    pub connect_compute_locks: ApiLocks<Host>,
    pub connect_to_compute_retry_config: RetryConfig,
 }

@@ -56,8 +53,6 @@ pub struct TlsConfig {
 pub struct HttpConfig {
    pub request_timeout: tokio::time::Duration,
    pub pool_options: GlobalConnPoolOptions,
-    pub cancel_set: CancelSet,
-    pub client_conn_threshold: u64,
 }

 pub struct AuthenticationConfig {
@@ -538,9 +533,9 @@ pub struct RetryConfig {
 impl RetryConfig {
    /// Default options for RetryConfig.

-    /// Total delay for 5 retries with 200ms base delay and 2 backoff factor is about 6s.
+    /// Total delay for 8 retries with 100ms base delay and 1.6 backoff factor is about 7s.
    pub const CONNECT_TO_COMPUTE_DEFAULT_VALUES: &'static str =
-        "num_retries=5,base_retry_wait_duration=200ms,retry_wait_exponent_base=2";
+        "num_retries=8,base_retry_wait_duration=100ms,retry_wait_exponent_base=1.6";
    /// Total delay for 8 retries with 100ms base delay and 1.6 backoff factor is about 7s.
    /// Cplane has timeout of 60s on each request. 8m7s in total.
    pub const WAKE_COMPUTE_DEFAULT_VALUES: &'static str =
@@ -578,7 +573,7 @@ impl RetryConfig {
 }

 /// Helper for cmdline cache options parsing.
-pub struct ConcurrencyLockOptions {
+pub struct WakeComputeLockOptions {
    /// The number of shards the lock map should have
    pub shards: usize,
    /// The number of allowed concurrent requests for each endpoitn
@@ -589,12 +584,9 @@ pub struct ConcurrencyLockOptions {
    pub timeout: Duration,
 }

-impl ConcurrencyLockOptions {
+impl WakeComputeLockOptions {
    /// Default options for [`crate::console::provider::ApiLocks`].
    pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "permits=0";
-    /// Default options for [`crate::console::provider::ApiLocks`].
-    pub const DEFAULT_OPTIONS_CONNECT_COMPUTE_LOCK: &'static str =
-        "shards=64,permits=10,epoch=10m,timeout=10ms";

    // pub const DEFAULT_OPTIONS_WAKE_COMPUTE_LOCK: &'static str = "shards=32,permits=4,epoch=10m,timeout=1s";

@@ -644,7 +636,7 @@ impl ConcurrencyLockOptions {
    }
 }

-impl FromStr for ConcurrencyLockOptions {
+impl FromStr for WakeComputeLockOptions {
    type Err = anyhow::Error;

    fn from_str(options: &str) -> Result<Self, Self::Err> {
@@ -680,7 +672,7 @@ mod tests {

    #[test]
    fn test_parse_lock_options() -> anyhow::Result<()> {
-        let ConcurrencyLockOptions {
+        let WakeComputeLockOptions {
            epoch,
            permits,
            shards,
@@ -691,7 +683,7 @@ mod tests {
        assert_eq!(shards, 32);
        assert_eq!(permits, 4);

-        let ConcurrencyLockOptions {
+        let WakeComputeLockOptions {
            epoch,
            permits,
            shards,
@@ -702,7 +694,7 @@ mod tests {
        assert_eq!(shards, 16);
        assert_eq!(permits, 8);

-        let ConcurrencyLockOptions {
+        let WakeComputeLockOptions {
            epoch,
            permits,
            shards,
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -17,7 +17,7 @@ use crate::{
    scram, EndpointCacheKey,
 };
 use dashmap::DashMap;
-use std::{hash::Hash, sync::Arc, time::Duration};
+use std::{sync::Arc, time::Duration};
 use tokio::sync::{OwnedSemaphorePermit, Semaphore};
 use tokio::time::Instant;
 use tracing::info;
@@ -76,7 +76,7 @@ pub mod errors {
                    }
                    http::StatusCode::LOCKED | http::StatusCode::UNPROCESSABLE_ENTITY => {
                        // Status 423: project might be in maintenance mode (or bad state), or quotas exceeded.
-                        format!("{REQUEST_FAILED}: endpoint is temporarily unavailable. Check your quotas and/or contact our support.")
+                        format!("{REQUEST_FAILED}: endpoint is temporary unavailable. check your quotas and/or contact our support")
                    }
                    _ => REQUEST_FAILED.to_owned(),
                },
@@ -447,16 +447,16 @@ impl ApiCaches {
 }

 /// Various caches for [`console`](super).
-pub struct ApiLocks<K> {
+pub struct ApiLocks {
    name: &'static str,
-    node_locks: DashMap<K, Arc<Semaphore>>,
+    node_locks: DashMap<EndpointCacheKey, Arc<Semaphore>>,
    permits: usize,
    timeout: Duration,
    epoch: std::time::Duration,
    metrics: &'static ApiLockMetrics,
 }

-impl<K: Hash + Eq + Clone> ApiLocks<K> {
+impl ApiLocks {
    pub fn new(
        name: &'static str,
        permits: usize,
@@ -475,7 +475,10 @@ impl<K: Hash + Eq + Clone> ApiLocks<K> {
        })
    }

-    pub async fn get_permit(&self, key: &K) -> Result<WakeComputePermit, errors::WakeComputeError> {
+    pub async fn get_wake_compute_permit(
+        &self,
+        key: &EndpointCacheKey,
+    ) -> Result<WakeComputePermit, errors::WakeComputeError> {
        if self.permits == 0 {
            return Ok(WakeComputePermit { permit: None });
        }
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -13,7 +13,7 @@ use crate::{
    http,
    metrics::{CacheOutcome, Metrics},
    rate_limiter::EndpointRateLimiter,
-    scram, EndpointCacheKey, Normalize,
+    scram, Normalize,
 };
 use crate::{cache::Cached, context::RequestMonitoring};
 use futures::TryFutureExt;
@@ -25,7 +25,7 @@ use tracing::{error, info, info_span, warn, Instrument};
 pub struct Api {
    endpoint: http::Endpoint,
    pub caches: &'static ApiCaches,
-    pub locks: &'static ApiLocks<EndpointCacheKey>,
+    pub locks: &'static ApiLocks,
    pub endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    jwt: String,
 }
@@ -35,7 +35,7 @@ impl Api {
    pub fn new(
        endpoint: http::Endpoint,
        caches: &'static ApiCaches,
-        locks: &'static ApiLocks<EndpointCacheKey>,
+        locks: &'static ApiLocks,
        endpoint_rate_limiter: Arc<EndpointRateLimiter>,
    ) -> Self {
        let jwt: String = match std::env::var("NEON_PROXY_TO_CONTROLPLANE_TOKEN") {
@@ -289,7 +289,7 @@ impl super::Api for Api {
            return Err(WakeComputeError::TooManyConnections);
        }

-        let permit = self.locks.get_permit(&key).await?;
+        let permit = self.locks.get_wake_compute_permit(&key).await?;

        // after getting back a permit - it's possible the cache was filled
        // double check
--- a/proxy/src/http.rs
+++ b/proxy/src/http.rs
@@ -4,7 +4,7 @@

 pub mod health_server;

-use std::{str::FromStr, sync::Arc, time::Duration};
+use std::{sync::Arc, time::Duration};

 use futures::FutureExt;
 pub use reqwest::{Request, Response, StatusCode};
@@ -103,12 +103,12 @@ impl Endpoint {
    }
 }

-use hyper_util::client::legacy::connect::dns::{
-    GaiResolver as HyperGaiResolver, Name as HyperName,
-};
-use reqwest::dns::{Addrs, Name, Resolve, Resolving};
 /// https://docs.rs/reqwest/0.11.18/src/reqwest/dns/gai.rs.html
-use tower_service::Service;
+use hyper::{
+    client::connect::dns::{GaiResolver as HyperGaiResolver, Name},
+    service::Service,
+};
+use reqwest::dns::{Addrs, Resolve, Resolving};
 #[derive(Debug)]
 pub struct GaiResolver(HyperGaiResolver);

@@ -121,12 +121,11 @@ impl Default for GaiResolver {
 impl Resolve for GaiResolver {
    fn resolve(&self, name: Name) -> Resolving {
        let this = &mut self.0.clone();
-        let hyper_name = HyperName::from_str(name.as_str()).expect("name should be valid");
        let start = Instant::now();
        Box::pin(
-            Service::<HyperName>::call(this, hyper_name).map(move |result| {
+            Service::<Name>::call(this, name.clone()).map(move |result| {
                let resolve_duration = start.elapsed();
-                trace!(duration = ?resolve_duration, addr = %name.as_str(), "resolve host complete");
+                trace!(duration = ?resolve_duration, addr = %name, "resolve host complete");
                result
                    .map(|addrs| -> Addrs { Box::new(addrs) })
                    .map_err(|err| -> Box<dyn std::error::Error + Send + Sync> { Box::new(err) })
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -159,9 +159,6 @@ smol_str_wrapper!(EndpointCacheKey);

 smol_str_wrapper!(DbName);

-// postgres hostname, will likely be a port:ip addr
-smol_str_wrapper!(Host);
-
 // Endpoints are a bit tricky. Rare they might be branches or projects.
 impl EndpointId {
    pub fn is_endpoint(&self) -> bool {
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Anastasia Lubennikova	5ccf32b756	fix vendor/revisions.json	2024-05-01 19:50:14 +01:00
Anastasia Lubennikova	120bd1972f	Bump vendor/postrges	2024-05-01 19:50:14 +01:00
Anastasia Lubennikova	434eea7d11	Add test_pg_waldump.py Simple test to ensure that pg_waldump works with neon WAL files	2024-05-01 19:50:14 +01:00