diff --git a/.dockerignore b/.dockerignore
index 960588b6f2..396fba3568 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -14,6 +14,7 @@
 !pgxn/
 !proxy/
 !safekeeper/
+!s3_scrubber/
 !storage_broker/
 !trace/
 !vendor/postgres-v14/
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 1ec2a65a89..144a96910e 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -899,7 +899,7 @@ jobs:
       - name: Cleanup ECR folder
         run: rm -rf ~/.ecr
 
-  build-private-extensions:
+  trigger-custom-extensions-build:
     runs-on: [ self-hosted, gen3, small ]
     container:
       image: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/base:pinned
@@ -908,8 +908,7 @@ jobs:
     steps:
       - name: Set PR's status to pending and request a remote CI test
         run: |
-          COMMIT_SHA=${{ github.event.pull_request.head.sha }}
-          COMMIT_SHA=${COMMIT_SHA:-${{ github.sha }}}
+          COMMIT_SHA=${{ github.event.pull_request.head.sha || github.sha }}
           REMOTE_REPO="${{ github.repository_owner }}/build-custom-extensions"
 
           curl -f -X POST \
@@ -939,10 +938,53 @@ jobs:
               }
             }"
 
+  wait-for-extensions-build:
+    runs-on: ubuntu-latest
+    needs: [ trigger-custom-extensions-build ]
+
+    steps:
+      - name: Wait for extension build to finish
+        env:
+          GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }}
+        run: |
+          TIMEOUT=600 # 10 minutes, currently it takes ~2-3 minutes
+          INTERVAL=15 # try each N seconds
+
+          last_status="" # a variable to carry the last status of the "build-and-upload-extensions" context
+
+          for ((i=0; i <= $TIMEOUT; i+=$INTERVAL)); do
+            sleep $INTERVAL
+
+            # Get statuses for the latest commit in the PR / branch
+            gh api \
+              -H "Accept: application/vnd.github+json" \
+              -H "X-GitHub-Api-Version: 2022-11-28" \
+              "/repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha || github.sha }}" > statuses.json
+
+            # Get the latest status for the "build-and-upload-extensions" context
+            last_status=$(jq --raw-output '[.[] | select(.context == "build-and-upload-extensions")] | sort_by(.created_at)[-1].state' statuses.json)
+            if [ "${last_status}" = "pending" ]; then
+              # Extension build is still in progress.
+              continue
+            elif [ "${last_status}" = "success" ]; then
+              # Extension build is successful.
+              exit 0
+            else
+              # Status is neither "pending" nor "success", exit the loop and fail the job.
+              break
+            fi
+          done
+
+          # Extension build failed or did not finish within TIMEOUT, print `statuses.json` for debugging and fail the job.
+          jq '.' statuses.json
+
+          echo >&2 "Status of extension build is '${last_status}' != 'success'"
+          exit 1
+
   deploy:
     runs-on: [ self-hosted, gen3, small ]
     container: 369495373322.dkr.ecr.eu-central-1.amazonaws.com/ansible:latest
-    needs: [ promote-images, tag, regress-tests ]
+    needs: [ promote-images, tag, regress-tests, wait-for-extensions-build ]
     if: ( github.ref_name == 'main' || github.ref_name == 'release' ) && github.event_name != 'workflow_dispatch'
     steps:
       - name: Fix git ownership
diff --git a/Cargo.lock b/Cargo.lock
index 867008808b..98cf23c620 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -213,6 +213,17 @@ dependencies = [
  "critical-section",
 ]
 
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi 0.1.19",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "autocfg"
 version = "1.1.0"
@@ -227,6 +238,7 @@ checksum = "de3d533e0263bf453cc80af4c8bcc4d64e2aca293bd16f81633a36f1bf4a97cb"
 dependencies = [
  "aws-credential-types",
  "aws-http",
+ "aws-sdk-sso",
  "aws-sdk-sts",
  "aws-smithy-async",
  "aws-smithy-client",
@@ -237,12 +249,15 @@ dependencies = [
  "aws-types",
  "bytes",
  "fastrand 2.0.0",
+ "hex",
  "http",
  "hyper",
+ "ring",
  "time",
  "tokio",
  "tower",
  "tracing",
+ "zeroize",
 ]
 
 [[package]]
@@ -332,6 +347,30 @@ dependencies = [
  "url",
 ]
 
+[[package]]
+name = "aws-sdk-sso"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f41bf2c28d32dbb9894a8fcfcb148265d034d3f4a170552a47553a09de890895"
+dependencies = [
+ "aws-credential-types",
+ "aws-http",
+ "aws-runtime",
+ "aws-smithy-async",
+ "aws-smithy-client",
+ "aws-smithy-http",
+ "aws-smithy-json",
+ "aws-smithy-runtime",
+ "aws-smithy-runtime-api",
+ "aws-smithy-types",
+ "aws-types",
+ "bytes",
+ "http",
+ "regex",
+ "tokio-stream",
+ "tracing",
+]
+
 [[package]]
 name = "aws-sdk-sts"
 version = "0.29.0"
@@ -1747,6 +1786,15 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "hermit-abi"
 version = "0.2.6"
@@ -3685,6 +3733,39 @@ version = "1.0.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041"
 
+[[package]]
+name = "s3_scrubber"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "atty",
+ "aws-config",
+ "aws-sdk-s3",
+ "aws-smithy-http",
+ "aws-types",
+ "bincode",
+ "bytes",
+ "chrono",
+ "clap",
+ "crc32c",
+ "either",
+ "hex",
+ "pageserver",
+ "rand",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "serde_with",
+ "thiserror",
+ "tokio",
+ "tokio-rustls",
+ "tracing",
+ "tracing-appender",
+ "tracing-subscriber",
+ "utils",
+ "workspace_hack",
+]
+
 [[package]]
 name = "safekeeper"
 version = "0.1.0"
@@ -4768,6 +4849,17 @@ dependencies = [
  "tracing-core",
 ]
 
+[[package]]
+name = "tracing-appender"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09d48f71a791638519505cefafe162606f706c25592e4bde4d97600c0195312e"
+dependencies = [
+ "crossbeam-channel",
+ "time",
+ "tracing-subscriber",
+]
+
 [[package]]
 name = "tracing-attributes"
 version = "0.1.24"
@@ -5506,6 +5598,10 @@ name = "workspace_hack"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "aws-config",
+ "aws-runtime",
+ "aws-sigv4",
+ "aws-smithy-http",
  "axum",
  "base64 0.21.1",
  "bytes",
@@ -5514,7 +5610,6 @@ dependencies = [
  "clap",
  "clap_builder",
  "crossbeam-utils",
- "digest",
  "either",
  "fail",
  "futures",
@@ -5523,6 +5618,7 @@ dependencies = [
  "futures-executor",
  "futures-sink",
  "futures-util",
+ "hex",
  "hyper",
  "itertools",
  "libc",
@@ -5546,6 +5642,7 @@ dependencies = [
  "socket2 0.4.9",
  "syn 1.0.109",
  "syn 2.0.28",
+ "time",
  "tokio",
  "tokio-rustls",
  "tokio-util",
@@ -5555,6 +5652,7 @@ dependencies = [
  "tracing",
  "tracing-core",
  "url",
+ "uuid",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index d545be266f..9e2a32ba52 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ members = [
     "proxy",
     "safekeeper",
     "storage_broker",
+    "s3_scrubber",
     "workspace_hack",
     "trace",
     "libs/compute_api",
diff --git a/compute_tools/src/http/api.rs b/compute_tools/src/http/api.rs
index a571628770..8851be1ec1 100644
--- a/compute_tools/src/http/api.rs
+++ b/compute_tools/src/http/api.rs
@@ -1,4 +1,6 @@
 use std::convert::Infallible;
+use std::net::IpAddr;
+use std::net::Ipv6Addr;
 use std::net::SocketAddr;
 use std::sync::Arc;
 use std::thread;
@@ -298,7 +300,9 @@ fn render_json_error(e: &str, status: StatusCode) -> Response<Body> {
 // Main Hyper HTTP server function that runs it and blocks waiting on it forever.
 #[tokio::main]
 async fn serve(port: u16, state: Arc<ComputeNode>) {
-    let addr = SocketAddr::from(([0, 0, 0, 0], port));
+    // this usually binds to both IPv4 and IPv6 on linux
+    // see e.g. https://github.com/rust-lang/rust/pull/34440
+    let addr = SocketAddr::new(IpAddr::from(Ipv6Addr::UNSPECIFIED), port);
 
     let make_service = make_service_fn(move |_conn| {
         let state = state.clone();
diff --git a/compute_tools/src/params.rs b/compute_tools/src/params.rs
index 0ce01ff478..4ccb403ca6 100644
--- a/compute_tools/src/params.rs
+++ b/compute_tools/src/params.rs
@@ -6,4 +6,4 @@ pub const DEFAULT_LOG_LEVEL: &str = "info";
 //   https://www.postgresql.org/docs/15/auth-password.html
 //
 // So it's safe to set md5 here, as `control-plane` anyway uses SCRAM for all roles.
-pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\t0.0.0.0/0\t\tmd5";
+pub const PG_HBA_ALL_MD5: &str = "host\tall\t\tall\t\tall\t\tmd5";
diff --git a/docs/rfcs/027-crash-consistent-layer-map-through-index-part.md b/docs/rfcs/027-crash-consistent-layer-map-through-index-part.md
new file mode 100644
index 0000000000..2c6b46eabe
--- /dev/null
+++ b/docs/rfcs/027-crash-consistent-layer-map-through-index-part.md
@@ -0,0 +1,281 @@
+
+# Crash-Consistent Layer Map Updates By Leveraging `index_part.json`
+
+* Created on: Aug 23, 2023
+* Author: Christian Schwarz
+
+## Summary
+
+This RFC describes a simple scheme to make layer map updates crash consistent by leveraging the `index_part.json` in remote storage.
+Without such a mechanism, crashes can induce certain edge cases in which broadly held assumptions about system invariants don't hold.
+
+## Motivation
+
+### Background
+
+We can currently easily make complex, atomic updates to the layer map by means of an RwLock.
+If we crash or restart pageserver, we reconstruct the layer map from:
+1. local timeline directory contents
+2. remote `index_part.json` contents.
+
+The function that is responsible for this is called `Timeline::load_layer_map()`.
+The reconciliation process's behavior is the following:
+* local-only files will become part of the layer map as local-only layers and rescheduled for upload
+* For a file name that, by its name, is present locally and in the remote `index_part.json`, but where the local file has a different size (future: checksum) than the remote file, we will delete the local file and leave the remote file as a `RemoteLayer` in the layer map.
+
+### The Problem
+
+There are cases where we need to make an atomic update to the layer map that involves **more than one layer**.
+The best example is compaction, where we need to insert the L1 layers generated from the L0 layers, and remove the L0 layers.
+As stated above, making the update to the layer map in an atomic way is trivial.
+But, there is no system call API to make an atomic update to a directory that involves more than one file rename and deletion.
+Currently, we issue the system calls one by one and hope we don't crash.
+
+What happens if we crash and restart in the middle of that system call sequence?
+We will reconstruct the layer map according to the reconciliation process, taking as input whatever transitory state the timeline directory ended up in.
+
+We cannot roll back or complete the timeline directory update during which we crashed, because we keep no record of the changes we plan to make.
+
+### Problem's Implications For Compaction
+
+The implications of the above are primarily problematic for compaction.
+Specifically, the part of it that compacts L0 layers into L1 layers.
+
+Remember that compaction takes a set of L0 layers and reshuffles the delta records in them into L1 layer files.
+Once the L1 layer files are written to disk, it atomically removes the L0 layers from the layer map and adds the L1 layers to the layer map.
+It then deletes the L0 layers locally, and schedules an upload of the L1 layers and an updated index part.
+
+If we crash before deleting L0s, but after writing out L1s, the next compaction after restart will re-digest the L0s and produce new L1s.
+This means the compaction after restart will **overwrite** the previously written L1s.
+Currently we also schedule an S3 upload of the overwritten L1.
+
+If the compaction algorithm doesn't change between the two compaction runs, is deterministic, and uses the same set of L0s as input, then the second run will produce identical L1s and the overwrites will go unnoticed.
+
+*However*:
+1. the file size of the overwritten L1s may not be identical, and
+2. the bit pattern of the overwritten L1s may not be identical, and,
+3. in the future, we may want to make the compaction code non-deterministic, influenced by past access patterns, or otherwise change it, resulting in L1 overwrites with a different set of delta records than before the overwrite
+
+The items above are a problem for the [split-brain protection RFC](https://github.com/neondatabase/neon/pull/4919) because it assumes that layer files in S3 are only ever deleted, but never replaced (overPUTted).
+
+For example, if an unresponsive node A becomes active again after control plane has relocated the tenant to a new node B, the node A may overwrite some L1s.
+But node B based its world view on the version of node A's `index_part.json` from _before_ the overwrite.
+That earlier `index_part.json` contained the file size of the pre-overwrite L1.
+If the overwritten L1 has a different file size, node B will refuse to read data from the overwritten L1.
+Effectively, the data in the L1 has become inaccessible to node B.
+If node B already uploaded an index part itself, all subsequent attachments will use node B's index part, and run into the same problem.
+
+If we ever introduce checksums instead of checking just the file size, then a mismatching bit pattern (2) will cause similar problems.
+
+In case of (1) and (2), where we know that the logical content of the layers is still the same, we can recover by manually patching the `index_part.json` of the new node to the overwritten L1's file size / checksum.
+
+But if (3) ever happens, the logical content may be different, and, we could have truly lost data.
+
+Given the above considerations, we should avoid making correctness of split-brain protection dependent on overwrites preserving _logical_ layer file contents.
+**It is a much cleaner separation of concerns to require that layer files are truly immutable in S3, i.e., PUT once and then only DELETEd, never overwritten (overPUTted).**
+
+## Design
+
+Instead of reconciling a layer map from local timeline directory contents and remote index part, this RFC proposes to view the remote index part as authoritative during timeline load.
+Local layer files will be recognized if they match what's listed in remote index part, and removed otherwise.
+
+During **timeline load**, the only thing that matters is the remote index part content.
+Essentially, timeline load becomes much like attach, except we don't need to prefix-list the remote timelines.
+The local timeline dir's `metadata` file does not matter.
+The layer files in the local timeline dir are seen as a nice-to-have cache of layer files that are in the remote index part.
+Any layer files in the local timeline dir that aren't in the remote index part are removed during startup.
+The `Timeline::load_layer_map()` no longer "merges" local timeline dir contents with the remote index part.
+Instead, it treats the remote index part as the authoritative layer map.
+If the local timeline dir contains a layer that is in the remote index part, that's nice, and we'll re-use it if file size (and in the future, checksum) match what's stated in the index part.
+If it doesn't match, we remove the file from the local timeline dir.
+
+After load, **at runtime**, nothing changes compared to what we did before this RFC.
+The procedure for single- and multi-object changes is reproduced here for reference:
+* For any new layers that the change adds:
+  * Write them to a temporary location.
+  * While holding layer map lock:
+    * Move them to the final location.
+    * Insert into layer map.
+* Make the S3 changes.
+  We won't reproduce the remote timeline client method calls here because these are subject to change.
+  Instead we reproduce the sequence of s3 changes that must result for a given single-/multi-object change:
+    * PUT layer files inserted by the change.
+    * PUT an index part that has insertions and deletions of the change.
+    * DELETE the layer files that are deleted by the change.
+
+Note that it is safe for the DELETE to be deferred arbitrarily.
+* If it never happens, we leak the object, but, that's not a correctness concern.
+* As of #4938, we don't schedule the remote timeline client operation for deletion immediately, but, only when we drop the `LayerInner`.
+* With the [split-brain protection RFC](https://github.com/neondatabase/neon/pull/4919), the deletions will be written to deletion queue for processing when it's safe to do so (see the RFC for details).
+
+## How This Solves The Problem
+
+If we crash before we've finished the S3 changes, then timeline load will reset layer map to the state that's in the S3 index part.
+The S3 change sequence above is obviously crash-consistent.
+If we crash before the index part PUT, then we leak the inserted layer files to S3.
+If we crash after the index part PUT, we leak the to-be-DELETEd layer files to S3.
+Leaking is fine, it's a pre-existing condition and not addressed in this RFC.
+
+Multi-object changes that previously created and removed files in timeline dir are now atomic because the layer map updates are atomic and crash consistent:
+* atomic layer map update at runtime, currently by using an RwLock in write mode
+* atomic `index_part.json` update in S3, as per guarantee that S3 PUT is atomic
+* local timeline dir state:
+  * irrelevant for layer map content => irrelevant for atomic updates / crash consistency
+  * if we crash after index part PUT, local layer files will be used, so, no on-demand downloads needed for them
+  * if we crash before index part PUT, local layer files will be deleted
+
+## Trade-Offs
+
+### Fundamental
+
+If we crash before finishing the index part PUT, we lose all the work that hasn't reached the S3 `index_part.json`:
+* wal ingest: we lose not-yet-uploaded L0s; load on the **safekeepers** + work for pageserver
+* compaction: we lose the entire compaction iteration work; need to re-do it again
+* gc: no change to what we have today
+
+If the work is still deemed necessary after restart, the restarted pageserver will re-do this work.
+The amount of work to be re-done is capped by how far the S3 changes lag behind the local changes.
+Assuming upload queue allows for unlimited queue depth (that's what it does today), this means:
+* on-demand downloads that were needed to do the work: are likely still present, not lost
+* wal ingest: currently unbounded
+* L0 => L1 compaction: CPU time proportional to `O(sum(L0 size))` and upload work proportional to `O(sum(L1 size))`
+  * Compaction threshold is 10 L0s and each L0 can be up to 256M in size. Target size for L1 is 128M.
+  * In practice, most L0s are tiny due to the 10-minute `DEFAULT_CHECKPOINT_TIMEOUT`.
+* image layer generation: CPU time `O(sum(input data))` + upload work `O(sum(new image layer size))`
+  * I have no intuition how expensive / long-running it is in reality.
+* gc: `update_gc_info` work (not substantial, AFAIK)
+
+To limit the amount of lost upload work, and ingest work, we can limit the upload queue depth (see suggestions in the next sub-section).
+However, to limit the amount of lost CPU work, we would need a way to make the compaction/image-layer-generation algorithms interruptible & resumable.
+We aren't there yet, the need for it is tracked by ([#4580](https://github.com/neondatabase/neon/issues/4580)).
+However, this RFC is not constraining the design space either.
+
+### Practical
+
+#### Pageserver Restarts
+
+Pageserver crashes are very rare; it would likely be acceptable to re-do the lost work in that case.
+However, regular pageserver restarts happen frequently, e.g., during weekly deploys.
+
+In general, pageserver restart faces the problem of tenants that "take too long" to shut down.
+They are a problem because other tenants that shut down quickly are unavailable while we wait for the slow tenants to shut down.
+We currently allot 10 seconds for graceful shutdown until we SIGKILL the pageserver process (as per `pageserver.service` unit file).
+A longer budget would expose tenants that are done early to a longer downtime.
+A shorter budget would risk throwing away more work that'd have to be re-done after restart.
+
+In the context of this RFC, killing the process would mean losing the work that hasn't made it to S3.
+We can mitigate this problem as follows:
+0. initially, by accepting that we need to do the work again
+1. short-term, introducing measures to cap the amount of in-flight work:
+
+   - cap upload queue length, use backpressure to slow down compaction
+   - disabling compaction/image-layer-generation X minutes before `systemctl restart pageserver`
+   - introducing a read-only shutdown state for tenants that are fast to shut down;
+     that state would be equivalent to the state of a tenant in hot standby / readonly mode.
+
+2. mid term, by not restarting pageserver in place, but using [*seamless tenant migration*](https://github.com/neondatabase/neon/pull/5029) to drain a pageserver's tenants before we restart it.
+
+#### `disk_consistent_lsn` can go backwards
+
+`disk_consistent_lsn` can go backwards across restarts if we crash before we've finished the index part PUT.
+Nobody should care about it, because the only thing that matters is `remote_consistent_lsn`.
+Compute certainly doesn't care about `disk_consistent_lsn`.
+
+
+## Side-Effects Of This Design
+
+* local `metadata` is basically reduced to a cache of which timelines exist for this tenant; i.e., we can avoid a `ListObjects` request for a tenant's timelines during tenant load.
+
+## Limitations
+
+Multi-object changes that span multiple timelines aren't covered by this RFC.
+That's fine because we currently don't need them, as evidenced by the absence
+of a Pageserver operation that holds multiple timelines' layer map lock at a time.
+
+## Impacted components
+
+Primarily pageservers.
+
+Safekeepers will experience more load when we need to re-ingest WAL because we've thrown away work.
+No changes to safekeepers are needed.
+
+## Alternatives considered
+
+### Alternative 1: WAL
+
+We could have a local WAL for timeline dir changes, as proposed here https://github.com/neondatabase/neon/issues/4418 and partially implemented here https://github.com/neondatabase/neon/pull/4422 .
+The WAL would be used to
+1. make multi-object changes atomic
+2. replace `reconcile_with_remote()` reconciliation: scheduling of layer upload would be part of WAL replay.
+
+The WAL is appealing in a local-first world, but, it's much more complex than the design described above:
+* New on-disk state to get right.
+* Forward- and backward-compatibility development costs in the future.
+
+### Alternative 2: Flow Everything Through `index_part.json`
+
+We could have gone to the other extreme and **only** update the layer map whenever we've PUT `index_part.json`.
+I.e., layer map would always be the last-persisted S3 state.
+That's axiomatically beautiful, not least because it fully separates the layer file production and consumption path (=> [layer file spreading proposal](https://www.notion.so/neondatabase/One-Pager-Layer-File-Spreading-Christian-eb6b64182a214e11b3fceceee688d843?pvs=4)).
+And it might make hot standbys / read-only pageservers less of a special case in the future.
+
+But, I have some uncertainties with regard to WAL ingestion, because it needs to be able to do some reads for the logical size feedback to safekeepers.
+
+And it's silly that we wouldn't be able to use the results of compaction or image layer generation before we're done with the upload.
+
+Lastly, a temporarily clogged-up upload queue (e.g. S3 is down) shouldn't immediately render ingestion unavailable.
+
+### Alternative 3: Sequence Numbers For Layers
+
+Instead of what's proposed in this RFC, we could use unique numbers to identify layer files:
+
+```
+# before
+tenants/$tenant/timelines/$timeline/$key_and_lsn_range
+# after
+tenants/$tenant/timelines/$timeline/$layer_file_id-$key_and_lsn_range
+```
+
+To guarantee uniqueness, the unique number is a sequence number, stored in `index_part.json`.
+
+This alternative does not solve atomic layer map updates.
+In our crash-during-compaction scenario above, the compaction run after the crash will not overwrite the L1s, but write/PUT new files with new sequence numbers.
+In fact, this alternative makes it worse because the data is now duplicated in the not-overwritten and overwritten L1 layer files.
+We'd need to write a deduplication pass that checks if perfectly overlapping layers have identical contents.
+
+However, this alternative is appealing because it systematically prevents overwrites at a lower level than this RFC.
+
+So, this alternative is sufficient for the needs of the split-brain safety RFC (immutable layer files locally and in S3).
+But it doesn't solve the problems with crash-during-compaction outlined earlier in this RFC, and in fact, makes it much more acute.
+The proposed design in this RFC addresses both.
+
+So, if this alternative sounds appealing, we should implement the proposal in this RFC first, then implement this alternative on top.
+That way, we avoid a phase where the crash-during-compaction problem is acute.
+
+## Related issues
+
+- https://github.com/neondatabase/neon/issues/4749
+- https://github.com/neondatabase/neon/issues/4418
+  - https://github.com/neondatabase/neon/pull/4422
+- https://github.com/neondatabase/neon/issues/5077
+- https://github.com/neondatabase/neon/issues/4088
+  - (re)resolutions:
+    - https://github.com/neondatabase/neon/pull/4696
+    - https://github.com/neondatabase/neon/pull/4094
+      - https://neondb.slack.com/archives/C033QLM5P7D/p1682519017949719
+
+Note that the test case introduced in https://github.com/neondatabase/neon/pull/4696/files#diff-13114949d1deb49ae394405d4c49558adad91150ba8a34004133653a8a5aeb76 will produce L1s with the same logical content, but, as outlined in the last paragraph of the _Problem's Implications For Compaction_ section above, we don't want to make that assumption in order to fix the problem.
+
+
+## Implementation Plan
+
+1. Remove support for `remote_storage=None`, because we now rely on the existence of an index part.
+
+    - The nasty part here is to fix all the tests that fiddle with the local timeline directory.
+      Possibly they are just irrelevant with this change, but, each case will require inspection.
+
+2. Implement the design above.
+
+    - Initially, ship without the mitigations for restart and accept we will do some work twice.
+    - Measure the impact and implement one of the mitigations.
+
diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs
index 2af54902f7..608b3cecd6 100644
--- a/pageserver/ctl/src/layers.rs
+++ b/pageserver/ctl/src/layers.rs
@@ -68,7 +68,7 @@ async fn read_delta_file(path: impl AsRef<Path>) -> Result<()> {
             },
         )
         .await?;
-    let cursor = BlockCursor::new_fileblockreader_virtual(&file);
+    let cursor = BlockCursor::new_fileblockreader(&file);
     for (k, v) in all {
         let value = cursor.read_blob(v.pos()).await?;
         println!("key:{} value_len:{}", k, value.len());
diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs
index 72a66d51a6..2a87ee0381 100644
--- a/pageserver/src/page_service.rs
+++ b/pageserver/src/page_service.rs
@@ -469,7 +469,9 @@ impl PageServerHandler {
         // Create empty timeline
         info!("creating new timeline");
         let tenant = get_active_tenant_with_timeout(tenant_id, &ctx).await?;
-        let timeline = tenant.create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)?;
+        let timeline = tenant
+            .create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx)
+            .await?;
 
         // TODO mark timeline as not ready until it reaches end_lsn.
         // We might have some wal to import as well, and we should prevent compute
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 2168db57de..3256a00182 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -32,9 +32,7 @@ use std::fmt::Debug;
 use std::fmt::Display;
 use std::fs;
 use std::fs::File;
-use std::fs::OpenOptions;
 use std::io;
-use std::io::Write;
 use std::ops::Bound::Included;
 use std::path::Path;
 use std::path::PathBuf;
@@ -68,7 +66,7 @@ use crate::task_mgr;
 use crate::task_mgr::TaskKind;
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::metadata::load_metadata;
-use crate::tenant::remote_timeline_client::index::IndexPart;
+pub use crate::tenant::remote_timeline_client::index::IndexPart;
 use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart;
 use crate::tenant::storage_layer::DeltaLayer;
 use crate::tenant::storage_layer::ImageLayer;
@@ -115,7 +113,6 @@ pub mod block_io;
 pub mod disk_btree;
 pub(crate) mod ephemeral_file;
 pub mod layer_map;
-pub mod manifest;
 mod span;
 
 pub mod metadata;
@@ -195,7 +192,7 @@ pub struct Tenant {
     walredo_mgr: Arc<dyn WalRedoManager + Send + Sync>,
 
     // provides access to timeline data sitting in the remote storage
-    remote_storage: Option<GenericRemoteStorage>,
+    pub(crate) remote_storage: Option<GenericRemoteStorage>,
 
     /// Cached logical sizes updated updated on each [`Tenant::gather_size_inputs`].
     cached_logical_sizes: tokio::sync::Mutex<HashMap<(TimelineId, Lsn), u64>>,
@@ -407,7 +404,6 @@ impl Tenant {
         remote_startup_data: Option<RemoteStartupData>,
         local_metadata: Option<TimelineMetadata>,
         ancestor: Option<Arc<Timeline>>,
-        first_save: bool,
         init_order: Option<&InitializationOrder>,
         _ctx: &RequestContext,
     ) -> anyhow::Result<()> {
@@ -441,14 +437,9 @@ impl Tenant {
 
         // Save the metadata file to local disk.
         if !picked_local {
-            save_metadata(
-                self.conf,
-                &tenant_id,
-                &timeline_id,
-                up_to_date_metadata,
-                first_save,
-            )
-            .context("save_metadata")?;
+            save_metadata(self.conf, &tenant_id, &timeline_id, up_to_date_metadata)
+                .await
+                .context("save_metadata")?;
         }
 
         let index_part = remote_startup_data.as_ref().map(|x| &x.index_part);
@@ -833,7 +824,6 @@ impl Tenant {
             }),
             local_metadata,
             ancestor,
-            true,
             None,
             ctx,
         )
@@ -1386,7 +1376,6 @@ impl Tenant {
             remote_startup_data,
             Some(local_metadata),
             ancestor,
-            false,
             init_order,
             ctx,
         )
@@ -1450,7 +1439,7 @@ impl Tenant {
     /// For tests, use `DatadirModification::init_empty_test_timeline` + `commit` to setup the
     /// minimum amount of keys required to get a writable timeline.
     /// (Without it, `put` might fail due to `repartition` failing.)
-    pub fn create_empty_timeline(
+    pub async fn create_empty_timeline(
         &self,
         new_timeline_id: TimelineId,
         initdb_lsn: Lsn,
@@ -1462,10 +1451,10 @@ impl Tenant {
             "Cannot create empty timelines on inactive tenant"
         );
 
-        let timelines = self.timelines.lock().unwrap();
-        let timeline_uninit_mark = self.create_timeline_uninit_mark(new_timeline_id, &timelines)?;
-        drop(timelines);
-
+        let timeline_uninit_mark = {
+            let timelines = self.timelines.lock().unwrap();
+            self.create_timeline_uninit_mark(new_timeline_id, &timelines)?
+        };
         let new_metadata = TimelineMetadata::new(
             // Initialize disk_consistent LSN to 0, The caller must import some data to
             // make it valid, before calling finish_creation()
@@ -1484,6 +1473,7 @@ impl Tenant {
             initdb_lsn,
             None,
         )
+        .await
     }
 
     /// Helper for unit tests to create an empty timeline.
@@ -1499,7 +1489,9 @@ impl Tenant {
         pg_version: u32,
         ctx: &RequestContext,
     ) -> anyhow::Result<Arc<Timeline>> {
-        let uninit_tl = self.create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)?;
+        let uninit_tl = self
+            .create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx)
+            .await?;
         let tline = uninit_tl.raw_timeline().expect("we just created it");
         assert_eq!(tline.get_last_record_lsn(), Lsn(0));
 
@@ -1517,6 +1509,15 @@ impl Tenant {
         tline.maybe_spawn_flush_loop();
         tline.freeze_and_flush().await.context("freeze_and_flush")?;
 
+        // Make sure the freeze_and_flush reaches remote storage.
+        tline
+            .remote_client
+            .as_ref()
+            .unwrap()
+            .wait_completion()
+            .await
+            .unwrap();
+
         let tl = uninit_tl.finish_creation()?;
         // The non-test code would call tl.activate() here.
         tl.set_state(TimelineState::Active);
@@ -1693,65 +1694,6 @@ impl Tenant {
         Ok(())
     }
 
-    /// Flush all in-memory data to disk and remote storage, if any.
-    ///
-    /// Used at graceful shutdown.
-    async fn freeze_and_flush_on_shutdown(&self) {
-        let mut js = tokio::task::JoinSet::new();
-
-        // execute on each timeline on the JoinSet, join after.
-        let per_timeline = |timeline_id: TimelineId, timeline: Arc<Timeline>| {
-            async move {
-                debug_assert_current_span_has_tenant_and_timeline_id();
-
-                match timeline.freeze_and_flush().await {
-                    Ok(()) => {}
-                    Err(e) => {
-                        warn!("failed to freeze and flush: {e:#}");
-                        return;
-                    }
-                }
-
-                let res = if let Some(client) = timeline.remote_client.as_ref() {
-                    // if we did not wait for completion here, it might be our shutdown process
-                    // didn't wait for remote uploads to complete at all, as new tasks can forever
-                    // be spawned.
-                    //
-                    // what is problematic is the shutting down of RemoteTimelineClient, because
-                    // obviously it does not make sense to stop while we wait for it, but what
-                    // about corner cases like s3 suddenly hanging up?
-                    client.wait_completion().await
-                } else {
-                    Ok(())
-                };
-
-                if let Err(e) = res {
-                    warn!("failed to await for frozen and flushed uploads: {e:#}");
-                }
-            }
-            .instrument(tracing::info_span!("freeze_and_flush_on_shutdown", %timeline_id))
-        };
-
-        {
-            let timelines = self.timelines.lock().unwrap();
-            timelines
-                .iter()
-                .map(|(id, tl)| (*id, Arc::clone(tl)))
-                .for_each(|(timeline_id, timeline)| {
-                    js.spawn(per_timeline(timeline_id, timeline));
-                })
-        };
-
-        while let Some(res) = js.join_next().await {
-            match res {
-                Ok(()) => {}
-                Err(je) if je.is_cancelled() => unreachable!("no cancelling used"),
-                Err(je) if je.is_panic() => { /* logged already */ }
-                Err(je) => warn!("unexpected JoinError: {je:?}"),
-            }
-        }
-    }
-
     pub fn current_state(&self) -> TenantState {
         self.state.borrow().clone()
     }
@@ -1882,19 +1824,22 @@ impl Tenant {
             }
         };
 
-        if freeze_and_flush {
-            // walreceiver has already began to shutdown with TenantState::Stopping, but we need to
-            // await for them to stop.
-            task_mgr::shutdown_tasks(
-                Some(TaskKind::WalReceiverManager),
-                Some(self.tenant_id),
-                None,
-            )
-            .await;
-
-            // this will wait for uploads to complete; in the past, it was done outside tenant
-            // shutdown in pageserver::shutdown_pageserver.
-            self.freeze_and_flush_on_shutdown().await;
+        let mut js = tokio::task::JoinSet::new();
+        {
+            let timelines = self.timelines.lock().unwrap();
+            timelines.values().for_each(|timeline| {
+                let timeline = Arc::clone(timeline);
+                let span = Span::current();
+                js.spawn(async move { timeline.shutdown(freeze_and_flush).instrument(span).await });
+            })
+        };
+        while let Some(res) = js.join_next().await {
+            match res {
+                Ok(()) => {}
+                Err(je) if je.is_cancelled() => unreachable!("no cancelling used"),
+                Err(je) if je.is_panic() => { /* logged already */ }
+                Err(je) => warn!("unexpected JoinError: {je:?}"),
+            }
         }
 
         // shutdown all tenant and timeline tasks: gc, compaction, page service
@@ -2421,72 +2366,37 @@ impl Tenant {
         Ok(tenant_conf)
     }
 
-    pub(super) fn persist_tenant_config(
+    #[tracing::instrument(skip_all, fields(%tenant_id))]
+    pub(super) async fn persist_tenant_config(
         tenant_id: &TenantId,
         target_config_path: &Path,
         tenant_conf: TenantConfOpt,
-        creating_tenant: bool,
     ) -> anyhow::Result<()> {
-        let _enter = info_span!("saving tenantconf").entered();
-
         // imitate a try-block with a closure
-        let do_persist = |target_config_path: &Path| -> anyhow::Result<()> {
-            let target_config_parent = target_config_path.parent().with_context(|| {
-                format!(
-                    "Config path does not have a parent: {}",
-                    target_config_path.display()
-                )
-            })?;
+        info!("persisting tenantconf to {}", target_config_path.display());
 
-            info!("persisting tenantconf to {}", target_config_path.display());
-
-            let mut conf_content = r#"# This file contains a specific per-tenant's config.
+        let mut conf_content = r#"# This file contains a specific per-tenant's config.
 #  It is read in case of pageserver restart.
 
 [tenant_config]
 "#
-            .to_string();
+        .to_string();
 
-            // Convert the config to a toml file.
-            conf_content += &toml_edit::ser::to_string(&tenant_conf)?;
+        // Convert the config to a toml file.
+        conf_content += &toml_edit::ser::to_string(&tenant_conf)?;
 
-            let mut target_config_file = VirtualFile::open_with_options(
-                target_config_path,
-                OpenOptions::new()
-                    .truncate(true) // This needed for overwriting with small config files
-                    .write(true)
-                    .create_new(creating_tenant)
-                    // when creating a new tenant, first_save will be true and `.create(true)` will be
-                    // ignored (per rust std docs).
-                    //
-                    // later when updating the config of created tenant, or persisting config for the
-                    // first time for attached tenant, the `.create(true)` is used.
-                    .create(true),
-            )?;
+        let conf_content = conf_content.as_bytes();
 
-            target_config_file
-                .write(conf_content.as_bytes())
-                .context("write toml bytes into file")
-                .and_then(|_| target_config_file.sync_all().context("fsync config file"))
-                .context("write config file")?;
-
-            // fsync the parent directory to ensure the directory entry is durable.
-            // before this was done conditionally on creating_tenant, but these management actions are rare
-            // enough to just fsync it always.
-
-            crashsafe::fsync(target_config_parent)?;
-            // XXX we're not fsyncing the parent dir, need to do that in case `creating_tenant`
-            Ok(())
-        };
-
-        // this function is called from creating the tenant and updating the tenant config, which
-        // would otherwise share this context, so keep it here in one place.
-        do_persist(target_config_path).with_context(|| {
-            format!(
-                "write tenant {tenant_id} config to {}",
-                target_config_path.display()
-            )
-        })
+        let temp_path = path_with_suffix_extension(target_config_path, TEMP_FILE_SUFFIX);
+        VirtualFile::crashsafe_overwrite(target_config_path, &temp_path, conf_content)
+            .await
+            .with_context(|| {
+                format!(
+                    "write tenant {tenant_id} config to {}",
+                    target_config_path.display()
+                )
+            })?;
+        Ok(())
     }
 
     //
@@ -2797,13 +2707,15 @@ impl Tenant {
             src_timeline.pg_version,
         );
 
-        let uninitialized_timeline = self.prepare_new_timeline(
-            dst_id,
-            &metadata,
-            timeline_uninit_mark,
-            start_lsn + 1,
-            Some(Arc::clone(src_timeline)),
-        )?;
+        let uninitialized_timeline = self
+            .prepare_new_timeline(
+                dst_id,
+                &metadata,
+                timeline_uninit_mark,
+                start_lsn + 1,
+                Some(Arc::clone(src_timeline)),
+            )
+            .await?;
 
         let new_timeline = uninitialized_timeline.finish_creation()?;
 
@@ -2881,13 +2793,15 @@ impl Tenant {
             pgdata_lsn,
             pg_version,
         );
-        let raw_timeline = self.prepare_new_timeline(
-            timeline_id,
-            &new_metadata,
-            timeline_uninit_mark,
-            pgdata_lsn,
-            None,
-        )?;
+        let raw_timeline = self
+            .prepare_new_timeline(
+                timeline_id,
+                &new_metadata,
+                timeline_uninit_mark,
+                pgdata_lsn,
+                None,
+            )
+            .await?;
 
         let tenant_id = raw_timeline.owning_tenant.tenant_id;
         let unfinished_timeline = raw_timeline.raw_timeline()?;
@@ -2958,7 +2872,7 @@ impl Tenant {
     /// at 'disk_consistent_lsn'. After any initial data has been imported, call
     /// `finish_creation` to insert the Timeline into the timelines map and to remove the
     /// uninit mark file.
-    fn prepare_new_timeline(
+    async fn prepare_new_timeline(
         &self,
         new_timeline_id: TimelineId,
         new_metadata: &TimelineMetadata,
@@ -2986,8 +2900,9 @@ impl Tenant {
 
         timeline_struct.init_empty_layer_map(start_lsn);
 
-        if let Err(e) =
-            self.create_timeline_files(&uninit_mark.timeline_path, &new_timeline_id, new_metadata)
+        if let Err(e) = self
+            .create_timeline_files(&uninit_mark.timeline_path, &new_timeline_id, new_metadata)
+            .await
         {
             error!("Failed to create initial files for timeline {tenant_id}/{new_timeline_id}, cleaning up: {e:?}");
             cleanup_timeline_directory(uninit_mark);
@@ -3003,7 +2918,7 @@ impl Tenant {
         ))
     }
 
-    fn create_timeline_files(
+    async fn create_timeline_files(
         &self,
         timeline_path: &Path,
         new_timeline_id: &TimelineId,
@@ -3015,14 +2930,9 @@ impl Tenant {
             anyhow::bail!("failpoint after-timeline-uninit-mark-creation");
         });
 
-        save_metadata(
-            self.conf,
-            &self.tenant_id,
-            new_timeline_id,
-            new_metadata,
-            true,
-        )
-        .context("Failed to create timeline metadata")?;
+        save_metadata(self.conf, &self.tenant_id, new_timeline_id, new_metadata)
+            .await
+            .context("Failed to create timeline metadata")?;
         Ok(())
     }
 
@@ -3169,7 +3079,7 @@ pub(crate) enum CreateTenantFilesMode {
     Attach,
 }
 
-pub(crate) fn create_tenant_files(
+pub(crate) async fn create_tenant_files(
     conf: &'static PageServerConf,
     tenant_conf: TenantConfOpt,
     tenant_id: &TenantId,
@@ -3205,7 +3115,8 @@ pub(crate) fn create_tenant_files(
         mode,
         &temporary_tenant_dir,
         &target_tenant_directory,
-    );
+    )
+    .await;
 
     if creation_result.is_err() {
         error!("Failed to create directory structure for tenant {tenant_id}, cleaning tmp data");
@@ -3223,7 +3134,7 @@ pub(crate) fn create_tenant_files(
     Ok(target_tenant_directory)
 }
 
-fn try_create_target_tenant_dir(
+async fn try_create_target_tenant_dir(
     conf: &'static PageServerConf,
     tenant_conf: TenantConfOpt,
     tenant_id: &TenantId,
@@ -3262,7 +3173,7 @@ fn try_create_target_tenant_dir(
     )
     .with_context(|| format!("resolve tenant {tenant_id} temporary config path"))?;
 
-    Tenant::persist_tenant_config(tenant_id, &temporary_tenant_config_path, tenant_conf, true)?;
+    Tenant::persist_tenant_config(tenant_id, &temporary_tenant_config_path, tenant_conf).await?;
 
     crashsafe::create_dir(&temporary_tenant_timelines_dir).with_context(|| {
         format!(
@@ -3467,6 +3378,8 @@ pub mod harness {
         pub tenant_conf: TenantConf,
         pub tenant_id: TenantId,
         pub generation: Generation,
+        remote_storage: GenericRemoteStorage,
+        pub remote_fs_dir: PathBuf,
     }
 
     static LOG_HANDLE: OnceCell<()> = OnceCell::new();
@@ -3504,29 +3417,39 @@ pub mod harness {
             fs::create_dir_all(conf.tenant_path(&tenant_id))?;
             fs::create_dir_all(conf.timelines_path(&tenant_id))?;
 
+            use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
+            let remote_fs_dir = conf.workdir.join("localfs");
+            std::fs::create_dir_all(&remote_fs_dir).unwrap();
+            let config = RemoteStorageConfig {
+                // TODO: why not remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
+                max_concurrent_syncs: std::num::NonZeroUsize::new(2_000_000).unwrap(),
+                // TODO: why not remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
+                max_sync_errors: std::num::NonZeroU32::new(3_000_000).unwrap(),
+                storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
+            };
+            let remote_storage = GenericRemoteStorage::from_config(&config).unwrap();
+
             Ok(Self {
                 conf,
                 tenant_conf,
                 tenant_id,
                 generation: Generation::new(0xdeadbeef),
+                remote_storage,
+                remote_fs_dir,
             })
         }
 
         pub async fn load(&self) -> (Arc<Tenant>, RequestContext) {
             let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error);
             (
-                self.try_load(&ctx, None)
+                self.try_load(&ctx)
                     .await
                     .expect("failed to load test tenant"),
                 ctx,
             )
         }
 
-        pub async fn try_load(
-            &self,
-            ctx: &RequestContext,
-            remote_storage: Option<remote_storage::GenericRemoteStorage>,
-        ) -> anyhow::Result<Arc<Tenant>> {
+        pub async fn try_load(&self, ctx: &RequestContext) -> anyhow::Result<Arc<Tenant>> {
             let walredo_mgr = Arc::new(TestRedoManager);
 
             let tenant = Arc::new(Tenant::new(
@@ -3536,7 +3459,7 @@ pub mod harness {
                 walredo_mgr,
                 self.tenant_id,
                 self.generation,
-                remote_storage,
+                Some(self.remote_storage.clone()),
             ));
             tenant
                 .load(None, ctx)
@@ -3649,7 +3572,10 @@ mod tests {
             .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
             .await?;
 
-        match tenant.create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) {
+        match tenant
+            .create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
+            .await
+        {
             Ok(_) => panic!("duplicate timeline creation should fail"),
             Err(e) => assert_eq!(
                 e.to_string(),
@@ -4004,6 +3930,13 @@ mod tests {
                 .create_test_timeline(TIMELINE_ID, Lsn(0x7000), DEFAULT_PG_VERSION, &ctx)
                 .await?;
             make_some_layers(tline.as_ref(), Lsn(0x8000)).await?;
+            // so that all uploads finish & we can call harness.load() below again
+            tenant
+                .shutdown(Default::default(), true)
+                .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id))
+                .await
+                .ok()
+                .unwrap();
         }
 
         let (tenant, _ctx) = harness.load().await;
@@ -4037,6 +3970,14 @@ mod tests {
                 .expect("Should have a local timeline");
 
             make_some_layers(newtline.as_ref(), Lsn(0x60)).await?;
+
+            // so that all uploads finish & we can call harness.load() below again
+            tenant
+                .shutdown(Default::default(), true)
+                .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id))
+                .await
+                .ok()
+                .unwrap();
         }
 
         // check that both of them are initially unloaded
@@ -4089,6 +4030,13 @@ mod tests {
             .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx)
             .await?;
         drop(tline);
+        // so that all uploads finish & we can call harness.try_load() below again
+        tenant
+            .shutdown(Default::default(), true)
+            .instrument(info_span!("test_shutdown", tenant_id=%tenant.tenant_id))
+            .await
+            .ok()
+            .unwrap();
         drop(tenant);
 
         let metadata_path = harness.timeline_path(&TIMELINE_ID).join(METADATA_FILE_NAME);
@@ -4100,11 +4048,7 @@ mod tests {
         metadata_bytes[8] ^= 1;
         std::fs::write(metadata_path, metadata_bytes)?;
 
-        let err = harness
-            .try_load(&ctx, None)
-            .await
-            .err()
-            .expect("should fail");
+        let err = harness.try_load(&ctx).await.err().expect("should fail");
         // get all the stack with all .context, not only the last one
         let message = format!("{err:#}");
         let expected = "failed to load metadata";
@@ -4489,8 +4433,9 @@ mod tests {
             .await;
 
         let initdb_lsn = Lsn(0x20);
-        let utline =
-            tenant.create_empty_timeline(TIMELINE_ID, initdb_lsn, DEFAULT_PG_VERSION, &ctx)?;
+        let utline = tenant
+            .create_empty_timeline(TIMELINE_ID, initdb_lsn, DEFAULT_PG_VERSION, &ctx)
+            .await?;
         let tline = utline.raw_timeline().unwrap();
 
         // Spawn flush loop now so that we can set the `expect_initdb_optimization`
@@ -4555,9 +4500,15 @@ mod tests {
         let harness = TenantHarness::create(name)?;
         {
             let (tenant, ctx) = harness.load().await;
-            let tline =
-                tenant.create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)?;
+            let tline = tenant
+                .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx)
+                .await?;
             // Keeps uninit mark in place
+            let raw_tline = tline.raw_timeline().unwrap();
+            raw_tline
+                .shutdown(false)
+                .instrument(info_span!("test_shutdown", tenant_id=%raw_tline.tenant_id))
+                .await;
             std::mem::forget(tline);
         }
 
diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs
index f5ff15b50c..e4dede2c30 100644
--- a/pageserver/src/tenant/blob_io.rs
+++ b/pageserver/src/tenant/blob_io.rs
@@ -96,18 +96,12 @@ pub trait BlobWriter {
 /// An implementation of BlobWriter to write blobs to anything that
 /// implements std::io::Write.
 ///
-pub struct WriteBlobWriter<W>
-where
-    W: std::io::Write,
-{
+pub struct WriteBlobWriter<W> {
     inner: W,
     offset: u64,
 }
 
-impl<W> WriteBlobWriter<W>
-where
-    W: std::io::Write,
-{
+impl<W> WriteBlobWriter<W> {
     pub fn new(inner: W, start_offset: u64) -> Self {
         WriteBlobWriter {
             inner,
diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs
index 69d5b49c6d..645ec81036 100644
--- a/pageserver/src/tenant/block_io.rs
+++ b/pageserver/src/tenant/block_io.rs
@@ -7,9 +7,7 @@ use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner};
 use crate::page_cache::{self, PageReadGuard, ReadBufResult, PAGE_SZ};
 use crate::virtual_file::VirtualFile;
 use bytes::Bytes;
-use std::fs::File;
 use std::ops::{Deref, DerefMut};
-use std::os::unix::fs::FileExt;
 
 /// This is implemented by anything that can read 8 kB (PAGE_SZ)
 /// blocks, using the page cache
@@ -73,8 +71,7 @@ impl<'a> Deref for BlockLease<'a> {
 ///
 /// Unlike traits, we also support the read function to be async though.
 pub(crate) enum BlockReaderRef<'a> {
-    FileBlockReaderVirtual(&'a FileBlockReader<VirtualFile>),
-    FileBlockReaderFile(&'a FileBlockReader<std::fs::File>),
+    FileBlockReaderVirtual(&'a FileBlockReader),
     EphemeralFile(&'a EphemeralFile),
     Adapter(Adapter<&'a DeltaLayerInner>),
     #[cfg(test)]
@@ -87,7 +84,6 @@ impl<'a> BlockReaderRef<'a> {
         use BlockReaderRef::*;
         match self {
             FileBlockReaderVirtual(r) => r.read_blk(blknum).await,
-            FileBlockReaderFile(r) => r.read_blk(blknum).await,
             EphemeralFile(r) => r.read_blk(blknum).await,
             Adapter(r) => r.read_blk(blknum).await,
             #[cfg(test)]
@@ -105,7 +101,7 @@ impl<'a> BlockReaderRef<'a> {
 ///
 /// ```no_run
 /// # use pageserver::tenant::block_io::{BlockReader, FileBlockReader};
-/// # let reader: FileBlockReader<std::fs::File> = unimplemented!("stub");
+/// # let reader: FileBlockReader = unimplemented!("stub");
 /// let cursor = reader.block_cursor();
 /// let buf = cursor.read_blk(1);
 /// // do stuff with 'buf'
@@ -122,7 +118,7 @@ impl<'a> BlockCursor<'a> {
         BlockCursor { reader }
     }
     // Needed by cli
-    pub fn new_fileblockreader_virtual(reader: &'a FileBlockReader<VirtualFile>) -> Self {
+    pub fn new_fileblockreader(reader: &'a FileBlockReader) -> Self {
         BlockCursor {
             reader: BlockReaderRef::FileBlockReaderVirtual(reader),
         }
@@ -143,27 +139,26 @@ impl<'a> BlockCursor<'a> {
 ///
 /// The file is assumed to be immutable. This doesn't provide any functions
 /// for modifying the file, nor for invalidating the cache if it is modified.
-pub struct FileBlockReader<F> {
-    pub file: F,
+pub struct FileBlockReader {
+    pub file: VirtualFile,
 
     /// Unique ID of this file, used as key in the page cache.
     file_id: page_cache::FileId,
 }
 
-impl<F> FileBlockReader<F>
-where
-    F: FileExt,
-{
-    pub fn new(file: F) -> Self {
+impl FileBlockReader {
+    pub fn new(file: VirtualFile) -> Self {
         let file_id = page_cache::next_file_id();
 
         FileBlockReader { file_id, file }
     }
 
     /// Read a page from the underlying file into given buffer.
-    fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), std::io::Error> {
+    async fn fill_buffer(&self, buf: &mut [u8], blkno: u32) -> Result<(), std::io::Error> {
         assert!(buf.len() == PAGE_SZ);
-        self.file.read_exact_at(buf, blkno as u64 * PAGE_SZ as u64)
+        self.file
+            .read_exact_at(buf, blkno as u64 * PAGE_SZ as u64)
+            .await
     }
     /// Read a block.
     ///
@@ -185,7 +180,7 @@ where
                 ReadBufResult::Found(guard) => break Ok(guard.into()),
                 ReadBufResult::NotFound(mut write_guard) => {
                     // Read the page from disk into the buffer
-                    self.fill_buffer(write_guard.deref_mut(), blknum)?;
+                    self.fill_buffer(write_guard.deref_mut(), blknum).await?;
                     write_guard.mark_valid();
 
                     // Swap for read lock
@@ -196,13 +191,7 @@ where
     }
 }
 
-impl BlockReader for FileBlockReader<File> {
-    fn block_cursor(&self) -> BlockCursor<'_> {
-        BlockCursor::new(BlockReaderRef::FileBlockReaderFile(self))
-    }
-}
-
-impl BlockReader for FileBlockReader<VirtualFile> {
+impl BlockReader for FileBlockReader {
     fn block_cursor(&self) -> BlockCursor<'_> {
         BlockCursor::new(BlockReaderRef::FileBlockReaderVirtual(self))
     }
diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs
index 31db3869d9..4c5fe424f3 100644
--- a/pageserver/src/tenant/ephemeral_file.rs
+++ b/pageserver/src/tenant/ephemeral_file.rs
@@ -9,7 +9,6 @@ use std::cmp::min;
 use std::fs::OpenOptions;
 use std::io::{self, ErrorKind};
 use std::ops::DerefMut;
-use std::os::unix::prelude::FileExt;
 use std::path::PathBuf;
 use std::sync::atomic::AtomicU64;
 use tracing::*;
@@ -88,7 +87,8 @@ impl EphemeralFile {
                         let buf: &mut [u8] = write_guard.deref_mut();
                         debug_assert_eq!(buf.len(), PAGE_SZ);
                         self.file
-                            .read_exact_at(&mut buf[..], blknum as u64 * PAGE_SZ as u64)?;
+                            .read_exact_at(&mut buf[..], blknum as u64 * PAGE_SZ as u64)
+                            .await?;
                         write_guard.mark_valid();
 
                         // Swap for read lock
@@ -128,10 +128,15 @@ impl EphemeralFile {
                     self.off += n;
                     src_remaining = &src_remaining[n..];
                     if self.off == PAGE_SZ {
-                        match self.ephemeral_file.file.write_all_at(
-                            &self.ephemeral_file.mutable_tail,
-                            self.blknum as u64 * PAGE_SZ as u64,
-                        ) {
+                        match self
+                            .ephemeral_file
+                            .file
+                            .write_all_at(
+                                &self.ephemeral_file.mutable_tail,
+                                self.blknum as u64 * PAGE_SZ as u64,
+                            )
+                            .await
+                        {
                             Ok(_) => {
                                 // Pre-warm the page cache with what we just wrote.
                                 // This isn't necessary for coherency/correctness, but it's how we've always done it.
diff --git a/pageserver/src/tenant/manifest.rs b/pageserver/src/tenant/manifest.rs
deleted file mode 100644
index 1d2835114f..0000000000
--- a/pageserver/src/tenant/manifest.rs
+++ /dev/null
@@ -1,325 +0,0 @@
-//! This module contains the encoding and decoding of the local manifest file.
-//!
-//! MANIFEST is a write-ahead log which is stored locally to each timeline. It
-//! records the state of the storage engine. It contains a snapshot of the
-//! state and all operations proceeding that snapshot. The file begins with a
-//! header recording MANIFEST version number. After that, it contains a snapshot.
-//! The snapshot is followed by a list of operations. Each operation is a list
-//! of records. Each record is either an addition or a removal of a layer.
-//!
-//! With MANIFEST, we can:
-//!
-//! 1. recover state quickly by reading the file, potentially boosting the
-//!    startup speed.
-//! 2. ensure all operations are atomic and avoid corruption, solving issues
-//!    like redundant image layer and preparing us for future compaction
-//!    strategies.
-//!
-//! There is also a format for storing all layer files on S3, called
-//! `index_part.json`. Compared with index_part, MANIFEST is an WAL which
-//! records all operations as logs, and therefore we can easily replay the
-//! operations when recovering from crash, while ensuring those operations
-//! are atomic upon restart.
-//!
-//! Currently, this is not used in the system. Future refactors will ensure
-//! the storage state will be recorded in this file, and the system can be
-//! recovered from this file. This is tracked in
-//! <https://github.com/neondatabase/neon/issues/4418>
-
-use std::io::{self, Read, Write};
-
-use crate::virtual_file::VirtualFile;
-use anyhow::Result;
-use bytes::{Buf, BufMut, Bytes, BytesMut};
-use crc32c::crc32c;
-use serde::{Deserialize, Serialize};
-use tracing::log::warn;
-use utils::lsn::Lsn;
-
-use super::storage_layer::PersistentLayerDesc;
-
-pub struct Manifest {
-    file: VirtualFile,
-}
-
-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
-pub struct Snapshot {
-    pub layers: Vec<PersistentLayerDesc>,
-}
-
-/// serde by default encode this in tagged enum, and therefore it will be something
-/// like `{ "AddLayer": { ... } }`.
-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
-pub enum Record {
-    AddLayer(PersistentLayerDesc),
-    RemoveLayer(PersistentLayerDesc),
-}
-
-/// `echo neon.manifest | sha1sum` and take the leading 8 bytes.
-const MANIFEST_MAGIC_NUMBER: u64 = 0xf5c44592b806109c;
-const MANIFEST_VERSION: u64 = 1;
-
-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
-pub struct ManifestHeader {
-    magic_number: u64,
-    version: u64,
-}
-
-const MANIFEST_HEADER_LEN: usize = 16;
-
-impl ManifestHeader {
-    fn encode(&self) -> BytesMut {
-        let mut buf = BytesMut::with_capacity(MANIFEST_HEADER_LEN);
-        buf.put_u64(self.magic_number);
-        buf.put_u64(self.version);
-        buf
-    }
-
-    fn decode(mut buf: &[u8]) -> Self {
-        assert!(buf.len() == MANIFEST_HEADER_LEN, "invalid header");
-        Self {
-            magic_number: buf.get_u64(),
-            version: buf.get_u64(),
-        }
-    }
-}
-
-#[derive(Clone, Serialize, Deserialize, PartialEq, Eq, Debug)]
-pub enum Operation {
-    /// A snapshot of the current state.
-    ///
-    /// Lsn field represents the LSN that is persisted to disk for this snapshot.
-    Snapshot(Snapshot, Lsn),
-    /// An atomic operation that changes the state.
-    ///
-    /// Lsn field represents the LSN that is persisted to disk after the operation is done.
-    /// This will only change when new L0 is flushed to the disk.
-    Operation(Vec<Record>, Lsn),
-}
-
-struct RecordHeader {
-    size: u32,
-    checksum: u32,
-}
-
-const RECORD_HEADER_LEN: usize = 8;
-
-impl RecordHeader {
-    fn encode(&self) -> BytesMut {
-        let mut buf = BytesMut::with_capacity(RECORD_HEADER_LEN);
-        buf.put_u32(self.size);
-        buf.put_u32(self.checksum);
-        buf
-    }
-
-    fn decode(mut buf: &[u8]) -> Self {
-        assert!(buf.len() == RECORD_HEADER_LEN, "invalid header");
-        Self {
-            size: buf.get_u32(),
-            checksum: buf.get_u32(),
-        }
-    }
-}
-
-#[derive(Debug, thiserror::Error)]
-pub enum ManifestLoadError {
-    #[error("manifest header is corrupted")]
-    CorruptedManifestHeader,
-    #[error("unsupported manifest version: got {0}, expected {1}")]
-    UnsupportedVersion(u64, u64),
-    #[error("error when decoding record: {0}")]
-    DecodeRecord(serde_json::Error),
-    #[error("I/O error: {0}")]
-    Io(io::Error),
-}
-
-#[must_use = "Should check if the manifest is partially corrupted"]
-pub struct ManifestPartiallyCorrupted(bool);
-
-impl Manifest {
-    /// Create a new manifest by writing the manifest header and a snapshot record to the given file.
-    pub fn init(file: VirtualFile, snapshot: Snapshot, lsn: Lsn) -> Result<Self> {
-        let mut manifest = Self { file };
-        manifest.append_manifest_header(ManifestHeader {
-            magic_number: MANIFEST_MAGIC_NUMBER,
-            version: MANIFEST_VERSION,
-        })?;
-        manifest.append_operation(Operation::Snapshot(snapshot, lsn))?;
-        Ok(manifest)
-    }
-
-    /// Load a manifest. Returns the manifest and a list of operations. If the manifest is corrupted,
-    /// the bool flag will be set to true and the user is responsible to reconstruct a new manifest and
-    /// backup the current one.
-    pub fn load(
-        mut file: VirtualFile,
-    ) -> Result<(Self, Vec<Operation>, ManifestPartiallyCorrupted), ManifestLoadError> {
-        let mut buf = vec![];
-        file.read_to_end(&mut buf).map_err(ManifestLoadError::Io)?;
-
-        // Read manifest header
-        let mut buf = Bytes::from(buf);
-        if buf.remaining() < MANIFEST_HEADER_LEN {
-            return Err(ManifestLoadError::CorruptedManifestHeader);
-        }
-        let header = ManifestHeader::decode(&buf[..MANIFEST_HEADER_LEN]);
-        buf.advance(MANIFEST_HEADER_LEN);
-        if header.version != MANIFEST_VERSION {
-            return Err(ManifestLoadError::UnsupportedVersion(
-                header.version,
-                MANIFEST_VERSION,
-            ));
-        }
-
-        // Read operations
-        let mut operations = Vec::new();
-        let corrupted = loop {
-            if buf.remaining() == 0 {
-                break false;
-            }
-            if buf.remaining() < RECORD_HEADER_LEN {
-                warn!("incomplete header when decoding manifest, could be corrupted");
-                break true;
-            }
-            let RecordHeader { size, checksum } = RecordHeader::decode(&buf[..RECORD_HEADER_LEN]);
-            let size = size as usize;
-            buf.advance(RECORD_HEADER_LEN);
-            if buf.remaining() < size {
-                warn!("incomplete data when decoding manifest, could be corrupted");
-                break true;
-            }
-            let data = &buf[..size];
-            if crc32c(data) != checksum {
-                warn!("checksum mismatch when decoding manifest, could be corrupted");
-                break true;
-            }
-            // if the following decode fails, we cannot use the manifest or safely ignore any record.
-            operations.push(serde_json::from_slice(data).map_err(ManifestLoadError::DecodeRecord)?);
-            buf.advance(size);
-        };
-        Ok((
-            Self { file },
-            operations,
-            ManifestPartiallyCorrupted(corrupted),
-        ))
-    }
-
-    fn append_data(&mut self, data: &[u8]) -> Result<()> {
-        if data.len() >= u32::MAX as usize {
-            panic!("data too large");
-        }
-        let header = RecordHeader {
-            size: data.len() as u32,
-            checksum: crc32c(data),
-        };
-        let header = header.encode();
-        self.file.write_all(&header)?;
-        self.file.write_all(data)?;
-        self.file.sync_all()?;
-        Ok(())
-    }
-
-    fn append_manifest_header(&mut self, header: ManifestHeader) -> Result<()> {
-        let encoded = header.encode();
-        self.file.write_all(&encoded)?;
-        Ok(())
-    }
-
-    /// Add an operation to the manifest. The operation will be appended to the end of the file,
-    /// and the file will fsync.
-    pub fn append_operation(&mut self, operation: Operation) -> Result<()> {
-        let encoded = Vec::from(serde_json::to_string(&operation)?);
-        self.append_data(&encoded)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::fs::OpenOptions;
-
-    use crate::repository::Key;
-
-    use super::*;
-
-    #[test]
-    fn test_read_manifest() {
-        let testdir = crate::config::PageServerConf::test_repo_dir("test_read_manifest");
-        std::fs::create_dir_all(&testdir).unwrap();
-        let file = VirtualFile::create(&testdir.join("MANIFEST")).unwrap();
-        let layer1 = PersistentLayerDesc::new_test(Key::from_i128(0)..Key::from_i128(233));
-        let layer2 = PersistentLayerDesc::new_test(Key::from_i128(233)..Key::from_i128(2333));
-        let layer3 = PersistentLayerDesc::new_test(Key::from_i128(2333)..Key::from_i128(23333));
-        let layer4 = PersistentLayerDesc::new_test(Key::from_i128(23333)..Key::from_i128(233333));
-
-        // Write a manifest with a snapshot and some operations
-        let snapshot = Snapshot {
-            layers: vec![layer1, layer2],
-        };
-        let mut manifest = Manifest::init(file, snapshot.clone(), Lsn::from(0)).unwrap();
-        manifest
-            .append_operation(Operation::Operation(
-                vec![Record::AddLayer(layer3.clone())],
-                Lsn::from(1),
-            ))
-            .unwrap();
-        drop(manifest);
-
-        // Open the second time and write
-        let file = VirtualFile::open_with_options(
-            &testdir.join("MANIFEST"),
-            OpenOptions::new()
-                .read(true)
-                .write(true)
-                .create_new(false)
-                .truncate(false),
-        )
-        .unwrap();
-        let (mut manifest, operations, corrupted) = Manifest::load(file).unwrap();
-        assert!(!corrupted.0);
-        assert_eq!(operations.len(), 2);
-        assert_eq!(
-            &operations[0],
-            &Operation::Snapshot(snapshot.clone(), Lsn::from(0))
-        );
-        assert_eq!(
-            &operations[1],
-            &Operation::Operation(vec![Record::AddLayer(layer3.clone())], Lsn::from(1))
-        );
-        manifest
-            .append_operation(Operation::Operation(
-                vec![
-                    Record::RemoveLayer(layer3.clone()),
-                    Record::AddLayer(layer4.clone()),
-                ],
-                Lsn::from(2),
-            ))
-            .unwrap();
-        drop(manifest);
-
-        // Open the third time and verify
-        let file = VirtualFile::open_with_options(
-            &testdir.join("MANIFEST"),
-            OpenOptions::new()
-                .read(true)
-                .write(true)
-                .create_new(false)
-                .truncate(false),
-        )
-        .unwrap();
-        let (_manifest, operations, corrupted) = Manifest::load(file).unwrap();
-        assert!(!corrupted.0);
-        assert_eq!(operations.len(), 3);
-        assert_eq!(&operations[0], &Operation::Snapshot(snapshot, Lsn::from(0)));
-        assert_eq!(
-            &operations[1],
-            &Operation::Operation(vec![Record::AddLayer(layer3.clone())], Lsn::from(1))
-        );
-        assert_eq!(
-            &operations[2],
-            &Operation::Operation(
-                vec![Record::RemoveLayer(layer3), Record::AddLayer(layer4)],
-                Lsn::from(2)
-            )
-        );
-    }
-}
diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs
index dbf2d5ac37..7b05704e4f 100644
--- a/pageserver/src/tenant/metadata.rs
+++ b/pageserver/src/tenant/metadata.rs
@@ -8,14 +8,13 @@
 //!
 //! [`remote_timeline_client`]: super::remote_timeline_client
 
-use std::fs::{File, OpenOptions};
-use std::io::{self, Write};
+use std::io::{self};
 
-use anyhow::{bail, ensure, Context};
+use anyhow::{ensure, Context};
 use serde::{de::Error, Deserialize, Serialize, Serializer};
 use thiserror::Error;
-use tracing::info_span;
 use utils::bin_ser::SerializeError;
+use utils::crashsafe::path_with_suffix_extension;
 use utils::{
     bin_ser::BeSer,
     id::{TenantId, TimelineId},
@@ -24,6 +23,7 @@ use utils::{
 
 use crate::config::PageServerConf;
 use crate::virtual_file::VirtualFile;
+use crate::TEMP_FILE_SUFFIX;
 
 /// Use special format number to enable backward compatibility.
 const METADATA_FORMAT_VERSION: u16 = 4;
@@ -255,38 +255,19 @@ impl Serialize for TimelineMetadata {
 }
 
 /// Save timeline metadata to file
-pub fn save_metadata(
+#[tracing::instrument(skip_all, fields(%tenant_id, %timeline_id))]
+pub async fn save_metadata(
     conf: &'static PageServerConf,
     tenant_id: &TenantId,
     timeline_id: &TimelineId,
     data: &TimelineMetadata,
-    first_save: bool,
 ) -> anyhow::Result<()> {
-    let _enter = info_span!("saving metadata").entered();
     let path = conf.metadata_path(tenant_id, timeline_id);
-    // use OpenOptions to ensure file presence is consistent with first_save
-    let mut file = VirtualFile::open_with_options(
-        &path,
-        OpenOptions::new().write(true).create_new(first_save),
-    )
-    .context("open_with_options")?;
-
-    let metadata_bytes = data.to_bytes().context("Failed to get metadata bytes")?;
-
-    if file.write(&metadata_bytes)? != metadata_bytes.len() {
-        bail!("Could not write all the metadata bytes in a single call");
-    }
-    file.sync_all()?;
-
-    // fsync the parent directory to ensure the directory entry is durable
-    if first_save {
-        let timeline_dir = File::open(
-            path.parent()
-                .expect("Metadata should always have a parent dir"),
-        )?;
-        timeline_dir.sync_all()?;
-    }
-
+    let temp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
+    let metadata_bytes = data.to_bytes().context("serialize metadata")?;
+    VirtualFile::crashsafe_overwrite(&path, &temp_path, &metadata_bytes)
+        .await
+        .context("write metadata")?;
     Ok(())
 }
 
diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs
index 87617b544c..72d150e0eb 100644
--- a/pageserver/src/tenant/mgr.rs
+++ b/pageserver/src/tenant/mgr.rs
@@ -22,8 +22,9 @@ use crate::task_mgr::{self, TaskKind};
 use crate::tenant::config::TenantConfOpt;
 use crate::tenant::delete::DeleteTenantFlow;
 use crate::tenant::{create_tenant_files, CreateTenantFilesMode, Tenant, TenantState};
-use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME};
+use crate::{InitializationOrder, IGNORED_TENANT_FILE_NAME, TEMP_FILE_SUFFIX};
 
+use utils::crashsafe::path_with_suffix_extension;
 use utils::fs_ext::PathExt;
 use utils::generation::Generation;
 use utils::id::{TenantId, TimelineId};
@@ -60,6 +61,29 @@ impl TenantsMap {
     }
 }
 
+/// Remove a tenant directory without ever leaving a partially deleted directory at the
+/// original path: the directory is first renamed with TEMP_FILE_SUFFIX, the parent
+/// directory is synced, and only then are the contents deleted.
+///
+/// This is pageserver-specific, as it relies on future processes after a crash to check
+/// for TEMP_FILE_SUFFIX when loading things.
+async fn safe_remove_tenant_dir_all(path: impl AsRef<Path>) -> std::io::Result<()> {
+    let parent = path
+        .as_ref()
+        .parent()
+        // `parent()` is `None` only for a root or empty path.  Tenant directories
+        // should always have a parent, so report anything else as invalid input.
+        .ok_or_else(|| {
+            std::io::Error::new(std::io::ErrorKind::InvalidInput, "Path must be absolute")
+        })?;
+
+    let tmp_path = path_with_suffix_extension(&path, TEMP_FILE_SUFFIX);
+    fs::rename(&path, &tmp_path).await?;
+    // Sync the parent directory after the rename, before any contents are deleted.
+    fs::File::open(parent).await?.sync_all().await?;
+    fs::remove_dir_all(tmp_path).await
+}
+
 static TENANTS: Lazy<RwLock<TenantsMap>> = Lazy::new(|| RwLock::new(TenantsMap::Initializing));
 
 /// Initialize repositories with locally available timelines.
@@ -92,6 +116,8 @@ pub async fn init_tenant_mgr(
                         "Found temporary tenant directory, removing: {}",
                         tenant_dir_path.display()
                     );
+                    // No need to use safe_remove_tenant_dir_all because this is already
+                    // a temporary path
                     if let Err(e) = fs::remove_dir_all(&tenant_dir_path).await {
                         error!(
                             "Failed to remove temporary directory '{}': {:?}",
@@ -361,11 +387,11 @@ pub async fn create_tenant(
     remote_storage: Option<GenericRemoteStorage>,
     ctx: &RequestContext,
 ) -> Result<Arc<Tenant>, TenantMapInsertError> {
-    tenant_map_insert(tenant_id, || {
+    tenant_map_insert(tenant_id, || async {
         // We're holding the tenants lock in write mode while doing local IO.
         // If this section ever becomes contentious, introduce a new `TenantState::Creating`
         // and do the work in that state.
-        let tenant_directory = super::create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Create)?;
+        let tenant_directory = super::create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Create).await?;
         // TODO: tenant directory remains on disk if we bail out from here on.
         //       See https://github.com/neondatabase/neon/issues/4233
 
@@ -404,7 +430,8 @@ pub async fn set_new_tenant_config(
     let tenant = get_tenant(tenant_id, true).await?;
 
     let tenant_config_path = conf.tenant_config_path(&tenant_id);
-    Tenant::persist_tenant_config(&tenant_id, &tenant_config_path, new_tenant_conf, false)
+    Tenant::persist_tenant_config(&tenant_id, &tenant_config_path, new_tenant_conf)
+        .await
         .map_err(SetNewTenantConfigError::Persist)?;
     tenant.set_new_tenant_config(new_tenant_conf);
     Ok(())
@@ -490,7 +517,7 @@ async fn detach_tenant0(
 ) -> Result<(), TenantStateError> {
     let local_files_cleanup_operation = |tenant_id_to_clean| async move {
         let local_tenant_directory = conf.tenant_path(&tenant_id_to_clean);
-        fs::remove_dir_all(&local_tenant_directory)
+        safe_remove_tenant_dir_all(&local_tenant_directory)
             .await
             .with_context(|| {
                 format!("local tenant directory {local_tenant_directory:?} removal")
@@ -525,7 +552,7 @@ pub async fn load_tenant(
     remote_storage: Option<GenericRemoteStorage>,
     ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
-    tenant_map_insert(tenant_id, || {
+    tenant_map_insert(tenant_id, || async {
         let tenant_path = conf.tenant_path(&tenant_id);
         let tenant_ignore_mark = conf.tenant_ignore_mark_file_path(&tenant_id);
         if tenant_ignore_mark.exists() {
@@ -606,8 +633,8 @@ pub async fn attach_tenant(
     remote_storage: GenericRemoteStorage,
     ctx: &RequestContext,
 ) -> Result<(), TenantMapInsertError> {
-    tenant_map_insert(tenant_id, || {
-        let tenant_dir = create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Attach)?;
+    tenant_map_insert(tenant_id, || async {
+        let tenant_dir = create_tenant_files(conf, tenant_conf, &tenant_id, CreateTenantFilesMode::Attach).await?;
         // TODO: tenant directory remains on disk if we bail out from here on.
         //       See https://github.com/neondatabase/neon/issues/4233
 
@@ -655,12 +682,13 @@ pub enum TenantMapInsertError {
 ///
 /// NB: the closure should return quickly because the current implementation of tenants map
 /// serializes access through an `RwLock`.
-async fn tenant_map_insert<F>(
+async fn tenant_map_insert<F, R>(
     tenant_id: TenantId,
     insert_fn: F,
 ) -> Result<Arc<Tenant>, TenantMapInsertError>
 where
-    F: FnOnce() -> anyhow::Result<Arc<Tenant>>,
+    F: FnOnce() -> R,
+    R: std::future::Future<Output = anyhow::Result<Arc<Tenant>>>,
 {
     let mut guard = TENANTS.write().await;
     let m = match &mut *guard {
@@ -673,7 +701,7 @@ where
             tenant_id,
             e.get().current_state(),
         )),
-        hash_map::Entry::Vacant(v) => match insert_fn() {
+        hash_map::Entry::Vacant(v) => match insert_fn().await {
             Ok(tenant) => {
                 v.insert(tenant.clone());
                 Ok(tenant)
diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs
index 50bb8b43de..13f3fac41c 100644
--- a/pageserver/src/tenant/remote_timeline_client.rs
+++ b/pageserver/src/tenant/remote_timeline_client.rs
@@ -342,7 +342,12 @@ impl RemoteTimelineClient {
     ) -> RemoteTimelineClient {
         RemoteTimelineClient {
             conf,
-            runtime: BACKGROUND_RUNTIME.handle().to_owned(),
+            runtime: if cfg!(test) {
+                // remote_timeline_client.rs tests rely on current-thread runtime
+                tokio::runtime::Handle::current()
+            } else {
+                BACKGROUND_RUNTIME.handle().clone()
+            },
             tenant_id,
             timeline_id,
             generation,
@@ -1463,11 +1468,8 @@ mod tests {
         },
         DEFAULT_PG_VERSION,
     };
-    use remote_storage::{RemoteStorageConfig, RemoteStorageKind};
-    use std::{
-        collections::HashSet,
-        path::{Path, PathBuf},
-    };
+
+    use std::{collections::HashSet, path::Path};
     use utils::lsn::Lsn;
 
     pub(super) fn dummy_contents(name: &str) -> Vec<u8> {
@@ -1524,8 +1526,6 @@ mod tests {
         tenant: Arc<Tenant>,
         timeline: Arc<Timeline>,
         tenant_ctx: RequestContext,
-        remote_fs_dir: PathBuf,
-        client: Arc<RemoteTimelineClient>,
     }
 
     impl TestSetup {
@@ -1535,52 +1535,15 @@ mod tests {
             let harness = TenantHarness::create(test_name)?;
             let (tenant, ctx) = harness.load().await;
 
-            // create an empty timeline directory
             let timeline = tenant
                 .create_test_timeline(TIMELINE_ID, Lsn(8), DEFAULT_PG_VERSION, &ctx)
                 .await?;
 
-            let remote_fs_dir = harness.conf.workdir.join("remote_fs");
-            std::fs::create_dir_all(remote_fs_dir)?;
-            let remote_fs_dir = std::fs::canonicalize(harness.conf.workdir.join("remote_fs"))?;
-
-            let storage_config = RemoteStorageConfig {
-                max_concurrent_syncs: std::num::NonZeroUsize::new(
-                    remote_storage::DEFAULT_REMOTE_STORAGE_MAX_CONCURRENT_SYNCS,
-                )
-                .unwrap(),
-                max_sync_errors: std::num::NonZeroU32::new(
-                    remote_storage::DEFAULT_REMOTE_STORAGE_MAX_SYNC_ERRORS,
-                )
-                .unwrap(),
-                storage: RemoteStorageKind::LocalFs(remote_fs_dir.clone()),
-            };
-
-            let generation = Generation::new(0xdeadbeef);
-
-            let storage = GenericRemoteStorage::from_config(&storage_config).unwrap();
-
-            let client = Arc::new(RemoteTimelineClient {
-                conf: harness.conf,
-                runtime: tokio::runtime::Handle::current(),
-                tenant_id: harness.tenant_id,
-                timeline_id: TIMELINE_ID,
-                generation,
-                storage_impl: storage,
-                upload_queue: Mutex::new(UploadQueue::Uninitialized),
-                metrics: Arc::new(RemoteTimelineClientMetrics::new(
-                    &harness.tenant_id,
-                    &TIMELINE_ID,
-                )),
-            });
-
             Ok(Self {
                 harness,
                 tenant,
                 timeline,
                 tenant_ctx: ctx,
-                remote_fs_dir,
-                client,
             })
         }
     }
@@ -1605,26 +1568,37 @@ mod tests {
         let TestSetup {
             harness,
             tenant: _tenant,
-            timeline: _timeline,
+            timeline,
             tenant_ctx: _tenant_ctx,
-            remote_fs_dir,
-            client,
         } = TestSetup::new("upload_scheduling").await.unwrap();
 
+        let client = timeline.remote_client.as_ref().unwrap();
+
+        // Download the current index part and remember the single layer it already lists
+        let initial_index_part = match client.download_index_file().await.unwrap() {
+            MaybeDeletedIndexPart::IndexPart(index_part) => index_part,
+            MaybeDeletedIndexPart::Deleted(_) => panic!("unexpectedly got deleted index part"),
+        };
+        let initial_layers = initial_index_part
+            .layer_metadata
+            .keys()
+            .map(|f| f.to_owned())
+            .collect::<HashSet<LayerFileName>>();
+        let initial_layer = {
+            assert!(initial_layers.len() == 1);
+            initial_layers.into_iter().next().unwrap()
+        };
+
         let timeline_path = harness.timeline_path(&TIMELINE_ID);
 
         println!("workdir: {}", harness.conf.workdir.display());
 
-        let remote_timeline_dir =
-            remote_fs_dir.join(timeline_path.strip_prefix(&harness.conf.workdir).unwrap());
+        let remote_timeline_dir = harness
+            .remote_fs_dir
+            .join(timeline_path.strip_prefix(&harness.conf.workdir).unwrap());
         println!("remote_timeline_dir: {}", remote_timeline_dir.display());
 
-        let metadata = dummy_metadata(Lsn(0x10));
-        client
-            .init_upload_queue_for_empty_remote(&metadata)
-            .unwrap();
-
-        let generation = Generation::new(0xdeadbeef);
+        let generation = harness.generation;
 
         // Create a couple of dummy files,  schedule upload for them
         let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
@@ -1705,6 +1679,7 @@ mod tests {
                 .map(|f| f.to_owned())
                 .collect(),
             &[
+                &initial_layer.file_name(),
                 &layer_file_name_1.file_name(),
                 &layer_file_name_2.file_name(),
             ],
@@ -1734,6 +1709,7 @@ mod tests {
         }
         assert_remote_files(
             &[
+                &initial_layer.file_name(),
                 &layer_file_name_1.file_name(),
                 &layer_file_name_2.file_name(),
                 "index_part.json",
@@ -1747,6 +1723,7 @@ mod tests {
 
         assert_remote_files(
             &[
+                &initial_layer.file_name(),
                 &layer_file_name_2.file_name(),
                 &layer_file_name_3.file_name(),
                 "index_part.json",
@@ -1763,16 +1740,10 @@ mod tests {
         let TestSetup {
             harness,
             tenant: _tenant,
-            timeline: _timeline,
-            client,
+            timeline,
             ..
         } = TestSetup::new("metrics").await.unwrap();
-
-        let metadata = dummy_metadata(Lsn(0x10));
-        client
-            .init_upload_queue_for_empty_remote(&metadata)
-            .unwrap();
-
+        let client = timeline.remote_client.as_ref().unwrap();
         let timeline_path = harness.timeline_path(&TIMELINE_ID);
 
         let layer_file_name_1: LayerFileName = "000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__00000000016B59D8-00000000016B5A51".parse().unwrap();
@@ -1783,11 +1754,20 @@ mod tests {
         )
         .unwrap();
 
-        #[derive(Debug, PartialEq)]
+        #[derive(Debug, PartialEq, Clone, Copy)]
         struct BytesStartedFinished {
             started: Option<usize>,
             finished: Option<usize>,
         }
+        impl std::ops::Add for BytesStartedFinished {
+            type Output = Self;
+            fn add(self, rhs: Self) -> Self::Output {
+                let sum = |a: Option<usize>, b: Option<usize>| a.map(|v| v + b.unwrap_or(0));
+                let started = sum(self.started, rhs.started);
+                let finished = sum(self.finished, rhs.finished);
+                Self { started, finished }
+            }
+        }
         let get_bytes_started_stopped = || {
             let started = client
                 .metrics
@@ -1804,47 +1784,38 @@ mod tests {
         };
 
         // Test
+        tracing::info!("now doing actual test");
 
-        let generation = Generation::new(0xdeadbeef);
-
-        let init = get_bytes_started_stopped();
+        let actual_a = get_bytes_started_stopped();
 
         client
             .schedule_layer_file_upload(
                 &layer_file_name_1,
-                &LayerFileMetadata::new(content_1.len() as u64, generation),
+                &LayerFileMetadata::new(content_1.len() as u64, harness.generation),
             )
             .unwrap();
 
-        let pre = get_bytes_started_stopped();
+        let actual_b = get_bytes_started_stopped();
 
         client.wait_completion().await.unwrap();
 
-        let post = get_bytes_started_stopped();
+        let actual_c = get_bytes_started_stopped();
 
         // Validate
 
-        assert_eq!(
-            init,
-            BytesStartedFinished {
-                started: None,
-                finished: None
-            }
-        );
-        assert_eq!(
-            pre,
-            BytesStartedFinished {
+        let expected_b = actual_a
+            + BytesStartedFinished {
                 started: Some(content_1.len()),
                 // assert that the _finished metric is created eagerly so that subtractions work on first sample
                 finished: Some(0),
-            }
-        );
-        assert_eq!(
-            post,
-            BytesStartedFinished {
+            };
+        assert_eq!(actual_b, expected_b);
+
+        let expected_c = actual_a
+            + BytesStartedFinished {
                 started: Some(content_1.len()),
-                finished: Some(content_1.len())
-            }
-        );
+                finished: Some(content_1.len()),
+            };
+        assert_eq!(actual_c, expected_c);
     }
 }
diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs
index 9cc5256568..05f9f5dcd2 100644
--- a/pageserver/src/tenant/remote_timeline_client/index.rs
+++ b/pageserver/src/tenant/remote_timeline_client/index.rs
@@ -96,6 +96,10 @@ impl IndexPart {
     ///      is always generated from the keys of `layer_metadata`)
     /// - 4: timeline_layers is fully removed.
     const LATEST_VERSION: usize = 4;
+
+    // Versions we may see when reading from a bucket.
+    pub const KNOWN_VERSIONS: &[usize] = &[1, 2, 3, 4];
+
     pub const FILE_NAME: &'static str = "index_part.json";
 
     pub fn new(
@@ -117,6 +121,16 @@ impl IndexPart {
             deleted_at: None,
         }
     }
+
+    pub fn get_version(&self) -> usize {
+        self.version
+    }
+
+    /// If you want this under normal operations, read it from self.metadata:
+    /// this method is just for the scrubber to use when validating an index.
+    pub fn get_disk_consistent_lsn(&self) -> Lsn {
+        self.disk_consistent_lsn
+    }
 }
 
 impl TryFrom<&UploadQueueInitialized> for IndexPart {
@@ -137,7 +151,7 @@ impl TryFrom<&UploadQueueInitialized> for IndexPart {
 /// Serialized form of [`LayerFileMetadata`].
 #[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
 pub struct IndexLayerMetadata {
-    pub(super) file_size: u64,
+    pub file_size: u64,
 
     #[serde(default = "Generation::none")]
     #[serde(skip_serializing_if = "Generation::is_none")]
diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs
index d9df346a14..60427a22e4 100644
--- a/pageserver/src/tenant/storage_layer/delta_layer.rs
+++ b/pageserver/src/tenant/storage_layer/delta_layer.rs
@@ -45,8 +45,8 @@ use pageserver_api::models::{HistoricLayerInfo, LayerAccessKind};
 use rand::{distributions::Alphanumeric, Rng};
 use serde::{Deserialize, Serialize};
 use std::fs::{self, File};
+use std::io::SeekFrom;
 use std::io::{BufWriter, Write};
-use std::io::{Seek, SeekFrom};
 use std::ops::Range;
 use std::os::unix::fs::FileExt;
 use std::path::{Path, PathBuf};
@@ -219,7 +219,7 @@ pub struct DeltaLayerInner {
     index_root_blk: u32,
 
     /// Reader object for reading blocks from the file.
-    file: FileBlockReader<VirtualFile>,
+    file: FileBlockReader,
 }
 
 impl AsRef<DeltaLayerInner> for DeltaLayerInner {
diff --git a/pageserver/src/tenant/storage_layer/filename.rs b/pageserver/src/tenant/storage_layer/filename.rs
index b52c20a7c6..9fb0c23dd7 100644
--- a/pageserver/src/tenant/storage_layer/filename.rs
+++ b/pageserver/src/tenant/storage_layer/filename.rs
@@ -212,7 +212,7 @@ pub enum LayerFileName {
 }
 
 impl LayerFileName {
-    pub(crate) fn file_name(&self) -> String {
+    pub fn file_name(&self) -> String {
         self.to_string()
     }
 
diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs
index b1fc257092..f329041fb1 100644
--- a/pageserver/src/tenant/storage_layer/image_layer.rs
+++ b/pageserver/src/tenant/storage_layer/image_layer.rs
@@ -42,8 +42,8 @@ use pageserver_api::models::{HistoricLayerInfo, LayerAccessKind};
 use rand::{distributions::Alphanumeric, Rng};
 use serde::{Deserialize, Serialize};
 use std::fs::{self, File};
+use std::io::SeekFrom;
 use std::io::Write;
-use std::io::{Seek, SeekFrom};
 use std::ops::Range;
 use std::os::unix::prelude::FileExt;
 use std::path::{Path, PathBuf};
@@ -155,7 +155,7 @@ pub struct ImageLayerInner {
     lsn: Lsn,
 
     /// Reader object for reading blocks from the file.
-    file: FileBlockReader<VirtualFile>,
+    file: FileBlockReader,
 }
 
 impl std::fmt::Debug for ImageLayerInner {
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs
index f0ae385806..816af214a5 100644
--- a/pageserver/src/tenant/timeline.rs
+++ b/pageserver/src/tenant/timeline.rs
@@ -90,6 +90,7 @@ use self::logical_size::LogicalSize;
 use self::walreceiver::{WalReceiver, WalReceiverConf};
 
 use super::config::TenantConf;
+use super::debug_assert_current_span_has_tenant_and_timeline_id;
 use super::remote_timeline_client::index::IndexPart;
 use super::remote_timeline_client::RemoteTimelineClient;
 use super::storage_layer::{
@@ -933,6 +934,48 @@ impl Timeline {
         self.launch_eviction_task(background_jobs_can_start);
     }
 
+    #[instrument(skip_all, fields(timeline_id=%self.timeline_id))]
+    pub async fn shutdown(self: &Arc<Self>, freeze_and_flush: bool) {
+        debug_assert_current_span_has_tenant_and_timeline_id();
+
+        // prevent writes to the InMemoryLayer
+        task_mgr::shutdown_tasks(
+            Some(TaskKind::WalReceiverManager),
+            Some(self.tenant_id),
+            Some(self.timeline_id),
+        )
+        .await;
+
+        // now all writers to InMemory layer are gone, do the final flush if requested
+        if freeze_and_flush {
+            match self.freeze_and_flush().await {
+                Ok(()) => {}
+                Err(e) => {
+                    warn!("failed to freeze and flush: {e:#}");
+                    return; // TODO: should probably drain remote timeline client anyways?
+                }
+            }
+
+            // drain the upload queue
+            let res = if let Some(client) = self.remote_client.as_ref() {
+                // if we did not wait for completion here, it might be our shutdown process
+                // didn't wait for remote uploads to complete at all, as new tasks can forever
+                // be spawned.
+                //
+                // what is problematic is the shutting down of RemoteTimelineClient, because
+                // obviously it does not make sense to stop while we wait for it, but what
+                // about corner cases like s3 suddenly hanging up?
+                client.wait_completion().await
+            } else {
+                Ok(())
+            };
+
+            if let Err(e) = res {
+                warn!("failed to await for frozen and flushed uploads: {e:#}");
+            }
+        }
+    }
+
     pub fn set_state(&self, new_state: TimelineState) {
         match (self.current_state(), new_state) {
             (equal_state_1, equal_state_2) if equal_state_1 == equal_state_2 => {
@@ -2735,6 +2778,7 @@ impl Timeline {
         if disk_consistent_lsn != old_disk_consistent_lsn {
             assert!(disk_consistent_lsn > old_disk_consistent_lsn);
             self.update_metadata_file(disk_consistent_lsn, layer_paths_to_upload)
+                .await
                 .context("update_metadata_file")?;
             // Also update the in-memory copy
             self.disk_consistent_lsn.store(disk_consistent_lsn);
@@ -2743,7 +2787,7 @@ impl Timeline {
     }
 
     /// Update metadata file
-    fn update_metadata_file(
+    async fn update_metadata_file(
         &self,
         disk_consistent_lsn: Lsn,
         layer_paths_to_upload: HashMap<LayerFileName, LayerFileMetadata>,
@@ -2784,14 +2828,9 @@ impl Timeline {
             x.unwrap()
         ));
 
-        save_metadata(
-            self.conf,
-            &self.tenant_id,
-            &self.timeline_id,
-            &metadata,
-            false,
-        )
-        .context("save_metadata")?;
+        save_metadata(self.conf, &self.tenant_id, &self.timeline_id, &metadata)
+            .await
+            .context("save_metadata")?;
 
         if let Some(remote_client) = &self.remote_client {
             for (path, layer_metadata) in layer_paths_to_upload {
@@ -4122,7 +4161,8 @@ impl Timeline {
         if !layers_to_remove.is_empty() {
             // Persist the new GC cutoff value in the metadata file, before
             // we actually remove anything.
-            self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())?;
+            self.update_metadata_file(self.disk_consistent_lsn.load(), HashMap::new())
+                .await?;
 
             // Actually delete the layers from disk and remove them from the map.
             // (couldn't do this in the loop above, because you cannot modify a collection
@@ -4742,22 +4782,8 @@ mod tests {
         let harness =
             TenantHarness::create("two_layer_eviction_attempts_at_the_same_time").unwrap();
 
-        let remote_storage = {
-            // this is never used for anything, because of how the create_test_timeline works, but
-            // it is with us in spirit and a Some.
-            use remote_storage::{GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind};
-            let path = harness.conf.workdir.join("localfs");
-            std::fs::create_dir_all(&path).unwrap();
-            let config = RemoteStorageConfig {
-                max_concurrent_syncs: std::num::NonZeroUsize::new(2_000_000).unwrap(),
-                max_sync_errors: std::num::NonZeroU32::new(3_000_000).unwrap(),
-                storage: RemoteStorageKind::LocalFs(path),
-            };
-            GenericRemoteStorage::from_config(&config).unwrap()
-        };
-
         let ctx = any_context();
-        let tenant = harness.try_load(&ctx, Some(remote_storage)).await.unwrap();
+        let tenant = harness.try_load(&ctx).await.unwrap();
         let timeline = tenant
             .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
             .await
@@ -4807,22 +4833,8 @@ mod tests {
     async fn layer_eviction_aba_fails() {
         let harness = TenantHarness::create("layer_eviction_aba_fails").unwrap();
 
-        let remote_storage = {
-            // this is never used for anything, because of how the create_test_timeline works, but
-            // it is with us in spirit and a Some.
-            use remote_storage::{GenericRemoteStorage, RemoteStorageConfig, RemoteStorageKind};
-            let path = harness.conf.workdir.join("localfs");
-            std::fs::create_dir_all(&path).unwrap();
-            let config = RemoteStorageConfig {
-                max_concurrent_syncs: std::num::NonZeroUsize::new(2_000_000).unwrap(),
-                max_sync_errors: std::num::NonZeroU32::new(3_000_000).unwrap(),
-                storage: RemoteStorageKind::LocalFs(path),
-            };
-            GenericRemoteStorage::from_config(&config).unwrap()
-        };
-
         let ctx = any_context();
-        let tenant = harness.try_load(&ctx, Some(remote_storage)).await.unwrap();
+        let tenant = harness.try_load(&ctx).await.unwrap();
         let timeline = tenant
             .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx)
             .await
diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs
index a86b8fa2a6..41c5eb96cf 100644
--- a/pageserver/src/virtual_file.rs
+++ b/pageserver/src/virtual_file.rs
@@ -13,7 +13,7 @@
 use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME};
 use once_cell::sync::OnceCell;
 use std::fs::{self, File, OpenOptions};
-use std::io::{Error, ErrorKind, Read, Seek, SeekFrom, Write};
+use std::io::{Error, ErrorKind, Seek, SeekFrom, Write};
 use std::os::unix::fs::FileExt;
 use std::path::{Path, PathBuf};
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
@@ -172,6 +172,41 @@ impl OpenFiles {
     }
 }
 
+#[derive(Debug, thiserror::Error)]
+pub enum CrashsafeOverwriteError {
+    #[error("final path has no parent dir")]
+    FinalPathHasNoParentDir,
+    #[error("remove tempfile: {0}")]
+    RemovePreviousTempfile(#[source] std::io::Error),
+    #[error("create tempfile: {0}")]
+    CreateTempfile(#[source] std::io::Error),
+    #[error("write tempfile: {0}")]
+    WriteContents(#[source] std::io::Error),
+    #[error("sync tempfile: {0}")]
+    SyncTempfile(#[source] std::io::Error),
+    #[error("rename tempfile to final path: {0}")]
+    RenameTempfileToFinalPath(#[source] std::io::Error),
+    #[error("open final path parent dir: {0}")]
+    OpenFinalPathParentDir(#[source] std::io::Error),
+    #[error("sync final path parent dir: {0}")]
+    SyncFinalPathParentDir(#[source] std::io::Error),
+}
+impl CrashsafeOverwriteError {
+    /// Returns true iff the new contents are known to be durably stored at the final path.
+    pub fn are_new_contents_durable(&self) -> bool {
+        match self {
+            Self::FinalPathHasNoParentDir => false,
+            Self::RemovePreviousTempfile(_) => false,
+            Self::CreateTempfile(_) => false,
+            Self::WriteContents(_) => false,
+            Self::SyncTempfile(_) => false,
+            Self::RenameTempfileToFinalPath(_) => false,
+            Self::OpenFinalPathParentDir(_) => false,
+            Self::SyncFinalPathParentDir(_) => false, // dir fsync failed: rename may not be persisted
+        }
+    }
+}
+
 impl VirtualFile {
     /// Open a file in read-only mode. Like File::open.
     pub fn open(path: &Path) -> Result<VirtualFile, std::io::Error> {
@@ -236,6 +271,56 @@ impl VirtualFile {
         Ok(vfile)
     }
 
+    /// Writes a file to the specified `final_path` in a crash-safe fashion.
+    ///
+    /// The file is first written to the specified `tmp_path`, and in a second
+    /// step, the tmp path is renamed to the final path. As renames are
+    /// atomic, a crash during the write operation will never leave behind a
+    /// partially written file.
+    pub async fn crashsafe_overwrite(
+        final_path: &Path,
+        tmp_path: &Path,
+        content: &[u8],
+    ) -> Result<(), CrashsafeOverwriteError> {
+        let Some(final_path_parent) = final_path.parent() else {
+            return Err(CrashsafeOverwriteError::FinalPathHasNoParentDir);
+        };
+        match std::fs::remove_file(tmp_path) {
+            Ok(()) => {}
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
+            Err(e) => return Err(CrashsafeOverwriteError::RemovePreviousTempfile(e)),
+        }
+        let mut file = Self::open_with_options(
+            tmp_path,
+            OpenOptions::new()
+                .write(true)
+                // Use `create_new` so that, if we race with ourselves or something else,
+                // we bail out instead of causing damage.
+                .create_new(true),
+        )
+        .map_err(CrashsafeOverwriteError::CreateTempfile)?;
+        file.write_all(content)
+            .map_err(CrashsafeOverwriteError::WriteContents)?;
+        file.sync_all()
+            .map_err(CrashsafeOverwriteError::SyncTempfile)?;
+        drop(file); // before the rename, that's important!
+                    // renames are atomic
+        std::fs::rename(tmp_path, final_path)
+            .map_err(CrashsafeOverwriteError::RenameTempfileToFinalPath)?;
+        // Only open the final path's parent dirfd now, so that this operation only
+        // ever holds one VirtualFile fd at a time.  That's important because
+        // the current `find_victim_slot` impl might pick the same slot for both
+        // VirtualFiles, and it eventually does a blocking write lock instead of
+        // try_lock.
+        let final_parent_dirfd =
+            Self::open_with_options(final_path_parent, OpenOptions::new().read(true))
+                .map_err(CrashsafeOverwriteError::OpenFinalPathParentDir)?;
+        final_parent_dirfd
+            .sync_all()
+            .map_err(CrashsafeOverwriteError::SyncFinalPathParentDir)?;
+        Ok(())
+    }
+
     /// Call File::sync_all() on the underlying File.
     pub fn sync_all(&self) -> Result<(), Error> {
         self.with_file("fsync", |file| file.sync_all())?
@@ -321,54 +406,8 @@ impl VirtualFile {
         drop(self);
         std::fs::remove_file(path).expect("failed to remove the virtual file");
     }
-}
 
-impl Drop for VirtualFile {
-    /// If a VirtualFile is dropped, close the underlying file if it was open.
-    fn drop(&mut self) {
-        let handle = self.handle.get_mut().unwrap();
-
-        // We could check with a read-lock first, to avoid waiting on an
-        // unrelated I/O.
-        let slot = &get_open_files().slots[handle.index];
-        let mut slot_guard = slot.inner.write().unwrap();
-        if slot_guard.tag == handle.tag {
-            slot.recently_used.store(false, Ordering::Relaxed);
-            // there is also operation "close-by-replace" for closes done on eviction for
-            // comparison.
-            STORAGE_IO_TIME
-                .with_label_values(&["close"])
-                .observe_closure_duration(|| drop(slot_guard.file.take()));
-        }
-    }
-}
-
-impl Read for VirtualFile {
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
-        let pos = self.pos;
-        let n = self.read_at(buf, pos)?;
-        self.pos += n as u64;
-        Ok(n)
-    }
-}
-
-impl Write for VirtualFile {
-    fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
-        let pos = self.pos;
-        let n = self.write_at(buf, pos)?;
-        self.pos += n as u64;
-        Ok(n)
-    }
-
-    fn flush(&mut self) -> Result<(), std::io::Error> {
-        // flush is no-op for File (at least on unix), so we don't need to do
-        // anything here either.
-        Ok(())
-    }
-}
-
-impl Seek for VirtualFile {
-    fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
+    pub fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
         match pos {
             SeekFrom::Start(offset) => {
                 self.pos = offset;
@@ -392,10 +431,66 @@ impl Seek for VirtualFile {
         }
         Ok(self.pos)
     }
-}
 
-impl FileExt for VirtualFile {
-    fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
+    #[cfg(test)]
+    async fn read_to_end(&mut self, buf: &mut Vec<u8>) -> Result<(), Error> {
+        let mut chunk = [0u8; 128];
+        loop {
+            // Read at the cursor; stop at EOF, retry if the read was interrupted.
+            let n = match self.read_at(&mut chunk, self.pos).await {
+                Ok(0) => return Ok(()),
+                Ok(n) => n,
+                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
+                Err(e) => return Err(e),
+            };
+            self.pos += n as u64;
+            buf.extend_from_slice(&chunk[..n]);
+        }
+    }
+
+    // Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#117-135
+    pub async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> Result<(), Error> {
+        while !buf.is_empty() {
+            match self.read_at(buf, offset).await {
+                Ok(0) => {
+                    return Err(Error::new(
+                        std::io::ErrorKind::UnexpectedEof,
+                        "failed to fill whole buffer",
+                    ))
+                }
+                Ok(n) => {
+                    buf = &mut buf[n..];
+                    offset += n as u64;
+                }
+                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
+                Err(e) => return Err(e),
+            }
+        }
+        Ok(())
+    }
+
+    // Copied from https://doc.rust-lang.org/1.72.0/src/std/os/unix/fs.rs.html#219-235
+    pub async fn write_all_at(&self, mut buf: &[u8], mut offset: u64) -> Result<(), Error> {
+        while !buf.is_empty() {
+            match self.write_at(buf, offset) {
+                Ok(0) => {
+                    return Err(Error::new(
+                        std::io::ErrorKind::WriteZero,
+                        "failed to write whole buffer",
+                    ));
+                }
+                Ok(n) => {
+                    buf = &buf[n..];
+                    offset += n as u64;
+                }
+                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => {}
+                Err(e) => return Err(e),
+            }
+        }
+        Ok(())
+    }
+
+    pub async fn read_at(&self, buf: &mut [u8], offset: u64) -> Result<usize, Error> {
         let result = self.with_file("read", |file| file.read_at(buf, offset))?;
         if let Ok(size) = result {
             STORAGE_IO_SIZE
@@ -405,7 +500,7 @@ impl FileExt for VirtualFile {
         result
     }
 
-    fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
+    pub fn write_at(&self, buf: &[u8], offset: u64) -> Result<usize, Error> {
         let result = self.with_file("write", |file| file.write_at(buf, offset))?;
         if let Ok(size) = result {
             STORAGE_IO_SIZE
@@ -416,6 +511,41 @@ impl FileExt for VirtualFile {
     }
 }
 
+impl Drop for VirtualFile {
+    /// If a VirtualFile is dropped, close the underlying file if it was open.
+    fn drop(&mut self) {
+        let handle = self.handle.get_mut().unwrap();
+
+        // We could check with a read-lock first, to avoid waiting on an
+        // unrelated I/O.
+        let slot = &get_open_files().slots[handle.index];
+        let mut slot_guard = slot.inner.write().unwrap();
+        if slot_guard.tag == handle.tag {
+            slot.recently_used.store(false, Ordering::Relaxed);
+            // there is also operation "close-by-replace" for closes done on eviction for
+            // comparison.
+            STORAGE_IO_TIME
+                .with_label_values(&["close"])
+                .observe_closure_duration(|| drop(slot_guard.file.take()));
+        }
+    }
+}
+
+impl Write for VirtualFile {
+    fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
+        let pos = self.pos;
+        let n = self.write_at(buf, pos)?;
+        self.pos += n as u64;
+        Ok(n)
+    }
+
+    fn flush(&mut self) -> Result<(), std::io::Error> {
+        // flush is no-op for File (at least on unix), so we don't need to do
+        // anything here either.
+        Ok(())
+    }
+}
+
 impl OpenFiles {
     fn new(num_slots: usize) -> OpenFiles {
         let mut slots = Box::new(Vec::with_capacity(num_slots));
@@ -470,32 +600,66 @@ mod tests {
     use rand::thread_rng;
     use rand::Rng;
     use std::sync::Arc;
-    use std::thread;
 
-    // Helper function to slurp contents of a file, starting at the current position,
-    // into a string
-    fn read_string<FD>(vfile: &mut FD) -> Result<String, Error>
-    where
-        FD: Read,
-    {
-        let mut buf = String::new();
-        vfile.read_to_string(&mut buf)?;
-        Ok(buf)
+    enum MaybeVirtualFile {
+        VirtualFile(VirtualFile),
+        File(File),
     }
 
-    // Helper function to slurp a portion of a file into a string
-    fn read_string_at<FD>(vfile: &mut FD, pos: u64, len: usize) -> Result<String, Error>
-    where
-        FD: FileExt,
-    {
-        let mut buf = Vec::new();
-        buf.resize(len, 0);
-        vfile.read_exact_at(&mut buf, pos)?;
-        Ok(String::from_utf8(buf).unwrap())
+    impl MaybeVirtualFile {
+        async fn read_exact_at(&self, buf: &mut [u8], offset: u64) -> Result<(), Error> {
+            match self {
+                MaybeVirtualFile::VirtualFile(file) => file.read_exact_at(buf, offset).await,
+                MaybeVirtualFile::File(file) => file.read_exact_at(buf, offset),
+            }
+        }
+        async fn write_all_at(&self, buf: &[u8], offset: u64) -> Result<(), Error> {
+            match self {
+                MaybeVirtualFile::VirtualFile(file) => file.write_all_at(buf, offset).await,
+                MaybeVirtualFile::File(file) => file.write_all_at(buf, offset),
+            }
+        }
+        fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
+            match self {
+                MaybeVirtualFile::VirtualFile(file) => file.seek(pos),
+                MaybeVirtualFile::File(file) => file.seek(pos),
+            }
+        }
+        async fn write_all(&mut self, buf: &[u8]) -> Result<(), Error> {
+            match self {
+                MaybeVirtualFile::VirtualFile(file) => file.write_all(buf),
+                MaybeVirtualFile::File(file) => file.write_all(buf),
+            }
+        }
+
+        // Helper function to slurp contents of a file, starting at the current position,
+        // into a string
+        async fn read_string(&mut self) -> Result<String, Error> {
+            use std::io::Read;
+            let mut buf = String::new();
+            match self {
+                MaybeVirtualFile::VirtualFile(file) => {
+                    let mut buf = Vec::new();
+                    file.read_to_end(&mut buf).await?;
+                    return Ok(String::from_utf8(buf).unwrap());
+                }
+                MaybeVirtualFile::File(file) => {
+                    file.read_to_string(&mut buf)?;
+                }
+            }
+            Ok(buf)
+        }
+
+        // Helper function to slurp a portion of a file into a string
+        async fn read_string_at(&mut self, pos: u64, len: usize) -> Result<String, Error> {
+            let mut buf = vec![0; len];
+            self.read_exact_at(&mut buf, pos).await?;
+            Ok(String::from_utf8(buf).unwrap())
+        }
     }
 
-    #[test]
-    fn test_virtual_files() -> Result<(), Error> {
+    #[tokio::test]
+    async fn test_virtual_files() -> Result<(), Error> {
         // The real work is done in the test_files() helper function. This
         // allows us to run the same set of tests against a native File, and
         // VirtualFile. We trust the native Files and wouldn't need to test them,
@@ -504,21 +668,23 @@ mod tests {
         // native files, you will run out of file descriptors if the ulimit
         // is low enough.)
         test_files("virtual_files", |path, open_options| {
-            VirtualFile::open_with_options(path, open_options)
+            let vf = VirtualFile::open_with_options(path, open_options)?;
+            Ok(MaybeVirtualFile::VirtualFile(vf))
         })
+        .await
     }
 
-    #[test]
-    fn test_physical_files() -> Result<(), Error> {
+    #[tokio::test]
+    async fn test_physical_files() -> Result<(), Error> {
         test_files("physical_files", |path, open_options| {
-            open_options.open(path)
+            Ok(MaybeVirtualFile::File(open_options.open(path)?))
         })
+        .await
     }
 
-    fn test_files<OF, FD>(testname: &str, openfunc: OF) -> Result<(), Error>
+    async fn test_files<OF>(testname: &str, openfunc: OF) -> Result<(), Error>
     where
-        FD: Read + Write + Seek + FileExt,
-        OF: Fn(&Path, &OpenOptions) -> Result<FD, std::io::Error>,
+        OF: Fn(&Path, &OpenOptions) -> Result<MaybeVirtualFile, std::io::Error>,
     {
         let testdir = crate::config::PageServerConf::test_repo_dir(testname);
         std::fs::create_dir_all(&testdir)?;
@@ -528,36 +694,36 @@ mod tests {
             &path_a,
             OpenOptions::new().write(true).create(true).truncate(true),
         )?;
-        file_a.write_all(b"foobar")?;
+        file_a.write_all(b"foobar").await?;
 
         // cannot read from a file opened in write-only mode
-        assert!(read_string(&mut file_a).is_err());
+        let _ = file_a.read_string().await.unwrap_err();
 
         // Close the file and re-open for reading
         let mut file_a = openfunc(&path_a, OpenOptions::new().read(true))?;
 
         // cannot write to a file opened in read-only mode
-        assert!(file_a.write(b"bar").is_err());
+        let _ = file_a.write_all(b"bar").await.unwrap_err();
 
         // Try simple read
-        assert_eq!("foobar", read_string(&mut file_a)?);
+        assert_eq!("foobar", file_a.read_string().await?);
 
         // It's positioned at the EOF now.
-        assert_eq!("", read_string(&mut file_a)?);
+        assert_eq!("", file_a.read_string().await?);
 
         // Test seeks.
         assert_eq!(file_a.seek(SeekFrom::Start(1))?, 1);
-        assert_eq!("oobar", read_string(&mut file_a)?);
+        assert_eq!("oobar", file_a.read_string().await?);
 
         assert_eq!(file_a.seek(SeekFrom::End(-2))?, 4);
-        assert_eq!("ar", read_string(&mut file_a)?);
+        assert_eq!("ar", file_a.read_string().await?);
 
         assert_eq!(file_a.seek(SeekFrom::Start(1))?, 1);
         assert_eq!(file_a.seek(SeekFrom::Current(2))?, 3);
-        assert_eq!("bar", read_string(&mut file_a)?);
+        assert_eq!("bar", file_a.read_string().await?);
 
         assert_eq!(file_a.seek(SeekFrom::Current(-5))?, 1);
-        assert_eq!("oobar", read_string(&mut file_a)?);
+        assert_eq!("oobar", file_a.read_string().await?);
 
         // Test erroneous seeks to before byte 0
         assert!(file_a.seek(SeekFrom::End(-7)).is_err());
@@ -565,7 +731,7 @@ mod tests {
         assert!(file_a.seek(SeekFrom::Current(-2)).is_err());
 
         // the erroneous seek should have left the position unchanged
-        assert_eq!("oobar", read_string(&mut file_a)?);
+        assert_eq!("oobar", file_a.read_string().await?);
 
         // Create another test file, and try FileExt functions on it.
         let path_b = testdir.join("file_b");
@@ -577,10 +743,10 @@ mod tests {
                 .create(true)
                 .truncate(true),
         )?;
-        file_b.write_all_at(b"BAR", 3)?;
-        file_b.write_all_at(b"FOO", 0)?;
+        file_b.write_all_at(b"BAR", 3).await?;
+        file_b.write_all_at(b"FOO", 0).await?;
 
-        assert_eq!(read_string_at(&mut file_b, 2, 3)?, "OBA");
+        assert_eq!(file_b.read_string_at(2, 3).await?, "OBA");
 
         // Open a lot of files, enough to cause some evictions. (Or to be precise,
         // open the same file many times. The effect is the same.)
@@ -591,7 +757,7 @@ mod tests {
         let mut vfiles = Vec::new();
         for _ in 0..100 {
             let mut vfile = openfunc(&path_b, OpenOptions::new().read(true))?;
-            assert_eq!("FOOBAR", read_string(&mut vfile)?);
+            assert_eq!("FOOBAR", vfile.read_string().await?);
             vfiles.push(vfile);
         }
 
@@ -600,13 +766,13 @@ mod tests {
 
         // The underlying file descriptor for 'file_a' should be closed now. Try to read
         // from it again. We left the file positioned at offset 1 above.
-        assert_eq!("oobar", read_string(&mut file_a)?);
+        assert_eq!("oobar", file_a.read_string().await?);
 
         // Check that all the other FDs still work too. Use them in random order for
         // good measure.
         vfiles.as_mut_slice().shuffle(&mut thread_rng());
         for vfile in vfiles.iter_mut() {
-            assert_eq!("OOBAR", read_string_at(vfile, 1, 5)?);
+            assert_eq!("OOBAR", vfile.read_string_at(1, 5).await?);
         }
 
         Ok(())
@@ -641,28 +807,29 @@ mod tests {
         let files = Arc::new(files);
 
         // Launch many threads, and use the virtual files concurrently in random order.
-        let mut threads = Vec::new();
-        for threadno in 0..THREADS {
-            let builder =
-                thread::Builder::new().name(format!("test_vfile_concurrency thread {}", threadno));
-
+        let rt = tokio::runtime::Builder::new_multi_thread()
+            .worker_threads(THREADS)
+            .thread_name("test_vfile_concurrency thread")
+            .build()
+            .unwrap();
+        let mut tasks = Vec::with_capacity(THREADS);
+        for _threadno in 0..THREADS {
             let files = files.clone();
-            let thread = builder
-                .spawn(move || {
-                    let mut buf = [0u8; SIZE];
-                    let mut rng = rand::thread_rng();
-                    for _ in 1..1000 {
-                        let f = &files[rng.gen_range(0..files.len())];
-                        f.read_exact_at(&mut buf, 0).unwrap();
-                        assert!(buf == SAMPLE);
-                    }
-                })
-                .unwrap();
-            threads.push(thread);
-        }
-
-        for thread in threads {
-            thread.join().unwrap();
+            tasks.push(rt.spawn(async move {
+                let mut buf = [0u8; SIZE];
+                let mut rng = rand::rngs::OsRng;
+                for _ in 1..1000 {
+                    let f = &files[rng.gen_range(0..files.len())];
+                    f.read_exact_at(&mut buf, 0).await.unwrap();
+                    assert!(buf == SAMPLE);
+                }
+            }));
+        }
+
+        // Join every task before returning: dropping the runtime would otherwise cancel
+        // tasks that have not run yet, and a panic inside a task would go unnoticed.
+        for task in tasks {
+            rt.block_on(task).unwrap();
         }
 
         Ok(())
diff --git a/proxy/src/console/provider.rs b/proxy/src/console/provider.rs
index 37190c76b8..7d587ff1ec 100644
--- a/proxy/src/console/provider.rs
+++ b/proxy/src/console/provider.rs
@@ -63,8 +63,8 @@ pub mod errors {
                         format!("{REQUEST_FAILED}: endpoint is disabled")
                     }
                     http::StatusCode::LOCKED => {
-                        // Status 423: project might be in maintenance mode (or bad state).
-                        format!("{REQUEST_FAILED}: endpoint is temporary unavailable")
+                        // Status 423: project might be in maintenance mode (or bad state), or quotas exceeded.
+                        format!("{REQUEST_FAILED}: endpoint is temporary unavailable. check your quotas and/or contact our support")
                     }
                     _ => REQUEST_FAILED.to_owned(),
                 },
@@ -81,9 +81,15 @@ pub mod errors {
                 // retry some temporary failures because the compute was in a bad state
                 // (bad request can be returned when the endpoint was in transition)
                 Self::Console {
-                    status: http::StatusCode::BAD_REQUEST | http::StatusCode::LOCKED,
+                    status: http::StatusCode::BAD_REQUEST,
                     ..
                 } => true,
+                // locked can be returned when the endpoint was in transition
+                // or when quotas are exceeded. don't retry when quotas are exceeded
+                Self::Console {
+                    status: http::StatusCode::LOCKED,
+                    ref text,
+                } => !text.contains("quota"),
                 // retry server errors
                 Self::Console { status, .. } if status.is_server_error() => true,
                 _ => false,
diff --git a/proxy/src/console/provider/neon.rs b/proxy/src/console/provider/neon.rs
index 3322d5a5be..163cdfffc0 100644
--- a/proxy/src/console/provider/neon.rs
+++ b/proxy/src/console/provider/neon.rs
@@ -8,6 +8,7 @@ use super::{
 use crate::{auth::ClientCredentials, compute, http, scram};
 use async_trait::async_trait;
 use futures::TryFutureExt;
+use std::net::SocketAddr;
 use tokio::time::Instant;
 use tokio_postgres::config::SslMode;
 use tracing::{error, info, info_span, warn, Instrument};
@@ -117,7 +118,7 @@ impl Api {
             // We'll set username and such later using the startup message.
             // TODO: add more type safety (in progress).
             let mut config = compute::ConnCfg::new();
-            config.host(host).port(port).ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.
+            config.host(&host).port(port).ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes.
 
             let node = NodeInfo {
                 config,
@@ -194,9 +195,9 @@ async fn parse_body<T: for<'a> serde::Deserialize<'a>>(
     Err(ApiError::Console { status, text })
 }
 
-fn parse_host_port(input: &str) -> Option<(&str, u16)> {
-    let (host, port) = input.split_once(':')?;
-    Some((host, port.parse().ok()?))
+fn parse_host_port(input: &str) -> Option<(String, u16)> {
+    let parsed: SocketAddr = input.parse().ok()?;
+    Some((parsed.ip().to_string(), parsed.port()))
 }
 
 #[cfg(test)]
diff --git a/proxy/src/http/websocket.rs b/proxy/src/http/websocket.rs
index 72ae3dc26f..fa66df0469 100644
--- a/proxy/src/http/websocket.rs
+++ b/proxy/src/http/websocket.rs
@@ -2,6 +2,7 @@ use crate::{
     cancellation::CancelMap,
     config::ProxyConfig,
     error::io_error,
+    protocol2::{ProxyProtocolAccept, WithClientIp},
     proxy::{handle_client, ClientMode},
 };
 use bytes::{Buf, Bytes};
@@ -292,6 +293,9 @@ pub async fn task_main(
 
     let mut addr_incoming = AddrIncoming::from_listener(ws_listener)?;
     let _ = addr_incoming.set_nodelay(true);
+    let addr_incoming = ProxyProtocolAccept {
+        incoming: addr_incoming,
+    };
 
     let tls_listener = TlsListener::new(tls_acceptor, addr_incoming).filter(|conn| {
         if let Err(err) = conn {
@@ -302,9 +306,11 @@ pub async fn task_main(
         }
     });
 
-    let make_svc =
-        hyper::service::make_service_fn(|stream: &tokio_rustls::server::TlsStream<AddrStream>| {
-            let sni_name = stream.get_ref().1.server_name().map(|s| s.to_string());
+    let make_svc = hyper::service::make_service_fn(
+        |stream: &tokio_rustls::server::TlsStream<WithClientIp<AddrStream>>| {
+            let (io, tls) = stream.get_ref();
+            let peer_addr = io.client_addr().unwrap_or(io.inner.remote_addr());
+            let sni_name = tls.server_name().map(|s| s.to_string());
             let conn_pool = conn_pool.clone();
 
             async move {
@@ -319,13 +325,15 @@ pub async fn task_main(
                         ws_handler(req, config, conn_pool, cancel_map, session_id, sni_name)
                             .instrument(info_span!(
                                 "ws-client",
-                                session = %session_id
+                                session = %session_id,
+                                %peer_addr,
                             ))
                             .await
                     }
                 }))
             }
-        });
+        },
+    );
 
     hyper::Server::builder(accept::from_stream(tls_listener))
         .serve(make_svc)
diff --git a/proxy/src/lib.rs b/proxy/src/lib.rs
index 1e1e216bb7..a3d1cdd3c8 100644
--- a/proxy/src/lib.rs
+++ b/proxy/src/lib.rs
@@ -16,6 +16,7 @@ pub mod http;
 pub mod logging;
 pub mod metrics;
 pub mod parse;
+pub mod protocol2;
 pub mod proxy;
 pub mod sasl;
 pub mod scram;
diff --git a/proxy/src/protocol2.rs b/proxy/src/protocol2.rs
new file mode 100644
index 0000000000..1d8931be85
--- /dev/null
+++ b/proxy/src/protocol2.rs
@@ -0,0 +1,479 @@
+//! Proxy Protocol V2 implementation
+
+use std::{
+    future::poll_fn,
+    future::Future,
+    io,
+    net::SocketAddr,
+    pin::{pin, Pin},
+    task::{ready, Context, Poll},
+};
+
+use bytes::{Buf, BytesMut};
+use hyper::server::conn::{AddrIncoming, AddrStream};
+use pin_project_lite::pin_project;
+use tls_listener::AsyncAccept;
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, ReadBuf};
+
+pub struct ProxyProtocolAccept {
+    pub incoming: AddrIncoming,
+}
+
+pin_project! {
+    pub struct WithClientIp<T> {
+        #[pin]
+        pub inner: T,
+        buf: BytesMut,
+        tlv_bytes: u16,
+        state: ProxyParse,
+    }
+}
+
+#[derive(Clone, PartialEq, Debug)]
+enum ProxyParse {
+    NotStarted,
+
+    Finished(SocketAddr),
+    None,
+}
+
+impl<T: AsyncWrite> AsyncWrite for WithClientIp<T> {
+    #[inline]
+    fn poll_write(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &[u8],
+    ) -> Poll<Result<usize, io::Error>> {
+        self.project().inner.poll_write(cx, buf)
+    }
+
+    #[inline]
+    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
+        self.project().inner.poll_flush(cx)
+    }
+
+    #[inline]
+    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), io::Error>> {
+        self.project().inner.poll_shutdown(cx)
+    }
+
+    #[inline]
+    fn poll_write_vectored(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        bufs: &[io::IoSlice<'_>],
+    ) -> Poll<Result<usize, io::Error>> {
+        self.project().inner.poll_write_vectored(cx, bufs)
+    }
+
+    #[inline]
+    fn is_write_vectored(&self) -> bool {
+        self.inner.is_write_vectored()
+    }
+}
+
+impl<T> WithClientIp<T> {
+    /// Wraps `inner` with a 128-byte buffer, no pending TLV bytes, and parsing not yet started.
+    pub fn new(inner: T) -> Self {
+        let buf = BytesMut::with_capacity(128);
+        let state = ProxyParse::NotStarted;
+        Self { inner, buf, tlv_bytes: 0, state }
+    }
+
+    /// Returns the address stored by a finished header parse, `None` in every other state.
+    pub fn client_addr(&self) -> Option<SocketAddr> {
+        if let ProxyParse::Finished(addr) = self.state {
+            Some(addr)
+        } else {
+            None
+        }
+    }
+}
+
+impl<T: AsyncRead + Unpin> WithClientIp<T> {
+    pub async fn wait_for_addr(&mut self) -> io::Result<Option<SocketAddr>> {
+        match self.state {
+            ProxyParse::NotStarted => {
+                let mut pin = Pin::new(&mut *self);
+                let addr = poll_fn(|cx| pin.as_mut().poll_client_ip(cx)).await?;
+                match addr {
+                    Some(addr) => self.state = ProxyParse::Finished(addr),
+                    None => self.state = ProxyParse::None,
+                }
+                Ok(addr)
+            }
+            ProxyParse::Finished(addr) => Ok(Some(addr)),
+            ProxyParse::None => Ok(None),
+        }
+    }
+}
+
+/// Proxy Protocol Version 2 Header
+const HEADER: [u8; 12] = [
+    0x0D, 0x0A, 0x0D, 0x0A, 0x00, 0x0D, 0x0A, 0x51, 0x55, 0x49, 0x54, 0x0A,
+];
+
+impl<T: AsyncRead> WithClientIp<T> {
+    /// implementation of <https://www.haproxy.org/download/2.4/doc/proxy-protocol.txt>
+    /// Version 2 (Binary Format)
+    fn poll_client_ip(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<io::Result<Option<SocketAddr>>> {
+        // The binary header format starts with a constant 12 bytes block containing the protocol signature :
+        //    \x0D \x0A \x0D \x0A \x00 \x0D \x0A \x51 \x55 \x49 \x54 \x0A
+        while self.buf.len() < 16 {
+            let mut this = self.as_mut().project();
+            let bytes_read = pin!(this.inner.read_buf(this.buf)).poll(cx)?;
+
+            // exit for bad header
+            let len = usize::min(self.buf.len(), HEADER.len());
+            if self.buf[..len] != HEADER[..len] {
+                return Poll::Ready(Ok(None));
+            }
+
+            // if no more bytes available then exit
+            if ready!(bytes_read) == 0 {
+                return Poll::Ready(Ok(None));
+            };
+        }
+
+        // The next byte (the 13th one) is the protocol version and command.
+        // The highest four bits contains the version. As of this specification, it must
+        // always be sent as \x2 and the receiver must only accept this value.
+        let vc = self.buf[12];
+        let version = vc >> 4;
+        let command = vc & 0b1111;
+        if version != 2 {
+            return Poll::Ready(Err(io::Error::new(
+                io::ErrorKind::Other,
+                "invalid proxy protocol version. expected version 2",
+            )));
+        }
+        match command {
+            // the connection was established on purpose by the proxy
+            // without being relayed. The connection endpoints are the sender and the
+            // receiver. Such connections exist when the proxy sends health-checks to the
+            // server. The receiver must accept this connection as valid and must use the
+            // real connection endpoints and discard the protocol block including the
+            // family which is ignored.
+            0 => {}
+            // the connection was established on behalf of another node,
+            // and reflects the original connection endpoints. The receiver must then use
+            // the information provided in the protocol block to get the original address.
+            1 => {}
+            // other values are unassigned and must not be emitted by senders. Receivers
+            // must drop connections presenting unexpected values here.
+            _ => {
+                return Poll::Ready(Err(io::Error::new(
+                    io::ErrorKind::Other,
+                    "invalid proxy protocol command. expected local (0) or proxy (1)",
+                )))
+            }
+        };
+
+        // The 14th byte holds the address family (high 4 bits) and transport protocol (low 4 bits).
+        // For LOCAL (0) the family is ignored: treat it as UNSPEC so no address is reported.
+        let ft = if command == 0 { 0x00 } else { self.buf[13] };
+        let address_length = match ft {
+            // - \x11 : TCP over IPv4 : the forwarded connection uses TCP over the AF_INET
+            //   protocol family. Address length is 2*4 + 2*2 = 12 bytes.
+            // - \x12 : UDP over IPv4 : the forwarded connection uses UDP over the AF_INET
+            //   protocol family. Address length is 2*4 + 2*2 = 12 bytes.
+            0x11 | 0x12 => 12,
+            // - \x21 : TCP over IPv6 : the forwarded connection uses TCP over the AF_INET6
+            //   protocol family. Address length is 2*16 + 2*2 = 36 bytes.
+            // - \x22 : UDP over IPv6 : the forwarded connection uses UDP over the AF_INET6
+            //   protocol family. Address length is 2*16 + 2*2 = 36 bytes.
+            0x21 | 0x22 => 36,
+            // unspecified or unix stream. ignore the addresses
+            _ => 0,
+        };
+
+        // The 15th and 16th bytes is the address length in bytes in network endian order.
+        // It is used so that the receiver knows how many address bytes to skip even when
+        // it does not implement the presented protocol. Thus the length of the protocol
+        // header in bytes is always exactly 16 + this value. When a sender presents a
+        // LOCAL connection, it should not present any address so it sets this field to
+        // zero. Receivers MUST always consider this field to skip the appropriate number
+        // of bytes and must not assume zero is presented for LOCAL connections. When a
+        // receiver accepts an incoming connection showing an UNSPEC address family or
+        // protocol, it may or may not decide to log the address information if present.
+        let remaining_length = u16::from_be_bytes(self.buf[14..16].try_into().unwrap());
+        if remaining_length < address_length {
+            return Poll::Ready(Err(io::Error::new(
+                io::ErrorKind::Other,
+                "invalid proxy protocol length. not enough to fit requested IP addresses",
+            )));
+        }
+
+        while self.buf.len() < 16 + address_length as usize {
+            let mut this = self.as_mut().project();
+            if ready!(pin!(this.inner.read_buf(this.buf)).poll(cx)?) == 0 {
+                return Poll::Ready(Err(io::Error::new(
+                    io::ErrorKind::UnexpectedEof,
+                    "stream closed while waiting for proxy protocol addresses",
+                )));
+            }
+        }
+
+        let this = self.as_mut().project();
+
+        // we are sure this is a proxy protocol v2 entry and we have read all the bytes we need
+        // discard the header we have parsed
+        this.buf.advance(16);
+
+        // Starting from the 17th byte, addresses are presented in network byte order.
+        // The address order is always the same :
+        //   - source layer 3 address in network byte order
+        //   - destination layer 3 address in network byte order
+        //   - source layer 4 address if any, in network byte order (port)
+        //   - destination layer 4 address if any, in network byte order (port)
+        let addresses = this.buf.split_to(address_length as usize);
+        let socket = match address_length {
+            12 => {
+                let src_addr: [u8; 4] = addresses[0..4].try_into().unwrap();
+                let src_port = u16::from_be_bytes(addresses[8..10].try_into().unwrap());
+                Some(SocketAddr::from((src_addr, src_port)))
+            }
+            36 => {
+                let src_addr: [u8; 16] = addresses[0..16].try_into().unwrap();
+                let src_port = u16::from_be_bytes(addresses[32..34].try_into().unwrap());
+                Some(SocketAddr::from((src_addr, src_port)))
+            }
+            _ => None,
+        };
+
+        *this.tlv_bytes = remaining_length - address_length;
+        self.as_mut().skip_tlv_inner();
+
+        Poll::Ready(Ok(socket))
+    }
+
+    #[cold]
+    fn read_ip(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        // record the outcome; `poll_read` only calls this while the state is `NotStarted`
+        let parsed = ready!(self.as_mut().poll_client_ip(cx)?);
+        let next_state = parsed.map_or(ProxyParse::None, ProxyParse::Finished);
+        *self.as_mut().project().state = next_state;
+
+        Poll::Ready(Ok(()))
+    }
+
+    #[cold]
+    fn skip_tlv(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<()>> {
+        let mut this = self.as_mut().project();
+        // we know that this.buf is empty
+        debug_assert_eq!(this.buf.len(), 0);
+
+        this.buf.reserve((*this.tlv_bytes).clamp(0, 1024) as usize);
+        ready!(pin!(this.inner.read_buf(this.buf)).poll(cx)?);
+        self.skip_tlv_inner();
+
+        Poll::Ready(Ok(()))
+    }
+
+    fn skip_tlv_inner(self: Pin<&mut Self>) {
+        let tlv_bytes_read = match u16::try_from(self.buf.len()) {
+            // we read more than u16::MAX therefore we must have read the full tlv_bytes
+            Err(_) => self.tlv_bytes,
+            // we might not have read the full tlv bytes yet
+            Ok(n) => u16::min(n, self.tlv_bytes),
+        };
+        let this = self.project();
+        *this.tlv_bytes -= tlv_bytes_read;
+        this.buf.advance(tlv_bytes_read as usize);
+    }
+}
+
+impl<T: AsyncRead> AsyncRead for WithClientIp<T> {
+    #[inline]
+    fn poll_read(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        buf: &mut ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        // I'm assuming these 3 comparisons will be easy to branch predict.
+        // especially with the cold attributes
+        // which should make this read wrapper almost invisible
+
+        if let ProxyParse::NotStarted = self.state {
+            ready!(self.as_mut().read_ip(cx)?);
+        }
+
+        while self.tlv_bytes > 0 {
+            ready!(self.as_mut().skip_tlv(cx)?)
+        }
+
+        let this = self.project();
+        if this.buf.is_empty() {
+            this.inner.poll_read(cx, buf)
+        } else {
+            // we know that tlv_bytes is 0
+            debug_assert_eq!(*this.tlv_bytes, 0);
+
+            let write = usize::min(this.buf.len(), buf.remaining());
+            let slice = this.buf.split_to(write).freeze();
+            buf.put_slice(&slice);
+
+            // reset the allocation so it can be freed
+            if this.buf.is_empty() {
+                *this.buf = BytesMut::new();
+            }
+
+            Poll::Ready(Ok(()))
+        }
+    }
+}
+
+impl AsyncAccept for ProxyProtocolAccept {
+    type Connection = WithClientIp<AddrStream>;
+
+    type Error = io::Error;
+
+    /// Accepts from the wrapped `AddrIncoming` and wraps each stream in `WithClientIp`.
+    /// Accept errors are propagated; a `None` from the listener is returned as `None`.
+    fn poll_accept(
+        mut self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+    ) -> Poll<Option<Result<Self::Connection, Self::Error>>> {
+        let incoming = Pin::new(&mut self.incoming);
+        let accepted = ready!(incoming.poll_accept(cx)?);
+
+        Poll::Ready(accepted.map(|stream| Ok(WithClientIp::new(stream))))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::pin::pin;
+
+    use tokio::io::AsyncReadExt;
+
+    use crate::protocol2::{ProxyParse, WithClientIp};
+
+    #[tokio::test]
+    async fn test_ipv4() {
+        let header = super::HEADER
+            // Proxy command, IPV4 | TCP
+            .chain([(2 << 4) | 1, (1 << 4) | 1].as_slice())
+            // 12 + 3 bytes
+            .chain([0, 15].as_slice())
+            // src ip
+            .chain([127, 0, 0, 1].as_slice())
+            // dst ip
+            .chain([192, 168, 0, 1].as_slice())
+            // src port
+            .chain([255, 255].as_slice())
+            // dst port
+            .chain([1, 1].as_slice())
+            // TLV
+            .chain([1, 2, 3].as_slice());
+
+        let extra_data = [0x55; 256];
+
+        let mut read = pin!(WithClientIp::new(header.chain(extra_data.as_slice())));
+
+        let mut bytes = vec![];
+        read.read_to_end(&mut bytes).await.unwrap();
+
+        assert_eq!(bytes, extra_data);
+        assert_eq!(
+            read.state,
+            ProxyParse::Finished(([127, 0, 0, 1], 65535).into())
+        );
+    }
+
+    #[tokio::test]
+    async fn test_ipv6() {
+        let header = super::HEADER
+            // Proxy command, IPV6 | UDP
+            .chain([(2 << 4) | 1, (2 << 4) | 2].as_slice())
+            // 36 + 3 bytes
+            .chain([0, 39].as_slice())
+            // src ip
+            .chain([15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0].as_slice())
+            // dst ip
+            .chain([0, 15, 1, 14, 2, 13, 3, 12, 4, 11, 5, 10, 6, 9, 7, 8].as_slice())
+            // src port
+            .chain([1, 1].as_slice())
+            // dst port
+            .chain([255, 255].as_slice())
+            // TLV
+            .chain([1, 2, 3].as_slice());
+
+        let extra_data = [0x55; 256];
+
+        let mut read = pin!(WithClientIp::new(header.chain(extra_data.as_slice())));
+
+        let mut bytes = vec![];
+        read.read_to_end(&mut bytes).await.unwrap();
+
+        assert_eq!(bytes, extra_data);
+        assert_eq!(
+            read.state,
+            ProxyParse::Finished(
+                ([15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0], 257).into()
+            )
+        );
+    }
+
+    #[tokio::test]
+    async fn test_invalid() {
+        let data = [0x55; 256];
+
+        let mut read = pin!(WithClientIp::new(data.as_slice()));
+
+        let mut bytes = vec![];
+        read.read_to_end(&mut bytes).await.unwrap();
+        assert_eq!(bytes, data);
+        assert_eq!(read.state, ProxyParse::None);
+    }
+
+    #[tokio::test]
+    async fn test_short() {
+        let data = [0x55; 10];
+
+        let mut read = pin!(WithClientIp::new(data.as_slice()));
+
+        let mut bytes = vec![];
+        read.read_to_end(&mut bytes).await.unwrap();
+        assert_eq!(bytes, data);
+        assert_eq!(read.state, ProxyParse::None);
+    }
+
+    #[tokio::test]
+    async fn test_large_tlv() {
+        let tlv = vec![0x55; 32768];
+        let len = (12 + tlv.len() as u16).to_be_bytes();
+
+        let header = super::HEADER
+            // Proxy command, Inet << 4 | Stream
+            .chain([(2 << 4) | 1, (1 << 4) | 1].as_slice())
+            // 12 address bytes + 32768 TLV bytes
+            .chain(len.as_slice())
+            // src ip
+            .chain([55, 56, 57, 58].as_slice())
+            // dst ip
+            .chain([192, 168, 0, 1].as_slice())
+            // src port
+            .chain([255, 255].as_slice())
+            // dst port
+            .chain([1, 1].as_slice())
+            // TLV (32768 filler bytes that the reader must skip entirely)
+            .chain(tlv.as_slice());
+
+        let extra_data = [0xaa; 256];
+
+        let mut read = pin!(WithClientIp::new(header.chain(extra_data.as_slice())));
+
+        let mut bytes = vec![];
+        read.read_to_end(&mut bytes).await.unwrap();
+
+        assert_eq!(bytes, extra_data);
+        assert_eq!(
+            read.state,
+            ProxyParse::Finished(([55, 56, 57, 58], 65535).into())
+        );
+    }
+}
diff --git a/proxy/src/proxy.rs b/proxy/src/proxy.rs
index 0267d767ee..66ce2e5fd0 100644
--- a/proxy/src/proxy.rs
+++ b/proxy/src/proxy.rs
@@ -7,6 +7,7 @@ use crate::{
     compute::{self, PostgresConnection},
     config::{ProxyConfig, TlsConfig},
     console::{self, errors::WakeComputeError, messages::MetricsAuxInfo, Api},
+    protocol2::WithClientIp,
     stream::{PqStream, Stream},
 };
 use anyhow::{bail, Context};
@@ -100,7 +101,7 @@ pub async fn task_main(
     loop {
         tokio::select! {
             accept_result = listener.accept() => {
-                let (socket, peer_addr) = accept_result?;
+                let (socket, _) = accept_result?;
 
                 let session_id = uuid::Uuid::new_v4();
                 let cancel_map = Arc::clone(&cancel_map);
@@ -108,13 +109,19 @@ pub async fn task_main(
                     async move {
                         info!("accepted postgres client connection");
 
+                        let mut socket = WithClientIp::new(socket);
+                        if let Some(ip) = socket.wait_for_addr().await? {
+                            tracing::Span::current().record("peer_addr", &tracing::field::display(ip));
+                        }
+
                         socket
+                            .inner
                             .set_nodelay(true)
                             .context("failed to set socket option")?;
 
                         handle_client(config, &cancel_map, session_id, socket, ClientMode::Tcp).await
                     }
-                    .instrument(info_span!("handle_client", ?session_id, %peer_addr))
+                    .instrument(info_span!("handle_client", ?session_id, peer_addr = tracing::field::Empty))
                     .unwrap_or_else(move |e| {
                         // Acknowledge that the task has finished with an error.
                         error!(?session_id, "per-client task finished with an error: {e:#}");
diff --git a/proxy/src/proxy/tests.rs b/proxy/src/proxy/tests.rs
index 5653ec94dc..99ec8fb090 100644
--- a/proxy/src/proxy/tests.rs
+++ b/proxy/src/proxy/tests.rs
@@ -137,6 +137,7 @@ async fn dummy_proxy(
     auth: impl TestAuth + Send,
 ) -> anyhow::Result<()> {
     let cancel_map = CancelMap::default();
+    let client = WithClientIp::new(client);
     let (mut stream, _params) = handshake(client, tls.as_ref(), &cancel_map)
         .await?
         .context("handshake failed")?;
diff --git a/s3_scrubber/Cargo.toml b/s3_scrubber/Cargo.toml
new file mode 100644
index 0000000000..47668eb4aa
--- /dev/null
+++ b/s3_scrubber/Cargo.toml
@@ -0,0 +1,39 @@
+[package]
+name = "s3_scrubber"
+version = "0.1.0"
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+aws-sdk-s3.workspace = true
+aws-smithy-http.workspace = true
+aws-types.workspace = true
+either.workspace = true
+tokio-rustls.workspace = true
+anyhow.workspace = true
+hex.workspace = true
+thiserror.workspace = true
+rand.workspace = true
+bytes.workspace = true
+bincode.workspace = true
+crc32c.workspace = true
+serde.workspace = true
+serde_json.workspace = true
+serde_with.workspace = true
+workspace_hack.workspace = true
+utils.workspace = true
+
+tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
+chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
+reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] }
+aws-config = { workspace = true, default-features = false, features = ["rustls", "credentials-sso"] }
+
+pageserver = {path="../pageserver"}
+
+
+tracing.workspace = true
+tracing-subscriber.workspace = true
+clap.workspace = true
+
+atty = "0.2"
+tracing-appender = "0.2"
\ No newline at end of file
diff --git a/s3_scrubber/README.md b/s3_scrubber/README.md
new file mode 100644
index 0000000000..48be3512b4
--- /dev/null
+++ b/s3_scrubber/README.md
@@ -0,0 +1,93 @@
+# Neon S3 scrubber
+
+This tool directly accesses the S3 buckets used by the Neon `pageserver`
+and `safekeeper`, and does housekeeping such as cleaning up objects for tenants & timelines that no longer exist.
+
+## Usage
+
+### Generic Parameters
+
+#### S3
+
+Run `aws sso login --profile dev` to get SSO access to the bucket you want to clean, then look up the SSO_ACCOUNT_ID for your profile (`cat ~/.aws/config` may help).
+
+- `SSO_ACCOUNT_ID`: Credentials id to use for accessing S3 buckets
+- `REGION`: The region where the bucket is located.
+- `BUCKET`: Bucket name
+
+#### Console API
+
+_This section is only relevant if using a command that requires access to Neon's internal control plane_
+
+- `CLOUD_ADMIN_API_URL`: The URL base to use for checking tenant/timeline for existence via the Cloud API.  e.g. `https://<admin host>/admin`
+
+- `CLOUD_ADMIN_API_TOKEN`: The token to provide when querying the admin API. Get one on the corresponding console page, e.g. `https://<admin host>/app/settings/api-keys`
+
+### Commands
+
+#### `tidy`
+
+Iterate over S3 buckets for storage nodes, checking their contents and removing the data not present in the console. Node S3 data that's not removed is then further checked for discrepancies and, sometimes, validated.
+
+Unless the global `--delete` argument is provided, this command only dry-runs and logs
+what it would have deleted.
+
+```
+tidy --node-kind=<safekeeper|pageserver> [--depth=<tenant|timeline>] [--skip-validation]
+```
+
+- `--node-kind`: whether to inspect safekeeper or pageserver bucket prefix
+- `--depth`: whether to only search for deletable tenants, or also search for
+  deletable timelines within active tenants. Default: `tenant`
+- `--skip-validation`: skip additional post-deletion checks. Default: `false`
+
+For a selected S3 path, the tool lists the given S3 bucket for either tenants only, or both tenants and timelines. For every entry found, the console API is queried: any entity that is deleted or missing in the API is scheduled for deletion from S3.
+
+If validation is enabled, only the entries of non-deleted tenants are checked.
+For pageserver, timelines' index_part.json on S3 is also checked for various discrepancies: no files are removed, even if there are "extra" S3 files not present in index_part.json: due to the way pageserver updates the remote storage, it's better to do such removals manually, stopping the corresponding tenant first.
+
+Command examples:
+
+`env SSO_ACCOUNT_ID=369495373322 REGION=eu-west-1 BUCKET=neon-dev-storage-eu-west-1 CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- tidy --node-kind=safekeeper`
+
+`env SSO_ACCOUNT_ID=369495373322 REGION=us-east-2 BUCKET=neon-staging-storage-us-east-2 CLOUD_ADMIN_API_TOKEN=${NEON_CLOUD_ADMIN_API_STAGING_KEY} CLOUD_ADMIN_API_URL=[url] cargo run --release -- tidy --node-kind=pageserver --depth=timeline`
+
+When dry run stats look satisfying, use `-- --delete` before the `tidy` command to
+disable dry run and run the binary with deletion enabled.
+
+See these lines (and lines around) in the logs for the final stats:
+
+- `Finished listing the bucket for tenants`
+- `Finished active tenant and timeline validation`
+- `Total tenant deletion stats`
+- `Total timeline deletion stats`
+
+## Current implementation details
+
+- The tool does not have any persistent state currently: instead, it creates very verbose logs, with every S3 delete request logged, every tenant/timeline id check, etc.
+  Worse, any panic or early errored tasks might force the tool to exit without printing the final summary — all affected ids will still be in the logs though. The tool has retries inside it, so it's error-resistant up to some extent, and recent runs showed no traces of errors/panics.
+
+- Instead of checking non-deleted tenants' timelines instantly, the tool attempts to create separate tasks (futures) for that,
+  complicating the logic and slowing down the process, this should be fixed and done in one "task".
+
+- The tool uses only publicly available remote resources (S3, console) and does not access pageserver/safekeeper nodes themselves.
+  Yet, its S3 set up should be prepared for running on any pageserver/safekeeper node, using node's S3 credentials, so the node API access logic could be implemented relatively simply on top.
+
+## Cleanup procedure:
+
+### Pageserver preparations
+
+If S3 state is altered first manually, pageserver in-memory state will contain wrong data about S3 state, and tenants/timelines may get recreated on S3 (due to any layer upload due to compaction, pageserver restart, etc.). So before proceeding, for tenants/timelines which are already deleted in the console, we must remove these from pageservers.
+
+First, we need to group pageservers by bucket: `https://<admin host>/admin/pageservers` can be used to find all env nodes, and `cat /storage/pageserver/data/pageserver.toml` on every node will show the bucket names and regions needed.
+
+Per bucket, for every pageserver id related, find deleted tenants:
+
+`curl -X POST "https://<admin_host>/admin/check_pageserver/{id}" -H "Accept: application/json" -H "Authorization: Bearer ${NEON_CLOUD_ADMIN_API_STAGING_KEY}" | jq`
+
+Use `?check_timelines=true` to find deleted timelines as well, but note that the check runs a separate query on every alive tenant, so it can take long and time out for big pageservers.
+
+Note that some tenants/timelines could be marked as deleted in console, but console might continue querying the node later to fully remove the tenant/timeline: wait for some time before ensuring that the "extra" tenant/timeline is not going away by itself.
+
+When all IDs are collected, manually go to every pageserver and detach/delete the tenant/timeline.
+In the future, the cleanup tool may access pageservers directly, but for now it only has access to the console and S3.
diff --git a/s3_scrubber/src/checks.rs b/s3_scrubber/src/checks.rs
new file mode 100644
index 0000000000..c52a40ee94
--- /dev/null
+++ b/s3_scrubber/src/checks.rs
@@ -0,0 +1,438 @@
+use std::collections::{hash_map, HashMap, HashSet};
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::Context;
+use aws_sdk_s3::Client;
+use tokio::io::AsyncReadExt;
+use tokio::task::JoinSet;
+use tracing::{error, info, info_span, warn, Instrument};
+
+use crate::cloud_admin_api::{BranchData, CloudAdminApiClient, ProjectId};
+use crate::delete_batch_producer::DeleteProducerStats;
+use crate::{list_objects_with_retries, RootTarget, MAX_RETRIES};
+use pageserver::tenant::storage_layer::LayerFileName;
+use pageserver::tenant::IndexPart;
+use utils::id::TenantTimelineId;
+
+/// Validates every active timeline found on S3: lists its blobs, pairs its branch with the
+/// console's branch data and runs `branch_cleanup_and_check_errors` on it in a separate task.
+/// Returns empty stats right away when `batch_producer_stats` has no timeline stats.
+pub async fn validate_pageserver_active_tenant_and_timelines(
+    s3_client: Arc<Client>,
+    s3_root: RootTarget,
+    admin_client: Arc<CloudAdminApiClient>,
+    batch_producer_stats: DeleteProducerStats,
+) -> anyhow::Result<BranchCheckStats> {
+    let Some(timeline_stats) = batch_producer_stats.timeline_stats else {
+        info!("No tenant-only checks, exiting");
+        return Ok(BranchCheckStats::default());
+    };
+    // Index the active projects by id for the per-branch lookups below.
+    let s3_active_projects = batch_producer_stats
+        .tenant_stats
+        .active_entries
+        .into_iter()
+        .map(|project| (project.id.clone(), project))
+        .collect::<HashMap<_, _>>();
+    info!("Validating {} active tenants", s3_active_projects.len());
+    // Group the active branches by project and list the S3 blobs of every active timeline.
+    let mut s3_active_branches_per_project = HashMap::<ProjectId, Vec<BranchData>>::new();
+    let mut s3_blob_data = HashMap::<TenantTimelineId, S3TimelineBlobData>::new();
+    for active_branch in timeline_stats.active_entries {
+        let active_project_id = active_branch.project_id.clone();
+        let active_branch_id = active_branch.id.clone();
+        let active_timeline_id = active_branch.timeline_id;
+        s3_active_branches_per_project
+            .entry(active_project_id.clone())
+            .or_default()
+            .push(active_branch);
+        let Some(active_project) = s3_active_projects.get(&active_project_id) else {
+            error!("Branch {:?} for project {:?} has no such project in the active projects", active_branch_id, active_project_id);
+            continue;
+        };
+        let id = TenantTimelineId::new(active_project.tenant, active_timeline_id);
+        s3_blob_data.insert(
+            id,
+            list_timeline_blobs(&s3_client, id, &s3_root)
+                .await
+                .with_context(|| format!("List timeline {id} blobs"))?,
+        );
+    }
+    // Spawn one check task per active S3 branch of every active project.
+    let mut branch_checks = JoinSet::new();
+    for (_, s3_active_project) in s3_active_projects {
+        let project_id = &s3_active_project.id;
+        let tenant_id = s3_active_project.tenant;
+
+        let mut console_active_branches =
+            branches_for_project_with_retries(&admin_client, project_id)
+                .await
+                .with_context(|| {
+                    format!("Client API branches for project {project_id:?} retrieval")
+                })?
+                .into_iter()
+                .map(|branch| (branch.id.clone(), branch))
+                .collect::<HashMap<_, _>>();
+
+        let active_branches = s3_active_branches_per_project
+            .remove(project_id)
+            .unwrap_or_default();
+        info!(
+            "Spawning tasks for {} tenant {} active timelines",
+            active_branches.len(),
+            tenant_id
+        );
+        for s3_active_branch in active_branches {
+            let console_branch = console_active_branches.remove(&s3_active_branch.id);
+            let timeline_id = s3_active_branch.timeline_id;
+            let id = TenantTimelineId::new(tenant_id, timeline_id);
+            let s3_data = s3_blob_data.remove(&id);
+            let s3_root = s3_root.clone();
+            branch_checks.spawn(
+                async move {
+                    let check_errors = branch_cleanup_and_check_errors(
+                        id,
+                        &s3_root,
+                        &s3_active_branch,
+                        console_branch,
+                        s3_data,
+                    )
+                    .await;
+                    (id, check_errors)
+                }
+                .instrument(info_span!("check_timeline", id = %id)),
+            );
+        }
+    }
+    // Gather the results of all check tasks; a task join error aborts the whole validation.
+    let mut total_stats = BranchCheckStats::default();
+    while let Some((id, branch_check_errors)) = branch_checks
+        .join_next()
+        .await
+        .transpose()
+        .context("branch check task join")?
+    {
+        total_stats.add(id, branch_check_errors);
+    }
+    Ok(total_stats)
+}
+
+/// Lists the non-deleted console branches of `project_id`, retrying up to `MAX_RETRIES` times.
+async fn branches_for_project_with_retries(
+    admin_client: &CloudAdminApiClient,
+    project_id: &ProjectId,
+) -> anyhow::Result<Vec<BranchData>> {
+    for _attempt in 0..MAX_RETRIES {
+        let query_error = match admin_client.branches_for_project(project_id, false).await {
+            Ok(console_branches) => return Ok(console_branches),
+            Err(query_error) => query_error,
+        };
+        // Log the failure and pause for a second before the next attempt.
+        error!("admin list branches for project {project_id:?} query failed: {query_error}");
+        tokio::time::sleep(Duration::from_secs(1)).await;
+    }
+    anyhow::bail!("Failed to list branches for project {project_id:?} {MAX_RETRIES} times")
+}
+
+#[derive(Debug, Default)]
+pub struct BranchCheckStats {
+    pub timelines_with_errors: HashMap<TenantTimelineId, Vec<String>>, // check errors per timeline
+    pub normal_timelines: HashSet<TenantTimelineId>, // timelines whose checks found no errors
+}
+
+impl BranchCheckStats {
+    /// Records the check result of timeline `id`: a timeline without errors goes into
+    /// `normal_timelines`, any other into `timelines_with_errors` together with its errors.
+    ///
+    /// Panics if `id` is already present in either collection: a timeline is checked only once.
+    pub fn add(&mut self, id: TenantTimelineId, check_errors: Vec<String>) {
+        let already_checked =
+            self.normal_timelines.contains(&id) || self.timelines_with_errors.contains_key(&id);
+        if already_checked {
+            panic!("Checking branch with timeline {id} more than once")
+        }
+        if check_errors.is_empty() {
+            self.normal_timelines.insert(id);
+        } else {
+            self.timelines_with_errors.insert(id, check_errors);
+        }
+    }
+}
+
+/// Runs the consistency checks for one active S3 timeline and returns the errors found:
+/// the branch is compared with its console counterpart and `index_part.json` with the layers
+/// listed on S3. Keys that look removable are only logged here, nothing gets deleted.
+async fn branch_cleanup_and_check_errors(
+    id: TenantTimelineId,
+    s3_root: &RootTarget,
+    s3_active_branch: &BranchData,
+    console_branch: Option<BranchData>,
+    s3_data: Option<S3TimelineBlobData>,
+) -> Vec<String> {
+    info!(
+        "Checking timeline for branch {:?}/{:?}",
+        s3_active_branch.project_id, s3_active_branch.id
+    );
+    let mut branch_check_errors = Vec::new();
+
+    match console_branch {
+        Some(console_active_branch) => {
+            if console_active_branch.deleted {
+                branch_check_errors.push(format!("Timeline has deleted branch data in the console (id = {:?}, project_id = {:?}), recheck whether it got removed during the check",
+                    s3_active_branch.id, s3_active_branch.project_id))
+            }
+        },
+        None => branch_check_errors.push(format!("Timeline has no branch data in the console (id = {:?}, project_id = {:?}), recheck whether it got removed during the check",
+            s3_active_branch.id, s3_active_branch.project_id))
+    }
+    // S3 keys that do not belong to the timeline; gathered for the log message at the end.
+    let mut keys_to_remove = Vec::new();
+    match s3_data {
+        Some(s3_data) => {
+            keys_to_remove.extend(s3_data.keys_to_remove);
+            match s3_data.blob_data {
+                BlobDataParseResult::Parsed {
+                    index_part,
+                    mut s3_layers,
+                } => {
+                    if !IndexPart::KNOWN_VERSIONS.contains(&index_part.get_version()) {
+                        branch_check_errors.push(format!(
+                            "index_part.json has an unknown version: {}",
+                            index_part.get_version()
+                        ))
+                    }
+
+                    if index_part.metadata.disk_consistent_lsn()
+                        != index_part.get_disk_consistent_lsn()
+                    {
+                        branch_check_errors.push(format!(
+                                    "Mismatching disk_consistent_lsn in TimelineMetadata ({}) and in the index_part ({})",
+                                    index_part.metadata.disk_consistent_lsn(),
+                                    index_part.get_disk_consistent_lsn(),
+                                ))
+                    }
+
+                    if index_part.layer_metadata.is_empty() {
+                        // not an error, can happen for branches with zero writes, but notice that
+                        info!("index_part.json has no layers");
+                    }
+
+                    for (layer, metadata) in index_part.layer_metadata {
+                        if metadata.file_size == 0 {
+                            branch_check_errors.push(format!(
+                                            "index_part.json contains a layer {} that has 0 size in its layer metadata", layer.file_name(),
+                                        ))
+                        }
+                        // What stays in `s3_layers` after the loop is unknown to the index.
+                        if !s3_layers.remove(&layer) {
+                            branch_check_errors.push(format!(
+                                "index_part.json contains a layer {} that is not present in S3",
+                                layer.file_name(),
+                            ))
+                        }
+                    }
+
+                    if !s3_layers.is_empty() {
+                        branch_check_errors.push(format!(
+                            "index_part.json does not contain layers from S3: {:?}",
+                            s3_layers
+                                .iter()
+                                .map(|layer_name| layer_name.file_name())
+                                .collect::<Vec<_>>(),
+                        ));
+                        keys_to_remove.extend(s3_layers.iter().map(|layer_name| {
+                            let mut key = s3_root.timeline_root(id).prefix_in_bucket;
+                            let delimiter = s3_root.delimiter();
+                            if !key.ends_with(delimiter) {
+                                key.push_str(delimiter);
+                            }
+                            key.push_str(&layer_name.file_name());
+                            key
+                        }));
+                    }
+                }
+                BlobDataParseResult::Incorrect(parse_errors) => branch_check_errors.extend(
+                    parse_errors
+                        .into_iter()
+                        .map(|error| format!("parse error: {error}")),
+                ),
+            }
+        }
+        None => branch_check_errors.push("Timeline has no data on S3 at all".to_string()),
+    }
+
+    if branch_check_errors.is_empty() {
+        info!("No check errors found");
+    } else {
+        warn!("Found check errors: {branch_check_errors:?}");
+    }
+
+    if !keys_to_remove.is_empty() {
+        error!("The following keys should be removed from S3: {keys_to_remove:?}")
+    }
+
+    branch_check_errors
+}
+
+#[derive(Debug)]
+struct S3TimelineBlobData {
+    blob_data: BlobDataParseResult, // parsed index part and layers, or the errors that prevented it
+    keys_to_remove: Vec<String>, // listed keys that are neither index_part.json nor a layer name
+}
+
+#[derive(Debug)]
+enum BlobDataParseResult {
+    Parsed {
+        index_part: IndexPart, // the deserialized index_part.json
+        s3_layers: HashSet<LayerFileName>, // layer files found in the S3 listing
+    },
+    Incorrect(Vec<String>), // listing/parsing errors; `list_timeline_blobs` never leaves it empty
+}
+
+/// Lists all S3 objects under the timeline's prefix and downloads its `index_part.json`.
+///
+/// Returns `BlobDataParseResult::Parsed` when the index part was found and deserialized, and
+/// `BlobDataParseResult::Incorrect` with the collected errors otherwise. Keys that are neither
+/// `index_part.json` nor a valid layer file name are returned in `keys_to_remove`.
+async fn list_timeline_blobs(
+    s3_client: &Client,
+    id: TenantTimelineId,
+    s3_root: &RootTarget,
+) -> anyhow::Result<S3TimelineBlobData> {
+    let mut s3_layers = HashSet::new();
+    let mut index_part_object = None;
+
+    let timeline_dir_target = s3_root.timeline_root(id);
+    let mut continuation_token = None;
+
+    let mut errors = Vec::new();
+    let mut keys_to_remove = Vec::new();
+
+    loop {
+        let fetch_response =
+            list_objects_with_retries(s3_client, &timeline_dir_target, continuation_token.clone())
+                .await?;
+
+        let subdirectories = fetch_response.common_prefixes().unwrap_or_default();
+        if !subdirectories.is_empty() {
+            errors.push(format!(
+                "S3 list response should not contain any subdirectories, but got {subdirectories:?}"
+            ));
+        }
+
+        for (object, key) in fetch_response
+            .contents()
+            .unwrap_or_default()
+            .iter()
+            .filter_map(|object| Some((object, object.key()?)))
+        {
+            let blob_name = key.strip_prefix(&timeline_dir_target.prefix_in_bucket);
+            match blob_name {
+                Some("index_part.json") => index_part_object = Some(object.clone()),
+                Some(maybe_layer_name) => match maybe_layer_name.parse::<LayerFileName>() {
+                    Ok(new_layer) => {
+                        s3_layers.insert(new_layer);
+                    }
+                    Err(e) => {
+                        errors.push(
+                            format!("S3 list response got an object with key {key} that is not a layer name: {e}"),
+                        );
+                        keys_to_remove.push(key.to_string());
+                    }
+                },
+                None => {
+                    errors.push(format!("S3 list response got an object with odd key {key}"));
+                    keys_to_remove.push(key.to_string());
+                }
+            }
+        }
+
+        match fetch_response.next_continuation_token {
+            Some(new_token) => continuation_token = Some(new_token),
+            None => break,
+        }
+    }
+
+    match index_part_object.as_ref().map(|object| object.key()) {
+        None => errors.push("S3 list response got no index_part.json file".to_string()),
+        Some(None) => errors.push(format!("Index part object {index_part_object:?} has no key")),
+        Some(Some(index_part_object_key)) => {
+            let index_part_bytes = download_object_with_retries(
+                s3_client,
+                &timeline_dir_target.bucket_name,
+                index_part_object_key,
+            )
+            .await
+            .context("index_part.json download")?;
+
+            match serde_json::from_slice(&index_part_bytes) {
+                Ok(index_part) => {
+                    return Ok(S3TimelineBlobData {
+                        blob_data: BlobDataParseResult::Parsed {
+                            index_part,
+                            s3_layers,
+                        },
+                        keys_to_remove,
+                    })
+                }
+                Err(index_parse_error) => errors.push(format!(
+                    "index_part.json body parsing error: {index_parse_error}"
+                )),
+            }
+        }
+    }
+
+    if errors.is_empty() {
+        errors.push(
+            "Unexpected: no errors did not lead to a successfully parsed blob return".to_string(),
+        );
+    }
+
+    Ok(S3TimelineBlobData {
+        blob_data: BlobDataParseResult::Incorrect(errors),
+        keys_to_remove,
+    })
+}
+
+/// Downloads the whole S3 object `key` of `bucket_name` into memory, making up to `MAX_RETRIES`
+/// attempts (request and body streaming together) with a one second pause after each failure.
+async fn download_object_with_retries(
+    s3_client: &Client,
+    bucket_name: &str,
+    key: &str,
+) -> anyhow::Result<Vec<u8>> {
+    for _ in 0..MAX_RETRIES {
+        let response_stream = match s3_client
+            .get_object()
+            .bucket(bucket_name)
+            .key(key)
+            .send()
+            .await
+        {
+            Ok(response) => response,
+            Err(e) => {
+                error!("Failed to download object for key {key}: {e}");
+                tokio::time::sleep(Duration::from_secs(1)).await;
+                continue;
+            }
+        };
+        let mut body_buf = Vec::new(); // fresh per attempt: a failed partial read is discarded
+        match response_stream
+            .body
+            .into_async_read()
+            .read_to_end(&mut body_buf)
+            .await
+        {
+            Ok(bytes_read) => {
+                info!("Downloaded {bytes_read} bytes for object with key {key}");
+                return Ok(body_buf);
+            }
+            Err(e) => {
+                error!("Failed to stream object body for key {key}: {e}");
+                tokio::time::sleep(Duration::from_secs(1)).await;
+            }
+        }
+    }
+    anyhow::bail!("Failed to download object with key {key} {MAX_RETRIES} times")
+}
diff --git a/s3_scrubber/src/cloud_admin_api.rs b/s3_scrubber/src/cloud_admin_api.rs
new file mode 100644
index 0000000000..3c21b70385
--- /dev/null
+++ b/s3_scrubber/src/cloud_admin_api.rs
@@ -0,0 +1,418 @@
+#![allow(unused)]
+
+use chrono::{DateTime, Utc};
+use reqwest::{header, Client, Url};
+use tokio::sync::Semaphore;
+
+use utils::id::{TenantId, TimelineId};
+use utils::lsn::Lsn;
+
+#[derive(Debug)]
+pub struct Error {
+    context: String, // what was being attempted, or a description of the unexpected state
+    kind: ErrorKind, // which stage failed; selects the `Display` message
+}
+
+impl Error {
+    fn new(context: String, kind: ErrorKind) -> Self {
+        Error { context, kind } // plain field-by-field constructor
+    }
+}
+
+impl std::fmt::Display for Error {
+    /// Renders the failure together with its context. `BodyRead` is produced while reading a
+    /// response, so its message talks about the response body.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match &self.kind {
+            ErrorKind::RequestSend(e) => write!(
+                f,
+                "Failed to send a request. Context: {}, error: {}",
+                self.context, e
+            ),
+            ErrorKind::BodyRead(e) => write!(
+                f,
+                "Failed to read a response body. Context: {}, error: {}",
+                self.context, e
+            ),
+            ErrorKind::UnexpectedState => write!(f, "Unexpected state: {}", self.context),
+        }
+    }
+}
+
+#[derive(Debug, Clone, serde::Deserialize, Hash, PartialEq, Eq)]
+#[serde(transparent)] // deserialized directly from the inner string
+pub struct ProjectId(pub String);
+
+#[derive(Clone, Debug, serde::Deserialize, Hash, PartialEq, Eq)]
+#[serde(transparent)] // deserialized directly from the inner string
+pub struct BranchId(pub String);
+
+impl std::error::Error for Error {}
+
+#[derive(Debug)]
+pub enum ErrorKind {
+    RequestSend(reqwest::Error), // sending the HTTP request failed
+    BodyRead(reqwest::Error), // reading/deserializing the response body failed
+    UnexpectedState, // the response was readable, but its contents were not as expected
+}
+
+pub struct CloudAdminApiClient {
+    request_limiter: Semaphore, // every request method holds one permit while it runs
+    token: String, // sent as the bearer token of every request
+    base_url: Url, // prefix that `append_url` concatenates the sub-paths to
+    http_client: Client,
+}
+
+#[derive(Debug, serde::Deserialize)]
+struct AdminApiResponse<T> {
+    data: T, // the payload; the only field the client methods read
+    total: Option<usize>, // NOTE(review): presumably the total entry count — confirm
+}
+
+#[derive(Debug, serde::Deserialize)] // element of the `/pageservers` response, see `list_pageservers`
+pub struct PageserverData {
+    pub id: u64,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub region_id: String,
+    pub version: i64,
+    pub instance_id: String,
+    pub port: u16,
+    pub http_host: String,
+    pub http_port: u16,
+    pub active: bool,
+    pub projects_count: usize,
+    pub availability_zone_id: String,
+}
+
+#[derive(Debug, Clone, serde::Deserialize)] // element of `/safekeepers`; also nested in `ProjectData`
+pub struct SafekeeperData {
+    pub id: u64,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub region_id: String,
+    pub version: i64,
+    pub instance_id: String,
+    pub active: bool,
+    pub host: String,
+    pub port: u16,
+    pub projects_count: usize,
+    pub availability_zone_id: String,
+}
+
+#[serde_with::serde_as]
+#[derive(Debug, Clone, serde::Deserialize)] // element of the `/projects` response
+pub struct ProjectData {
+    pub id: ProjectId,
+    pub name: String,
+    pub region_id: String,
+    pub platform_id: String,
+    pub user_id: String,
+    pub pageserver_id: u64,
+    #[serde_as(as = "serde_with::DisplayFromStr")] // parsed from its string form via `FromStr`
+    pub tenant: TenantId,
+    pub safekeepers: Vec<SafekeeperData>,
+    pub deleted: bool,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub pg_version: u32,
+    pub max_project_size: u64,
+    pub remote_storage_size: u64,
+    pub resident_size: u64,
+    pub synthetic_storage_size: u64,
+    pub compute_time: u64,
+    pub data_transfer: u64,
+    pub data_storage: u64,
+    pub maintenance_set: Option<String>,
+}
+
+#[serde_with::serde_as]
+#[derive(Debug, serde::Deserialize)] // element of the `/branches` response
+pub struct BranchData {
+    pub id: BranchId,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+    pub name: String,
+    pub project_id: ProjectId,
+    #[serde_as(as = "serde_with::DisplayFromStr")] // parsed from its string form via `FromStr`
+    pub timeline_id: TimelineId,
+    #[serde(default)] // a missing field deserializes to `None`
+    pub parent_id: Option<BranchId>,
+    #[serde(default)] // a missing field deserializes to `None`
+    #[serde_as(as = "Option<serde_with::DisplayFromStr>")]
+    pub parent_lsn: Option<Lsn>,
+    pub default: bool,
+    pub deleted: bool,
+    pub logical_size: Option<u64>,
+    pub physical_size: Option<u64>,
+    pub written_size: Option<u64>,
+}
+
+impl CloudAdminApiClient {
+    pub fn new(token: String, base_url: Url) -> Self {
+        Self {
+            token,
+            base_url,
+            request_limiter: Semaphore::new(200), // at most 200 requests in flight at once
+            http_client: Client::new(), // TODO timeout configs at least
+        }
+    }
+
+    /// Looks up the project owning `tenant_id` (deleted included); errors if several match.
+    pub async fn find_tenant_project(
+        &self,
+        tenant_id: TenantId,
+    ) -> Result<Option<ProjectData>, Error> {
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        let response = self
+            .http_client
+            .get(self.append_url("/projects"))
+            .query(&[
+                ("tenant_id", tenant_id.to_string()),
+                ("show_deleted", "true".to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| {
+                Error::new(
+                    "Find project for tenant".to_string(),
+                    ErrorKind::RequestSend(e),
+                )
+            })?;
+        // Deserialize the body and accept at most one project in the answer.
+        let response: AdminApiResponse<Vec<ProjectData>> = response.json().await.map_err(|e| {
+            Error::new(
+                "Find project for tenant".to_string(),
+                ErrorKind::BodyRead(e),
+            )
+        })?;
+        match response.data.len() {
+            0 => Ok(None),
+            1 => Ok(Some(
+                response
+                    .data
+                    .into_iter()
+                    .next()
+                    .expect("Should have exactly one element"),
+            )),
+            too_many => Err(Error::new(
+                format!("Find project for tenant returned {too_many} projects instead of 0 or 1"),
+                ErrorKind::UnexpectedState,
+            )),
+        }
+    }
+
+    /// Looks up the branch of `timeline_id` (deleted included); errors if several match.
+    pub async fn find_timeline_branch(
+        &self,
+        timeline_id: TimelineId,
+    ) -> Result<Option<BranchData>, Error> {
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        let response = self
+            .http_client
+            .get(self.append_url("/branches"))
+            .query(&[
+                ("timeline_id", timeline_id.to_string()),
+                ("show_deleted", "true".to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| {
+                Error::new(
+                    "Find branch for timeline".to_string(),
+                    ErrorKind::RequestSend(e),
+                )
+            })?;
+        // Deserialize the body and accept at most one branch in the answer.
+        let response: AdminApiResponse<Vec<BranchData>> = response.json().await.map_err(|e| {
+            Error::new(
+                "Find branch for timeline".to_string(),
+                ErrorKind::BodyRead(e),
+            )
+        })?;
+        match response.data.len() {
+            0 => Ok(None),
+            1 => Ok(Some(
+                response
+                    .data
+                    .into_iter()
+                    .next()
+                    .expect("Should have exactly one element"),
+            )),
+            too_many => Err(Error::new(
+                format!("Find branch for timeline returned {too_many} branches instead of 0 or 1"),
+                ErrorKind::UnexpectedState,
+            )),
+        }
+    }
+
+    /// Fetches the pageserver list from the admin API's `/pageservers` endpoint.
+    pub async fn list_pageservers(&self) -> Result<Vec<PageserverData>, Error> {
+        let context = || "List pageservers".to_string();
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        // The permit is held until the response body has been read in full.
+        let raw_response = self
+            .http_client
+            .get(self.append_url("/pageservers"))
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::RequestSend(e)))?;
+        let parsed = raw_response
+            .json::<AdminApiResponse<Vec<PageserverData>>>()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::BodyRead(e)))?;
+        Ok(parsed.data)
+    }
+
+    /// Fetches the safekeeper list from the admin API's `/safekeepers` endpoint.
+    pub async fn list_safekeepers(&self) -> Result<Vec<SafekeeperData>, Error> {
+        let context = || "List safekeepers".to_string();
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        // The permit is held until the response body has been read in full.
+        let raw_response = self
+            .http_client
+            .get(self.append_url("/safekeepers"))
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::RequestSend(e)))?;
+        let parsed = raw_response
+            .json::<AdminApiResponse<Vec<SafekeeperData>>>()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::BodyRead(e)))?;
+        Ok(parsed.data)
+    }
+
+    /// Queries `/projects` filtered by `pageserver_id`; `show_deleted` is passed on as is.
+    pub async fn projects_for_pageserver(
+        &self,
+        pageserver_id: u64,
+        show_deleted: bool,
+    ) -> Result<Vec<ProjectData>, Error> {
+        // Context put into this method's errors, so that they name the failed query precisely.
+        let context = || "Projects for pageserver".to_string();
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        let response = self
+            .http_client
+            .get(self.append_url("/projects"))
+            .query(&[
+                ("pageserver_id", &pageserver_id.to_string()),
+                ("show_deleted", &show_deleted.to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::RequestSend(e)))?;
+        let response: AdminApiResponse<Vec<ProjectData>> = response
+            .json()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::BodyRead(e)))?;
+        Ok(response.data)
+    }
+
+    /// Searches the projects for `tenant_id` through the `search` query parameter.
+    /// Returns `Ok(None)` for no match and an `UnexpectedState` error for more than one.
+    pub async fn project_for_tenant(
+        &self,
+        tenant_id: TenantId,
+        show_deleted: bool,
+    ) -> Result<Option<ProjectData>, Error> {
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        let response = self
+            .http_client
+            .get(self.append_url("/projects"))
+            .query(&[
+                ("search", &tenant_id.to_string()),
+                ("show_deleted", &show_deleted.to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| Error::new("Project for tenant".to_string(), ErrorKind::RequestSend(e)))?;
+        let response: AdminApiResponse<Vec<ProjectData>> = response
+            .json()
+            .await
+            .map_err(|e| Error::new("Project for tenant".to_string(), ErrorKind::BodyRead(e)))?;
+        // Exactly one hit is handed out by value; several hits are reported as an error.
+        match response.data.as_slice() {
+            [] => Ok(None),
+            [_single] => Ok(Some(response.data.into_iter().next().unwrap())),
+            multiple => Err(Error::new(
+                format!("Got more than one project for tenant {tenant_id} : {multiple:?}"),
+                ErrorKind::UnexpectedState,
+            )),
+        }
+    }
+
+    /// Queries `/branches` filtered by `project_id`; `show_deleted` is passed on as is.
+    pub async fn branches_for_project(
+        &self,
+        project_id: &ProjectId,
+        show_deleted: bool,
+    ) -> Result<Vec<BranchData>, Error> {
+        // Context put into this method's errors, so that they name the failed query precisely.
+        let context = || "Branches for project".to_string();
+        let _permit = self
+            .request_limiter
+            .acquire()
+            .await
+            .expect("Semaphore is not closed");
+        let response = self
+            .http_client
+            .get(self.append_url("/branches"))
+            .query(&[
+                ("project_id", &project_id.0),
+                ("show_deleted", &show_deleted.to_string()),
+            ])
+            .header(header::ACCEPT, "application/json")
+            .bearer_auth(&self.token)
+            .send()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::RequestSend(e)))?;
+        let response: AdminApiResponse<Vec<BranchData>> = response
+            .json()
+            .await
+            .map_err(|e| Error::new(context(), ErrorKind::BodyRead(e)))?;
+        Ok(response.data)
+    }
+
+    fn append_url(&self, subpath: &str) -> Url {
+        // TODO fugly: concatenates strings, as `.join` reportedly does not work; panics on a bad URL
+        (self.base_url.to_string() + subpath)
+            .parse()
+            .unwrap_or_else(|e| panic!("Could not append {subpath} to base url: {e}"))
+    }
+}
diff --git a/s3_scrubber/src/delete_batch_producer.rs b/s3_scrubber/src/delete_batch_producer.rs
new file mode 100644
index 0000000000..99ab5c4198
--- /dev/null
+++ b/s3_scrubber/src/delete_batch_producer.rs
@@ -0,0 +1,354 @@
+mod tenant_batch;
+mod timeline_batch;
+
+use std::future::Future;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::Context;
+use aws_sdk_s3::Client;
+use either::Either;
+use tokio::sync::mpsc::UnboundedReceiver;
+use tokio::sync::Mutex;
+use tokio::task::{JoinHandle, JoinSet};
+use tracing::{error, info, info_span, Instrument};
+
+use crate::cloud_admin_api::{BranchData, CloudAdminApiClient, ProjectData};
+use crate::{list_objects_with_retries, RootTarget, S3Target, TraversingDepth, MAX_RETRIES};
+use utils::id::{TenantId, TenantTimelineId};
+
+/// A typical tenant to remove contains 1 layer blob and 1 index_part.json blob.
+/// There are also some non-standard tenants to remove that have more layers.
+/// A delete_objects request allows up to 1000 keys, so stay on the safe side and let most
+/// batch processing tasks issue only 1 delete_objects request.
+///
+/// Every batch item will additionally be S3 LS'ed later, so keep the batch size
+/// even lower to let multiple concurrent tasks do the LS requests.
+const BATCH_SIZE: usize = 100;
+
+pub struct DeleteBatchProducer {
+    delete_tenants_sender_task: JoinHandle<anyhow::Result<ProcessedS3List<TenantId, ProjectData>>>,
+    delete_timelines_sender_task:
+        JoinHandle<anyhow::Result<ProcessedS3List<TenantTimelineId, BranchData>>>,
+    delete_batch_creator_task: JoinHandle<()>,
+    delete_batch_receiver: Arc<Mutex<UnboundedReceiver<DeleteBatch>>>,
+}
+
+pub struct DeleteProducerStats {
+    pub tenant_stats: ProcessedS3List<TenantId, ProjectData>,
+    pub timeline_stats: Option<ProcessedS3List<TenantTimelineId, BranchData>>,
+}
+
+impl DeleteProducerStats {
+    /// Total number of tenant entries counted in `tenant_stats`.
+    pub fn tenants_checked(&self) -> usize {
+        self.tenant_stats.entries_total
+    }
+
+    /// Number of entries recorded as active in `tenant_stats`.
+    pub fn active_tenants(&self) -> usize {
+        self.tenant_stats.active_entries.len()
+    }
+
+    /// Total number of timeline entries counted, or 0 when there are no timeline stats.
+    pub fn timelines_checked(&self) -> usize {
+        self.timeline_stats.as_ref().map_or(0, |s| s.entries_total)
+    }
+}
+
+#[derive(Debug, Default, Clone)]
+pub struct DeleteBatch {
+    pub tenants: Vec<TenantId>,
+    pub timelines: Vec<TenantTimelineId>,
+}
+
+impl DeleteBatch {
+    pub fn merge(&mut self, mut other: Self) {
+        self.tenants.append(&mut other.tenants);
+        self.timelines.append(&mut other.timelines);
+    }
+
+    pub fn len(&self) -> usize {
+        self.timelines.len() + self.tenants.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.tenants.is_empty() && self.timelines.is_empty()
+    }
+}
+
+impl DeleteBatchProducer {
+    /// Spawns the tenants, timelines and batch-forming tasks that produce `DeleteBatch`es.
+    pub fn start(
+        admin_client: Arc<CloudAdminApiClient>,
+        s3_client: Arc<Client>,
+        s3_root_target: RootTarget,
+        traversing_depth: TraversingDepth,
+    ) -> Self {
+        let (delete_elements_sender, mut delete_elements_receiver) =
+            tokio::sync::mpsc::unbounded_channel();
+        let delete_elements_sender = Arc::new(delete_elements_sender);
+
+        let (projects_to_check_sender, mut projects_to_check_receiver) =
+            tokio::sync::mpsc::unbounded_channel();
+        let delete_tenants_root_target = s3_root_target.clone();
+        let delete_tenants_client = Arc::clone(&s3_client);
+        let delete_tenants_admin_client = Arc::clone(&admin_client);
+        let delete_sender = Arc::clone(&delete_elements_sender);
+        let delete_tenants_sender_task = tokio::spawn(
+            async move {
+                tenant_batch::schedule_cleanup_deleted_tenants(
+                    &delete_tenants_root_target,
+                    &delete_tenants_client,
+                    &delete_tenants_admin_client,
+                    projects_to_check_sender,
+                    delete_sender,
+                    traversing_depth,
+                )
+                .await
+            }
+            .instrument(info_span!("delete_tenants_sender")),
+        );
+        let delete_timelines_sender_task = tokio::spawn(async move {
+            timeline_batch::schedule_cleanup_deleted_timelines(
+                &s3_root_target,
+                &s3_client,
+                &admin_client,
+                &mut projects_to_check_receiver,
+                delete_elements_sender,
+            )
+            .in_current_span()
+            .await
+        });
+
+        let (delete_batch_sender, delete_batch_receiver) = tokio::sync::mpsc::unbounded_channel();
+        let delete_batch_creator_task = tokio::spawn(
+            async move {
+                'outer: loop {
+                    let mut delete_batch = DeleteBatch::default();
+                    while delete_batch.len() < BATCH_SIZE {
+                        match delete_elements_receiver.recv().await {
+                            Some(new_task) => match new_task {
+                                Either::Left(tenant_id) => delete_batch.tenants.push(tenant_id),
+                                Either::Right(timeline_id) => {
+                                    delete_batch.timelines.push(timeline_id)
+                                }
+                            },
+                            None => {
+                                info!("Task finished: sender dropped");
+                                delete_batch_sender.send(delete_batch).ok();
+                                break 'outer;
+                            }
+                        }
+                    }
+
+                    if !delete_batch.is_empty() {
+                        delete_batch_sender.send(delete_batch).ok();
+                    }
+                }
+            }
+            .instrument(info_span!("delete batch creator")),
+        );
+
+        Self {
+            delete_tenants_sender_task,
+            delete_timelines_sender_task,
+            delete_batch_creator_task,
+            delete_batch_receiver: Arc::new(Mutex::new(delete_batch_receiver)),
+        }
+    }
+
+    pub fn subscribe(&self) -> Arc<Mutex<UnboundedReceiver<DeleteBatch>>> {
+        self.delete_batch_receiver.clone()
+    }
+
+    /// Waits for all three tasks and returns the listing stats, or the error of a failed task.
+    pub async fn join(self) -> anyhow::Result<DeleteProducerStats> {
+        let (delete_tenants_task_result, delete_timelines_task_result, batch_task_result) = tokio::join!(
+            self.delete_tenants_sender_task,
+            self.delete_timelines_sender_task,
+            self.delete_batch_creator_task,
+        );
+
+        let tenant_stats = match delete_tenants_task_result {
+            Ok(Ok(stats)) => stats,
+            Ok(Err(tenant_deletion_error)) => return Err(tenant_deletion_error),
+            Err(join_error) => {
+                anyhow::bail!("Failed to join the delete tenant producing task: {join_error}")
+            }
+        };
+
+        let timeline_stats = match delete_timelines_task_result {
+            Ok(Ok(stats)) => Some(stats),
+            Ok(Err(timeline_deletion_error)) => return Err(timeline_deletion_error),
+            Err(join_error) => {
+                anyhow::bail!("Failed to join the delete timeline producing task: {join_error}")
+            }
+        };
+
+        if let Err(join_error) = batch_task_result {
+            anyhow::bail!("Failed to join the batch forming task: {join_error}");
+        }
+
+        Ok(DeleteProducerStats {
+            tenant_stats,
+            timeline_stats,
+        })
+    }
+}
+
+pub struct ProcessedS3List<I, A> {
+    pub entries_total: usize, // how many listed entries were checked
+    pub entries_to_delete: Vec<I>, // ids of the entries found to be removable
+    pub active_entries: Vec<A>, // admin data of the entries that are still alive
+}
+
+impl<I, A> Default for ProcessedS3List<I, A> {
+    fn default() -> Self {
+        Self {
+            entries_total: 0,
+            entries_to_delete: Vec::new(),
+            active_entries: Vec::new(),
+        }
+    }
+}
+
+impl<I, A> ProcessedS3List<I, A> {
+    fn merge(&mut self, other: Self) {
+        self.entries_total += other.entries_total;
+        self.entries_to_delete.extend(other.entries_to_delete);
+        self.active_entries.extend(other.active_entries);
+    }
+
+    fn change_ids<NewI>(self, transform: impl Fn(I) -> NewI) -> ProcessedS3List<NewI, A> {
+        ProcessedS3List {
+            entries_total: self.entries_total,
+            entries_to_delete: self.entries_to_delete.into_iter().map(transform).collect(),
+            active_entries: self.active_entries,
+        }
+    }
+}
+
+async fn process_s3_target_recursively<F, Fut, I, E, A>(
+    s3_client: &Client,
+    target: &S3Target,
+    find_active_and_deleted_entries: F,
+) -> anyhow::Result<ProcessedS3List<I, A>>
+where
+    I: FromStr<Err = E> + Send + Sync,
+    E: Send + Sync + std::error::Error + 'static,
+    F: FnOnce(Vec<I>) -> Fut + Clone,
+    Fut: Future<Output = anyhow::Result<ProcessedS3List<I, A>>>,
+{
+    let mut continuation_token = None;
+    let mut total_entries = ProcessedS3List::default();
+    // Despite the name, this is a loop over the listing pages, driven by continuation tokens.
+    loop {
+        let fetch_response =
+            list_objects_with_retries(s3_client, target, continuation_token.clone()).await?;
+        // Common prefixes that are not `<target prefix><id>/` are skipped; a bad id is an error.
+        let new_entry_ids = fetch_response
+            .common_prefixes()
+            .unwrap_or_default()
+            .iter()
+            .filter_map(|prefix| prefix.prefix())
+            .filter_map(|prefix| -> Option<&str> {
+                prefix
+                    .strip_prefix(&target.prefix_in_bucket)?
+                    .strip_suffix('/')
+            })
+            .map(|entry_id_str| {
+                entry_id_str
+                    .parse()
+                    .with_context(|| format!("Incorrect entry id str: {entry_id_str}"))
+            })
+            .collect::<anyhow::Result<Vec<I>>>()
+            .context("list and parse bucket's entry ids")?;
+        // `F` is `FnOnce`, hence a fresh clone of the callback for every page.
+        total_entries.merge(
+            (find_active_and_deleted_entries.clone())(new_entry_ids)
+                .await
+                .context("filter active and deleted entry ids")?,
+        );
+
+        match fetch_response.next_continuation_token {
+            Some(new_token) => continuation_token = Some(new_token),
+            None => break,
+        }
+    }
+
+    Ok(total_entries)
+}
+
+enum FetchResult<A> {
+    Found(A), // the admin data has the entry and it is not marked as deleted
+    Deleted, // the admin data has the entry, marked as deleted
+    Absent, // the admin data has no such entry
+}
+
+async fn split_to_active_and_deleted_entries<I, A, F, Fut>(
+    new_entry_ids: Vec<I>,
+    find_active_entry: F,
+) -> anyhow::Result<ProcessedS3List<I, A>>
+where
+    I: std::fmt::Display + Send + Sync + 'static + Copy,
+    A: Send + 'static,
+    F: FnOnce(I) -> Fut + Send + Sync + 'static + Clone,
+    Fut: Future<Output = anyhow::Result<FetchResult<A>>> + Send,
+{
+    let entries_total = new_entry_ids.len();
+    let mut check_tasks = JoinSet::new();
+    let mut active_entries = Vec::with_capacity(entries_total);
+    let mut entries_to_delete = Vec::with_capacity(entries_total);
+    // One task per entry: `find_active_entry` is tried up to MAX_RETRIES times, 1 second apart.
+    for new_entry_id in new_entry_ids {
+        let check_closure = find_active_entry.clone();
+        check_tasks.spawn(
+            async move {
+                (
+                    new_entry_id,
+                    async {
+                        for _ in 0..MAX_RETRIES {
+                            let closure_clone = check_closure.clone();
+                            match closure_clone(new_entry_id).await {
+                                Ok(active_entry) => return Ok(active_entry),
+                                Err(e) => {
+                                    error!("find active entry admin API call failed: {e}");
+                                    tokio::time::sleep(Duration::from_secs(1)).await;
+                                }
+                            }
+                        }
+                        // Reached only after every attempt has failed.
+                        anyhow::bail!("Failed to check entry {new_entry_id} {MAX_RETRIES} times")
+                    }
+                    .await,
+                )
+            }
+            .instrument(info_span!("filter_active_entries")),
+        );
+    }
+    // A failed join or an exhausted check aborts the whole split with an error.
+    while let Some(task_result) = check_tasks.join_next().await {
+        let (entry_id, entry_data_fetch_result) = task_result.context("task join")?;
+        match entry_data_fetch_result.context("entry data fetch")? {
+            FetchResult::Found(active_entry) => {
+                info!("Entry {entry_id} is alive, cannot delete");
+                active_entries.push(active_entry);
+            }
+            FetchResult::Deleted => {
+                info!("Entry {entry_id} deleted in the admin data, can safely delete");
+                entries_to_delete.push(entry_id);
+            }
+            FetchResult::Absent => {
+                info!("Entry {entry_id} absent in the admin data, can safely delete");
+                entries_to_delete.push(entry_id);
+            }
+        }
+    }
+    Ok(ProcessedS3List {
+        entries_total,
+        entries_to_delete,
+        active_entries,
+    })
+}
diff --git a/s3_scrubber/src/delete_batch_producer/tenant_batch.rs b/s3_scrubber/src/delete_batch_producer/tenant_batch.rs
new file mode 100644
index 0000000000..59fd638645
--- /dev/null
+++ b/s3_scrubber/src/delete_batch_producer/tenant_batch.rs
@@ -0,0 +1,87 @@
+use std::sync::Arc;
+
+use anyhow::Context;
+use aws_sdk_s3::Client;
+use either::Either;
+use tokio::sync::mpsc::UnboundedSender;
+use tracing::info;
+
+use crate::cloud_admin_api::{CloudAdminApiClient, ProjectData};
+use crate::delete_batch_producer::FetchResult;
+use crate::{RootTarget, TraversingDepth};
+use utils::id::{TenantId, TenantTimelineId};
+
+use super::ProcessedS3List;
+
+pub async fn schedule_cleanup_deleted_tenants(
+    s3_root_target: &RootTarget,
+    s3_client: &Arc<Client>,
+    admin_client: &Arc<CloudAdminApiClient>,
+    projects_to_check_sender: UnboundedSender<ProjectData>,
+    delete_sender: Arc<UnboundedSender<Either<TenantId, TenantTimelineId>>>,
+    traversing_depth: TraversingDepth,
+) -> anyhow::Result<ProcessedS3List<TenantId, ProjectData>> {
+    info!(
+        "Starting to list the bucket from root {}",
+        s3_root_target.bucket_name()
+    );
+    s3_client
+        .head_bucket()
+        .bucket(s3_root_target.bucket_name())
+        .send()
+        .await
+        .with_context(|| format!("bucket {} was not found", s3_root_target.bucket_name()))?;
+    // Each tenant id listed in S3 is looked up in the admin API and classified below.
+    let check_client = Arc::clone(admin_client);
+    let tenant_stats = super::process_s3_target_recursively(
+        s3_client,
+        s3_root_target.tenants_root(),
+        |s3_tenants| async move {
+            let another_client = Arc::clone(&check_client);
+            super::split_to_active_and_deleted_entries(s3_tenants, move |tenant_id| async move {
+                let project_data = another_client
+                    .find_tenant_project(tenant_id)
+                    .await
+                    .with_context(|| format!("Tenant {tenant_id} project admin check"))?;
+                // Deleted or unknown projects are queued for removal; send errors are ignored.
+                Ok(if let Some(console_project) = project_data {
+                    if console_project.deleted {
+                        delete_sender.send(Either::Left(tenant_id)).ok();
+                        FetchResult::Deleted
+                    } else {
+                        if traversing_depth == TraversingDepth::Timeline {
+                            projects_to_check_sender.send(console_project.clone()).ok();
+                        }
+                        FetchResult::Found(console_project)
+                    }
+                } else {
+                    delete_sender.send(Either::Left(tenant_id)).ok();
+                    FetchResult::Absent
+                })
+            })
+            .await
+        },
+    )
+    .await
+    .context("tenant batch processing")?;
+
+    info!(
+        "Among {} tenants, found {} tenants to delete and {} active ones",
+        tenant_stats.entries_total,
+        tenant_stats.entries_to_delete.len(),
+        tenant_stats.active_entries.len(),
+    );
+
+    let tenant_stats = match traversing_depth {
+        TraversingDepth::Tenant => {
+            info!("Finished listing the bucket for tenants only");
+            tenant_stats
+        }
+        TraversingDepth::Timeline => {
+            info!("Finished listing the bucket for tenants and sent {} active tenants to check for timelines", tenant_stats.active_entries.len());
+            tenant_stats
+        }
+    };
+
+    Ok(tenant_stats)
+}
diff --git a/s3_scrubber/src/delete_batch_producer/timeline_batch.rs b/s3_scrubber/src/delete_batch_producer/timeline_batch.rs
new file mode 100644
index 0000000000..0a0b31ae87
--- /dev/null
+++ b/s3_scrubber/src/delete_batch_producer/timeline_batch.rs
@@ -0,0 +1,102 @@
+use std::sync::Arc;
+
+use anyhow::Context;
+use aws_sdk_s3::Client;
+use either::Either;
+use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
+use tracing::{info, info_span, Instrument};
+
+use crate::cloud_admin_api::{BranchData, CloudAdminApiClient, ProjectData};
+use crate::delete_batch_producer::{FetchResult, ProcessedS3List};
+use crate::RootTarget;
+use utils::id::{TenantId, TenantTimelineId};
+
+pub async fn schedule_cleanup_deleted_timelines(
+    s3_root_target: &RootTarget,
+    s3_client: &Arc<Client>,
+    admin_client: &Arc<CloudAdminApiClient>,
+    projects_to_check_receiver: &mut UnboundedReceiver<ProjectData>,
+    delete_elements_sender: Arc<UnboundedSender<Either<TenantId, TenantTimelineId>>>,
+) -> anyhow::Result<ProcessedS3List<TenantTimelineId, BranchData>> {
+    info!(
+        "Starting to list the bucket from root {}",
+        s3_root_target.bucket_name()
+    );
+    s3_client
+        .head_bucket()
+        .bucket(s3_root_target.bucket_name())
+        .send()
+        .await
+        .with_context(|| format!("bucket {} was not found", s3_root_target.bucket_name()))?;
+    // Runs until the projects channel is closed and drained, checking one project at a time.
+    let mut timeline_stats = ProcessedS3List::default();
+    while let Some(project_to_check) = projects_to_check_receiver.recv().await {
+        let check_client = Arc::clone(admin_client);
+        // (these per-project clones are moved into the `async move` block below)
+        let check_s3_client = Arc::clone(s3_client);
+
+        let check_delete_sender = Arc::clone(&delete_elements_sender);
+
+        let check_root = s3_root_target.clone();
+
+        let new_stats = async move {
+            let tenant_id_to_check = project_to_check.tenant;
+            let check_target = check_root.timelines_root(tenant_id_to_check);
+            let stats = super::process_s3_target_recursively(
+                &check_s3_client,
+                &check_target,
+                |s3_timelines| async move {
+                    let another_client = check_client.clone();
+                    super::split_to_active_and_deleted_entries(
+                        s3_timelines,
+                        move |timeline_id| async move {
+                            let console_branch = another_client
+                                .find_timeline_branch(timeline_id)
+                                .await
+                                .map_err(|e| {
+                                    anyhow::anyhow!(
+                                        "Timeline {timeline_id} branch admin check: {e}"
+                                    )
+                                })?;
+
+                            let id = TenantTimelineId::new(tenant_id_to_check, timeline_id);
+                            Ok(match console_branch {
+                                Some(console_branch) => {
+                                    if console_branch.deleted {
+                                        check_delete_sender.send(Either::Right(id)).ok();
+                                        FetchResult::Deleted
+                                    } else {
+                                        FetchResult::Found(console_branch)
+                                    }
+                                }
+                                None => {
+                                    check_delete_sender.send(Either::Right(id)).ok();
+                                    FetchResult::Absent
+                                }
+                            })
+                        },
+                    )
+                    .await
+                },
+            )
+            .await
+            .with_context(|| format!("tenant {tenant_id_to_check} timeline batch processing"))?
+            .change_ids(|timeline_id| TenantTimelineId::new(tenant_id_to_check, timeline_id));
+
+            Ok::<_, anyhow::Error>(stats)
+        }
+        .instrument(info_span!("delete_timelines_sender", tenant = %project_to_check.tenant))
+        .await?;
+
+        timeline_stats.merge(new_stats);
+    }
+
+    info!(
+        "Among {} timelines, found {} timelines to delete and {} active ones",
+        timeline_stats.entries_total,
+        timeline_stats.entries_to_delete.len(),
+        timeline_stats.active_entries.len(),
+    );
+
+    Ok(timeline_stats)
+}
diff --git a/s3_scrubber/src/lib.rs b/s3_scrubber/src/lib.rs
new file mode 100644
index 0000000000..ea1338cf11
--- /dev/null
+++ b/s3_scrubber/src/lib.rs
@@ -0,0 +1,204 @@
+pub mod checks;
+pub mod cloud_admin_api;
+pub mod delete_batch_producer;
+mod s3_deletion;
+
+use std::env;
+use std::fmt::Display;
+use std::time::Duration;
+
+use aws_config::environment::EnvironmentVariableCredentialsProvider;
+use aws_config::imds::credentials::ImdsCredentialsProvider;
+use aws_config::meta::credentials::CredentialsProviderChain;
+use aws_config::sso::SsoCredentialsProvider;
+use aws_sdk_s3::config::Region;
+use aws_sdk_s3::{Client, Config};
+
+pub use s3_deletion::S3Deleter;
+use tracing::error;
+use tracing_appender::non_blocking::WorkerGuard;
+use tracing_subscriber::{fmt, prelude::*, EnvFilter};
+use utils::id::{TenantId, TenantTimelineId};
+
+const MAX_RETRIES: usize = 20;
+const CLOUD_ADMIN_API_TOKEN_ENV_VAR: &str = "CLOUD_ADMIN_API_TOKEN";
+
+#[derive(Debug, Clone)]
+pub struct S3Target {
+    pub bucket_name: String,
+    pub prefix_in_bucket: String,
+    pub delimiter: String,
+}
+
+#[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq)]
+pub enum TraversingDepth {
+    Tenant,
+    Timeline,
+}
+
+impl Display for TraversingDepth {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            Self::Tenant => "tenant",
+            Self::Timeline => "timeline",
+        })
+    }
+}
+
+impl S3Target {
+    pub fn with_sub_segment(&self, new_segment: &str) -> Self {
+        let mut new_self = self.clone();
+        let _ = new_self.prefix_in_bucket.pop(); // assumes a trailing 1-char delimiter
+        new_self.prefix_in_bucket =
+            [&new_self.prefix_in_bucket, new_segment, ""].join(&new_self.delimiter);
+        new_self
+    }
+}
+
+#[derive(Clone)]
+pub enum RootTarget {
+    Pageserver(S3Target),
+    Safekeeper(S3Target),
+}
+
+impl RootTarget {
+    /// The S3 target under which all tenants are stored, whatever the node kind.
+    pub fn tenants_root(&self) -> &S3Target {
+        match self {
+            Self::Pageserver(root) | Self::Safekeeper(root) => root,
+        }
+    }
+
+    /// The S3 target of a single tenant: the tenants root extended with the tenant id.
+    pub fn tenant_root(&self, tenant_id: TenantId) -> S3Target {
+        self.tenants_root().with_sub_segment(&tenant_id.to_string())
+    }
+
+    /// Root of a tenant's timelines; only pageservers nest it under a `timelines` sub-segment.
+    pub fn timelines_root(&self, tenant_id: TenantId) -> S3Target {
+        let tenant_root = self.tenant_root(tenant_id);
+        match self {
+            Self::Pageserver(_) => tenant_root.with_sub_segment("timelines"),
+            Self::Safekeeper(_) => tenant_root,
+        }
+    }
+
+    /// The S3 target of a single timeline: the timelines root extended with the timeline id.
+    pub fn timeline_root(&self, id: TenantTimelineId) -> S3Target {
+        let root = self.timelines_root(id.tenant_id);
+        root.with_sub_segment(&id.timeline_id.to_string())
+    }
+
+    /// Name of the bucket this root target points into.
+    pub fn bucket_name(&self) -> &str {
+        &self.tenants_root().bucket_name
+    }
+
+    /// Delimiter stored in the underlying `S3Target`.
+    pub fn delimiter(&self) -> &str {
+        &self.tenants_root().delimiter
+    }
+}
+
+pub fn get_cloud_admin_api_token_or_exit() -> String {
+    match env::var(CLOUD_ADMIN_API_TOKEN_ENV_VAR) {
+        Ok(token) => token,
+        Err(env::VarError::NotPresent) => {
+            error!("{CLOUD_ADMIN_API_TOKEN_ENV_VAR} env variable is not present");
+            std::process::exit(1);
+        }
+        Err(env::VarError::NotUnicode(not_unicode_string)) => {
+            error!("{CLOUD_ADMIN_API_TOKEN_ENV_VAR} env variable's value is not a valid unicode string: {not_unicode_string:?}");
+            std::process::exit(1);
+        }
+    }
+}
+
+/// Installs the global `tracing` subscriber, logging both to stdout and to a file in `./logs/`.
+///
+/// The log file is named `{binary_name}_{node_kind}_{UTC timestamp}.log`; dry runs get a
+/// `__dry` suffix right before the extension.
+///
+/// The level filter is taken from the environment via `EnvFilter::try_from_default_env`,
+/// falling back to `info`.
+///
+/// Returns the `WorkerGuard` of the non-blocking file writer for the caller to hold on to.
+pub fn init_logging(binary_name: &str, dry_run: bool, node_kind: &str) -> WorkerGuard {
+    let timestamp = chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S");
+    let dry_suffix = if dry_run { "__dry" } else { "" };
+    let file_name = format!("{binary_name}_{node_kind}_{timestamp}{dry_suffix}.log");
+
+    let (file_writer, guard) =
+        tracing_appender::non_blocking(tracing_appender::rolling::never("./logs/", file_name));
+
+    // The file never gets ANSI escape codes.
+    let file_logs = fmt::Layer::new()
+        .with_target(false)
+        .with_ansi(false)
+        .with_writer(file_writer);
+    // Stdout gets ANSI escape codes only when it is a terminal.
+    let stdout_logs = fmt::Layer::new()
+        .with_target(false)
+        .with_ansi(atty::is(atty::Stream::Stdout))
+        .with_writer(std::io::stdout);
+    // `init` registers the subscriber as the global default.
+    tracing_subscriber::registry()
+        .with(EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")))
+        .with(file_logs)
+        .with(stdout_logs)
+        .init();
+
+    guard
+}
+
+pub fn init_s3_client(account_id: String, bucket_region: Region) -> Client {
+    let credentials_provider = {
+        // uses "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"
+        CredentialsProviderChain::first_try("env", EnvironmentVariableCredentialsProvider::new())
+            // uses sso
+            .or_else(
+                "sso",
+                SsoCredentialsProvider::builder()
+                    .account_id(account_id)
+                    .role_name("PowerUserAccess")
+                    .start_url("https://neondb.awsapps.com/start")
+                    .region(Region::from_static("eu-central-1"))
+                    .build(),
+            )
+            // uses imds v2
+            .or_else("imds", ImdsCredentialsProvider::builder().build())
+    };
+    // Only the region and the credentials chain are set explicitly on the config builder.
+    let config = Config::builder()
+        .region(bucket_region)
+        .credentials_provider(credentials_provider)
+        .build();
+
+    Client::from_conf(config)
+}
+
+async fn list_objects_with_retries(
+    s3_client: &Client,
+    s3_target: &S3Target,
+    continuation_token: Option<String>,
+) -> anyhow::Result<aws_sdk_s3::operation::list_objects_v2::ListObjectsV2Output> {
+    for _ in 0..MAX_RETRIES {
+        match s3_client
+            .list_objects_v2()
+            .bucket(&s3_target.bucket_name)
+            .prefix(&s3_target.prefix_in_bucket)
+            .delimiter(&s3_target.delimiter)
+            .set_continuation_token(continuation_token.clone())
+            .send()
+            .await
+        {
+            Ok(response) => return Ok(response),
+            Err(e) => {
+                error!("list_objects_v2 query failed: {e}");
+                tokio::time::sleep(Duration::from_secs(1)).await;
+            }
+        }
+    }
+    // Every attempt failed; each error has already been logged above.
+    anyhow::bail!("Failed to list objects {MAX_RETRIES} times")
+}
diff --git a/s3_scrubber/src/main.rs b/s3_scrubber/src/main.rs
new file mode 100644
index 0000000000..7004bcad51
--- /dev/null
+++ b/s3_scrubber/src/main.rs
@@ -0,0 +1,268 @@
+use std::collections::HashMap;
+use std::env;
+use std::fmt::Display;
+use std::num::NonZeroUsize;
+use std::sync::Arc;
+
+use anyhow::Context;
+use aws_sdk_s3::config::Region;
+use reqwest::Url;
+use s3_scrubber::cloud_admin_api::CloudAdminApiClient;
+use s3_scrubber::delete_batch_producer::DeleteBatchProducer;
+use s3_scrubber::{
+    checks, get_cloud_admin_api_token_or_exit, init_logging, init_s3_client, RootTarget, S3Deleter,
+    S3Target, TraversingDepth,
+};
+use tracing::{info, info_span, warn};
+
+use clap::{Parser, Subcommand, ValueEnum};
+
+#[derive(Parser)]
+#[command(author, version, about, long_about = None)]
+#[command(arg_required_else_help(true))]
+struct Cli {
+    #[command(subcommand)]
+    command: Command,
+
+    #[arg(short, long, default_value_t = false)]
+    delete: bool,
+}
+
+#[derive(ValueEnum, Clone, Copy, Eq, PartialEq)]
+enum NodeKind {
+    Safekeeper,
+    Pageserver,
+}
+
+impl NodeKind {
+    fn as_str(&self) -> &'static str {
+        match self {
+            Self::Safekeeper => "safekeeper",
+            Self::Pageserver => "pageserver",
+        }
+    }
+}
+
+impl Display for NodeKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+#[derive(Subcommand)]
+enum Command {
+    Tidy {
+        #[arg(short, long)]
+        node_kind: NodeKind,
+        #[arg(short, long, default_value_t=TraversingDepth::Tenant)]
+        depth: TraversingDepth,
+        #[arg(short, long, default_value_t = false)]
+        skip_validation: bool,
+    },
+}
+
+struct BucketConfig {
+    region: String,
+    bucket: String,
+    sso_account_id: String,
+}
+
+impl Display for BucketConfig {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}/{}/{}", self.sso_account_id, self.region, self.bucket)
+    }
+}
+
+impl BucketConfig {
+    fn from_env() -> anyhow::Result<Self> {
+        let sso_account_id =
+            env::var("SSO_ACCOUNT_ID").context("'SSO_ACCOUNT_ID' param retrieval")?;
+        let region = env::var("REGION").context("'REGION' param retrieval")?;
+        let bucket = env::var("BUCKET").context("'BUCKET' param retrieval")?;
+
+        Ok(Self {
+            region,
+            bucket,
+            sso_account_id,
+        })
+    }
+}
+
+struct ConsoleConfig {
+    admin_api_url: Url,
+}
+
+impl ConsoleConfig {
+    fn from_env() -> anyhow::Result<Self> {
+        let admin_api_url: Url = env::var("CLOUD_ADMIN_API_URL")
+            .context("'CLOUD_ADMIN_API_URL' param retrieval")?
+            .parse()
+            .context("'CLOUD_ADMIN_API_URL' param parsing")?;
+
+        Ok(Self { admin_api_url })
+    }
+}
+
+/// Runs the `tidy` command: feeds batches from `DeleteBatchProducer` to `S3Deleter` (a dry run
+/// unless `cli.delete` is set), logs the stats, then validates pageserver data unless skipped.
+async fn tidy(
+    cli: &Cli,
+    bucket_config: BucketConfig,
+    console_config: ConsoleConfig,
+    node_kind: NodeKind,
+    depth: TraversingDepth,
+    skip_validation: bool,
+) -> anyhow::Result<()> {
+    let binary_name = env::args()
+        .next()
+        .context("binary name in not the first argument")?;
+
+    let dry_run = !cli.delete;
+    let _guard = init_logging(&binary_name, dry_run, node_kind.as_str());
+    let _main_span = info_span!("tidy", binary = %binary_name, %dry_run).entered();
+
+    if dry_run {
+        info!("Dry run, not removing items for real");
+    } else {
+        warn!("Dry run disabled, removing bucket items for real");
+    }
+
+    info!("skip_validation={skip_validation}");
+    info!("Starting extra S3 removal in {bucket_config} for node kind '{node_kind}', traversing depth: {depth:?}");
+    info!("Starting extra tenant S3 removal in {bucket_config} for node kind '{node_kind}'");
+    let cloud_admin_api_client = Arc::new(CloudAdminApiClient::new(
+        get_cloud_admin_api_token_or_exit(),
+        console_config.admin_api_url,
+    ));
+
+    let bucket_region = Region::new(bucket_config.region);
+    let delimiter = "/".to_string();
+    let s3_client = Arc::new(init_s3_client(bucket_config.sso_account_id, bucket_region));
+    let s3_root = match node_kind {
+        NodeKind::Pageserver => RootTarget::Pageserver(S3Target {
+            bucket_name: bucket_config.bucket,
+            prefix_in_bucket: ["pageserver", "v1", "tenants", ""].join(&delimiter),
+            delimiter,
+        }),
+        NodeKind::Safekeeper => RootTarget::Safekeeper(S3Target {
+            bucket_name: bucket_config.bucket,
+            prefix_in_bucket: ["safekeeper", "v1", "wal", ""].join(&delimiter),
+            delimiter,
+        }),
+    };
+
+    let delete_batch_producer = DeleteBatchProducer::start(
+        Arc::clone(&cloud_admin_api_client),
+        Arc::clone(&s3_client),
+        s3_root.clone(),
+        depth,
+    );
+
+    let s3_deleter = S3Deleter::new(
+        dry_run,
+        NonZeroUsize::new(15).unwrap(),
+        Arc::clone(&s3_client),
+        delete_batch_producer.subscribe(),
+        s3_root.clone(),
+    );
+
+    let (deleter_task_result, batch_producer_task_result) =
+        tokio::join!(s3_deleter.remove_all(), delete_batch_producer.join());
+
+    let deletion_stats = deleter_task_result.context("s3 deletion")?;
+    info!(
+        "Deleted {} tenants ({} keys) and {} timelines ({} keys) total. Dry run: {}",
+        deletion_stats.deleted_tenant_keys.len(),
+        deletion_stats.deleted_tenant_keys.values().sum::<usize>(),
+        deletion_stats.deleted_timeline_keys.len(),
+        deletion_stats.deleted_timeline_keys.values().sum::<usize>(),
+        dry_run,
+    );
+    info!(
+        "Total tenant deletion stats: {:?}",
+        deletion_stats
+            .deleted_tenant_keys
+            .into_iter()
+            .map(|(id, key)| (id.to_string(), key))
+            .collect::<HashMap<_, _>>()
+    );
+    info!(
+        "Total timeline deletion stats: {:?}",
+        deletion_stats
+            .deleted_timeline_keys
+            .into_iter()
+            .map(|(id, key)| (id.to_string(), key))
+            .collect::<HashMap<_, _>>()
+    );
+
+    let batch_producer_stats = batch_producer_task_result.context("delete batch producer join")?;
+    info!(
+        "Total bucket tenants listed: {}; for {} active tenants, timelines checked: {}",
+        batch_producer_stats.tenants_checked(),
+        batch_producer_stats.active_tenants(),
+        batch_producer_stats.timelines_checked()
+    );
+
+    // The validation step below only covers pageserver S3 data; other node kinds stop here.
+    if node_kind != NodeKind::Pageserver {
+        info!("node_kind != pageserver, finish without performing validation step");
+        return Ok(());
+    }
+
+    if skip_validation {
+        info!("--skip-validation is set, exiting");
+        return Ok(());
+    }
+
+    info!("validating active tenants and timelines for pageserver S3 data");
+    // TODO kb real stats for validation + better stats for every place: add and print `min`, `max`, `mean` values at least
+    let validation_stats = checks::validate_pageserver_active_tenant_and_timelines(
+        s3_client,
+        s3_root,
+        cloud_admin_api_client,
+        batch_producer_stats,
+    )
+    .await
+    .context("active tenant and timeline validation")?;
+    info!("Finished active tenant and timeline validation, correct timelines: {}, timeline validation errors: {}",
+        validation_stats.normal_timelines.len(), validation_stats.timelines_with_errors.len());
+    if !validation_stats.timelines_with_errors.is_empty() {
+        warn!(
+            "Validation errors: {:#?}",
+            validation_stats
+                .timelines_with_errors
+                .into_iter()
+                .map(|(id, errors)| (id.to_string(), format!("{errors:?}")))
+                .collect::<HashMap<_, _>>()
+        );
+    }
+
+    info!("Done");
+    Ok(())
+}
+
+#[tokio::main]
+async fn main() -> anyhow::Result<()> {
+    let cli = Cli::parse();
+
+    let bucket_config = BucketConfig::from_env()?;
+
+    match &cli.command {
+        Command::Tidy {
+            node_kind,
+            depth,
+            skip_validation,
+        } => {
+            let console_config = ConsoleConfig::from_env()?;
+            tidy(
+                &cli,
+                bucket_config,
+                console_config,
+                *node_kind,
+                *depth,
+                *skip_validation,
+            )
+            .await
+        }
+    }
+}
diff --git a/s3_scrubber/src/s3_deletion.rs b/s3_scrubber/src/s3_deletion.rs
new file mode 100644
index 0000000000..716443790b
--- /dev/null
+++ b/s3_scrubber/src/s3_deletion.rs
@@ -0,0 +1,434 @@
+use std::collections::BTreeMap;
+use std::num::NonZeroUsize;
+use std::sync::Arc;
+use std::time::Duration;
+
+use anyhow::Context;
+use aws_sdk_s3::types::{Delete, ObjectIdentifier};
+use aws_sdk_s3::Client;
+use tokio::sync::mpsc::error::TryRecvError;
+use tokio::sync::mpsc::UnboundedReceiver;
+use tokio::sync::Mutex;
+use tokio::task::JoinSet;
+use tracing::{debug, error, info, info_span, Instrument};
+
+use crate::delete_batch_producer::DeleteBatch;
+use crate::{list_objects_with_retries, RootTarget, S3Target, TenantId, MAX_RETRIES};
+use utils::id::TenantTimelineId;
+
+pub struct S3Deleter {
+    dry_run: bool,
+    concurrent_tasks_count: NonZeroUsize,
+    delete_batch_receiver: Arc<Mutex<UnboundedReceiver<DeleteBatch>>>,
+    s3_client: Arc<Client>,
+    s3_target: RootTarget,
+}
+
+impl S3Deleter {
+    pub fn new(
+        dry_run: bool,
+        concurrent_tasks_count: NonZeroUsize,
+        s3_client: Arc<Client>,
+        delete_batch_receiver: Arc<Mutex<UnboundedReceiver<DeleteBatch>>>,
+        s3_target: RootTarget,
+    ) -> Self {
+        Self {
+            dry_run,
+            concurrent_tasks_count,
+            delete_batch_receiver,
+            s3_client,
+            s3_target,
+        }
+    }
+
+    pub async fn remove_all(self) -> anyhow::Result<DeletionStats> {
+        let mut deletion_tasks = JoinSet::new();
+        for id in 0..self.concurrent_tasks_count.get() {
+            let closure_client = Arc::clone(&self.s3_client);
+            let closure_s3_target = self.s3_target.clone();
+            let closure_batch_receiver = Arc::clone(&self.delete_batch_receiver);
+            let dry_run = self.dry_run;
+            deletion_tasks.spawn(
+                async move {
+                    info!("Task started");
+                    (
+                        id,
+                        async move {
+                            let mut task_stats = DeletionStats::default();
+                            loop {
+                                let mut guard = closure_batch_receiver.lock().await;
+                                let receiver_result = guard.try_recv();
+                                drop(guard);
+                                match receiver_result {
+                                    Ok(batch) => {
+                                        let stats = delete_batch(
+                                            &closure_client,
+                                            &closure_s3_target,
+                                            batch,
+                                            dry_run,
+                                        )
+                                        .await
+                                        .context("batch deletion")?;
+                                        debug!(
+                                            "Batch processed, number of objects deleted per tenant in the batch is: {}, per timeline — {}",
+                                            stats.deleted_tenant_keys.len(),
+                                            stats.deleted_timeline_keys.len(),
+                                        );
+                                        task_stats.merge(stats);
+                                    }
+                                    Err(TryRecvError::Empty) => {
+                                        debug!("No tasks yet, waiting");
+                                        tokio::time::sleep(Duration::from_secs(1)).await;
+                                        continue;
+                                    }
+                                    Err(TryRecvError::Disconnected) => {
+                                        info!("Task finished: sender dropped");
+                                        return Ok(task_stats);
+                                    }
+                                }
+                            }
+                        }
+                        .in_current_span()
+                        .await,
+                    )
+                }
+                .instrument(info_span!("deletion_task", %id)),
+            );
+        }
+
+        let mut total_stats = DeletionStats::default();
+        while let Some(task_result) = deletion_tasks.join_next().await {
+            match task_result {
+                Ok((id, Ok(task_stats))) => {
+                    info!("Task {id} completed");
+                    total_stats.merge(task_stats);
+                }
+                Ok((id, Err(e))) => {
+                    error!("Task {id} failed: {e:#}");
+                    return Err(e);
+                }
+                Err(join_error) => anyhow::bail!("Failed to join on a task: {join_error:?}"),
+            }
+        }
+
+        Ok(total_stats)
+    }
+}
+
+/// S3 delete_objects allows up to 1000 keys to be passed in a single request.
+/// Yet if you pass too many key requests, apparently S3 could return with OK and
+/// actually delete nothing, so keep the number lower.
+const MAX_ITEMS_TO_DELETE: usize = 200;
+
+#[derive(Debug, Default)]
+pub struct DeletionStats {
+    pub deleted_tenant_keys: BTreeMap<TenantId, usize>,
+    pub deleted_timeline_keys: BTreeMap<TenantTimelineId, usize>,
+}
+
+impl DeletionStats {
+    fn merge(&mut self, other: Self) {
+        self.deleted_tenant_keys.extend(other.deleted_tenant_keys);
+        self.deleted_timeline_keys
+            .extend(other.deleted_timeline_keys);
+    }
+}
+
+async fn delete_batch(
+    s3_client: &Client,
+    s3_target: &RootTarget,
+    batch: DeleteBatch,
+    dry_run: bool,
+) -> anyhow::Result<DeletionStats> {
+    let (deleted_tenant_keys, deleted_timeline_keys) = tokio::join!(
+        delete_tenants_batch(batch.tenants, s3_target, s3_client, dry_run),
+        delete_timelines_batch(batch.timelines, s3_target, s3_client, dry_run),
+    );
+
+    Ok(DeletionStats {
+        deleted_tenant_keys: deleted_tenant_keys.context("tenant batch deletion")?,
+        deleted_timeline_keys: deleted_timeline_keys.context("timeline batch deletion")?,
+    })
+}
+
+async fn delete_tenants_batch(
+    batched_tenants: Vec<TenantId>,
+    s3_target: &RootTarget,
+    s3_client: &Client,
+    dry_run: bool,
+) -> Result<BTreeMap<TenantId, usize>, anyhow::Error> {
+    info!("Deleting tenants batch of size {}", batched_tenants.len());
+    info!("Tenant ids to remove: {batched_tenants:?}");
+    let deleted_keys = delete_elements(
+        &batched_tenants,
+        s3_target,
+        s3_client,
+        dry_run,
+        |root_target, tenant_to_delete| root_target.tenant_root(tenant_to_delete),
+    )
+    .await?;
+
+    if !dry_run {
+        let mut last_err = None;
+        for _ in 0..MAX_RETRIES {
+            match ensure_tenant_batch_deleted(s3_client, s3_target, &batched_tenants).await {
+                Ok(()) => {
+                    last_err = None;
+                    break;
+                }
+                Err(e) => {
+                    error!("Failed to ensure the tenant batch is deleted: {e}");
+                    last_err = Some(e);
+                }
+            }
+        }
+
+        if let Some(e) = last_err {
+            anyhow::bail!(
+                "Failed to ensure that tenant batch is deleted {MAX_RETRIES} times: {e:?}"
+            );
+        }
+    }
+
+    Ok(deleted_keys)
+}
+
+async fn delete_timelines_batch(
+    batched_timelines: Vec<TenantTimelineId>,
+    s3_target: &RootTarget,
+    s3_client: &Client,
+    dry_run: bool,
+) -> Result<BTreeMap<TenantTimelineId, usize>, anyhow::Error> {
+    info!(
+        "Deleting timelines batch of size {}",
+        batched_timelines.len()
+    );
+    info!(
+        "Timeline ids to remove: {:?}",
+        batched_timelines
+            .iter()
+            .map(|id| id.to_string())
+            .collect::<Vec<_>>()
+    );
+    let deleted_keys = delete_elements(
+        &batched_timelines,
+        s3_target,
+        s3_client,
+        dry_run,
+        |root_target, timeline_to_delete| root_target.timeline_root(timeline_to_delete),
+    )
+    .await?;
+
+    if !dry_run {
+        let mut last_err = None;
+        for _ in 0..MAX_RETRIES {
+            match ensure_timeline_batch_deleted(s3_client, s3_target, &batched_timelines).await {
+                Ok(()) => {
+                    last_err = None;
+                    break;
+                }
+                Err(e) => {
+                    error!("Failed to ensure the timelines batch is deleted: {e}");
+                    last_err = Some(e);
+                }
+            }
+        }
+
+        if let Some(e) = last_err {
+            anyhow::bail!(
+                "Failed to ensure that timeline batch is deleted {MAX_RETRIES} times: {e:?}"
+            );
+        }
+    }
+    Ok(deleted_keys)
+}
+
+/// Lists every object under each id's S3 target (descending into common prefixes) and sends
+/// the keys for deletion in chunks of up to `MAX_ITEMS_TO_DELETE`; returns key counts per id.
+async fn delete_elements<I>(
+    batched_ids: &Vec<I>,
+    s3_target: &RootTarget,
+    s3_client: &Client,
+    dry_run: bool,
+    target_producer: impl Fn(&RootTarget, I) -> S3Target,
+) -> Result<BTreeMap<I, usize>, anyhow::Error>
+where
+    I: Ord + PartialOrd + Copy,
+{
+    let mut deleted_keys = BTreeMap::new();
+    let mut object_ids_to_delete = Vec::with_capacity(MAX_ITEMS_TO_DELETE);
+    for &id_to_delete in batched_ids {
+        let mut subtargets = vec![target_producer(s3_target, id_to_delete)];
+        while let Some(current_target) = subtargets.pop() {
+            // Tokens are only valid for the listing that issued them: start each prefix fresh.
+            let mut continuation_token = None;
+            loop {
+                let fetch_response = list_objects_with_retries(
+                    s3_client,
+                    &current_target,
+                    continuation_token.clone(),
+                )
+                .await?;
+                for object_id in fetch_response
+                    .contents()
+                    .unwrap_or_default()
+                    .iter()
+                    .filter_map(|object| object.key())
+                    .map(|key| ObjectIdentifier::builder().key(key).build())
+                {
+                    if object_ids_to_delete.len() >= MAX_ITEMS_TO_DELETE {
+                        let object_ids_for_request = std::mem::replace(
+                            &mut object_ids_to_delete,
+                            Vec::with_capacity(MAX_ITEMS_TO_DELETE),
+                        );
+                        send_delete_request(
+                            s3_client,
+                            s3_target.bucket_name(),
+                            object_ids_for_request,
+                            dry_run,
+                        )
+                        .await
+                        .context("object ids deletion")?;
+                    }
+                    object_ids_to_delete.push(object_id);
+                    *deleted_keys.entry(id_to_delete).or_default() += 1;
+                }
+                subtargets.extend(
+                    fetch_response
+                        .common_prefixes()
+                        .unwrap_or_default()
+                        .iter()
+                        .filter_map(|common_prefix| common_prefix.prefix())
+                        .map(|prefix| {
+                            let mut new_target = current_target.clone();
+                            new_target.prefix_in_bucket = prefix.to_string();
+                            new_target
+                        }),
+                );
+
+                match fetch_response.next_continuation_token {
+                    Some(new_token) => continuation_token = Some(new_token),
+                    None => break,
+                }
+            }
+        }
+    }
+    if !object_ids_to_delete.is_empty() {
+        info!("Removing last objects of the batch");
+        send_delete_request(
+            s3_client,
+            s3_target.bucket_name(),
+            object_ids_to_delete,
+            dry_run,
+        )
+        .await
+        .context("Last object ids deletion")?;
+    }
+    Ok(deleted_keys)
+}
+
+/// Sends one `delete_objects` request for `ids` in `bucket_name`, retrying up to `MAX_RETRIES`
+/// times on request failures or reported per-key errors; with `dry_run` it only logs the ids.
+pub async fn send_delete_request(
+    s3_client: &Client,
+    bucket_name: &str,
+    ids: Vec<ObjectIdentifier>,
+    dry_run: bool,
+) -> anyhow::Result<()> {
+    info!("Removing {} object ids from S3", ids.len());
+    info!("Object ids to remove: {ids:?}");
+    let delete_request = s3_client
+        .delete_objects()
+        .bucket(bucket_name)
+        .delete(Delete::builder().set_objects(Some(ids)).build());
+    if dry_run {
+        info!("Dry run, skipping the actual removal");
+        Ok(())
+    } else {
+        for _ in 0..MAX_RETRIES {
+            match delete_request
+                .clone()
+                .send()
+                .await
+                .context("delete request processing")
+            {
+                Ok(delete_response) => {
+                    info!("Delete response: {delete_response:?}");
+                    match delete_response.errors() {
+                        // An empty error list reports no failed keys, same as no list at all.
+                        Some(delete_errors) if !delete_errors.is_empty() => {
+                            error!("Delete request returned errors: {delete_errors:?}");
+                            tokio::time::sleep(Duration::from_secs(1)).await;
+                        }
+                        _ => {
+                            info!("Successfully removed an object batch from S3");
+                            return Ok(());
+                        }
+                    }
+                }
+                Err(e) => {
+                    error!("Failed to send a delete request: {e:#}");
+                    tokio::time::sleep(Duration::from_secs(1)).await;
+                }
+            }
+        }
+        error!("Failed to do deletion, request: {delete_request:?}");
+        anyhow::bail!("Failed to run deletion request {MAX_RETRIES} times");
+    }
+}
+
+async fn ensure_tenant_batch_deleted(
+    s3_client: &Client,
+    s3_target: &RootTarget,
+    batch: &[TenantId],
+) -> anyhow::Result<()> {
+    let mut not_deleted_tenants = Vec::with_capacity(batch.len());
+
+    for &tenant_id in batch {
+        let fetch_response =
+            list_objects_with_retries(s3_client, &s3_target.tenant_root(tenant_id), None).await?;
+
+        if fetch_response.is_truncated()
+            || fetch_response.contents().is_some()
+            || fetch_response.common_prefixes().is_some()
+        {
+            error!(
+                "Tenant {tenant_id} should be deleted, but its list response is {fetch_response:?}"
+            );
+            not_deleted_tenants.push(tenant_id);
+        }
+    }
+
+    anyhow::ensure!(
+        not_deleted_tenants.is_empty(),
+        "Failed to delete all tenants in a batch. Tenants {not_deleted_tenants:?} should be deleted."
+    );
+    Ok(())
+}
+
+async fn ensure_timeline_batch_deleted(
+    s3_client: &Client,
+    s3_target: &RootTarget,
+    batch: &[TenantTimelineId],
+) -> anyhow::Result<()> {
+    let mut not_deleted_timelines = Vec::with_capacity(batch.len());
+
+    for &id in batch {
+        let fetch_response =
+            list_objects_with_retries(s3_client, &s3_target.timeline_root(id), None).await?;
+
+        if fetch_response.is_truncated()
+            || fetch_response.contents().is_some()
+            || fetch_response.common_prefixes().is_some()
+        {
+            error!("Timeline {id} should be deleted, but its list response is {fetch_response:?}");
+            not_deleted_timelines.push(id);
+        }
+    }
+
+    anyhow::ensure!(
+        not_deleted_timelines.is_empty(),
+        "Failed to delete all timelines in a batch"
+    );
+    Ok(())
+}
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py
index b2cd0fe968..3936736e56 100644
--- a/test_runner/fixtures/neon_fixtures.py
+++ b/test_runner/fixtures/neon_fixtures.py
@@ -414,6 +414,7 @@ class NeonEnvBuilder:
         neon_binpath: Path,
         pg_distrib_dir: Path,
         pg_version: PgVersion,
+        test_name: str,
         remote_storage: Optional[RemoteStorage] = None,
         remote_storage_users: RemoteStorageUsers = RemoteStorageUsers.PAGESERVER,
         pageserver_config_override: Optional[str] = None,
@@ -455,6 +456,11 @@ class NeonEnvBuilder:
         self.initial_tenant = initial_tenant or TenantId.generate()
         self.initial_timeline = initial_timeline or TimelineId.generate()
 
+        assert test_name.startswith(
+            "test_"
+        ), "Unexpectedly instantiated from outside a test function"
+        self.test_name = test_name
+
     def init_configs(self) -> NeonEnv:
         # Cannot create more than one environment from one builder
         assert self.env is None, "environment already initialized"
@@ -486,23 +492,24 @@ class NeonEnvBuilder:
     def enable_remote_storage(
         self,
         remote_storage_kind: RemoteStorageKind,
-        test_name: str,
         force_enable: bool = True,
         enable_remote_extensions: bool = False,
     ):
+        bucket_name = re.sub(r"[_\[\]]", "-", self.test_name)[:63]
+
         if remote_storage_kind == RemoteStorageKind.NOOP:
             return
         elif remote_storage_kind == RemoteStorageKind.LOCAL_FS:
             self.enable_local_fs_remote_storage(force_enable=force_enable)
         elif remote_storage_kind == RemoteStorageKind.MOCK_S3:
             self.enable_mock_s3_remote_storage(
-                bucket_name=test_name,
+                bucket_name=bucket_name,
                 force_enable=force_enable,
                 enable_remote_extensions=enable_remote_extensions,
             )
         elif remote_storage_kind == RemoteStorageKind.REAL_S3:
             self.enable_real_s3_remote_storage(
-                test_name=test_name,
+                test_name=bucket_name,
                 force_enable=force_enable,
                 enable_remote_extensions=enable_remote_extensions,
             )
@@ -949,6 +956,7 @@ def _shared_simple_env(
         pg_version=pg_version,
         run_id=run_id,
         preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
+        test_name=request.node.name,
     ) as builder:
         env = builder.init_start()
 
@@ -984,6 +992,7 @@ def neon_env_builder(
     pg_version: PgVersion,
     default_broker: NeonBroker,
     run_id: uuid.UUID,
+    request: FixtureRequest,
 ) -> Iterator[NeonEnvBuilder]:
     """
     Fixture to create a Neon environment for test.
@@ -1012,6 +1021,7 @@ def neon_env_builder(
         broker=default_broker,
         run_id=run_id,
         preserve_database_files=pytestconfig.getoption("--preserve-database-files"),
+        test_name=request.node.name,
     ) as builder:
         yield builder
 
diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py
index bc6afa84a1..1acf429c52 100644
--- a/test_runner/regress/test_attach_tenant_config.py
+++ b/test_runner/regress/test_attach_tenant_config.py
@@ -16,7 +16,6 @@ from fixtures.utils import wait_until
 def positive_env(neon_env_builder: NeonEnvBuilder) -> NeonEnv:
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.LOCAL_FS,
-        test_name="test_attach_tenant_config",
     )
     env = neon_env_builder.init_start()
 
@@ -39,7 +38,6 @@ class NegativeTests:
 def negative_env(neon_env_builder: NeonEnvBuilder) -> Generator[NegativeTests, None, None]:
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.LOCAL_FS,
-        test_name="test_attach_tenant_config",
     )
     env = neon_env_builder.init_start()
     assert isinstance(env.remote_storage, LocalFsStorage)
diff --git a/test_runner/regress/test_disk_usage_eviction.py b/test_runner/regress/test_disk_usage_eviction.py
index 182069315e..cdbd02de03 100644
--- a/test_runner/regress/test_disk_usage_eviction.py
+++ b/test_runner/regress/test_disk_usage_eviction.py
@@ -135,7 +135,7 @@ def eviction_env(request, neon_env_builder: NeonEnvBuilder, pg_bin: PgBin) -> Ev
 
     log.info(f"setting up eviction_env for test {request.node.name}")
 
-    neon_env_builder.enable_remote_storage(RemoteStorageKind.LOCAL_FS, f"{request.node.name}")
+    neon_env_builder.enable_remote_storage(RemoteStorageKind.LOCAL_FS)
 
     # initial tenant will not be present on this pageserver
     env = neon_env_builder.init_configs()
diff --git a/test_runner/regress/test_download_extensions.py b/test_runner/regress/test_download_extensions.py
index b208616345..54f51414bb 100644
--- a/test_runner/regress/test_download_extensions.py
+++ b/test_runner/regress/test_download_extensions.py
@@ -90,7 +90,6 @@ def test_remote_extensions(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_remote_extensions",
         enable_remote_extensions=True,
     )
     env = neon_env_builder.init_start()
@@ -157,7 +156,6 @@ def test_remote_library(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_remote_library",
         enable_remote_extensions=True,
     )
     env = neon_env_builder.init_start()
@@ -218,7 +216,6 @@ def test_multiple_extensions_one_archive(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.REAL_S3,
-        test_name="test_multiple_extensions_one_archive",
         enable_remote_extensions=True,
     )
     env = neon_env_builder.init_start()
@@ -266,7 +263,6 @@ def test_extension_download_after_restart(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.MOCK_S3,
-        test_name="test_extension_download_after_restart",
         enable_remote_extensions=True,
     )
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_gc_aggressive.py b/test_runner/regress/test_gc_aggressive.py
index be817521cd..8d7a42a805 100644
--- a/test_runner/regress/test_gc_aggressive.py
+++ b/test_runner/regress/test_gc_aggressive.py
@@ -102,7 +102,6 @@ def test_gc_index_upload(neon_env_builder: NeonEnvBuilder, remote_storage_kind:
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_gc_index_upload",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_layer_eviction.py b/test_runner/regress/test_layer_eviction.py
index 8f627defb5..c939ace803 100644
--- a/test_runner/regress/test_layer_eviction.py
+++ b/test_runner/regress/test_layer_eviction.py
@@ -21,7 +21,6 @@ def test_basic_eviction(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_download_remote_layers_api",
     )
 
     env = neon_env_builder.init_start(
@@ -157,7 +156,6 @@ def test_basic_eviction(
 def test_gc_of_remote_layers(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.LOCAL_FS,
-        test_name="test_gc_of_remote_layers",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_metric_collection.py b/test_runner/regress/test_metric_collection.py
index 3f4b42707a..e4641cff05 100644
--- a/test_runner/regress/test_metric_collection.py
+++ b/test_runner/regress/test_metric_collection.py
@@ -96,7 +96,6 @@ def test_metric_collection(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_metric_collection",
     )
 
     log.info(f"test_metric_collection endpoint is {metric_collection_endpoint}")
diff --git a/test_runner/regress/test_ondemand_download.py b/test_runner/regress/test_ondemand_download.py
index a4e86e0519..0ca6a7a595 100644
--- a/test_runner/regress/test_ondemand_download.py
+++ b/test_runner/regress/test_ondemand_download.py
@@ -54,7 +54,6 @@ def test_ondemand_download_large_rel(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ondemand_download_large_rel",
     )
 
     # thinking about using a shared environment? the test assumes that global
@@ -157,7 +156,6 @@ def test_ondemand_download_timetravel(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ondemand_download_timetravel",
     )
 
     # thinking about using a shared environment? the test assumes that global
@@ -319,7 +317,6 @@ def test_download_remote_layers_api(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_download_remote_layers_api",
     )
 
     ##### First start, insert data and upload it to the remote storage
@@ -481,7 +478,6 @@ def test_compaction_downloads_on_demand_without_image_creation(
     """
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_compaction_downloads_on_demand_without_image_creation",
     )
 
     conf = {
@@ -569,7 +565,6 @@ def test_compaction_downloads_on_demand_with_image_creation(
     """
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_compaction_downloads_on_demand",
     )
 
     conf = {
@@ -670,7 +665,6 @@ def test_ondemand_download_failure_to_replace(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ondemand_download_failure_to_replace",
     )
 
     # disable gc and compaction via default tenant config because config is lost while detaching
diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py
index 0bd365efaa..8b75d35200 100644
--- a/test_runner/regress/test_remote_storage.py
+++ b/test_runner/regress/test_remote_storage.py
@@ -62,7 +62,6 @@ def test_remote_storage_backup_and_restore(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_remote_storage_backup_and_restore",
     )
 
     # Exercise retry code path by making all uploads and downloads fail for the
@@ -225,7 +224,6 @@ def test_remote_storage_upload_queue_retries(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_remote_storage_upload_queue_retries",
     )
 
     env = neon_env_builder.init_start()
@@ -381,7 +379,6 @@ def test_remote_timeline_client_calls_started_metric(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_remote_timeline_client_metrics",
     )
 
     # thinking about using a shared environment? the test assumes that global
@@ -524,7 +521,6 @@ def test_timeline_deletion_with_files_stuck_in_upload_queue(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_timeline_deletion_with_files_stuck_in_upload_queue",
     )
 
     env = neon_env_builder.init_start(
@@ -642,7 +638,6 @@ def test_empty_branch_remote_storage_upload(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_empty_branch_remote_storage_upload",
     )
 
     env = neon_env_builder.init_start()
@@ -694,7 +689,6 @@ def test_empty_branch_remote_storage_upload_on_restart(
     """
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_empty_branch_remote_storage_upload_on_restart",
     )
 
     env = neon_env_builder.init_start()
@@ -792,7 +786,6 @@ def test_compaction_delete_before_upload(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_compaction_delete_before_upload",
     )
 
     env = neon_env_builder.init_start(
diff --git a/test_runner/regress/test_tenant_conf.py b/test_runner/regress/test_tenant_conf.py
index 60ec532db4..93ba5477a6 100644
--- a/test_runner/regress/test_tenant_conf.py
+++ b/test_runner/regress/test_tenant_conf.py
@@ -294,7 +294,6 @@ eviction_policy = { "kind" = "LayerAccessThreshold", period = "20s", threshold =
 def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.LOCAL_FS,
-        test_name="test_creating_tenant_conf_after_attach",
     )
 
     env = neon_env_builder.init_start()
@@ -339,7 +338,6 @@ def test_live_reconfig_get_evictions_low_residence_duration_metric_threshold(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.LOCAL_FS,
-        test_name="test_live_reconfig_get_evictions_low_residence_duration_metric_threshold",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_tenant_delete.py b/test_runner/regress/test_tenant_delete.py
index 448dcfaff7..c7324cac83 100644
--- a/test_runner/regress/test_tenant_delete.py
+++ b/test_runner/regress/test_tenant_delete.py
@@ -43,7 +43,6 @@ def test_tenant_delete_smoke(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_tenant_delete_smoke",
     )
 
     env = neon_env_builder.init_start()
@@ -177,9 +176,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
     if simulate_failures:
         neon_env_builder.pageserver_config_override = "test_remote_failures=1"
 
-    neon_env_builder.enable_remote_storage(
-        remote_storage_kind, "test_delete_tenant_exercise_crash_safety_failpoints"
-    )
+    neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
 
@@ -192,7 +189,7 @@ def test_delete_tenant_exercise_crash_safety_failpoints(
             # allow errors caused by failpoints
             f".*failpoint: {failpoint}",
             # It appears when we stopped flush loop during deletion (attempt) and then pageserver is stopped
-            ".*freeze_and_flush_on_shutdown.*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
             # We may leave some upload tasks in the queue. They're likely deletes.
             # For uploads we explicitly wait with `last_flush_lsn_upload` below.
             # So by ignoring these instead of waiting for empty upload queue
@@ -300,7 +297,6 @@ def test_tenant_delete_is_resumed_on_attach(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_deleted_tenant_ignored_on_attach",
     )
 
     env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
@@ -338,7 +334,7 @@ def test_tenant_delete_is_resumed_on_attach(
             # From deletion polling
             f".*NotFound: tenant {env.initial_tenant}.*",
             # It appears when we stopped flush loop during deletion (attempt) and then pageserver is stopped
-            ".*freeze_and_flush_on_shutdown.*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
             # error from http response is also logged
             ".*InternalServerError\\(Tenant is marked as deleted on remote storage.*",
             '.*shutdown_pageserver{exit_code=0}: stopping left-over name="remote upload".*',
diff --git a/test_runner/regress/test_tenant_detach.py b/test_runner/regress/test_tenant_detach.py
index 07b751bcca..20526cd0a9 100644
--- a/test_runner/regress/test_tenant_detach.py
+++ b/test_runner/regress/test_tenant_detach.py
@@ -46,7 +46,6 @@ def test_tenant_reattach(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_tenant_reattach",
     )
 
     # Exercise retry code path by making all uploads and downloads fail for the
@@ -231,7 +230,6 @@ def test_tenant_reattach_while_busy(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_tenant_reattach_while_busy",
     )
     env = neon_env_builder.init_start()
 
@@ -453,7 +451,6 @@ def test_detach_while_attaching(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_detach_while_attaching",
     )
 
     ##### First start, insert secret data and upload it to the remote storage
@@ -537,7 +534,6 @@ def test_ignored_tenant_reattach(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ignored_tenant_reattach",
     )
     env = neon_env_builder.init_start()
     pageserver_http = env.pageserver.http_client()
@@ -609,7 +605,6 @@ def test_ignored_tenant_download_missing_layers(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ignored_tenant_download_and_attach",
     )
     env = neon_env_builder.init_start()
     pageserver_http = env.pageserver.http_client()
@@ -675,7 +670,6 @@ def test_ignored_tenant_stays_broken_without_metadata(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ignored_tenant_stays_broken_without_metadata",
     )
     env = neon_env_builder.init_start()
     pageserver_http = env.pageserver.http_client()
@@ -688,7 +682,7 @@ def test_ignored_tenant_stays_broken_without_metadata(
     # temporarily detached produces these errors in the pageserver log.
     env.pageserver.allowed_errors.append(f".*Tenant {tenant_id} not found.*")
     env.pageserver.allowed_errors.append(
-        f".*Tenant {tenant_id} will not become active\\. Current state: Broken.*"
+        f".*Tenant {tenant_id} will not become active\\. Current state: (Broken|Stopping).*"
     )
 
     # ignore the tenant and remove its metadata
@@ -719,7 +713,6 @@ def test_load_attach_negatives(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_load_attach_negatives",
     )
     env = neon_env_builder.init_start()
     pageserver_http = env.pageserver.http_client()
@@ -764,7 +757,6 @@ def test_ignore_while_attaching(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_ignore_while_attaching",
     )
 
     env = neon_env_builder.init_start()
@@ -868,7 +860,6 @@ def test_metrics_while_ignoring_broken_tenant_and_reloading(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_metrics_while_ignoring_broken_tenant_and_reloading",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_tenant_relocation.py b/test_runner/regress/test_tenant_relocation.py
index 32ad5381b4..6a81ff498e 100644
--- a/test_runner/regress/test_tenant_relocation.py
+++ b/test_runner/regress/test_tenant_relocation.py
@@ -526,7 +526,6 @@ def test_emergency_relocate_with_branches_slow_replay(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_emergency_relocate_with_branches_slow_replay",
     )
 
     env = neon_env_builder.init_start()
@@ -683,7 +682,6 @@ def test_emergency_relocate_with_branches_createdb(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_emergency_relocate_with_branches_createdb",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_tenants.py b/test_runner/regress/test_tenants.py
index 19bc3ed37c..985bd63b24 100644
--- a/test_runner/regress/test_tenants.py
+++ b/test_runner/regress/test_tenants.py
@@ -244,7 +244,6 @@ def test_pageserver_metrics_removed_after_detach(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_pageserver_metrics_removed_after_detach",
     )
 
     neon_env_builder.num_safekeepers = 3
@@ -305,7 +304,6 @@ def test_pageserver_with_empty_tenants(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_pageserver_with_empty_tenants",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_tenants_with_remote_storage.py b/test_runner/regress/test_tenants_with_remote_storage.py
index 2925f8c2da..6a541c8a37 100644
--- a/test_runner/regress/test_tenants_with_remote_storage.py
+++ b/test_runner/regress/test_tenants_with_remote_storage.py
@@ -64,7 +64,6 @@ async def all_tenants_workload(env: NeonEnv, tenants_endpoints):
 def test_tenants_many(neon_env_builder: NeonEnvBuilder, remote_storage_kind: RemoteStorageKind):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_tenants_many",
     )
 
     env = neon_env_builder.init_start()
@@ -117,7 +116,6 @@ def test_tenants_attached_after_download(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="remote_storage_kind",
     )
 
     data_id = 1
@@ -232,7 +230,6 @@ def test_tenant_redownloads_truncated_file_on_startup(
     # since we now store the layer file length metadata, we notice on startup that a layer file is of wrong size, and proceed to redownload it.
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_tenant_redownloads_truncated_file_on_startup",
     )
 
     env = neon_env_builder.init_start()
diff --git a/test_runner/regress/test_threshold_based_eviction.py b/test_runner/regress/test_threshold_based_eviction.py
index a0e423e7ff..739a9a5b74 100644
--- a/test_runner/regress/test_threshold_based_eviction.py
+++ b/test_runner/regress/test_threshold_based_eviction.py
@@ -16,13 +16,12 @@ from pytest_httpserver import HTTPServer
 
 
 def test_threshold_based_eviction(
-    request,
     httpserver: HTTPServer,
     httpserver_listen_address,
     pg_bin: PgBin,
     neon_env_builder: NeonEnvBuilder,
 ):
-    neon_env_builder.enable_remote_storage(RemoteStorageKind.LOCAL_FS, f"{request.node.name}")
+    neon_env_builder.enable_remote_storage(RemoteStorageKind.LOCAL_FS)
 
     # Start with metrics collection enabled, so that the eviction task
     # imitates its accesses. We'll use a non-existent endpoint to make it fail.
diff --git a/test_runner/regress/test_timeline_delete.py b/test_runner/regress/test_timeline_delete.py
index 916c0111f7..0ce714d185 100644
--- a/test_runner/regress/test_timeline_delete.py
+++ b/test_runner/regress/test_timeline_delete.py
@@ -191,9 +191,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
     8. Retry or restart without the failpoint and check the result.
     """
 
-    neon_env_builder.enable_remote_storage(
-        remote_storage_kind, "test_delete_timeline_exercise_crash_safety_failpoints"
-    )
+    neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start(
         initial_tenant_conf={
@@ -231,7 +229,7 @@ def test_delete_timeline_exercise_crash_safety_failpoints(
     env.pageserver.allowed_errors.append(f".*{timeline_id}.*failpoint: {failpoint}")
     # It appears when we stopped flush loop during deletion and then pageserver is stopped
     env.pageserver.allowed_errors.append(
-        ".*freeze_and_flush_on_shutdown.*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited"
+        ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
     )
     # This happens when we fail before scheduling background operation.
     # Timeline is left in stopping state and retry tries to stop it again.
@@ -350,7 +348,6 @@ def test_timeline_resurrection_on_attach(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_timeline_resurrection_on_attach",
     )
 
     ##### First start, insert data and upload it to the remote storage
@@ -438,7 +435,6 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.MOCK_S3,
-        test_name="test_timeline_delete_fail_before_local_delete",
     )
 
     env = neon_env_builder.init_start()
@@ -449,7 +445,7 @@ def test_timeline_delete_fail_before_local_delete(neon_env_builder: NeonEnvBuild
     )
     # this happens, because the stuck timeline is visible to shutdown
     env.pageserver.allowed_errors.append(
-        ".*freeze_and_flush_on_shutdown.+: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited"
+        ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
     )
 
     ps_http = env.pageserver.http_client()
@@ -558,7 +554,6 @@ def test_concurrent_timeline_delete_stuck_on(
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.MOCK_S3,
-        test_name=f"concurrent_timeline_delete_stuck_on_{stuck_failpoint}",
     )
 
     env = neon_env_builder.init_start()
@@ -636,7 +631,6 @@ def test_delete_timeline_client_hangup(neon_env_builder: NeonEnvBuilder):
     """
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=RemoteStorageKind.MOCK_S3,
-        test_name="test_delete_timeline_client_hangup",
     )
 
     env = neon_env_builder.init_start()
@@ -706,7 +700,6 @@ def test_timeline_delete_works_for_remote_smoke(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_timeline_delete_works_for_remote_smoke",
     )
 
     env = neon_env_builder.init_start()
@@ -780,7 +773,7 @@ def test_delete_orphaned_objects(
     pg_bin: PgBin,
 ):
     remote_storage_kind = RemoteStorageKind.LOCAL_FS
-    neon_env_builder.enable_remote_storage(remote_storage_kind, "test_delete_orphaned_objects")
+    neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start(
         initial_tenant_conf={
@@ -844,7 +837,6 @@ def test_timeline_delete_resumed_on_attach(
 ):
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_deleted_tenant_ignored_on_attach",
     )
 
     env = neon_env_builder.init_start(initial_tenant_conf=MANY_SMALL_LAYERS_TENANT_CONFIG)
@@ -881,7 +873,7 @@ def test_timeline_delete_resumed_on_attach(
             # allow errors caused by failpoints
             f".*failpoint: {failpoint}",
             # It appears when we stopped flush loop during deletion (attempt) and then pageserver is stopped
-            ".*freeze_and_flush_on_shutdown.*failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
+            ".*shutdown_all_tenants:shutdown.*tenant_id.*shutdown.*timeline_id.*: failed to freeze and flush: cannot flush frozen layers when flush_loop is not running, state is Exited",
             # error from http response is also logged
             ".*InternalServerError\\(Tenant is marked as deleted on remote storage.*",
             # Polling after attach may fail with this
diff --git a/test_runner/regress/test_timeline_size.py b/test_runner/regress/test_timeline_size.py
index f6e4a667a4..d754ce0aa0 100644
--- a/test_runner/regress/test_timeline_size.py
+++ b/test_runner/regress/test_timeline_size.py
@@ -306,9 +306,7 @@ def test_timeline_physical_size_init(
     neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
 ):
     if remote_storage_kind is not None:
-        neon_env_builder.enable_remote_storage(
-            remote_storage_kind, "test_timeline_physical_size_init"
-        )
+        neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start()
 
@@ -349,9 +347,7 @@ def test_timeline_physical_size_post_checkpoint(
     neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
 ):
     if remote_storage_kind is not None:
-        neon_env_builder.enable_remote_storage(
-            remote_storage_kind, "test_timeline_physical_size_init"
-        )
+        neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start()
 
@@ -382,9 +378,7 @@ def test_timeline_physical_size_post_compaction(
     neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
 ):
     if remote_storage_kind is not None:
-        neon_env_builder.enable_remote_storage(
-            remote_storage_kind, "test_timeline_physical_size_init"
-        )
+        neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     # Disable background compaction as we don't want it to happen after `get_physical_size` request
     # and before checking the expected size on disk, which makes the assertion failed
@@ -437,9 +431,7 @@ def test_timeline_physical_size_post_gc(
     neon_env_builder: NeonEnvBuilder, remote_storage_kind: Optional[RemoteStorageKind]
 ):
     if remote_storage_kind is not None:
-        neon_env_builder.enable_remote_storage(
-            remote_storage_kind, "test_timeline_physical_size_init"
-        )
+        neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     # Disable background compaction and GC as we don't want it to happen after `get_physical_size` request
     # and before checking the expected size on disk, which makes the assertion failed
@@ -572,9 +564,7 @@ def test_tenant_physical_size(
     random.seed(100)
 
     if remote_storage_kind is not None:
-        neon_env_builder.enable_remote_storage(
-            remote_storage_kind, "test_timeline_physical_size_init"
-        )
+        neon_env_builder.enable_remote_storage(remote_storage_kind)
 
     env = neon_env_builder.init_start()
 
diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py
index 8ca93845b2..119a597d43 100644
--- a/test_runner/regress/test_wal_acceptor.py
+++ b/test_runner/regress/test_wal_acceptor.py
@@ -439,7 +439,6 @@ def test_wal_backup(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Remot
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_safekeepers_wal_backup",
     )
 
     neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
@@ -491,7 +490,6 @@ def test_s3_wal_replay(neon_env_builder: NeonEnvBuilder, remote_storage_kind: Re
 
     neon_env_builder.enable_remote_storage(
         remote_storage_kind=remote_storage_kind,
-        test_name="test_s3_wal_replay",
     )
 
     neon_env_builder.remote_storage_users = RemoteStorageUsers.SAFEKEEPER
diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml
index 4ec4b01f66..c170ad2edf 100644
--- a/workspace_hack/Cargo.toml
+++ b/workspace_hack/Cargo.toml
@@ -14,6 +14,10 @@ publish = false
 ### BEGIN HAKARI SECTION
 [dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
+aws-config = { version = "0.56", default-features = false, features = ["credentials-sso", "rustls"] }
+aws-runtime = { version = "0.56", default-features = false, features = ["event-stream"] }
+aws-sigv4 = { version = "0.56", features = ["sign-eventstream"] }
+aws-smithy-http = { version = "0.56", default-features = false, features = ["event-stream", "rt-tokio"] }
 axum = { version = "0.6", features = ["ws"] }
 base64 = { version = "0.21", features = ["alloc"] }
 bytes = { version = "1", features = ["serde"] }
@@ -21,7 +25,6 @@ chrono = { version = "0.4", default-features = false, features = ["clock", "serd
 clap = { version = "4", features = ["derive", "string"] }
 clap_builder = { version = "4", default-features = false, features = ["color", "help", "std", "string", "suggestions", "usage"] }
 crossbeam-utils = { version = "0.8" }
-digest = { version = "0.10", features = ["mac", "std"] }
 either = { version = "1" }
 fail = { version = "0.5", default-features = false, features = ["failpoints"] }
 futures = { version = "0.3" }
@@ -30,6 +33,7 @@ futures-core = { version = "0.3" }
 futures-executor = { version = "0.3" }
 futures-sink = { version = "0.3" }
 futures-util = { version = "0.3", features = ["channel", "io", "sink"] }
+hex = { version = "0.4", features = ["serde"] }
 hyper = { version = "0.14", features = ["full"] }
 itertools = { version = "0.10" }
 libc = { version = "0.2", features = ["extra_traits"] }
@@ -51,6 +55,7 @@ serde = { version = "1", features = ["alloc", "derive"] }
 serde_json = { version = "1", features = ["raw_value"] }
 smallvec = { version = "1", default-features = false, features = ["write"] }
 socket2 = { version = "0.4", default-features = false, features = ["all"] }
+time = { version = "0.3", features = ["formatting", "macros", "parsing"] }
 tokio = { version = "1", features = ["fs", "io-std", "io-util", "macros", "net", "process", "rt-multi-thread", "signal", "test-util"] }
 tokio-rustls = { version = "0.24" }
 tokio-util = { version = "0.7", features = ["codec", "io"] }
@@ -60,6 +65,7 @@ tower = { version = "0.4", features = ["balance", "buffer", "limit", "retry", "t
 tracing = { version = "0.1", features = ["log"] }
 tracing-core = { version = "0.1" }
 url = { version = "2", features = ["serde"] }
+uuid = { version = "1", features = ["serde", "v4"] }
 
 [build-dependencies]
 anyhow = { version = "1", features = ["backtrace"] }